From 3f1f808b9eeb3f7cd923c6b89fbb57583202e76d Mon Sep 17 00:00:00 2001 From: Dustin Firebaugh Date: Sat, 8 Mar 2025 23:51:58 -0500 Subject: [PATCH] Full-file syntax highlighting for diff pages (#33766) Fix #33358, fix #21970 This adds a step in the `GitDiffForRender` that does syntax highlighting for the entire file and then only references lines from that syntax highlighted code. This allows things like multi-line comments to be syntax highlighted correctly. --------- Co-authored-by: wxiaoguang --- modules/git/blob.go | 20 +- modules/highlight/highlight.go | 3 +- routers/api/v1/repo/pull.go | 3 +- routers/web/repo/blame.go | 2 +- routers/web/repo/commit.go | 2 +- routers/web/repo/compare.go | 2 +- routers/web/repo/pull.go | 2 +- services/gitdiff/gitdiff.go | 352 ++++++++++++++----------- services/gitdiff/gitdiff_test.go | 19 +- services/gitdiff/highlightdiff.go | 94 +++++-- services/gitdiff/highlightdiff_test.go | 137 ++++------ services/repository/files/diff_test.go | 1 - templates/repo/diff/image_diff.tmpl | 12 +- tests/integration/pull_commit_test.go | 37 +-- 14 files changed, 362 insertions(+), 324 deletions(-) diff --git a/modules/git/blob.go b/modules/git/blob.go index bcecb42e16ebb..b7857dbbc6129 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -7,6 +7,7 @@ package git import ( "bytes" "encoding/base64" + "errors" "io" "code.gitea.io/gitea/modules/typesniffer" @@ -34,8 +35,9 @@ func (b *Blob) GetBlobContent(limit int64) (string, error) { return string(buf), err } -// GetBlobLineCount gets line count of the blob -func (b *Blob) GetBlobLineCount() (int, error) { +// GetBlobLineCount gets line count of the blob. +// It will also try to write the content to w if it's not nil, then we could pre-fetch the content without reading it again. +func (b *Blob) GetBlobLineCount(w io.Writer) (int, error) { reader, err := b.DataAsync() if err != nil { return 0, err @@ -44,20 +46,20 @@ func (b *Blob) GetBlobLineCount() (int, error) { buf := make([]byte, 32*1024) count := 1 lineSep := []byte{'\n'} - - c, err := reader.Read(buf) - if c == 0 && err == io.EOF { - return 0, nil - } for { + c, err := reader.Read(buf) + if w != nil { + if _, err := w.Write(buf[:c]); err != nil { + return count, err + } + } count += bytes.Count(buf[:c], lineSep) switch { - case err == io.EOF: + case errors.Is(err, io.EOF): return count, nil case err != nil: return count, err } - c, err = reader.Read(buf) } } diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index d7ab3f7afd3e7..77f24fa3f380b 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -11,6 +11,7 @@ import ( gohtml "html" "html/template" "io" + "path" "path/filepath" "strings" "sync" @@ -83,7 +84,7 @@ func Code(fileName, language, code string) (output template.HTML, lexerName stri } if lexer == nil { - if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { + if val, ok := highlightMapping[path.Ext(fileName)]; ok { // use mapped value to find lexer lexer = lexers.Get(val) } diff --git a/routers/api/v1/repo/pull.go b/routers/api/v1/repo/pull.go index 3698ff601045a..2f32f01c4f518 100644 --- a/routers/api/v1/repo/pull.go +++ b/routers/api/v1/repo/pull.go @@ -1591,8 +1591,7 @@ func GetPullRequestFiles(ctx *context.APIContext) { maxLines := setting.Git.MaxGitDiffLines // FIXME: If there are too many files in the repo, may cause some unpredictable issues. - // FIXME: it doesn't need to call "GetDiff" to do various parsing and highlighting - diff, err := gitdiff.GetDiff(ctx, baseGitRepo, + diff, err := gitdiff.GetDiffForAPI(ctx, baseGitRepo, &gitdiff.DiffOptions{ BeforeCommitID: startCommitID, AfterCommitID: endCommitID, diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index eddfcab07824e..e79029a55e4fc 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -97,7 +97,7 @@ func RefBlame(ctx *context.Context) { return } - ctx.Data["NumLines"], err = blob.GetBlobLineCount() + ctx.Data["NumLines"], err = blob.GetBlobLineCount(nil) if err != nil { ctx.NotFound(err) return diff --git a/routers/web/repo/commit.go b/routers/web/repo/commit.go index 997e9f68696e4..bbdcf9875eee8 100644 --- a/routers/web/repo/commit.go +++ b/routers/web/repo/commit.go @@ -314,7 +314,7 @@ func Diff(ctx *context.Context) { maxLines, maxFiles = -1, -1 } - diff, err := gitdiff.GetDiff(ctx, gitRepo, &gitdiff.DiffOptions{ + diff, err := gitdiff.GetDiffForRender(ctx, gitRepo, &gitdiff.DiffOptions{ AfterCommitID: commitID, SkipTo: ctx.FormString("skip-to"), MaxLines: maxLines, diff --git a/routers/web/repo/compare.go b/routers/web/repo/compare.go index d9c6305ce17df..c1726d0790e61 100644 --- a/routers/web/repo/compare.go +++ b/routers/web/repo/compare.go @@ -614,7 +614,7 @@ func PrepareCompareDiff( fileOnly := ctx.FormBool("file-only") - diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo, + diff, err := gitdiff.GetDiffForRender(ctx, ci.HeadGitRepo, &gitdiff.DiffOptions{ BeforeCommitID: beforeCommitID, AfterCommitID: headCommitID, diff --git a/routers/web/repo/pull.go b/routers/web/repo/pull.go index edbf399c772ac..9eb6917617428 100644 --- a/routers/web/repo/pull.go +++ b/routers/web/repo/pull.go @@ -749,7 +749,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi diffOptions.BeforeCommitID = startCommitID } - diff, err := gitdiff.GetDiff(ctx, gitRepo, diffOptions, files...) + diff, err := gitdiff.GetDiffForRender(ctx, gitRepo, diffOptions, files...) if err != nil { ctx.ServerError("GetDiff", err) return diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index bf39e70127465..3a552547b8997 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -31,6 +31,7 @@ import ( "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -75,12 +76,12 @@ const ( // DiffLine represents a line difference in a DiffSection. type DiffLine struct { - LeftIdx int - RightIdx int - Match int + LeftIdx int // line number, 1-based + RightIdx int // line number, 1-based + Match int // line number, 1-based Type DiffLineType Content string - Comments issues_model.CommentList + Comments issues_model.CommentList // related PR code comments SectionInfo *DiffLineSectionInfo } @@ -95,9 +96,18 @@ type DiffLineSectionInfo struct { RightHunkSize int } +// DiffHTMLOperation is the HTML version of diffmatchpatch.Diff +type DiffHTMLOperation struct { + Type diffmatchpatch.Operation + HTML template.HTML +} + // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 +// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff" +const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024 + // GetType returns the type of DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) @@ -112,8 +122,9 @@ func (d *DiffLine) GetHTMLDiffLineType() string { return "del" case DiffLineSection: return "tag" + default: + return "same" } - return "same" } // CanComment returns whether a line can get commented @@ -196,38 +207,6 @@ type DiffSection struct { Lines []*DiffLine } -var ( - addedCodePrefix = []byte(``) - removedCodePrefix = []byte(``) - codeTagSuffix = []byte(``) -) - -func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string { - buf := bytes.NewBuffer(nil) - // restore the line wrapper tags and , if necessary - for _, tag := range lineWrapperTags { - buf.WriteString(tag) - } - for _, diff := range diffs { - switch { - case diff.Type == diffmatchpatch.DiffEqual: - buf.WriteString(diff.Text) - case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: - buf.Write(addedCodePrefix) - buf.WriteString(diff.Text) - buf.Write(codeTagSuffix) - case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: - buf.Write(removedCodePrefix) - buf.WriteString(diff.Text) - buf.Write(codeTagSuffix) - } - } - for range lineWrapperTags { - buf.WriteString("") - } - return buf.String() -} - // GetLine gets a specific line by type (add or del) and file line number func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine { var ( @@ -271,10 +250,10 @@ LOOP: return nil } -var diffMatchPatch = diffmatchpatch.New() - -func init() { - diffMatchPatch.DiffEditCost = 100 +func defaultDiffMatchPatch() *diffmatchpatch.DiffMatchPatch { + dmp := diffmatchpatch.New() + dmp.DiffEditCost = 100 + return dmp } // DiffInline is a struct that has a content and escape status @@ -283,97 +262,114 @@ type DiffInline struct { Content template.HTML } -// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped +// DiffInlineWithUnicodeEscape makes a DiffInline with hidden Unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline { status, content := charset.EscapeControlHTML(s, locale) return DiffInline{EscapeStatus: status, Content: content} } -// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped -func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline { - highlighted, _ := highlight.Code(fileName, language, code) - status, content := charset.EscapeControlHTML(highlighted, locale) - return DiffInline{EscapeStatus: status, Content: content} -} - -// GetComputedInlineDiffFor computes inline diff for the given line. -func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { +func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *DiffLine, fileLanguage string, highlightLines map[int]template.HTML) template.HTML { + h, ok := highlightLines[lineIdx-1] + if ok { + return h + } + if diffLine.Content == "" { + return "" + } if setting.Git.DisableDiffHighlight { - return getLineContent(diffLine.Content[1:], locale) + return template.HTML(html.EscapeString(diffLine.Content[1:])) } + h, _ = highlight.Code(diffSection.Name, fileLanguage, diffLine.Content[1:]) + return h +} - var ( - compareDiffLine *DiffLine - diff1 string - diff2 string - ) - - language := "" +func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline { + var fileLanguage string + var highlightedLeftLines, highlightedRightLines map[int]template.HTML + // when a "diff section" is manually prepared by ExcerptBlob, it doesn't have "file" information if diffSection.file != nil { - language = diffSection.file.Language + fileLanguage = diffSection.file.Language + highlightedLeftLines, highlightedRightLines = diffSection.file.highlightedLeftLines, diffSection.file.highlightedRightLines + } + + hcd := newHighlightCodeDiff() + var diff1, diff2, lineHTML template.HTML + if leftLine != nil { + diff1 = diffSection.getLineContentForRender(leftLine.LeftIdx, leftLine, fileLanguage, highlightedLeftLines) + lineHTML = util.Iif(diffLineType == DiffLinePlain, diff1, "") + } + if rightLine != nil { + diff2 = diffSection.getLineContentForRender(rightLine.RightIdx, rightLine, fileLanguage, highlightedRightLines) + lineHTML = util.Iif(diffLineType == DiffLinePlain, diff2, "") } + if diffLineType != DiffLinePlain { + // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back + // if the line wrappers are still needed in the future, it can be added back by "diffLineWithHighlightWrapper(hcd.lineWrapperTags. ...)" + lineHTML = hcd.diffLineWithHighlight(diffLineType, diff1, diff2) + } + return DiffInlineWithUnicodeEscape(lineHTML, locale) +} +// GetComputedInlineDiffFor computes inline diff for the given line. +func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: return getLineContent(diffLine.Content[1:], locale) case DiffLineAdd: - compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) - if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - diff1 = compareDiffLine.Content - diff2 = diffLine.Content + compareDiffLine := diffSection.GetLine(DiffLineDel, diffLine.RightIdx) + return diffSection.getDiffLineForRender(DiffLineAdd, compareDiffLine, diffLine, locale) case DiffLineDel: - compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) - if compareDiffLine == nil { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - diff1 = diffLine.Content - diff2 = compareDiffLine.Content - default: - if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale) - } - return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale) + compareDiffLine := diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) + return diffSection.getDiffLineForRender(DiffLineDel, diffLine, compareDiffLine, locale) + default: // Plain + // TODO: there was an "if" check: `if diffLine.Content >strings.IndexByte(" +-", diffLine.Content[0]) > -1 { ... } else { ... }` + // no idea why it needs that check, it seems that the "if" should be always true, so try to simplify the code + return diffSection.getDiffLineForRender(DiffLinePlain, nil, diffLine, locale) } - - hcd := newHighlightCodeDiff() - diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:]) - // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back - // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)" - diffHTML := diffToHTML(nil, diffRecord, diffLine.Type) - return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale) } // DiffFile represents a file diff. type DiffFile struct { - Name string - NameHash string - OldName string - Index int - Addition, Deletion int - Type DiffFileType - IsCreated bool - IsDeleted bool - IsBin bool - IsLFSFile bool - IsRenamed bool - IsAmbiguous bool - Sections []*DiffSection - IsIncomplete bool - IsIncompleteLineTooLong bool - IsProtected bool - IsGenerated bool - IsVendored bool + // only used internally to parse Ambiguous filenames + isAmbiguous bool + + // basic fields (parsed from diff result) + Name string + NameHash string + OldName string + Addition int + Deletion int + Type DiffFileType + Mode string + OldMode string + IsCreated bool + IsDeleted bool + IsBin bool + IsLFSFile bool + IsRenamed bool + IsSubmodule bool + // basic fields but for render purpose only + Sections []*DiffSection + IsIncomplete bool + IsIncompleteLineTooLong bool + + // will be filled by the extra loop in GitDiffForRender + Language string + IsGenerated bool + IsVendored bool + SubmoduleDiffInfo *SubmoduleDiffInfo // IsSubmodule==true, then there must be a SubmoduleDiffInfo + + // will be filled by route handler + IsProtected bool + + // will be filled by SyncUserSpecificDiff IsViewed bool // User specific HasChangedSinceLastReview bool // User specific - Language string - Mode string - OldMode string - IsSubmodule bool // if IsSubmodule==true, then there must be a SubmoduleDiffInfo - SubmoduleDiffInfo *SubmoduleDiffInfo + // for render purpose only, will be filled by the extra loop in GitDiffForRender + highlightedLeftLines map[int]template.HTML + highlightedRightLines map[int]template.HTML } // GetType returns type of diff file. @@ -381,18 +377,23 @@ func (diffFile *DiffFile) GetType() int { return int(diffFile.Type) } -// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file -func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *DiffSection { +type DiffLimitedContent struct { + LeftContent, RightContent *limitByteWriter +} + +// GetTailSectionAndLimitedContent creates a fake DiffLineSection if the last section is not the end of the file +func (diffFile *DiffFile) GetTailSectionAndLimitedContent(leftCommit, rightCommit *git.Commit) (_ *DiffSection, diffLimitedContent DiffLimitedContent) { if len(diffFile.Sections) == 0 || leftCommit == nil || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { - return nil + return nil, diffLimitedContent } lastSection := diffFile.Sections[len(diffFile.Sections)-1] lastLine := lastSection.Lines[len(lastSection.Lines)-1] - leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) - rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name) + leftLineCount, leftContent := getCommitFileLineCountAndLimitedContent(leftCommit, diffFile.Name) + rightLineCount, rightContent := getCommitFileLineCountAndLimitedContent(rightCommit, diffFile.Name) + diffLimitedContent = DiffLimitedContent{LeftContent: leftContent, RightContent: rightContent} if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx { - return nil + return nil, diffLimitedContent } tailDiffLine := &DiffLine{ Type: DiffLineSection, @@ -406,7 +407,7 @@ func (diffFile *DiffFile) GetTailSection(leftCommit, rightCommit *git.Commit) *D }, } tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}} - return tailSection + return tailSection, diffLimitedContent } // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted @@ -438,16 +439,29 @@ func (diffFile *DiffFile) ModeTranslationKey(mode string) string { } } -func getCommitFileLineCount(commit *git.Commit, filePath string) int { +type limitByteWriter struct { + buf bytes.Buffer + limit int +} + +func (l *limitByteWriter) Write(p []byte) (n int, err error) { + if l.buf.Len()+len(p) > l.limit { + p = p[:l.limit-l.buf.Len()] + } + return l.buf.Write(p) +} + +func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string) (lineCount int, limitWriter *limitByteWriter) { blob, err := commit.GetBlobByPath(filePath) if err != nil { - return 0 + return 0, nil } - lineCount, err := blob.GetBlobLineCount() + w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1} + lineCount, err = blob.GetBlobLineCount(w) if err != nil { - return 0 + return 0, nil } - return lineCount + return lineCount, w } // Diff represents a difference between two git trees. @@ -526,13 +540,13 @@ parsingLoop: } if maxFiles > -1 && len(diff.Files) >= maxFiles { - lastFile := createDiffFile(diff, line) + lastFile := createDiffFile(line) diff.End = lastFile.Name diff.IsIncomplete = true break parsingLoop } - curFile = createDiffFile(diff, line) + curFile = createDiffFile(line) if skipping { if curFile.Name != skipToFile { line, err = skipToNextDiffHead(input) @@ -615,28 +629,28 @@ parsingLoop: case strings.HasPrefix(line, "rename from "): curFile.IsRenamed = true curFile.Type = DiffFileRename - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.OldName = prepareValue(line, "rename from ") } case strings.HasPrefix(line, "rename to "): curFile.IsRenamed = true curFile.Type = DiffFileRename - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.Name = prepareValue(line, "rename to ") - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } case strings.HasPrefix(line, "copy from "): curFile.IsRenamed = true curFile.Type = DiffFileCopy - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.OldName = prepareValue(line, "copy from ") } case strings.HasPrefix(line, "copy to "): curFile.IsRenamed = true curFile.Type = DiffFileCopy - if curFile.IsAmbiguous { + if curFile.isAmbiguous { curFile.Name = prepareValue(line, "copy to ") - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } case strings.HasPrefix(line, "new file"): curFile.Type = DiffFileAdd @@ -663,7 +677,7 @@ parsingLoop: curFile.IsBin = true case strings.HasPrefix(line, "--- "): // Handle ambiguous filenames - if curFile.IsAmbiguous { + if curFile.isAmbiguous { // The shortest string that can end up here is: // "--- a\t\n" without the quotes. // This line has a len() of 7 but doesn't contain a oldName. @@ -681,7 +695,7 @@ parsingLoop: // Otherwise do nothing with this line case strings.HasPrefix(line, "+++ "): // Handle ambiguous filenames - if curFile.IsAmbiguous { + if curFile.isAmbiguous { if len(line) > 6 && line[4] == 'b' { curFile.Name = line[6 : len(line)-1] if line[len(line)-2] == '\t' { @@ -693,7 +707,7 @@ parsingLoop: } else { curFile.Name = curFile.OldName } - curFile.IsAmbiguous = false + curFile.isAmbiguous = false } // Otherwise do nothing with this line, but now switch to parsing hunks lineBytes, isFragment, err := parseHunks(ctx, curFile, maxLines, maxLineCharacters, input) @@ -1006,7 +1020,7 @@ func parseHunks(ctx context.Context, curFile *DiffFile, maxLines, maxLineCharact } } -func createDiffFile(diff *Diff, line string) *DiffFile { +func createDiffFile(line string) *DiffFile { // The a/ and b/ filenames are the same unless rename/copy is involved. // Especially, even for a creation or a deletion, /dev/null is not used // in place of the a/ or b/ filenames. @@ -1017,12 +1031,11 @@ func createDiffFile(diff *Diff, line string) *DiffFile { // // Path names are quoted if necessary. // - // This means that you should always be able to determine the file name even when there + // This means that you should always be able to determine the file name even when // there is potential ambiguity... // // but we can be simpler with our heuristics by just forcing git to prefix things nicely curFile := &DiffFile{ - Index: len(diff.Files) + 1, Type: DiffFileChange, Sections: make([]*DiffSection, 0, 10), } @@ -1034,7 +1047,7 @@ func createDiffFile(diff *Diff, line string) *DiffFile { curFile.OldName, oldNameAmbiguity = readFileName(rd) curFile.Name, newNameAmbiguity = readFileName(rd) if oldNameAmbiguity && newNameAmbiguity { - curFile.IsAmbiguous = true + curFile.isAmbiguous = true // OK we should bet that the oldName and the newName are the same if they can be made to be same // So we need to start again ... if (len(line)-len(cmdDiffHead)-1)%2 == 0 { @@ -1121,20 +1134,21 @@ func guessBeforeCommitForDiff(gitRepo *git.Repository, beforeCommitID string, af return actualBeforeCommit, actualBeforeCommitID, nil } -// GetDiff builds a Diff between two commits of a repository. +// getDiffBasic builds a Diff between two commits of a repository. // Passing the empty string as beforeCommitID returns a diff from the parent commit. // The whitespaceBehavior is either an empty string or a git flag -func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { +// Returned beforeCommit could be nil if the afterCommit doesn't have parent commit +func getDiffBasic(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (_ *Diff, beforeCommit, afterCommit *git.Commit, err error) { repoPath := gitRepo.Path - afterCommit, err := gitRepo.GetCommit(opts.AfterCommitID) + afterCommit, err = gitRepo.GetCommit(opts.AfterCommitID) if err != nil { - return nil, err + return nil, nil, nil, err } - actualBeforeCommit, actualBeforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit) + beforeCommit, beforeCommitID, err := guessBeforeCommitForDiff(gitRepo, opts.BeforeCommitID, afterCommit) if err != nil { - return nil, err + return nil, nil, nil, err } cmdDiff := git.NewCommand(). @@ -1150,7 +1164,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi parsePatchSkipToFile = "" } - cmdDiff.AddDynamicArguments(actualBeforeCommitID.String(), opts.AfterCommitID) + cmdDiff.AddDynamicArguments(beforeCommitID.String(), opts.AfterCommitID) cmdDiff.AddDashesAndList(files...) cmdCtx, cmdCancel := context.WithCancel(ctx) @@ -1180,12 +1194,25 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi // Ensure the git process is killed if it didn't exit already cmdCancel() if err != nil { - return nil, fmt.Errorf("unable to ParsePatch: %w", err) + return nil, nil, nil, fmt.Errorf("unable to ParsePatch: %w", err) } diff.Start = opts.SkipTo + return diff, beforeCommit, afterCommit, nil +} - checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID) - defer deferable() +func GetDiffForAPI(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + diff, _, _, err := getDiffBasic(ctx, gitRepo, opts, files...) + return diff, err +} + +func GetDiffForRender(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { + diff, beforeCommit, afterCommit, err := getDiffBasic(ctx, gitRepo, opts, files...) + if err != nil { + return nil, err + } + + checker, deferrable := gitRepo.CheckAttributeReader(opts.AfterCommitID) + defer deferrable() for _, diffFile := range diff.Files { isVendored := optional.None[bool]() @@ -1205,7 +1232,7 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi // Populate Submodule URLs if diffFile.SubmoduleDiffInfo != nil { - diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, actualBeforeCommit, afterCommit) + diffFile.SubmoduleDiffInfo.PopulateURL(diffFile, beforeCommit, afterCommit) } if !isVendored.Has() { @@ -1217,15 +1244,46 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi isGenerated = optional.Some(analyze.IsGenerated(diffFile.Name)) } diffFile.IsGenerated = isGenerated.Value() - - tailSection := diffFile.GetTailSection(actualBeforeCommit, afterCommit) + tailSection, limitedContent := diffFile.GetTailSectionAndLimitedContent(beforeCommit, afterCommit) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) } + + if !setting.Git.DisableDiffHighlight { + if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize { + diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String()) + } + if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize { + diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String()) + } + } } + return diff, nil } +func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML { + highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content) + splitLines := strings.Split(string(highlightedNewContent), "\n") + lines := make(map[int]template.HTML, len(splitLines)) + // only save the highlighted lines we need, but not the whole file, to save memory + for _, sec := range diffFile.Sections { + for _, ln := range sec.Lines { + lineIdx := ln.LeftIdx + if !isLeft { + lineIdx = ln.RightIdx + } + if lineIdx >= 1 { + idx := lineIdx - 1 + if idx < len(splitLines) { + lines[idx] = template.HTML(splitLines[idx]) + } + } + } + } + return lines +} + type DiffShortStat struct { NumFiles, TotalAddition, TotalDeletion int } diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 41b4077cf24d3..71394b191587e 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -17,27 +17,10 @@ import ( "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/setting" - dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestDiffToHTML(t *testing.T) { - assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "foo "}, - {Type: dmp.DiffInsert, Text: "bar"}, - {Type: dmp.DiffDelete, Text: " baz"}, - {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineAdd)) - - assert.Equal(t, "foo bar biz", diffToHTML(nil, []dmp.Diff{ - {Type: dmp.DiffEqual, Text: "foo "}, - {Type: dmp.DiffDelete, Text: "bar"}, - {Type: dmp.DiffInsert, Text: " baz"}, - {Type: dmp.DiffEqual, Text: " biz"}, - }, DiffLineDel)) -} - func TestParsePatch_skipTo(t *testing.T) { type testcase struct { name string @@ -621,7 +604,7 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) { defer gitRepo.Close() for _, behavior := range []git.TrustedCmdArgs{{"-w"}, {"--ignore-space-at-eol"}, {"-b"}, nil} { - diffs, err := GetDiff(t.Context(), gitRepo, + diffs, err := GetDiffForAPI(t.Context(), gitRepo, &DiffOptions{ AfterCommitID: "d8e0bbb45f200e67d9a784ce55bd90821af45ebd", BeforeCommitID: "72866af952e98d02a73003501836074b286a78f6", diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go index 35d48445504ae..5891e612490f3 100644 --- a/services/gitdiff/highlightdiff.go +++ b/services/gitdiff/highlightdiff.go @@ -4,10 +4,10 @@ package gitdiff import ( + "bytes" + "html/template" "strings" - "code.gitea.io/gitea/modules/highlight" - "github.com/sergi/go-diff/diffmatchpatch" ) @@ -77,7 +77,7 @@ func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool { return hcd.placeholderBegin <= r && r < hcd.placeholderBegin+rune(hcd.placeholderMaxCount) } -func (hcd *highlightCodeDiff) collectUsedRunes(code string) { +func (hcd *highlightCodeDiff) collectUsedRunes(code template.HTML) { for _, r := range code { if hcd.isInPlaceholderRange(r) { // put the existing rune (used by code) in map, then this rune won't be used a placeholder anymore. @@ -86,27 +86,76 @@ func (hcd *highlightCodeDiff) collectUsedRunes(code string) { } } -func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff { +func (hcd *highlightCodeDiff) diffLineWithHighlight(lineType DiffLineType, codeA, codeB template.HTML) template.HTML { + return hcd.diffLineWithHighlightWrapper(nil, lineType, codeA, codeB) +} + +func (hcd *highlightCodeDiff) diffLineWithHighlightWrapper(lineWrapperTags []string, lineType DiffLineType, codeA, codeB template.HTML) template.HTML { hcd.collectUsedRunes(codeA) hcd.collectUsedRunes(codeB) - highlightCodeA, _ := highlight.Code(filename, language, codeA) - highlightCodeB, _ := highlight.Code(filename, language, codeB) + convertedCodeA := hcd.convertToPlaceholders(codeA) + convertedCodeB := hcd.convertToPlaceholders(codeB) + + dmp := defaultDiffMatchPatch() + diffs := dmp.DiffMain(convertedCodeA, convertedCodeB, true) + diffs = dmp.DiffCleanupEfficiency(diffs) + + buf := bytes.NewBuffer(nil) - convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA)) - convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB)) + // restore the line wrapper tags and , if necessary + for _, tag := range lineWrapperTags { + buf.WriteString(tag) + } - diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true) - diffs = diffMatchPatch.DiffCleanupEfficiency(diffs) + addedCodePrefix := hcd.registerTokenAsPlaceholder(``) + removedCodePrefix := hcd.registerTokenAsPlaceholder(``) + codeTagSuffix := hcd.registerTokenAsPlaceholder(``) - for i := range diffs { - hcd.recoverOneDiff(&diffs[i]) + if codeTagSuffix != 0 { + for _, diff := range diffs { + switch { + case diff.Type == diffmatchpatch.DiffEqual: + buf.WriteString(diff.Text) + case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: + buf.WriteRune(addedCodePrefix) + buf.WriteString(diff.Text) + buf.WriteRune(codeTagSuffix) + case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: + buf.WriteRune(removedCodePrefix) + buf.WriteString(diff.Text) + buf.WriteRune(codeTagSuffix) + } + } + } else { + // placeholder map space is exhausted + for _, diff := range diffs { + take := diff.Type == diffmatchpatch.DiffEqual || (diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd) || (diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel) + if take { + buf.WriteString(diff.Text) + } + } } - return diffs + for range lineWrapperTags { + buf.WriteString("") + } + return hcd.recoverOneDiff(buf.String()) +} + +func (hcd *highlightCodeDiff) registerTokenAsPlaceholder(token string) rune { + placeholder, ok := hcd.tokenPlaceholderMap[token] + if !ok { + placeholder = hcd.nextPlaceholder() + if placeholder != 0 { + hcd.tokenPlaceholderMap[token] = placeholder + hcd.placeholderTokenMap[placeholder] = token + } + } + return placeholder } // convertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes. -func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { +func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) string { var tagStack []string res := strings.Builder{} @@ -115,6 +164,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { var beforeToken, token string var valid bool + htmlCode := string(htmlContent) // the standard chroma highlight HTML is " ... " for { beforeToken, token, htmlCode, valid = extractHTMLToken(htmlCode) @@ -151,14 +201,7 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { } // else: impossible // remember the placeholder and token in the map - placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap] - if !ok { - placeholder = hcd.nextPlaceholder() - if placeholder != 0 { - hcd.tokenPlaceholderMap[tokenInMap] = placeholder - hcd.placeholderTokenMap[placeholder] = tokenInMap - } - } + placeholder := hcd.registerTokenAsPlaceholder(tokenInMap) if placeholder != 0 { res.WriteRune(placeholder) // use the placeholder to replace the token @@ -179,11 +222,11 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string { return res.String() } -func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { +func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML { sb := strings.Builder{} var tagStack []string - for _, r := range diff.Text { + for _, r := range str { token, ok := hcd.placeholderTokenMap[r] if !ok || token == "" { sb.WriteRune(r) // if the rune is not a placeholder, write it as it is @@ -217,6 +260,5 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) { } // else: impossible. every tag was pushed into the stack by the code above and is valid HTML opening tag } } - - diff.Text = sb.String() + return template.HTML(sb.String()) } diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index 545a060e20491..16649682b4fc6 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -5,121 +5,72 @@ package gitdiff import ( "fmt" + "html/template" "strings" "testing" - "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" ) func TestDiffWithHighlight(t *testing.T) { - hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.v", "", - " run('<>')\n", - " run(db)\n", - ) - - expected := ` run('<>')` - output := diffToHTML(nil, diffs, DiffLineDel) - assert.Equal(t, expected, output) - - expected = ` run(db)` - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) - - hcd = newHighlightCodeDiff() - hcd.placeholderTokenMap['O'] = "" - hcd.placeholderTokenMap['C'] = "" - diff := diffmatchpatch.Diff{} - - diff.Text = "OC" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "", diff.Text) - - diff.Text = "O" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "", diff.Text) - - diff.Text = "C" - hcd.recoverOneDiff(&diff) - assert.Equal(t, "", diff.Text) + t.Run("DiffLineAddDel", func(t *testing.T) { + hcd := newHighlightCodeDiff() + codeA := template.HTML(`x foo y`) + codeB := template.HTML(`x bar y`) + outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB) + assert.Equal(t, `x foo y`, string(outDel)) + outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB) + assert.Equal(t, `x bar y`, string(outAdd)) + }) + + t.Run("OpenCloseTags", func(t *testing.T) { + hcd := newHighlightCodeDiff() + hcd.placeholderTokenMap['O'], hcd.placeholderTokenMap['C'] = "", "" + assert.Equal(t, "", string(hcd.recoverOneDiff("OC"))) + assert.Equal(t, "", string(hcd.recoverOneDiff("O"))) + assert.Equal(t, "", string(hcd.recoverOneDiff("C"))) + }) } func TestDiffWithHighlightPlaceholder(t *testing.T) { hcd := newHighlightCodeDiff() - diffs := hcd.diffWithHighlight( - "main.js", "", - "a='\U00100000'", - "a='\U0010FFFD''", - ) + output := hcd.diffLineWithHighlight(DiffLineDel, "a='\U00100000'", "a='\U0010FFFD''") assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000]) assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD]) - - expected := fmt.Sprintf(`a='%s'`, "\U00100000") - output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel) - assert.Equal(t, expected, output) + expected := fmt.Sprintf(`a='%s'`, "\U00100000") + assert.Equal(t, expected, string(output)) hcd = newHighlightCodeDiff() - diffs = hcd.diffWithHighlight( - "main.js", "", - "a='\U00100000'", - "a='\U0010FFFD'", - ) - expected = fmt.Sprintf(`a='%s'`, "\U0010FFFD") - output = diffToHTML(nil, diffs, DiffLineAdd) - assert.Equal(t, expected, output) + output = hcd.diffLineWithHighlight(DiffLineAdd, "a='\U00100000'", "a='\U0010FFFD'") + expected = fmt.Sprintf(`a='%s'`, "\U0010FFFD") + assert.Equal(t, expected, string(output)) } func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) { hcd := newHighlightCodeDiff() hcd.placeholderMaxCount = 0 - diffs := hcd.diffWithHighlight( - "main.js", "", - "'", - ``, - ) - output := diffToHTML(nil, diffs, DiffLineDel) - expected := fmt.Sprintf(`%s#39;`, "\uFFFD") - assert.Equal(t, expected, output) - - hcd = newHighlightCodeDiff() - hcd.placeholderMaxCount = 0 - diffs = hcd.diffWithHighlight( - "main.js", "", - "a < b", - "a > b", - ) - output = diffToHTML(nil, diffs, DiffLineDel) - expected = fmt.Sprintf(`a %slt; b`, "\uFFFD") - assert.Equal(t, expected, output) - - output = diffToHTML(nil, diffs, DiffLineAdd) - expected = fmt.Sprintf(`a %sgt; b`, "\uFFFD") - assert.Equal(t, expected, output) + placeHolderAmp := string(rune(0xFFFD)) + output := hcd.diffLineWithHighlight(DiffLineDel, `<`, `>`) + assert.Equal(t, placeHolderAmp+"lt;", string(output)) + output = hcd.diffLineWithHighlight(DiffLineAdd, `<`, `>`) + assert.Equal(t, placeHolderAmp+"gt;", string(output)) } func TestDiffWithHighlightTagMatch(t *testing.T) { - totalOverflow := 0 - for i := 0; i < 100; i++ { - hcd := newHighlightCodeDiff() - hcd.placeholderMaxCount = i - diffs := hcd.diffWithHighlight( - "main.js", "", - "a='1'", - "b='2'", - ) - totalOverflow += hcd.placeholderOverflowCount - - output := diffToHTML(nil, diffs, DiffLineDel) - c1 := strings.Count(output, "<`, `>`)) + totalOverflow += hcd.placeholderOverflowCount + assert.Equal(t, strings.Count(output, "
-
+
{{if .blobBase}} @@ -52,7 +52,7 @@
{{if and .blobBase .blobHead}} -
+
@@ -66,7 +66,7 @@
-
+
diff --git a/tests/integration/pull_commit_test.go b/tests/integration/pull_commit_test.go index 8d98349fd3560..fc111f528f698 100644 --- a/tests/integration/pull_commit_test.go +++ b/tests/integration/pull_commit_test.go @@ -5,30 +5,33 @@ package integration import ( "net/http" - "net/url" "testing" pull_service "code.gitea.io/gitea/services/pull" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestListPullCommits(t *testing.T) { - onGiteaRun(t, func(t *testing.T, u *url.URL) { - session := loginUser(t, "user5") - req := NewRequest(t, "GET", "/user2/repo1/pulls/3/commits/list") - resp := session.MakeRequest(t, req, http.StatusOK) - - var pullCommitList struct { - Commits []pull_service.CommitInfo `json:"commits"` - LastReviewCommitSha string `json:"last_review_commit_sha"` - } - DecodeJSON(t, resp, &pullCommitList) - - if assert.Len(t, pullCommitList.Commits, 2) { - assert.Equal(t, "985f0301dba5e7b34be866819cd15ad3d8f508ee", pullCommitList.Commits[0].ID) - assert.Equal(t, "5c050d3b6d2db231ab1f64e324f1b6b9a0b181c2", pullCommitList.Commits[1].ID) - } - assert.Equal(t, "4a357436d925b5c974181ff12a994538ddc5a269", pullCommitList.LastReviewCommitSha) + session := loginUser(t, "user5") + req := NewRequest(t, "GET", "/user2/repo1/pulls/3/commits/list") + resp := session.MakeRequest(t, req, http.StatusOK) + + var pullCommitList struct { + Commits []pull_service.CommitInfo `json:"commits"` + LastReviewCommitSha string `json:"last_review_commit_sha"` + } + DecodeJSON(t, resp, &pullCommitList) + + require.Len(t, pullCommitList.Commits, 2) + assert.Equal(t, "985f0301dba5e7b34be866819cd15ad3d8f508ee", pullCommitList.Commits[0].ID) + assert.Equal(t, "5c050d3b6d2db231ab1f64e324f1b6b9a0b181c2", pullCommitList.Commits[1].ID) + assert.Equal(t, "4a357436d925b5c974181ff12a994538ddc5a269", pullCommitList.LastReviewCommitSha) + + t.Run("CommitBlobExcerpt", func(t *testing.T) { + req = NewRequest(t, "GET", "/user2/repo1/blob_excerpt/985f0301dba5e7b34be866819cd15ad3d8f508ee?last_left=0&last_right=0&left=2&right=2&left_hunk_size=2&right_hunk_size=2&path=README.md&style=split&direction=up") + resp = session.MakeRequest(t, req, http.StatusOK) + assert.Contains(t, resp.Body.String(), `# repo1`) }) }