diff --git a/modules/git/diff.go b/modules/git/diff.go index a198695fc0..d7732eaa29 100644 --- a/modules/git/diff.go +++ b/modules/git/diff.go @@ -28,44 +28,37 @@ const ( // GetRawDiff dumps diff results of repository in given commit ID to io.Writer. func GetRawDiff(repo *Repository, commitID string, diffType RawDiffType, writer io.Writer) (retErr error) { - diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, "", commitID, diffType, "") + cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, "", commitID, diffType, "") if err != nil { - return err + return fmt.Errorf("getRepoRawDiffForFileCmd: %w", err) } - defer func() { - err := diffFinish() - if retErr == nil { - retErr = err // only return command's error if no previous error - } - }() - _, err = io.Copy(writer, diffOutput) - return err + return cmd.WithStdoutCopy(writer).RunWithStderr(repo.Ctx) } // GetFileDiffCutAroundLine cuts the old or new part of the diff of a file around a specific line number func GetFileDiffCutAroundLine( repo *Repository, startCommit, endCommit, treePath string, line int64, old bool, numbersOfLine int, -) (_ string, retErr error) { - diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath) +) (ret string, retErr error) { + cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath) if err != nil { - return "", err + return "", fmt.Errorf("getRepoRawDiffForFileCmd: %w", err) } - defer func() { - err := diffFinish() - if retErr == nil { - retErr = err // only return command's error if no previous error - } - }() - return CutDiffAroundLine(diffOutput, line, old, numbersOfLine) + stdoutReader, stdoutClose := cmd.MakeStdoutPipe() + defer stdoutClose() + cmd.WithPipelineFunc(func(ctx gitcmd.Context) error { + ret, err = CutDiffAroundLine(stdoutReader, line, old, numbersOfLine) + return err + }) + return ret, cmd.RunWithStderr(repo.Ctx) } // getRepoRawDiffForFile returns an 
io.Reader for the diff results of file in given commit ID // and a "finish" function to wait for the git command and clean up resources after reading is done. -func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (io.Reader, func() gitcmd.RunStdError, error) { +func getRepoRawDiffForFileCmd(_ context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (*gitcmd.Command, error) { commit, err := repo.GetCommit(endCommit) if err != nil { - return nil, nil, err + return nil, err } var files []string if len(file) > 0 { @@ -84,7 +77,7 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e } else { c, err := commit.Parent(0) if err != nil { - return nil, nil, err + return nil, err } cmd.AddArguments("diff"). AddOptionFormat("--find-renames=%s", setting.Git.DiffRenameSimilarityThreshold). @@ -99,25 +92,15 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e } else { c, err := commit.Parent(0) if err != nil { - return nil, nil, err + return nil, err } query := fmt.Sprintf("%s...%s", endCommit, c.ID.String()) cmd.AddArguments("format-patch", "--no-signature", "--stdout").AddDynamicArguments(query).AddDashesAndList(files...) 
} default: - return nil, nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType) + return nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType) } - - stdoutReader, stdoutReaderClose := cmd.MakeStdoutPipe() - err = cmd.StartWithStderr(ctx) - if err != nil { - stdoutReaderClose() - return nil, nil, err - } - return stdoutReader, func() gitcmd.RunStdError { - stdoutReaderClose() - return cmd.WaitWithStderr() - }, nil + return cmd, nil } // ParseDiffHunkString parse the diff hunk content and return @@ -254,7 +237,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi } } if err := scanner.Err(); err != nil { - return "", err + return "", fmt.Errorf("CutDiffAroundLine: scan: %w", err) } // No hunk found diff --git a/modules/git/gitcmd/command.go b/modules/git/gitcmd/command.go index f780cdf6c9..e9b51802fe 100644 --- a/modules/git/gitcmd/command.go +++ b/modules/git/gitcmd/command.go @@ -306,6 +306,10 @@ func (c *Command) MakeStdinPipe() (writer PipeWriter, closer func()) { // MakeStdoutPipe creates a reader for the command's stdout. // The returned closer function must be called by the caller to close the pipe. // After the pipe reader is closed, the unread data will be discarded. +// +// If the process (git command) still tries to write after the pipe is closed, the Wait error will be "signal: broken pipe". +// WithPipelineFunc + Run won't return "broken pipe" error in this case if the callback returns no error. +// But if you are calling Start / Wait family functions, you should either drain the pipe before closing it, or handle the Wait error correctly. 
func (c *Command) MakeStdoutPipe() (reader PipeReader, closer func()) { return c.makeStdoutStderr(&c.cmdStdout) } diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index fc8699829c..c7416c7a10 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -11,20 +11,16 @@ import ( gohtml "html" "html/template" "io" - "path" "strings" "sync" - "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" "github.com/alecthomas/chroma/v2" "github.com/alecthomas/chroma/v2/formatters/html" - "github.com/alecthomas/chroma/v2/lexers" "github.com/alecthomas/chroma/v2/styles" - "github.com/go-enry/go-enry/v2" ) // don't index files larger than this many bytes for performance purposes @@ -84,85 +80,21 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) { } } -func getChromaLexerByLanguage(fileName, lang string) chroma.Lexer { - lang, _, _ = strings.Cut(lang, "?") // maybe, the value from gitattributes might contain `?` parameters? - ext := path.Ext(fileName) - // the "lang" might come from enry, it has different naming for some languages - switch lang { - case "F#": - lang = "FSharp" - case "Pascal": - lang = "ObjectPascal" - case "C": - if ext == ".C" || ext == ".H" { - lang = "C++" - } - } - if lang == "" && util.AsciiEqualFold(ext, ".sql") { - // there is a bug when using MySQL lexer: "--\nSELECT", the second line will be rendered as comment incorrectly - lang = "SQL" - } - // lexers.Get is slow if the language name can't be matched directly: it does extra "Match" call to iterate all lexers - return lexers.Get(lang) -} - -// GetChromaLexerWithFallback returns a chroma lexer by given file name, language and code content. All parameters can be optional. -// When code content is provided, it will be slow if no lexer is found by file name or language. -// If no lexer is found, it will return the fallback lexer. 
-func GetChromaLexerWithFallback(fileName, lang string, code []byte) (lexer chroma.Lexer) { - if lang != "" { - lexer = getChromaLexerByLanguage(fileName, lang) - } - - if lexer == nil { - fileExt := path.Ext(fileName) - if val, ok := globalVars().highlightMapping[fileExt]; ok { - lexer = getChromaLexerByLanguage(fileName, val) // use mapped value to find lexer - } - } - - if lexer == nil { - // when using "code" to detect, analyze.GetCodeLanguage is slower, it iterates many rules to detect language from content - // this is the old logic: use enry to detect language, and use chroma to render, but their naming is different for some languages - enryLanguage := analyze.GetCodeLanguage(fileName, code) - lexer = getChromaLexerByLanguage(fileName, enryLanguage) - if lexer == nil { - if enryLanguage != enry.OtherLanguage { - log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", enryLanguage, fileName) - } - lexer = lexers.Match(fileName) // lexers.Match will search by its basename and extname - } - } - - return util.IfZero(lexer, lexers.Fallback) -} - -func renderCode(fileName, language, code string, slowGuess bool) (output template.HTML, lexerName string) { +// RenderCodeSlowGuess tries to get a lexer by file name and language first, +// if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds). 
+func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) { // diff view newline will be passed as empty, change to literal '\n' so it can be copied // preserve literal newline in blame view if code == "" || code == "\n" { - return "\n", "" + return "\n", nil, "" } if len(code) > sizeLimit { - return template.HTML(template.HTMLEscapeString(code)), "" + return template.HTML(template.HTMLEscapeString(code)), nil, "" } - var codeForGuessLexer []byte - if slowGuess { - // it is slower to guess lexer by code content, so only do it when necessary - codeForGuessLexer = util.UnsafeStringToBytes(code) - } - lexer := GetChromaLexerWithFallback(fileName, language, codeForGuessLexer) - return RenderCodeByLexer(lexer, code), formatLexerName(lexer.Config().Name) -} - -func RenderCodeFast(fileName, language, code string) (output template.HTML, lexerName string) { - return renderCode(fileName, language, code, false) -} - -func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexerName string) { - return renderCode(fileName, language, code, true) + lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow + return RenderCodeByLexer(lexer, code), lexer, formatLexerName(lexer.Config().Name) } // RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes @@ -204,7 +136,7 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st html.PreventSurroundingPre(true), ) - lexer := GetChromaLexerWithFallback(fileName, language, code) + lexer := detectChromaLexerWithAnalyze(fileName, language, code) lexerName := formatLexerName(lexer.Config().Name) iterator, err := lexer.Tokenise(nil, string(code)) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 69aff07b04..d026210475 100644 --- a/modules/highlight/highlight_test.go +++ 
b/modules/highlight/highlight_test.go @@ -205,36 +205,3 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) { assert.Equal(t, "a\n", string(ret[0])) assert.Equal(t, "b\n", string(ret[1])) } - -func TestGetChromaLexer(t *testing.T) { - globalVars().highlightMapping[".my-html"] = "HTML" - t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") }) - - cases := []struct { - fileName string - language string - content string - expected string - }{ - {"test.py", "", "", "Python"}, - - {"any-file", "javascript", "", "JavaScript"}, - {"any-file", "", "/* vim: set filetype=python */", "Python"}, - {"any-file", "", "", "fallback"}, - - {"test.fs", "", "", "Forth"}, - {"test.fs", "F#", "", "FSharp"}, - {"test.fs", "", "let x = 1", "FSharp"}, - - {"test.c", "", "", "C"}, - {"test.C", "", "", "C++"}, - {"OLD-CODE.PAS", "", "", "ObjectPascal"}, - {"test.my-html", "", "", "HTML"}, - } - for _, c := range cases { - lexer := GetChromaLexerWithFallback(c.fileName, c.language, []byte(c.content)) - if assert.NotNil(t, lexer, "case: %+v", c) { - assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c) - } - } -} diff --git a/modules/highlight/lexerdetect.go b/modules/highlight/lexerdetect.go new file mode 100644 index 0000000000..5b39617566 --- /dev/null +++ b/modules/highlight/lexerdetect.go @@ -0,0 +1,279 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package highlight + +import ( + "path" + "strings" + "sync" + + "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/log" + + "github.com/alecthomas/chroma/v2" + "github.com/alecthomas/chroma/v2/lexers" + "github.com/go-enry/go-enry/v2" +) + +const mapKeyLowerPrefix = "lower/" + +// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name +// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.) 
+var chromaLexers = sync.OnceValue(func() (ret struct { + conflictingExtLangMap map[string]string + + lowerNameMap map[string]chroma.Lexer // lexer name (lang name) in lower-case + fileBaseMap map[string]chroma.Lexer + fileExtMap map[string]chroma.Lexer + fileParts []struct { + part string + lexer chroma.Lexer + } +}, +) { + ret.lowerNameMap = make(map[string]chroma.Lexer) + ret.fileBaseMap = make(map[string]chroma.Lexer) + ret.fileExtMap = make(map[string]chroma.Lexer) + + // Chroma has overlaps in file extension for different languages, + // When we need to do fast render, there is no way to detect the language by content, + // So we can only choose some default languages for the overlapped file extensions. + ret.conflictingExtLangMap = map[string]string{ + ".as": "ActionScript 3", // ActionScript + ".asm": "NASM", // TASM, NASM, RGBDS Assembly, Z80 Assembly + ".ASM": "NASM", + ".bas": "VB.net", // QBasic + ".bf": "Beef", // Brainfuck + ".fs": "FSharp", // Forth + ".gd": "GDScript", // GDScript3 + ".h": "C", // Objective-C + ".hcl": "Terraform", // HCL + ".hh": "C++", // HolyC + ".inc": "PHP", // ObjectPascal, POVRay, SourcePawn, PHTML + ".m": "Objective-C", // Matlab, Mathematica, Mason + ".mc": "Mason", // MonkeyC + ".network": "SYSTEMD", // INI + ".php": "PHP", // PHTML + ".php3": "PHP", // PHTML + ".php4": "PHP", // PHTML + ".php5": "PHP", // PHTML + ".pl": "Perl", // Prolog, Raku + ".pm": "Perl", // Promela, Raku + ".pp": "ObjectPascal", // Puppet + ".s": "ArmAsm", // GAS + ".S": "ArmAsm", // R, GAS + ".service": "SYSTEMD", // INI + ".socket": "SYSTEMD", // INI + ".sql": "SQL", // MySQL + ".t": "Perl", // Raku + ".ts": "TypeScript", // TypoScript + ".v": "V", // verilog + ".xslt": "HTML", // XML + } + + isPlainPattern := func(key string) bool { + return !strings.ContainsAny(key, "*?[]") // only support simple patterns + } + + setMapWithLowerKey := func(m map[string]chroma.Lexer, key string, lexer chroma.Lexer) { + if _, conflict := m[key]; conflict { + 
panic("duplicate key in lexer map: " + key + ", need to add it to conflictingExtLangMap") + } + m[key] = lexer + m[mapKeyLowerPrefix+strings.ToLower(key)] = lexer + } + + processFileName := func(fileName string, lexer chroma.Lexer) bool { + if isPlainPattern(fileName) { + // full base name match + setMapWithLowerKey(ret.fileBaseMap, fileName, lexer) + return true + } + if strings.HasPrefix(fileName, "*") { + // ext name match: "*.js" + fileExt := strings.Trim(fileName, "*") + if isPlainPattern(fileExt) { + presetName := ret.conflictingExtLangMap[fileExt] + if presetName == "" || lexer.Config().Name == presetName { + setMapWithLowerKey(ret.fileExtMap, fileExt, lexer) + } + return true + } + } + if strings.HasSuffix(fileName, "*") { + // part match: "*.env.*" + filePart := strings.Trim(fileName, "*") + if isPlainPattern(filePart) { + ret.fileParts = append(ret.fileParts, struct { + part string + lexer chroma.Lexer + }{ + part: filePart, + lexer: lexer, + }) + return true + } + } + return false + } + + expandGlobPatterns := func(patterns []string) []string { + // expand patterns like "file.[ch]" to "file.c" and "file.h", only one pair of "[]" is supported, enough for current Chroma lexers + for idx, s := range patterns { + idx1 := strings.IndexByte(s, '[') + idx2 := strings.IndexByte(s, ']') + if idx1 != -1 && idx2 != -1 && idx2 > idx1+1 { + left, mid, right := s[:idx1], s[idx1+1:idx2], s[idx2+1:] + patterns[idx] = left + mid[0:1] + right + for i := 1; i < len(mid); i++ { + patterns = append(patterns, left+mid[i:i+1]+right) + } + } + } + return patterns + } + + // add lexers to our map, for fast lookup + for _, lexer := range lexers.GlobalLexerRegistry.Lexers { + cfg := lexer.Config() + ret.lowerNameMap[strings.ToLower(lexer.Config().Name)] = lexer + for _, alias := range cfg.Aliases { + ret.lowerNameMap[strings.ToLower(alias)] = lexer + } + for _, s := range expandGlobPatterns(cfg.Filenames) { + if !processFileName(s, lexer) { + panic("unsupported file name pattern 
in lexer: " + s) + } + } + for _, s := range expandGlobPatterns(cfg.AliasFilenames) { + if !processFileName(s, lexer) { + panic("unsupported alias file name pattern in lexer: " + s) + } + } + } + + // final check: make sure the default ext-lang mapping is correct, nothing is missing + for ext, lexerName := range ret.conflictingExtLangMap { + if lexer, ok := ret.fileExtMap[ext]; !ok || lexer.Config().Name != lexerName { + panic("missing default ext-lang mapping for: " + ext) + } + } + return ret +}) + +func normalizeFileNameLang(fileName, fileLang string) (string, string) { + fileName = path.Base(fileName) + fileLang, _, _ = strings.Cut(fileLang, "?") // maybe, the value from gitattributes might contain `?` parameters? + ext := path.Ext(fileName) + // the "lang" might come from enry or gitattributes, it has different naming for some languages + switch fileLang { + case "F#": + fileLang = "FSharp" + case "Pascal": + fileLang = "ObjectPascal" + case "C": + if ext == ".C" || ext == ".H" { + fileLang = "C++" + } + } + return fileName, fileLang +} + +func DetectChromaLexerByFileName(fileName, fileLang string) chroma.Lexer { + lexer, _ := detectChromaLexerByFileName(fileName, fileLang) + return lexer +} + +func detectChromaLexerByFileName(fileName, fileLang string) (_ chroma.Lexer, byLang bool) { + fileName, fileLang = normalizeFileNameLang(fileName, fileLang) + fileExt := path.Ext(fileName) + + // apply custom mapping for file extension, highest priority, for example: + // * ".my-js" -> ".js" + // * ".my-html" -> "HTML" + if fileExt != "" { + if val, ok := globalVars().highlightMapping[fileExt]; ok { + if strings.HasPrefix(val, ".") { + fileName = "dummy" + val + fileLang = "" + } else { + fileLang = val + } + } + } + + // try to use language for lexer name + if fileLang != "" { + lexer := chromaLexers().lowerNameMap[strings.ToLower(fileLang)] + if lexer != nil { + return lexer, true + } + } + + if fileName == "" { + return lexers.Fallback, false + } + + // try base name 
+ { + baseName := path.Base(fileName) + if lexer, ok := chromaLexers().fileBaseMap[baseName]; ok { + return lexer, false + } else if lexer, ok = chromaLexers().fileBaseMap[mapKeyLowerPrefix+strings.ToLower(baseName)]; ok { + return lexer, false + } + } + + if fileExt == "" { + return lexers.Fallback, false + } + + // try ext name + { + if lexer, ok := chromaLexers().fileExtMap[fileExt]; ok { + return lexer, false + } else if lexer, ok = chromaLexers().fileExtMap[mapKeyLowerPrefix+strings.ToLower(fileExt)]; ok { + return lexer, false + } + } + + // try file part match, for example: ".env.local" for "*.env.*" + // it assumes that there must be a dot in filename (fileExt isn't empty) + for _, item := range chromaLexers().fileParts { + if strings.Contains(fileName, item.part) { + return item.lexer, false + } + } + return lexers.Fallback, false +} + +// detectChromaLexerWithAnalyze returns a chroma lexer by given file name, language and code content. All parameters can be optional. +// When code content is provided, it will be slow if no lexer is found by file name or language. +// If no lexer is found, it will return the fallback lexer. 
+func detectChromaLexerWithAnalyze(fileName, lang string, code []byte) chroma.Lexer { + lexer, byLang := detectChromaLexerByFileName(fileName, lang) + + // if lang is provided, and it matches a lexer, use it directly + if byLang { + return lexer + } + + // if a lexer is detected and there is no conflict for the file extension, use it directly + fileExt := path.Ext(fileName) + _, hasConflicts := chromaLexers().conflictingExtLangMap[fileExt] + if !hasConflicts && lexer != lexers.Fallback { + return lexer + } + + // try to detect language by content, for best guessing for the language + // when using "code" to detect, analyze.GetCodeLanguage is slow, it iterates many rules to detect language from content + analyzedLanguage := analyze.GetCodeLanguage(fileName, code) + lexer = DetectChromaLexerByFileName(fileName, analyzedLanguage) + if lexer == lexers.Fallback { + if analyzedLanguage != enry.OtherLanguage { + log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", analyzedLanguage, fileName) + } + } + return lexer +} diff --git a/modules/highlight/lexerdetect_test.go b/modules/highlight/lexerdetect_test.go new file mode 100644 index 0000000000..868e793a68 --- /dev/null +++ b/modules/highlight/lexerdetect_test.go @@ -0,0 +1,90 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package highlight + +import ( + "strings" + "testing" + + "github.com/alecthomas/chroma/v2/lexers" + "github.com/stretchr/testify/assert" +) + +func BenchmarkDetectChromaLexerByFileName(b *testing.B) { + for b.Loop() { + // BenchmarkDetectChromaLexerByFileName-12 18214717 61.35 ns/op + DetectChromaLexerByFileName("a.sql", "") + } +} + +func BenchmarkDetectChromaLexerWithAnalyze(b *testing.B) { + b.StopTimer() + code := []byte(strings.Repeat("SELECT * FROM table;\n", 1000)) + b.StartTimer() + for b.Loop() { + // BenchmarkRenderCodeSlowGuess-12 87946 13310 ns/op + detectChromaLexerWithAnalyze("a", "", code) + } +} + +func BenchmarkChromaAnalyze(b *testing.B) { + b.StopTimer() + code := strings.Repeat("SELECT * FROM table;\n", 1000) + b.StartTimer() + for b.Loop() { + // comparing to detectChromaLexerWithAnalyze (go-enry), "chroma/lexers.Analyse" is very slow + // BenchmarkChromaAnalyze-12 519 2247104 ns/op + lexers.Analyse(code) + } +} + +func BenchmarkRenderCodeByLexer(b *testing.B) { + b.StopTimer() + code := strings.Repeat("SELECT * FROM table;\n", 1000) + lexer := DetectChromaLexerByFileName("a.sql", "") + b.StartTimer() + for b.Loop() { + // Really slow ....... 
+ // BenchmarkRenderCodeByLexer-12 22 47159038 ns/op + RenderCodeByLexer(lexer, code) + } +} + +func TestDetectChromaLexer(t *testing.T) { + globalVars().highlightMapping[".my-html"] = "HTML" + t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") }) + + cases := []struct { + fileName string + language string + content string + expected string + }{ + {"test.py", "", "", "Python"}, + + {"any-file", "javascript", "", "JavaScript"}, + {"any-file", "", "/* vim: set filetype=python */", "Python"}, + {"any-file", "", "", "fallback"}, + + {"test.fs", "", "", "FSharp"}, + {"test.fs", "F#", "", "FSharp"}, + {"test.fs", "", "let x = 1", "FSharp"}, + + {"test.c", "", "", "C"}, + {"test.C", "", "", "C++"}, + {"OLD-CODE.PAS", "", "", "ObjectPascal"}, + {"test.my-html", "", "", "HTML"}, + + {"a.php", "", "", "PHP"}, + {"a.sql", "", "", "SQL"}, + {"dhcpd.conf", "", "", "ISCdhcpd"}, + {".env.my-production", "", "", "Bash"}, + } + for _, c := range cases { + lexer := detectChromaLexerWithAnalyze(c.fileName, c.language, []byte(c.content)) + if assert.NotNil(t, lexer, "case: %+v", c) { + assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c) + } + } +} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 907dd1a537..eb20b70e71 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -72,7 +72,8 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { func HighlightSearchResultCode(filename, language string, lineNums []int, code string) []*ResultLine { // we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting - hl, _ := highlight.RenderCodeFast(filename, language, code) + lexer := highlight.DetectChromaLexerByFileName(filename, language) + hl := highlight.RenderCodeByLexer(lexer, code) highlightedLines := strings.Split(string(hl), "\n") // The lineNums outputted by render might not match the original lineNums, because "highlight" removes the 
last `\n` diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 17d994734a..fd3071645a 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -56,7 +56,7 @@ func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error } }() - lexer := highlight.GetChromaLexerWithFallback("", lang, nil) // don't use content to detect, it is too slow + lexer := highlight.DetectChromaLexerByFileName("", lang) // don't use content to detect, it is too slow lexer = chroma.Coalesce(lexer) sb := &strings.Builder{} diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go index 25eb88eefc..4fb61bee6d 100644 --- a/routers/web/repo/blame.go +++ b/routers/web/repo/blame.go @@ -267,7 +267,7 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa bufContent := buf.Bytes() bufContent = charset.ToUTF8(bufContent, charset.ConvertOpts{}) - highlighted, lexerName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent)) + highlighted, _, lexerDisplayName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent)) unsafeLines := highlight.UnsafeSplitHighlightedLines(highlighted) for i, br := range rows { var line template.HTML @@ -280,5 +280,5 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa ctx.Data["EscapeStatus"] = escapeStatus ctx.Data["BlameRows"] = rows - ctx.Data["LexerName"] = lexerName + ctx.Data["LexerName"] = lexerDisplayName } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6b29582208..7777cf4a1c 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -40,6 +40,7 @@ import ( "code.gitea.io/gitea/modules/translation" "code.gitea.io/gitea/modules/util" + "github.com/alecthomas/chroma/v2" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" 
"golang.org/x/text/encoding" @@ -306,6 +307,7 @@ type DiffSection struct { language *diffVarMutable[string] highlightedLeftLines *diffVarMutable[map[int]template.HTML] highlightedRightLines *diffVarMutable[map[int]template.HTML] + highlightLexer *diffVarMutable[chroma.Lexer] FileName string Lines []*DiffLine @@ -347,8 +349,10 @@ func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *D if setting.Git.DisableDiffHighlight { return template.HTML(html.EscapeString(diffLine.Content[1:])) } - h, _ = highlight.RenderCodeFast(diffSection.FileName, fileLanguage, diffLine.Content[1:]) - return h + if diffSection.highlightLexer.value == nil { + diffSection.highlightLexer.value = highlight.DetectChromaLexerByFileName(diffSection.FileName, fileLanguage) + } + return highlight.RenderCodeByLexer(diffSection.highlightLexer.value, diffLine.Content[1:]) } func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline { @@ -391,6 +395,12 @@ func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline { + defer func() { + if err := recover(); err != nil { + // the logic in this function is complex, so recover from any panic and log it with the stack here, because Golang template doesn't print the stack + log.Error("panic in GetComputedInlineDiffFor: %v\nStack: %s", err, log.Stack(2)) + } + }() // try to find equivalent diff line. 
ignore, otherwise switch diffLine.Type { case DiffLineSection: @@ -452,6 +462,7 @@ type DiffFile struct { // for render purpose only, will be filled by the extra loop in GitDiffForRender, the maps of lines are 0-based language diffVarMutable[string] + highlightRender diffVarMutable[chroma.Lexer] // cache render (atm: lexer) for current file, only detect once for line-by-line mode highlightedLeftLines diffVarMutable[map[int]template.HTML] highlightedRightLines diffVarMutable[map[int]template.HTML] } @@ -932,6 +943,7 @@ func skipToNextDiffHead(input *bufio.Reader) (line string, err error) { func newDiffSectionForDiffFile(curFile *DiffFile) *DiffSection { return &DiffSection{ language: &curFile.language, + highlightLexer: &curFile.highlightRender, highlightedLeftLines: &curFile.highlightedLeftLines, highlightedRightLines: &curFile.highlightedRightLines, } @@ -1395,7 +1407,8 @@ func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool, } content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{})) - highlightedNewContent, _ := highlight.RenderCodeFast(name, lang, content) + lexer := highlight.DetectChromaLexerByFileName(name, lang) + highlightedNewContent := highlight.RenderCodeByLexer(lexer, content) unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent) lines := make(map[int]template.HTML, len(unsafeLines)) // only save the highlighted lines we need, but not the whole file, to save memory diff --git a/services/gitdiff/gitdiff_excerpt.go b/services/gitdiff/gitdiff_excerpt.go index be66d8e2af..4b1958fc11 100644 --- a/services/gitdiff/gitdiff_excerpt.go +++ b/services/gitdiff/gitdiff_excerpt.go @@ -11,6 +11,8 @@ import ( "io" "code.gitea.io/gitea/modules/setting" + + "github.com/alecthomas/chroma/v2" ) type BlobExcerptOptions struct { @@ -65,6 +67,7 @@ func BuildBlobExcerptDiffSection(filePath string, reader io.Reader, opts BlobExc chunkSize := BlobExcerptChunkSize section := &DiffSection{ language: 
&diffVarMutable[string]{value: language}, + highlightLexer: &diffVarMutable[chroma.Lexer]{}, highlightedLeftLines: &diffVarMutable[map[int]template.HTML]{}, highlightedRightLines: &diffVarMutable[map[int]template.HTML]{}, FileName: filePath, diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index b99b7e3675..ea9a8829ed 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -76,8 +76,8 @@ func TestDiffWithHighlight(t *testing.T) { }) t.Run("ComplexDiff1", func(t *testing.T) { - oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`) - newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`) + oldCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `xxx || yyy`) + newCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `bot&xxx || bot&yyy`) hcd := newHighlightCodeDiff() out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode) assert.Equal(t, strings.ReplaceAll(`