From ed9cbbcad147ec4e47c1b5c26a582b3d17cadc51 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Fri, 3 Jul 2026 15:22:59 +0200 Subject: [PATCH 1/5] Improve JS/TS comments filter. --- embedding/commentfilter/config.go | 19 +- embedding/commentfilter/filter_test.go | 8 +- embedding/commentfilter/javascript_filter.go | 405 +++++++++++++++++++ 3 files changed, 416 insertions(+), 16 deletions(-) create mode 100644 embedding/commentfilter/javascript_filter.go diff --git a/embedding/commentfilter/config.go b/embedding/commentfilter/config.go index e4df86f..e28cfa0 100644 --- a/embedding/commentfilter/config.go +++ b/embedding/commentfilter/config.go @@ -48,10 +48,10 @@ var filtersByExtension = map[string]filterEntry{ ".hxx": filterConfig(MarkerCommentFilter{Syntax: cStyleSyntax}, regularModes), // JavaScript - ".js": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), - ".jsx": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), - ".ts": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), - ".tsx": filterConfig(MarkerCommentFilter{Syntax: jsSyntax}, allModes), + ".js": filterConfig(JavaScriptCommentFilter{}, allModes), + ".jsx": filterConfig(JavaScriptCommentFilter{}, allModes), + ".ts": filterConfig(JavaScriptCommentFilter{}, allModes), + ".tsx": filterConfig(JavaScriptCommentFilter{}, allModes), // Go ".go": filterConfig(MarkerCommentFilter{Syntax: goSyntax}, regularModes), @@ -103,17 +103,6 @@ var javaSyntax = CommentMarker{ QuoteChars: "\"'", } -var jsSyntax = CommentMarker{ - Inline: []string{"//"}, - Block: []BlockMarker{ - {Start: cStyleBlockCommentStart, End: cStyleBlockCommentEnd}, - }, - Documentation: DocumentationMarker{ - Block: []BlockMarker{{Start: cStyleDocCommentStart, End: cStyleBlockCommentEnd}}, - }, - QuoteChars: jsQuoteChars, -} - var csharpSyntax = CommentMarker{ Inline: []string{"//"}, Block: []BlockMarker{ diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 8c23506..05fa4da 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -301,15 +301,21 @@ var _ = Describe("Comment filter", func() { }) Describe("JavaScript and TypeScript", func() { - It("should strip comments without treating template literals as comments", func() { + It("should strip comments without treating regex and template text as comments", func() { lines := []string{ "// module comment", "const url = `http://example.org/*not-comment*/`;", + "const pattern = /https?:\\/\\/example\\.com\\/docs/;", + "const help = `Keep // and /* markers */ in template text`;", + "const nested = `${format(value /* remove this real comment */)}`;", "const value = 42; // inline comment", } expected := []string{ "const url = `http://example.org/*not-comment*/`;", + "const pattern = /https?:\\/\\/example\\.com\\/docs/;", + "const help = `Keep // and /* markers */ in template text`;", + "const nested = `${format(value )}`;", "const value = 42; ", } diff --git a/embedding/commentfilter/javascript_filter.go b/embedding/commentfilter/javascript_filter.go new file mode 100644 index 0000000..819e98a --- /dev/null +++ b/embedding/commentfilter/javascript_filter.go @@ -0,0 +1,405 @@ +// Copyright 2026, TeamDev. All rights reserved. +// +// Redistribution and use in source and/or binary forms, with or without +// modification, must retain the above copyright notice and the following +// disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package commentfilter + +import "strings" + +const jsTemplateInterpolationStart = "${" + +// JavaScriptCommentFilter filters JavaScript and TypeScript comments while preserving literal text. +type JavaScriptCommentFilter struct{} + +// javascriptState tracks JavaScript lexical state that can span source lines. +type javascriptState struct { + // blockActive reports whether scanning is inside a block comment. + blockActive bool + + // blockKeep reports whether the active block comment should be retained. + blockKeep bool + + // blockEnd contains the closing marker for the active block comment. + blockEnd string + + // template reports whether scanning is inside template literal text. + template bool + + // templateInterpolationDepth is the active brace depth of a template interpolation. + templateInterpolationDepth int +} + +// javascriptLineFilter filters one JavaScript or TypeScript source line. +type javascriptLineFilter struct { + // line is the source line being filtered. + line string + + // mode selects which comments to retain. + mode Mode + + // state tracks JavaScript constructs across lines. + state *javascriptState + + // result accumulates the filtered source line. + result strings.Builder + + // position is the current byte index in line. + position int + + // hadComment reports whether the line contained a recognized comment. + hadComment bool +} + +// Filter removes or preserves JavaScript and TypeScript comments according to mode. +// +// Parameters: +// lines - provides JavaScript or TypeScript source lines. +// mode - selects comments to retain. +// +// Returns filtered source lines. +func (JavaScriptCommentFilter) Filter(lines []string, mode Mode) []string { + var filtered []string + state := javascriptState{} + for _, line := range lines { + filteredLine, hadComment := filterJavaScriptLine(line, mode, &state) + if hadComment && strings.TrimSpace(filteredLine) == "" { + continue + } + filtered = append(filtered, filteredLine) + } + + return filtered +} + +// filterJavaScriptLine removes or preserves recognized JavaScript comments from one line. +func filterJavaScriptLine(line string, mode Mode, state *javascriptState) (string, bool) { + filter := javascriptLineFilter{ + line: line, + mode: mode, + state: state, + } + + return filter.filterLine() +} + +// filterLine walks the current line until it reaches its end or a line comment. +func (f *javascriptLineFilter) filterLine() (string, bool) { + for f.position < len(f.line) { + if f.consumeActiveBlock() { + continue + } + if f.consumeTemplateInterpolation() { + continue + } + if f.consumeTemplateText() { + continue + } + if f.consumeString() { + continue + } + if f.consumeRegexLiteral() { + continue + } + if consumed, stop := f.consumeComment(); consumed { + if stop { + break + } + + continue + } + f.consumeCodeByte() + } + + return f.result.String(), f.hadComment +} + +// consumeActiveBlock consumes text while the scanner is inside a block comment. +func (f *javascriptLineFilter) consumeActiveBlock() bool { + if !f.state.blockActive { + return false + } + f.hadComment = true + end := strings.Index(f.line[f.position:], f.state.blockEnd) + if end < 0 { + if f.state.blockKeep { + f.result.WriteString(f.line[f.position:]) + } + f.position = len(f.line) + + return true + } + endPosition := f.position + end + len(f.state.blockEnd) + if f.state.blockKeep { + f.result.WriteString(f.line[f.position:endPosition]) + } + f.position = endPosition + f.state.blockActive = false + f.state.blockEnd = "" + + return true +} + +// consumeTemplateInterpolation resumes JavaScript expression scanning inside `${...}`. +func (f *javascriptLineFilter) consumeTemplateInterpolation() bool { + if f.state.templateInterpolationDepth == 0 { + return false + } + f.consumeInterpolationDepth(&f.state.templateInterpolationDepth) + if f.state.templateInterpolationDepth == 0 { + f.state.template = true + } + + return true +} + +// consumeTemplateText copies template text and filters comments inside `${...}` code. +func (f *javascriptLineFilter) consumeTemplateText() bool { + if !f.state.template && f.line[f.position] != '`' { + return false + } + if !f.state.template { + f.state.template = true + f.consumeCodeByte() + } + for f.position < len(f.line) { + switch { + case f.line[f.position] == '\\': + f.writeEscapedByte() + case f.line[f.position] == '`': + f.consumeCodeByte() + f.state.template = false + + return true + case strings.HasPrefix(f.line[f.position:], jsTemplateInterpolationStart): + f.result.WriteString(jsTemplateInterpolationStart) + f.position += len(jsTemplateInterpolationStart) + f.state.template = false + f.state.templateInterpolationDepth = 1 + f.consumeTemplateInterpolation() + if f.state.templateInterpolationDepth > 0 { + return true + } + default: + f.consumeCodeByte() + } + } + + return true +} + +// consumeString copies a quoted string without scanning comment markers inside it. +func (f *javascriptLineFilter) consumeString() bool { + if f.position >= len(f.line) { + return false + } + switch f.line[f.position] { + case '"', '\'': + quoteEnd := quotedSegmentEnd(f.line, f.position, "\"'") + f.result.WriteString(f.line[f.position:quoteEnd]) + f.position = quoteEnd + + return true + default: + return false + } +} + +// consumeRegexLiteral copies a regular-expression literal without treating its content as comments. +func (f *javascriptLineFilter) consumeRegexLiteral() bool { + if !f.regexStartsHere() { + return false + } + f.consumeCodeByte() + inClass := false + for f.position < len(f.line) { + switch f.line[f.position] { + case '\\': + f.writeEscapedByte() + case '[': + inClass = true + f.consumeCodeByte() + case ']': + inClass = false + f.consumeCodeByte() + case '/': + if inClass { + f.consumeCodeByte() + + continue + } + f.consumeCodeByte() + f.consumeRegexFlags() + + return true + default: + f.consumeCodeByte() + } + } + + return true +} + +// regexStartsHere reports whether the slash at the current position can start a regex literal. +func (f *javascriptLineFilter) regexStartsHere() bool { + if f.line[f.position] != '/' { + return false + } + if strings.HasPrefix(f.line[f.position:], "//") || + strings.HasPrefix(f.line[f.position:], cStyleBlockCommentStart) { + return false + } + previous := previousSignificantByte(f.line[:f.position]) + if previous == 0 { + return true + } + + return strings.ContainsRune("([{=,:;!&|?+-*~^<>%", rune(previous)) +} + +// consumeRegexFlags copies identifier characters after a regex literal closing slash. +func (f *javascriptLineFilter) consumeRegexFlags() { + for f.position < len(f.line) { + char := f.line[f.position] + if !isASCIIIdentifierByte(char) { + return + } + f.consumeCodeByte() + } +} + +// consumeInterpolationDepth filters comments inside interpolation code until depth closes or line ends. +func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { + for f.position < len(f.line) { + if f.consumeActiveBlock() { + continue + } + if f.consumeString() { + continue + } + if f.consumeRegexLiteral() { + continue + } + if consumed, stop := f.consumeComment(); consumed { + if stop { + return + } + + continue + } + var done bool + *depth, done = f.consumeInterpolationCode(*depth) + if done { + *depth = 0 + + return + } + } +} + +// consumeInterpolationCode copies expression code and updates interpolation brace depth. +func (f *javascriptLineFilter) consumeInterpolationCode(depth int) (int, bool) { + switch f.line[f.position] { + case '{': + depth++ + f.consumeCodeByte() + + return depth, false + case '}': + depth-- + f.consumeCodeByte() + + return depth, depth == 0 + default: + f.consumeCodeByte() + + return depth, false + } +} + +// consumeComment consumes a JavaScript comment and reports whether it ended the line. +func (f *javascriptLineFilter) consumeComment() (bool, bool) { + if strings.HasPrefix(f.line[f.position:], cStyleDocCommentStart) { + f.startBlockComment(f.mode == RetainDocumentation) + + return true, false + } + if strings.HasPrefix(f.line[f.position:], cStyleBlockCommentStart) { + f.startBlockComment(f.mode == RetainBlock || f.mode == RetainRegular) + + return true, false + } + if strings.HasPrefix(f.line[f.position:], "//") { + f.hadComment = true + if f.mode == RetainInline || f.mode == RetainRegular { + f.result.WriteString(f.line[f.position:]) + } + f.position = len(f.line) + + return true, true + } + + return false, false +} + +// startBlockComment records the active block comment markers and whether to keep them. +func (f *javascriptLineFilter) startBlockComment(keep bool) { + f.hadComment = true + f.state.blockActive = true + f.state.blockKeep = keep + f.state.blockEnd = cStyleBlockCommentEnd +} + +// writeEscapedByte copies an escaped byte pair from a literal. +func (f *javascriptLineFilter) writeEscapedByte() { + f.result.WriteByte(f.line[f.position]) + f.position++ + if f.position < len(f.line) { + f.result.WriteByte(f.line[f.position]) + f.position++ + } +} + +// consumeCodeByte copies one source byte. +func (f *javascriptLineFilter) consumeCodeByte() { + f.result.WriteByte(f.line[f.position]) + f.position++ +} + +// previousSignificantByte returns the last non-space byte in text. +func previousSignificantByte(text string) byte { + for position := len(text) - 1; position >= 0; position-- { + if !isASCIISpace(text[position]) { + return text[position] + } + } + + return 0 +} + +// isASCIISpace reports whether char is an ASCII whitespace byte. +func isASCIISpace(char byte) bool { + return char == ' ' || char == '\t' || char == '\n' || char == '\r' +} + +// isASCIIIdentifierByte reports whether char can appear in a JavaScript identifier or regex flag. +func isASCIIIdentifierByte(char byte) bool { + return char == '_' || + char == '$' || + (char >= 'a' && char <= 'z') || + (char >= 'A' && char <= 'Z') || + (char >= '0' && char <= '9') +} From 3d292f11329648efeb9ee0209da54e0d1d63b925 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Fri, 3 Jul 2026 16:15:47 +0200 Subject: [PATCH 2/5] Improve interpolation comments filtering. --- embedding/commentfilter/filter_test.go | 14 ++++++++ embedding/commentfilter/javascript_filter.go | 35 +++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 05fa4da..4a262b8 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -321,6 +321,20 @@ var _ = Describe("Comment filter", func() { assertFiltered("sample.ts", RetainNone, lines, expected) }) + + It("should preserve nested template literals inside template interpolations", func() { + lines := []string{ + "const msg = `${items.map(i => `// ${i}`).join()}`;", + "const braces = `${items.map(i => `}`).join()}`; // real comment", + } + + expected := []string{ + "const msg = `${items.map(i => `// ${i}`).join()}`;", + "const braces = `${items.map(i => `}`).join()}`; ", + } + + assertFiltered("sample.ts", RetainNone, lines, expected) + }) }) Describe("C#", func() { diff --git a/embedding/commentfilter/javascript_filter.go b/embedding/commentfilter/javascript_filter.go index 819e98a..5b3006b 100644 --- a/embedding/commentfilter/javascript_filter.go +++ b/embedding/commentfilter/javascript_filter.go @@ -281,12 +281,15 @@ func (f *javascriptLineFilter) consumeRegexFlags() { } } -// consumeInterpolationDepth filters comments inside interpolation code until depth closes or line ends. +// consumeInterpolationDepth filters interpolation code until depth closes or line ends. func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { for f.position < len(f.line) { if f.consumeActiveBlock() { continue } + if f.consumeNestedTemplateLiteral() { + continue + } if f.consumeString() { continue } @@ -310,6 +313,36 @@ func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { } } +// consumeNestedTemplateLiteral copies a template literal found inside interpolation code. +func (f *javascriptLineFilter) consumeNestedTemplateLiteral() bool { + if f.position >= len(f.line) || f.line[f.position] != '`' { + return false + } + f.consumeCodeByte() + for f.position < len(f.line) { + switch { + case f.line[f.position] == '\\': + f.writeEscapedByte() + case f.line[f.position] == '`': + f.consumeCodeByte() + + return true + case strings.HasPrefix(f.line[f.position:], jsTemplateInterpolationStart): + f.result.WriteString(jsTemplateInterpolationStart) + f.position += len(jsTemplateInterpolationStart) + depth := 1 + f.consumeInterpolationDepth(&depth) + if depth > 0 { + return true + } + default: + f.consumeCodeByte() + } + } + + return true +} + // consumeInterpolationCode copies expression code and updates interpolation brace depth. func (f *javascriptLineFilter) consumeInterpolationCode(depth int) (int, bool) { switch f.line[f.position] { From 0a55d83f5d88da56180e8aeb68b4aa0d207ecfc6 Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Fri, 3 Jul 2026 16:29:05 +0200 Subject: [PATCH 3/5] Improve readability. --- embedding/commentfilter/filter_test.go | 32 ++++++ embedding/commentfilter/javascript_filter.go | 113 +++++++++++++++---- 2 files changed, 120 insertions(+), 25 deletions(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index 4a262b8..bac56c0 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -335,6 +335,38 @@ var _ = Describe("Comment filter", func() { assertFiltered("sample.ts", RetainNone, lines, expected) }) + + It("should preserve multi-line template literal text", func() { + lines := []string{ + "const help = `Keep // marker", + "and /* marker */ text`; // real comment", + } + + expected := []string{ + "const help = `Keep // marker", + "and /* marker */ text`; ", + } + + assertFiltered("sample.ts", RetainNone, lines, expected) + }) + + It("should preserve regex literals after expression-starting keywords", func() { + lines := []string{ + "function parse() { return /\"/; } // real comment", + "case /\"/.source: // real comment", + "const type = typeof /\"/; // real comment", + "const ratio = value++ / 2; // real comment", + } + + expected := []string{ + "function parse() { return /\"/; } ", + "case /\"/.source: ", + "const type = typeof /\"/; ", + "const ratio = value++ / 2; ", + } + + assertFiltered("sample.ts", RetainNone, lines, expected) + }) }) Describe("C#", func() { diff --git a/embedding/commentfilter/javascript_filter.go b/embedding/commentfilter/javascript_filter.go index 5b3006b..5cf32f7 100644 --- a/embedding/commentfilter/javascript_filter.go +++ b/embedding/commentfilter/javascript_filter.go @@ -64,6 +64,24 @@ type javascriptLineFilter struct { hadComment bool } +// commentConsumeResult describes a consumed JavaScript comment. +type commentConsumeResult struct { + // consumed reports whether a recognized comment marker was consumed. + consumed bool + + // stopLine reports whether the consumed comment reaches the end of the source line. + stopLine bool +} + +// interpolationCodeResult describes the effect of one consumed interpolation byte. +type interpolationCodeResult struct { + // depth is the brace depth after consuming the byte at the scanner position. + depth int + + // closed reports whether the consumed byte closed the current interpolation expression. + closed bool +} + // Filter removes or preserves JavaScript and TypeScript comments according to mode. // // Parameters: @@ -114,8 +132,8 @@ func (f *javascriptLineFilter) filterLine() (string, bool) { if f.consumeRegexLiteral() { continue } - if consumed, stop := f.consumeComment(); consumed { - if stop { + if comment := f.consumeComment(); comment.consumed { + if comment.stopLine { break } @@ -262,12 +280,21 @@ func (f *javascriptLineFilter) regexStartsHere() bool { strings.HasPrefix(f.line[f.position:], cStyleBlockCommentStart) { return false } - previous := previousSignificantByte(f.line[:f.position]) - if previous == 0 { + previous := previousSignificantToken(f.line[:f.position]) + if previous == "" { + return true + } + if previous == "++" || previous == "--" { + return false + } + if regexPrecedingKeyword(previous) { return true } + if len(previous) != 1 { + return false + } - return strings.ContainsRune("([{=,:;!&|?+-*~^<>%", rune(previous)) + return strings.ContainsRune("([{=,:;!&|?+-*~^<>%", rune(previous[0])) } // consumeRegexFlags copies identifier characters after a regex literal closing slash. @@ -282,6 +309,9 @@ func (f *javascriptLineFilter) consumeRegexFlags() { } // consumeInterpolationDepth filters interpolation code until depth closes or line ends. +// +// Parameters: +// depth - current brace depth of the interpolation expression; updated in place. func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { for f.position < len(f.line) { if f.consumeActiveBlock() { @@ -296,16 +326,16 @@ func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { if f.consumeRegexLiteral() { continue } - if consumed, stop := f.consumeComment(); consumed { - if stop { + if comment := f.consumeComment(); comment.consumed { + if comment.stopLine { return } continue } - var done bool - *depth, done = f.consumeInterpolationCode(*depth) - if done { + code := f.consumeInterpolationCode(*depth) + *depth = code.depth + if code.closed { *depth = 0 return @@ -344,36 +374,43 @@ func (f *javascriptLineFilter) consumeNestedTemplateLiteral() bool { } // consumeInterpolationCode copies expression code and updates interpolation brace depth. -func (f *javascriptLineFilter) consumeInterpolationCode(depth int) (int, bool) { +// +// Parameters: +// depth - current brace depth before consuming the byte at the scanner position. +// +// Returns interpolation code result. +func (f *javascriptLineFilter) consumeInterpolationCode(depth int) interpolationCodeResult { switch f.line[f.position] { case '{': depth++ f.consumeCodeByte() - return depth, false + return interpolationCodeResult{depth: depth} case '}': depth-- f.consumeCodeByte() - return depth, depth == 0 + return interpolationCodeResult{depth: depth, closed: depth == 0} default: f.consumeCodeByte() - return depth, false + return interpolationCodeResult{depth: depth} } } -// consumeComment consumes a JavaScript comment and reports whether it ended the line. -func (f *javascriptLineFilter) consumeComment() (bool, bool) { +// consumeComment consumes a JavaScript comment when one starts at the scanner position. +// +// Returns comment consume result. +func (f *javascriptLineFilter) consumeComment() commentConsumeResult { if strings.HasPrefix(f.line[f.position:], cStyleDocCommentStart) { f.startBlockComment(f.mode == RetainDocumentation) - return true, false + return commentConsumeResult{consumed: true} } if strings.HasPrefix(f.line[f.position:], cStyleBlockCommentStart) { f.startBlockComment(f.mode == RetainBlock || f.mode == RetainRegular) - return true, false + return commentConsumeResult{consumed: true} } if strings.HasPrefix(f.line[f.position:], "//") { f.hadComment = true @@ -382,10 +419,10 @@ func (f *javascriptLineFilter) consumeComment() (bool, bool) { } f.position = len(f.line) - return true, true + return commentConsumeResult{consumed: true, stopLine: true} } - return false, false + return commentConsumeResult{} } // startBlockComment records the active block comment markers and whether to keep them. @@ -412,15 +449,41 @@ func (f *javascriptLineFilter) consumeCodeByte() { f.position++ } -// previousSignificantByte returns the last non-space byte in text. -func previousSignificantByte(text string) byte { +// previousSignificantToken returns the last non-space token in text. +func previousSignificantToken(text string) string { for position := len(text) - 1; position >= 0; position-- { - if !isASCIISpace(text[position]) { - return text[position] + if isASCIISpace(text[position]) { + continue + } + if isASCIIIdentifierByte(text[position]) { + end := position + 1 + for position >= 0 && isASCIIIdentifierByte(text[position]) { + position-- + } + + return text[position+1 : end] + } + if position > 0 { + token := text[position-1 : position+1] + if token == "++" || token == "--" { + return token + } } + + return text[position : position+1] } - return 0 + return "" +} + +// regexPrecedingKeyword reports whether keyword can precede a regex literal. +func regexPrecedingKeyword(keyword string) bool { + switch keyword { + case "case", "delete", "return", "throw", "typeof", "void", "yield": + return true + default: + return false + } } // isASCIISpace reports whether char is an ASCII whitespace byte. From 1edecc8650ce397c5e3ce066bdba76e3b677f0ab Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Fri, 3 Jul 2026 16:52:39 +0200 Subject: [PATCH 4/5] Improve tests. --- embedding/commentfilter/filter_test.go | 8 ++++++++ embedding/commentfilter/javascript_filter.go | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index bac56c0..f3d02df 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -355,6 +355,10 @@ var _ = Describe("Comment filter", func() { "function parse() { return /\"/; } // real comment", "case /\"/.source: // real comment", "const type = typeof /\"/; // real comment", + "const match = await /\"/; // real comment", + "const hasValue = name in /\"/; // real comment", + "const isPattern = value instanceof /\"/; // real comment", + "if (missing) {} else /\"/.test(value); // real comment", "const ratio = value++ / 2; // real comment", } @@ -362,6 +366,10 @@ var _ = Describe("Comment filter", func() { "function parse() { return /\"/; } ", "case /\"/.source: ", "const type = typeof /\"/; ", + "const match = await /\"/; ", + "const hasValue = name in /\"/; ", + "const isPattern = value instanceof /\"/; ", + "if (missing) {} else /\"/.test(value); ", "const ratio = value++ / 2; ", } diff --git a/embedding/commentfilter/javascript_filter.go b/embedding/commentfilter/javascript_filter.go index 5cf32f7..2696074 100644 --- a/embedding/commentfilter/javascript_filter.go +++ b/embedding/commentfilter/javascript_filter.go @@ -479,7 +479,8 @@ func previousSignificantToken(text string) string { // regexPrecedingKeyword reports whether keyword can precede a regex literal. func regexPrecedingKeyword(keyword string) bool { switch keyword { - case "case", "delete", "return", "throw", "typeof", "void", "yield": + case "await", "case", "delete", "else", "in", "instanceof", "return", + "throw", "typeof", "void", "yield": return true default: return false From 627bee92875350e83024e47ff5ea7d1a239eb04f Mon Sep 17 00:00:00 2001 From: Vladyslav Kuksiuk Date: Fri, 3 Jul 2026 17:07:20 +0200 Subject: [PATCH 5/5] Improve readability. --- embedding/commentfilter/filter_test.go | 4 +++ embedding/commentfilter/javascript_filter.go | 30 ++++++++++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/embedding/commentfilter/filter_test.go b/embedding/commentfilter/filter_test.go index f3d02df..c406dd7 100644 --- a/embedding/commentfilter/filter_test.go +++ b/embedding/commentfilter/filter_test.go @@ -326,11 +326,15 @@ var _ = Describe("Comment filter", func() { lines := []string{ "const msg = `${items.map(i => `// ${i}`).join()}`;", "const braces = `${items.map(i => `}`).join()}`; // real comment", + "const multiline = `${items.map(i => `// text", + "still } /* text */ ${i}`).join()}`; // real comment", } expected := []string{ "const msg = `${items.map(i => `// ${i}`).join()}`;", "const braces = `${items.map(i => `}`).join()}`; ", + "const multiline = `${items.map(i => `// text", + "still } /* text */ ${i}`).join()}`; ", } assertFiltered("sample.ts", RetainNone, lines, expected) diff --git a/embedding/commentfilter/javascript_filter.go b/embedding/commentfilter/javascript_filter.go index 2696074..0d620f0 100644 --- a/embedding/commentfilter/javascript_filter.go +++ b/embedding/commentfilter/javascript_filter.go @@ -33,14 +33,14 @@ type javascriptState struct { // blockKeep reports whether the active block comment should be retained. blockKeep bool - // blockEnd contains the closing marker for the active block comment. - blockEnd string - // template reports whether scanning is inside template literal text. template bool // templateInterpolationDepth is the active brace depth of a template interpolation. templateInterpolationDepth int + + // nestedTemplate reports whether interpolation scanning is inside nested template text. + nestedTemplate bool } // javascriptLineFilter filters one JavaScript or TypeScript source line. @@ -151,7 +151,7 @@ func (f *javascriptLineFilter) consumeActiveBlock() bool { return false } f.hadComment = true - end := strings.Index(f.line[f.position:], f.state.blockEnd) + end := strings.Index(f.line[f.position:], cStyleBlockCommentEnd) if end < 0 { if f.state.blockKeep { f.result.WriteString(f.line[f.position:]) @@ -160,13 +160,12 @@ func (f *javascriptLineFilter) consumeActiveBlock() bool { return true } - endPosition := f.position + end + len(f.state.blockEnd) + endPosition := f.position + end + len(cStyleBlockCommentEnd) if f.state.blockKeep { f.result.WriteString(f.line[f.position:endPosition]) } f.position = endPosition f.state.blockActive = false - f.state.blockEnd = "" return true } @@ -345,16 +344,16 @@ func (f *javascriptLineFilter) consumeInterpolationDepth(depth *int) { // consumeNestedTemplateLiteral copies a template literal found inside interpolation code. func (f *javascriptLineFilter) consumeNestedTemplateLiteral() bool { - if f.position >= len(f.line) || f.line[f.position] != '`' { + if !f.startOrResumeNestedTemplateLiteral() { return false } - f.consumeCodeByte() for f.position < len(f.line) { switch { case f.line[f.position] == '\\': f.writeEscapedByte() case f.line[f.position] == '`': f.consumeCodeByte() + f.state.nestedTemplate = false return true case strings.HasPrefix(f.line[f.position:], jsTemplateInterpolationStart): @@ -373,6 +372,20 @@ func (f *javascriptLineFilter) consumeNestedTemplateLiteral() bool { return true } +// startOrResumeNestedTemplateLiteral enters or resumes nested template scanning. +func (f *javascriptLineFilter) startOrResumeNestedTemplateLiteral() bool { + if f.state.nestedTemplate { + return true + } + if f.position >= len(f.line) || f.line[f.position] != '`' { + return false + } + f.state.nestedTemplate = true + f.consumeCodeByte() + + return true +} + // consumeInterpolationCode copies expression code and updates interpolation brace depth. // // Parameters: @@ -430,7 +443,6 @@ func (f *javascriptLineFilter) startBlockComment(keep bool) { f.hadComment = true f.state.blockActive = true f.state.blockKeep = keep - f.state.blockEnd = cStyleBlockCommentEnd } // writeEscapedByte copies an escaped byte pair from a literal.