Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions embedding/commentfilter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
cStyleBlockCommentStart = "/*"
cStyleBlockCommentEnd = "*/"
cStyleDocCommentStart = "/**"
javaTextBlockDelimiter = "\"\"\""
jsQuoteChars = "\"'`"
)

Expand Down Expand Up @@ -98,6 +99,7 @@ var javaSyntax = CommentMarker{
Documentation: DocumentationMarker{
Block: []BlockMarker{{Start: cStyleDocCommentStart, End: cStyleBlockCommentEnd}},
},
TextBlocks: []string{javaTextBlockDelimiter},
QuoteChars: "\"'",
}

Expand Down
43 changes: 43 additions & 0 deletions embedding/commentfilter/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,49 @@ var _ = Describe("Comment filter", func() {

assertFiltered("Api.java", RetainRegular, lines, expected)
})

// Regression case for Java text blocks: comment markers that appear between the
// opening and closing """ are literal content and must survive filtering.
It("should strip comments without treating text block content as comments", func() {
// Input: a real line comment, a text block whose content contains both "//" and
// "/* */", and a one-line string literal containing "//" followed by a real
// trailing comment.
lines := []string{
"// header comment",
"String help = \"\"\"",
" Keep this // text.",
" Keep this /* text */ too.",
" \"\"\";",
"String value = \"not a // comment\"; // inline comment",
}

// Only the header comment line and the trailing inline comment disappear; the
// space that preceded the inline comment is kept.
expected := []string{
"String help = \"\"\"",
" Keep this // text.",
" Keep this /* text */ too.",
" \"\"\";",
"String value = \"not a // comment\"; ",
}

// assertFiltered is defined outside this view; presumably it runs the filter
// selected for "Api.java" in RetainNone mode over lines and compares the result
// with expected — confirm against the helper.
assertFiltered("Api.java", RetainNone, lines, expected)
})

// Regression case for escapes inside a Java text block: a backslash-escaped
// quote must not count toward the closing """ delimiter.
It("should not close text blocks on escaped triple quotes", func() {
// Input: inside the text block, `\"""` and `\"` each begin with an escaped
// quote, and a later content line contains "//". After the real closing
// delimiter comes a normal statement with a trailing comment.
lines := []string{
"String help = \"\"\"",
` Quote: \"""`,
` Escaped quote: \"`,
" Keep this // text.",
" \"\"\";",
"String value = \"kept\"; // real comment",
}

// Every text block line is expected back unchanged; only the trailing comment
// after the block is removed, leaving the space that preceded it.
expected := []string{
"String help = \"\"\"",
` Quote: \"""`,
` Escaped quote: \"`,
" Keep this // text.",
" \"\"\";",
"String value = \"kept\"; ",
}

// assertFiltered is defined outside this view; presumably it runs the filter
// selected for "Api.java" in RetainNone mode over lines and compares the result
// with expected — confirm against the helper.
assertFiltered("Api.java", RetainNone, lines, expected)
})
})

Describe("Kotlin", func() {
Expand Down
81 changes: 77 additions & 4 deletions embedding/commentfilter/marker_comment_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ type CommentMarker struct {
// Documentation contains API documentation comment markers.
Documentation DocumentationMarker

// TextBlocks contains delimiters that open and close multi-line text literals.
TextBlocks []string

// QuoteChars contains characters that open and close quoted strings.
QuoteChars string
}
Expand All @@ -59,7 +62,7 @@ type MarkerCommentFilter struct {
Syntax CommentMarker
}

// blockState tracks active multi-line lexical constructs (block comments and text blocks) across source lines.
type blockState struct {
// active reports whether scanning is inside a block comment.
active bool
Expand All @@ -69,6 +72,12 @@ type blockState struct {

// keep reports whether the active comment should be retained.
keep bool

// textBlockActive reports whether scanning is inside a text block.
textBlockActive bool

// textBlockDelimiter contains the marker that closes the active text block.
textBlockDelimiter string
}

// markerLineFilter tracks lexical comment filtering state for one source line.
Expand All @@ -82,7 +91,7 @@ type markerLineFilter struct {
// mode selects which comments to retain.
mode Mode

// state tracks multi-line lexical constructs (block comments and text blocks) across lines.
state *blockState

// result accumulates the filtered source line.
Expand Down Expand Up @@ -138,6 +147,12 @@ func (f *markerLineFilter) filterLine() (string, bool) {
if f.consumeActiveBlock() {
continue
}
if f.consumeActiveTextBlock() {
continue
}
if f.consumeTextBlockStart() {
continue
}
if f.consumeQuotedSegment() {
continue
}
Expand Down Expand Up @@ -179,6 +194,57 @@ func (f *markerLineFilter) consumeActiveBlock() bool {
return true
}

// consumeActiveTextBlock handles the case where scanning is currently inside a
// text block. It returns false without touching anything when no text block is
// active. Otherwise it copies the line verbatim from the current position up to
// and including the closing delimiter (or to the end of the line when the
// delimiter is absent), advances the position past what it copied, and returns
// true. The text block state is cleared only when the delimiter was found.
func (f *markerLineFilter) consumeActiveTextBlock() bool {
	state := f.state
	if !state.textBlockActive {
		return false
	}

	// textBlockEnd reports len(f.line) when the delimiter is missing, so one
	// slice expression covers both the "closed here" and "still open" cases.
	stop, closed := textBlockEnd(f.line, f.position, state.textBlockDelimiter)
	if closed {
		state.textBlockActive = false
		state.textBlockDelimiter = ""
	}
	f.result.WriteString(f.line[f.position:stop])
	f.position = stop

	return true
}

// textBlockEnd searches line, starting at byte offset position, for the first
// occurrence of delimiter that is not preceded by an escaping backslash.
//
// On success it returns the offset just past the delimiter and true. When no
// closing delimiter exists on the line it returns len(line) and false.
func textBlockEnd(line string, position int, delimiter string) (int, bool) {
	index := position
	for index < len(line) {
		switch {
		case line[index] == '\\':
			// A backslash escapes the byte that follows, so skip the pair;
			// this keeps an escaped quote from starting a delimiter match.
			index += 2
		case strings.HasPrefix(line[index:], delimiter):
			return index + len(delimiter), true
		default:
			index++
		}
	}

	return len(line), false
}

// consumeTextBlockStart checks whether one of the syntax's configured text
// block delimiters begins at the current position. When one does, the delimiter
// is copied to the result, the position moves past it, and the shared state is
// marked as being inside a text block closed by that same delimiter. The return
// value reports whether a delimiter was consumed.
func (f *markerLineFilter) consumeTextBlockStart() bool {
	opener, matched := prefixFrom(f.line, f.position, f.filter.Syntax.TextBlocks)
	if matched {
		f.state.textBlockActive = true
		f.state.textBlockDelimiter = opener
		f.result.WriteString(opener)
		f.position += len(opener)
	}

	return matched
}

// consumeQuotedSegment copies a quoted segment without scanning comment markers inside it.
func (f *markerLineFilter) consumeQuotedSegment() bool {
quoteEnd := quotedSegmentEnd(f.line, f.position, f.filter.Syntax.QuoteChars)
Expand Down Expand Up @@ -264,13 +330,20 @@ func (f *markerLineFilter) consumeCodeByte() {

// prefixAt reports whether line, read from byte offset position, begins with
// any entry of prefixes. It is the boolean-only form of prefixFrom.
func prefixAt(line string, position int, prefixes []string) bool {
	_, matched := prefixFrom(line, position, prefixes)

	return matched
}

// prefixFrom returns the first entry of prefixes that line starts with at byte
// offset position, together with true. Entries are tried in slice order, so an
// earlier entry wins over a later one that would also match. When nothing
// matches it returns the empty string and false.
func prefixFrom(line string, position int, prefixes []string) (string, bool) {
	for _, prefix := range prefixes {
		if strings.HasPrefix(line[position:], prefix) {
			return prefix, true
		}
	}

	return "", false
}

// blockAt reports whether one of the given block markers starts at the position.
Expand Down
Loading