From e3ea9a4b5c7535d3ff499ae56bd1127d1e714d52 Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 12:57:08 +0800 Subject: [PATCH 1/6] fix(edit): correct newString escaping in loose_escape fallback path When the LLM miscounts backslash escapes (common in LaTeX, JSON nesting), the exact match fails and the loose_escape regex recovers the matched position. However, the fallback path was writing the LLM's original newString verbatim, which carried the same escaping errors as the old_string. This silently corrupted files by introducing doubled or missing backslashes on every replacement that went through the loose_escape path without a successful llm_escape_correction round-trip. Add fixNewStringEscaping(), which tokenizes strings into backslash runs and text segments, aligns old_string with the regex-matched text to compute per-run backslash count ratios, and applies the same ratios to newString. When newString has more backslash runs than old_string, the last ratio is reused (the escaping error is typically uniform). Also update replacementOldString to use the matched text so both sides are consistent, matching the pattern already established by tab_correction. 
--- src/tools/edit-handler.ts | 96 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index b687c4e..f422523 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -248,6 +248,11 @@ export async function handleEditTool( if (matches.length === 0) { matches = [looseEscapeMatches[0]]; matchedVia = "loose_escape"; + replacementOldString = looseEscapeMatches[0].text; + const correctedNew = fixNewStringEscaping(oldString, looseEscapeMatches[0].text, newString); + if (correctedNew !== null) { + replacementNewString = correctedNew; + } } } } @@ -560,6 +565,97 @@ function stripReadResultLineTabs(value: string): string { return value.replaceAll("\n\t", "\n"); } +type TokenSegment = { type: "slash"; length: number } | { type: "text"; value: string }; + +function fixNewStringEscaping(oldString: string, matchedText: string, newString: string): string | null { + if (oldString === matchedText) { + return null; // no escaping difference to correct + } + + const oldTokens = tokenizeLooseEscaping(oldString); + const matchedTokens = tokenizeLooseEscaping(matchedText); + const newTokens = tokenizeLooseEscaping(newString); + + // Align oldTokens and matchedTokens: text segments must be identical + const ratios: Array = []; + let oi = 0; + let mi = 0; + while (oi < oldTokens.length || mi < matchedTokens.length) { + const oldTok = oi < oldTokens.length ? oldTokens[oi] : null; + const matchedTok = mi < matchedTokens.length ? 
matchedTokens[mi] : null; + + if (oldTok && oldTok.type === "text" && matchedTok && matchedTok.type === "text") { + if (oldTok.value !== matchedTok.value) { + return null; // alignment broken; text differs more than escaping + } + oi += 1; + mi += 1; + } else if (oldTok && oldTok.type === "slash") { + if (matchedTok && matchedTok.type === "slash") { + ratios.push(matchedTok.length / oldTok.length); + oi += 1; + mi += 1; + } else { + // old has backslashes but matched does not + ratios.push(0); + oi += 1; + // mi stays (matchedTok is a text token or null; will be consumed on next iteration) + } + } else if (matchedTok && matchedTok.type === "slash") { + // matched has backslashes but old does not — should not happen with loose_escape regex + ratios.push(matchedTok.length); // relative to 1 + mi += 1; + } else { + return null; // unexpected token pattern + } + } + + // Apply ratios to newString; reuse last ratio for trailing slash runs + let result = ""; + let ri = 0; + let lastRatio: number | null = null; + for (const tok of newTokens) { + if (tok.type === "slash") { + const ratio = ri < ratios.length ? ratios[ri] : lastRatio; + if (ratio !== null) { + const correctedCount = Math.max(0, Math.round(tok.length * ratio)); + result += "\\".repeat(correctedCount); + } else { + result += "\\".repeat(tok.length); + } + if (ri < ratios.length) { + lastRatio = ratios[ri]; + } + ri += 1; + } else { + result += tok.value; + } + } + + return result === newString ? 
null : result; +} + +function tokenizeLooseEscaping(value: string): TokenSegment[] { + const segments: TokenSegment[] = []; + let index = 0; + while (index < value.length) { + if (value[index] === "\\") { + const start = index; + while (index < value.length && value[index] === "\\") { + index += 1; + } + segments.push({ type: "slash", length: index - start }); + } else { + const start = index; + while (index < value.length && value[index] !== "\\") { + index += 1; + } + segments.push({ type: "text", value: value.slice(start, index) }); + } + } + return segments; +} + function buildCandidateMetadata( sessionId: string, filePath: string, From 5c7b30047d107036e59e84216e68c46afc08a25b Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 12:57:40 +0800 Subject: [PATCH 2/6] test(edit): verify newString escaping correction in loose_escape fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two tests that exercise the loose_escape fallback path without an LLM client (so correctEscapedStringsWithLLM is skipped), confirming that the escaping correction is applied to newString: - Over-escaped LaTeX commands: \\alpha → \alpha - Over-escaped LaTeX accent: H\\\"{o}tel → H\"{o}tel (both backslash and quote doubled by LLM) Both cases verify that the resulting file content uses correctly escaped single-backslash LaTeX, not the LLM's original multi-backslash new_string. 
--- src/tests/tool-handlers.test.ts | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/tests/tool-handlers.test.ts b/src/tests/tool-handlers.test.ts index aefef2f..830a2a1 100644 --- a/src/tests/tool-handlers.test.ts +++ b/src/tests/tool-handlers.test.ts @@ -629,6 +629,54 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u";\n'); }); +test("Edit corrects newString escaping in loose_escape fallback when LLM correction is unavailable", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex.tex"); + // LaTeX file with single-backslash commands + fs.writeFileSync(filePath, String.raw`\alpha + \beta = \gamma` + "\n", "utf8"); + + const sessionId = "loose-escape-newstring-fix"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // Simulate LLM over-escaping (doubled backslashes), no LLM client available for correction + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \\beta`, + new_string: String.raw`\\delta + \\epsilon`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\delta + \epsilon = \gamma` + "\n"); +}); + +test("Edit corrects newString escaping in loose_escape fallback for over-escaped LaTeX accent", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex2.tex"); + // LaTeX accent command: H\"{o}tel — backslash before double-quote + fs.writeFileSync(filePath, String.raw`H\"{o}tel is nice` + "\n", "utf8"); + + const sessionId = "loose-escape-accent"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // LLM over-escaped both the backslash AND the quote: 
H\\\"{o}tel + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`H\\\"{o}tel`, + new_string: String.raw`M\\\"{u}nchen`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`M\"{u}nchen is nice` + "\n"); +}); + test("Edit strips accidental read-result tabs after newlines when that creates a unique match", async () => { const workspace = createTempWorkspace(); const filePath = path.join(workspace, "tabs.ts"); From 708ce675197c0205b2fad9433a97f29904fce52b Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 13:09:38 +0800 Subject: [PATCH 3/6] fix(edit): handle zero-count loose escape correction Allow loose_escape newString correction to handle cases where old_string escapes a character but the matched file text has no backslash, such as \" matching a literal quote. Keep the correction aligned with the loose_escape regex semantics and preserve reuse of the last ratio for extra new_string backslash runs. Add regression coverage for quote escapes collapsing to zero backslashes and for extra new_string backslash runs reusing the last correction ratio. 
--- src/tests/tool-handlers.test.ts | 44 +++++++++++++++ src/tools/edit-handler.ts | 94 ++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 37 deletions(-) diff --git a/src/tests/tool-handlers.test.ts b/src/tests/tool-handlers.test.ts index 830a2a1..bb87a69 100644 --- a/src/tests/tool-handlers.test.ts +++ b/src/tests/tool-handlers.test.ts @@ -677,6 +677,50 @@ test("Edit corrects newString escaping in loose_escape fallback for over-escaped assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`M\"{u}nchen is nice` + "\n"); }); +test("Edit removes quote escapes from newString in loose_escape fallback when matched text has none", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "quoted.ts"); + fs.writeFileSync(filePath, 'const label = "alpha";\n', "utf8"); + + const sessionId = "loose-escape-quote-zero-ratio"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`const label = \"alpha\";`, + new_string: String.raw`const label = \"beta\";`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const label = "beta";\n'); +}); + +test("Edit reuses the last loose_escape ratio for extra backslash runs in newString", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex-extra-runs.tex"); + fs.writeFileSync(filePath, String.raw`\alpha is here` + "\n", "utf8"); + + const sessionId = "loose-escape-extra-new-runs"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha`, + new_string: String.raw`\\beta + \\gamma`, + }, + createContext(sessionId, workspace) + ); + + 
assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\beta + \gamma is here` + "\n"); +}); + test("Edit strips accidental read-result tabs after newlines when that creates a unique match", async () => { const workspace = createTempWorkspace(); const filePath = path.join(workspace, "tabs.ts"); diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index f422523..ee577f0 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -572,45 +572,14 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: return null; // no escaping difference to correct } - const oldTokens = tokenizeLooseEscaping(oldString); - const matchedTokens = tokenizeLooseEscaping(matchedText); - const newTokens = tokenizeLooseEscaping(newString); - - // Align oldTokens and matchedTokens: text segments must be identical - const ratios: Array = []; - let oi = 0; - let mi = 0; - while (oi < oldTokens.length || mi < matchedTokens.length) { - const oldTok = oi < oldTokens.length ? oldTokens[oi] : null; - const matchedTok = mi < matchedTokens.length ? 
matchedTokens[mi] : null; - - if (oldTok && oldTok.type === "text" && matchedTok && matchedTok.type === "text") { - if (oldTok.value !== matchedTok.value) { - return null; // alignment broken; text differs more than escaping - } - oi += 1; - mi += 1; - } else if (oldTok && oldTok.type === "slash") { - if (matchedTok && matchedTok.type === "slash") { - ratios.push(matchedTok.length / oldTok.length); - oi += 1; - mi += 1; - } else { - // old has backslashes but matched does not - ratios.push(0); - oi += 1; - // mi stays (matchedTok is a text token or null; will be consumed on next iteration) - } - } else if (matchedTok && matchedTok.type === "slash") { - // matched has backslashes but old does not — should not happen with loose_escape regex - ratios.push(matchedTok.length); // relative to 1 - mi += 1; - } else { - return null; // unexpected token pattern - } + const ratios = collectLooseEscapeRatios(oldString, matchedText); + if (!ratios) { + return null; } - // Apply ratios to newString; reuse last ratio for trailing slash runs + const newTokens = tokenizeLooseEscaping(newString); + + // Apply ratios to newString; reuse last ratio for trailing slash runs. let result = ""; let ri = 0; let lastRatio: number | null = null; @@ -635,6 +604,57 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: return result === newString ? 
null : result; } +function collectLooseEscapeRatios(oldString: string, matchedText: string): number[] | null { + const ratios: number[] = []; + let oi = 0; + let mi = 0; + + while (oi < oldString.length) { + if (oldString[oi] !== "\\") { + if (mi >= matchedText.length || matchedText[mi] !== oldString[oi]) { + return null; + } + oi += 1; + mi += 1; + continue; + } + + const oldSlashStart = oi; + while (oi < oldString.length && oldString[oi] === "\\") { + oi += 1; + } + const oldSlashCount = oi - oldSlashStart; + + if (oi >= oldString.length) { + const matchedSlashStart = mi; + while (mi < matchedText.length && matchedText[mi] === "\\") { + mi += 1; + } + const matchedSlashCount = mi - matchedSlashStart; + if (matchedSlashCount !== oldSlashCount) { + return null; + } + ratios.push(1); + continue; + } + + const anchor = oldString[oi]; + const matchedSlashStart = mi; + while (mi < matchedText.length && matchedText[mi] === "\\") { + mi += 1; + } + const matchedSlashCount = mi - matchedSlashStart; + if (mi >= matchedText.length || matchedText[mi] !== anchor) { + return null; + } + ratios.push(matchedSlashCount / oldSlashCount); + oi += 1; + mi += 1; + } + + return mi === matchedText.length ? ratios : null; +} + function tokenizeLooseEscaping(value: string): TokenSegment[] { const segments: TokenSegment[] = []; let index = 0; From 77a06df60dc7354d87d13c37ac07601e407868f0 Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 15:08:25 +0800 Subject: [PATCH 4/6] fix(edit): prefer deterministic loose escape correction Run local newString escaping correction before invoking LLM correction for unique loose_escape matches. Only fall back to LLM correction when deterministic ratio inference is ambiguous, such as mixed escaping ratios with extra new_string slash runs. 
Update edit handler tests to assert deterministic cases avoid LLM calls, and add LLM fallback coverage for mixed escaping in LaTeX, JS unicode escapes, and JSON strings. --- src/tests/tool-handlers.test.ts | 217 +++++++++++++++++++++++++++++--- src/tools/edit-handler.ts | 78 +++++++----- 2 files changed, 249 insertions(+), 46 deletions(-) diff --git a/src/tests/tool-handlers.test.ts b/src/tests/tool-handlers.test.ts index bb87a69..3ebbe8b 100644 --- a/src/tests/tool-handlers.test.ts +++ b/src/tests/tool-handlers.test.ts @@ -539,6 +539,7 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn const sessionId = "closest-match"; const snippet = await readSnippet(filePath, sessionId, workspace); + let llmCalls = 0; const editResult = await handleEditTool( { snippet_id: snippet.id, @@ -550,19 +551,10 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn client: { chat: { completions: { - create: async () => ({ - choices: [ - { - message: { - content: - "" + - "" + - "" + - "", - }, - }, - ], - }), + create: async () => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, }, }, } as any, @@ -573,11 +565,12 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn ); assert.equal(editResult.ok, true); - assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); assert.equal(fs.readFileSync(filePath, "utf8"), "params['city_json'] = city\n"); }); -test("Edit accepts a unique loose-escape match for over-escaped unicode sequences", async () => { +test("Edit deterministically corrects a unique loose-escape match for over-escaped unicode sequences", async () => { const workspace = createTempWorkspace(); const filePath = path.join(workspace, "keys.ts"); fs.writeFileSync(filePath, 'const sequence = "\\u001B[13;2~";\n', 
"utf8"); @@ -592,6 +585,45 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence old_string: 'const sequence = "\\\\u001B[13;2~";', new_string: 'const sequence = "\\\\u001B[13;130u";', }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async () => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u";\n'); +}); + +test("Edit uses LLM correction when mixed escaping ratios make deterministic correction ambiguous", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "mixed-escape.tex"); + fs.writeFileSync(filePath, String.raw`\alpha + "x"` + "\n", "utf8"); + + const sessionId = "mixed-escape-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \"x\"`, + new_string: String.raw`\\beta + \"y\" + \\gamma`, + }, createContext(sessionId, workspace, { createOpenAIClient: () => ({ client: { @@ -606,8 +638,8 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence message: { content: "" + - '' + - '' + + '' + + '' + "", }, }, @@ -626,7 +658,156 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence assert.equal(editResult.ok, true); assert.equal(llmCalls, 1); assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); - assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u";\n'); + 
assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\beta + "y" + \gamma` + "\n"); +}); + +test("Edit uses LLM correction for ambiguous mixed escaping with JS unicode escapes", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "mixed-unicode.ts"); + fs.writeFileSync(filePath, 'const sequence = "\\u001B[13;2~" + "done";\n', "utf8"); + + const sessionId = "mixed-unicode-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: 'const sequence = "\\\\u001B[13;2~" + \\"done\\";', + new_string: 'const sequence = "\\\\u001B[13;130u" + \\"done\\" + "\\\\u001B[0m";', + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async (request: { messages?: Array<{ content?: string }> }) => { + llmCalls += 1; + assert.match(String(request.messages?.[1]?.content ?? 
""), /" + + '' + + '' + + "", + }, + }, + ], + }; + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 1); + assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u" + "done" + "\\u001B[0m";\n'); +}); + +test("Edit deterministically corrects JSON string escaping without calling LLM", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "pattern.json"); + fs.writeFileSync(filePath, ["{", ' "pattern": "\\\\d+",', ' "label": "count"', "}"].join("\n") + "\n", "utf8"); + + const sessionId = "json-deterministic-loose-escape"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`"pattern": "\\\\d+"`, + new_string: String.raw`"pattern": "\\\\w+"`, + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async () => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal( + fs.readFileSync(filePath, "utf8"), + ["{", ' "pattern": "\\\\w+",', ' "label": "count"', "}"].join("\n") + "\n" + ); +}); + +test("Edit uses LLM correction for ambiguous mixed escaping in JSON strings", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "message.json"); + fs.writeFileSync(filePath, ["{", ' "message": "path \\"C:\\\\tmp\\""', "}"].join("\n") + "\n", "utf8"); + + const sessionId = 
"json-mixed-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`"message": "path \\\"C:\\\\tmp\\\""`, + new_string: String.raw`"message": "path \\\"D:\\\\logs\\\" and regex \\\\d+"`, + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async (request: { messages?: Array<{ content?: string }> }) => { + llmCalls += 1; + assert.match(String(request.messages?.[1]?.content ?? ""), /" + + '' + + '' + + "", + }, + }, + ], + }; + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 1); + assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal( + fs.readFileSync(filePath, "utf8"), + ["{", ' "message": "path \\"D:\\\\logs\\" and regex \\\\d+"', "}"].join("\n") + "\n" + ); }); test("Edit corrects newString escaping in loose_escape fallback when LLM correction is unavailable", async () => { diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index ee577f0..e3a49ca 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -56,6 +56,14 @@ type CorrectedEditStrings = { newString: string; }; +type EscapeCorrectionResult = + | { + ok: true; + newString: string; + changed: boolean; + } + | { ok: false }; + const editSchema = z.strictObject({ file_path: z.string().optional(), snippet_id: z.string().min(1, "snippet_id is required."), @@ -227,31 +235,37 @@ export async function handleEditTool( if (matches.length === 0) { const looseEscapeMatches = findLooseEscapeMatches(raw, oldString, scope); if (looseEscapeMatches.length === 1 && looseEscapeMatches[0]?.score === 1) { - const correctedStrings = await correctEscapedStringsWithLLM( - raw.slice(scope.startOffset, scope.endOffset), - oldString, - newString, 
- looseEscapeMatches[0].text, - context - ); - - if (correctedStrings) { - const correctedMatches = findOccurrences(raw, correctedStrings.oldString, scope); - if (correctedMatches.length > 0) { - matches = correctedMatches; - matchedVia = "llm_escape_correction"; - replacementOldString = correctedStrings.oldString; - replacementNewString = correctedStrings.newString; - } - } + const looseEscapeMatch = looseEscapeMatches[0]; + const deterministicCorrection = fixNewStringEscaping(oldString, looseEscapeMatch.text, newString); - if (matches.length === 0) { - matches = [looseEscapeMatches[0]]; + if (deterministicCorrection.ok) { + matches = [looseEscapeMatch]; matchedVia = "loose_escape"; - replacementOldString = looseEscapeMatches[0].text; - const correctedNew = fixNewStringEscaping(oldString, looseEscapeMatches[0].text, newString); - if (correctedNew !== null) { - replacementNewString = correctedNew; + replacementOldString = looseEscapeMatch.text; + replacementNewString = deterministicCorrection.newString; + } else { + const correctedStrings = await correctEscapedStringsWithLLM( + raw.slice(scope.startOffset, scope.endOffset), + oldString, + newString, + looseEscapeMatch.text, + context + ); + + if (correctedStrings) { + const correctedMatches = findOccurrences(raw, correctedStrings.oldString, scope); + if (correctedMatches.length > 0) { + matches = correctedMatches; + matchedVia = "llm_escape_correction"; + replacementOldString = correctedStrings.oldString; + replacementNewString = correctedStrings.newString; + } + } + + if (matches.length === 0) { + matches = [looseEscapeMatch]; + matchedVia = "loose_escape"; + replacementOldString = looseEscapeMatch.text; } } } @@ -567,25 +581,29 @@ function stripReadResultLineTabs(value: string): string { type TokenSegment = { type: "slash"; length: number } | { type: "text"; value: string }; -function fixNewStringEscaping(oldString: string, matchedText: string, newString: string): string | null { +function 
fixNewStringEscaping(oldString: string, matchedText: string, newString: string): EscapeCorrectionResult { if (oldString === matchedText) { - return null; // no escaping difference to correct + return { ok: true, newString, changed: false }; } const ratios = collectLooseEscapeRatios(oldString, matchedText); if (!ratios) { - return null; + return { ok: false }; } const newTokens = tokenizeLooseEscaping(newString); + const canReuseLastRatio = ratios.length > 0 && ratios.every((ratio) => Math.abs(ratio - ratios[0]) < Number.EPSILON); - // Apply ratios to newString; reuse last ratio for trailing slash runs. + // Apply ratios to newString; reuse the last ratio only when the observed escaping error is uniform. let result = ""; let ri = 0; let lastRatio: number | null = null; for (const tok of newTokens) { if (tok.type === "slash") { const ratio = ri < ratios.length ? ratios[ri] : lastRatio; + if (ri >= ratios.length && !canReuseLastRatio) { + return { ok: false }; + } if (ratio !== null) { const correctedCount = Math.max(0, Math.round(tok.length * ratio)); result += "\\".repeat(correctedCount); @@ -601,7 +619,11 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: } } - return result === newString ? null : result; + return { + ok: true, + newString: result, + changed: result !== newString, + }; } function collectLooseEscapeRatios(oldString: string, matchedText: string): number[] | null { From 93b4f638e5b54985c039d7a956d2a8eae8530137 Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 15:13:54 +0800 Subject: [PATCH 5/6] fix(edit): return error instead of silently writing uncorrected newString when escaping is ambiguous and LLM unavailable When deterministic escape correction fails (mixed/inconsistent ratios) and no LLM is available to disambiguate, return a clear error instead of silently writing the uncorrected (potentially over-escaped) newString to the file. 
This is safer because the LLM can then re-read the file with exact escaping or fall back to the Bash tool. --- src/tests/tool-handlers.test.ts | 25 +++++++++++++++++++++++++ src/tools/edit-handler.ts | 14 +++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/tests/tool-handlers.test.ts b/src/tests/tool-handlers.test.ts index 3ebbe8b..86ee3f8 100644 --- a/src/tests/tool-handlers.test.ts +++ b/src/tests/tool-handlers.test.ts @@ -713,6 +713,31 @@ test("Edit uses LLM correction for ambiguous mixed escaping with JS unicode esca assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u" + "done" + "\\u001B[0m";\n'); }); +test("Edit returns an error when deterministic correction is ambiguous and no LLM is available", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "ambiguous-no-llm.tex"); + // Mixed escaping: single-backslash LaTeX command + bare double-quotes + fs.writeFileSync(filePath, String.raw`\alpha + "x"` + "\n", "utf8"); + + const sessionId = "ambiguous-no-llm"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // LLM over-escaped inconsistently: doubled backslash for \alpha, backslash-escaped double-quote + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \"x\"`, + new_string: String.raw`\\beta + \"y\" + \\gamma`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, false); + assert.match(editResult.error ?? 
"", /escaping/); + // File should be untouched + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\alpha + "x"` + "\n"); +}); + test("Edit deterministically corrects JSON string escaping without calling LLM", async () => { const workspace = createTempWorkspace(); const filePath = path.join(workspace, "pattern.json"); diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index e3a49ca..047d172 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -263,9 +263,17 @@ export async function handleEditTool( } if (matches.length === 0) { - matches = [looseEscapeMatch]; - matchedVia = "loose_escape"; - replacementOldString = looseEscapeMatch.text; + return { + ok: false, + name: "edit", + error: + "old_string escaping doesn't match the file and cannot be corrected " + + "deterministically (inconsistent escaping patterns). " + + "Re-read the file and use exact escaping, or use the Bash tool instead.", + metadata: { + scope: formatScopeMetadata(scope), + }, + }; } } } From 6fd4e2ebf31a0f88c6316b2a1ca7e76696c24af2 Mon Sep 17 00:00:00 2001 From: fym998 <61316972+fym998@users.noreply.github.com> Date: Sat, 30 May 2026 16:17:31 +0800 Subject: [PATCH 6/6] refactor(edit): rename escape correction cursors --- src/tools/edit-handler.ts | 62 +++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index 047d172..c2dcf96 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -604,12 +604,12 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: // Apply ratios to newString; reuse the last ratio only when the observed escaping error is uniform. let result = ""; - let ri = 0; + let slashRatioIndex = 0; let lastRatio: number | null = null; for (const tok of newTokens) { if (tok.type === "slash") { - const ratio = ri < ratios.length ? 
ratios[ri] : lastRatio; - if (ri >= ratios.length && !canReuseLastRatio) { + const ratio = slashRatioIndex < ratios.length ? ratios[slashRatioIndex] : lastRatio; + if (slashRatioIndex >= ratios.length && !canReuseLastRatio) { return { ok: false }; } if (ratio !== null) { @@ -618,10 +618,10 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: } else { result += "\\".repeat(tok.length); } - if (ri < ratios.length) { - lastRatio = ratios[ri]; + if (slashRatioIndex < ratios.length) { + lastRatio = ratios[slashRatioIndex]; } - ri += 1; + slashRatioIndex += 1; } else { result += tok.value; } @@ -636,31 +636,31 @@ function fixNewStringEscaping(oldString: string, matchedText: string, newString: function collectLooseEscapeRatios(oldString: string, matchedText: string): number[] | null { const ratios: number[] = []; - let oi = 0; - let mi = 0; + let oldCursor = 0; + let matchedCursor = 0; - while (oi < oldString.length) { - if (oldString[oi] !== "\\") { - if (mi >= matchedText.length || matchedText[mi] !== oldString[oi]) { + while (oldCursor < oldString.length) { + if (oldString[oldCursor] !== "\\") { + if (matchedCursor >= matchedText.length || matchedText[matchedCursor] !== oldString[oldCursor]) { return null; } - oi += 1; - mi += 1; + oldCursor += 1; + matchedCursor += 1; continue; } - const oldSlashStart = oi; - while (oi < oldString.length && oldString[oi] === "\\") { - oi += 1; + const oldSlashStart = oldCursor; + while (oldCursor < oldString.length && oldString[oldCursor] === "\\") { + oldCursor += 1; } - const oldSlashCount = oi - oldSlashStart; + const oldSlashCount = oldCursor - oldSlashStart; - if (oi >= oldString.length) { - const matchedSlashStart = mi; - while (mi < matchedText.length && matchedText[mi] === "\\") { - mi += 1; + if (oldCursor >= oldString.length) { + const matchedSlashStart = matchedCursor; + while (matchedCursor < matchedText.length && matchedText[matchedCursor] === "\\") { + matchedCursor += 1; } - const 
matchedSlashCount = mi - matchedSlashStart; + const matchedSlashCount = matchedCursor - matchedSlashStart; if (matchedSlashCount !== oldSlashCount) { return null; } @@ -668,21 +668,21 @@ function collectLooseEscapeRatios(oldString: string, matchedText: string): numbe continue; } - const anchor = oldString[oi]; - const matchedSlashStart = mi; - while (mi < matchedText.length && matchedText[mi] === "\\") { - mi += 1; + const anchor = oldString[oldCursor]; + const matchedSlashStart = matchedCursor; + while (matchedCursor < matchedText.length && matchedText[matchedCursor] === "\\") { + matchedCursor += 1; } - const matchedSlashCount = mi - matchedSlashStart; - if (mi >= matchedText.length || matchedText[mi] !== anchor) { + const matchedSlashCount = matchedCursor - matchedSlashStart; + if (matchedCursor >= matchedText.length || matchedText[matchedCursor] !== anchor) { return null; } ratios.push(matchedSlashCount / oldSlashCount); - oi += 1; - mi += 1; + oldCursor += 1; + matchedCursor += 1; } - return mi === matchedText.length ? ratios : null; + return matchedCursor === matchedText.length ? ratios : null; } function tokenizeLooseEscaping(value: string): TokenSegment[] {