diff --git a/src/tests/tool-handlers.test.ts b/src/tests/tool-handlers.test.ts index aefef2f..86ee3f8 100644 --- a/src/tests/tool-handlers.test.ts +++ b/src/tests/tool-handlers.test.ts @@ -539,6 +539,7 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn const sessionId = "closest-match"; const snippet = await readSnippet(filePath, sessionId, workspace); + let llmCalls = 0; const editResult = await handleEditTool( { snippet_id: snippet.id, @@ -550,19 +551,10 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn client: { chat: { completions: { - create: async () => ({ - choices: [ - { - message: { - content: - "" + - "" + - "" + - "", - }, - }, - ], - }), + create: async () => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, }, }, } as any, @@ -573,11 +565,12 @@ test("Edit accepts a unique loose-escape match when only escaping differs", asyn ); assert.equal(editResult.ok, true); - assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); assert.equal(fs.readFileSync(filePath, "utf8"), "params['city_json'] = city\n"); }); -test("Edit accepts a unique loose-escape match for over-escaped unicode sequences", async () => { +test("Edit deterministically corrects a unique loose-escape match for over-escaped unicode sequences", async () => { const workspace = createTempWorkspace(); const filePath = path.join(workspace, "keys.ts"); fs.writeFileSync(filePath, 'const sequence = "\\u001B[13;2~";\n', "utf8"); @@ -592,6 +585,45 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence old_string: 'const sequence = "\\\\u001B[13;2~";', new_string: 'const sequence = "\\\\u001B[13;130u";', }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async 
() => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u";\n'); +}); + +test("Edit uses LLM correction when mixed escaping ratios make deterministic correction ambiguous", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "mixed-escape.tex"); + fs.writeFileSync(filePath, String.raw`\alpha + "x"` + "\n", "utf8"); + + const sessionId = "mixed-escape-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \"x\"`, + new_string: String.raw`\\beta + \"y\" + \\gamma`, + }, createContext(sessionId, workspace, { createOpenAIClient: () => ({ client: { @@ -606,8 +638,8 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence message: { content: "" + - '' + - '' + + '' + + '' + "", }, }, @@ -626,7 +658,273 @@ test("Edit accepts a unique loose-escape match for over-escaped unicode sequence assert.equal(editResult.ok, true); assert.equal(llmCalls, 1); assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); - assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u";\n'); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\beta + "y" + \gamma` + "\n"); +}); + +test("Edit uses LLM correction for ambiguous mixed escaping with JS unicode escapes", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "mixed-unicode.ts"); + fs.writeFileSync(filePath, 'const sequence = "\\u001B[13;2~" + 
"done";\n', "utf8"); + + const sessionId = "mixed-unicode-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: 'const sequence = "\\\\u001B[13;2~" + \\"done\\";', + new_string: 'const sequence = "\\\\u001B[13;130u" + \\"done\\" + "\\\\u001B[0m";', + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async (request: { messages?: Array<{ content?: string }> }) => { + llmCalls += 1; + assert.match(String(request.messages?.[1]?.content ?? ""), /" + + '' + + '' + + "", + }, + }, + ], + }; + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 1); + assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const sequence = "\\u001B[13;130u" + "done" + "\\u001B[0m";\n'); +}); + +test("Edit returns an error when deterministic correction is ambiguous and no LLM is available", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "ambiguous-no-llm.tex"); + // Mixed escaping: single-backslash LaTeX command + bare double-quotes + fs.writeFileSync(filePath, String.raw`\alpha + "x"` + "\n", "utf8"); + + const sessionId = "ambiguous-no-llm"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // LLM over-escaped inconsistently: doubled backslash for \alpha, backslash-escaped double-quote + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \"x\"`, + new_string: String.raw`\\beta + \"y\" + \\gamma`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, false); + assert.match(editResult.error ?? 
"", /escaping/); + // File should be untouched + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\alpha + "x"` + "\n"); +}); + +test("Edit deterministically corrects JSON string escaping without calling LLM", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "pattern.json"); + fs.writeFileSync(filePath, ["{", ' "pattern": "\\\\d+",', ' "label": "count"', "}"].join("\n") + "\n", "utf8"); + + const sessionId = "json-deterministic-loose-escape"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`"pattern": "\\\\d+"`, + new_string: String.raw`"pattern": "\\\\w+"`, + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async () => { + llmCalls += 1; + throw new Error("LLM correction should not be called when deterministic correction succeeds."); + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 0); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal( + fs.readFileSync(filePath, "utf8"), + ["{", ' "pattern": "\\\\w+",', ' "label": "count"', "}"].join("\n") + "\n" + ); +}); + +test("Edit uses LLM correction for ambiguous mixed escaping in JSON strings", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "message.json"); + fs.writeFileSync(filePath, ["{", ' "message": "path \\"C:\\\\tmp\\""', "}"].join("\n") + "\n", "utf8"); + + const sessionId = "json-mixed-llm-fallback"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + let llmCalls = 0; + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`"message": "path \\\"C:\\\\tmp\\\""`, + new_string: String.raw`"message": 
"path \\\"D:\\\\logs\\\" and regex \\\\d+"`, + }, + createContext(sessionId, workspace, { + createOpenAIClient: () => ({ + client: { + chat: { + completions: { + create: async (request: { messages?: Array<{ content?: string }> }) => { + llmCalls += 1; + assert.match(String(request.messages?.[1]?.content ?? ""), /" + + '' + + '' + + "", + }, + }, + ], + }; + }, + }, + }, + } as any, + model: "test-model", + thinkingEnabled: false, + }), + }) + ); + + assert.equal(editResult.ok, true); + assert.equal(llmCalls, 1); + assert.equal(editResult.metadata?.matched_via, "llm_escape_correction"); + assert.equal( + fs.readFileSync(filePath, "utf8"), + ["{", ' "message": "path \\"D:\\\\logs\\" and regex \\\\d+"', "}"].join("\n") + "\n" + ); +}); + +test("Edit corrects newString escaping in loose_escape fallback when LLM correction is unavailable", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex.tex"); + // LaTeX file with single-backslash commands + fs.writeFileSync(filePath, String.raw`\alpha + \beta = \gamma` + "\n", "utf8"); + + const sessionId = "loose-escape-newstring-fix"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // Simulate LLM over-escaping (doubled backslashes), no LLM client available for correction + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`\\alpha + \\beta`, + new_string: String.raw`\\delta + \\epsilon`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\delta + \epsilon = \gamma` + "\n"); +}); + +test("Edit corrects newString escaping in loose_escape fallback for over-escaped LaTeX accent", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex2.tex"); + // LaTeX accent command: H\"{o}tel — backslash before 
double-quote + fs.writeFileSync(filePath, String.raw`H\"{o}tel is nice` + "\n", "utf8"); + + const sessionId = "loose-escape-accent"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + // LLM over-escaped both the backslash AND the quote: H\\\"{o}tel + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`H\\\"{o}tel`, + new_string: String.raw`M\\\"{u}nchen`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`M\"{u}nchen is nice` + "\n"); +}); + +test("Edit removes quote escapes from newString in loose_escape fallback when matched text has none", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "quoted.ts"); + fs.writeFileSync(filePath, 'const label = "alpha";\n', "utf8"); + + const sessionId = "loose-escape-quote-zero-ratio"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: String.raw`const label = \"alpha\";`, + new_string: String.raw`const label = \"beta\";`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), 'const label = "beta";\n'); +}); + +test("Edit reuses the last loose_escape ratio for extra backslash runs in newString", async () => { + const workspace = createTempWorkspace(); + const filePath = path.join(workspace, "latex-extra-runs.tex"); + fs.writeFileSync(filePath, String.raw`\alpha is here` + "\n", "utf8"); + + const sessionId = "loose-escape-extra-new-runs"; + const snippet = await readSnippet(filePath, sessionId, workspace); + + const editResult = await handleEditTool( + { + snippet_id: snippet.id, + old_string: 
String.raw`\\alpha`, + new_string: String.raw`\\beta + \\gamma`, + }, + createContext(sessionId, workspace) + ); + + assert.equal(editResult.ok, true); + assert.equal(editResult.metadata?.matched_via, "loose_escape"); + assert.equal(fs.readFileSync(filePath, "utf8"), String.raw`\beta + \gamma is here` + "\n"); }); test("Edit strips accidental read-result tabs after newlines when that creates a unique match", async () => { diff --git a/src/tools/edit-handler.ts b/src/tools/edit-handler.ts index b687c4e..c2dcf96 100644 --- a/src/tools/edit-handler.ts +++ b/src/tools/edit-handler.ts @@ -56,6 +56,14 @@ type CorrectedEditStrings = { newString: string; }; +type EscapeCorrectionResult = + | { + ok: true; + newString: string; + changed: boolean; + } + | { ok: false }; + const editSchema = z.strictObject({ file_path: z.string().optional(), snippet_id: z.string().min(1, "snippet_id is required."), @@ -227,27 +235,46 @@ export async function handleEditTool( if (matches.length === 0) { const looseEscapeMatches = findLooseEscapeMatches(raw, oldString, scope); if (looseEscapeMatches.length === 1 && looseEscapeMatches[0]?.score === 1) { - const correctedStrings = await correctEscapedStringsWithLLM( - raw.slice(scope.startOffset, scope.endOffset), - oldString, - newString, - looseEscapeMatches[0].text, - context - ); - - if (correctedStrings) { - const correctedMatches = findOccurrences(raw, correctedStrings.oldString, scope); - if (correctedMatches.length > 0) { - matches = correctedMatches; - matchedVia = "llm_escape_correction"; - replacementOldString = correctedStrings.oldString; - replacementNewString = correctedStrings.newString; - } - } + const looseEscapeMatch = looseEscapeMatches[0]; + const deterministicCorrection = fixNewStringEscaping(oldString, looseEscapeMatch.text, newString); - if (matches.length === 0) { - matches = [looseEscapeMatches[0]]; + if (deterministicCorrection.ok) { + matches = [looseEscapeMatch]; matchedVia = "loose_escape"; + replacementOldString = 
/** One lexical piece of a string: a maximal run of backslashes, or the text between two runs. */
type TokenSegment = { type: "slash"; length: number } | { type: "text"; value: string };

/**
 * Re-escapes `newString` using the escaping error observed between `oldString`
 * (what the caller sent) and `matchedText` (what the file actually contains).
 *
 * Every backslash run in `oldString` yields a ratio `matched / old`. The ratios
 * are then applied to the backslash runs of `newString`:
 *
 * - If all ratios are equal, the error is uniform and the same ratio is applied
 *   to every run, including runs beyond those seen in `oldString`.
 * - If the ratios differ, a run in `newString` may only borrow the ratio at the
 *   same position when it has the same length as the `oldString` run that
 *   produced it. Any other shape is ambiguous and yields `{ ok: false }`, so
 *   the caller can fall back instead of writing a mis-escaped replacement.
 *
 * @param oldString - The search string as supplied, possibly mis-escaped.
 * @param matchedText - The file text that `oldString` loosely matched.
 * @param newString - The replacement string, assumed to carry the same escaping error.
 * @returns The file's `EscapeCorrectionResult` shape: `{ ok: true, newString, changed }`
 *   with the corrected replacement, or `{ ok: false }` when no unambiguous correction exists.
 */
function fixNewStringEscaping(
  oldString: string,
  matchedText: string,
  newString: string
): { ok: true; newString: string; changed: boolean } | { ok: false } {
  if (oldString === matchedText) {
    return { ok: true, newString, changed: false };
  }

  const ratios = collectLooseEscapeRatios(oldString, matchedText);
  if (!ratios) {
    return { ok: false };
  }

  const firstRatio = ratios[0];
  const lastRatio = ratios[ratios.length - 1];
  const isUniform =
    firstRatio !== undefined && ratios.every((ratio) => Math.abs(ratio - firstRatio) < Number.EPSILON);

  // One entry per ratio: collectLooseEscapeRatios emits exactly one ratio per maximal backslash run.
  const oldRunLengths: number[] = [];
  for (const segment of tokenizeLooseEscaping(oldString)) {
    if (segment.type === "slash") {
      oldRunLengths.push(segment.length);
    }
  }

  let corrected = "";
  let runIndex = 0;
  for (const segment of tokenizeLooseEscaping(newString)) {
    if (segment.type === "text") {
      corrected += segment.value;
      continue;
    }

    let ratio = ratios[runIndex];
    if (ratio === undefined) {
      // More runs than were observed: only a uniform error can be extrapolated.
      if (!isUniform || lastRatio === undefined) {
        return { ok: false };
      }
      ratio = lastRatio;
    } else if (!isUniform && oldRunLengths[runIndex] !== segment.length) {
      // Mixed ratios are position-specific; a run of a different length is not
      // the run this ratio was measured on, so applying it would be a guess.
      return { ok: false };
    }

    corrected += "\\".repeat(Math.max(0, Math.round(segment.length * ratio)));
    runIndex += 1;
  }

  return {
    ok: true,
    newString: corrected,
    changed: corrected !== newString,
  };
}

/**
 * Walks `oldString` and `matchedText` in lockstep, treating them as equal except
 * for the length of their backslash runs, and records `matched / old` for every
 * backslash run in `oldString`.
 *
 * A run at the very end of `oldString` has no following character to anchor on,
 * so it must match exactly and is recorded as ratio `1`.
 *
 * @returns One ratio per backslash run of `oldString`, or `null` when the two
 *   strings differ in anything other than backslash-run lengths.
 */
function collectLooseEscapeRatios(oldString: string, matchedText: string): number[] | null {
  const slashRunLength = (text: string, from: number): number => {
    let end = from;
    while (end < text.length && text[end] === "\\") {
      end += 1;
    }
    return end - from;
  };

  const ratios: number[] = [];
  let oldCursor = 0;
  let matchedCursor = 0;

  while (oldCursor < oldString.length) {
    if (oldString[oldCursor] !== "\\") {
      // Plain characters must line up one-to-one.
      if (matchedCursor >= matchedText.length || matchedText[matchedCursor] !== oldString[oldCursor]) {
        return null;
      }
      oldCursor += 1;
      matchedCursor += 1;
      continue;
    }

    const oldSlashCount = slashRunLength(oldString, oldCursor);
    oldCursor += oldSlashCount;
    const matchedSlashCount = slashRunLength(matchedText, matchedCursor);
    matchedCursor += matchedSlashCount;

    if (oldCursor >= oldString.length) {
      // Trailing run: nothing follows it, so only an exact match is accepted.
      if (matchedSlashCount !== oldSlashCount) {
        return null;
      }
      ratios.push(1);
      continue;
    }

    // The character after the run anchors both sides; it must be identical.
    if (matchedCursor >= matchedText.length || matchedText[matchedCursor] !== oldString[oldCursor]) {
      return null;
    }
    ratios.push(matchedSlashCount / oldSlashCount);
    oldCursor += 1;
    matchedCursor += 1;
  }

  // Leftover matched text means the strings differ beyond escaping.
  return matchedCursor === matchedText.length ? ratios : null;
}

/**
 * Splits `value` into alternating segments: maximal backslash runs (recorded by
 * length) and the text between them (recorded verbatim).
 */
function tokenizeLooseEscaping(value: string): TokenSegment[] {
  const segments: TokenSegment[] = [];
  let index = 0;
  while (index < value.length) {
    const start = index;
    const inSlashRun = value[index] === "\\";
    while (index < value.length && (value[index] === "\\") === inSlashRun) {
      index += 1;
    }
    segments.push(
      inSlashRun ? { type: "slash", length: index - start } : { type: "text", value: value.slice(start, index) }
    );
  }
  return segments;
}