diff --git a/docs/configuration.md b/docs/configuration.md index 1cce9a1..6dd2254 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -31,6 +31,8 @@ Deep Code 使用 `settings.json` 设置文件进行持久化配置,支持两 | `thinkingEnabled` | boolean | 是否启用思考模式(DeepSeek V4 系列默认启用) | | `reasoningEffort` | string | 推理强度,可选 `"high"` 或 `"max"`(默认 `"max"`) | | `debugLogEnabled` | boolean | 是否启用调试日志输出(默认 `false`) | +| `requestTimeoutMs` | number | 主模型请求超时时间,单位毫秒。`0` 表示不启用超时(默认 `0`) | +| `maxRetries` | number | 主模型请求遇到临时网络错误时的最大重试次数(默认 `2`,最大 `10`) | | `notify` | string | 任务完成通知脚本的完整路径(如 Slack 通知脚本) | | `webSearchTool` | string | 自定义联网搜索脚本的完整路径 | | `mcpServers` | object | MCP 服务器配置(键为服务名,值为 McpServerConfig 对象) | @@ -45,6 +47,8 @@ Deep Code 使用 `settings.json` 设置文件进行持久化配置,支持两 | `THINKING_ENABLED` | string | 是否启用思考模式 | | `REASONING_EFFORT` | string | 推理强度 | | `DEBUG_LOG_ENABLED` | string | 是否启用调试日志输出 | +| `REQUEST_TIMEOUT_MS` | string | 主模型请求超时时间,单位毫秒。`0` 表示不启用超时 | +| `MAX_RETRIES` | string | 主模型请求遇到临时网络错误时的最大重试次数 | | `<其他任意KEY>` | string | 自定义环境变量 | #### `thinkingEnabled` — 思考模式 @@ -130,6 +134,22 @@ MCP(Model Context Protocol)服务器配置。值是键值对,键为服务 设为 `true` 可让程序输出详细的调试日志(默认 `false`),用于排查 API 调用和工具执行的问题。 +#### `requestTimeoutMs` 与 `maxRetries` — 请求超时与重试 + +用于控制主模型 Chat Completion 请求的超时和自动重试: + +```json +{ + "requestTimeoutMs": 300000, + "maxRetries": 2 +} +``` + +- `requestTimeoutMs` 单位为毫秒。默认 `0` 表示不启用请求超时,避免长时间思考或长输出被意外中断。 +- `maxRetries` 默认 `2`,最大 `10`。仅对临时错误重试,例如网络断开、请求超时、HTTP `408`、`409`、`425`、`429` 和 `5xx`。 +- 用户主动中断、权限拒绝、HTTP `400`、`401`、`403`、`404` 等配置或请求错误不会自动重试。 +- 也可以通过环境变量配置:`DEEPCODE_REQUEST_TIMEOUT_MS=300000`、`DEEPCODE_MAX_RETRIES=2`。 + ## 环境变量优先级 环境变量是配置应用程序的常用方式,尤其适用于敏感信息(如 api-key)或可能在不同环境之间更改的设置。 diff --git a/docs/configuration_en.md b/docs/configuration_en.md index fa396f9..586a285 100644 --- a/docs/configuration_en.md +++ b/docs/configuration_en.md @@ -31,6 +31,8 @@ The following are all the top-level fields supported in `settings.json`, along w | `thinkingEnabled` | boolean | Whether to enable thinking mode (enabled by default for DeepSeek V4 series)| | `reasoningEffort` | string | Reasoning intensity, either `"high"` or `"max"` (default `"max"`) | | `debugLogEnabled` | boolean | Enable debug log output (default `false`) | +| `requestTimeoutMs` | number | Main model request timeout in milliseconds. `0` disables the timeout (default `0`) | +| `maxRetries` | number | Maximum retry count for transient main model request failures (default `2`, max `10`) | | `notify` | string | Full path to a task-completion notification script (e.g., Slack notification script) | | `webSearchTool` | string | Full path to a custom web search script | | `mcpServers` | object | MCP server configurations (keys are service names, values are McpServerConfig objects) | @@ -45,6 +47,8 @@ The following are all the top-level fields supported in `settings.json`, along w | `THINKING_ENABLED`| string | Enable thinking mode | | `REASONING_EFFORT`| string | Reasoning intensity | | `DEBUG_LOG_ENABLED`| string| Enable debug log output | +| `REQUEST_TIMEOUT_MS`| string | Main model request timeout in milliseconds. `0` disables the timeout | +| `MAX_RETRIES` | string | Maximum retry count for transient main model request failures | | `` | string | Custom environment variable | #### `thinkingEnabled` — Thinking Mode @@ -129,6 +133,22 @@ For detailed MCP usage instructions, refer to [mcp.md](mcp.md). Set to `true` to enable detailed debug logging (default `false`), useful for troubleshooting API calls and tool execution. +#### `requestTimeoutMs` and `maxRetries` — Request Timeout and Retries + +Controls timeout and automatic retries for main model Chat Completion requests: + +```json +{ + "requestTimeoutMs": 300000, + "maxRetries": 2 +} +``` + +- `requestTimeoutMs` is in milliseconds. The default `0` disables request timeout so long thinking or long outputs are not interrupted unexpectedly. +- `maxRetries` defaults to `2` and is capped at `10`. Retries apply only to transient failures, such as network disconnects, request timeouts, HTTP `408`, `409`, `425`, `429`, and `5xx`. +- User interrupts, permission denials, HTTP `400`, `401`, `403`, `404`, and other request/configuration errors are not retried. +- You can also configure these through environment variables: `DEEPCODE_REQUEST_TIMEOUT_MS=300000`, `DEEPCODE_MAX_RETRIES=2`. + ## Environment Variable Priority Environment variables are a common way to configure applications, especially for sensitive information (such as api-key) or settings that may change between environments. @@ -181,4 +201,4 @@ Applied in the following priority order (lower-numbered overridden by higher-num 2. User-level settings.json: `{"env": {"MCP_GITHUB_PERSONAL_ACCESS_TOKEN": "..."}}` 3. Project-level settings.json: `{"mcpServers":{"github":{"env":{"GITHUB_PERSONAL_ACCESS_TOKEN":"..."}}}}` 4. Project-level settings.json: `{"env": {"MCP_GITHUB_PERSONAL_ACCESS_TOKEN": "..."}}` -5. System environment variable: `DEEPCODE_MCP_GITHUB_PERSONAL_ACCESS_TOKEN=... deepcode` \ No newline at end of file +5. System environment variable: `DEEPCODE_MCP_GITHUB_PERSONAL_ACCESS_TOKEN=... deepcode` diff --git a/src/common/openai-client.ts b/src/common/openai-client.ts index ee3dd66..1160e15 100644 --- a/src/common/openai-client.ts +++ b/src/common/openai-client.ts @@ -26,6 +26,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): { thinkingEnabled: boolean; reasoningEffort: "high" | "max"; debugLogEnabled: boolean; + requestTimeoutMs: number; + maxRetries: number; notify?: string; webSearchTool?: string; env: Record; @@ -40,6 +42,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): { thinkingEnabled: settings.thinkingEnabled, reasoningEffort: settings.reasoningEffort, debugLogEnabled: settings.debugLogEnabled, + requestTimeoutMs: settings.requestTimeoutMs, + maxRetries: settings.maxRetries, notify: settings.notify, webSearchTool: settings.webSearchTool, env: settings.env, @@ -56,6 +60,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): { thinkingEnabled: settings.thinkingEnabled, reasoningEffort: settings.reasoningEffort, debugLogEnabled: settings.debugLogEnabled, + requestTimeoutMs: settings.requestTimeoutMs, + maxRetries: settings.maxRetries, notify: settings.notify, webSearchTool: settings.webSearchTool, env: settings.env, @@ -91,6 +97,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): { thinkingEnabled: settings.thinkingEnabled, reasoningEffort: settings.reasoningEffort, debugLogEnabled: settings.debugLogEnabled, + requestTimeoutMs: settings.requestTimeoutMs, + maxRetries: settings.maxRetries, notify: settings.notify, webSearchTool: settings.webSearchTool, env: settings.env, diff --git a/src/session.ts b/src/session.ts index 349c48e..ac6160d 100644 --- a/src/session.ts +++ b/src/session.ts @@ -26,7 +26,13 @@ import { type ToolExecutionHooks, } from "./tools/executor"; import { McpManager } from "./mcp/mcp-manager"; -import type { McpServerConfig, PermissionScope, PermissionSettings } from "./settings"; +import { + DEFAULT_MAX_RETRIES, + DEFAULT_REQUEST_TIMEOUT_MS, + type McpServerConfig, + type PermissionScope, + type PermissionSettings, +} from "./settings"; import { logApiError } from "./common/error-logger"; import { logOpenAIChatCompletionDebug, normalizeDebugError } from "./common/debug-logger"; import { killProcessTree } from "./common/process-tree"; @@ -62,6 +68,8 @@ const DEFAULT_NEW_PROMPT_API_URL = "https://deepcode.vegamo.cn/api/plugin/new"; const NEW_PROMPT_REPORT_TIMEOUT_MS = 3000; const DEFAULT_COMPACT_PROMPT_TOKEN_THRESHOLD = 128 * 1024; const DEEPSEEK_V4_COMPACT_PROMPT_TOKEN_THRESHOLD = 512 * 1024; +const CHAT_RETRY_BASE_DELAY_MS = 250; +const CHAT_RETRY_MAX_DELAY_MS = 4000; type ChatCompletionDebugOptions = { enabled?: boolean; @@ -264,6 +272,8 @@ type SessionManagerOptions = { webSearchTool?: string; mcpServers?: Record; permissions?: Required; + requestTimeoutMs?: number; + maxRetries?: number; }; renderMarkdown: (text: string) => string; onAssistantMessage: (message: SessionMessage, shouldConnect: boolean) => void; @@ -290,6 +300,8 @@ export class SessionManager { webSearchTool?: string; mcpServers?: Record; permissions?: Required; + requestTimeoutMs?: number; + maxRetries?: number; }; private readonly onAssistantMessage: (message: SessionMessage, shouldConnect: boolean) => void; private readonly onSessionEntryUpdated?: (entry: SessionEntry) => void; @@ -418,6 +430,161 @@ export class SessionManager { throw error; } + private getChatRequestControls(): { requestTimeoutMs: number; maxRetries: number } { + const settings = this.getResolvedSettings(); + const requestTimeoutMs = + typeof settings.requestTimeoutMs === "number" + ? Math.max(0, Math.round(settings.requestTimeoutMs)) + : DEFAULT_REQUEST_TIMEOUT_MS; + const maxRetries = + typeof settings.maxRetries === "number" ? Math.max(0, Math.round(settings.maxRetries)) : DEFAULT_MAX_RETRIES; + return { requestTimeoutMs, maxRetries }; + } + + private createAttemptOptions( + options: Record | undefined, + requestTimeoutMs: number + ): { + options?: Record; + cleanup: () => void; + didTimeout: () => boolean; + } { + const parentSignal = options?.signal instanceof AbortSignal ? options.signal : undefined; + if (!parentSignal && requestTimeoutMs <= 0) { + return { + options, + cleanup: () => {}, + didTimeout: () => false, + }; + } + + const controller = new AbortController(); + let timedOut = false; + let timeout: ReturnType | null = null; + + const abortFromParent = () => { + if (!controller.signal.aborted) { + controller.abort(); + } + }; + + if (parentSignal?.aborted) { + controller.abort(); + } else { + parentSignal?.addEventListener("abort", abortFromParent, { once: true }); + } + + if (requestTimeoutMs > 0) { + timeout = setTimeout(() => { + timedOut = true; + if (!controller.signal.aborted) { + controller.abort(); + } + }, requestTimeoutMs); + } + + return { + options: { + ...(options ?? {}), + signal: controller.signal, + }, + cleanup: () => { + if (timeout) { + clearTimeout(timeout); + } + parentSignal?.removeEventListener("abort", abortFromParent); + }, + didTimeout: () => timedOut, + }; + } + + private normalizeChatCompletionError(error: unknown, timedOut: boolean, requestTimeoutMs: number): Error | unknown { + if (!timedOut) { + return error; + } + const timeoutError = new Error(`Request timed out after ${requestTimeoutMs}ms.`); + timeoutError.name = "TimeoutError"; + return timeoutError; + } + + private getErrorStatus(error: unknown): number | null { + if (!error || typeof error !== "object") { + return null; + } + const status = (error as { status?: unknown; statusCode?: unknown }).status; + if (typeof status === "number") { + return status; + } + const statusCode = (error as { statusCode?: unknown }).statusCode; + return typeof statusCode === "number" ? statusCode : null; + } + + private getErrorCode(error: unknown): string { + if (!error || typeof error !== "object") { + return ""; + } + const code = (error as { code?: unknown }).code; + return typeof code === "string" ? code : ""; + } + + private isRetryableChatCompletionError(error: unknown, timedOut: boolean): boolean { + if (timedOut) { + return true; + } + if (this.isAbortLikeError(error)) { + return false; + } + + const status = this.getErrorStatus(error); + if (status != null) { + return status === 408 || status === 409 || status === 425 || status === 429 || status >= 500; + } + + const code = this.getErrorCode(error); + if (["ECONNRESET", "ECONNREFUSED", "ETIMEDOUT", "EAI_AGAIN", "ENOTFOUND"].includes(code)) { + return true; + } + + const name = error instanceof Error ? error.name : ""; + if (/APIConnectionError|APIConnectionTimeoutError|FetchError|TimeoutError/i.test(name)) { + return true; + } + + const message = error instanceof Error ? error.message : String(error); + return /fetch failed|terminated|network|connection|socket|timeout|timed out/i.test(message); + } + + private getChatRetryDelayMs(attemptIndex: number): number { + return Math.min(CHAT_RETRY_BASE_DELAY_MS * 2 ** attemptIndex, CHAT_RETRY_MAX_DELAY_MS); + } + + private async waitForChatRetry(delayMs: number, signal?: AbortSignal): Promise { + this.throwIfAborted(signal); + if (delayMs <= 0) { + return; + } + + await new Promise((resolve, reject) => { + let timer: ReturnType | null = setTimeout(() => { + timer = null; + signal?.removeEventListener("abort", onAbort); + resolve(); + }, delayMs); + + const onAbort = () => { + if (timer) { + clearTimeout(timer); + timer = null; + } + const error = new Error("Request was aborted."); + error.name = "AbortError"; + reject(error); + }; + + signal?.addEventListener("abort", onAbort, { once: true }); + }); + } + private async createChatCompletionStream( client: NonNullable["client"]>, request: Record, @@ -433,6 +600,9 @@ export class SessionManager { const startedAtMs = Date.now(); let estimatedTokens = 0; this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "start", sessionId); + const parentSignal = options?.signal instanceof AbortSignal ? options.signal : undefined; + const { requestTimeoutMs, maxRetries } = this.getChatRequestControls(); + const maxAttempts = maxRetries + 1; const streamRequest = { ...request, @@ -443,212 +613,220 @@ export class SessionManager { }, }; - let response: unknown; - try { - response = await ( - client.chat.completions.create as unknown as ( - body: Record, - options?: Record - ) => Promise - )(streamRequest, options); - } catch (error) { - this.logChatCompletionDebug(debug, { - timestamp: new Date().toISOString(), - location: debug?.location ?? "SessionManager.createChatCompletionStream:create", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - baseURL: debug?.baseURL, - durationMs: Date.now() - startedAtMs, - params: { ...debug?.params, options: summarizeCompletionOptions(options) }, - request: streamRequest, - error: normalizeDebugError(error), - }); - logApiError({ - timestamp: new Date().toISOString(), - location: "SessionManager.createChatCompletionStream:create", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - error: { - name: error instanceof Error ? error.name : "UnknownError", - message: error instanceof Error ? error.message : String(error), - stack: error instanceof Error ? error.stack : undefined, - }, - request: streamRequest, - }); - this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId); - throw error; - } - - if (!response || typeof (response as { [Symbol.asyncIterator]?: unknown })[Symbol.asyncIterator] !== "function") { - this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId); - this.logChatCompletionDebug(debug, { - timestamp: new Date().toISOString(), - location: debug?.location ?? "SessionManager.createChatCompletionStream", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - baseURL: debug?.baseURL, - durationMs: Date.now() - startedAtMs, - params: { ...debug?.params, options: summarizeCompletionOptions(options) }, - request: streamRequest, - response, - }); - return response as { choices?: Array<{ message?: Record }>; usage?: ModelUsage | null }; - } - - let content = ""; - let reasoningContent = ""; - let refusal: string | null = null; - let usage: ModelUsage | null = null; - const responseChunks: unknown[] = []; - const toolCallsByIndex = new Map< - number, - { - id?: string; - type?: string; - function?: { name?: string; arguments?: string }; - } - >(); - - const trackText = (value: unknown) => { - if (typeof value !== "string" || value.length === 0) { - return; - } - estimatedTokens += this.estimateStreamTokens(value); - this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "update", sessionId); - }; - try { - for await (const chunk of response as AsyncIterable>) { - if (debug?.enabled) { - responseChunks.push(chunk); - } - if ("usage" in chunk && chunk.usage != null) { - usage = chunk.usage as ModelUsage; - } + for (let attemptIndex = 0; attemptIndex < maxAttempts; attemptIndex++) { + this.throwIfAborted(parentSignal); + const attemptOptions = this.createAttemptOptions(options, requestTimeoutMs); + const attemptParams = { + ...debug?.params, + attempt: attemptIndex + 1, + maxAttempts, + requestTimeoutMs, + maxRetries, + options: summarizeCompletionOptions(attemptOptions.options), + }; + let response: unknown; + let responseChunks: unknown[] | undefined; + let locationSuffix = "create"; - const choices = Array.isArray(chunk.choices) ? chunk.choices : []; - for (const choice of choices) { - const delta = isUsageRecord(choice) && isUsageRecord(choice.delta) ? choice.delta : null; - if (!delta) { - continue; + try { + response = await ( + client.chat.completions.create as unknown as ( + body: Record, + options?: Record + ) => Promise + )(streamRequest, attemptOptions.options); + + if ( + !response || + typeof (response as { [Symbol.asyncIterator]?: unknown })[Symbol.asyncIterator] !== "function" + ) { + this.logChatCompletionDebug(debug, { + timestamp: new Date().toISOString(), + location: debug?.location ?? "SessionManager.createChatCompletionStream", + requestId, + sessionId, + model: typeof request.model === "string" ? request.model : undefined, + baseURL: debug?.baseURL, + durationMs: Date.now() - startedAtMs, + params: attemptParams, + request: streamRequest, + response, + }); + return response as { choices?: Array<{ message?: Record }>; usage?: ModelUsage | null }; } - const contentDelta = delta.content; - if (typeof contentDelta === "string") { - content += contentDelta; - trackText(contentDelta); - } + let content = ""; + let reasoningContent = ""; + let refusal: string | null = null; + let usage: ModelUsage | null = null; + responseChunks = []; + const toolCallsByIndex = new Map< + number, + { + id?: string; + type?: string; + function?: { name?: string; arguments?: string }; + } + >(); - const reasoningDelta = delta.reasoning_content ?? delta.reasoning; - if (typeof reasoningDelta === "string") { - reasoningContent += reasoningDelta; - trackText(reasoningDelta); - } + const trackText = (value: unknown) => { + if (typeof value !== "string" || value.length === 0) { + return; + } + estimatedTokens += this.estimateStreamTokens(value); + this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "update", sessionId); + }; - if (typeof delta.refusal === "string") { - refusal = `${refusal ?? ""}${delta.refusal}`; - trackText(delta.refusal); - } + locationSuffix = "stream"; + for await (const chunk of response as AsyncIterable>) { + if (debug?.enabled) { + responseChunks.push(chunk); + } + if ("usage" in chunk && chunk.usage != null) { + usage = chunk.usage as ModelUsage; + } - const rawToolCalls = delta.tool_calls; - if (Array.isArray(rawToolCalls)) { - for (const rawToolCall of rawToolCalls) { - if (!isUsageRecord(rawToolCall)) { + const choices = Array.isArray(chunk.choices) ? chunk.choices : []; + for (const choice of choices) { + const delta = isUsageRecord(choice) && isUsageRecord(choice.delta) ? choice.delta : null; + if (!delta) { continue; } - const index = typeof rawToolCall.index === "number" ? rawToolCall.index : toolCallsByIndex.size; - const current = toolCallsByIndex.get(index) ?? {}; - if (typeof rawToolCall.id === "string") { - current.id = rawToolCall.id; + + const contentDelta = delta.content; + if (typeof contentDelta === "string") { + content += contentDelta; + trackText(contentDelta); } - if (typeof rawToolCall.type === "string") { - current.type = rawToolCall.type; + + const reasoningDelta = delta.reasoning_content ?? delta.reasoning; + if (typeof reasoningDelta === "string") { + reasoningContent += reasoningDelta; + trackText(reasoningDelta); } - const rawFunction = isUsageRecord(rawToolCall.function) ? rawToolCall.function : null; - if (rawFunction) { - current.function = current.function ?? {}; - if (typeof rawFunction.name === "string") { - current.function.name = `${current.function.name ?? ""}${rawFunction.name}`; - trackText(rawFunction.name); - } - if (typeof rawFunction.arguments === "string") { - current.function.arguments = `${current.function.arguments ?? ""}${rawFunction.arguments}`; - trackText(rawFunction.arguments); + + if (typeof delta.refusal === "string") { + refusal = `${refusal ?? ""}${delta.refusal}`; + trackText(delta.refusal); + } + + const rawToolCalls = delta.tool_calls; + if (Array.isArray(rawToolCalls)) { + for (const rawToolCall of rawToolCalls) { + if (!isUsageRecord(rawToolCall)) { + continue; + } + const index = typeof rawToolCall.index === "number" ? rawToolCall.index : toolCallsByIndex.size; + const current = toolCallsByIndex.get(index) ?? {}; + if (typeof rawToolCall.id === "string") { + current.id = rawToolCall.id; + } + if (typeof rawToolCall.type === "string") { + current.type = rawToolCall.type; + } + const rawFunction = isUsageRecord(rawToolCall.function) ? rawToolCall.function : null; + if (rawFunction) { + current.function = current.function ?? {}; + if (typeof rawFunction.name === "string") { + current.function.name = `${current.function.name ?? ""}${rawFunction.name}`; + trackText(rawFunction.name); + } + if (typeof rawFunction.arguments === "string") { + current.function.arguments = `${current.function.arguments ?? ""}${rawFunction.arguments}`; + trackText(rawFunction.arguments); + } + } + toolCallsByIndex.set(index, current); } } - toolCallsByIndex.set(index, current); } } + + const toolCalls = Array.from(toolCallsByIndex.entries()) + .sort(([left], [right]) => left - right) + .map(([, toolCall]) => toolCall); + const normalizedToolCalls = this.normalizeLlmToolCalls(toolCalls); + const message: Record = { content }; + if (normalizedToolCalls) { + message.tool_calls = normalizedToolCalls; + } + if (reasoningContent.length > 0) { + message.reasoning_content = reasoningContent; + } + if (refusal != null) { + message.refusal = refusal; + } + + const finalResponse = { + choices: [{ message }], + usage, + }; + this.logChatCompletionDebug(debug, { + timestamp: new Date().toISOString(), + location: debug?.location ?? "SessionManager.createChatCompletionStream", + requestId, + sessionId, + model: typeof request.model === "string" ? request.model : undefined, + baseURL: debug?.baseURL, + durationMs: Date.now() - startedAtMs, + params: attemptParams, + request: streamRequest, + responseChunks, + response: finalResponse, + }); + return finalResponse; + } catch (rawError) { + const timedOut = attemptOptions.didTimeout(); + const error = this.normalizeChatCompletionError(rawError, timedOut, requestTimeoutMs); + const retrying = + attemptIndex < maxRetries && !parentSignal?.aborted && this.isRetryableChatCompletionError(error, timedOut); + const normalizedError = normalizeDebugError(error); + const location = debug?.location ?? `SessionManager.createChatCompletionStream:${locationSuffix}`; + const params = { + ...attemptParams, + timedOut, + retrying, + }; + + this.logChatCompletionDebug(debug, { + timestamp: new Date().toISOString(), + location, + requestId, + sessionId, + model: typeof request.model === "string" ? request.model : undefined, + baseURL: debug?.baseURL, + durationMs: Date.now() - startedAtMs, + params, + request: streamRequest, + responseChunks, + error: normalizedError, + }); + logApiError({ + timestamp: new Date().toISOString(), + location, + requestId, + sessionId, + model: typeof request.model === "string" ? request.model : undefined, + baseURL: debug?.baseURL, + error: normalizedError, + request: streamRequest, + }); + + if (!retrying) { + throw error; + } + + await this.waitForChatRetry(this.getChatRetryDelayMs(attemptIndex), parentSignal); + } finally { + attemptOptions.cleanup(); } } - } catch (error) { - this.logChatCompletionDebug(debug, { - timestamp: new Date().toISOString(), - location: debug?.location ?? "SessionManager.createChatCompletionStream:stream", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - baseURL: debug?.baseURL, - durationMs: Date.now() - startedAtMs, - params: { ...debug?.params, options: summarizeCompletionOptions(options) }, - request: streamRequest, - responseChunks, - error: normalizeDebugError(error), - }); - logApiError({ - timestamp: new Date().toISOString(), - location: "SessionManager.createChatCompletionStream:stream", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - error: { - name: error instanceof Error ? error.name : "UnknownError", - message: error instanceof Error ? error.message : String(error), - stack: error instanceof Error ? error.stack : undefined, - }, - request: streamRequest, - }); - throw error; + + throw new Error("Chat completion request failed before any attempt was made."); } finally { this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId); } - - const toolCalls = Array.from(toolCallsByIndex.entries()) - .sort(([left], [right]) => left - right) - .map(([, toolCall]) => toolCall); - const normalizedToolCalls = this.normalizeLlmToolCalls(toolCalls); - const message: Record = { content }; - if (normalizedToolCalls) { - message.tool_calls = normalizedToolCalls; - } - if (reasoningContent.length > 0) { - message.reasoning_content = reasoningContent; - } - if (refusal != null) { - message.refusal = refusal; - } - - const finalResponse = { - choices: [{ message }], - usage, - }; - this.logChatCompletionDebug(debug, { - timestamp: new Date().toISOString(), - location: debug?.location ?? "SessionManager.createChatCompletionStream", - requestId, - sessionId, - model: typeof request.model === "string" ? request.model : undefined, - baseURL: debug?.baseURL, - durationMs: Date.now() - startedAtMs, - params: { ...debug?.params, options: summarizeCompletionOptions(options) }, - request: streamRequest, - responseChunks, - response: finalResponse, - }); - return finalResponse; } private logChatCompletionDebug( diff --git a/src/settings.ts b/src/settings.ts index b7a7a77..ce5e9b2 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -10,6 +10,8 @@ export type DeepcodingEnv = Record & { THINKING_ENABLED?: string; REASONING_EFFORT?: string; DEBUG_LOG_ENABLED?: string; + REQUEST_TIMEOUT_MS?: string; + MAX_RETRIES?: string; }; export type ReasoningEffort = "high" | "max"; @@ -47,6 +49,8 @@ export type DeepcodingSettings = { thinkingEnabled?: boolean; reasoningEffort?: ReasoningEffort; debugLogEnabled?: boolean; + requestTimeoutMs?: number; + maxRetries?: number; notify?: string; webSearchTool?: string; mcpServers?: Record; @@ -61,6 +65,8 @@ export type ResolvedDeepcodingSettings = { thinkingEnabled: boolean; reasoningEffort: ReasoningEffort; debugLogEnabled: boolean; + requestTimeoutMs: number; + maxRetries: number; notify?: string; webSearchTool?: string; mcpServers?: Record; @@ -101,6 +107,14 @@ function trimString(value: unknown): string { return typeof value === "string" ? value.trim() : ""; } +function parseNonNegativeInteger(value: unknown, max: number): number | undefined { + const raw = typeof value === "number" ? value : typeof value === "string" ? Number(value.trim()) : NaN; + if (!Number.isFinite(raw) || raw < 0) { + return undefined; + } + return Math.min(Math.round(raw), max); +} + const VALID_PERMISSION_SCOPES = new Set([ "read-in-cwd", "read-out-cwd", @@ -313,6 +327,22 @@ export function resolveSettingsSources( parseBoolean(userEnv.DEBUG_LOG_ENABLED) ?? false; + const requestTimeoutMs = + parseNonNegativeInteger(systemEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ?? + parseNonNegativeInteger(projectSettings?.requestTimeoutMs, MAX_REQUEST_TIMEOUT_MS) ?? + parseNonNegativeInteger(projectEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ?? + parseNonNegativeInteger(userSettings?.requestTimeoutMs, MAX_REQUEST_TIMEOUT_MS) ?? + parseNonNegativeInteger(userEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ?? + DEFAULT_REQUEST_TIMEOUT_MS; + + const maxRetries = + parseNonNegativeInteger(systemEnv.MAX_RETRIES, MAX_MAX_RETRIES) ?? + parseNonNegativeInteger(projectSettings?.maxRetries, MAX_MAX_RETRIES) ?? + parseNonNegativeInteger(projectEnv.MAX_RETRIES, MAX_MAX_RETRIES) ?? + parseNonNegativeInteger(userSettings?.maxRetries, MAX_MAX_RETRIES) ?? + parseNonNegativeInteger(userEnv.MAX_RETRIES, MAX_MAX_RETRIES) ?? + DEFAULT_MAX_RETRIES; + const notify = trimString(systemEnv.NOTIFY) || trimString(projectSettings?.notify) || trimString(userSettings?.notify) || ""; const webSearchTool = @@ -329,6 +359,8 @@ export function resolveSettingsSources( thinkingEnabled, reasoningEffort, debugLogEnabled, + requestTimeoutMs, + maxRetries, notify: notify || undefined, webSearchTool: webSearchTool || undefined, mcpServers: mergeMcpServers(userSettings, projectSettings, userEnv, projectEnv, systemEnv), @@ -380,6 +412,11 @@ export function applyModelConfigSelection( export const DEFAULT_MODEL = "deepseek-v4-pro"; export const DEFAULT_BASE_URL = "https://api.deepseek.com"; +export const DEFAULT_REQUEST_TIMEOUT_MS = 0; +export const DEFAULT_MAX_RETRIES = 2; + +const MAX_REQUEST_TIMEOUT_MS = 24 * 60 * 60 * 1000; +const MAX_MAX_RETRIES = 10; // --------------------------------------------------------------------------- // Settings file I/O diff --git a/src/tests/debug-logger.test.ts b/src/tests/debug-logger.test.ts index 7b1aad4..5f796a3 100644 --- a/src/tests/debug-logger.test.ts +++ b/src/tests/debug-logger.test.ts @@ -7,8 +7,12 @@ import { getDebugLogPath, logOpenAIChatCompletionDebug } from "../common/debug-l test("debug logger appends full entries without rotation", () => { const originalHome = process.env.HOME; + const originalUserProfile = process.env.USERPROFILE; const home = fs.mkdtempSync(path.join(os.tmpdir(), "deepcode-debug-log-home-")); process.env.HOME = home; + if (process.platform === "win32") { + process.env.USERPROFILE = home; + } try { for (let index = 0; index < 25; index += 1) { logOpenAIChatCompletionDebug({ @@ -42,5 +46,11 @@ test("debug logger appends full entries without rotation", () => { } else { process.env.HOME = originalHome; } + if (originalUserProfile === undefined) { + delete process.env.USERPROFILE; + } else { + process.env.USERPROFILE = originalUserProfile; + } + fs.rmSync(home, { recursive: true, force: true }); } }); diff --git a/src/tests/session.test.ts b/src/tests/session.test.ts index 6af3cb2..d988e81 100644 --- a/src/tests/session.test.ts +++ b/src/tests/session.test.ts @@ -2209,6 +2209,143 @@ test("SessionManager streams chat completions and counts reasoning progress", as assert.equal(progressEvents[2]?.formattedTokens, "3"); }); +test("SessionManager retries transient chat completion failures", async () => { + const workspace = createTempDir("deepcode-retry-workspace-"); + const home = createTempDir("deepcode-retry-home-"); + setHomeDir(home); + + let attempts = 0; + const signals: Array = []; + const client = { + chat: { + completions: { + create: async (_request: Record, options?: { signal?: AbortSignal }) => { + attempts += 1; + signals.push(options?.signal); + if (attempts === 1) { + throw new Error("fetch failed"); + } + return createChatResponse("recovered", { + prompt_tokens: 2, + completion_tokens: 1, + total_tokens: 3, + }); + }, + }, + }, + }; + + const manager = new SessionManager({ + projectRoot: workspace, + createOpenAIClient: () => ({ + client: client as any, + model: "test-model", + baseURL: "https://api.deepseek.com", + thinkingEnabled: false, + }), + getResolvedSettings: () => ({ model: "test-model", requestTimeoutMs: 1000, maxRetries: 1 }), + renderMarkdown: (text) => text, + onAssistantMessage: () => {}, + }); + + const sessionId = await manager.createSession({ text: "" }); + const assistantMessage = manager.listSessionMessages(sessionId).find((message) => message.role === "assistant"); + + assert.equal(attempts, 2); + assert.equal( + signals.every((signal) => signal instanceof AbortSignal), + true + ); + assert.equal(assistantMessage?.content, "recovered"); + assert.equal(manager.getSession(sessionId)?.status, "completed"); +}); + +test("SessionManager does not retry non-retryable chat completion errors", async () => { + const workspace = createTempDir("deepcode-no-retry-workspace-"); + const home = createTempDir("deepcode-no-retry-home-"); + setHomeDir(home); + + let attempts = 0; + const client = { + chat: { + completions: { + create: async () => { + attempts += 1; + const error = new Error("bad request") as Error & { status?: number }; + error.status = 400; + throw error; + }, + }, + }, + }; + + const manager = new SessionManager({ + projectRoot: workspace, + createOpenAIClient: () => ({ + client: client as any, + model: "test-model", + baseURL: "https://api.deepseek.com", + thinkingEnabled: false, + }), + getResolvedSettings: () => ({ model: "test-model", maxRetries: 2 }), + renderMarkdown: (text) => text, + onAssistantMessage: () => {}, + }); + + const sessionId = await manager.createSession({ text: "" }); + const session = manager.getSession(sessionId); + + assert.equal(attempts, 1); + assert.equal(session?.status, "failed"); + assert.equal(session?.failReason, "bad request"); +}); + +test("SessionManager fails timed-out chat completion requests without treating them as user interrupts", async () => { + const workspace = createTempDir("deepcode-timeout-workspace-"); + const home = createTempDir("deepcode-timeout-home-"); + setHomeDir(home); + + const client = { + chat: { + completions: { + create: async (_request: Record, options?: { signal?: AbortSignal }) => { + return new Promise((_resolve, reject) => { + const abort = () => { + const error = new Error("aborted"); + error.name = "AbortError"; + reject(error); + }; + if (options?.signal?.aborted) { + abort(); + return; + } + options?.signal?.addEventListener("abort", abort, { once: true }); + }); + }, + }, + }, + }; + + const manager = new SessionManager({ + projectRoot: workspace, + createOpenAIClient: () => ({ + client: client as any, + model: "test-model", + baseURL: "https://api.deepseek.com", + thinkingEnabled: false, + }), + getResolvedSettings: () => ({ model: "test-model", requestTimeoutMs: 10, maxRetries: 0 }), + renderMarkdown: (text) => text, + onAssistantMessage: () => {}, + }); + + const sessionId = await manager.createSession({ text: "" }); + const session = manager.getSession(sessionId); + + assert.equal(session?.status, "failed"); + assert.match(session?.failReason ?? "", /Request timed out after 10ms/); +}); + test("SessionManager persists session and user message before skill matching is cancelled", async () => { const workspace = createTempDir("deepcode-skill-abort-workspace-"); const home = createTempDir("deepcode-skill-abort-home-"); diff --git a/src/tests/settings-and-notify.test.ts b/src/tests/settings-and-notify.test.ts index 52f8671..3956d89 100644 --- a/src/tests/settings-and-notify.test.ts +++ b/src/tests/settings-and-notify.test.ts @@ -7,7 +7,13 @@ import { type NotifyContext, type NotifySpawn, } from "../common/notify"; -import { applyModelConfigSelection, resolveSettings, resolveSettingsSources } from "../settings"; +import { + DEFAULT_MAX_RETRIES, + DEFAULT_REQUEST_TIMEOUT_MS, + applyModelConfigSelection, + resolveSettings, + resolveSettingsSources, +} from "../settings"; const TEST_PROCESS_ENV = {}; @@ -83,6 +89,53 @@ test("resolveSettings reads THINKING_ENABLED, REASONING_EFFORT, and DEBUG_LOG_EN assert.equal(resolved.baseURL, "https://default.example.com"); }); +test("resolveSettings reads request timeout and retry controls with documented precedence", () => { + const resolved = resolveSettingsSources( + { + requestTimeoutMs: 1000, + maxRetries: 1, + env: { + REQUEST_TIMEOUT_MS: "2000", + MAX_RETRIES: "2", + }, + }, + { + requestTimeoutMs: 3000, + env: { + MAX_RETRIES: "3", + }, + }, + { + model: "default-model", + baseURL: "https://default.example.com", + }, + { + DEEPCODE_REQUEST_TIMEOUT_MS: "4000", + } + ); + + assert.equal(resolved.requestTimeoutMs, 4000); + assert.equal(resolved.maxRetries, 3); +}); + +test("resolveSettings defaults and clamps request retry controls", () => { + const resolved = resolveSettings( + { + requestTimeoutMs: -1, + maxRetries: 999, + }, + { + model: "default-model", + baseURL: "https://default.example.com", + }, + TEST_PROCESS_ENV + ); + + assert.equal(resolved.requestTimeoutMs, DEFAULT_REQUEST_TIMEOUT_MS); + assert.equal(resolved.maxRetries, 10); + assert.equal(DEFAULT_MAX_RETRIES, 2); +}); + test("resolveSettings ignores removed legacy env.THINKING", () => { const resolved = resolveSettings( { diff --git a/src/tools/executor.ts b/src/tools/executor.ts index 220fc89..dfa7bf8 100644 --- a/src/tools/executor.ts +++ b/src/tools/executor.ts @@ -16,6 +16,8 @@ export type CreateOpenAIClient = () => { thinkingEnabled: boolean; reasoningEffort?: ReasoningEffort; debugLogEnabled?: boolean; + requestTimeoutMs?: number; + maxRetries?: number; notify?: string; webSearchTool?: string; env?: Record;