diff --git a/docs/configuration.md b/docs/configuration.md
index 1cce9a1..6dd2254 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -31,6 +31,8 @@ Deep Code 使用 `settings.json` 设置文件进行持久化配置，支持两
 | `thinkingEnabled`    | boolean   | 是否启用思考模式（DeepSeek V4 系列默认启用）                         |
 | `reasoningEffort`    | string    | 推理强度，可选 `"high"` 或 `"max"`（默认 `"max"`）                  |
 | `debugLogEnabled`    | boolean   | 是否启用调试日志输出（默认 `false`）                                 |
+| `requestTimeoutMs`   | number    | 主模型请求超时时间，单位毫秒。`0` 表示不启用超时（默认 `0`）           |
+| `maxRetries`         | number    | 主模型请求遇到临时错误（网络中断、请求超时、HTTP `429`/`5xx` 等）时的最大重试次数（默认 `2`，最大 `10`） |
 | `notify`             | string    | 任务完成通知脚本的完整路径（如 Slack 通知脚本）                      |
 | `webSearchTool`      | string    | 自定义联网搜索脚本的完整路径                                         |
 | `mcpServers`         | object    | MCP 服务器配置（键为服务名，值为 McpServerConfig 对象）              |
@@ -45,6 +47,8 @@ Deep Code 使用 `settings.json` 设置文件进行持久化配置，支持两
 | `THINKING_ENABLED`  | string | 是否启用思考模式                                         |
 | `REASONING_EFFORT`  | string | 推理强度                                                |
 | `DEBUG_LOG_ENABLED`  | string | 是否启用调试日志输出                                     |
+| `REQUEST_TIMEOUT_MS` | string | 主模型请求超时时间，单位毫秒。`0` 表示不启用超时          |
+| `MAX_RETRIES`        | string | 主模型请求遇到临时错误（网络中断、请求超时、HTTP `429`/`5xx` 等）时的最大重试次数 |
 | `<其他任意KEY>` | string | 自定义环境变量 |
 
 #### `thinkingEnabled` — 思考模式
@@ -130,6 +134,22 @@ MCP（Model Context Protocol）服务器配置。值是键值对，键为服务
 
 设为 `true` 可让程序输出详细的调试日志（默认 `false`），用于排查 API 调用和工具执行的问题。
 
+#### `requestTimeoutMs` 与 `maxRetries` — 请求超时与重试
+
+用于控制主模型 Chat Completion 请求的超时和自动重试：
+
+```json
+{
+  "requestTimeoutMs": 300000,
+  "maxRetries": 2
+}
+```
+
+- `requestTimeoutMs` 单位为毫秒。默认 `0` 表示不启用请求超时，避免长时间思考或长输出被意外中断。启用后按单次请求尝试整体计时（包含流式输出阶段），每次重试重新计时。
+- `maxRetries` 默认 `2`，最大 `10`。仅对临时错误重试，例如网络断开、请求超时、HTTP `408`、`409`、`425`、`429` 和 `5xx`。
+- 用户主动中断、权限拒绝、HTTP `400`、`401`、`403`、`404` 等配置或请求错误不会自动重试。
+- 也可以通过环境变量配置：`DEEPCODE_REQUEST_TIMEOUT_MS=300000`、`DEEPCODE_MAX_RETRIES=2`。
+
 ## 环境变量优先级
 
 环境变量是配置应用程序的常用方式，尤其适用于敏感信息（如 api-key）或可能在不同环境之间更改的设置。
diff --git a/docs/configuration_en.md b/docs/configuration_en.md
index fa396f9..586a285 100644
--- a/docs/configuration_en.md
+++ b/docs/configuration_en.md
@@ -31,6 +31,8 @@ The following are all the top-level fields supported in `settings.json`, along w
 | `thinkingEnabled`  | boolean | Whether to enable thinking mode (enabled by default for DeepSeek V4 series)|
 | `reasoningEffort`  | string  | Reasoning intensity, either `"high"` or `"max"` (default `"max"`)          |
 | `debugLogEnabled`  | boolean | Enable debug log output (default `false`)                                   |
+| `requestTimeoutMs` | number  | Main model request timeout in milliseconds. `0` disables the timeout (default `0`) |
+| `maxRetries`       | number  | Maximum retry count for transient main model request failures (default `2`, max `10`) |
 | `notify`           | string  | Full path to a task-completion notification script (e.g., Slack notification script) |
 | `webSearchTool`    | string  | Full path to a custom web search script                                     |
 | `mcpServers`       | object  | MCP server configurations (keys are service names, values are McpServerConfig objects) |
@@ -45,6 +47,8 @@ The following are all the top-level fields supported in `settings.json`, along w
 | `THINKING_ENABLED`| string | Enable thinking mode                                            |
 | `REASONING_EFFORT`| string | Reasoning intensity                                             |
 | `DEBUG_LOG_ENABLED`| string| Enable debug log output                                         |
+| `REQUEST_TIMEOUT_MS`| string | Main model request timeout in milliseconds. `0` disables the timeout |
+| `MAX_RETRIES`     | string | Maximum retry count for transient main model request failures   |
 | `<any other KEY>` | string | Custom environment variable                                     |
 
 #### `thinkingEnabled` — Thinking Mode
@@ -129,6 +133,22 @@ For detailed MCP usage instructions, refer to [mcp.md](mcp.md).
 
 Set to `true` to enable detailed debug logging (default `false`), useful for troubleshooting API calls and tool execution.
 
+#### `requestTimeoutMs` and `maxRetries` — Request Timeout and Retries
+
+Controls timeout and automatic retries for main model Chat Completion requests:
+
+```json
+{
+  "requestTimeoutMs": 300000,
+  "maxRetries": 2
+}
+```
+
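+- `requestTimeoutMs` is in milliseconds. The default `0` disables the request timeout so that long thinking or long outputs are not interrupted unexpectedly. When set, the timer covers each attempt end to end (including the streamed response) and restarts on every retry.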
+- `maxRetries` defaults to `2` and is capped at `10`. Retries apply only to transient failures, such as network disconnects, request timeouts, HTTP `408`, `409`, `425`, `429`, and `5xx`.
+- User interrupts, permission denials, HTTP `400`, `401`, `403`, `404`, and other request/configuration errors are not retried.
+- You can also configure these through environment variables: `DEEPCODE_REQUEST_TIMEOUT_MS=300000`, `DEEPCODE_MAX_RETRIES=2`.
+
 ## Environment Variable Priority
 
 Environment variables are a common way to configure applications, especially for sensitive information (such as api-key) or settings that may change between environments.
@@ -181,4 +201,4 @@ Applied in the following priority order (lower-numbered overridden by higher-num
 2. User-level settings.json: `{"env": {"MCP_GITHUB_PERSONAL_ACCESS_TOKEN": "..."}}`
 3. Project-level settings.json: `{"mcpServers":{"github":{"env":{"GITHUB_PERSONAL_ACCESS_TOKEN":"..."}}}}`
 4. Project-level settings.json: `{"env": {"MCP_GITHUB_PERSONAL_ACCESS_TOKEN": "..."}}`
-5. System environment variable: `DEEPCODE_MCP_GITHUB_PERSONAL_ACCESS_TOKEN=... deepcode`
\ No newline at end of file
+5. System environment variable: `DEEPCODE_MCP_GITHUB_PERSONAL_ACCESS_TOKEN=... deepcode`
diff --git a/src/common/openai-client.ts b/src/common/openai-client.ts
index ee3dd66..1160e15 100644
--- a/src/common/openai-client.ts
+++ b/src/common/openai-client.ts
@@ -26,6 +26,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
   thinkingEnabled: boolean;
   reasoningEffort: "high" | "max";
   debugLogEnabled: boolean;
+  requestTimeoutMs: number;
+  maxRetries: number;
   notify?: string;
   webSearchTool?: string;
   env: Record<string, string>;
@@ -40,6 +42,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
       thinkingEnabled: settings.thinkingEnabled,
       reasoningEffort: settings.reasoningEffort,
       debugLogEnabled: settings.debugLogEnabled,
+      requestTimeoutMs: settings.requestTimeoutMs,
+      maxRetries: settings.maxRetries,
       notify: settings.notify,
       webSearchTool: settings.webSearchTool,
       env: settings.env,
@@ -56,6 +60,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
       thinkingEnabled: settings.thinkingEnabled,
       reasoningEffort: settings.reasoningEffort,
       debugLogEnabled: settings.debugLogEnabled,
+      requestTimeoutMs: settings.requestTimeoutMs,
+      maxRetries: settings.maxRetries,
       notify: settings.notify,
       webSearchTool: settings.webSearchTool,
       env: settings.env,
@@ -91,6 +97,8 @@ export function createOpenAIClient(projectRoot: string = process.cwd()): {
     thinkingEnabled: settings.thinkingEnabled,
     reasoningEffort: settings.reasoningEffort,
     debugLogEnabled: settings.debugLogEnabled,
+    requestTimeoutMs: settings.requestTimeoutMs,
+    maxRetries: settings.maxRetries,
     notify: settings.notify,
     webSearchTool: settings.webSearchTool,
     env: settings.env,
diff --git a/src/session.ts b/src/session.ts
index 349c48e..ac6160d 100644
--- a/src/session.ts
+++ b/src/session.ts
@@ -26,7 +26,13 @@ import {
   type ToolExecutionHooks,
 } from "./tools/executor";
 import { McpManager } from "./mcp/mcp-manager";
-import type { McpServerConfig, PermissionScope, PermissionSettings } from "./settings";
+import {
+  DEFAULT_MAX_RETRIES,
+  DEFAULT_REQUEST_TIMEOUT_MS,
+  type McpServerConfig,
+  type PermissionScope,
+  type PermissionSettings,
+} from "./settings";
 import { logApiError } from "./common/error-logger";
 import { logOpenAIChatCompletionDebug, normalizeDebugError } from "./common/debug-logger";
 import { killProcessTree } from "./common/process-tree";
@@ -62,6 +68,8 @@ const DEFAULT_NEW_PROMPT_API_URL = "https://deepcode.vegamo.cn/api/plugin/new";
 const NEW_PROMPT_REPORT_TIMEOUT_MS = 3000;
 const DEFAULT_COMPACT_PROMPT_TOKEN_THRESHOLD = 128 * 1024;
 const DEEPSEEK_V4_COMPACT_PROMPT_TOKEN_THRESHOLD = 512 * 1024;
+const CHAT_RETRY_BASE_DELAY_MS = 250; // backoff delay before the first retry; doubled for each later retry
+const CHAT_RETRY_MAX_DELAY_MS = 4000; // ceiling applied to the doubled backoff delay
 
 type ChatCompletionDebugOptions = {
   enabled?: boolean;
@@ -264,6 +272,8 @@ type SessionManagerOptions = {
     webSearchTool?: string;
     mcpServers?: Record<string, McpServerConfig>;
     permissions?: Required<PermissionSettings>;
+    requestTimeoutMs?: number;
+    maxRetries?: number;
   };
   renderMarkdown: (text: string) => string;
   onAssistantMessage: (message: SessionMessage, shouldConnect: boolean) => void;
@@ -290,6 +300,8 @@ export class SessionManager {
     webSearchTool?: string;
     mcpServers?: Record<string, McpServerConfig>;
     permissions?: Required<PermissionSettings>;
+    requestTimeoutMs?: number;
+    maxRetries?: number;
   };
   private readonly onAssistantMessage: (message: SessionMessage, shouldConnect: boolean) => void;
   private readonly onSessionEntryUpdated?: (entry: SessionEntry) => void;
@@ -418,6 +430,161 @@ export class SessionManager {
     throw error;
   }
 
+  /** Timeout/retry controls from settings; missing or non-finite values (NaN would yield zero attempts) use defaults. */
+  private getChatRequestControls(): { requestTimeoutMs: number; maxRetries: number } {
+    const settings = this.getResolvedSettings();
+    const sanitize = (value: unknown, fallback: number): number =>
+      typeof value === "number" && Number.isFinite(value) ? Math.max(0, Math.round(value)) : fallback;
+    return {
+      requestTimeoutMs: sanitize(settings.requestTimeoutMs, DEFAULT_REQUEST_TIMEOUT_MS),
+      maxRetries: sanitize(settings.maxRetries, DEFAULT_MAX_RETRIES),
+    };
+  }
+
+  /**
+   * Builds the options for one attempt, swapping in a fresh AbortSignal that fires when the
+   * caller's signal aborts or after `requestTimeoutMs` (no timer is armed unless it is > 0).
+   * `cleanup` clears the timer and detaches the caller-signal listener; `didTimeout` reports
+   * whether the timer fired. With no caller signal and a timeout <= 0, `options` passes through.
+   */
+  private createAttemptOptions(
+    options: Record<string, unknown> | undefined,
+    requestTimeoutMs: number
+  ): {
+    options?: Record<string, unknown>;
+    cleanup: () => void;
+    didTimeout: () => boolean;
+  } {
+    const callerSignal = options?.signal instanceof AbortSignal ? options.signal : undefined;
+    if (!callerSignal && requestTimeoutMs <= 0) {
+      return {
+        options,
+        cleanup: () => {},
+        didTimeout: () => false,
+      };
+    }
+
+    const attemptController = new AbortController();
+    // abort() does nothing on an already-aborted signal, so no `aborted` guard is required.
+    const forwardAbort = () => attemptController.abort();
+    if (callerSignal?.aborted) {
+      forwardAbort();
+    } else {
+      callerSignal?.addEventListener("abort", forwardAbort, { once: true });
+    }
+
+    let timerFired = false;
+    // The flag is set even if the caller aborted first, so didTimeout() reflects the timer alone.
+    const timer =
+      requestTimeoutMs > 0
+        ? setTimeout(() => {
+            timerFired = true;
+            attemptController.abort();
+          }, requestTimeoutMs)
+        : null;
+
+    return {
+      options: {
+        ...(options ?? {}),
+        signal: attemptController.signal,
+      },
+      cleanup: () => {
+        if (timer) {
+          clearTimeout(timer);
+        }
+        callerSignal?.removeEventListener("abort", forwardAbort);
+      },
+      didTimeout: () => timerFired,
+    };
+  }
+
+  /** Swaps `error` for a `TimeoutError` when this attempt's timer fired; otherwise returns it untouched. */
+  private normalizeChatCompletionError(error: unknown, timedOut: boolean, requestTimeoutMs: number): Error | unknown {
+    if (timedOut) {
+      const replacement = new Error(`Request timed out after ${requestTimeoutMs}ms.`);
+      return Object.assign(replacement, { name: "TimeoutError" });
+    }
+    return error;
+  }
+
+  /**
+   * Reads a numeric HTTP status from an error-like object, preferring `status` over `statusCode`.
+   * Returns null for non-objects or when neither property holds a number.
+   */
+  private getErrorStatus(error: unknown): number | null {
+    if (typeof error !== "object" || error === null) {
+      return null;
+    }
+    const { status, statusCode } = error as { status?: unknown; statusCode?: unknown };
+    return typeof status === "number" ? status : typeof statusCode === "number" ? statusCode : null;
+  }
+
+  /**
+   * Returns the string `code` of an error-like object, or "" when it is absent or not a string.
+   */
+  private getErrorCode(error: unknown): string {
+    const code = typeof error === "object" && error !== null ? (error as { code?: unknown }).code : undefined;
+    return typeof code === "string" ? code : "";
+  }
+
+  /**
+   * Decides whether a failed attempt may be retried. A timeout always qualifies and an abort-like
+   * error never does. If the error carries an HTTP status, that alone decides (408, 409, 425, 429,
+   * or >= 500); otherwise the error code, then its name, then its message text are checked.
+   */
+  private isRetryableChatCompletionError(error: unknown, timedOut: boolean): boolean {
+    if (timedOut || this.isAbortLikeError(error)) {
+      return timedOut;
+    }
+
+    const httpStatus = this.getErrorStatus(error);
+    if (httpStatus != null) {
+      return httpStatus >= 500 || [408, 409, 425, 429].includes(httpStatus);
+    }
+
+    const transientCodes = ["ECONNRESET", "ECONNREFUSED", "ETIMEDOUT", "EAI_AGAIN", "ENOTFOUND"];
+    const namePattern = /APIConnectionError|APIConnectionTimeoutError|FetchError|TimeoutError/i;
+    const codeMatches = transientCodes.includes(this.getErrorCode(error));
+    if (codeMatches || namePattern.test(error instanceof Error ? error.name : "")) {
+      return true;
+    }
+
+    // Last resort: look for transport-failure wording in the message text.
+    const text = error instanceof Error ? error.message : String(error);
+    return /fetch failed|terminated|network|connection|socket|timeout|timed out/i.test(text);
+  }
+
+  private getChatRetryDelayMs(attemptIndex: number): number {
+    return Math.min(CHAT_RETRY_MAX_DELAY_MS, CHAT_RETRY_BASE_DELAY_MS * Math.pow(2, attemptIndex)); // capped doubling
+  }
+
+  /**
+   * Sleeps for `delayMs` before the next attempt (returns at once when `delayMs` <= 0). Calls
+   * `throwIfAborted(signal)` first, and rejects with an `AbortError` if `signal` aborts mid-wait.
+   */
+  private async waitForChatRetry(delayMs: number, signal?: AbortSignal): Promise<void> {
+    this.throwIfAborted(signal);
+    if (delayMs <= 0) {
+      return;
+    }
+    await new Promise<void>((resolve, reject) => {
+      let pending: ReturnType<typeof setTimeout> | null = null;
+      const handleAbort = () => {
+        if (pending) {
+          clearTimeout(pending);
+          pending = null;
+        }
+        reject(Object.assign(new Error("Request was aborted."), { name: "AbortError" }));
+      };
+      pending = setTimeout(() => {
+        pending = null;
+        signal?.removeEventListener("abort", handleAbort);
+        resolve();
+      }, delayMs);
+      signal?.addEventListener("abort", handleAbort, { once: true });
+    });
+  }
+
   private async createChatCompletionStream(
     client: NonNullable<ReturnType<CreateOpenAIClient>["client"]>,
     request: Record<string, unknown>,
@@ -433,6 +600,9 @@ export class SessionManager {
     const startedAtMs = Date.now();
     let estimatedTokens = 0;
     this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "start", sessionId);
+    const parentSignal = options?.signal instanceof AbortSignal ? options.signal : undefined;
+    const { requestTimeoutMs, maxRetries } = this.getChatRequestControls();
+    const maxAttempts = maxRetries + 1;
 
     const streamRequest = {
       ...request,
@@ -443,212 +613,220 @@ export class SessionManager {
       },
     };
 
-    let response: unknown;
-    try {
-      response = await (
-        client.chat.completions.create as unknown as (
-          body: Record<string, unknown>,
-          options?: Record<string, unknown>
-        ) => Promise<unknown>
-      )(streamRequest, options);
-    } catch (error) {
-      this.logChatCompletionDebug(debug, {
-        timestamp: new Date().toISOString(),
-        location: debug?.location ?? "SessionManager.createChatCompletionStream:create",
-        requestId,
-        sessionId,
-        model: typeof request.model === "string" ? request.model : undefined,
-        baseURL: debug?.baseURL,
-        durationMs: Date.now() - startedAtMs,
-        params: { ...debug?.params, options: summarizeCompletionOptions(options) },
-        request: streamRequest,
-        error: normalizeDebugError(error),
-      });
-      logApiError({
-        timestamp: new Date().toISOString(),
-        location: "SessionManager.createChatCompletionStream:create",
-        requestId,
-        sessionId,
-        model: typeof request.model === "string" ? request.model : undefined,
-        error: {
-          name: error instanceof Error ? error.name : "UnknownError",
-          message: error instanceof Error ? error.message : String(error),
-          stack: error instanceof Error ? error.stack : undefined,
-        },
-        request: streamRequest,
-      });
-      this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId);
-      throw error;
-    }
-
-    if (!response || typeof (response as { [Symbol.asyncIterator]?: unknown })[Symbol.asyncIterator] !== "function") {
-      this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId);
-      this.logChatCompletionDebug(debug, {
-        timestamp: new Date().toISOString(),
-        location: debug?.location ?? "SessionManager.createChatCompletionStream",
-        requestId,
-        sessionId,
-        model: typeof request.model === "string" ? request.model : undefined,
-        baseURL: debug?.baseURL,
-        durationMs: Date.now() - startedAtMs,
-        params: { ...debug?.params, options: summarizeCompletionOptions(options) },
-        request: streamRequest,
-        response,
-      });
-      return response as { choices?: Array<{ message?: Record<string, unknown> }>; usage?: ModelUsage | null };
-    }
-
-    let content = "";
-    let reasoningContent = "";
-    let refusal: string | null = null;
-    let usage: ModelUsage | null = null;
-    const responseChunks: unknown[] = [];
-    const toolCallsByIndex = new Map<
-      number,
-      {
-        id?: string;
-        type?: string;
-        function?: { name?: string; arguments?: string };
-      }
-    >();
-
-    const trackText = (value: unknown) => {
-      if (typeof value !== "string" || value.length === 0) {
-        return;
-      }
-      estimatedTokens += this.estimateStreamTokens(value);
-      this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "update", sessionId);
-    };
-
     try {
-      for await (const chunk of response as AsyncIterable<Record<string, unknown>>) {
-        if (debug?.enabled) {
-          responseChunks.push(chunk);
-        }
-        if ("usage" in chunk && chunk.usage != null) {
-          usage = chunk.usage as ModelUsage;
-        }
+      for (let attemptIndex = 0; attemptIndex < maxAttempts; attemptIndex++) {
+        this.throwIfAborted(parentSignal);
+        const attemptOptions = this.createAttemptOptions(options, requestTimeoutMs);
+        const attemptParams = {
+          ...debug?.params,
+          attempt: attemptIndex + 1,
+          maxAttempts,
+          requestTimeoutMs,
+          maxRetries,
+          options: summarizeCompletionOptions(attemptOptions.options),
+        };
+        let response: unknown;
+        let responseChunks: unknown[] | undefined;
+        let locationSuffix = "create";
 
-        const choices = Array.isArray(chunk.choices) ? chunk.choices : [];
-        for (const choice of choices) {
-          const delta = isUsageRecord(choice) && isUsageRecord(choice.delta) ? choice.delta : null;
-          if (!delta) {
-            continue;
+        try {
+          response = await (
+            client.chat.completions.create as unknown as (
+              body: Record<string, unknown>,
+              options?: Record<string, unknown>
+            ) => Promise<unknown>
+          )(streamRequest, attemptOptions.options);
+
+          if (
+            !response ||
+            typeof (response as { [Symbol.asyncIterator]?: unknown })[Symbol.asyncIterator] !== "function"
+          ) {
+            this.logChatCompletionDebug(debug, {
+              timestamp: new Date().toISOString(),
+              location: debug?.location ?? "SessionManager.createChatCompletionStream",
+              requestId,
+              sessionId,
+              model: typeof request.model === "string" ? request.model : undefined,
+              baseURL: debug?.baseURL,
+              durationMs: Date.now() - startedAtMs,
+              params: attemptParams,
+              request: streamRequest,
+              response,
+            });
+            return response as { choices?: Array<{ message?: Record<string, unknown> }>; usage?: ModelUsage | null };
           }
 
-          const contentDelta = delta.content;
-          if (typeof contentDelta === "string") {
-            content += contentDelta;
-            trackText(contentDelta);
-          }
+          let content = "";
+          let reasoningContent = "";
+          let refusal: string | null = null;
+          let usage: ModelUsage | null = null;
+          responseChunks = [];
+          const toolCallsByIndex = new Map<
+            number,
+            {
+              id?: string;
+              type?: string;
+              function?: { name?: string; arguments?: string };
+            }
+          >();
 
-          const reasoningDelta = delta.reasoning_content ?? delta.reasoning;
-          if (typeof reasoningDelta === "string") {
-            reasoningContent += reasoningDelta;
-            trackText(reasoningDelta);
-          }
+          const trackText = (value: unknown) => {
+            if (typeof value !== "string" || value.length === 0) {
+              return;
+            }
+            estimatedTokens += this.estimateStreamTokens(value);
+            this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "update", sessionId);
+          };
 
-          if (typeof delta.refusal === "string") {
-            refusal = `${refusal ?? ""}${delta.refusal}`;
-            trackText(delta.refusal);
-          }
+          locationSuffix = "stream";
+          for await (const chunk of response as AsyncIterable<Record<string, unknown>>) {
+            if (debug?.enabled) {
+              responseChunks.push(chunk);
+            }
+            if ("usage" in chunk && chunk.usage != null) {
+              usage = chunk.usage as ModelUsage;
+            }
 
-          const rawToolCalls = delta.tool_calls;
-          if (Array.isArray(rawToolCalls)) {
-            for (const rawToolCall of rawToolCalls) {
-              if (!isUsageRecord(rawToolCall)) {
+            const choices = Array.isArray(chunk.choices) ? chunk.choices : [];
+            for (const choice of choices) {
+              const delta = isUsageRecord(choice) && isUsageRecord(choice.delta) ? choice.delta : null;
+              if (!delta) {
                 continue;
               }
-              const index = typeof rawToolCall.index === "number" ? rawToolCall.index : toolCallsByIndex.size;
-              const current = toolCallsByIndex.get(index) ?? {};
-              if (typeof rawToolCall.id === "string") {
-                current.id = rawToolCall.id;
+
+              const contentDelta = delta.content;
+              if (typeof contentDelta === "string") {
+                content += contentDelta;
+                trackText(contentDelta);
               }
-              if (typeof rawToolCall.type === "string") {
-                current.type = rawToolCall.type;
+
+              const reasoningDelta = delta.reasoning_content ?? delta.reasoning;
+              if (typeof reasoningDelta === "string") {
+                reasoningContent += reasoningDelta;
+                trackText(reasoningDelta);
               }
-              const rawFunction = isUsageRecord(rawToolCall.function) ? rawToolCall.function : null;
-              if (rawFunction) {
-                current.function = current.function ?? {};
-                if (typeof rawFunction.name === "string") {
-                  current.function.name = `${current.function.name ?? ""}${rawFunction.name}`;
-                  trackText(rawFunction.name);
-                }
-                if (typeof rawFunction.arguments === "string") {
-                  current.function.arguments = `${current.function.arguments ?? ""}${rawFunction.arguments}`;
-                  trackText(rawFunction.arguments);
+
+              if (typeof delta.refusal === "string") {
+                refusal = `${refusal ?? ""}${delta.refusal}`;
+                trackText(delta.refusal);
+              }
+
+              const rawToolCalls = delta.tool_calls;
+              if (Array.isArray(rawToolCalls)) {
+                for (const rawToolCall of rawToolCalls) {
+                  if (!isUsageRecord(rawToolCall)) {
+                    continue;
+                  }
+                  const index = typeof rawToolCall.index === "number" ? rawToolCall.index : toolCallsByIndex.size;
+                  const current = toolCallsByIndex.get(index) ?? {};
+                  if (typeof rawToolCall.id === "string") {
+                    current.id = rawToolCall.id;
+                  }
+                  if (typeof rawToolCall.type === "string") {
+                    current.type = rawToolCall.type;
+                  }
+                  const rawFunction = isUsageRecord(rawToolCall.function) ? rawToolCall.function : null;
+                  if (rawFunction) {
+                    current.function = current.function ?? {};
+                    if (typeof rawFunction.name === "string") {
+                      current.function.name = `${current.function.name ?? ""}${rawFunction.name}`;
+                      trackText(rawFunction.name);
+                    }
+                    if (typeof rawFunction.arguments === "string") {
+                      current.function.arguments = `${current.function.arguments ?? ""}${rawFunction.arguments}`;
+                      trackText(rawFunction.arguments);
+                    }
+                  }
+                  toolCallsByIndex.set(index, current);
                 }
               }
-              toolCallsByIndex.set(index, current);
             }
           }
+
+          const toolCalls = Array.from(toolCallsByIndex.entries())
+            .sort(([left], [right]) => left - right)
+            .map(([, toolCall]) => toolCall);
+          const normalizedToolCalls = this.normalizeLlmToolCalls(toolCalls);
+          const message: Record<string, unknown> = { content };
+          if (normalizedToolCalls) {
+            message.tool_calls = normalizedToolCalls;
+          }
+          if (reasoningContent.length > 0) {
+            message.reasoning_content = reasoningContent;
+          }
+          if (refusal != null) {
+            message.refusal = refusal;
+          }
+
+          const finalResponse = {
+            choices: [{ message }],
+            usage,
+          };
+          this.logChatCompletionDebug(debug, {
+            timestamp: new Date().toISOString(),
+            location: debug?.location ?? "SessionManager.createChatCompletionStream",
+            requestId,
+            sessionId,
+            model: typeof request.model === "string" ? request.model : undefined,
+            baseURL: debug?.baseURL,
+            durationMs: Date.now() - startedAtMs,
+            params: attemptParams,
+            request: streamRequest,
+            responseChunks,
+            response: finalResponse,
+          });
+          return finalResponse;
+        } catch (rawError) {
+          const timedOut = attemptOptions.didTimeout();
+          const error = this.normalizeChatCompletionError(rawError, timedOut, requestTimeoutMs);
+          const retrying =
+            attemptIndex < maxRetries && !parentSignal?.aborted && this.isRetryableChatCompletionError(error, timedOut);
+          const normalizedError = normalizeDebugError(error);
+          const location = debug?.location ?? `SessionManager.createChatCompletionStream:${locationSuffix}`;
+          const params = {
+            ...attemptParams,
+            timedOut,
+            retrying,
+          };
+
+          this.logChatCompletionDebug(debug, {
+            timestamp: new Date().toISOString(),
+            location,
+            requestId,
+            sessionId,
+            model: typeof request.model === "string" ? request.model : undefined,
+            baseURL: debug?.baseURL,
+            durationMs: Date.now() - startedAtMs,
+            params,
+            request: streamRequest,
+            responseChunks,
+            error: normalizedError,
+          });
+          logApiError({
+            timestamp: new Date().toISOString(),
+            location,
+            requestId,
+            sessionId,
+            model: typeof request.model === "string" ? request.model : undefined,
+            baseURL: debug?.baseURL,
+            error: normalizedError,
+            request: streamRequest,
+          });
+
+          if (!retrying) {
+            throw error;
+          }
+
+          await this.waitForChatRetry(this.getChatRetryDelayMs(attemptIndex), parentSignal);
+        } finally {
+          attemptOptions.cleanup();
         }
       }
-    } catch (error) {
-      this.logChatCompletionDebug(debug, {
-        timestamp: new Date().toISOString(),
-        location: debug?.location ?? "SessionManager.createChatCompletionStream:stream",
-        requestId,
-        sessionId,
-        model: typeof request.model === "string" ? request.model : undefined,
-        baseURL: debug?.baseURL,
-        durationMs: Date.now() - startedAtMs,
-        params: { ...debug?.params, options: summarizeCompletionOptions(options) },
-        request: streamRequest,
-        responseChunks,
-        error: normalizeDebugError(error),
-      });
-      logApiError({
-        timestamp: new Date().toISOString(),
-        location: "SessionManager.createChatCompletionStream:stream",
-        requestId,
-        sessionId,
-        model: typeof request.model === "string" ? request.model : undefined,
-        error: {
-          name: error instanceof Error ? error.name : "UnknownError",
-          message: error instanceof Error ? error.message : String(error),
-          stack: error instanceof Error ? error.stack : undefined,
-        },
-        request: streamRequest,
-      });
-      throw error;
+
+      throw new Error("Chat completion request failed before any attempt was made.");
     } finally {
       this.emitLlmStreamProgress(requestId, startedAt, estimatedTokens, "end", sessionId);
     }
-
-    const toolCalls = Array.from(toolCallsByIndex.entries())
-      .sort(([left], [right]) => left - right)
-      .map(([, toolCall]) => toolCall);
-    const normalizedToolCalls = this.normalizeLlmToolCalls(toolCalls);
-    const message: Record<string, unknown> = { content };
-    if (normalizedToolCalls) {
-      message.tool_calls = normalizedToolCalls;
-    }
-    if (reasoningContent.length > 0) {
-      message.reasoning_content = reasoningContent;
-    }
-    if (refusal != null) {
-      message.refusal = refusal;
-    }
-
-    const finalResponse = {
-      choices: [{ message }],
-      usage,
-    };
-    this.logChatCompletionDebug(debug, {
-      timestamp: new Date().toISOString(),
-      location: debug?.location ?? "SessionManager.createChatCompletionStream",
-      requestId,
-      sessionId,
-      model: typeof request.model === "string" ? request.model : undefined,
-      baseURL: debug?.baseURL,
-      durationMs: Date.now() - startedAtMs,
-      params: { ...debug?.params, options: summarizeCompletionOptions(options) },
-      request: streamRequest,
-      responseChunks,
-      response: finalResponse,
-    });
-    return finalResponse;
   }
 
   private logChatCompletionDebug(
diff --git a/src/settings.ts b/src/settings.ts
index b7a7a77..ce5e9b2 100644
--- a/src/settings.ts
+++ b/src/settings.ts
@@ -10,6 +10,8 @@ export type DeepcodingEnv = Record<string, string | undefined> & {
   THINKING_ENABLED?: string;
   REASONING_EFFORT?: string;
   DEBUG_LOG_ENABLED?: string;
+  REQUEST_TIMEOUT_MS?: string;
+  MAX_RETRIES?: string;
 };
 
 export type ReasoningEffort = "high" | "max";
@@ -47,6 +49,8 @@ export type DeepcodingSettings = {
   thinkingEnabled?: boolean;
   reasoningEffort?: ReasoningEffort;
   debugLogEnabled?: boolean;
+  requestTimeoutMs?: number;
+  maxRetries?: number;
   notify?: string;
   webSearchTool?: string;
   mcpServers?: Record<string, McpServerConfig>;
@@ -61,6 +65,8 @@ export type ResolvedDeepcodingSettings = {
   thinkingEnabled: boolean;
   reasoningEffort: ReasoningEffort;
   debugLogEnabled: boolean;
+  requestTimeoutMs: number;
+  maxRetries: number;
   notify?: string;
   webSearchTool?: string;
   mcpServers?: Record<string, McpServerConfig>;
@@ -101,6 +107,14 @@ function trimString(value: unknown): string {
   return typeof value === "string" ? value.trim() : "";
 }
 
+function parseNonNegativeInteger(value: unknown, max: number): number | undefined {
+  // Parse a number or numeric string into an integer in [0, max]; undefined means unset or invalid.
+  // Blank strings are unset: Number("") is 0, which is not nullish and would win a `??` fallback chain.
+  const text = typeof value === "string" ? value.trim() : "";
+  const raw = typeof value === "number" ? value : text !== "" ? Number(text) : NaN;
+  return Number.isFinite(raw) && raw >= 0 ? Math.min(Math.round(raw), max) : undefined;
+}
+
 const VALID_PERMISSION_SCOPES = new Set<PermissionScope>([
   "read-in-cwd",
   "read-out-cwd",
@@ -313,6 +327,22 @@ export function resolveSettingsSources(
     parseBoolean(userEnv.DEBUG_LOG_ENABLED) ??
     false;
 
+  const requestTimeoutMs =
+    parseNonNegativeInteger(systemEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ??
+    parseNonNegativeInteger(projectSettings?.requestTimeoutMs, MAX_REQUEST_TIMEOUT_MS) ??
+    parseNonNegativeInteger(projectEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ??
+    parseNonNegativeInteger(userSettings?.requestTimeoutMs, MAX_REQUEST_TIMEOUT_MS) ??
+    parseNonNegativeInteger(userEnv.REQUEST_TIMEOUT_MS, MAX_REQUEST_TIMEOUT_MS) ??
+    DEFAULT_REQUEST_TIMEOUT_MS;
+
+  const maxRetries =
+    parseNonNegativeInteger(systemEnv.MAX_RETRIES, MAX_MAX_RETRIES) ??
+    parseNonNegativeInteger(projectSettings?.maxRetries, MAX_MAX_RETRIES) ??
+    parseNonNegativeInteger(projectEnv.MAX_RETRIES, MAX_MAX_RETRIES) ??
+    parseNonNegativeInteger(userSettings?.maxRetries, MAX_MAX_RETRIES) ??
+    parseNonNegativeInteger(userEnv.MAX_RETRIES, MAX_MAX_RETRIES) ??
+    DEFAULT_MAX_RETRIES;
+
   const notify =
     trimString(systemEnv.NOTIFY) || trimString(projectSettings?.notify) || trimString(userSettings?.notify) || "";
   const webSearchTool =
@@ -329,6 +359,8 @@ export function resolveSettingsSources(
     thinkingEnabled,
     reasoningEffort,
     debugLogEnabled,
+    requestTimeoutMs,
+    maxRetries,
     notify: notify || undefined,
     webSearchTool: webSearchTool || undefined,
     mcpServers: mergeMcpServers(userSettings, projectSettings, userEnv, projectEnv, systemEnv),
@@ -380,6 +412,11 @@ export function applyModelConfigSelection(
 
 export const DEFAULT_MODEL = "deepseek-v4-pro";
 export const DEFAULT_BASE_URL = "https://api.deepseek.com";
+export const DEFAULT_REQUEST_TIMEOUT_MS = 0; // 0 = no request timeout
+export const DEFAULT_MAX_RETRIES = 2; // used when no settings/env source supplies a valid value
+
+const MAX_REQUEST_TIMEOUT_MS = 24 * 60 * 60 * 1000; // 24 hours in ms; ceiling passed to parseNonNegativeInteger
+const MAX_MAX_RETRIES = 10; // ceiling passed to parseNonNegativeInteger
 
 // ---------------------------------------------------------------------------
 // Settings file I/O
diff --git a/src/tests/debug-logger.test.ts b/src/tests/debug-logger.test.ts
index 7b1aad4..5f796a3 100644
--- a/src/tests/debug-logger.test.ts
+++ b/src/tests/debug-logger.test.ts
@@ -7,8 +7,12 @@ import { getDebugLogPath, logOpenAIChatCompletionDebug } from "../common/debug-l
 
 test("debug logger appends full entries without rotation", () => {
   const originalHome = process.env.HOME;
+  const originalUserProfile = process.env.USERPROFILE;
   const home = fs.mkdtempSync(path.join(os.tmpdir(), "deepcode-debug-log-home-"));
   process.env.HOME = home;
+  if (process.platform === "win32") {
+    process.env.USERPROFILE = home;
+  }
   try {
     for (let index = 0; index < 25; index += 1) {
       logOpenAIChatCompletionDebug({
@@ -42,5 +46,11 @@ test("debug logger appends full entries without rotation", () => {
     } else {
       process.env.HOME = originalHome;
     }
+    if (originalUserProfile === undefined) {
+      delete process.env.USERPROFILE;
+    } else {
+      process.env.USERPROFILE = originalUserProfile;
+    }
+    fs.rmSync(home, { recursive: true, force: true });
   }
 });
diff --git a/src/tests/session.test.ts b/src/tests/session.test.ts
index 6af3cb2..d988e81 100644
--- a/src/tests/session.test.ts
+++ b/src/tests/session.test.ts
@@ -2209,6 +2209,143 @@ test("SessionManager streams chat completions and counts reasoning progress", as
   assert.equal(progressEvents[2]?.formattedTokens, "3");
 });
 
+test("SessionManager retries transient chat completion failures", async () => {
+  // Isolated project root and home directory for this run.
+  const workspace = createTempDir("deepcode-retry-workspace-");
+  const homeDir = createTempDir("deepcode-retry-home-");
+  setHomeDir(homeDir);
+
+  // Stub completion endpoint: the first call throws, every later call succeeds.
+  // The AbortSignal passed to each call is recorded for the assertions below.
+  const seenSignals: Array<AbortSignal | undefined> = [];
+  let callCount = 0;
+  const create = async (_request: Record<string, unknown>, options?: { signal?: AbortSignal }) => {
+    callCount += 1;
+    seenSignals.push(options?.signal);
+    if (callCount > 1) {
+      return createChatResponse("recovered", {
+        prompt_tokens: 2,
+        completion_tokens: 1,
+        total_tokens: 3,
+      });
+    }
+    throw new Error("fetch failed");
+  };
+  const fakeClient = { chat: { completions: { create } } };
+
+  // One retry is allowed, so the failed first call may be followed by a second one.
+  const manager = new SessionManager({
+    projectRoot: workspace,
+    createOpenAIClient: () => ({
+      client: fakeClient as any,
+      model: "test-model",
+      baseURL: "https://api.deepseek.com",
+      thinkingEnabled: false,
+    }),
+    getResolvedSettings: () => ({ model: "test-model", requestTimeoutMs: 1000, maxRetries: 1 }),
+    renderMarkdown: (text) => text,
+    onAssistantMessage: () => {},
+  });
+
+  // Creating the session is what issues the chat completion request(s).
+  const sessionId = await manager.createSession({ text: "" });
+  const messages = manager.listSessionMessages(sessionId);
+  const reply = messages.find((message) => message.role === "assistant");
+
+  // Two calls were made, each received an AbortSignal, and the retry's reply was kept.
+  assert.equal(callCount, 2);
+  const everyCallHadSignal = seenSignals.every((signal) => signal instanceof AbortSignal);
+  assert.equal(everyCallHadSignal, true);
+  assert.equal(reply?.content, "recovered");
+  assert.equal(manager.getSession(sessionId)?.status, "completed");
+});
+
+test("SessionManager does not retry non-retryable chat completion errors", async () => {
+  // Isolated project root and home directory for this run.
+  const workspace = createTempDir("deepcode-no-retry-workspace-");
+  const homeDir = createTempDir("deepcode-no-retry-home-");
+  setHomeDir(homeDir);
+
+  // Stub completion endpoint: every call is counted, then rejected with an
+  // error carrying status 400.
+  let callCount = 0;
+  const create = async () => {
+    callCount += 1;
+    const badRequest = Object.assign(new Error("bad request"), { status: 400 });
+    throw badRequest;
+  };
+  const fakeClient = { chat: { completions: { create } } };
+
+  // Retries are enabled (maxRetries: 2), so a single recorded call shows none were attempted.
+  const manager = new SessionManager({
+    projectRoot: workspace,
+    createOpenAIClient: () => ({
+      client: fakeClient as any,
+      model: "test-model",
+      baseURL: "https://api.deepseek.com",
+      thinkingEnabled: false,
+    }),
+    getResolvedSettings: () => ({ model: "test-model", maxRetries: 2 }),
+    renderMarkdown: (text) => text,
+    onAssistantMessage: () => {},
+  });
+
+  // Creating the session is what issues the chat completion request.
+  const sessionId = await manager.createSession({ text: "" });
+  const failedSession = manager.getSession(sessionId);
+
+  // One call only; the session is failed and reports the error's own message.
+  assert.equal(callCount, 1);
+  assert.equal(failedSession?.status, "failed");
+  assert.equal(failedSession?.failReason, "bad request");
+});
+
+test("SessionManager fails timed-out chat completion requests without treating them as user interrupts", async () => {
+  const workspace = createTempDir("deepcode-timeout-workspace-");
+  const homeDir = createTempDir("deepcode-timeout-home-");
+  setHomeDir(homeDir);
+
+  // Stub completion endpoint that never resolves. It settles only by rejecting
+  // with an AbortError, either right away when the signal is already aborted
+  // or later when the signal's "abort" event fires.
+  const create = async (_request: Record<string, unknown>, options?: { signal?: AbortSignal }) => {
+    const signal = options?.signal;
+    return new Promise((_resolve, reject) => {
+      const rejectAsAborted = () => {
+        const abortError = Object.assign(new Error("aborted"), { name: "AbortError" });
+        reject(abortError);
+      };
+      if (signal?.aborted) {
+        rejectAsAborted();
+      } else {
+        signal?.addEventListener("abort", rejectAsAborted, { once: true });
+      }
+    });
+  };
+  const fakeClient = { chat: { completions: { create } } };
+
+  // A 10ms timeout with retries disabled: the single hanging request decides the outcome.
+  const manager = new SessionManager({
+    projectRoot: workspace,
+    createOpenAIClient: () => ({
+      client: fakeClient as any,
+      model: "test-model",
+      baseURL: "https://api.deepseek.com",
+      thinkingEnabled: false,
+    }),
+    getResolvedSettings: () => ({ model: "test-model", requestTimeoutMs: 10, maxRetries: 0 }),
+    renderMarkdown: (text) => text,
+    onAssistantMessage: () => {},
+  });
+
+  const sessionId = await manager.createSession({ text: "" });
+  const timedOutSession = manager.getSession(sessionId);
+
+  // The session is marked failed and its reason names the timeout.
+  assert.equal(timedOutSession?.status, "failed");
+  assert.match(timedOutSession?.failReason ?? "", /Request timed out after 10ms/);
+});
+
 test("SessionManager persists session and user message before skill matching is cancelled", async () => {
   const workspace = createTempDir("deepcode-skill-abort-workspace-");
   const home = createTempDir("deepcode-skill-abort-home-");
diff --git a/src/tests/settings-and-notify.test.ts b/src/tests/settings-and-notify.test.ts
index 52f8671..3956d89 100644
--- a/src/tests/settings-and-notify.test.ts
+++ b/src/tests/settings-and-notify.test.ts
@@ -7,7 +7,13 @@ import {
   type NotifyContext,
   type NotifySpawn,
 } from "../common/notify";
-import { applyModelConfigSelection, resolveSettings, resolveSettingsSources } from "../settings";
+import {
+  DEFAULT_MAX_RETRIES,
+  DEFAULT_REQUEST_TIMEOUT_MS,
+  applyModelConfigSelection,
+  resolveSettings,
+  resolveSettingsSources,
+} from "../settings";
 
 const TEST_PROCESS_ENV = {};
 
@@ -83,6 +89,53 @@ test("resolveSettings reads THINKING_ENABLED, REASONING_EFFORT, and DEBUG_LOG_EN
   assert.equal(resolved.baseURL, "https://default.example.com");
 });
 
+test("resolveSettings reads request timeout and retry controls with documented precedence", () => {
+  // Every source proposes a different value so the assertions reveal which one wins.
+  const userSettings = {
+    requestTimeoutMs: 1000,
+    maxRetries: 1,
+    env: { REQUEST_TIMEOUT_MS: "2000", MAX_RETRIES: "2" },
+  };
+  // Sets the timeout directly but supplies retries only through its env block.
+  const projectSettings = {
+    requestTimeoutMs: 3000,
+    env: { MAX_RETRIES: "3" },
+  };
+  const defaults = {
+    model: "default-model",
+    baseURL: "https://default.example.com",
+  };
+  // Process-level variable, written with the DEEPCODE_ prefix.
+  const processEnv = {
+    DEEPCODE_REQUEST_TIMEOUT_MS: "4000",
+  };
+
+  const resolved = resolveSettingsSources(userSettings, projectSettings, defaults, processEnv);
+
+  // Timeout: the process-level value beats every settings-file value.
+  assert.equal(resolved.requestTimeoutMs, 4000);
+  // Retries: no process-level value and no project maxRetries, so the project env's "3" wins.
+  assert.equal(resolved.maxRetries, 3);
+});
+
+test("resolveSettings defaults and clamps request retry controls", () => {
+  // A negative timeout is invalid; a retry count of 999 exceeds the cap.
+  const outOfRangeSettings = { requestTimeoutMs: -1, maxRetries: 999 };
+  const defaults = {
+    model: "default-model",
+    baseURL: "https://default.example.com",
+  };
+
+  const resolved = resolveSettings(outOfRangeSettings, defaults, TEST_PROCESS_ENV);
+
+  // The invalid timeout falls back to the default rather than being clamped.
+  assert.equal(resolved.requestTimeoutMs, DEFAULT_REQUEST_TIMEOUT_MS);
+  // The oversized retry count is clamped to the maximum of 10.
+  assert.equal(resolved.maxRetries, 10);
+  // Pin the exported default so an accidental change is caught here.
+  assert.equal(DEFAULT_MAX_RETRIES, 2);
+});
+
 test("resolveSettings ignores removed legacy env.THINKING", () => {
   const resolved = resolveSettings(
     {
diff --git a/src/tools/executor.ts b/src/tools/executor.ts
index 220fc89..dfa7bf8 100644
--- a/src/tools/executor.ts
+++ b/src/tools/executor.ts
@@ -16,6 +16,8 @@ export type CreateOpenAIClient = () => {
   thinkingEnabled: boolean;
   reasoningEffort?: ReasoningEffort;
   debugLogEnabled?: boolean;
+  requestTimeoutMs?: number;
+  maxRetries?: number;
   notify?: string;
   webSearchTool?: string;
   env?: Record<string, string>;