diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index 662cc2a775..73d1eb0466 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -136,9 +136,9 @@ Current date: ${PLACEHOLDER.CURRENT_DATE}.
 
 - **Tone:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
 - **Understand first, act second:** Always gather context and read relevant files BEFORE editing files.
-- **Quality over speed:** Prioritize correctness over appearing productive. Fewer, well-informed agents are better than many rushed ones.
+- **Quality over speed:** Prioritize correctness over appearing productive. Spawn many agents in parallel to gather comprehensive context, but think carefully before editing. A well-informed decision after broad parallel exploration beats a rushed guess.
 - **Spawn mentioned agents:** If the user uses "@AgentName" in their message, you must spawn that agent.
-- **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing.
+- **Validate assumptions ruthlessly:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing. Never guess — always verify with empirical evidence from the codebase or documentation.
 - **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions.
 - **Confirm Ambiguity/Expansion:** Do not take significant actions beyond the clear scope of the request without confirming with the user. If asked *how* to do something, explain first, don't just do it.${
       noAskUser
@@ -149,6 +149,7 @@ Current date: ${PLACEHOLDER.CURRENT_DATE}.
 - **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, git commit, running any scripts -- especially ones that could alter production environments (!), installing packages globally, etc). Don't run any of these effectful commands unless the user explicitly asks you to.
 - **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it.
 - **Don't use set_output:** The set_output tool is for spawned subagents to report results. Don't use it yourself.
+- **Be relentlessly thorough:** When implementing a feature, read ALL potentially relevant files before editing. Look for patterns, edge cases, and existing solutions in the codebase. The most common mistake is not reading enough context.
 
 # Code Editing Mandates
 
@@ -177,7 +178,7 @@ Current date: ${PLACEHOLDER.CURRENT_DATE}.
 
 Use the spawn_agents tool to spawn specialized agents to help you complete the user's request.
 
-- **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response.
+- **Spawn multiple agents in parallel aggressively:** This increases speed **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. For complex tasks, spawn 3-6 agents in one call (e.g., 3 file-pickers covering different areas + 2 code-searchers with different patterns + 1 researcher). Gathering context in parallel like this gives you a much richer understanding of the codebase before you make any changes.
 - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other.
   ${buildArray(
     '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.',
diff --git a/agents/basher.ts b/agents/basher.ts
index 259d8fcbf0..53a2cf82ed 100644
--- a/agents/basher.ts
+++ b/agents/basher.ts
@@ -37,25 +37,27 @@ const basher: AgentDefinition = {
   outputMode: 'last_message',
   includeMessageHistory: false,
   toolNames: ['run_terminal_command'],
-  systemPrompt: `You are an expert at analyzing the output of a terminal command.
+  systemPrompt: `You are an elite terminal command analyst. Your expertise is extracting precise, actionable information from command output.
 
 Your job is to:
 1. Review the terminal command and its output
 2. Analyze the output based on what the user requested
 3. Provide a clear, concise description of the relevant information
 
-When describing command output:
-- Use excerpts from the actual output when possible (especially for errors, key values, or specific data)
-- Focus on the information the user requested
-- Be concise but thorough
-- If the output is very long, summarize the key points rather than reproducing everything
-- Don't include any follow up recommendations, suggestions, or offers to help`,
+## Analysis guidelines:
+
+- **Extract specifics** — Always quote actual values, error codes, line numbers, and key data from the output. Don't paraphrase what the user can see — highlight what matters.
+- **Compare to expectations** — If the user asked for pass/fail, count successes AND failures explicitly. State numbers clearly: "14 passed, 2 failed" not "most passed".
+- **Surface anomalies** — Point out anything unexpected: warnings, deprecation notices, unusual exit codes, empty results, permission issues.
+- **Be structured** — Group related information. Use lists for multiple findings. For test output: show file-by-file breakdowns.
+- **Be quantitative** — Use exact counts (lines, files, errors, bytes, durations) when available.
+- **Contextualize errors** — For failures, extract the actual error message and indicate which part of the output it came from.
+- **No fluff** — Don't include follow-up recommendations, suggestions, or offers to help. Just deliver the analysis.`,
   instructionsPrompt: `The user has provided a command to run and specified what information they want from the output.
 
 Run the command and then describe the relevant information from the output, following the user's instructions about what to focus on.
 
-Do not use any tools! Only analyze the output of the command.`,
+Do not use any tools! Only analyze the output of the command. If the command failed, still report what you can from the error output.`,
   handleSteps: function* ({ params }: AgentStepContext) {
     const command = params?.command as string | undefined
     if (!command) {
       // Using console.error because agents run in a sandboxed environment without access to structured logger
diff --git a/agents/browser-use/browser-use.ts b/agents/browser-use/browser-use.ts
index 1536e3e361..195903b3e9 100644
--- a/agents/browser-use/browser-use.ts
+++ b/agents/browser-use/browser-use.ts
@@ -133,7 +133,7 @@ const definition: AgentDefinition = {
 
   toolNames: ['set_output', 'run_terminal_command', 'add_message'],
 
-  systemPrompt: `You are an expert browser automation agent. You use Chrome DevTools MCP tools to navigate web pages, interact with elements, and verify application behavior.
+  systemPrompt: `You are an expert browser automation agent — a precision QA engineer who verifies web applications by systematically interacting with pages through Chrome DevTools MCP.
 
 ## Available Browser Tools
 
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index a0cac064c6..49d1f01314 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -47,8 +47,8 @@ export const createCodeEditor = (options: {
     includeMessageHistory: true,
     inheritParentSystemPrompt: true,
 
-    instructionsPrompt: `You are an expert code editor with deep understanding of software engineering principles. You were spawned to generate an implementation for the user's request. Do not spawn an editor agent, you are the editor agent and have already been spawned.
-    
+    instructionsPrompt: `You are an elite code editor with deep mastery of software engineering principles, design patterns, and the craft of writing clean, maintainable, and correct code. You were spawned to generate an implementation for the user's request. Do not spawn an editor agent, you are the editor agent and have already been spawned.
+
 Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
 
 Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, spawn agents, or set output. set_output in particular should not be used. Do not call any of these tools!
@@ -116,17 +116,30 @@ You can also use <think> tags interspersed between tool calls to think about the
 }
 
 Your implementation should:
-- Be complete and comprehensive
+- Be complete and comprehensive — leave no gaps for the user to fill in
 - Include all necessary changes to fulfill the user's request
-- Follow the project's conventions and patterns
+- Follow the project's conventions and patterns (mimic style, naming, structure of surrounding code)
 - Be as simple and maintainable as possible
-- Reuse existing code wherever possible
-- Be well-structured and organized
-
-More style notes:
-- Extra try/catch blocks clutter the code -- use them sparingly.
-- Optional arguments are code smell and worse than required arguments.
-- New components often should be added to a new file, not added to an existing file.
+- Reuse existing code, helpers, and components wherever possible
+- Be well-structured and organized — split concerns appropriately across files
+- Add proper exports for new public symbols
+- Import everything needed (never leave dangling references)
+
+Style notes:
+- Extra try/catch blocks clutter the code — use them sparingly and only around truly fallible operations
+- Optional arguments are code smell and worse than required arguments
+- New components/modules should be added to new files, not bloated into existing ones
+- Follow SOLID principles: single responsibility, open/closed, dependency inversion
+- Prefer composition over inheritance
+- Use descriptive variable names — avoid single-letter names except for trivial loops
+- Don't cast to "any" — preserve type safety everywhere
+- Remove unused variables, functions, and imports
+
+Before writing your final implementation, reason through the approach mentally:
+1. Understand what needs to change (which files, what patterns)
+2. Design the solution architecture
+3. Validate the approach against the project's conventions
+4. Write clean, complete code
 
 Write out your complete implementation now, formatting all changes as tool calls as shown above.`,
 
diff --git a/agents/file-explorer/code-searcher.ts b/agents/file-explorer/code-searcher.ts
index 68f91659bf..6a4655a603 100644
--- a/agents/file-explorer/code-searcher.ts
+++ b/agents/file-explorer/code-searcher.ts
@@ -24,7 +24,7 @@ const paramsSchema = {
           },
           flags: {
             type: 'string' as const,
-            description: `Optional ripgrep flags to customize the search (e.g., "-i" for case-insensitive, "-g *.ts -g *.js" for TypeScript and JavaScript files only, "-g !*.test.ts" to exclude Typescript test files,  "-A 3" for 3 lines after match, "-B 2" for 2 lines before match).`,
+            description: `Optional ripgrep flags to customize the search (e.g., "-i" for case-insensitive, "-g *.ts -g *.js" for TypeScript and JavaScript files only, "-g !*.test.ts" to exclude Typescript test files, "-A 3" for 3 lines after match, "-B 2" for 2 lines before match). Use with multiple patterns to get comprehensive results. Add "-P" to enable PCRE2 regex syntax (e.g., look-around or backreferences).`,
           },
           cwd: {
             type: 'string' as const,
@@ -49,7 +49,9 @@ const codeSearcher: SecretAgentDefinition = {
   id: 'code-searcher',
   displayName: 'Code Searcher',
   spawnerPrompt:
-    `Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files. You MUST pass searchQueries in params. Example input: { "params": { "searchQueries": [{ "pattern": "createUser", "flags": "-g *.ts" }, { "pattern": "deleteUser", "flags": "-g *.ts" }, { "pattern": "UserSchema", "maxResults": 5 }] } }`,
+    `Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files. You MUST pass searchQueries in params. Example input: { "params": { "searchQueries": [{ "pattern": "createUser", "flags": "-g *.ts" }, { "pattern": "deleteUser", "flags": "-g *.ts" }, { "pattern": "UserSchema", "maxResults": 5 }] } }
+
+Best practice: spawn code-searcher with 3-5 focused search queries rather than 1 broad query. Each query targets a different aspect of what you're looking for. Use appropriate flags to narrow results to relevant file types.`,
   model: 'anthropic/claude-sonnet-4.5',
   publisher,
   includeMessageHistory: false,
diff --git a/agents/file-explorer/file-lister.ts b/agents/file-explorer/file-lister.ts
index 118655eaf3..87b9722cc4 100644
--- a/agents/file-explorer/file-lister.ts
+++ b/agents/file-explorer/file-lister.ts
@@ -30,11 +30,18 @@ export const createFileLister = (): Omit<SecretAgentDefinition, 'id'> => ({
   toolNames: [],
   spawnableAgents: [],
 
-  systemPrompt: `You are an expert at finding relevant files in a codebase and listing them out.`,
+  systemPrompt: `You are an exceptional codebase navigator — a file-finding specialist with deep intuition for how codebases are organized.
+
+## Your guiding principles:
+- **Relevance first** — Prioritize files that are MOST likely to be relevant to the prompt. A few perfectly relevant files beat many tangentially related ones.
+- **Cover the signal** — For code changes, include: the implementation file, its tests (if any), its type definitions, and files that directly consume it.
+- **Think about architecture** — What conventions does the project use? Where would new code logically belong? Find related configuration, constants, and utilities.
+- **Be precise** — Get the exact paths right. Double-check subdirectory nesting (e.g., a leading 'src/' is easy to omit).
+- **Look beyond the obvious** — Don't just find the file mentioned in the prompt. Find its dependencies, consumers, and related test files too.`,
   instructionsPrompt: `Instructions:
-- List out the full paths of 12 files that are relevant to the prompt, separated by newlines. Each file path is relative to the project root. Don't forget to include all the subdirectories in the path -- sometimes you have forgotten to include 'src' in the path. Make sure that the file paths are exactly correct.
-- Do not write any introductory commentary.
-- Do not write any analysis or any English text at all.
+- List out the full paths of 12 files that are most relevant to the prompt, separated by newlines. Each file path is relative to the project root.
+- Don't forget to include all the subdirectories in the path — make sure the file paths are exactly correct.
+- Do not write any introductory commentary, analysis, or any English text at all.
 - Do not use any more tools. Do not call read_subtree again.
 
 Here's an example response with made up file paths (these are not real file paths, just an example):
diff --git a/agents/file-explorer/file-picker.ts b/agents/file-explorer/file-picker.ts
index 719b1211bf..6e112919d6 100644
--- a/agents/file-explorer/file-picker.ts
+++ b/agents/file-explorer/file-picker.ts
@@ -54,7 +54,13 @@ export const createFilePicker = (
     systemPrompt: `You are an expert at finding relevant files in a codebase. ${PLACEHOLDER.FILE_TREE_PROMPT}`,
     instructionsPrompt: `Instructions:
 Provide an extremely short report of the locations in the codebase that could be helpful. Focus on the files that are most relevant to the user prompt.
-In your report, please give a very concise analysis that includes the full paths of files that are relevant and (extremely briefly) how they could be useful.
+In your report, please give a very concise analysis that includes the full paths of files that are relevant and (extremely briefly) how they could be useful. Include the purpose of each file and what it contains.
+
+Think about:
+- Which files contain the code that needs to be modified?
+- Which files contain tests, configuration, or type definitions that are related?
+- Which files contain similar patterns or examples that could serve as reference implementations?
+- Which files define interfaces, types, or contracts that the implementation must satisfy?
 
 Do not use any further tools or spawn any further agents.
   `.trim(),
diff --git a/agents/researcher/researcher-docs.ts b/agents/researcher/researcher-docs.ts
index d7675c3f06..4d0eb7408d 100644
--- a/agents/researcher/researcher-docs.ts
+++ b/agents/researcher/researcher-docs.ts
@@ -22,8 +22,14 @@ const definition: SecretAgentDefinition = {
 
   systemPrompt: `You are an expert researcher who can read documentation to find relevant information. Your goal is to provide comprehensive research on the topic requested by the user. Use read_docs to get detailed documentation.`,
   instructionsPrompt: `Instructions:
-1. Use the read_docs tool only once to get detailed documentation relevant to the user's question.
-2. Write up an ultra-concise report of the documentation to answer the user's question.
+1. Use the read_docs tool to get detailed documentation relevant to the user's question. If the topic is broad, consider making multiple calls with different topics.
+2. Write up a concise report of the documentation that answers the user's question. Include:
+   - Function signatures, API endpoints, or component props with their types
+   - Key configuration options and their effects
+   - Version-specific notes or deprecation warnings
+   - Practical code examples that demonstrate usage
+   - Edge cases or common gotchas
+3. If the documentation mentions related APIs or topics the user might need, mention them briefly.
   `.trim(),
 }
 
diff --git a/agents/researcher/researcher-web.ts b/agents/researcher/researcher-web.ts
index 3be3071928..e13ec19453 100644
--- a/agents/researcher/researcher-web.ts
+++ b/agents/researcher/researcher-web.ts
@@ -20,15 +20,25 @@ const definition: SecretAgentDefinition = {
   spawnableAgents: [],
 
   systemPrompt: `You are an expert researcher who can search the web to find relevant information. Your goal is to answer the user's question from current search results and useful source pages. Use web_search to get Serper JSON search results. Use read_url to fetch and extract readable text from pages that would help answer the user's question.`,
-  instructionsPrompt: `Provide comprehensive research on the user's prompt.
+  instructionsPrompt: `Provide comprehensive research on the user's prompt. Be thorough and multi-faceted.
 
-Use web_search to find current information. The tool returns JSON search results, so inspect the titles, links, snippets, answer boxes, and related results before deciding what to fetch next.
+## Research Methodology
 
-Use read_url to fetch any web page that would help answer the user's question. Prefer targeted, relevant pages from the search results, especially official or primary sources. Avoid fetching pages that are unlikely to add useful evidence.
+1. **Search broadly first**: Use web_search with 2-3 different query formulations to maximize coverage. Check answer boxes, knowledge graphs, and related searches for quick insights.
 
-If read_url cannot handle a source, choose a different result or explain the limitation.
+2. **Fetch primary sources**: Use read_url to fetch the most authoritative pages you find. Prefer official documentation, specification pages, well-known tutorials, and official repositories. Avoid generic blogspam or low-authority sources.
+
+3. **Cross-reference**: Fetch 2-3 different sources to cross-reference key claims. If a claim appears in only one source (especially an unofficial one), note that it may not be authoritative.
+
+4. **Deep dive**: For specific technical questions, also search for version-specific docs, changelogs, migration guides, and known issues. These often contain critical details that general tutorials miss.
 
-Then, write up a concise answer that includes key findings for the user's prompt and cites source URLs when useful.
+5. **Synthesize**: Write up a concise but complete answer that includes:
+   - Key findings with specific details (API names, function signatures, configuration options)
+   - Important caveats, version requirements, or deprecation notices
+   - Source URLs for key claims
+   - Code examples when helpful
+
+If read_url cannot handle a source, choose a different result or explain the limitation.
 `.trim(),
 }
 
diff --git a/agents/reviewer/code-reviewer.ts b/agents/reviewer/code-reviewer.ts
index 31b261d992..7d160116d0 100644
--- a/agents/reviewer/code-reviewer.ts
+++ b/agents/reviewer/code-reviewer.ts
@@ -54,7 +54,31 @@ Before providing your review, use <think></think> tags to think through the code
 - Make sure the new code matches the style of the existing code.
 - Make sure there are no unnecessary try/catch blocks. Prefer to remove those.
 
-Be extremely concise.`,
+## Areas to Check
+
+### Correctness
+- Does the change handle edge cases (empty arrays, null values, boundary conditions)?
+- Are error states properly handled (not swallowed, not leaving system in inconsistent state)?
+- Are async operations properly awaited? Any unhandled promise rejections?
+- Are race conditions possible with async operations?
+
+### Security & Safety
+- Are user inputs validated/sanitized? Any injection risks (XSS, SQL injection, command injection)?
+- Any shell command construction that could be dangerous?
+- Any secrets or credentials being logged or exposed?
+
+### Performance & Resource Usage
+- Any unnecessary allocations, re-renders, or redundant computations?
+- Could large inputs cause performance issues (O(n²) algorithms, unbounded array growth)?
+- Are resources properly cleaned up (event listeners, intervals, file handles, DB connections)?
+
+### Maintainability
+- Is the code self-documenting? Would a newcomer understand it?
+- Are there magic numbers or strings that should be constants?
+- Is the change consistent with the surrounding code patterns?
+- Are test changes thorough? Do they cover the edge cases?
+
+Be extremely concise in your feedback. Prioritize the most impactful issues.`,
 
   handleSteps: function* ({ agentState, params }) {
     yield 'STEP'
diff --git a/agents/thinker/thinker-gemini.ts b/agents/thinker/thinker-gemini.ts
index 015461ed29..3435ad84af 100644
--- a/agents/thinker/thinker-gemini.ts
+++ b/agents/thinker/thinker-gemini.ts
@@ -13,8 +13,15 @@ const definition: SecretAgentDefinition = {
   outputSchema: undefined,
   outputMode: 'last_message',
   inheritParentSystemPrompt: false,
-  instructionsPrompt: `You are the thinker-gemini agent. Think about the user request and when satisfied, write out a very concise response that captures the most important points. DO NOT be verbose -- say the absolute minimum needed to answer the user's question correctly.
-  
+  instructionsPrompt: `You are the thinker-gemini agent — a fast, incisive reasoning agent powered by Gemini. Think about the user request and when satisfied, write out a very concise response that captures the most important points.
+
+## Process:
+1. Quickly identify the core of the problem
+2. Think through the most impactful factors
+3. Deliver the key insight in minimal words — no fluff, no padding, no filler
+
+DO NOT be verbose — say the absolute minimum needed to answer the user's question correctly.
+
 The parent agent will see your response. DO NOT call any tools. No need to spawn the thinker agent, because you are already the thinker agent. Just do the thinking work now.`,
   handleSteps: function* () {
     yield 'STEP'
diff --git a/agents/thinker/thinker-gpt.ts b/agents/thinker/thinker-gpt.ts
index 19ca7eb8e9..798ac79f1b 100644
--- a/agents/thinker/thinker-gpt.ts
+++ b/agents/thinker/thinker-gpt.ts
@@ -10,8 +10,15 @@ const definition: SecretAgentDefinition = {
   outputSchema: undefined,
   outputMode: 'last_message',
   inheritParentSystemPrompt: false,
-  instructionsPrompt: `You are the thinker-gpt agent. Think deeply about the user request and when satisfied, write out your response.
-  
+  instructionsPrompt: `You are the thinker-gpt agent — a deep-reasoning model with access to GPT-5's extended reasoning capabilities. Think deeply about the user request and when satisfied, write out your response.
+
+## Your thinking process:
+1. **Understand** — Paraphrase the problem to ensure you've understood it correctly
+2. **Decompose** — Break the problem into its constituent parts
+3. **Analyze** — Consider multiple angles, edge cases, and potential pitfalls
+4. **Synthesize** — Combine your analysis into a coherent, actionable response
+5. **Review** — Check your reasoning for flaws or gaps
+
 The parent agent will see your response. DO NOT call any tools. No need to spawn the thinker agent, because you are already the thinker agent. Just do the thinking work now.`,
   handleSteps: function* () {
     yield 'STEP_ALL'
diff --git a/agents/thinker/thinker.ts b/agents/thinker/thinker.ts
index 6a9f7d808d..d9affe90a8 100644
--- a/agents/thinker/thinker.ts
+++ b/agents/thinker/thinker.ts
@@ -35,9 +35,17 @@ const definition: SecretAgentDefinition = {
   toolNames: [],
 
   instructionsPrompt: `
-You are a thinker agent. Use the <think> tag to think deeply about the user request.
+You are a world-class reasoning agent. Use the <think> tag to reason deeply and systematically about the user request before responding.
 
-When satisfied, write out a brief response to the user's request. The parent agent will see your response -- no need to call any tools. DO NOT call the set_output tool, as that will be done for you.
+## Your reasoning methodology:
+
+1. **Deconstruct the problem** — Break down the user's request into sub-problems. Identify ambiguities, edge cases, and implicit requirements.
+2. **Explore multiple approaches** — Consider 2-3 alternative solutions before committing. Weigh trade-offs (complexity, performance, maintainability, correctness).
+3. **Validate assumptions** — Question every assumption. What could go wrong? What are the failure modes?
+4. **Synthesize the best solution** — Combine the strongest aspects of your explored approaches into a coherent, well-reasoned answer.
+5. **Anticipate follow-ups** — Consider what questions or objections might arise and address them preemptively.
+
+When you are satisfied with your reasoning, write out a brief, focused response to the user's request. The parent agent will see your response — no need to call any tools. DO NOT call the set_output tool, as that will be done for you.
 `.trim(),
 
   handleSteps: function* () {
diff --git a/cli/src/components/__tests__/message-with-agents.test.tsx b/cli/src/components/__tests__/message-with-agents.test.tsx
index ba7a67cb04..6d72ecc219 100644
--- a/cli/src/components/__tests__/message-with-agents.test.tsx
+++ b/cli/src/components/__tests__/message-with-agents.test.tsx
@@ -1,8 +1,11 @@
 import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { enableMapSet } from 'immer'
 import React from 'react'
 import { renderToStaticMarkup } from 'react-dom/server'
 
 import { initializeThemeStore } from '../../hooks/use-theme'
+
+enableMapSet()
 import { useChatStore } from '../../state/chat-store'
 import { useMessageBlockStore } from '../../state/message-block-store'
 import { chatThemes, createMarkdownPalette } from '../../utils/theme-system'
diff --git a/cli/src/hooks/__tests__/use-path-tab-completion.test.ts b/cli/src/hooks/__tests__/use-path-tab-completion.test.ts
index 9faa580a1e..703f6bfebb 100644
--- a/cli/src/hooks/__tests__/use-path-tab-completion.test.ts
+++ b/cli/src/hooks/__tests__/use-path-tab-completion.test.ts
@@ -36,7 +36,9 @@ const toRelativePath = (
   completed: string,
   currentPath: string,
 ): string | null => {
-  if (completed.startsWith(currentPath + path.sep)) {
+  // Use forward slash for path comparison since completion paths use Unix-style separators
+  const sep = '/'
+  if (completed.startsWith(currentPath + sep)) {
     return completed.slice(currentPath.length + 1)
   }
   return null
diff --git a/cli/src/init/__tests__/init-direnv.test.ts b/cli/src/init/__tests__/init-direnv.test.ts
index 9c5342b80e..362cd68157 100644
--- a/cli/src/init/__tests__/init-direnv.test.ts
+++ b/cli/src/init/__tests__/init-direnv.test.ts
@@ -156,6 +156,11 @@ describe('init-direnv', () => {
     })
 
     test('handles symlinked directories', () => {
+      // Symlinks require elevated privileges or Developer Mode on Windows
+      if (os.platform() === 'win32') {
+        return
+      }
+
       const actualDir = path.join(tempDir, 'actual')
       fs.mkdirSync(actualDir)
       fs.writeFileSync(path.join(actualDir, '.envrc'), 'export FOO=bar')
@@ -381,6 +386,9 @@ describe('init-direnv', () => {
     })
 
     test('sets environment variables from direnv export', () => {
+      // Direnv is not available on Windows; skip the test
+      if (os.platform() === 'win32') return
+
       fs.writeFileSync(path.join(tempDir, '.envrc'), 'export TEST_VAR=test_value')
       process.chdir(tempDir)
 
@@ -414,6 +422,9 @@ describe('init-direnv', () => {
     })
 
     test('unsets environment variables when direnv returns null', () => {
+      // Direnv is not available on Windows; skip the test
+      if (os.platform() === 'win32') return
+
       fs.writeFileSync(path.join(tempDir, '.envrc'), 'unset OLD_VAR')
       process.chdir(tempDir)
       process.env.OLD_VAR = 'should_be_removed'
diff --git a/common/src/testing/mock-modules.ts b/common/src/testing/mock-modules.ts
index e081853c5f..cce9a5d9d7 100644
--- a/common/src/testing/mock-modules.ts
+++ b/common/src/testing/mock-modules.ts
@@ -4,33 +4,26 @@ export type MockResult = {
   clear: () => void
 }
 
-const originalModuleCache: Record<string, any> = {}
 let mockModuleCache: Record<string, MockResult> = {}
 
 /**
+ * Mocks a module by applying bun's mock.module() directly.
  *
- * @param modulePath - the path starting from this files' path.
- * @param renderMocks - function to generate mocks (by their named or default exports)
- * @returns an object
+ * The original module is deliberately NOT imported first: eagerly importing a
+ * module with side effects (env validation, DB client creation) can cause
+ * "cannot access X before initialization" (TDZ) errors under circular imports
+ * and env validation failures. Only the exports returned by renderMocks are
+ * registered; clear() drops the cache entry but does not restore the original.
+ *
+ * @param modulePath - the module path to mock (e.g. '@codebuff/internal/db')
+ * @param renderMocks - function returning the mock exports object
  */
 export async function mockModule(
   modulePath: string,
   renderMocks: () => Record<string, any>,
 ): Promise<MockResult> {
-  let original = originalModuleCache[modulePath]
-  if (!original) {
-    const moduleExports = await import(modulePath)
-    original = {
-      ...moduleExports,
-    }
-    originalModuleCache[modulePath] = original
-  }
   let mocks = renderMocks()
-  let result = {
-    ...original,
-    ...mocks,
-  }
-  mock.module(modulePath, () => result)
+  mock.module(modulePath, () => mocks)
   let num = 0
   let key = modulePath
   while (key in mockModuleCache) {
@@ -39,7 +32,6 @@ export async function mockModule(
   }
   const mocked: MockResult = {
     clear: () => {
-      mock.module(modulePath, () => original)
       delete mockModuleCache[key]
     },
   }
diff --git a/common/src/testing/mocks/filesystem.ts b/common/src/testing/mocks/filesystem.ts
index 6c9703622e..9e6dccd653 100644
--- a/common/src/testing/mocks/filesystem.ts
+++ b/common/src/testing/mocks/filesystem.ts
@@ -2,7 +2,12 @@ import { mock } from 'bun:test'
 
 import type { CodebuffFileSystem } from '../../types/filesystem'
 import type { Mock } from 'bun:test'
-import type { PathLike , Stats } from 'node:fs'
+import type { PathLike, Stats } from 'node:fs'
+
+/** Normalize a path for mock lookups: backslashes become '/', a leading drive letter (e.g. "C:") is dropped. */
+function normalizePath(p: string): string {
+  return p.replace(/\\/g, '/').replace(/^[a-zA-Z]:/, '')
+}
 
 export interface CreateMockFsOptions {
   files?: Record<string, string>
@@ -50,7 +55,7 @@ export function createMockFs(options: CreateMockFsOptions = {}): MockFs {
   const createdDirs: Set<string> = new Set(Object.keys(directories))
 
   const defaultReadFile = async (path: PathLike): Promise<string> => {
-    const pathStr = String(path)
+    const pathStr = normalizePath(String(path))
     if (pathStr in writtenFiles) {
       return writtenFiles[pathStr]
     }
@@ -58,7 +63,7 @@ export function createMockFs(options: CreateMockFsOptions = {}): MockFs {
   }
 
   const defaultReaddir = async (path: PathLike): Promise<string[]> => {
-    const pathStr = String(path)
+    const pathStr = normalizePath(String(path))
     if (pathStr in directories) {
       return directories[pathStr]
     }
@@ -69,18 +74,18 @@ export function createMockFs(options: CreateMockFsOptions = {}): MockFs {
     path: PathLike,
     data: string,
   ): Promise<void> => {
-    const pathStr = String(path)
+    const pathStr = normalizePath(String(path))
     writtenFiles[pathStr] = data
   }
 
   const defaultMkdir = async (path: PathLike): Promise<string | undefined> => {
-    const pathStr = String(path)
+    const pathStr = normalizePath(String(path))
     createdDirs.add(pathStr)
     return undefined
   }
 
   const defaultStat = async (path: PathLike): Promise<Stats> => {
-    const pathStr = String(path)
+    const pathStr = normalizePath(String(path))
     const isFile = pathStr in writtenFiles
     const isDir = pathStr in directories || createdDirs.has(pathStr)
 
@@ -118,24 +123,25 @@ export function createMockFs(options: CreateMockFsOptions = {}): MockFs {
   }
 
   const readFileFn = readFileImpl
-    ? async (path: PathLike) => readFileImpl(String(path))
+    ? async (path: PathLike) => readFileImpl(normalizePath(String(path)))
     : defaultReadFile
 
   const readdirFn = readdirImpl
-    ? async (path: PathLike) => readdirImpl(String(path))
+    ? async (path: PathLike) => readdirImpl(normalizePath(String(path)))
     : defaultReaddir
 
   const writeFileFn = writeFileImpl
-    ? async (path: PathLike, data: string) => writeFileImpl(String(path), data)
+    ? async (path: PathLike, data: string) =>
+        writeFileImpl(normalizePath(String(path)), data)
     : defaultWriteFile
 
   const mkdirFn = mkdirImpl
     ? async (path: PathLike, opts?: { recursive?: boolean }) =>
-        mkdirImpl(String(path), opts)
+        mkdirImpl(normalizePath(String(path)), opts)
     : defaultMkdir
 
   const statFn = statImpl
-    ? async (path: PathLike) => statImpl(String(path))
+    ? async (path: PathLike) => statImpl(normalizePath(String(path)))
     : defaultStat
 
   return {
diff --git a/packages/internal/src/db/index.ts b/packages/internal/src/db/index.ts
index b3cd973a78..14b4a0ece8 100644
--- a/packages/internal/src/db/index.ts
+++ b/packages/internal/src/db/index.ts
@@ -12,10 +12,3 @@ const client = postgres(env.DATABASE_URL)
 export const db: CodebuffPgDatabase = drizzle(client, { schema })
 export default db
 
-// Re-export advisory lock utilities
-export {
-  ADVISORY_LOCK_IDS,
-  coerceBool,
-  tryAcquireAdvisoryLock,
-} from './advisory-lock'
-export type { LockHandle, AdvisoryLockId } from './advisory-lock'
diff --git a/packages/internal/src/env.ts b/packages/internal/src/env.ts
index ca4bd25c34..02185ccd11 100644
--- a/packages/internal/src/env.ts
+++ b/packages/internal/src/env.ts
@@ -26,6 +26,15 @@ if (isCI) {
   ensureEnvDefault('IPINFO_TOKEN', 'test')
   ensureEnvDefault('SPUR_TOKEN', 'test')
   ensureEnvDefault('PORT', '4242')
+  // Client env vars (NEXT_PUBLIC_*) needed for env schema validation
+  ensureEnvDefault('NEXT_PUBLIC_CB_ENVIRONMENT', 'test')
+  ensureEnvDefault('NEXT_PUBLIC_CODEBUFF_APP_URL', 'https://example.com')
+  ensureEnvDefault('NEXT_PUBLIC_SUPPORT_EMAIL', 'test@example.com')
+  ensureEnvDefault('NEXT_PUBLIC_POSTHOG_API_KEY', 'test')
+  ensureEnvDefault('NEXT_PUBLIC_POSTHOG_HOST_URL', 'https://example.com')
+  ensureEnvDefault('NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY', 'pk_test_dummy')
+  ensureEnvDefault('NEXT_PUBLIC_STRIPE_CUSTOMER_PORTAL', 'https://example.com')
+  ensureEnvDefault('NEXT_PUBLIC_WEB_PORT', '4242')
   ensureEnvDefault('DATABASE_URL', 'postgres://user:pass@localhost:5432/db')
   ensureEnvDefault('CODEBUFF_GITHUB_ID', 'test-id')
   ensureEnvDefault('CODEBUFF_GITHUB_SECRET', 'test-secret')
diff --git a/sdk/src/__tests__/code-search.test.ts b/sdk/src/__tests__/code-search.test.ts
index 2cad255613..4bc35bc28e 100644
--- a/sdk/src/__tests__/code-search.test.ts
+++ b/sdk/src/__tests__/code-search.test.ts
@@ -1,3 +1,5 @@
+import path from 'path'
+
 import {
   clearMockedModules,
   mockModule,
@@ -848,7 +850,7 @@ describe('codeSearch', () => {
       expect(mockSpawn).toHaveBeenCalled()
       const spawnOptions = mockSpawn.mock.calls[0]![2] as { cwd: string }
       // When cwd is '.', it should resolve to the project root
-      expect(spawnOptions.cwd).toBe('/test/project')
+      expect(spawnOptions.cwd).toBe(path.resolve('/test/project'))
     })
 
     it('should handle cwd: "subdir" correctly', async () => {
@@ -872,7 +874,7 @@ describe('codeSearch', () => {
       // Verify spawn was called with correct cwd
       expect(mockSpawn).toHaveBeenCalled()
       const spawnOptions = mockSpawn.mock.calls[0]![2] as { cwd: string }
-      expect(spawnOptions.cwd).toBe('/test/project/subdir')
+      expect(spawnOptions.cwd).toBe(path.resolve('/test/project', 'subdir'))
     })
 
     it('should reject cwd outside project directory', async () => {
diff --git a/sdk/src/__tests__/initial-session-state.test.ts b/sdk/src/__tests__/initial-session-state.test.ts
index d8e8d2abb1..46b0a5e37d 100644
--- a/sdk/src/__tests__/initial-session-state.test.ts
+++ b/sdk/src/__tests__/initial-session-state.test.ts
@@ -11,6 +11,8 @@ import type { MockStatResult } from '@codebuff/common/testing/mock-types'
 import type { Logger } from '@codebuff/common/types/contracts/logger'
 import type { CodebuffFileSystem } from '@codebuff/common/types/filesystem'
 
+const normalizeMockPath = (p: string) => p.replace(/\\/g, '/')
+
 describe('Initial Session State', () => {
   let mockFs: CodebuffFileSystem
   let mockLogger: Logger
@@ -18,31 +20,33 @@ describe('Initial Session State', () => {
   beforeEach(() => {
     mockFs = {
       readFile: async (path: string) => {
-        if (path.includes('src/index.ts')) {
+        const p = normalizeMockPath(path)
+        if (p.includes('src/index.ts')) {
           return 'console.log("Hello world");'
         }
-        if (path.includes('src/utils.ts')) {
+        if (p.includes('src/utils.ts')) {
           return 'export function add(a: number, b: number) { return a + b; }'
         }
-        if (path.includes('knowledge.md')) {
+        if (p.includes('knowledge.md')) {
           return '# Knowledge\n\nThis is a knowledge file.'
         }
-        if (path.includes('README.md')) {
+        if (p.includes('README.md')) {
           return '# Project\n\nThis is a readme.'
         }
-        if (path.includes('.gitignore')) {
+        if (p.includes('.gitignore')) {
           return 'node_modules/\n.git/'
         }
-        if (path.includes('.codebuffignore')) {
+        if (p.includes('.codebuffignore')) {
           return ''
         }
-        if (path.includes('.manicodeignore')) {
+        if (p.includes('.manicodeignore')) {
           return ''
         }
         throw new Error(`File not found: ${path}`)
       },
       readdir: async (path: string) => {
-        if (path.includes('test-project')) {
+        const p = normalizeMockPath(path)
+        if (p.includes('test-project')) {
           return [
             { name: 'src', isDirectory: () => true, isFile: () => false },
             { name: '.git', isDirectory: () => true, isFile: () => false },
@@ -59,7 +63,7 @@ describe('Initial Session State', () => {
             },
           ]
         }
-        if (path.includes('src')) {
+        if (p.includes('src')) {
           return [
             { name: 'index.ts', isDirectory: () => false, isFile: () => true },
             { name: 'utils.ts', isDirectory: () => false, isFile: () => true },
@@ -67,18 +71,22 @@ describe('Initial Session State', () => {
         }
         return []
       },
-      stat: async (path: string): Promise<MockStatResult> => ({
-        isDirectory: () => path.includes('src') || path.includes('.git'),
-        isFile: () => !path.includes('src') && !path.includes('.git'),
-      }),
+      stat: async (path: string): Promise<MockStatResult> => {
+        const p = normalizeMockPath(path)
+        return {
+          isDirectory: () => p.includes('src') || p.includes('.git'),
+          isFile: () => !p.includes('src') && !p.includes('.git'),
+        }
+      },
       exists: async (path: string) => {
-        if (path.includes('.gitignore')) return true
-        if (path.includes('.codebuffignore')) return true
-        if (path.includes('.manicodeignore')) return true
-        if (path.includes('src')) return true
-        if (path.includes('.git')) return true
-        if (path.includes('knowledge.md')) return true
-        if (path.includes('README.md')) return true
+        const p = normalizeMockPath(path)
+        if (p.includes('.gitignore')) return true
+        if (p.includes('.codebuffignore')) return true
+        if (p.includes('.manicodeignore')) return true
+        if (p.includes('src')) return true
+        if (p.includes('.git')) return true
+        if (p.includes('knowledge.md')) return true
+        if (p.includes('README.md')) return true
         return false
       },
       mkdir: async () => {},
@@ -117,10 +125,11 @@ describe('Initial Session State', () => {
 
   test('discovers project files automatically when projectFiles is undefined', async () => {
     mockFs.readdir = (async (dirPath: string) => {
-      if (dirPath === '/test-project') {
+      const p = normalizeMockPath(dirPath)
+      if (p === '/test-project') {
         return ['src', '.git', 'knowledge.md', 'README.md', '.gitignore']
       }
-      if (dirPath === '/test-project/src') {
+      if (p === '/test-project/src') {
         return ['index.ts', 'utils.ts', 'generated.ts']
       }
       return []
@@ -128,16 +137,20 @@ describe('Initial Session State', () => {
     mockFs.stat = (async (filePath: string) =>
       ({
         isDirectory: () =>
-          filePath === '/test-project/src' || filePath === '/test-project/.git',
+          normalizeMockPath(filePath) === '/test-project/src' ||
+          normalizeMockPath(filePath) === '/test-project/.git',
         isFile: () =>
-          filePath !== '/test-project/src' && filePath !== '/test-project/.git',
-        size: filePath.endsWith('generated.ts') ? 1_000_001 : 100,
+          normalizeMockPath(filePath) !== '/test-project/src' &&
+          normalizeMockPath(filePath) !== '/test-project/.git',
+        size: normalizeMockPath(filePath).endsWith('generated.ts')
+          ? 1_000_001
+          : 100,
       }) as MockStatResult & { size: number }) as CodebuffFileSystem['stat']
 
     const readFilePaths: string[] = []
     const originalReadFile = mockFs.readFile
     mockFs.readFile = (async (filePath: string, encoding?: BufferEncoding) => {
-      readFilePaths.push(filePath)
+      readFilePaths.push(normalizeMockPath(filePath))
       return originalReadFile(filePath, encoding)
     }) as CodebuffFileSystem['readFile']
 
diff --git a/sdk/src/__tests__/path-utils.test.ts b/sdk/src/__tests__/path-utils.test.ts
index 4910dbcaf1..ec08694744 100644
--- a/sdk/src/__tests__/path-utils.test.ts
+++ b/sdk/src/__tests__/path-utils.test.ts
@@ -1,3 +1,4 @@
+import path from 'path'
 import { describe, expect, test } from 'bun:test'
 
 import {
@@ -8,21 +9,21 @@ import {
 describe('resolveFilePathWithinProject', () => {
   test('normalizes relative paths to full and project-relative paths', () => {
     expect(resolveFilePathWithinProject('/repo', 'src/file.ts')).toEqual({
-      fullPath: '/repo/src/file.ts',
+      fullPath: path.resolve('/repo', 'src/file.ts').replace(/\\/g, '/'),
       relativePath: 'src/file.ts',
     })
   })
 
   test('normalizes absolute paths inside the project', () => {
     expect(resolveFilePathWithinProject('/repo', '/repo/src/file.ts')).toEqual({
-      fullPath: '/repo/src/file.ts',
+      fullPath: path.resolve('/repo', 'src/file.ts').replace(/\\/g, '/'),
       relativePath: 'src/file.ts',
     })
   })
 
   test('allows file names that start with two dots inside the project', () => {
     expect(resolveFilePathWithinProject('/repo', '/repo/..config')).toEqual({
-      fullPath: '/repo/..config',
+      fullPath: path.resolve('/repo', '..config').replace(/\\/g, '/'),
       relativePath: '..config',
     })
   })
diff --git a/sdk/src/__tests__/read-files.test.ts b/sdk/src/__tests__/read-files.test.ts
index afcafb7aca..0e88a82fea 100644
--- a/sdk/src/__tests__/read-files.test.ts
+++ b/sdk/src/__tests__/read-files.test.ts
@@ -17,6 +17,11 @@ import type { CodebuffFileSystem } from '@codebuff/common/types/filesystem'
 import type { PathLike } from 'node:fs'
 
 // Helper to create a mock filesystem
+function normalizeMockPath(p: string): string {
+  // Normalize Windows backslashes to forward slashes and strip drive letters
+  return p.replace(/\\/g, '/').replace(/^[a-zA-Z]:\//, '/')
+}
+
 function createMockFs(config: {
   files?: Record<string, { content: string; size?: number }>
   errors?: Record<string, { code?: string; message?: string }>
@@ -25,7 +30,7 @@ function createMockFs(config: {
 
   return {
     readFile: async (filePath: PathLike) => {
-      const pathStr = String(filePath)
+      const pathStr = normalizeMockPath(String(filePath))
       if (errors[pathStr]) {
         throw createNodeError(
           errors[pathStr].message || 'Unknown error',
@@ -41,7 +46,7 @@ function createMockFs(config: {
       )
     },
     stat: async (filePath: PathLike) => {
-      const pathStr = String(filePath)
+      const pathStr = normalizeMockPath(String(filePath))
       if (errors[pathStr]) {
         throw createNodeError(
           errors[pathStr].message || 'Unknown error',
diff --git a/sdk/src/__tests__/run-file-filter.test.ts b/sdk/src/__tests__/run-file-filter.test.ts
index 5d1be280a2..8eee93da8d 100644
--- a/sdk/src/__tests__/run-file-filter.test.ts
+++ b/sdk/src/__tests__/run-file-filter.test.ts
@@ -27,9 +27,11 @@ function createMockFs(config: {
 }): CodebuffFileSystem {
   const { files = {} } = config
 
+  const normalizePath = (p: string) => p.replace(/\\/g, '/').replace(/^[a-zA-Z]:/, '')
+
   return {
     readFile: async (filePath: PathLike) => {
-      const pathStr = String(filePath)
+      const pathStr = normalizePath(String(filePath))
       if (files[pathStr]) {
         return files[pathStr].content
       }
@@ -39,7 +41,7 @@ function createMockFs(config: {
       )
     },
     stat: async (filePath: PathLike) => {
-      const pathStr = String(filePath)
+      const pathStr = normalizePath(String(filePath))
       if (files[pathStr]) {
         return {
           size: files[pathStr].size ?? files[pathStr].content.length,
diff --git a/sdk/src/tools/path-utils.ts b/sdk/src/tools/path-utils.ts
index 92fe8a1325..910af20c2e 100644
--- a/sdk/src/tools/path-utils.ts
+++ b/sdk/src/tools/path-utils.ts
@@ -27,7 +27,10 @@ export function resolveFilePathWithinProject(
     return null
   }
 
-  return { fullPath, relativePath }
+  return {
+    fullPath: fullPath.replace(/\\/g, '/'),
+    relativePath: relativePath.replace(/\\/g, '/'),
+  }
 }
 
 export function getProjectPathLookupKeys(
diff --git a/web/scripts/discord/index.ts b/web/scripts/discord/index.ts
index adba5baf03..e93e206912 100644
--- a/web/scripts/discord/index.ts
+++ b/web/scripts/discord/index.ts
@@ -3,11 +3,11 @@ import os from 'os'
 import {
   ADVISORY_LOCK_IDS,
   tryAcquireAdvisoryLock,
-} from '@codebuff/internal/db'
+} from '@codebuff/internal/db/advisory-lock'
 
 import { startDiscordBot } from '../../src/discord/client'
 
-import type { LockHandle } from '@codebuff/internal/db'
+import type { LockHandle } from '@codebuff/internal/db/advisory-lock'
 import type { Client } from 'discord.js'
 
 const LOCK_RETRY_INTERVAL_MS = 30_000 // 30 seconds