Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 70 additions & 2 deletions web/src/app/api/v1/chat/completions/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ import {
handleDeepSeekStream,
isDeepSeekModel,
} from '@/llm-api/deepseek'
import {
isLikelyDeepSeekOutage,
shouldBypassDeepSeek,
} from '@/llm-api/deepseek-health'
import {
handleMoonshotNonStream,
handleMoonshotStream,
Expand Down Expand Up @@ -118,6 +122,23 @@ import { withDefaultProperties } from '@codebuff/common/analytics'
import { checkFreeModeRateLimit as defaultCheckFreeModeRateLimit } from './free-mode-rate-limiter'
import { beginChatCompletionRequestMetrics } from './request-metrics'

/**
 * Reports whether an error thrown by a DeepSeek request qualifies for a
 * transparent retry against Fireworks.
 *
 * Both of the following must hold:
 * - `isFireworksModel(model)` is true, i.e. the model has a Fireworks route
 *   to retry on; and
 * - `isLikelyDeepSeekOutage` classifies the failure as an outage
 *   (pre-stream network/timeout/5xx style errors). For a `DeepSeekError`
 *   only its HTTP `statusCode` is classified; any other thrown value is
 *   handed to the classifier unchanged.
 *
 * This function never records anything on the circuit breaker itself; the
 * failure was already recorded inside the DeepSeek handler.
 *
 * @param error - The value caught from the DeepSeek handler.
 * @param model - The model id of the request being served.
 * @returns `true` when the caller should retry the request on Fireworks.
 */
function canFailoverDeepSeekToFireworks(
  error: unknown,
  model: string,
): boolean {
  // Without a Fireworks route there is nothing to fail over to.
  if (!isFireworksModel(model)) {
    return false
  }

  const looksLikeOutage =
    error instanceof DeepSeekError
      ? // Typed DeepSeek errors are judged purely by their HTTP status.
        isLikelyDeepSeekOutage(undefined, error.statusCode)
      : // Anything else (network errors, aborts, ...) is judged as thrown.
        isLikelyDeepSeekOutage(error)

  return looksLikeOutage
}

export const formatQuotaResetCountdown = (
nextQuotaReset: string | null | undefined,
): string => {
Expand Down Expand Up @@ -814,10 +835,18 @@ export async function postChatCompletions(params: {
// Provider selection for the streaming path is a priority chain: each flag
// below is forced to false when an earlier provider has already matched.
const useMoonshot = !useOpenCodeZen && isMoonshotModel(typedBody.model)
const useCanopyWave =
!useMoonshot && !useOpenCodeZen && isCanopyWaveModel(typedBody.model)
// Ask the DeepSeek health module whether this model should skip DeepSeek
// right now (per the log message below, this means the circuit is open).
const deepseekBypassed = shouldBypassDeepSeek(typedBody.model)
if (deepseekBypassed) {
providerLogger.info(
{ model: typedBody.model },
'DeepSeek circuit open — routing to Fireworks fallback',
)
}
// A bypassed model never selects DeepSeek, so the request falls through to
// the provider checks that follow.
// NOTE(review): presumably `useFireworks` picks it up — the rest of that
// expression is outside this hunk; confirm.
const useDeepSeek =
!useMoonshot &&
!useOpenCodeZen &&
!useCanopyWave &&
!deepseekBypassed &&
isDeepSeekModel(typedBody.model)
const useFireworks =
!useMoonshot &&
Expand All @@ -841,6 +870,23 @@ export async function postChatCompletions(params: {
logger: providerLogger,
insertMessageBigquery,
}
/**
 * Runs the streaming DeepSeek handler and, when it throws an error that
 * `canFailoverDeepSeekToFireworks` accepts for this model, logs a warning and
 * retries the same request through the streaming Fireworks handler.
 * Errors that are not eligible for failover are rethrown untouched; errors
 * thrown by the Fireworks retry itself propagate to the caller.
 */
const callDeepSeekStream = async () => {
  try {
    return await handleDeepSeekStream(baseArgs)
  } catch (deepSeekFailure) {
    const eligibleForFailover = canFailoverDeepSeekToFireworks(
      deepSeekFailure,
      typedBody.model,
    )
    // Not an outage-style failure (or no Fireworks route): surface it as-is.
    if (!eligibleForFailover) {
      throw deepSeekFailure
    }

    providerLogger.warn(
      {
        model: typedBody.model,
        error: getErrorObject(deepSeekFailure),
      },
      'DeepSeek failed pre-stream — falling back to Fireworks',
    )
    return await handleFireworksStream(baseArgs)
  }
}
const stream = useSiliconFlow
? await handleSiliconFlowStream(baseArgs)
: useMoonshot
Expand All @@ -850,7 +896,7 @@ export async function postChatCompletions(params: {
: useCanopyWave
? await handleCanopyWaveStream(baseArgs)
: useDeepSeek
? await handleDeepSeekStream(baseArgs)
? await callDeepSeekStream()
: useFireworks
? await handleFireworksStream(baseArgs)
: useOpenAIDirect
Expand Down Expand Up @@ -886,10 +932,18 @@ export async function postChatCompletions(params: {
// Provider selection for the non-streaming path is a priority chain: each
// flag below is forced to false when an earlier provider has already matched.
const useMoonshot = !useOpenCodeZen && isMoonshotModel(model)
const useCanopyWave =
!useMoonshot && !useOpenCodeZen && isCanopyWaveModel(model)
// Ask the DeepSeek health module whether this model should skip DeepSeek
// right now (per the log message below, this means the circuit is open).
const deepseekBypassed = shouldBypassDeepSeek(model)
if (deepseekBypassed) {
providerLogger.info(
{ model },
'DeepSeek circuit open — routing to Fireworks fallback',
)
}
// A bypassed model never selects DeepSeek, so the request falls through to
// the provider checks that follow.
// NOTE(review): presumably `useFireworks` picks it up — the rest of that
// expression is outside this hunk; confirm.
const useDeepSeek =
!useMoonshot &&
!useOpenCodeZen &&
!useCanopyWave &&
!deepseekBypassed &&
isDeepSeekModel(model)
const useFireworks =
!useMoonshot &&
Expand All @@ -914,6 +968,20 @@ export async function postChatCompletions(params: {
logger: providerLogger,
insertMessageBigquery,
}
/**
 * Runs the non-streaming DeepSeek handler and, when it throws an error that
 * `canFailoverDeepSeekToFireworks` accepts for this model, logs a warning and
 * retries the same request through the non-streaming Fireworks handler.
 * Errors that are not eligible for failover are rethrown untouched; errors
 * thrown by the Fireworks retry itself propagate to the caller.
 */
const callDeepSeekNonStream = async () => {
  try {
    return await handleDeepSeekNonStream(baseArgs)
  } catch (deepSeekFailure) {
    // Not an outage-style failure (or no Fireworks route): surface it as-is.
    if (!canFailoverDeepSeekToFireworks(deepSeekFailure, model)) {
      throw deepSeekFailure
    }

    const logContext = { model, error: getErrorObject(deepSeekFailure) }
    providerLogger.warn(
      logContext,
      'DeepSeek failed — falling back to Fireworks',
    )
    return await handleFireworksNonStream(baseArgs)
  }
}
const nonStreamRequest = useSiliconFlow
? handleSiliconFlowNonStream(baseArgs)
: useMoonshot
Expand All @@ -923,7 +991,7 @@ export async function postChatCompletions(params: {
: useCanopyWave
? handleCanopyWaveNonStream(baseArgs)
: useDeepSeek
? handleDeepSeekNonStream(baseArgs)
? callDeepSeekNonStream()
: useFireworks
? handleFireworksNonStream(baseArgs)
: shouldUseOpenAIEndpoint
Expand Down
113 changes: 113 additions & 0 deletions web/src/llm-api/__tests__/deepseek-health.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { afterEach, beforeEach, describe, expect, it } from 'bun:test'

import { deepseekModels } from '@codebuff/common/constants/model-config'

import {
DEEPSEEK_CIRCUIT_CONFIG,
__resetDeepSeekCircuitForTests,
isDeepSeekCircuitOpen,
isLikelyDeepSeekOutage,
recordDeepSeekFailure,
recordDeepSeekSuccess,
shouldBypassDeepSeek,
} from '../deepseek-health'

// Exercises the circuit-breaker state machine: closed by default, opened by
// FAILURE_THRESHOLD recorded failures, closed again by a recorded success,
// and only bypassing the v4-flash model variants while open.
describe('DeepSeek circuit breaker', () => {
  /** Records `count` DeepSeek failures back to back. */
  const recordFailures = (count: number): void => {
    for (let recorded = 0; recorded < count; recorded += 1) {
      recordDeepSeekFailure()
    }
  }

  /** Records exactly enough failures to reach the configured threshold. */
  const tripCircuit = (): void => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD)
  }

  // Circuit state is module-level, so reset it on both sides of every test.
  beforeEach(() => {
    __resetDeepSeekCircuitForTests()
  })
  afterEach(() => {
    __resetDeepSeekCircuitForTests()
  })

  it('starts closed', () => {
    expect(isDeepSeekCircuitOpen()).toBe(false)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Flash)).toBe(false)
  })

  it('stays closed after fewer failures than threshold', () => {
    recordFailures(DEEPSEEK_CIRCUIT_CONFIG.FAILURE_THRESHOLD - 1)

    expect(isDeepSeekCircuitOpen()).toBe(false)
  })

  it('opens after threshold failures in the window', () => {
    tripCircuit()

    expect(isDeepSeekCircuitOpen()).toBe(true)
  })

  it('only bypasses v4-flash variants, not v4-pro', () => {
    tripCircuit()

    // Flash variants are bypassed while the circuit is open...
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Flash)).toBe(true)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4FlashDirect)).toBe(
      true,
    )
    // ...but pro variants and non-DeepSeek models are not.
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Pro)).toBe(false)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4ProDirect)).toBe(false)
    expect(shouldBypassDeepSeek('anthropic/claude-sonnet-4.5')).toBe(false)
  })

  it('resets on success', () => {
    tripCircuit()
    expect(isDeepSeekCircuitOpen()).toBe(true)

    recordDeepSeekSuccess()

    expect(isDeepSeekCircuitOpen()).toBe(false)
    expect(shouldBypassDeepSeek(deepseekModels.deepseekV4Flash)).toBe(false)
  })
})

// Pins the outage classifier: which HTTP statuses and which thrown values
// count as a DeepSeek outage, and which do not.
describe('isLikelyDeepSeekOutage', () => {
  it('treats 5xx, 408, 429 as outages', () => {
    const outageStatuses = [500, 502, 503, 504, 408, 429]
    for (const status of outageStatuses) {
      expect(isLikelyDeepSeekOutage(undefined, status)).toBe(true)
    }
  })

  it('does not treat 4xx (other than 408/429) as outages', () => {
    const clientErrorStatuses = [400, 401, 403, 404]
    for (const status of clientErrorStatuses) {
      expect(isLikelyDeepSeekOutage(undefined, status)).toBe(false)
    }
  })

  it('classifies undici header-timeout errors as outages', () => {
    const headerTimeout: Error & { code?: string } = new Error(
      'Headers Timeout Error',
    )
    headerTimeout.code = 'UND_ERR_HEADERS_TIMEOUT'

    expect(isLikelyDeepSeekOutage(headerTimeout)).toBe(true)
  })

  it('classifies common network errors as outages', () => {
    const networkCodes = [
      'ECONNRESET',
      'ECONNREFUSED',
      'ENOTFOUND',
      'ETIMEDOUT',
    ]
    networkCodes.forEach((code) => {
      const networkError = Object.assign(new Error('boom'), { code })
      expect(isLikelyDeepSeekOutage(networkError)).toBe(true)
    })
  })

  it('classifies AbortError as outage', () => {
    const aborted = Object.assign(new Error('aborted'), { name: 'AbortError' })

    expect(isLikelyDeepSeekOutage(aborted)).toBe(true)
  })

  it('treats generic non-network errors as non-outage', () => {
    // A plain Error, a missing error, and a non-Error value, in that order.
    const nonOutages: unknown[] = [new Error('bad json'), undefined, 'string']
    for (const candidate of nonOutages) {
      expect(isLikelyDeepSeekOutage(candidate)).toBe(false)
    }
  })
})
Loading
Loading