diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index d66fba5aaf..fd6bfd57c8 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -514,9 +514,8 @@ export function useFreebuffSession(): UseFreebuffSessionResult { return } if (next.status === 'model_unavailable') { - // Server says the requested model isn't available right now (e.g. - // legacy GLM 5.1 outside deployment hours). Flip to the - // always-available fallback for this run. In-memory only — + // Server says the requested model isn't available right now. Flip + // to the always-available fallback for this run. In-memory only — // `setSelectedModel` doesn't persist, so the user's saved preference // is preserved for their next launch. useFreebuffModelStore @@ -637,15 +636,15 @@ export function useFreebuffSession(): UseFreebuffSessionResult { if (response.status === 'none' || response.status === 'queued') { apply({ status: 'none', - accessTier: - response.accessTier ?? landingSession.accessTier, + accessTier: response.accessTier ?? landingSession.accessTier, queueDepthByModel: response.queueDepthByModel ?? landingSession.queueDepthByModel, rateLimitsByModel: response.rateLimitsByModel ?? landingSession.rateLimitsByModel, - countryCode: response.countryCode ?? landingSession.countryCode, + countryCode: + response.countryCode ?? landingSession.countryCode, countryBlockReason: response.countryBlockReason ?? landingSession.countryBlockReason, diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index ee39ed975b..ca0a020419 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -5,7 +5,6 @@ import { DEFAULT_FREEBUFF_MODEL_ID, FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, LIMITED_FREEBUFF_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, @@ -84,15 +83,14 @@ describe('freebuff model availability', () => { ).toBe(false) }) - test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => { - expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain( - FREEBUFF_GLM_MODEL_ID, + test('does not support GLM 5.1 for freebuff sessions', () => { + const glm = 'z-ai/glm-5.1' + expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain(glm) + expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).not.toContain( + glm, ) - expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).toContain( - FREEBUFF_GLM_MODEL_ID, - ) - expect(isFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(false) - expect(isSupportedFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(true) + expect(isFreebuffModelId(glm)).toBe(false) + expect(isSupportedFreebuffModelId(glm)).toBe(false) }) test('formats the close time in the user local timezone while deployment is open', () => { diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 2d1a55c7ff..9b8c8bb055 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -5,7 +5,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, SUPPORTED_FREEBUFF_MODELS, @@ -68,7 +67,6 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ FREEBUFF_MINIMAX_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, @@ -94,10 +92,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'tmux-cli': new Set([FREEBUFF_MINIMAX_MODEL_ID]), // Code reviewer for free mode - 'code-reviewer-minimax': new Set([ - FREEBUFF_MINIMAX_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, - ]), + 'code-reviewer-minimax': new Set([FREEBUFF_MINIMAX_MODEL_ID]), 'code-reviewer-kimi': new Set([FREEBUFF_KIMI_MODEL_ID]), 'code-reviewer-deepseek': new Set([FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]), 'code-reviewer-deepseek-flash': new Set([ diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 715b258b50..95f79644a9 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -35,7 +35,6 @@ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview' export const FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID = 'deepseek/deepseek-v4-pro' export const FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID = 'deepseek/deepseek-v4-flash' -export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' export const FREEBUFF_PREMIUM_SESSION_LIMIT = 5 @@ -102,29 +101,15 @@ export const FREEBUFF_MODELS = [ }, ] as const satisfies readonly FreebuffModelOption[] -export const LEGACY_FREEBUFF_MODELS = [ - { - id: FREEBUFF_GLM_MODEL_ID, - displayName: 'GLM 5.1', - tagline: 'Legacy', - availability: 'deployment_hours', - }, -] as const satisfies readonly FreebuffModelOption[] - export const FREEBUFF_PREMIUM_MODEL_IDS = [ FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, ] as const -export const SUPPORTED_FREEBUFF_MODELS = [ - ...FREEBUFF_MODELS, - ...LEGACY_FREEBUFF_MODELS, -] as const satisfies readonly FreebuffModelOption[] +export const SUPPORTED_FREEBUFF_MODELS = FREEBUFF_MODELS export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] -export type SupportedFreebuffModelId = - (typeof SUPPORTED_FREEBUFF_MODELS)[number]['id'] +export type SupportedFreebuffModelId = FreebuffModelId export type FreebuffPremiumModelId = (typeof FREEBUFF_PREMIUM_MODEL_IDS)[number] /** What new freebuff users see selected in the picker. MiniMax is the diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md index 76af547f3d..bc9cfc9881 100644 --- a/docs/freebuff-waiting-room.md +++ b/docs/freebuff-waiting-room.md @@ -5,7 +5,7 @@ The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs: 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones. -2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available. +2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; models without a dedicated deployment are treated as serverless and always available. 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput. Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session. @@ -153,18 +153,18 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r ### Tunables -| Constant | Location | Default | Purpose | -| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | -| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot. | -| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | `glm-5.1` | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. | -| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | -| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | -| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | +| Constant | Location | Default | Purpose | +| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ADMISSION_TICK_MS` | `config.ts` | 15000 | How often the ticker fires. Up to one user is admitted per model per tick. | +| `FREEBUFF_MODELS` | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot. | +| `FIREWORKS_DEPLOYMENT_MAP` | `web/src/llm-api/fireworks-config.ts` | none for current freebuff models | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback). | +| `HEALTH_CACHE_TTL_MS` | `fireworks-health.ts` | 25000 | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit. | +| `FREEBUFF_SESSION_LENGTH_MS` | env | 3_600_000 | Session lifetime | +| `SESSION_GRACE_MS` | `web/src/server/free-session/config.ts` | 1_800_000 | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. | ### Premium Session Quota -DeepSeek V4 Pro, Kimi, and legacy GLM share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited. +DeepSeek V4 Pro and Kimi share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited. ## HTTP API @@ -198,7 +198,7 @@ Response shapes: "queueDepth": 43, // size of this model's queue "queueDepthByModel": { // snapshot of every model's queue — powers the "minimax/minimax-m2.7": 43, // "N ahead" hint in the selector. Missing - "z-ai/glm-5.1": 4 // entries should be treated as 0. + "deepseek/deepseek-v4-pro": 4 // entries should be treated as 0. }, "estimatedWaitMs": 384000, "queuedAt": "2026-04-17T12:00:00Z" @@ -298,7 +298,7 @@ waitMs = (position - 1) * 24_000 - Position 1 → 0 (next tick admits you) - Position 2 → 24s, and so on. -`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter. +`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `deepseek/deepseek-v4-pro` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses, so the real wait can be longer or shorter. ## CLI Integration (frontend-side contract) @@ -337,7 +337,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr | Spamming POST/GET to starve admission tick | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. | | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time. | | Fireworks metrics endpoint down / slow | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses. | -| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions. | +| One deployment degraded while others are fine | Health is classified per-deployment; only the affected model's queue pauses, so a degraded dedicated deployment doesn't block serverless model admissions. | | Zombie expired sessions holding capacity | Swept on every admission tick, even when upstream is unhealthy | ## Testing diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index 5704535f89..566516441a 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -7,8 +7,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, - isFreebuffDeploymentHours, } from '@codebuff/common/constants/freebuff-models' import { openCodeZenModels } from '@codebuff/common/constants/model-config' import { postChatCompletions } from '../_post' @@ -963,7 +961,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) it( - 'lets old freebuff clients keep using GLM 5.1 through Fireworks availability rules', + 'rejects removed GLM 5.1 for free mode before provider calls', async () => { const fetchedBodies: Record[] = [] const fetchViaFireworks = mock( @@ -994,7 +992,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: allowedFreeModeHeaders('test-api-key-new-free'), body: JSON.stringify({ - model: FREEBUFF_GLM_MODEL_ID, + model: 'z-ai/glm-5.1', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -1019,19 +1017,9 @@ describe('/api/v1/chat/completions POST endpoint', () => { }) const body = await response.json() - if (isFreebuffDeploymentHours()) { - expect(response.status).toBe(200) - expect(fetchedBodies).toHaveLength(1) - expect(fetchedBodies[0].model).toBe( - 'accounts/fireworks/models/glm-5p1', - ) - expect(body.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(body.provider).toBe('Fireworks') - } else { - expect(response.status).toBe(503) - expect(fetchedBodies).toHaveLength(0) - expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') - } + expect(response.status).toBe(403) + expect(fetchedBodies).toHaveLength(0) + expect(body.error).toBe('free_mode_invalid_agent_model') }, FETCH_PATH_TEST_TIMEOUT_MS, ) diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 54dc6c90de..46ad2763c1 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -380,17 +380,17 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.ipPrivacySignals).toBeUndefined() }) - test('returns model_unavailable for legacy GLM 5.1 outside deployment hours', async () => { + test('falls back for removed GLM 5.1 requests', async () => { const sessionDeps = makeSessionDeps() const resp = await postFreebuffSession( makeReq('ok', { model: 'z-ai/glm-5.1' }), makeDeps(sessionDeps, 'u1'), ) - expect(resp.status).toBe(409) + expect(resp.status).toBe(200) const body = await resp.json() - expect(body.status).toBe('model_unavailable') - expect(body.availableHours).toBe('9am ET-5pm PT every day') - expect(sessionDeps.rows.size).toBe(0) + expect(body.status).toBe('queued') + expect(body.model).toBe('minimax/minimax-m2.7') + expect(sessionDeps.rows.get('u1')?.model).toBe('minimax/minimax-m2.7') }) // Banned bots with valid API keys were POSTing every few seconds and diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts index f55ab3b796..2ad5c0d0c3 100644 --- a/web/src/server/free-session/__tests__/admission.test.ts +++ b/web/src/server/free-session/__tests__/admission.test.ts @@ -1,7 +1,5 @@ import { describe, expect, test } from 'bun:test' -import { FREEBUFF_GLM_MODEL_ID } from '@codebuff/common/constants/freebuff-models' - import { runAdmissionTick } from '../admission' import type { AdmissionDeps } from '../admission' @@ -113,17 +111,6 @@ describe('runAdmissionTick', () => { expect(result.skipped).toBeNull() }) - test('legacy GLM 5.1 is admitted during deployment hours', async () => { - const deps = makeAdmissionDeps({ - models: [FREEBUFF_GLM_MODEL_ID], - now: () => new Date('2026-04-17T16:00:00Z'), - getFleetHealth: async () => ({ [FREEBUFF_GLM_MODEL_ID]: 'healthy' }), - }) - const result = await runAdmissionTick(deps) - expect(result.admitted).toBe(1) - expect(result.skipped).toBeNull() - }) - test('propagates expiry count and admit count together', async () => { const deps = makeAdmissionDeps({ sweepExpired: async () => 2, diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index b85c682cb3..9503241269 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -4,7 +4,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, FREEBUFF_GEMINI_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_LIMITED_SESSION_LIMIT, FREEBUFF_PREMIUM_SESSION_LIMIT, @@ -25,6 +24,7 @@ import type { InternalSessionRow } from '../types' const SESSION_LEN = 60 * 60 * 1000 const GRACE_MS = 30 * 60 * 1000 const DEFAULT_MODEL = 'minimax/minimax-m2.7' +const REMOVED_GLM_MODEL = 'z-ai/glm-5.1' const DEFAULT_PREMIUM_RESET_AT = '2026-04-18T07:00:00.000Z' function expectedRateLimit(model: string, recentCount: number) { @@ -264,42 +264,25 @@ describe('requestSession', () => { expect(state.instanceId).toBe('inst-1') }) - test('deployment-hours-only model is unavailable outside deployment hours', async () => { - // Legacy GLM 5.1 is the only freebuff model still gated to deployment - // hours — Kimi and DeepSeek both run 24/7 from the picker. + test('removed GLM 5.1 request falls back to the default model', async () => { const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, - deps, - }) - expect(state).toEqual({ - status: 'model_unavailable', - requestedModel: FREEBUFF_GLM_MODEL_ID, - availableHours: '9am ET-5pm PT every day', - }) - expect(deps.rows.size).toBe(0) - }) - - test('legacy GLM 5.1 model is still accepted for old clients during deployment hours', async () => { - deps._tick(new Date('2026-04-17T16:00:00Z')) - const state = await requestSession({ - userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) expect(state.status).toBe('queued') if (state.status !== 'queued') throw new Error('unreachable') - expect(deps.rows.get('u1')?.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) + expect(state.model).toBe(DEFAULT_MODEL) + expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL) }) - test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => { + test('removed GLM 5.1 active session cannot be reclaimed', async () => { const admittedAt = new Date(deps._now().getTime() - 10 * 60 * 1000) deps.rows.set('u1', { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(deps._now().getTime() + SESSION_LEN), @@ -309,13 +292,13 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) - expect(state.status).toBe('active') - if (state.status !== 'active') throw new Error('unreachable') - expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0)) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.model).toBe(DEFAULT_MODEL) + expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL) }) test('queued response includes a per-model depth snapshot for the selector', async () => { @@ -548,27 +531,25 @@ describe('requestSession', () => { expect(deps.rows.has('u1')).toBe(false) }) - test('rate_limited: legacy GLM 5.1 uses the shared premium quota', async () => { + test('rate_limited: removed GLM 5.1 request does not use the shared premium quota', async () => { deps._tick(PREMIUM_OPEN_TIME) const now = deps._now() for (let i = 0; i < PREMIUM_LIMIT; i++) { deps.admits.push({ user_id: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: PREMIUM_MODEL, admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), }) } const state = await requestSession({ userId: 'u1', - model: FREEBUFF_GLM_MODEL_ID, + model: REMOVED_GLM_MODEL, deps, }) - expect(state.status).toBe('rate_limited') - if (state.status !== 'rate_limited') throw new Error('unreachable') - expect(state.model).toBe(FREEBUFF_GLM_MODEL_ID) - expect(state.limit).toBe(PREMIUM_LIMIT) - expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS) + expect(state.status).toBe('queued') + if (state.status !== 'queued') throw new Error('unreachable') + expect(state.model).toBe(DEFAULT_MODEL) }) test("rate_limited: admits before today's Pacific reset do not count", async () => { @@ -1311,6 +1292,24 @@ describe('checkSessionAdmissible', () => { expect(result.remainingMs).toBe(SESSION_LEN) }) + test('active removed GLM 5.1 session is not admissible', async () => { + await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) + const row = deps.rows.get('u1')! + row.model = REMOVED_GLM_MODEL + row.status = 'active' + row.admitted_at = deps._now() + row.expires_at = new Date(deps._now().getTime() + SESSION_LEN) + + const result = await checkSessionAdmissible({ + userId: 'u1', + claimedInstanceId: row.active_instance_id, + requestedModel: REMOVED_GLM_MODEL, + deps, + }) + if (result.ok) throw new Error('unreachable') + expect(result.code).toBe('session_model_mismatch') + }) + test('active Kimi session admits Gemini thinker requests', async () => { await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) const row = deps.rows.get('u1')! diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index da51cee0e7..97a6caf287 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -1,7 +1,6 @@ import { FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID, FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MINIMAX_MODEL_ID, } from '@codebuff/common/constants/freebuff-models' @@ -58,7 +57,6 @@ export function getSessionGraceMs(): number { const INSTANT_ADMIT_CAPACITY: Record = { [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 1000, [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 1000, - [FREEBUFF_GLM_MODEL_ID]: 50, [FREEBUFF_KIMI_MODEL_ID]: 1000, [FREEBUFF_MINIMAX_MODEL_ID]: 1000, }