diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index d66fba5aaf..fd6bfd57c8 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -514,9 +514,8 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
           return
         }
         if (next.status === 'model_unavailable') {
-          // Server says the requested model isn't available right now (e.g.
-          // legacy GLM 5.1 outside deployment hours). Flip to the
-          // always-available fallback for this run. In-memory only —
+          // Server says the requested model isn't available right now. Flip
+          // to the always-available fallback for this run. In-memory only —
           // `setSelectedModel` doesn't persist, so the user's saved preference
           // is preserved for their next launch.
           useFreebuffModelStore
@@ -637,15 +636,15 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
               if (response.status === 'none' || response.status === 'queued') {
                 apply({
                   status: 'none',
-                  accessTier:
-                    response.accessTier ?? landingSession.accessTier,
+                  accessTier: response.accessTier ?? landingSession.accessTier,
                   queueDepthByModel:
                     response.queueDepthByModel ??
                     landingSession.queueDepthByModel,
                   rateLimitsByModel:
                     response.rateLimitsByModel ??
                     landingSession.rateLimitsByModel,
-                  countryCode: response.countryCode ?? landingSession.countryCode,
+                  countryCode:
+                    response.countryCode ?? landingSession.countryCode,
                   countryBlockReason:
                     response.countryBlockReason ??
                     landingSession.countryBlockReason,
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
index ee39ed975b..ca0a020419 100644
--- a/common/src/__tests__/freebuff-models.test.ts
+++ b/common/src/__tests__/freebuff-models.test.ts
@@ -5,7 +5,6 @@ import {
   DEFAULT_FREEBUFF_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   LIMITED_FREEBUFF_MODEL_ID,
   FREEBUFF_MINIMAX_MODEL_ID,
@@ -84,15 +83,14 @@ describe('freebuff model availability', () => {
     ).toBe(false)
   })
 
-  test('supports GLM 5.1 as a legacy server-side model without selecting it for new clients', () => {
-    expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain(
-      FREEBUFF_GLM_MODEL_ID,
+  test('does not support GLM 5.1 for freebuff sessions', () => {
+    const glm = 'z-ai/glm-5.1'
+    expect(FREEBUFF_MODELS.map((model) => model.id)).not.toContain(glm)
+    expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).not.toContain(
+      glm,
     )
-    expect(SUPPORTED_FREEBUFF_MODELS.map((model) => model.id)).toContain(
-      FREEBUFF_GLM_MODEL_ID,
-    )
-    expect(isFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(false)
-    expect(isSupportedFreebuffModelId(FREEBUFF_GLM_MODEL_ID)).toBe(true)
+    expect(isFreebuffModelId(glm)).toBe(false)
+    expect(isSupportedFreebuffModelId(glm)).toBe(false)
   })
 
   test('formats the close time in the user local timezone while deployment is open', () => {
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 2d1a55c7ff..9b8c8bb055 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -5,7 +5,6 @@ import {
   FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
   FREEBUFF_GEMINI_PRO_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MINIMAX_MODEL_ID,
   SUPPORTED_FREEBUFF_MODELS,
@@ -68,7 +67,6 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
   'base2-free': new Set([
     FREEBUFF_MINIMAX_MODEL_ID,
-    FREEBUFF_GLM_MODEL_ID,
     FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
     FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
     FREEBUFF_KIMI_MODEL_ID,
@@ -94,10 +92,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   'tmux-cli': new Set([FREEBUFF_MINIMAX_MODEL_ID]),
 
   // Code reviewer for free mode
-  'code-reviewer-minimax': new Set([
-    FREEBUFF_MINIMAX_MODEL_ID,
-    FREEBUFF_GLM_MODEL_ID,
-  ]),
+  'code-reviewer-minimax': new Set([FREEBUFF_MINIMAX_MODEL_ID]),
   'code-reviewer-kimi': new Set([FREEBUFF_KIMI_MODEL_ID]),
   'code-reviewer-deepseek': new Set([FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]),
   'code-reviewer-deepseek-flash': new Set([
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 715b258b50..95f79644a9 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -35,7 +35,6 @@ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
 export const FREEBUFF_GEMINI_PRO_MODEL_ID = 'google/gemini-3.1-pro-preview'
 export const FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID = 'deepseek/deepseek-v4-pro'
 export const FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID = 'deepseek/deepseek-v4-flash'
-export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
 export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 export const FREEBUFF_PREMIUM_SESSION_LIMIT = 5
@@ -102,29 +101,15 @@ export const FREEBUFF_MODELS = [
   },
 ] as const satisfies readonly FreebuffModelOption[]
 
-export const LEGACY_FREEBUFF_MODELS = [
-  {
-    id: FREEBUFF_GLM_MODEL_ID,
-    displayName: 'GLM 5.1',
-    tagline: 'Legacy',
-    availability: 'deployment_hours',
-  },
-] as const satisfies readonly FreebuffModelOption[]
-
 export const FREEBUFF_PREMIUM_MODEL_IDS = [
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
 ] as const
 
-export const SUPPORTED_FREEBUFF_MODELS = [
-  ...FREEBUFF_MODELS,
-  ...LEGACY_FREEBUFF_MODELS,
-] as const satisfies readonly FreebuffModelOption[]
+export const SUPPORTED_FREEBUFF_MODELS = FREEBUFF_MODELS
 
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
-export type SupportedFreebuffModelId =
-  (typeof SUPPORTED_FREEBUFF_MODELS)[number]['id']
+export type SupportedFreebuffModelId = FreebuffModelId
 export type FreebuffPremiumModelId = (typeof FREEBUFF_PREMIUM_MODEL_IDS)[number]
 
 /** What new freebuff users see selected in the picker. MiniMax is the
diff --git a/docs/freebuff-waiting-room.md b/docs/freebuff-waiting-room.md
index 76af547f3d..bc9cfc9881 100644
--- a/docs/freebuff-waiting-room.md
+++ b/docs/freebuff-waiting-room.md
@@ -5,7 +5,7 @@
 The waiting room is the admission control layer for **free-mode** requests against the freebuff Fireworks deployments. It has three jobs:
 
 1. **Drip-admit users per model** — each selectable freebuff model has its own FIFO queue. Admission runs one tick (default `ADMISSION_TICK_MS`, 15s) that tries to admit one user per model, so heavier models can sit cold without starving lighter ones.
-2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. Only models whose deployment is `healthy` and currently available admit that tick; GLM 5.1 is available during 9am ET-5pm PT on weekdays, while MiniMax M2.7 is serverless and always available.
+2. **Gate on per-deployment health and hours** — a single fleet probe per tick (`getFleetHealth` in `web/src/server/free-session/fireworks-health.ts`) hits the Fireworks metrics endpoint and classifies each dedicated deployment as `healthy | degraded | unhealthy`. A model admits on a given tick only if its deployment is `healthy` and the model is inside its availability hours (when it has any); models without a dedicated deployment are treated as serverless and always available.
 3. **One instance per account** — prevent a single user from running N concurrent freebuff CLIs to get N× throughput.
 
 Users who cannot be admitted immediately are placed in the queue for their chosen model and given an estimated wait time. Admitted users get a fixed-length session (default 1h) bound to the model they were admitted on; chat completions use that model for the life of the session.
@@ -153,18 +153,18 @@ The final tick result carries a `queueDepthByModel` map and a single `skipped` r
 
 ### Tunables
 
-| Constant                     | Location                                  | Default                                                             | Purpose                                                                                                                                                                       |
-| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `ADMISSION_TICK_MS`          | `config.ts`                               | 15000                                                               | How often the ticker fires. Up to one user is admitted per model per tick.                                                                                                    |
-| `FREEBUFF_MODELS`            | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot.                                                                                                                |
-| `FIREWORKS_DEPLOYMENT_MAP`   | `web/src/llm-api/fireworks-config.ts`     | `glm-5.1`                                                           | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback) — drop this default when they migrate to their own deployments. |
-| `HEALTH_CACHE_TTL_MS`        | `fireworks-health.ts`                     | 25000                                                               | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit.                                                                           |
-| `FREEBUFF_SESSION_LENGTH_MS` | env                                       | 3_600_000                                                           | Session lifetime                                                                                                                                                              |
-| `SESSION_GRACE_MS`           | `web/src/server/free-session/config.ts`   | 1_800_000                                                           | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`.   |
+| Constant                     | Location                                  | Default                                                             | Purpose                                                                                                                                                                     |
+| ---------------------------- | ----------------------------------------- | ------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `ADMISSION_TICK_MS`          | `config.ts`                               | 15000                                                               | How often the ticker fires. Up to one user is admitted per model per tick.                                                                                                  |
+| `FREEBUFF_MODELS`            | `common/src/constants/freebuff-models.ts` | `deepseek-v4-pro`, `kimi-k2.6`, `minimax-m2.7`, `deepseek-v4-flash` | Selectable models; each gets its own queue and admission slot.                                                                                                              |
+| `FIREWORKS_DEPLOYMENT_MAP`   | `web/src/llm-api/fireworks-config.ts`     | none for current freebuff models                                    | Models with dedicated Fireworks deployments. Models not listed are treated as `healthy` (serverless fallback).                                                              |
+| `HEALTH_CACHE_TTL_MS`        | `fireworks-health.ts`                     | 25000                                                               | Fleet probe cache TTL. Sits just under the Fireworks 30s exporter cadence and 6 req/min rate limit.                                                                         |
+| `FREEBUFF_SESSION_LENGTH_MS` | env                                       | 3_600_000                                                           | Session lifetime                                                                                                                                                            |
+| `SESSION_GRACE_MS`           | `web/src/server/free-session/config.ts`   | 1_800_000                                                           | Drain window after expiry — gate still admits requests so an in-flight agent can finish, but the CLI is expected to block new prompts. Hard cutoff at `expires_at + grace`. |
 
 ### Premium Session Quota
 
-DeepSeek V4 Pro, Kimi, and legacy GLM share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited.
+DeepSeek V4 Pro and Kimi share a per-user premium quota. The server counts `free_session_admit` rows from the last midnight in `America/Los_Angeles`; when the user reaches `FREEBUFF_PREMIUM_SESSION_LIMIT`, the next premium `POST /session` is rejected until the next Pacific midnight reset. MiniMax and DeepSeek V4 Flash remain unlimited.
 
 ## HTTP API
 
@@ -198,7 +198,7 @@ Response shapes:
   "queueDepth": 43,        // size of this model's queue
   "queueDepthByModel": {   // snapshot of every model's queue — powers the
     "minimax/minimax-m2.7": 43, //  "N ahead" hint in the selector. Missing
-    "z-ai/glm-5.1": 4   //  entries should be treated as 0.
+    "deepseek/deepseek-v4-pro": 4 // entries should be treated as 0.
   },
   "estimatedWaitMs": 384000,
   "queuedAt": "2026-04-17T12:00:00Z"
@@ -298,7 +298,7 @@ waitMs = (position - 1) * 24_000
 - Position 1 → 0 (next tick admits you)
 - Position 2 → 24s, and so on.
 
-`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `z-ai/glm-5.1` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence, health-gated pauses, and deployment-hours availability (during a GLM Fireworks incident or outside 9am ET-5pm PT, only GLM's queue stalls; MiniMax keeps draining), so the real wait can be longer or shorter.
+`position` is scoped to this model's queue — a user at position 1 in the `minimax/minimax-m2.7` queue is not affected by the depth of the `deepseek/deepseek-v4-pro` queue. The estimate is intentionally decoupled from the admission tick — it's a human-friendly rule-of-thumb for the UI, not a precise projection. Actual wait depends on admission-tick cadence and health-gated pauses, so the real wait can be longer or shorter.
 
 ## CLI Integration (frontend-side contract)
 
@@ -337,7 +337,7 @@ The `disabled` response means the server has the waiting room turned off. CLI tr
 | Spamming POST/GET to starve admission tick                    | Admission uses per-model Postgres advisory locks; DDoS protection is upstream (Next's global rate limits). Consider adding a per-user limiter on `/session` if traffic warrants. |
 | Repeatedly POSTing different models to get across every queue | Single row per user (PK on `user_id`); switching models moves the row, never clones it. A user holds exactly one queue slot at any time.                                         |
 | Fireworks metrics endpoint down / slow                        | `getFleetHealth()` fails closed (timeout, non-OK, or missing API key) → every dedicated-deployment model is flagged `unhealthy` and its queue pauses.                            |
-| One deployment degraded while others are fine                 | Health is classified per-deployment; only the affected model's queue pauses, so a degraded GLM deployment doesn't block MiniMax admissions.                                      |
+| One deployment degraded while others are fine                 | Health is classified per-deployment; only the affected model's queue pauses, so a degraded dedicated deployment doesn't block serverless model admissions.                       |
 | Zombie expired sessions holding capacity                      | Swept on every admission tick, even when upstream is unhealthy                                                                                                                   |
 
 ## Testing
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index 5704535f89..566516441a 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -7,8 +7,6 @@ import {
   FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
   FREEBUFF_GEMINI_PRO_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
-  isFreebuffDeploymentHours,
 } from '@codebuff/common/constants/freebuff-models'
 import { openCodeZenModels } from '@codebuff/common/constants/model-config'
 import { postChatCompletions } from '../_post'
@@ -963,7 +961,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
     })
 
     it(
-      'lets old freebuff clients keep using GLM 5.1 through Fireworks availability rules',
+      'rejects removed GLM 5.1 for free mode before provider calls',
       async () => {
         const fetchedBodies: Record<string, unknown>[] = []
         const fetchViaFireworks = mock(
@@ -994,7 +992,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
             method: 'POST',
             headers: allowedFreeModeHeaders('test-api-key-new-free'),
             body: JSON.stringify({
-              model: FREEBUFF_GLM_MODEL_ID,
+              model: 'z-ai/glm-5.1',
               stream: false,
               codebuff_metadata: {
                 run_id: 'run-free',
@@ -1019,19 +1017,9 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         })
 
         const body = await response.json()
-        if (isFreebuffDeploymentHours()) {
-          expect(response.status).toBe(200)
-          expect(fetchedBodies).toHaveLength(1)
-          expect(fetchedBodies[0].model).toBe(
-            'accounts/fireworks/models/glm-5p1',
-          )
-          expect(body.model).toBe(FREEBUFF_GLM_MODEL_ID)
-          expect(body.provider).toBe('Fireworks')
-        } else {
-          expect(response.status).toBe(503)
-          expect(fetchedBodies).toHaveLength(0)
-          expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
-        }
+        expect(response.status).toBe(403)
+        expect(fetchedBodies).toHaveLength(0)
+        expect(body.error).toBe('free_mode_invalid_agent_model')
       },
       FETCH_PATH_TEST_TIMEOUT_MS,
     )
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 54dc6c90de..46ad2763c1 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -380,17 +380,17 @@ describe('POST /api/v1/freebuff/session', () => {
     expect(body.ipPrivacySignals).toBeUndefined()
   })
 
-  test('returns model_unavailable for legacy GLM 5.1 outside deployment hours', async () => {
+  test('falls back for removed GLM 5.1 requests', async () => {
     const sessionDeps = makeSessionDeps()
     const resp = await postFreebuffSession(
       makeReq('ok', { model: 'z-ai/glm-5.1' }),
       makeDeps(sessionDeps, 'u1'),
     )
-    expect(resp.status).toBe(409)
+    expect(resp.status).toBe(200)
     const body = await resp.json()
-    expect(body.status).toBe('model_unavailable')
-    expect(body.availableHours).toBe('9am ET-5pm PT every day')
-    expect(sessionDeps.rows.size).toBe(0)
+    expect(body.status).toBe('queued')
+    expect(body.model).toBe('minimax/minimax-m2.7')
+    expect(sessionDeps.rows.get('u1')?.model).toBe('minimax/minimax-m2.7')
   })
 
   // Banned bots with valid API keys were POSTing every few seconds and
diff --git a/web/src/server/free-session/__tests__/admission.test.ts b/web/src/server/free-session/__tests__/admission.test.ts
index f55ab3b796..2ad5c0d0c3 100644
--- a/web/src/server/free-session/__tests__/admission.test.ts
+++ b/web/src/server/free-session/__tests__/admission.test.ts
@@ -1,7 +1,5 @@
 import { describe, expect, test } from 'bun:test'
 
-import { FREEBUFF_GLM_MODEL_ID } from '@codebuff/common/constants/freebuff-models'
-
 import { runAdmissionTick } from '../admission'
 
 import type { AdmissionDeps } from '../admission'
@@ -113,17 +111,6 @@ describe('runAdmissionTick', () => {
     expect(result.skipped).toBeNull()
   })
 
-  test('legacy GLM 5.1 is admitted during deployment hours', async () => {
-    const deps = makeAdmissionDeps({
-      models: [FREEBUFF_GLM_MODEL_ID],
-      now: () => new Date('2026-04-17T16:00:00Z'),
-      getFleetHealth: async () => ({ [FREEBUFF_GLM_MODEL_ID]: 'healthy' }),
-    })
-    const result = await runAdmissionTick(deps)
-    expect(result.admitted).toBe(1)
-    expect(result.skipped).toBeNull()
-  })
-
   test('propagates expiry count and admit count together', async () => {
     const deps = makeAdmissionDeps({
       sweepExpired: async () => 2,
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index b85c682cb3..9503241269 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -4,7 +4,6 @@ import {
   FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
   FREEBUFF_GEMINI_PRO_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_LIMITED_SESSION_LIMIT,
   FREEBUFF_PREMIUM_SESSION_LIMIT,
@@ -25,6 +24,7 @@ import type { InternalSessionRow } from '../types'
 const SESSION_LEN = 60 * 60 * 1000
 const GRACE_MS = 30 * 60 * 1000
 const DEFAULT_MODEL = 'minimax/minimax-m2.7'
+const REMOVED_GLM_MODEL = 'z-ai/glm-5.1'
 const DEFAULT_PREMIUM_RESET_AT = '2026-04-18T07:00:00.000Z'
 
 function expectedRateLimit(model: string, recentCount: number) {
@@ -264,42 +264,25 @@ describe('requestSession', () => {
     expect(state.instanceId).toBe('inst-1')
   })
 
-  test('deployment-hours-only model is unavailable outside deployment hours', async () => {
-    // Legacy GLM 5.1 is the only freebuff model still gated to deployment
-    // hours — Kimi and DeepSeek both run 24/7 from the picker.
+  test('removed GLM 5.1 request falls back to the default model', async () => {
     const state = await requestSession({
       userId: 'u1',
-      model: FREEBUFF_GLM_MODEL_ID,
-      deps,
-    })
-    expect(state).toEqual({
-      status: 'model_unavailable',
-      requestedModel: FREEBUFF_GLM_MODEL_ID,
-      availableHours: '9am ET-5pm PT every day',
-    })
-    expect(deps.rows.size).toBe(0)
-  })
-
-  test('legacy GLM 5.1 model is still accepted for old clients during deployment hours', async () => {
-    deps._tick(new Date('2026-04-17T16:00:00Z'))
-    const state = await requestSession({
-      userId: 'u1',
-      model: FREEBUFF_GLM_MODEL_ID,
+      model: REMOVED_GLM_MODEL,
       deps,
     })
     expect(state.status).toBe('queued')
     if (state.status !== 'queued') throw new Error('unreachable')
-    expect(deps.rows.get('u1')?.model).toBe(FREEBUFF_GLM_MODEL_ID)
-    expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0))
+    expect(state.model).toBe(DEFAULT_MODEL)
+    expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL)
   })
 
-  test('legacy GLM 5.1 active session can be reclaimed outside deployment hours', async () => {
+  test('removed GLM 5.1 active session cannot be reclaimed', async () => {
     const admittedAt = new Date(deps._now().getTime() - 10 * 60 * 1000)
     deps.rows.set('u1', {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'inst-pre',
-      model: FREEBUFF_GLM_MODEL_ID,
+      model: REMOVED_GLM_MODEL,
       queued_at: admittedAt,
       admitted_at: admittedAt,
       expires_at: new Date(deps._now().getTime() + SESSION_LEN),
@@ -309,13 +292,13 @@ describe('requestSession', () => {
 
     const state = await requestSession({
       userId: 'u1',
-      model: FREEBUFF_GLM_MODEL_ID,
+      model: REMOVED_GLM_MODEL,
       deps,
     })
-    expect(state.status).toBe('active')
-    if (state.status !== 'active') throw new Error('unreachable')
-    expect(state.instanceId).not.toBe('inst-pre')
-    expect(state.rateLimit).toEqual(expectedRateLimit(FREEBUFF_GLM_MODEL_ID, 0))
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.model).toBe(DEFAULT_MODEL)
+    expect(deps.rows.get('u1')?.model).toBe(DEFAULT_MODEL)
   })
 
   test('queued response includes a per-model depth snapshot for the selector', async () => {
@@ -548,27 +531,25 @@ describe('requestSession', () => {
     expect(deps.rows.has('u1')).toBe(false)
   })
 
-  test('rate_limited: legacy GLM 5.1 uses the shared premium quota', async () => {
+  test('rate_limited: removed GLM 5.1 request does not use the shared premium quota', async () => {
     deps._tick(PREMIUM_OPEN_TIME)
     const now = deps._now()
     for (let i = 0; i < PREMIUM_LIMIT; i++) {
       deps.admits.push({
         user_id: 'u1',
-        model: FREEBUFF_GLM_MODEL_ID,
+        model: PREMIUM_MODEL,
         admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
       })
     }
 
     const state = await requestSession({
       userId: 'u1',
-      model: FREEBUFF_GLM_MODEL_ID,
+      model: REMOVED_GLM_MODEL,
       deps,
     })
-    expect(state.status).toBe('rate_limited')
-    if (state.status !== 'rate_limited') throw new Error('unreachable')
-    expect(state.model).toBe(FREEBUFF_GLM_MODEL_ID)
-    expect(state.limit).toBe(PREMIUM_LIMIT)
-    expect(state.windowHours).toBe(PREMIUM_WINDOW_HOURS)
+    expect(state.status).toBe('queued')
+    if (state.status !== 'queued') throw new Error('unreachable')
+    expect(state.model).toBe(DEFAULT_MODEL)
   })
 
   test("rate_limited: admits before today's Pacific reset do not count", async () => {
@@ -1311,6 +1292,24 @@ describe('checkSessionAdmissible', () => {
     expect(result.remainingMs).toBe(SESSION_LEN)
   })
 
+  test('active removed GLM 5.1 session is not admissible', async () => {
+    await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
+    const row = deps.rows.get('u1')!
+    row.model = REMOVED_GLM_MODEL
+    row.status = 'active'
+    row.admitted_at = deps._now()
+    row.expires_at = new Date(deps._now().getTime() + SESSION_LEN)
+
+    const result = await checkSessionAdmissible({
+      userId: 'u1',
+      claimedInstanceId: row.active_instance_id,
+      requestedModel: REMOVED_GLM_MODEL,
+      deps,
+    })
+    if (result.ok) throw new Error('unreachable')
+    expect(result.code).toBe('session_model_mismatch')
+  })
+
   test('active Kimi session admits Gemini thinker requests', async () => {
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     const row = deps.rows.get('u1')!
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index da51cee0e7..97a6caf287 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -1,7 +1,6 @@
 import {
   FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
   FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
   FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MINIMAX_MODEL_ID,
 } from '@codebuff/common/constants/freebuff-models'
@@ -58,7 +57,6 @@ export function getSessionGraceMs(): number {
 const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
   [FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID]: 1000,
   [FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID]: 1000,
-  [FREEBUFF_GLM_MODEL_ID]: 50,
   [FREEBUFF_KIMI_MODEL_ID]: 1000,
   [FREEBUFF_MINIMAX_MODEL_ID]: 1000,
 }