From 3c4940b5f1138d435557742600434f220f359eac Mon Sep 17 00:00:00 2001 From: peiwenz2 <110538780+peiwenz2@users.noreply.github.com> Date: Sun, 24 May 2026 20:36:14 +0800 Subject: [PATCH 1/2] fix(provider): enable prompt caching for DashScope-routed Qwen models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The applyCaching gate in provider/transform.ts only fired for @ai-sdk/anthropic and @ai-sdk/alibaba. opencode's catalog wires every DashScope (Alibaba Cloud Model Studio / Bailian) model — alibaba, alibaba-cn, alibaba-coding-plan(-cn) — through @ai-sdk/openai-compatible pointing at https://dashscope[-intl].aliyuncs.com/compatible-mode/v1, so no cache_control markers were ever sent on the wire. Without caching, browser tasks on qwen3-max pay full price every turn and the model can end up more expensive than Opus 4.7 — exactly the cost cliff hinted at in the model recommendations. Bailian's cache_control protocol is shaped like Anthropic's (5m TTL, 4-breakpoint cap, 10% cache_read / 125% cache_write), so the existing 4-marker strategy carries over without needing a separate code path. Changes: - Add isDashScopeRoutedModel() helper covering alibaba*, dashscope, bailian and the @ai-sdk/alibaba SDK path. Include it in the applyCaching gate. - DashScope wants cache_control on a content block, not on the message envelope. System messages arrive here as strings; lift them into a single-block array before the content-level marker is applied so the AI SDK openai-compatible plugin can spread the cache_control field onto the wire block (per packages/openai-compatible/src/chat/convert-to-openai-compatible-chat-messages.ts in vercel/ai). - Add @ai-sdk/alibaba to sdkKey() so persisted providerOptions stored under the catalog's providerID (e.g. alibaba-cn) remap to the SDK's "alibaba" namespace on session reload. 
- Regression tests in test/provider/dashscope-cache.test.ts cover the alibaba-cn / alibaba / alibaba-coding-plan-cn paths, the 4-breakpoint cap, the string-to-block lift, the @ai-sdk/alibaba SDK path, and the gateway-exclusion contract. Refs: https://www.alibabacloud.com/help/zh/model-studio/context-cache --- packages/opencode/src/provider/transform.ts | 56 +++- .../test/provider/dashscope-cache.test.ts | 300 ++++++++++++++++++ 2 files changed, 355 insertions(+), 1 deletion(-) create mode 100644 packages/opencode/test/provider/dashscope-cache.test.ts diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts index 56a35d9af..85d0cbe2c 100644 --- a/packages/opencode/src/provider/transform.ts +++ b/packages/opencode/src/provider/transform.ts @@ -44,6 +44,11 @@ function sdkKey(npm: string): string | undefined { return "gateway" case "@openrouter/ai-sdk-provider": return "openrouter" + case "@ai-sdk/alibaba": + // Persisted messages store providerOptions under the stored providerID + // (e.g. "alibaba-cn"); remap to the SDK-expected "alibaba" namespace so + // cacheControl set in `applyCaching` survives a session reload. + return "alibaba" case "ai-gateway-provider": // ai-gateway-provider/unified wraps createOpenAICompatible({ name: "Unified" }), // and @ai-sdk/openai-compatible parses compatibleOptions from one of @@ -338,6 +343,27 @@ function normalizeMessages( return msgs } +// Detect models routed through DashScope (Alibaba Cloud Model Studio / +// Bailian / Tongyi) regardless of which AI SDK package the catalog mapped +// them to. opencode's snapshot today ships `alibaba`, `alibaba-cn`, +// `alibaba-coding-plan(-cn)` etc. as `@ai-sdk/openai-compatible` pointing at +// `https://dashscope[-intl].aliyuncs.com/compatible-mode/v1`, but the +// upstream cache-control protocol is the same one `@ai-sdk/alibaba` speaks: +// https://www.alibabacloud.com/help/zh/model-studio/context-cache . 
+//
+// Without this check, `applyCaching` is skipped for DashScope users — every
+// turn re-sends the whole conversation at full price. (Bailian bills cached
+// input at 10% of the standard rate, cache writes at 125%, with a 5-minute TTL
+// and a hard cap of 4 markers per request — the shape `applyCaching` already
+// emits for Anthropic.) Returns true when providerID starts with "alibaba" or
+// is exactly "dashscope" / "bailian", or when `api.npm` is "@ai-sdk/alibaba".
+function isDashScopeRoutedModel(model: Provider.Model): boolean {
+  if (model.providerID.startsWith("alibaba")) return true
+  if (model.providerID === "dashscope" || model.providerID === "bailian") return true
+  if (model.api.npm === "@ai-sdk/alibaba") return true
+  return false
+}
+
 function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
   const system = msgs.filter((msg) => msg.role === "system").slice(0, 2)
   const final = msgs.filter((msg) => msg.role !== "system").slice(-2)
@@ -363,7 +389,27 @@ function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage
     },
   }
 
+  // DashScope requires `cache_control` on a content *block*, not on the
+  // message envelope. `@ai-sdk/openai-compatible` lifts each part's
+  // `providerOptions.openaiCompatible.*` onto the corresponding wire block
+  // (see vercel/ai `convert-to-openai-compatible-chat-messages.ts`), so the
+  // existing content-level path produces the exact wire shape Bailian
+  // documents — *if* the message has array content to attach a part to.
+  // So each marked message with non-empty string content (system prompts
+  // from `session/llm.ts`, plain-string turns) becomes a one-block array.
+  const requiresBlockLevel = isDashScopeRoutedModel(model)
+
   for (const msg of unique([...system, ...final])) {
+    if (requiresBlockLevel && typeof msg.content === "string" && msg.content.length > 0) {
+      // The AI SDK v3 ModelMessage union types system content as `string`, so
+      // assigning an array trips tsc.
The runtime accepts array content for + // every role we touch here (verified against `@ai-sdk/openai-compatible`'s + // `convert-to-openai-compatible-chat-messages.ts`, which passes + // `content` through verbatim for system and maps each part for + // user/assistant). Cast through `any` for the assignment only. + ;(msg as any).content = [{ type: "text", text: msg.content }] + } + const useMessageLevelOptions = model.providerID === "anthropic" || model.providerID.includes("bedrock") || @@ -438,7 +484,15 @@ export function message(msgs: ModelMessage[], model: Provider.Model, options: Re model.id.includes("anthropic") || model.id.includes("claude") || model.api.npm === "@ai-sdk/anthropic" || - model.api.npm === "@ai-sdk/alibaba") && + model.api.npm === "@ai-sdk/alibaba" || + // DashScope-routed models (qwen3-max, qwen3.6-*, qwen3-coder-plus, + // kimi-k2.5 on alibaba-cn, deepseek-v3.2 on alibaba-cn, …) all speak + // the same cache_control protocol as the native `@ai-sdk/alibaba` + // SDK even though the catalog wires them through + // `@ai-sdk/openai-compatible`. Without this branch, browser tasks on + // Qwen pay full price every turn — which is exactly the scenario + // README describes as "more expensive than Opus 4.7 without caching". + isDashScopeRoutedModel(model)) && model.api.npm !== "@ai-sdk/gateway" ) { msgs = applyCaching(msgs, model) diff --git a/packages/opencode/test/provider/dashscope-cache.test.ts b/packages/opencode/test/provider/dashscope-cache.test.ts new file mode 100644 index 000000000..22fd04c58 --- /dev/null +++ b/packages/opencode/test/provider/dashscope-cache.test.ts @@ -0,0 +1,300 @@ +import { describe, expect, test } from "bun:test" +import { ProviderTransform } from "@/provider/transform" + +// Regression coverage for the DashScope (Alibaba Cloud Model Studio / Bailian +// / Tongyi) prompt-cache path. 
+//
+// The catalog wires `alibaba`, `alibaba-cn`, `alibaba-coding-plan(-cn)`
+// through `@ai-sdk/openai-compatible` pointing at
+// `https://dashscope[-intl].aliyuncs.com/compatible-mode/v1`. Prior to this
+// patch, `ProviderTransform.message` skipped `applyCaching` for those models
+// — even though the upstream service speaks the same cache_control protocol
+// `@ai-sdk/alibaba` does
+// (https://www.alibabacloud.com/help/zh/model-studio/context-cache). Browser
+// agent loops therefore re-sent the full conversation every turn at full
+// price, making qwen3-max look more expensive than Opus 4.7.
+//
+// These tests pin three properties:
+//   1. The gate fires for every DashScope-routed providerID.
+//   2. `cache_control` lands on a content *block*, not on the message
+//      envelope — Bailian rejects it at message level.
+//   3. System messages, which arrive as plain strings from `session/llm.ts`,
+//      get lifted into a single-block array so the block-level path applies.
+
+type AnyModel = Parameters<typeof ProviderTransform.message>[1]
+
+const dashscopeOpenAICompatibleModel = (overrides: Partial<AnyModel> = {}): AnyModel =>
+  ({
+    id: "alibaba-cn/qwen3-max",
+    providerID: "alibaba-cn",
+    api: {
+      id: "qwen3-max",
+      url: "https://dashscope.aliyuncs.com/compatible-mode/v1",
+      npm: "@ai-sdk/openai-compatible",
+    },
+    name: "Qwen3 Max",
+    capabilities: {
+      temperature: true,
+      reasoning: false,
+      attachment: false,
+      toolcall: true,
+      input: { text: true, audio: false, image: false, video: false, pdf: false },
+      output: { text: true, audio: false, image: false, video: false, pdf: false },
+      interleaved: false,
+    },
+    cost: { input: 1.2, output: 6 },
+    limit: { context: 262144, output: 65536 },
+    status: "active",
+    options: {},
+    headers: {},
+    ...overrides,
+  }) as any
+
+describe("ProviderTransform.message - DashScope (alibaba-*) cache_control", () => {
+  test("alibaba-cn via openai-compatible: cache_control lands on the last content block of system+last messages", () => {
+    const model =
dashscopeOpenAICompatibleModel() + const msgs = [ + { role: "system", content: "You are a helpful browser agent." }, + { role: "system", content: "Tools: browser_execute, search." }, + { role: "user", content: "Find flights from NYC to SF tomorrow." }, + { role: "assistant", content: [{ type: "text", text: "I'll start by searching." }] }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + // System messages were lifted from string → array so cache_control can + // ride on a content block. + expect(Array.isArray(result[0].content)).toBe(true) + expect(Array.isArray(result[1].content)).toBe(true) + + // The wire-relevant marker for DashScope is `openaiCompatible.cache_control` + // (snake_case) — the SDK spreads this onto the content block; the other + // namespaces stay attached so the same loop output can switch providers + // without remarking. Allow extra namespaces (anthropic/openrouter/etc.) + // since they are documented dead weight when the wire goes via OAI-compat. + for (const i of [0, 1, 2, 3]) { + const lastBlock = result[i].content[result[i].content.length - 1] + expect(lastBlock.providerOptions.openaiCompatible.cache_control).toEqual({ + type: "ephemeral", + }) + // The Alibaba SDK namespace must also be set for the case where a user + // pins `@ai-sdk/alibaba` instead of openai-compatible — same one-pass + // marking, no second transform needed. + expect(lastBlock.providerOptions.alibaba.cacheControl).toEqual({ + type: "ephemeral", + }) + // Cache_control must NOT leak onto the message envelope; Bailian will + // not look there and would still create a fresh prefix every turn. 
+ expect(result[i].providerOptions?.openaiCompatible?.cache_control).toBeUndefined() + } + }) + + test("alibaba (intl) is treated the same as alibaba-cn", () => { + const model = dashscopeOpenAICompatibleModel({ + providerID: "alibaba", + api: { + id: "qwen3-max", + url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + npm: "@ai-sdk/openai-compatible", + }, + }) + const msgs = [ + { role: "system", content: "system prompt" }, + { role: "user", content: "user message" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + expect(Array.isArray(result[0].content)).toBe(true) + expect(result[0].content[0].providerOptions.openaiCompatible.cache_control).toEqual({ + type: "ephemeral", + }) + }) + + test("alibaba-coding-plan-cn (subscription tier) also gets cache_control", () => { + const model = dashscopeOpenAICompatibleModel({ + providerID: "alibaba-coding-plan-cn", + id: "alibaba-coding-plan-cn/qwen3-coder-plus", + api: { + id: "qwen3-coder-plus", + url: "https://dashscope.aliyuncs.com/compatible-mode/v1", + npm: "@ai-sdk/openai-compatible", + }, + }) + const msgs = [ + { role: "system", content: "system prompt" }, + { role: "user", content: "user message" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + const block = result[0].content[0] + expect(block.providerOptions.openaiCompatible.cache_control).toEqual({ type: "ephemeral" }) + }) + + test("at most 4 markers — first 2 system + last 2 non-system (matches Bailian's 4-breakpoint cap)", () => { + const model = dashscopeOpenAICompatibleModel() + const msgs = [ + { role: "system", content: "sys A (marked)" }, + { role: "system", content: "sys B (marked)" }, + { role: "system", content: "sys C (NOT marked)" }, // beyond first 2 + { role: "user", content: "u1 (NOT marked)" }, + { role: "assistant", content: [{ type: "text", text: "a1 (NOT marked)" }] }, + { role: "user", content: "u2 (marked - second to last)" }, + { role: 
"assistant", content: [{ type: "text", text: "a2 (marked - last)" }] }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + const hasMarker = (m: any) => { + if (Array.isArray(m.content) && m.content.length > 0) { + const last = m.content[m.content.length - 1] + return last?.providerOptions?.openaiCompatible?.cache_control != null + } + return m.providerOptions?.openaiCompatible?.cache_control != null + } + + expect(hasMarker(result[0])).toBe(true) // sys A + expect(hasMarker(result[1])).toBe(true) // sys B + expect(hasMarker(result[2])).toBe(false) // sys C — beyond the first 2 system slots + expect(hasMarker(result[3])).toBe(false) // u1 — not in the last 2 + expect(hasMarker(result[4])).toBe(false) // a1 — not in the last 2 + expect(hasMarker(result[5])).toBe(true) // u2 — second-to-last non-system + expect(hasMarker(result[6])).toBe(true) // a2 — last non-system + }) + + test("string system content survives sanitization and gets a real text block", () => { + const model = dashscopeOpenAICompatibleModel() + const msgs = [{ role: "system", content: "hello world" }, { role: "user", content: "hi" }] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + expect(result[0].content).toEqual([ + { + type: "text", + text: "hello world", + providerOptions: expect.objectContaining({ + openaiCompatible: { cache_control: { type: "ephemeral" } }, + alibaba: { cacheControl: { type: "ephemeral" } }, + }), + }, + ]) + }) + + test("empty system content is left alone (no spurious empty block, no marker)", () => { + const model = dashscopeOpenAICompatibleModel() + // sanitizeSurrogates collapses to "" then the system stays as empty + // string; we should not generate a `[{ type: "text", text: "" }]` block. 
+ const msgs = [{ role: "system", content: "" }, { role: "user", content: "hi" }] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + expect(result[0].content).toBe("") + }) + + test("@ai-sdk/alibaba SDK path still works (defense in depth)", () => { + const model = dashscopeOpenAICompatibleModel({ + api: { + id: "qwen3-max", + url: "https://dashscope.aliyuncs.com/compatible-mode/v1", + npm: "@ai-sdk/alibaba", + }, + }) + const msgs = [ + { role: "system", content: "sys" }, + { role: "user", content: [{ type: "text", text: "u" }] }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + expect(result[0].content[0].providerOptions.alibaba.cacheControl).toEqual({ + type: "ephemeral", + }) + expect(result[1].content[0].providerOptions.alibaba.cacheControl).toEqual({ + type: "ephemeral", + }) + }) + + test("gateway-routed alibaba is excluded (gateway handles caching itself)", () => { + const model = dashscopeOpenAICompatibleModel({ + providerID: "vercel", + api: { + id: "alibaba/qwen3-max", + url: "https://ai-gateway.vercel.sh/v3/ai", + npm: "@ai-sdk/gateway", + }, + }) + const msgs = [ + { role: "system", content: "sys" }, + { role: "user", content: "u" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + // Gateway path must NOT inject cache_control — the gateway provider does + // its own caching via `gateway: { caching: "auto" }` set in + // ProviderTransform.options. Double-marking would risk double-billing. + expect(result[0].content).toBe("sys") + expect(result[0].providerOptions).toBeUndefined() + }) + + test("non-DashScope openai-compatible provider (e.g. 
deepseek direct) is still excluded", () => { + const model = dashscopeOpenAICompatibleModel({ + providerID: "deepseek", + id: "deepseek/deepseek-chat", + api: { + id: "deepseek-chat", + url: "https://api.deepseek.com/v1", + npm: "@ai-sdk/openai-compatible", + }, + }) + const msgs = [ + { role: "system", content: "sys" }, + { role: "user", content: "u" }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + + // DeepSeek's direct API uses implicit caching — no per-message + // cache_control to set. Keep the current no-op behaviour for it; if + // someone routes deepseek through DashScope's alibaba-cn provider, that + // model's providerID will be "alibaba-cn" and the gate above fires. + expect(result[0].content).toBe("sys") + expect(result[0].providerOptions).toBeUndefined() + }) +}) + +describe("ProviderTransform.message - providerOptions key remap for @ai-sdk/alibaba", () => { + test("stored providerID 'alibaba-cn' remaps to SDK key 'alibaba' when api.npm is @ai-sdk/alibaba", () => { + // Sessions that were originally written with providerID `alibaba-cn` + // persist providerOptions under that exact key. When the SDK switches + // to `@ai-sdk/alibaba`, sdkKey() must remap to "alibaba" so the + // serialized options reach the provider plugin. 
+ const model = dashscopeOpenAICompatibleModel({ + api: { + id: "qwen3-max", + url: "https://dashscope.aliyuncs.com/compatible-mode/v1", + npm: "@ai-sdk/alibaba", + }, + }) + const msgs = [ + { + role: "user", + content: [ + { + type: "text", + text: "hello", + providerOptions: { + "alibaba-cn": { cacheControl: { type: "ephemeral" } }, + }, + }, + ], + }, + ] as any[] + + const result = ProviderTransform.message(msgs, model, {}) as any[] + const block = result[0].content[0] + expect(block.providerOptions.alibaba?.cacheControl).toEqual({ type: "ephemeral" }) + expect(block.providerOptions["alibaba-cn"]).toBeUndefined() + }) +}) From b49bbefcf540840b27929d5860f067d3f990ed9b Mon Sep 17 00:00:00 2001 From: peiwenz2 <110538780+peiwenz2@users.noreply.github.com> Date: Sun, 24 May 2026 22:25:39 +0800 Subject: [PATCH 2/2] test(provider): rename example model in cache regression tests to qwen3.7-max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qwen3.7-max is the model the README pricing footnote calls out — and the one users have actually been hitting the uncached cost cliff on. The cache gate added in the previous commit is keyed on providerID (alibaba* / dashscope / bailian / @ai-sdk/alibaba), so it already covers every model the catalog hangs under those providers, including qwen3.7-max, the qwen3.7-max-2026-05-20 snapshot, qwen3-max, qwen3.6-*, kimi-k2.5, and deepseek-v3.2. This commit only renames the example model id in the fixture so the test output makes that coverage visible at a glance to anyone reading the diff. No production code changes; cost numbers in the fixture are informational — the gate does not read cost. Live probe against dashscope.aliyuncs.com/compatible-mode/v1 on 2026-05-24 confirmed qwen3.7-max returns prompt_tokens_details.cache_creation / cache_creation_input_tokens / cache_type=ephemeral / cached_tokens in response usage, so the wire-format path the gate enables is honored server-side. 
---
 .../test/provider/dashscope-cache.test.ts     | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/packages/opencode/test/provider/dashscope-cache.test.ts b/packages/opencode/test/provider/dashscope-cache.test.ts
index 22fd04c58..8b48a1925 100644
--- a/packages/opencode/test/provider/dashscope-cache.test.ts
+++ b/packages/opencode/test/provider/dashscope-cache.test.ts
@@ -12,7 +12,7 @@ import { ProviderTransform } from "@/provider/transform"
 // `@ai-sdk/alibaba` does
 // (https://www.alibabacloud.com/help/zh/model-studio/context-cache). Browser
 // agent loops therefore re-sent the full conversation every turn at full
-// price, making qwen3-max look more expensive than Opus 4.7.
+// price, making qwen3.7-max look more expensive than Opus 4.7.
 //
 // These tests pin three properties:
 //   1. The gate fires for every DashScope-routed providerID.
@@ -25,23 +25,29 @@ type AnyModel = Parameters<typeof ProviderTransform.message>[1]
 
 const dashscopeOpenAICompatibleModel = (overrides: Partial<AnyModel> = {}): AnyModel =>
   ({
-    id: "alibaba-cn/qwen3-max",
+    id: "alibaba-cn/qwen3.7-max",
     providerID: "alibaba-cn",
     api: {
-      id: "qwen3-max",
+      id: "qwen3.7-max",
       url: "https://dashscope.aliyuncs.com/compatible-mode/v1",
       npm: "@ai-sdk/openai-compatible",
     },
-    name: "Qwen3 Max",
+    name: "Qwen3.7 Max",
     capabilities: {
       temperature: true,
-      reasoning: false,
+      // qwen3.7-max returns reasoning_tokens by default per a live probe
+      // against dashscope.aliyuncs.com on 2026-05-24 — mirror the catalog
+      // convention for thinking models.
+      reasoning: true,
       attachment: false,
       toolcall: true,
       input: { text: true, audio: false, image: false, video: false, pdf: false },
       output: { text: true, audio: false, image: false, video: false, pdf: false },
       interleaved: false,
     },
+    // The cache gate is providerID-keyed and does not read cost, so these
+    // numbers are informational only — once qwen3.7-max lands in models.dev
+    // the snapshot will overwrite the catalog cost field anyway.
cost: { input: 1.2, output: 6 }, limit: { context: 262144, output: 65536 }, status: "active", @@ -93,7 +99,7 @@ describe("ProviderTransform.message - DashScope (alibaba-*) cache_control", () = const model = dashscopeOpenAICompatibleModel({ providerID: "alibaba", api: { - id: "qwen3-max", + id: "qwen3.7-max", url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", npm: "@ai-sdk/openai-compatible", }, @@ -195,7 +201,7 @@ describe("ProviderTransform.message - DashScope (alibaba-*) cache_control", () = test("@ai-sdk/alibaba SDK path still works (defense in depth)", () => { const model = dashscopeOpenAICompatibleModel({ api: { - id: "qwen3-max", + id: "qwen3.7-max", url: "https://dashscope.aliyuncs.com/compatible-mode/v1", npm: "@ai-sdk/alibaba", }, @@ -219,7 +225,7 @@ describe("ProviderTransform.message - DashScope (alibaba-*) cache_control", () = const model = dashscopeOpenAICompatibleModel({ providerID: "vercel", api: { - id: "alibaba/qwen3-max", + id: "alibaba/qwen3.7-max", url: "https://ai-gateway.vercel.sh/v3/ai", npm: "@ai-sdk/gateway", }, @@ -272,7 +278,7 @@ describe("ProviderTransform.message - providerOptions key remap for @ai-sdk/alib // serialized options reach the provider plugin. const model = dashscopeOpenAICompatibleModel({ api: { - id: "qwen3-max", + id: "qwen3.7-max", url: "https://dashscope.aliyuncs.com/compatible-mode/v1", npm: "@ai-sdk/alibaba", },