From 5423d348f64514bb68b8269f29db7b1b6df92a7d Mon Sep 17 00:00:00 2001 From: Dmitriy Zhuk Date: Tue, 23 Jun 2026 20:22:41 +0300 Subject: [PATCH 1/4] fix: scope bridle browser clients by clientId+agentId Clients were keyed by clientId alone, so one user (same clientId/sub) opening chats with several agents on separate sockets had each new socket overwrite the previous registration; earlier conversations stopped receiving events. Key the registry by the clientId+agentId pair instead, letting a multi-slot dashboard talk to N agents at once. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/src/slices/bridle/bridle.controller.ts | 4 +-- api/src/slices/bridle/data/bridle.gateway.ts | 32 ++++++++++++------- .../slices/bridle/domain/bridle.gateway.ts | 8 ++--- api/src/slices/bridle/domain/bridle.types.ts | 1 + .../bridle/handlers/bridleClientWs.handler.ts | 9 ++++-- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/api/src/slices/bridle/bridle.controller.ts b/api/src/slices/bridle/bridle.controller.ts index ee1616b..346bfd6 100644 --- a/api/src/slices/bridle/bridle.controller.ts +++ b/api/src/slices/bridle/bridle.controller.ts @@ -80,7 +80,7 @@ export class BridleController { return new Promise((resolve) => { const timeout = setTimeout(() => { - this.hub.unregisterClient(clientId); + this.hub.unregisterClient(clientId, agentId); resolve({ text: chunks.join('') || 'Timeout: no response from agent', messageId: '', @@ -95,7 +95,7 @@ export class BridleController { const event = data as Record; if (event.type === 'message' || event.type === 'stream_end') { clearTimeout(timeout); - this.hub.unregisterClient(clientId); + this.hub.unregisterClient(clientId, agentId); resolve({ text: event.text ?? chunks.join(''), messageId: event.messageId, diff --git a/api/src/slices/bridle/data/bridle.gateway.ts b/api/src/slices/bridle/data/bridle.gateway.ts index 8513891..6377a22 100644 --- a/api/src/slices/bridle/data/bridle.gateway.ts +++ b/api/src/slices/bridle/data/bridle.gateway.ts @@ -36,9 +36,18 @@ export class BridleGateway extends IBridleGateway { /** Agent connections: agentId → send function */ private agents = new Map void>(); - /** Browser clients: clientId → { agentId, send } */ + /** + * Browser clients keyed by `${clientId}\u0000${agentId}`. Keying by the pair + * (not clientId alone) lets ONE user hold several concurrent conversations — + * e.g. a multi-slot dashboard chatting with N agents on N sockets — without + * later sockets overwriting earlier ones (they share clientId='admin'/sub). + */ private clients = new Map(); + private clientKey(clientId: string, agentId: string): string { + return `${clientId}\u0000${agentId}`; + } + /** Pending sync requests awaiting agent ack: requestId → pending */ private pendingSyncs = new Map(); @@ -97,7 +106,8 @@ export class BridleGateway extends IBridleGateway { isAdmin: boolean, prompt?: string, ): void { - this.clients.set(clientId, { + this.clients.set(this.clientKey(clientId, agentId), { + clientId, agentId, send, isAdmin, @@ -108,10 +118,10 @@ export class BridleGateway extends IBridleGateway { ); } - unregisterClient(clientId: string): void { - this.clients.delete(clientId); + unregisterClient(clientId: string, agentId: string): void { + this.clients.delete(this.clientKey(clientId, agentId)); this.logger.log( - `Browser client unregistered: ${clientId} (total: ${this.clients.size})`, + `Browser client unregistered: ${clientId} agentId=${agentId} (total: ${this.clients.size})`, ); } @@ -126,7 +136,7 @@ export class BridleGateway extends IBridleGateway { this.logger.warn( `Cannot send to agent — not connected (agentId=${agentId})`, ); - this.sendToClient(clientId, { + this.sendToClient(clientId, agentId, { type: 'message', text: 'Agent is not connected. Please try again later.', parts: [ @@ -141,7 +151,7 @@ export class BridleGateway extends IBridleGateway { return; } - const client = this.clients.get(clientId); + const client = this.clients.get(this.clientKey(clientId, agentId)); agentSend({ type: 'message', clientId, @@ -152,8 +162,8 @@ export class BridleGateway extends IBridleGateway { }); } - sendToClient(clientId: string, data: unknown): void { - const client = this.clients.get(clientId); + sendToClient(clientId: string, agentId: string, data: unknown): void { + const client = this.clients.get(this.clientKey(clientId, agentId)); if (client) { client.send(data); } @@ -163,8 +173,8 @@ export class BridleGateway extends IBridleGateway { const clientId = data.clientId; if (!clientId) return; - const client = this.clients.get(clientId); - if (client && client.agentId === agentId) { + const client = this.clients.get(this.clientKey(clientId, agentId)); + if (client) { client.send(data); } } diff --git a/api/src/slices/bridle/domain/bridle.gateway.ts b/api/src/slices/bridle/domain/bridle.gateway.ts index d55f614..3e8dd9d 100644 --- a/api/src/slices/bridle/domain/bridle.gateway.ts +++ b/api/src/slices/bridle/domain/bridle.gateway.ts @@ -30,8 +30,8 @@ export abstract class IBridleGateway { text: string, parts: BridlePart[], ): void; - /** Send an event to a specific browser client */ - abstract sendToClient(clientId: string, data: unknown): void; + /** Send an event to a specific browser client (scoped to clientId + agentId) */ + abstract sendToClient(clientId: string, agentId: string, data: unknown): void; /** Register a browser client for a specific agent */ abstract registerClient( clientId: string, @@ -42,8 +42,8 @@ export abstract class IBridleGateway { * agent on every message in this session. */ prompt?: string, ): void; - /** Unregister a browser client */ - abstract unregisterClient(clientId: string): void; + /** Unregister a browser client (scoped to clientId + agentId) */ + abstract unregisterClient(clientId: string, agentId: string): void; /** Register an agent connection for a specific agent */ abstract registerAgent(agentId: string, send: (data: unknown) => void): void; /** Unregister an agent connection for a specific agent */ diff --git a/api/src/slices/bridle/domain/bridle.types.ts b/api/src/slices/bridle/domain/bridle.types.ts index 66f2bfc..64c19b4 100644 --- a/api/src/slices/bridle/domain/bridle.types.ts +++ b/api/src/slices/bridle/domain/bridle.types.ts @@ -113,6 +113,7 @@ export interface IBridleAgentHealthData { /** Registered client metadata */ export interface IBridleClientData { + clientId: string; agentId: string; send: (data: unknown) => void; isAdmin: boolean; diff --git a/api/src/slices/bridle/handlers/bridleClientWs.handler.ts b/api/src/slices/bridle/handlers/bridleClientWs.handler.ts index d864d9a..47ca5b1 100644 --- a/api/src/slices/bridle/handlers/bridleClientWs.handler.ts +++ b/api/src/slices/bridle/handlers/bridleClientWs.handler.ts @@ -192,9 +192,12 @@ export class BridleClientWsHandler handleDisconnect(client: Socket) { const clientId = client.data?.clientId as string | undefined; - if (clientId) { - this.hub.unregisterClient(clientId); - this.logger.log(`Browser disconnected: clientId=${clientId}`); + const agentId = client.data?.agentId as string | undefined; + if (clientId && agentId) { + this.hub.unregisterClient(clientId, agentId); + this.logger.log( + `Browser disconnected: clientId=${clientId} agentId=${agentId}`, + ); } } From cb280dcbb6883469a773f4bad1636e024e380249 Mon Sep 17 00:00:00 2001 From: Dmitriy Zhuk Date: Tue, 23 Jun 2026 20:22:58 +0300 Subject: [PATCH 2/4] fix: lower agent pod cpu request to 100m for node bin-packing The scheduler packs by requests, not usage. A 500m request reserved a full 8-vCPU node across ~14 idle agents (running at ~8%), so new agents stuck Pending on Insufficient cpu. Idle agents use ~10-20m; drop the request to 100m. Limits are unchanged (still burst to i.cpu); the 512Mi memory floor becomes the next ceiling (~28 agents/node). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../slices/workflow/data/agent-workflow.manifest.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/api/src/slices/workflow/data/agent-workflow.manifest.ts b/api/src/slices/workflow/data/agent-workflow.manifest.ts index 12ed98a..9990194 100644 --- a/api/src/slices/workflow/data/agent-workflow.manifest.ts +++ b/api/src/slices/workflow/data/agent-workflow.manifest.ts @@ -220,8 +220,16 @@ function buildAgentPod( // running browser_play / Chromium). Guaranteed (requests == limits // at 2 CPU / 2Gi) made schedules fail on small Hetzner nodes — // even one agent didn't fit alongside the platform pods. + // + // CPU request is 100m, not 500m: idle agents actually use ~10-20m, + // so 500m × ~14 agents reserved 100% of an 8-vCPU cx43 node while it + // ran at 8% — new agents stuck Pending on "Insufficient cpu" despite + // a near-idle node. The scheduler packs by requests, not usage, so + // an honest low floor is what lets agents fit. Bursting is unaffected + // (limits still come from i.cpu). Memory floor stays 512Mi — that's + // a real idle footprint and becomes the next ceiling (~28/node). resources: { - requests: { cpu: '500m', memory: '512Mi' }, + requests: { cpu: '100m', memory: '512Mi' }, limits: { cpu: i.cpu, memory: i.memory }, }, ports: [{ containerPort: 3000 }], From 2c1f80e9f7ab815aab05a6ba14be55f6b305d55b Mon Sep 17 00:00:00 2001 From: Dmitriy Zhuk Date: Tue, 23 Jun 2026 20:22:58 +0300 Subject: [PATCH 3/4] fix: resolve cli version at runtime to avoid stale-dist loop Version was baked into the bundle at build time, so a dist/ built before a version bump self-reported the old version and the update-available banner looped forever. Read package.json from disk at runtime (npm always ships it in the tarball). Add a CI step that verifies the built binary reports the package version before publishing. Bump 0.1.9 to 0.1.10. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/publish-cli.yaml | 12 +++++++++++ cli/package.json | 2 +- cli/src/index.ts | 4 ++-- cli/src/utils/version-check.ts | 32 +++++++++++++++++++++++++++--- 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-cli.yaml b/.github/workflows/publish-cli.yaml index c398f87..84f8976 100644 --- a/.github/workflows/publish-cli.yaml +++ b/.github/workflows/publish-cli.yaml @@ -88,6 +88,18 @@ jobs: working-directory: cli run: bun run build + - name: Verify built binary reports the package version + if: steps.gate.outputs.publish == 'true' + working-directory: cli + run: | + EXPECTED="${{ steps.versions.outputs.package }}" + ACTUAL=$(node dist/ranch.mjs --version) + if [ "$ACTUAL" != "$EXPECTED" ]; then + echo "::error::Built binary reports v$ACTUAL but package.json is v$EXPECTED — refusing to publish a stale dist." + exit 1 + fi + echo "Built binary reports v$ACTUAL ✓" + - name: Pack (dry run, sanity check) if: steps.gate.outputs.publish == 'true' working-directory: cli diff --git a/cli/package.json b/cli/package.json index b0c37c9..11407d9 100644 --- a/cli/package.json +++ b/cli/package.json @@ -1,6 +1,6 @@ { "name": "@cleanslice/ranch", - "version": "0.1.9", + "version": "0.1.10", "type": "module", "description": "Ranch project CLI", "license": "MIT", diff --git a/cli/src/index.ts b/cli/src/index.ts index af0484b..ec530f1 100644 --- a/cli/src/index.ts +++ b/cli/src/index.ts @@ -1,7 +1,6 @@ import { defineCommand, runMain as _runMain } from "citty"; import { consola } from "consola"; import { spawn } from "node:child_process"; -import pkg from "../package.json" with { type: "json" }; import { devCommand } from "./commands/dev"; import { upCommand } from "./commands/up"; import { downCommand } from "./commands/down"; @@ -11,6 +10,7 @@ import { statusCommand } from "./commands/status"; import { whereCommand } from "./commands/where"; import { upgradeCommand } from "./commands/upgrade"; import { + currentVersion, isCacheStale, readCachedCheck, refreshCache, @@ -19,7 +19,7 @@ import { const main = defineCommand({ meta: { name: "ranch", - version: pkg.version, + version: currentVersion(), description: "Ranch project CLI", }, subCommands: { diff --git a/cli/src/utils/version-check.ts b/cli/src/utils/version-check.ts index b883950..dcdb5d7 100644 --- a/cli/src/utils/version-check.ts +++ b/cli/src/utils/version-check.ts @@ -1,4 +1,6 @@ -import pkg from "../../package.json" with { type: "json" }; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; import { readCache, writeCache } from "./cache"; const REPO = "CleanSlice/Ranch"; @@ -24,8 +26,32 @@ interface GhTag { name: string; } +// Resolve the version from the installed package.json at runtime, NOT at build +// time. Baking it into the bundle let a stale `dist/` (built before a version +// bump) ship a binary that self-reported the old version — making the update +// banner loop forever. npm always includes package.json in the tarball, so the +// installed file is the source of truth regardless of when dist/ was built. +let cachedVersion: string | undefined; + export function currentVersion(): string { - return pkg.version; + if (cachedVersion) return cachedVersion; + const here = dirname(fileURLToPath(import.meta.url)); + // Bundled: /dist/ranch.mjs → ../package.json + // Dev: /src/utils/version-check.ts → ../../package.json + for (const rel of ["../package.json", "../../package.json"]) { + try { + const raw = readFileSync(join(here, rel), "utf8"); + const pkg = JSON.parse(raw) as { name?: string; version?: string }; + if (pkg.name === "@cleanslice/ranch" && pkg.version) { + cachedVersion = pkg.version; + return cachedVersion; + } + } catch { + // try the next candidate path + } + } + cachedVersion = "0.0.0"; + return cachedVersion; } function parseSemver(v: string): [number, number, number] | null { @@ -58,7 +84,7 @@ export async function fetchLatestVersion(): Promise { headers: { Accept: "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", - "User-Agent": `cleanslice-ranch-cli/${pkg.version}`, + "User-Agent": `cleanslice-ranch-cli/${currentVersion()}`, }, signal: controller.signal, }, From 9a1d1bf5cce3ec82e5f83cc5c8964681439fb9c5 Mon Sep 17 00:00:00 2001 From: Dmitriy Zhuk Date: Tue, 23 Jun 2026 20:22:58 +0300 Subject: [PATCH 4/4] chore: regenerate api client sdk Regenerate app + admin SDK from the current swagger-spec via openapi-ts so the checked-in client matches generator output. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../api/data/repositories/api/sdk.gen.ts | 51 +++++++++++++++++++ .../api/data/repositories/api/types.gen.ts | 44 ++++++++++++++++ .../api/data/repositories/api/sdk.gen.ts | 51 +++++++++++++++++++ .../api/data/repositories/api/types.gen.ts | 44 ++++++++++++++++ 4 files changed, 190 insertions(+) diff --git a/admin/slices/setup/api/data/repositories/api/sdk.gen.ts b/admin/slices/setup/api/data/repositories/api/sdk.gen.ts index 41daf1f..9aee10c 100644 --- a/admin/slices/setup/api/data/repositories/api/sdk.gen.ts +++ b/admin/slices/setup/api/data/repositories/api/sdk.gen.ts @@ -80,6 +80,8 @@ import type { AgentControllerDemoteAdminData, AgentControllerPromoteAdminData, AgentControllerRestartData, + AgentControllerStopData, + AgentControllerStartData, RestartByTemplateData, FileControllerListData, FileControllerReadData, @@ -98,6 +100,7 @@ import type { ResetBridleTranscriptResponse, GetBridleTranscriptData, GetBridleTranscriptResponse, + ArchiveBridleTranscriptData, SkillControllerFindAllData, SkillControllerCreateData, SkillControllerListSourcesData, @@ -1433,6 +1436,38 @@ export class AgentsService { }); } + /** + * Stop an agent without deleting it: cancels its workflow and deletes its pod to free cluster CPU/memory, then marks it `stopped`. Use this to free a slot so another agent can start. Bring it back with POST :id/start. Admin or Owner. + */ + public static agentControllerStop( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/agents/{id}/stop", + ...options, + }); + } + + /** + * Start a stopped agent: deploys a fresh pod and reattaches the runtime. Inverse of POST :id/stop. Admin or Owner. + */ + public static agentControllerStart( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/agents/{id}/start", + ...options, + }); + } + /** * Restart every agent that uses this template. Pulls latest template-owned files into each agent and redeploys, preserving runtime state. Concurrency capped at 5 to avoid overwhelming the cluster. Admin or Owner. */ @@ -1692,6 +1727,22 @@ export class BridleService { ...options, }); } + + /** + * Archive the persisted chat transcript for an agent/channel — the live JSONL is moved to a timestamped sibling (`bridle:..archived.jsonl`) and the live slot starts empty. Used by the embed's "New chat" action when the visitor wants a clean slate but we still want the prior conversation for admin/audit. No-op (returns `{}`) when there's nothing to archive. + */ + public static archiveBridleTranscript( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/api/agent/{agentId}/transcript/archive", + ...options, + }); + } } export class SkillsService { diff --git a/admin/slices/setup/api/data/repositories/api/types.gen.ts b/admin/slices/setup/api/data/repositories/api/types.gen.ts index 0efe293..ce12e5c 100644 --- a/admin/slices/setup/api/data/repositories/api/types.gen.ts +++ b/admin/slices/setup/api/data/repositories/api/types.gen.ts @@ -1965,6 +1965,32 @@ export type AgentControllerRestartResponses = { 201: unknown; }; +export type AgentControllerStopData = { + body?: never; + path: { + id: string; + }; + query?: never; + url: "/agents/{id}/stop"; +}; + +export type AgentControllerStopResponses = { + 201: unknown; +}; + +export type AgentControllerStartData = { + body?: never; + path: { + id: string; + }; + query?: never; + url: "/agents/{id}/start"; +}; + +export type AgentControllerStartResponses = { + 201: unknown; +}; + export type RestartByTemplateData = { body?: never; path: { @@ -2175,6 +2201,24 @@ export type GetBridleTranscriptResponses = { export type GetBridleTranscriptResponse = GetBridleTranscriptResponses[keyof GetBridleTranscriptResponses]; +export type ArchiveBridleTranscriptData = { + body?: never; + path: { + agentId: string; + }; + query?: { + /** + * Session channel — defaults to "admin". + */ + channel?: string; + }; + url: "/api/agent/{agentId}/transcript/archive"; +}; + +export type ArchiveBridleTranscriptResponses = { + 200: unknown; +}; + export type SkillControllerFindAllData = { body?: never; path?: never; diff --git a/app/slices/setup/api/data/repositories/api/sdk.gen.ts b/app/slices/setup/api/data/repositories/api/sdk.gen.ts index 41daf1f..9aee10c 100644 --- a/app/slices/setup/api/data/repositories/api/sdk.gen.ts +++ b/app/slices/setup/api/data/repositories/api/sdk.gen.ts @@ -80,6 +80,8 @@ import type { AgentControllerDemoteAdminData, AgentControllerPromoteAdminData, AgentControllerRestartData, + AgentControllerStopData, + AgentControllerStartData, RestartByTemplateData, FileControllerListData, FileControllerReadData, @@ -98,6 +100,7 @@ import type { ResetBridleTranscriptResponse, GetBridleTranscriptData, GetBridleTranscriptResponse, + ArchiveBridleTranscriptData, SkillControllerFindAllData, SkillControllerCreateData, SkillControllerListSourcesData, @@ -1433,6 +1436,38 @@ export class AgentsService { }); } + /** + * Stop an agent without deleting it: cancels its workflow and deletes its pod to free cluster CPU/memory, then marks it `stopped`. Use this to free a slot so another agent can start. Bring it back with POST :id/start. Admin or Owner. + */ + public static agentControllerStop( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/agents/{id}/stop", + ...options, + }); + } + + /** + * Start a stopped agent: deploys a fresh pod and reattaches the runtime. Inverse of POST :id/stop. Admin or Owner. + */ + public static agentControllerStart( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/agents/{id}/start", + ...options, + }); + } + /** * Restart every agent that uses this template. Pulls latest template-owned files into each agent and redeploys, preserving runtime state. Concurrency capped at 5 to avoid overwhelming the cluster. Admin or Owner. */ @@ -1692,6 +1727,22 @@ export class BridleService { ...options, }); } + + /** + * Archive the persisted chat transcript for an agent/channel — the live JSONL is moved to a timestamped sibling (`bridle:..archived.jsonl`) and the live slot starts empty. Used by the embed's "New chat" action when the visitor wants a clean slate but we still want the prior conversation for admin/audit. No-op (returns `{}`) when there's nothing to archive. + */ + public static archiveBridleTranscript( + options: Options, + ) { + return (options.client ?? _heyApiClient).post< + unknown, + unknown, + ThrowOnError + >({ + url: "/api/agent/{agentId}/transcript/archive", + ...options, + }); + } } export class SkillsService { diff --git a/app/slices/setup/api/data/repositories/api/types.gen.ts b/app/slices/setup/api/data/repositories/api/types.gen.ts index 0efe293..ce12e5c 100644 --- a/app/slices/setup/api/data/repositories/api/types.gen.ts +++ b/app/slices/setup/api/data/repositories/api/types.gen.ts @@ -1965,6 +1965,32 @@ export type AgentControllerRestartResponses = { 201: unknown; }; +export type AgentControllerStopData = { + body?: never; + path: { + id: string; + }; + query?: never; + url: "/agents/{id}/stop"; +}; + +export type AgentControllerStopResponses = { + 201: unknown; +}; + +export type AgentControllerStartData = { + body?: never; + path: { + id: string; + }; + query?: never; + url: "/agents/{id}/start"; +}; + +export type AgentControllerStartResponses = { + 201: unknown; +}; + export type RestartByTemplateData = { body?: never; path: { @@ -2175,6 +2201,24 @@ export type GetBridleTranscriptResponses = { export type GetBridleTranscriptResponse = GetBridleTranscriptResponses[keyof GetBridleTranscriptResponses]; +export type ArchiveBridleTranscriptData = { + body?: never; + path: { + agentId: string; + }; + query?: { + /** + * Session channel — defaults to "admin". + */ + channel?: string; + }; + url: "/api/agent/{agentId}/transcript/archive"; +}; + +export type ArchiveBridleTranscriptResponses = { + 200: unknown; +}; + export type SkillControllerFindAllData = { body?: never; path?: never;