Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 97 additions & 4 deletions admin/slices/agent/agent/components/agent/Provider.vue
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import {
TableRow,
} from '#theme/components/ui/table';
import { Tabs, TabsContent, TabsList, TabsTrigger } from '#theme/components/ui/tabs';
import { IconAlertTriangle, IconArrowLeft, IconEye, IconEyeOff, IconLoader2, IconRefresh, IconShield, IconX } from '@tabler/icons-vue';
import { IconAlertTriangle, IconArrowLeft, IconEye, IconEyeOff, IconLoader2, IconPlayerPlay, IconPlayerStop, IconRefresh, IconShield, IconX } from '@tabler/icons-vue';
import { FileText, X } from 'lucide-vue-next';
import type { IPaddockScenario } from '#paddock/stores/paddockScenario';

Expand Down Expand Up @@ -236,6 +236,50 @@ function dismissRestartBanner() {
agentStore.clearPendingRestart(props.id);
}

// ── Stop / Start ─────────────────────────────────────────────────────────
// Stop cancels the workflow and deletes the pod to free cluster CPU/memory
// (so another agent can start when the cluster is full); Start deploys a fresh
// pod. Which one we show depends on whether the agent currently holds a pod.
// Statuses treated as "the agent currently holds a pod"; membership decides
// whether the toggle button offers Stop (member) or Start (non-member).
const RESOURCE_HOLDING: ReadonlySet<AgentStatusTypes> = new Set<AgentStatusTypes>(
  ['running', 'deploying', 'pending'],
);
// Direction of the stop/start request currently in flight (null when idle).
// It is captured at click time because the optimistic status flip in
// onToggleRunning changes `agent.value.status`; deriving `canStop` from the
// status alone would invert the button label/icon/title mid-request
// ("Starting…" while stopping and vice versa).
const pendingToggle = ref<'stop' | 'start' | null>(null);

// True when the toggle button should act as "Stop". While a request is in
// flight this stays pinned to the direction the user actually chose.
const canStop = computed(() => {
  if (pendingToggle.value !== null) return pendingToggle.value === 'stop';
  return agent.value ? RESOURCE_HOLDING.has(agent.value.status) : false;
});
// Busy flag for the stop/start button; also guards against re-entrant clicks.
const toggling = ref(false);
// Last stop/start failure message shown under the action buttons, if any.
const toggleError = ref<string | null>(null);

/**
 * Stop the agent when it currently holds a pod, otherwise start it.
 *
 * Flips the local status optimistically so the badge reacts before the API
 * resolves, then refreshes from the server. On failure the previous status is
 * restored, the restart-in-flight marker set for a start is cleared, and the
 * error message is exposed through `toggleError`.
 */
async function onToggleRunning() {
  const current = agent.value;
  if (!current || toggling.value) return;

  // Decide the direction BEFORE any state changes so it cannot be affected by
  // the optimistic flip below.
  const stopping = canStop.value;
  const { id, status: previousStatus } = current;

  toggling.value = true;
  toggleError.value = null;
  pendingToggle.value = stopping ? 'stop' : 'start';

  // Optimistic flip so the badge reacts before the API resolves.
  agent.value = {
    ...current,
    status: stopping ? 'stopped' : 'deploying',
  };

  try {
    if (stopping) {
      await agentStore.stop(id);
    } else {
      agentStore.markRestartInFlight(id);
      await agentStore.start(id);
    }
    await refresh();
  } catch (err) {
    // Roll the optimistic status back on whatever agent object is current.
    if (agent.value) agent.value = { ...agent.value, status: previousStatus };
    // Clear the marker under the same id it was set with.
    if (!stopping) agentStore.clearRestartInFlight(id);
    toggleError.value =
      (err instanceof Error && err.message) ||
      (stopping ? 'Stop failed' : 'Start failed');
  } finally {
    pendingToggle.value = null;
    toggling.value = false;
  }
}

// ── Live pod state from SSE ─────────────────────────────────────────────
// Lets the user watch sub-second pod transitions (Pending → ContainerCreating
// → Running) instead of waiting on the 5s status poll below.
Expand All @@ -261,6 +305,7 @@ const podPhaseLabel = computed(() => {
// Overlay drawn over the chat pane while the agent cannot chat; `null` means
// no overlay. `kind` selects the icon/action, `title`/`detail` are the copy.
type ChatOverlay = {
  kind: 'starting' | 'failed' | 'stopped';
  title: string;
  detail: string;
} | null;

const chatOverlay = computed<ChatOverlay>(() => {
Expand All @@ -277,6 +322,15 @@ const chatOverlay = computed<ChatOverlay>(() => {

if (chatLive) return null;

if (s === 'stopped') {
return {
kind: 'stopped',
title: 'Agent stopped',
detail:
'The pod was deleted to free cluster resources. Start it to chat again.',
};
}

if (s === 'failed') {
return {
kind: 'failed',
Expand Down Expand Up @@ -579,7 +633,29 @@ onBeforeUnmount(stopMetricsPolling);
</Button>
<Button
variant="outline"
:disabled="isRestarting"
:disabled="toggling || isRestarting"
:title="canStop ? 'Cancel the workflow and delete the pod to free cluster resources' : 'Deploy a fresh pod'"
@click="onToggleRunning"
>
<IconLoader2
v-if="toggling"
class="size-4 animate-spin"
/>
<IconPlayerStop v-else-if="canStop" class="size-4" />
<IconPlayerPlay v-else class="size-4" />
{{
toggling
? canStop
? 'Stopping…'
: 'Starting…'
: canStop
? 'Stop'
: 'Start'
}}
</Button>
<Button
variant="outline"
:disabled="isRestarting || toggling"
@click="onRestart"
>
<IconLoader2
Expand All @@ -591,10 +667,10 @@ onBeforeUnmount(stopMetricsPolling);
</Button>
</div>
<p
v-if="restartError"
v-if="restartError || toggleError"
class="text-xs text-destructive"
>
{{ restartError }}
{{ restartError || toggleError }}
</p>
</div>
</div>
Expand Down Expand Up @@ -659,6 +735,10 @@ onBeforeUnmount(stopMetricsPolling);
v-if="chatOverlay.kind === 'starting'"
class="size-10 animate-spin text-primary"
/>
<IconPlayerStop
v-else-if="chatOverlay.kind === 'stopped'"
class="size-10 text-muted-foreground"
/>
<IconAlertTriangle
v-else
class="size-10 text-destructive"
Expand All @@ -669,6 +749,19 @@ onBeforeUnmount(stopMetricsPolling);
{{ chatOverlay.detail }}
</p>
</div>
<Button
v-if="chatOverlay.kind === 'stopped'"
size="sm"
:disabled="toggling"
@click="onToggleRunning"
>
<IconLoader2
v-if="toggling"
class="size-4 animate-spin"
/>
<IconPlayerPlay v-else class="size-4" />
{{ toggling ? 'Starting…' : 'Start agent' }}
</Button>
<Button
v-if="chatOverlay.kind === 'failed'"
size="sm"
Expand Down
60 changes: 59 additions & 1 deletion admin/slices/agent/agent/components/agentList/Provider.vue
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import {
import {
IconDotsVertical,
IconLoader2,
IconPlayerPlay,
IconPlayerStop,
IconRefresh,
IconShield,
IconTrash,
Expand Down Expand Up @@ -66,6 +68,42 @@ async function onRestart(agent: IAgentData) {
}
}

// Agent statuses for which the row menu offers "Stop"; any status outside this
// set gets "Start" instead.
const RESOURCE_HOLDING: ReadonlySet<AgentStatusTypes> = new Set<AgentStatusTypes>(
  ['running', 'deploying', 'pending'],
);

/**
 * Tell whether an agent in the given status should be offered "Stop".
 *
 * @param status Current status of the agent row.
 * @returns `true` when the status is one of the resource-holding statuses.
 */
function canStop(status: AgentStatusTypes): boolean {
  const holdsResources = RESOURCE_HOLDING.has(status);
  return holdsResources;
}

// Ids of the rows with a stop/start request in flight. Tracked per row so a
// busy row does not disable the menu items of the others.
const togglingIds = ref(new Set<string>());

/**
 * Report whether a stop/start request is currently in flight for a row.
 *
 * @param id Agent id of the row.
 */
function isToggling(id: string): boolean {
  const inFlight = togglingIds.value;
  return inFlight.has(id);
}

/**
 * Stop or start the given row's agent, depending on its current status, then
 * refresh the list.
 *
 * The row id is held in `togglingIds` for the duration of the request and is
 * always released afterwards. There is no `catch` here: a failure from the
 * store call or from `refresh()` rejects the returned promise.
 *
 * @param agent Row the menu action was invoked on.
 */
async function onToggleRunning(agent: IAgentData) {
  // Ignore the action while a request for the same row is still running.
  if (togglingIds.value.has(agent.id)) return;

  // Replace the Set instead of mutating it so the ref sees a new value.
  const withRow = new Set(togglingIds.value);
  withRow.add(agent.id);
  togglingIds.value = withRow;

  try {
    const shouldStop = canStop(agent.status);
    await (shouldStop ? agentStore.stop(agent.id) : agentStore.start(agent.id));
    await refresh();
  } finally {
    togglingIds.value = new Set(
      [...togglingIds.value].filter((rowId) => rowId !== agent.id),
    );
  }
}

const pendingRemoval = ref<IAgentData | null>(null);
const confirmRemoveOpen = computed({
get: () => pendingRemoval.value !== null,
Expand Down Expand Up @@ -159,7 +197,7 @@ async function onRemove() {
</DropdownMenuTrigger>
<DropdownMenuContent align="end">
<DropdownMenuItem
:disabled="isRestarting(agent.id)"
:disabled="isRestarting(agent.id) || isToggling(agent.id)"
@select="onRestart(agent)"
>
<IconLoader2
Expand All @@ -169,6 +207,26 @@ async function onRemove() {
<IconRefresh v-else class="size-4" />
{{ isRestarting(agent.id) ? 'Restarting…' : 'Restart' }}
</DropdownMenuItem>
<DropdownMenuItem
:disabled="isToggling(agent.id) || isRestarting(agent.id)"
@select="onToggleRunning(agent)"
>
<IconLoader2
v-if="isToggling(agent.id)"
class="size-4 animate-spin"
/>
<IconPlayerStop v-else-if="canStop(agent.status)" class="size-4" />
<IconPlayerPlay v-else class="size-4" />
{{
isToggling(agent.id)
? canStop(agent.status)
? 'Stopping…'
: 'Starting…'
: canStop(agent.status)
? 'Stop'
: 'Start'
}}
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
class="text-destructive focus:text-destructive"
Expand Down
52 changes: 52 additions & 0 deletions admin/slices/agent/agent/stores/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,56 @@ export const useAgentStore = defineStore('agent', () => {
}
}

/**
 * Stop an agent while keeping its row so it can be started again.
 *
 * The row is flipped to 'stopped' optimistically, then replaced with the
 * agent returned by `POST /agents/:id/stop`. If the request fails, or the
 * response carries no agent data, the row captured before the flip is put
 * back and the error is rethrown.
 *
 * Uses the raw axios instance because the generated SDK has not been
 * regenerated for this endpoint yet (same approach as fetchEnv/fetchMetrics).
 *
 * @param id Id of the agent to stop.
 * @returns The agent as returned by the API.
 */
async function stop(id: string) {
  const snapshot = agents.value.find((row) => row.id === id);

  // Optimistic flip; skipped when the row is unknown or already 'stopped'.
  if (snapshot && snapshot.status !== 'stopped') {
    agents.value = agents.value.map((row) => {
      if (row.id !== id) return row;
      return { ...row, status: 'stopped' };
    });
  }

  try {
    const response = await client.instance.post(`/agents/${id}/stop`);
    const envelope = response.data as ApiEnvelope<IAgentData> | undefined;
    const updated = envelope?.data;
    if (!updated) throw new Error('Stop returned no agent data');

    agents.value = agents.value.map((row) => (row.id === id ? updated : row));
    return updated;
  } catch (err) {
    // Undo the optimistic flip with the pre-request row.
    if (snapshot) {
      agents.value = agents.value.map((row) => (row.id === id ? snapshot : row));
    }
    throw err;
  }
}

/**
 * Start an agent.
 *
 * The row is flipped to 'deploying' optimistically so the badge reacts before
 * the API resolves, then replaced with the agent returned by
 * `POST /agents/:id/start`. If the request fails, or the response carries no
 * agent data, the row captured before the flip is put back and the error is
 * rethrown.
 *
 * @param id Id of the agent to start.
 * @returns The agent as returned by the API.
 */
async function start(id: string) {
  const snapshot = agents.value.find((row) => row.id === id);

  // Optimistic flip; skipped when the row is unknown or already 'deploying'.
  if (snapshot && snapshot.status !== 'deploying') {
    agents.value = agents.value.map((row) => {
      if (row.id !== id) return row;
      return { ...row, status: 'deploying' };
    });
  }

  try {
    const response = await client.instance.post(`/agents/${id}/start`);
    const envelope = response.data as ApiEnvelope<IAgentData> | undefined;
    const updated = envelope?.data;
    if (!updated) throw new Error('Start returned no agent data');

    agents.value = agents.value.map((row) => (row.id === id ? updated : row));
    return updated;
  } catch (err) {
    // Undo the optimistic flip with the pre-request row.
    if (snapshot) {
      agents.value = agents.value.map((row) => (row.id === id ? snapshot : row));
    }
    throw err;
  }
}

async function remove(id: string, options: { wipeS3?: boolean } = {}) {
// Use raw fetch because the OpenAPI spec doesn't yet expose `wipeS3`
// as a typed query param. We re-attach the Bearer token from the
Expand Down Expand Up @@ -376,6 +426,8 @@ export const useAgentStore = defineStore('agent', () => {
create,
update,
restart,
stop,
start,
remove,
promoteAdmin,
demoteAdmin,
Expand Down
29 changes: 29 additions & 0 deletions api/src/slices/agent/agent/agent.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ export class AgentController {
private async syncStatus(agentId: string) {
const agent = await this.agentGateway.findById(agentId);
if (!agent?.workflowId) return agent;
// A stopped agent intentionally has no live pod — never resurrect its
// status from the (now cancelled) workflow phase.
if (agent.status === 'stopped') return agent;
// Forward-only: only flip to 'failed' (terminal) here. We never demote
// 'running' back to 'deploying' on a Pending workflow phase — pod-event
// reconciler owns the running/deploying transitions and a workflow can
Expand Down Expand Up @@ -404,6 +407,32 @@ export class AgentController {
return this.agentGateway.findById(id);
}

/**
 * POST :id/stop — stop an agent without deleting its record.
 *
 * Responds 404 when the id is unknown; otherwise delegates to
 * `agentDeployService.stopAgent` and returns the agent re-read from the
 * gateway afterwards.
 */
@Post(':id/stop')
@Roles(UserRoleTypes.Owner, UserRoleTypes.Admin)
@ApiOperation({
  summary:
    'Stop an agent without deleting it: cancels its workflow and deletes its pod to free cluster CPU/memory, then marks it `stopped`. Use this to free a slot so another agent can start. Bring it back with POST :id/start. Admin or Owner.',
})
async stop(@Param('id') id: string) {
  const existing = await this.agentGateway.findById(id);
  if (!existing) {
    throw new NotFoundException('Agent not found');
  }

  await this.agentDeployService.stopAgent(id);

  // Re-read so the response reflects the state after the stop.
  const stopped = await this.agentGateway.findById(id);
  return stopped;
}

/**
 * POST :id/start — start an agent by running the controller's `deploy`.
 *
 * Responds 404 when the id is unknown; otherwise deploys and returns the
 * agent re-read from the gateway afterwards.
 * NOTE(review): the agent's current status is not checked before deploying —
 * confirm `deploy` is safe to call for an agent that is not stopped.
 */
@Post(':id/start')
@Roles(UserRoleTypes.Owner, UserRoleTypes.Admin)
@ApiOperation({
  summary:
    'Start a stopped agent: deploys a fresh pod and reattaches the runtime. Inverse of POST :id/stop. Admin or Owner.',
})
async start(@Param('id') id: string) {
  const existing = await this.agentGateway.findById(id);
  if (!existing) {
    throw new NotFoundException('Agent not found');
  }

  await this.deploy(id);

  // Re-read so the response reflects the state after the deploy call.
  const started = await this.agentGateway.findById(id);
  return started;
}

@Post('restart-by-template/:templateId')
@Roles(UserRoleTypes.Owner, UserRoleTypes.Admin)
@ApiOperation({
Expand Down
6 changes: 4 additions & 2 deletions api/src/slices/agent/agent/data/agent.gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,15 @@ export class AgentGateway extends IAgentGateway {
/**
 * Persist a new status for an agent and return the updated entity.
 *
 * @param id Id of the agent row to update (used as the `where` key).
 * @param status Status value to store.
 * @param workflowId Optional workflow id written in the same update; see the
 *   note on the spread below for how `undefined` and `null` are treated.
 * @returns The updated record converted with `this.mapper.toEntity`.
 */
async updateStatus(
id: string,
status: AgentStatusTypes,
workflowId?: string,
workflowId?: string | null,
): Promise<IAgentData> {
const record = await this.prisma.agent.update({
where: { id },
data: {
status,
...(workflowId && { workflowId }),
// `undefined` leaves the column untouched; `null` clears it (used when
// stopping an agent so the now-cancelled workflow id isn't kept around).
...(workflowId !== undefined && { workflowId }),
},
});
return this.mapper.toEntity(record);
Expand Down
2 changes: 1 addition & 1 deletion api/src/slices/agent/agent/domain/agent.gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export abstract class IAgentGateway {
/**
 * Update an agent's status, optionally writing its workflow id in the same
 * call, and resolve with the updated agent data.
 * `workflowId` may be omitted and, per the signature, may also be `null`.
 */
abstract updateStatus(
id: string,
status: AgentStatusTypes,
workflowId?: string,
workflowId?: string | null,
): Promise<IAgentData>;
abstract setWorkflowId(id: string, workflowId: string): Promise<IAgentData>;
abstract setAdmin(id: string, enabled: boolean): Promise<IAgentData>;
Expand Down
Loading
Loading