Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,12 @@ async function handleRunDetail(c: C, { searchDir }: DataContext) {
// Studio-side resume against this exact run. Remote runs live in the
// results-repo cache and cannot be resumed in place, so omit both fields.
const resumeMeta = meta.source === 'local' ? deriveResumeMeta(searchDir, meta.path) : {};
const liveStatus = meta.source === 'local' ? getActiveRunStatus(meta.path) : undefined;
return c.json({
results: stripHeavyFields(loaded),
source: meta.source,
source_label: meta.displayName,
...(liveStatus && { status: liveStatus }),
...resumeMeta,
});
} catch {
Expand Down
5 changes: 4 additions & 1 deletion apps/studio/src/components/ResumeRunActions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
buildResumeRequestBody,
shouldShowResumeActions,
} from './resume-run-helpers';
import type { RunStatus } from './stop-run-helpers';

export interface ResumeRunActionsProps {
results: EvalResult[];
Expand All @@ -36,6 +37,7 @@ export interface ResumeRunActionsProps {
projectId?: string;
isReadOnly: boolean;
plannedTestCount?: number;
runStatus?: RunStatus;
}

export function ResumeRunActions({
Expand All @@ -46,12 +48,13 @@ export function ResumeRunActions({
projectId,
isReadOnly,
plannedTestCount,
runStatus,
}: ResumeRunActionsProps) {
const navigate = useNavigate();
const [busy, setBusy] = useState<ResumeMode | null>(null);
const [error, setError] = useState<string | null>(null);

if (!shouldShowResumeActions(results, isReadOnly, plannedTestCount)) return null;
if (!shouldShowResumeActions(results, isReadOnly, plannedTestCount, runStatus)) return null;

// Both actions need the run dir + the original eval file. Without those
// we can't target the existing run workspace, so we render the buttons
Expand Down
33 changes: 33 additions & 0 deletions apps/studio/src/components/RunStatusIndicator.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* RunStatusIndicator — shared live/terminal status badge for Studio-launched
* eval runs. Used anywhere the UI needs the same colored status label and
* active spinner so run/job views stay visually consistent.
*/

import type { RunStatus } from './stop-run-helpers';

/** Props accepted by {@link RunStatusIndicator}. */
export interface RunStatusIndicatorProps {
/**
 * Status to display. Rendered as a capitalised label; every value other
 * than `finished` or `failed` is additionally shown with a spinner.
 */
status: RunStatus;
}

/**
 * Renders the run status as a coloured, capitalised label. A small spinner
 * follows the label for every status other than `finished` or `failed`.
 *
 * The component returns a fragment, so the label and spinner become direct
 * children of whatever container the caller renders them into.
 */
export function RunStatusIndicator({ status }: RunStatusIndicatorProps) {
  // Text colour class per known status; anything unlisted falls back to gray.
  const palette: Record<string, string> = {
    starting: 'text-yellow-400',
    running: 'text-cyan-400',
    finished: 'text-emerald-400',
    failed: 'text-red-400',
  };
  const tone = palette[status] ?? 'text-gray-400';

  // Upper-case only the first character: "running" -> "Running".
  const label = status.charAt(0).toUpperCase() + status.slice(1);

  // Only non-terminal statuses get the spinner.
  const showSpinner = status !== 'finished' && status !== 'failed';

  return (
    <>
      <span className={`text-sm font-medium ${tone}`}>{label}</span>
      {showSpinner ? (
        <span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
      ) : null}
    </>
  );
}
22 changes: 15 additions & 7 deletions apps/studio/src/components/StopRunButton.tsx
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
/**
* StopRunButton — pause-style affordance on /jobs/:runId that interrupts
* a Studio-launched eval. Stop is part of the stop → resume → complete
* workflow, not a destructive cancel: the partial index.jsonl is
* preserved and can be resumed in one click from the run-detail page.
* StopRunButton — stop affordance on /jobs/:runId and active run detail
* views that interrupts a Studio-launched eval. Stop is part of the
* stop → resume → complete workflow, not a destructive cancel: the
* partial index.jsonl is preserved and can be resumed in one click from
* the run-detail page.
*
* Calls POST /api/eval/run/:id/stop (or the project-scoped variant).
* Optimistically flips the local label to "Stopping…" until the next
* poll of /api/eval/status/:id observes a terminal state — at which
* point the button hides via `shouldShowStopButton`.
*
* Styling is intentionally neutral (gray, not red) to signal that this
* is a pause, not a kill.
* stops execution without deleting the partial run workspace.
*/

import { useState } from 'react';
Expand Down Expand Up @@ -51,10 +52,17 @@ export function StopRunButton({ runId, status, isReadOnly, projectId }: StopRunB
type="button"
onClick={onClick}
disabled={stopping}
className="rounded-md border border-gray-700 bg-transparent px-3 py-1.5 text-sm font-medium text-gray-300 hover:bg-gray-800 disabled:cursor-not-allowed disabled:opacity-50"
className="inline-flex items-center gap-2 rounded-md border border-gray-700 bg-transparent px-3 py-1.5 text-sm font-medium text-gray-300 hover:bg-gray-800 disabled:cursor-not-allowed disabled:opacity-50"
data-testid="stop-run-button"
>
{stopping ? 'Stopping…' : '⏸ Stop'}
{stopping ? (
'Stopping…'
) : (
<>
<span aria-hidden="true" className="inline-block h-2.5 w-2.5 rounded-[1px] bg-current" />
Stop
</>
)}
</button>
{error && <p className="text-xs text-red-400">{error}</p>}
</div>
Expand Down
10 changes: 10 additions & 0 deletions apps/studio/src/components/resume-run-helpers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ describe('shouldShowResumeActions', () => {
expect(shouldShowResumeActions([ok('a'), errored('b')], false)).toBe(true);
});

it('hides while the run is still active even if it looks incomplete', () => {
expect(shouldShowResumeActions([ok('a')], false, 5, 'running')).toBe(false);
expect(shouldShowResumeActions([errored('a')], false, undefined, 'starting')).toBe(false);
});

it('shows once the run is terminal and resumable', () => {
expect(shouldShowResumeActions([ok('a')], false, 5, 'failed')).toBe(true);
expect(shouldShowResumeActions([errored('a')], false, undefined, 'finished')).toBe(true);
});

it('hides in read-only mode even when execution errors are present', () => {
expect(shouldShowResumeActions([errored('a')], true)).toBe(false);
});
Expand Down
4 changes: 4 additions & 0 deletions apps/studio/src/components/resume-run-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import type { EvalResult, RunEvalRequest } from '~/lib/types';

import { type RunStatus, isTerminalRunStatus } from './stop-run-helpers';

export type ResumeMode = 'resume' | 'rerun';

export interface BuildResumeRequestParams {
Expand Down Expand Up @@ -39,8 +41,10 @@ export function shouldShowResumeActions(
results: EvalResult[],
isReadOnly: boolean,
plannedTestCount?: number,
runStatus?: RunStatus,
): boolean {
if (isReadOnly) return false;
if (runStatus && !isTerminalRunStatus(runStatus)) return false;
if (results.some((r) => r.executionStatus === 'execution_error')) return true;
if (plannedTestCount !== undefined && results.length < plannedTestCount) return true;
return false;
Expand Down
2 changes: 2 additions & 0 deletions apps/studio/src/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ export interface RunDetailResponse {
results: EvalResult[];
source: 'local' | 'remote';
source_label?: string;
/** Live execution status when this run is still tracked in-memory by Studio. */
status?: 'starting' | 'running' | 'finished' | 'failed';
/** Path to the run workspace directory (relative to cwd when inside, otherwise absolute). Local runs only. */
run_dir?: string;
/** Eval file path the run was launched against, if recorded in benchmark.json. Local runs only. */
Expand Down
17 changes: 2 additions & 15 deletions apps/studio/src/routes/jobs/$runId.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import { Link, createFileRoute } from '@tanstack/react-router';

import { RunStatusIndicator } from '~/components/RunStatusIndicator';
import { StopRunButton } from '~/components/StopRunButton';
import { useEvalRunStatus, useStudioConfig } from '~/lib/api';

Expand Down Expand Up @@ -45,15 +46,6 @@ function JobDetailPage() {

const isTerminal = status.status === 'finished' || status.status === 'failed';

const statusColors: Record<string, string> = {
starting: 'text-yellow-400',
running: 'text-cyan-400',
finished: 'text-emerald-400',
failed: 'text-red-400',
};

const statusColor = statusColors[status.status] ?? 'text-gray-400';

return (
<div className="space-y-4">
<BackLink />
Expand All @@ -79,12 +71,7 @@ function JobDetailPage() {
</div>
<div className="flex flex-shrink-0 items-center gap-3">
<StopRunButton runId={runId} status={status.status} isReadOnly={isReadOnly} />
<span className={`text-sm font-medium ${statusColor}`}>
{status.status.charAt(0).toUpperCase() + status.status.slice(1)}
</span>
{!isTerminal && (
<span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
)}
<RunStatusIndicator status={status.status} />
</div>
</div>

Expand Down
17 changes: 2 additions & 15 deletions apps/studio/src/routes/projects/$projectId_/jobs/$runId.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import { Link, createFileRoute } from '@tanstack/react-router';

import { RunStatusIndicator } from '~/components/RunStatusIndicator';
import { StopRunButton } from '~/components/StopRunButton';
import { useEvalRunStatus, useStudioConfig } from '~/lib/api';

Expand Down Expand Up @@ -40,15 +41,6 @@ function ProjectJobDetailPage() {

const isTerminal = status.status === 'finished' || status.status === 'failed';

const statusColors: Record<string, string> = {
starting: 'text-yellow-400',
running: 'text-cyan-400',
finished: 'text-emerald-400',
failed: 'text-red-400',
};

const statusColor = statusColors[status.status] ?? 'text-gray-400';

return (
<div className="space-y-4">
<BackLink projectId={projectId} />
Expand Down Expand Up @@ -78,12 +70,7 @@ function ProjectJobDetailPage() {
isReadOnly={isReadOnly}
projectId={projectId}
/>
<span className={`text-sm font-medium ${statusColor}`}>
{status.status.charAt(0).toUpperCase() + status.status.slice(1)}
</span>
{!isTerminal && (
<span className="inline-block h-3 w-3 animate-spin rounded-full border-2 border-cyan-400 border-t-transparent" />
)}
<RunStatusIndicator status={status.status} />
</div>
</div>

Expand Down
35 changes: 25 additions & 10 deletions apps/studio/src/routes/projects/$projectId_/runs/$runId.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import { useState } from 'react';
import { ResumeRunActions } from '~/components/ResumeRunActions';
import { RunDetail } from '~/components/RunDetail';
import { RunEvalModal } from '~/components/RunEvalModal';
import { RunStatusIndicator } from '~/components/RunStatusIndicator';
import { StopRunButton } from '~/components/StopRunButton';
import { useProjectRunDetail, useStudioConfig } from '~/lib/api';

export const Route = createFileRoute('/projects/$projectId_/runs/$runId')({
Expand Down Expand Up @@ -47,6 +49,8 @@ function ProjectRunDetailPage() {
const experiment = firstResult?.experiment;
const timestamp = firstResult?.timestamp;
const prefill = target ? { target } : undefined;
const runStatus = data?.status;
const isActiveRun = runStatus === 'starting' || runStatus === 'running';

const heading = (() => {
const parts = [experiment, target].filter((p) => p && p !== 'default');
Expand All @@ -70,16 +74,27 @@ function ProjectRunDetailPage() {
<p className="mt-1 text-sm text-gray-500">{meta}</p>
</div>
<div className="flex items-center gap-3">
<ResumeRunActions
results={data?.results ?? []}
runDir={data?.run_dir}
suiteFilter={data?.suite_filter}
target={target ?? undefined}
projectId={projectId}
isReadOnly={isReadOnly}
plannedTestCount={data?.planned_test_count}
/>
{!isReadOnly && (
{!isReadOnly && isActiveRun ? (
<StopRunButton
runId={runId}
status={runStatus}
isReadOnly={isReadOnly}
projectId={projectId}
/>
) : (
<ResumeRunActions
results={data?.results ?? []}
runDir={data?.run_dir}
suiteFilter={data?.suite_filter}
target={target ?? undefined}
projectId={projectId}
isReadOnly={isReadOnly}
plannedTestCount={data?.planned_test_count}
runStatus={runStatus}
/>
)}
{runStatus && <RunStatusIndicator status={runStatus} />}
{!isReadOnly && !isActiveRun && (
<button
type="button"
onClick={() => setShowRunEval(true)}
Expand Down
28 changes: 19 additions & 9 deletions apps/studio/src/routes/runs/$runId.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import { useState } from 'react';
import { ResumeRunActions } from '~/components/ResumeRunActions';
import { RunDetail } from '~/components/RunDetail';
import { RunEvalModal } from '~/components/RunEvalModal';
import { RunStatusIndicator } from '~/components/RunStatusIndicator';
import { StopRunButton } from '~/components/StopRunButton';
import { useRunDetail, useStudioConfig } from '~/lib/api';

export const Route = createFileRoute('/runs/$runId')({
Expand Down Expand Up @@ -48,6 +50,8 @@ function RunDetailPage() {
const timestamp = firstResult?.timestamp;

const prefill = target ? { target } : undefined;
const runStatus = data?.status;
const isActiveRun = runStatus === 'starting' || runStatus === 'running';

const heading = (() => {
const parts = [experiment, target].filter((p) => p && p !== 'default');
Expand All @@ -71,15 +75,21 @@ function RunDetailPage() {
<p className="mt-1 text-sm text-gray-500">{meta}</p>
</div>
<div className="flex items-center gap-3">
<ResumeRunActions
results={data?.results ?? []}
runDir={data?.run_dir}
suiteFilter={data?.suite_filter}
target={target ?? undefined}
isReadOnly={isReadOnly}
plannedTestCount={data?.planned_test_count}
/>
{!isReadOnly && (
{!isReadOnly && isActiveRun ? (
<StopRunButton runId={runId} status={runStatus} isReadOnly={isReadOnly} />
) : (
<ResumeRunActions
results={data?.results ?? []}
runDir={data?.run_dir}
suiteFilter={data?.suite_filter}
target={target ?? undefined}
isReadOnly={isReadOnly}
plannedTestCount={data?.planned_test_count}
runStatus={runStatus}
/>
)}
{runStatus && <RunStatusIndicator status={runStatus} />}
{!isReadOnly && !isActiveRun && (
<button
type="button"
onClick={() => setShowRunEval(true)}
Expand Down
Loading