Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,17 @@ rli axon events <id> # List events for an axon

```bash
rli scenario info <id> # Display scenario definition details
rli scenario create # Create a new custom scenario
rli scenario list # List scenario runs
```

### Benchmark-run Commands (alias: `bmr`)

```bash
rli benchmark-run cancel <id> # Cancel a running benchmark run
rli benchmark-run complete <id> # Complete a benchmark run (finalize an...
```

### Benchmark-job Commands (alias: `bmj`)

```bash
Expand Down
15 changes: 10 additions & 5 deletions src/commands/benchmark-job/list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import {
listBenchmarkJobs,
type BenchmarkJob,
} from "../../services/benchmarkJobService.js";
import { output, outputError } from "../../utils/output.js";
import { output, outputError, parseLimit } from "../../utils/output.js";

interface ListOptions {
limit?: string;
days?: string;
all?: boolean;
status?: string;
Expand All @@ -30,18 +31,20 @@ const PAGE_SIZE = 100;
async function fetchJobs(
cutoffMs: number | null,
statusFilter: Set<string> | null,
maxResults: number,
): Promise<BenchmarkJob[]> {
const allJobs: BenchmarkJob[] = [];
let cursor: string | undefined;

while (true) {
while (allJobs.length < maxResults) {
const remaining = maxResults - allJobs.length;
const result = await listBenchmarkJobs({
limit: PAGE_SIZE,
limit: Math.min(PAGE_SIZE, remaining),
startingAfter: cursor,
});

for (const job of result.jobs) {
// Stop pagination if we've passed the time cutoff (API returns newest-first)
if (allJobs.length >= maxResults) break;
if (cutoffMs !== null && job.create_time_ms < cutoffMs) {
return applyStatusFilter(allJobs, statusFilter);
}
Expand Down Expand Up @@ -94,8 +97,10 @@ export async function listBenchmarkJobsCommand(
cutoffMs = Date.now() - days * 86_400_000;
}

const maxResults = parseLimit(options.limit);

// Fetch and filter
const jobs = await fetchJobs(cutoffMs, statusFilter);
const jobs = await fetchJobs(cutoffMs, statusFilter, maxResults);

// Sort ascending by create_time_ms (oldest first, most recent at bottom)
jobs.sort((a, b) => a.create_time_ms - b.create_time_ms);
Expand Down
14 changes: 14 additions & 0 deletions src/commands/benchmark-run/cancel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { cancelBenchmarkRun } from "../../services/benchmarkService.js";
import { output, outputError } from "../../utils/output.js";

/**
 * CLI handler: cancels the benchmark run identified by `id` and prints the
 * service response.
 *
 * @param id - Identifier of the benchmark run, passed to `cancelBenchmarkRun`.
 * @param options - `output` selects the print format; "json" is the default.
 */
export async function cancelBenchmarkRunCommand(
  id: string,
  options: { output?: string },
): Promise<void> {
  try {
    output(await cancelBenchmarkRun(id), {
      defaultFormat: "json",
      format: options.output,
    });
  } catch (cause) {
    // Any failure (service call or rendering) is reported through the shared error printer.
    outputError("Failed to cancel benchmark run", cause);
  }
}
14 changes: 14 additions & 0 deletions src/commands/benchmark-run/complete.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import { completeBenchmarkRun } from "../../services/benchmarkService.js";
import { output, outputError } from "../../utils/output.js";

/**
 * CLI handler: completes the benchmark run identified by `id` and prints the
 * service response.
 *
 * @param id - Identifier of the benchmark run, passed to `completeBenchmarkRun`.
 * @param options - `output` selects the print format; "json" is the default.
 */
export async function completeBenchmarkRunCommand(
  id: string,
  options: { output?: string },
): Promise<void> {
  try {
    output(await completeBenchmarkRun(id), {
      defaultFormat: "json",
      format: options.output,
    });
  } catch (cause) {
    // Any failure (service call or rendering) is reported through the shared error printer.
    outputError("Failed to complete benchmark run", cause);
  }
}
2 changes: 2 additions & 0 deletions src/commands/blueprint/list.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ const ListBlueprintsUI = ({
executeOperation(selectedBlueprintItem, "view_logs");
}
},
c: () => navigate("blueprint-create"),
o: handleOpenInBrowser,
"/": () => search.enterSearchMode(),
escape: handleListEscape,
Expand Down Expand Up @@ -1008,6 +1009,7 @@ const ListBlueprintsUI = ({
condition: hasMore || hasPrev,
},
{ key: "Enter", label: "Details" },
{ key: "c", label: "Create" },
{ key: "a", label: "Actions" },
{ key: "Tab", label: "Switch tab" },
{ key: "o", label: "Browser" },
Expand Down
117 changes: 117 additions & 0 deletions src/commands/scenario/create.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import { readFile } from "fs/promises";
import { createScenario } from "../../services/scenarioService.js";
import { output, outputError } from "../../utils/output.js";
import { parseMetadata } from "../../utils/metadata.js";
import type { ScenarioCreateParams } from "@runloop/api-client/resources/scenarios/scenarios";

/**
 * Raw option values accepted by `createScenarioCommand`. Values arrive as
 * strings / string arrays and are mapped onto the scenario create request.
 */
interface CreateScenarioOptions {
/** Sent as the `name` of the created scenario. */
name: string;
/** Sent as `input_context.problem_statement`. */
problemStatement: string;
/**
 * Command for a single "command_scorer" scoring function (name "default",
 * weight 1.0). Only used when `scoringFile` is not set.
 */
scoringCommand?: string;
/**
 * Path to a file that is read as UTF-8, JSON-parsed, and used as the whole
 * scoring contract. Takes precedence over `scoringCommand`.
 */
scoringFile?: string;
/** Sent as `environment_parameters.blueprint_id`. */
blueprint?: string;
/** Sent as `environment_parameters.snapshot_id`. */
snapshot?: string;
/** Sent as `environment_parameters.working_directory`. */
workingDirectory?: string;
/** Literal `reference_output`; ignored when `referenceOutputFile` is set. */
referenceOutput?: string;
/** Path to a file whose UTF-8 contents become `reference_output`. */
referenceOutputFile?: string;
/** Raw metadata entries; converted by `parseMetadata` (entry format is defined by that helper). */
metadata?: string[];
/** Sent as `required_environment_variables`. */
requiredEnvVars?: string[];
/** Sent as `required_secret_names`. */
requiredSecrets?: string[];
/** Parsed as a base-10 integer and sent as `scorer_timeout_sec`. */
scorerTimeout?: string;
/**
 * Sent as `validation_type`. The command casts it to
 * "UNSPECIFIED" | "FORWARD" | "REVERSE" | "EVALUATION" without a runtime check.
 */
validationType?: string;
/** Print format passed to `output`; "json" is used when omitted. */
output?: string;
}

/**
 * CLI handler: builds a scenario create request from command-line options,
 * submits it via `createScenario`, and prints the created scenario.
 *
 * The scoring contract comes from `--scoring-file` (JSON, used as-is) when
 * given; otherwise a single "command_scorer" function is built from
 * `--scoring-command`. Optional settings are only added to the request when
 * the corresponding option is present.
 *
 * @param options - Raw option values; see `CreateScenarioOptions`.
 * @returns Resolves once the result or an error has been reported. Failures
 *   (missing scoring option, malformed timeout, file/JSON/service errors) are
 *   reported through `outputError` rather than thrown to the caller.
 */
export async function createScenarioCommand(options: CreateScenarioOptions) {
  try {
    let scoringContract: ScenarioCreateParams["scoring_contract"];

    if (options.scoringFile) {
      // The file wins when both scoring options are supplied. Its parsed JSON
      // is forwarded without local schema validation.
      const contents = await readFile(options.scoringFile, "utf-8");
      scoringContract = JSON.parse(contents);
    } else if (options.scoringCommand) {
      scoringContract = {
        scoring_function_parameters: [
          {
            name: "default",
            weight: 1.0,
            scorer: {
              type: "command_scorer" as const,
              command: options.scoringCommand,
            },
          },
        ],
      };
    } else {
      return outputError(
        "At least one of --scoring-command or --scoring-file is required",
      );
    }

    const params: ScenarioCreateParams = {
      name: options.name,
      input_context: {
        problem_statement: options.problemStatement,
      },
      scoring_contract: scoringContract,
    };

    if (options.blueprint || options.snapshot || options.workingDirectory) {
      params.environment_parameters = {};
      if (options.blueprint) {
        params.environment_parameters.blueprint_id = options.blueprint;
      }
      if (options.snapshot) {
        params.environment_parameters.snapshot_id = options.snapshot;
      }
      if (options.workingDirectory) {
        params.environment_parameters.working_directory =
          options.workingDirectory;
      }
    }

    // A reference-output file takes precedence over an inline value.
    if (options.referenceOutputFile) {
      params.reference_output = await readFile(
        options.referenceOutputFile,
        "utf-8",
      );
    } else if (options.referenceOutput) {
      params.reference_output = options.referenceOutput;
    }

    if (options.metadata) {
      params.metadata = parseMetadata(options.metadata);
    }

    if (options.requiredEnvVars) {
      params.required_environment_variables = options.requiredEnvVars;
    }

    if (options.requiredSecrets) {
      params.required_secret_names = options.requiredSecrets;
    }

    if (options.scorerTimeout) {
      const rawTimeout = options.scorerTimeout.trim();
      // parseInt() alone stops at the first non-digit, so "30s" would become
      // 30, "1e3" would become 1 and "1.5" would become 1. Require the whole
      // value to be an integer before converting it.
      if (!/^[+-]?\d+$/.test(rawTimeout)) {
        return outputError("--scorer-timeout must be a number");
      }
      params.scorer_timeout_sec = parseInt(rawTimeout, 10);
    }

    if (options.validationType) {
      // NOTE(review): the value is not checked at runtime here; an unknown
      // validation type is passed through to createScenario unchanged.
      params.validation_type = options.validationType as
        | "UNSPECIFIED"
        | "FORWARD"
        | "REVERSE"
        | "EVALUATION";
    }

    const scenario = await createScenario(params);
    output(scenario, { format: options.output, defaultFormat: "json" });
  } catch (error) {
    outputError("Failed to create scenario", error);
  }
}
Loading
Loading