first test suite retrieval

This commit is contained in:
team2
2026-04-22 22:03:23 +02:00
parent 65e2b1917c
commit 8127d33571
8 changed files with 546 additions and 0 deletions

View File

@@ -0,0 +1,149 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Eval\AgentEvalRunner;
use App\Eval\Dto\EvalCase;
use App\Eval\Dto\EvalResult;
use App\Eval\EvalCaseLoader;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Formatter\OutputFormatter;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that loads the eval cases of one type, runs them through
 * the eval runner and prints a report, either human-readable or as JSON.
 *
 * Exit code: Command::FAILURE when loading or running throws, when JSON
 * encoding fails, or when at least one case did not pass. Command::SUCCESS
 * otherwise, including the "no cases selected" situation, which only emits
 * a warning.
 */
#[AsCommand(
    name: 'mto:agent:eval:run',
    description: 'Run versioned eval cases for RetrieX'
)]
final class AgentEvalRunCommand extends Command
{
    /**
     * @param EvalCaseLoader  $loader Loads the eval cases for a given type.
     * @param AgentEvalRunner $runner Executes the selected cases.
     */
    public function __construct(
        private readonly EvalCaseLoader $loader,
        private readonly AgentEvalRunner $runner,
    ) {
        parent::__construct();
    }

    /**
     * Declares the optional "type" argument (default "retrieval") and the
     * "--case" / "--json" options.
     */
    protected function configure(): void
    {
        $this
            ->addArgument(
                'type',
                InputArgument::OPTIONAL,
                'Eval type to run',
                'retrieval'
            )
            ->addOption(
                'case',
                null,
                // A filter without an id is meaningless: VALUE_REQUIRED turns a
                // bare "--case" into an input error instead of silently running
                // every case of the type.
                InputOption::VALUE_REQUIRED,
                'Run only a single case by id'
            )
            ->addOption(
                'json',
                null,
                InputOption::VALUE_NONE,
                'Print the full report as JSON'
            );
    }

    /**
     * Loads, optionally filters, runs and reports the eval cases.
     *
     * @return int Command::SUCCESS or Command::FAILURE (see the class docblock).
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $type = trim((string) $input->getArgument('type'));
        $caseId = trim((string) $input->getOption('case'));
        $asJson = (bool) $input->getOption('json');

        try {
            $cases = $this->loader->load($type);
        } catch (\Throwable $e) {
            // Top-level CLI boundary: report the message and fail the command.
            $io->error($e->getMessage());

            return Command::FAILURE;
        }

        if ($caseId !== '') {
            // array_filter() preserves keys; re-index so $cases stays a list.
            $cases = array_values(array_filter(
                $cases,
                static fn (EvalCase $case): bool => $case->id === $caseId
            ));
        }

        if ($cases === []) {
            $io->warning('No eval cases selected.');

            return Command::SUCCESS;
        }

        try {
            $results = $this->runner->runAll($cases);
        } catch (\Throwable $e) {
            $io->error($e->getMessage());

            return Command::FAILURE;
        }

        $passed = count(array_filter(
            $results,
            static fn (EvalResult $result): bool => $result->passed
        ));
        $failed = count($results) - $passed;
        $exitCode = $failed > 0 ? Command::FAILURE : Command::SUCCESS;

        if ($asJson) {
            $report = [
                'type' => $type,
                'total' => count($results),
                'passed' => $passed,
                'failed' => $failed,
                'results' => array_map(
                    static fn (EvalResult $result): array => $result->toArray(),
                    $results
                ),
            ];

            $json = json_encode(
                $report,
                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
            );
            if (!is_string($json)) {
                $io->error('json_encode failed.');

                return Command::FAILURE;
            }

            // OUTPUT_RAW bypasses the console formatter. In normal mode any
            // "<info>", "</>" or similar sequence inside a JSON string would be
            // interpreted or stripped as markup, corrupting the document.
            $output->writeln($json, OutputInterface::OUTPUT_RAW);

            return $exitCode;
        }

        $io->title('RetrieX Eval Run');
        $io->definitionList(
            ['type' => OutputFormatter::escape($type)],
            ['total' => (string) count($results)],
            ['passed' => (string) $passed],
            ['failed' => (string) $failed]
        );

        foreach ($results as $result) {
            // Case ids and failure messages are data, not markup: escape them so
            // that "<...>" inside them is printed literally.
            $label = OutputFormatter::escape((string) $result->caseId);

            if ($result->passed) {
                $io->writeln(sprintf('<info>PASS</info> %s', $label));

                continue;
            }

            $io->writeln(sprintf('<error>FAIL</error> %s', $label));
            foreach ($result->failures as $failure) {
                $io->writeln(' - ' . OutputFormatter::escape((string) $failure));
            }
        }

        return $exitCode;
    }
}