This commit is contained in:
team 1
2026-05-12 08:57:57 +02:00
parent 03d4a1d7c3
commit 0d55c0a439
6 changed files with 769 additions and 7 deletions

View File

@@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace App\Controller\Admin;
use App\Security\ApplicationRoles;
use App\Service\Admin\EvalAdminService;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route;
/**
 * Admin pages for inspecting eval cases and reports and for triggering eval runs.
 *
 * Both actions require ApplicationRoles::ROLE_KNOWLEDGE_ADMIN.
 */
#[Route('/admin/evals')]
final class AdminEvalController extends AbstractController
{
    /** Eval type shown when the request names none, or one the service does not list. */
    private const DEFAULT_TYPE = 'retrieval';

    /**
     * Renders the eval dashboard.
     *
     * The optional "type" query parameter selects which per-type report is shown;
     * a blank or unsupported value falls back to the default type.
     */
    #[Route('/', name: 'admin_evals_index', methods: ['GET'])]
    public function index(Request $request, EvalAdminService $evals): Response
    {
        $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

        $requestedType = trim((string) $request->query->get('type', ''));
        $isKnownType = $requestedType !== ''
            && in_array($requestedType, $evals->supportedTypeNames(), true);
        $selectedType = $isKnownType ? $requestedType : self::DEFAULT_TYPE;

        $viewData = [
            'types' => $evals->supportedTypes(),
            'overview' => $evals->overview(),
            'cases_by_type' => $evals->casesByType(),
            'selected_type' => $selectedType,
            'selected_report' => $evals->readTypeReport($selectedType),
            'last_report' => $evals->readLastReport(),
        ];

        return $this->render('admin/evals/index.html.twig', $viewData);
    }

    /**
     * Runs the evals of the posted "type" (optionally narrowed to one "case_id"),
     * reports the outcome as a flash message and redirects back to the dashboard.
     *
     * The form must carry a valid CSRF token for the id "admin_eval_run";
     * otherwise access is denied. Any failure raised by the run is shown as a
     * "danger" flash instead of propagating.
     */
    #[Route('/run', name: 'admin_evals_run', methods: ['POST'])]
    public function run(Request $request, EvalAdminService $evals): Response
    {
        $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

        $form = $request->request;
        $token = (string) $form->get('_token');
        if (!$this->isCsrfTokenValid('admin_eval_run', $token)) {
            throw $this->createAccessDeniedException();
        }

        $type = trim((string) $form->get('type', 'retrieval'));
        $caseId = trim((string) $form->get('case_id', ''));

        try {
            // A blank case id means "run every case of this type".
            $report = $evals->run($type, $caseId === '' ? null : $caseId);

            $failedCount = (int) ($report['failed'] ?? 0);
            $level = $failedCount === 0 ? 'success' : 'danger';
            $summary = sprintf(
                'Eval %s abgeschlossen: %d/%d bestanden.',
                $type,
                (int) ($report['passed'] ?? 0),
                (int) ($report['total'] ?? 0)
            );

            $this->addFlash($level, $summary);
        } catch (\Throwable $e) {
            // Best effort: surface the failure to the admin rather than failing the request.
            $this->addFlash('danger', $e->getMessage());
        }

        // The index action re-validates "type", so an unsupported value is harmless here.
        return $this->redirectToRoute('admin_evals_index', ['type' => $type]);
    }
}

View File

@@ -0,0 +1,227 @@
<?php
declare(strict_types=1);
namespace App\Service\Admin;
use App\Eval\AgentEvalRunner;
use App\Eval\Dto\EvalCase;
use App\Eval\Dto\EvalResult;
use App\Eval\EvalCaseLoader;
use App\Eval\EvalReportWriter;
/**
 * Admin-facing facade over the eval tooling.
 *
 * Lists the supported eval types and their cases, runs cases through the
 * runner, hands the resulting report to the report writer, and reads stored
 * JSON reports back from "<projectDir>/tests/evals/reports".
 */
final readonly class EvalAdminService
{
    /**
     * Supported eval types, keyed by machine name with the display label as value.
     *
     * @var array<string, string>
     */
    private const TYPES = [
        'retrieval' => 'Retrieval',
        'shop_query' => 'Shopquery',
        'followup' => 'Follow-up',
        'answer_guard' => 'Answer-Guard',
    ];

    public function __construct(
        private EvalCaseLoader $caseLoader,
        private AgentEvalRunner $runner,
        private EvalReportWriter $reportWriter,
        private string $projectDir,
    ) {
    }

    /**
     * Returns all supported eval types as machine name => display label.
     *
     * @return array<string, string>
     */
    public function supportedTypes(): array
    {
        return self::TYPES;
    }

    /**
     * Returns only the machine names of the supported eval types.
     *
     * @return array<int, string>
     */
    public function supportedTypeNames(): array
    {
        return array_keys(self::TYPES);
    }

    /**
     * Trims the given type and returns it if it is one of the supported types.
     *
     * @throws \InvalidArgumentException when the trimmed type is not supported
     */
    public function assertSupportedType(string $type): string
    {
        $type = trim($type);
        if (!array_key_exists($type, self::TYPES)) {
            throw new \InvalidArgumentException(sprintf('Unsupported eval type: %s', $type));
        }

        return $type;
    }

    /**
     * Loads the cases of every supported type and reduces each case to its
     * id, type and prompt.
     *
     * @return array<string, array<int, array{id:string,prompt:string,type:string}>>
     */
    public function casesByType(): array
    {
        $casesByType = [];
        foreach (array_keys(self::TYPES) as $type) {
            $casesByType[$type] = array_map(
                static fn (EvalCase $case): array => [
                    'id' => $case->id,
                    'type' => $case->type,
                    'prompt' => $case->prompt,
                ],
                $this->loadCases($type)
            );
        }

        return $casesByType;
    }

    /**
     * Builds one summary row per supported type with the keys "type", "label",
     * "case_count", "report" (the stored per-type report or null) and "status"
     * (see statusFromReport()).
     *
     * @return array<int, array<string, mixed>>
     */
    public function overview(): array
    {
        $overview = [];
        foreach (self::TYPES as $type => $label) {
            $report = $this->readTypeReport($type);
            $overview[] = [
                'type' => $type,
                'label' => $label,
                'case_count' => count($this->loadCases($type)),
                'report' => $report,
                'status' => $this->statusFromReport($report),
            ];
        }

        return $overview;
    }

    /**
     * Runs the cases of one eval type, optionally narrowed to a single case id,
     * and passes the report to the writer twice: once under
     * "<type>-last-run.json" and once under the writer's default name.
     *
     * NOTE(review): a case-filtered run is written to the same per-type file as
     * a full run, so overview() afterwards derives the type's status from that
     * partial report. Confirm this is intended.
     *
     * @param string      $type   supported eval type (surrounding whitespace is ignored)
     * @param string|null $caseId case id to run alone; null or blank runs every case of the type
     *
     * @return array<string, mixed> the report, extended with "written_to" and
     *                              "last_run_written_to" (the values returned by the writer)
     *
     * @throws \InvalidArgumentException when the type is not supported
     * @throws \RuntimeException         when no case is left to run
     */
    public function run(string $type, ?string $caseId = null): array
    {
        $type = $this->assertSupportedType($type);
        $caseId = trim((string) $caseId);

        $cases = $this->loadCases($type);
        if ($caseId !== '') {
            $cases = array_values(array_filter(
                $cases,
                static fn (EvalCase $case): bool => $case->id === $caseId
            ));
        }
        if ($cases === []) {
            throw new \RuntimeException('No eval cases selected.');
        }

        $results = $this->runner->runAll($cases);
        $report = $this->buildReport($type, $caseId !== '' ? $caseId : null, $results);

        $typeReportPath = $this->reportWriter->write($report, sprintf('%s-last-run.json', $type));
        $lastReportPath = $this->reportWriter->write($report);

        $report['written_to'] = $typeReportPath;
        $report['last_run_written_to'] = $lastReportPath;

        return $report;
    }

    /**
     * Reads the stored "<type>-last-run.json" report of a supported type.
     *
     * @return array<string, mixed>|null the decoded report, or null when it is
     *                                   missing, unreadable, blank or not a JSON array/object
     *
     * @throws \InvalidArgumentException when the type is not supported
     */
    public function readTypeReport(string $type): ?array
    {
        $type = $this->assertSupportedType($type);

        return $this->readReportFile($this->reportPath(sprintf('%s-last-run.json', $type)));
    }

    /**
     * Reads the stored "last-run.json" report.
     *
     * @return array<string, mixed>|null the decoded report, or null when it is
     *                                   missing, unreadable, blank or not a JSON array/object
     */
    public function readLastReport(): ?array
    {
        return $this->readReportFile($this->reportPath('last-run.json'));
    }

    /**
     * Loads the cases of a supported type through the case loader.
     *
     * @return array<int, EvalCase>
     *
     * @throws \InvalidArgumentException when the type is not supported
     */
    private function loadCases(string $type): array
    {
        return $this->caseLoader->load($this->assertSupportedType($type));
    }

    /**
     * Resolves a report file name inside the project's report directory.
     *
     * NOTE(review): assumes EvalReportWriter stores its files in this same
     * directory; that is not visible from this class, so confirm.
     */
    private function reportPath(string $fileName): string
    {
        return sprintf('%s/tests/evals/reports/%s', $this->projectDir, $fileName);
    }

    /**
     * Aggregates runner results into the report structure: type, case filter,
     * total/passed/failed counts, an ATOM-formatted timestamp and each result
     * converted with toArray().
     *
     * @param array<int, EvalResult> $results
     *
     * @return array<string, mixed>
     */
    private function buildReport(string $type, ?string $caseId, array $results): array
    {
        $total = count($results);
        $passed = count(array_filter(
            $results,
            static fn (EvalResult $result): bool => $result->passed
        ));

        return [
            'type' => $type,
            'case_filter' => $caseId,
            'total' => $total,
            'passed' => $passed,
            'failed' => $total - $passed,
            'generated_at' => (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
            'results' => array_map(
                static fn (EvalResult $result): array => $result->toArray(),
                $results
            ),
        ];
    }

    /**
     * Reads and decodes a JSON report file on a best-effort basis.
     *
     * @return array<string, mixed>|null null when the path is not a readable
     *                                   file, the content is blank, or the JSON
     *                                   does not decode to an array
     */
    private function readReportFile(string $path): ?array
    {
        // Check readability up front: file_get_contents() raises E_WARNING on an
        // unreadable file, and an error handler that converts warnings into
        // exceptions would fail the request instead of yielding "no report".
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        $raw = file_get_contents($path);
        if (!is_string($raw) || trim($raw) === '') {
            return null;
        }

        $decoded = json_decode($raw, true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Maps a report to a dashboard status: "not_run" without a report, "empty"
     * when its total is zero or less, "green" when nothing failed, otherwise
     * "red". Missing "failed"/"total" keys are treated as 0.
     */
    private function statusFromReport(?array $report): string
    {
        if ($report === null) {
            return 'not_run';
        }

        $failed = (int) ($report['failed'] ?? 0);
        $total = (int) ($report['total'] ?? 0);
        if ($total <= 0) {
            return 'empty';
        }

        return $failed === 0 ? 'green' : 'red';
    }
}