p100
This commit is contained in:
68
src/Controller/Admin/AdminEvalController.php
Normal file
68
src/Controller/Admin/AdminEvalController.php
Normal file
@@ -0,0 +1,68 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Controller\Admin;
|
||||
|
||||
use App\Security\ApplicationRoles;
|
||||
use App\Service\Admin\EvalAdminService;
|
||||
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
|
||||
use Symfony\Component\HttpFoundation\Request;
|
||||
use Symfony\Component\HttpFoundation\Response;
|
||||
use Symfony\Component\Routing\Attribute\Route;
|
||||
|
||||
/**
 * Admin screens for browsing eval reports and triggering eval runs.
 *
 * Every action is guarded by ApplicationRoles::ROLE_KNOWLEDGE_ADMIN.
 */
#[Route('/admin/evals')]
final class AdminEvalController extends AbstractController
{
    /**
     * Renders the eval dashboard.
     *
     * The optional "type" query parameter selects which per-type report is
     * passed to the template as "selected_report"; a blank or unsupported
     * value falls back to "retrieval".
     */
    #[Route('/', name: 'admin_evals_index', methods: ['GET'])]
    public function index(Request $request, EvalAdminService $evals): Response
    {
        $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

        $requestedType = trim((string) $request->query->get('type', ''));

        // Only consult the service when something was actually requested.
        $isSupported = $requestedType !== ''
            && in_array($requestedType, $evals->supportedTypeNames(), true);
        $selectedType = $isSupported ? $requestedType : 'retrieval';

        $context = [
            'types' => $evals->supportedTypes(),
            'overview' => $evals->overview(),
            'cases_by_type' => $evals->casesByType(),
            'selected_type' => $selectedType,
            'selected_report' => $evals->readTypeReport($selectedType),
            'last_report' => $evals->readLastReport(),
        ];

        return $this->render('admin/evals/index.html.twig', $context);
    }

    /**
     * Runs the evals of one type (optionally a single case) and redirects
     * back to the dashboard with a flash message describing the outcome.
     *
     * Reads "_token", "type" and "case_id" from the POST body. An invalid
     * CSRF token results in an access-denied exception. Any failure raised
     * while running is reported as a "danger" flash instead of propagating.
     */
    #[Route('/run', name: 'admin_evals_run', methods: ['POST'])]
    public function run(Request $request, EvalAdminService $evals): Response
    {
        $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

        $form = $request->request;

        $csrfToken = (string) $form->get('_token');
        if (!$this->isCsrfTokenValid('admin_eval_run', $csrfToken)) {
            throw $this->createAccessDeniedException();
        }

        $type = trim((string) $form->get('type', 'retrieval'));
        $caseId = trim((string) $form->get('case_id', ''));

        // A blank case id means "run every case of this type".
        $caseFilter = null;
        if ($caseId !== '') {
            $caseFilter = $caseId;
        }

        try {
            $report = $evals->run($type, $caseFilter);

            $failedCount = (int) ($report['failed'] ?? 0);
            $passedCount = (int) ($report['passed'] ?? 0);
            $totalCount = (int) ($report['total'] ?? 0);

            $level = 'danger';
            if ($failedCount === 0) {
                $level = 'success';
            }

            $this->addFlash(
                $level,
                sprintf('Eval %s abgeschlossen: %d/%d bestanden.', $type, $passedCount, $totalCount)
            );
        } catch (\Throwable $e) {
            // Surface the failure reason to the admin rather than a 500 page.
            $this->addFlash('danger', $e->getMessage());
        }

        return $this->redirectToRoute('admin_evals_index', ['type' => $type]);
    }
}
|
||||
227
src/Service/Admin/EvalAdminService.php
Normal file
227
src/Service/Admin/EvalAdminService.php
Normal file
@@ -0,0 +1,227 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Service\Admin;
|
||||
|
||||
use App\Eval\AgentEvalRunner;
|
||||
use App\Eval\Dto\EvalCase;
|
||||
use App\Eval\Dto\EvalResult;
|
||||
use App\Eval\EvalCaseLoader;
|
||||
use App\Eval\EvalReportWriter;
|
||||
|
||||
/**
 * Backs the admin eval screens: lists supported eval types and their cases,
 * runs evals through the injected runner, and reads previously written
 * JSON reports from disk.
 */
final readonly class EvalAdminService
{
    /**
     * Supported eval type names mapped to their display labels.
     *
     * @var array<string, string>
     */
    private const TYPES = [
        'retrieval' => 'Retrieval',
        'shop_query' => 'Shopquery',
        'followup' => 'Follow-up',
        'answer_guard' => 'Answer-Guard',
    ];

    /** Directory (relative to the project dir) this service reads reports from. */
    private const REPORT_DIR = 'tests/evals/reports';

    /** File name of the report written without an explicit name in run(). */
    private const LAST_RUN_FILE = 'last-run.json';

    /** sprintf() pattern for the per-type report file name. */
    private const TYPE_REPORT_FILE_PATTERN = '%s-last-run.json';

    public function __construct(
        private EvalCaseLoader $caseLoader,
        private AgentEvalRunner $runner,
        private EvalReportWriter $reportWriter,
        private string $projectDir,
    ) {
    }

    /**
     * Returns every supported eval type with its display label.
     *
     * @return array<string, string> type name => label
     */
    public function supportedTypes(): array
    {
        return self::TYPES;
    }

    /**
     * Returns only the supported eval type names.
     *
     * @return array<int, string>
     */
    public function supportedTypeNames(): array
    {
        return array_keys(self::TYPES);
    }

    /**
     * Validates an eval type name and returns it trimmed.
     *
     * @throws \InvalidArgumentException when the trimmed name is not a supported type
     */
    public function assertSupportedType(string $type): string
    {
        $type = trim($type);

        if (!array_key_exists($type, self::TYPES)) {
            throw new \InvalidArgumentException(sprintf('Unsupported eval type: %s', $type));
        }

        return $type;
    }

    /**
     * Loads the cases of every supported type, reduced to id, type and prompt.
     *
     * @return array<string, array<int, array{id:string,prompt:string,type:string}>>
     */
    public function casesByType(): array
    {
        $casesByType = [];

        foreach (array_keys(self::TYPES) as $type) {
            $casesByType[$type] = array_map(
                static fn (EvalCase $case): array => [
                    'id' => $case->id,
                    'type' => $case->type,
                    'prompt' => $case->prompt,
                ],
                $this->loadCases($type)
            );
        }

        return $casesByType;
    }

    /**
     * Builds one summary row per supported type: label, number of cases,
     * the stored per-type report (if any) and a status derived from it.
     *
     * @return array<int, array<string, mixed>>
     */
    public function overview(): array
    {
        $overview = [];

        foreach (self::TYPES as $type => $label) {
            $cases = $this->loadCases($type);
            $report = $this->readTypeReport($type);

            $overview[] = [
                'type' => $type,
                'label' => $label,
                'case_count' => count($cases),
                'report' => $report,
                'status' => $this->statusFromReport($report),
            ];
        }

        return $overview;
    }

    /**
     * Runs the cases of one type, optionally restricted to a single case id,
     * and writes the resulting report twice: under the per-type file name and
     * under the writer's default name.
     *
     * @param string      $type   supported eval type name
     * @param string|null $caseId case id to run; null or blank runs every case of the type
     *
     * @return array<string, mixed> the report, extended with "written_to" and
     *                              "last_run_written_to" (values returned by the writer)
     *
     * @throws \InvalidArgumentException when the type is not supported
     * @throws \RuntimeException         when no case is left to run
     */
    public function run(string $type, ?string $caseId = null): array
    {
        $type = $this->assertSupportedType($type);
        $caseId = trim((string) $caseId);
        $cases = $this->loadCases($type);

        if ($caseId !== '') {
            $cases = array_values(array_filter(
                $cases,
                static fn (EvalCase $case): bool => $case->id === $caseId
            ));
        }

        if ($cases === []) {
            throw new \RuntimeException('No eval cases selected.');
        }

        $results = $this->runner->runAll($cases);
        $report = $this->buildReport($type, $caseId !== '' ? $caseId : null, $results);

        $typeReportPath = $this->reportWriter->write($report, $this->typeReportFileName($type));
        $lastReportPath = $this->reportWriter->write($report);

        $report['written_to'] = $typeReportPath;
        $report['last_run_written_to'] = $lastReportPath;

        return $report;
    }

    /**
     * Reads the stored report of one eval type.
     *
     * @return array<string, mixed>|null null when no usable report file exists
     *
     * @throws \InvalidArgumentException when the type is not supported
     */
    public function readTypeReport(string $type): ?array
    {
        $type = $this->assertSupportedType($type);

        return $this->readReportFile($this->reportPath($this->typeReportFileName($type)));
    }

    /**
     * Reads the report of the most recent run regardless of type.
     *
     * @return array<string, mixed>|null null when no usable report file exists
     */
    public function readLastReport(): ?array
    {
        return $this->readReportFile($this->reportPath(self::LAST_RUN_FILE));
    }

    /**
     * Loads the cases of a validated type through the case loader.
     *
     * @return array<int, EvalCase>
     */
    private function loadCases(string $type): array
    {
        return $this->caseLoader->load($this->assertSupportedType($type));
    }

    /**
     * File name of the per-type report; shared by run() and readTypeReport()
     * so the name written and the name read cannot drift apart.
     */
    private function typeReportFileName(string $type): string
    {
        return sprintf(self::TYPE_REPORT_FILE_PATTERN, $type);
    }

    /**
     * Absolute path of a report file inside the reports directory.
     */
    private function reportPath(string $fileName): string
    {
        return sprintf('%s/%s/%s', $this->projectDir, self::REPORT_DIR, $fileName);
    }

    /**
     * Assembles the report array from the runner results.
     *
     * @param array<int, EvalResult> $results
     *
     * @return array<string, mixed>
     */
    private function buildReport(string $type, ?string $caseId, array $results): array
    {
        $total = count($results);
        $passed = count(array_filter(
            $results,
            static fn (EvalResult $result): bool => $result->passed
        ));

        return [
            'type' => $type,
            'case_filter' => $caseId,
            'total' => $total,
            'passed' => $passed,
            'failed' => $total - $passed,
            'generated_at' => (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
            'results' => array_map(
                static fn (EvalResult $result): array => $result->toArray(),
                $results
            ),
        ];
    }

    /**
     * Reads and decodes a JSON report file on a best-effort basis.
     *
     * @return array<string, mixed>|null null when the file is missing, unreadable,
     *                                   blank, or does not decode to an array
     */
    private function readReportFile(string $path): ?array
    {
        // file_get_contents() raises an E_WARNING on an unreadable file, which
        // error handlers may turn into an exception; check readability first so
        // this stays a quiet "no report" result.
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        $raw = file_get_contents($path);

        if (!is_string($raw) || trim($raw) === '') {
            return null;
        }

        $decoded = json_decode($raw, true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Maps a stored report to a status keyword: "not_run" without a report,
     * "empty" when its total is not positive, otherwise "green" when nothing
     * failed and "red" when something did.
     *
     * @param array<string, mixed>|null $report
     */
    private function statusFromReport(?array $report): string
    {
        if ($report === null) {
            return 'not_run';
        }

        $failed = (int) ($report['failed'] ?? 0);
        $total = (int) ($report['total'] ?? 0);

        if ($total <= 0) {
            return 'empty';
        }

        return $failed === 0 ? 'green' : 'red';
    }
}
|
||||
Reference in New Issue
Block a user