This commit is contained in:
team 1
2026-05-12 10:56:50 +02:00
parent feaec9bbaf
commit 6dced1c4df
7 changed files with 1409 additions and 5 deletions

View File

@@ -67,4 +67,45 @@ final class AdminEvalController extends AbstractController
'type' => $type,
]);
}
/**
 * Handles the POST that creates a new eval case from the submitted form fields.
 *
 * Access requires ROLE_KNOWLEDGE_ADMIN and a valid "admin_eval_case_create" CSRF token.
 * Any failure while creating the case is reported as a "danger" flash message carrying
 * the exception message; success is reported as a "success" flash message. In both cases
 * the response is a redirect to the eval index, filtered by the case type (falling back
 * to "retrieval" when the type is not one of the supported type names).
 */
#[Route('/case/create', name: 'admin_evals_case_create', methods: ['POST'])]
public function createCase(Request $request, EvalAdminService $evals): Response
{
    $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

    $form = $request->request;
    $csrfToken = (string) $form->get('_token');
    if (!$this->isCsrfTokenValid('admin_eval_case_create', $csrfToken)) {
        throw $this->createAccessDeniedException();
    }

    $type = trim((string) $form->get('type', 'retrieval'));

    // The remaining fields are read lazily inside the try block so that a failing
    // read is reported through the same flash message as a failing creation.
    $field = static fn (string $name): string => (string) $form->get($name, '');

    try {
        $created = $evals->createCase(
            type: $type,
            id: $field('id'),
            prompt: $field('prompt'),
            assertJson: $field('assert_json'),
            historyJson: $field('history_json'),
            requestContextHint: $field('request_context_hint'),
        );

        $type = (string) ($created['type'] ?? $type);
        $caseId = (string) ($created['id'] ?? '');
        $this->addFlash(
            'success',
            sprintf('Eval-Case "%s" wurde in %s.ndjson gespeichert.', $caseId, $type)
        );
    } catch (\Throwable $e) {
        $this->addFlash('danger', $e->getMessage());
    }

    // Never redirect with a type the index page does not know.
    $redirectType = in_array($type, $evals->supportedTypeNames(), true) ? $type : 'retrieval';

    return $this->redirectToRoute('admin_evals_index', [
        'type' => $redirectType,
    ]);
}
}

View File

@@ -74,6 +74,7 @@ final readonly class ShopQueryEvalRunner
details: [
'prompt' => $case->prompt,
'history_turns' => count($case->history),
'history' => $this->buildHistoryPreview($case->history),
'has_request_context_hint' => $case->requestContextHint !== '',
'query' => $shopMeta['query'],
'individual_queries' => $shopMeta['individual_queries'],
@@ -82,6 +83,31 @@ final readonly class ShopQueryEvalRunner
);
}
/**
 * Turns the conversation history of an eval case into a compact list for the report details.
 *
 * Turns whose prompt and answer are both empty after trimming are left out. A turn
 * without a prompt is labelled "Eval-Kontext"; answers are shortened via previewText()
 * with a limit of 260.
 *
 * @param array<int, array{prompt:string,answer:string}> $history
 * @return array<int, array{prompt:string,answer_preview:string}>
 */
private function buildHistoryPreview(array $history): array
{
    $rows = [];

    foreach ($history as $entry) {
        $question = trim((string) ($entry['prompt'] ?? ''));
        $reply = trim((string) ($entry['answer'] ?? ''));

        if ($question !== '' || $reply !== '') {
            $rows[] = [
                'prompt' => $question === '' ? 'Eval-Kontext' : $question,
                'answer_preview' => $this->previewText($reply, 260),
            ];
        }
    }

    return $rows;
}
private function buildUserId(EvalCase $case): string
{
$safeId = preg_replace('/[^a-zA-Z0-9_-]+/', '_', $case->id) ?? $case->id;
@@ -349,14 +375,15 @@ final readonly class ShopQueryEvalRunner
return array_values(array_unique($out));
}
/**
 * Shortens a text for display in eval report details.
 *
 * The value is first passed through normalizeOneLine() (defined elsewhere in this
 * class; presumably collapses the text onto a single line - confirm there). If the
 * result is longer than the limit, it is cut at the limit, trailing whitespace is
 * removed and "..." is appended, so a truncated preview can be up to three
 * characters longer than the limit.
 *
 * @param string $value     Raw text to preview.
 * @param int    $maxLength Maximum number of characters kept before truncation;
 *                          values below 40 are raised to 40.
 */
private function previewText(string $value, int $maxLength = 1200): string
{
    $value = $this->normalizeOneLine($value);
    // Guard against limits so small that the preview would be useless.
    $maxLength = max(40, $maxLength);

    if (mb_strlen($value, 'UTF-8') <= $maxLength) {
        return $value;
    }

    return rtrim(mb_substr($value, 0, $maxLength, 'UTF-8')) . '...';
}
}

View File

@@ -145,6 +145,83 @@ final readonly class EvalAdminService
}
/**
 * Validates a new eval case and appends it as one NDJSON record to the case file of its type.
 *
 * Validation order: supported type, case ID format, non-empty prompt, ID uniqueness
 * across all types, assert JSON, history JSON, and finally the regular EvalCase DTO
 * validation. Nothing is written unless all checks pass.
 *
 * @param string $type               Eval type name; must be a supported type.
 * @param string $id                 New case ID (letters, digits, "_" and "-").
 * @param string $prompt             Eval prompt; must not be empty after trimming.
 * @param string $assertJson         JSON object with the assertions; empty means no assertions.
 * @param string $historyJson        Optional JSON list of previous turns.
 * @param string $requestContextHint Optional hint; stored only when not empty after trimming.
 *
 * @return array{type:string,id:string,path:string,row:array<string,mixed>,case_count:int}
 *
 * @throws \InvalidArgumentException When the ID, the prompt or one of the JSON inputs is invalid.
 * @throws \RuntimeException         When the ID already exists or the case file cannot be written.
 * @throws \JsonException            When the row cannot be encoded as JSON.
 */
public function createCase(
    string $type,
    string $id,
    string $prompt,
    string $assertJson,
    string $historyJson = '',
    string $requestContextHint = '',
): array {
    $type = $this->assertSupportedType($type);
    $id = $this->normalizeNewCaseId($id);
    $prompt = trim($prompt);
    $requestContextHint = trim($requestContextHint);

    if ($prompt === '') {
        throw new \InvalidArgumentException('Der Eval-Prompt darf nicht leer sein.');
    }

    if ($this->caseIdExists($id)) {
        throw new \RuntimeException(sprintf(
            'Ein Eval-Case mit der ID "%s" existiert bereits. Bitte eine neue ID verwenden.',
            $id
        ));
    }

    $assert = $this->decodeJsonObject($assertJson, 'Assert-JSON');
    $history = $this->decodeHistoryJson($historyJson);

    $row = [
        'id' => $id,
        'type' => $type,
        'prompt' => $prompt,
        'assert' => $assert,
    ];
    // Optional keys are only written when they carry a value.
    if ($history !== []) {
        $row['history'] = $history;
    }
    if ($requestContextHint !== '') {
        $row['request_context_hint'] = $requestContextHint;
    }

    // Reuse the regular DTO validation before writing the case file.
    EvalCase::fromArray($row);

    $path = $this->caseFilePath($type);
    $line = json_encode(
        $row,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR
    );
    $this->appendCaseLine($path, $line);

    return [
        'type' => $type,
        'id' => $id,
        'path' => $path,
        'row' => $row,
        'case_count' => count($this->loadCases($type)),
    ];
}

/**
 * Appends one NDJSON record to $path while holding an exclusive lock.
 *
 * The "does the file end with a newline?" probe and the append run under the same
 * lock, so two concurrent writers cannot glue their records onto one line. Only the
 * last byte of the file is read, and records are always terminated with "\n"
 * regardless of the platform.
 *
 * @throws \RuntimeException When the file cannot be opened, locked or fully written.
 */
private function appendCaseLine(string $path, string $line): void
{
    $failure = sprintf('Eval-Case-Datei konnte nicht geschrieben werden: %s', $path);

    // "a+" creates the file if needed; writes always go to the end, reads may seek.
    $handle = fopen($path, 'a+b');
    if ($handle === false) {
        throw new \RuntimeException($failure);
    }

    try {
        if (!flock($handle, LOCK_EX)) {
            throw new \RuntimeException($failure);
        }

        $separator = '';
        $stat = fstat($handle);
        $size = is_array($stat) ? (int) $stat['size'] : 0;
        if ($size > 0 && fseek($handle, -1, SEEK_END) === 0) {
            $lastByte = fread($handle, 1);
            // Repair a file whose last record is missing its line terminator.
            if (is_string($lastByte) && $lastByte !== '' && $lastByte !== "\n") {
                $separator = "\n";
            }
        }

        $payload = $separator . $line . "\n";
        $written = fwrite($handle, $payload);
        if ($written === false || $written !== strlen($payload)) {
            throw new \RuntimeException($failure);
        }
        fflush($handle);
    } finally {
        flock($handle, LOCK_UN);
        fclose($handle);
    }
}
/**
* @param array<int, EvalCase> $cases
* @return array<int, EvalCase>
@@ -249,6 +326,123 @@ final readonly class EvalAdminService
return $decoded;
}
/**
 * Trims a user-supplied case ID and checks that it is usable as a new eval case ID.
 *
 * @return string The trimmed ID.
 *
 * @throws \InvalidArgumentException When the ID is empty or contains characters other
 *                                   than letters, digits, "_" and "-", or does not
 *                                   start with a letter or digit.
 */
private function normalizeNewCaseId(string $id): string
{
    $candidate = trim($id);

    if ($candidate === '') {
        throw new \InvalidArgumentException('Die Eval-Case-ID darf nicht leer sein.');
    }

    $hasValidFormat = preg_match('/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/', $candidate) === 1;
    if (!$hasValidFormat) {
        throw new \InvalidArgumentException(
            'Die Eval-Case-ID darf nur Buchstaben, Zahlen, Unterstriche und Bindestriche enthalten und muss mit einem Buchstaben oder einer Zahl beginnen.'
        );
    }

    return $candidate;
}
/**
 * Tells whether any case of any type listed in self::TYPES already uses the given ID.
 *
 * The comparison is strict and case-sensitive; the search stops at the first match.
 */
private function caseIdExists(string $id): bool
{
    foreach (array_keys(self::TYPES) as $typeName) {
        foreach ($this->loadCases($typeName) as $existingCase) {
            if ($id === $existingCase->id) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Decodes a JSON object from form input into an associative array.
 *
 * Blank input yields an empty array. Scalars and non-empty JSON lists (for example
 * "[1, 2]") are rejected because the caller expects a key/value object; an empty
 * "[]" is tolerated since it cannot be told apart from "{}" after decoding.
 *
 * @param string $json  Raw JSON text.
 * @param string $label Human-readable field name used in the error messages.
 *
 * @return array<string, mixed>
 *
 * @throws \InvalidArgumentException When the text is not valid JSON or not a JSON object;
 *                                   a JSON syntax error is attached as previous exception.
 */
private function decodeJsonObject(string $json, string $label): array
{
    $json = trim($json);
    if ($json === '') {
        return [];
    }

    try {
        $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        throw new \InvalidArgumentException(
            sprintf('%s ist ungültig: %s', $label, $e->getMessage()),
            0,
            $e
        );
    }

    $isObject = is_array($decoded) && ($decoded === [] || !array_is_list($decoded));
    if (!$isObject) {
        throw new \InvalidArgumentException(sprintf('%s muss ein JSON-Objekt sein.', $label));
    }

    return $decoded;
}
/**
 * Decodes the optional conversation history from form input.
 *
 * The input must be a JSON list. Each entry is either a plain string (stored as the
 * answer of a turn labelled "Eval-Kontext") or an object with "prompt" and one of
 * "answer", "response" or "answer_preview". Entries of any other kind and entries
 * without any text are skipped. A single JSON object is rejected rather than being
 * iterated value by value, which would otherwise produce one bogus turn per property.
 * Non-scalar prompt/answer values are treated as empty.
 *
 * @return array<int, array{prompt:string,answer:string}>
 *
 * @throws \InvalidArgumentException When the text is not valid JSON or not a JSON list;
 *                                   a JSON syntax error is attached as previous exception.
 */
private function decodeHistoryJson(string $json): array
{
    $json = trim($json);
    if ($json === '') {
        return [];
    }

    try {
        $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        throw new \InvalidArgumentException(
            sprintf('History-JSON ist ungültig: %s', $e->getMessage()),
            0,
            $e
        );
    }

    if (!is_array($decoded) || !array_is_list($decoded)) {
        throw new \InvalidArgumentException('History-JSON muss eine JSON-Liste sein.');
    }

    // Nested arrays/objects cannot be cast to string meaningfully; treat them as empty.
    $text = static fn (mixed $value): string => is_scalar($value) ? trim((string) $value) : '';

    $history = [];
    foreach ($decoded as $entry) {
        if (is_string($entry)) {
            // A bare string is shorthand for a context-only turn.
            $entry = ['answer' => $entry];
        }
        if (!is_array($entry)) {
            continue;
        }

        $prompt = $text($entry['prompt'] ?? '');
        $answer = $text($entry['answer'] ?? $entry['response'] ?? $entry['answer_preview'] ?? '');
        if ($prompt === '' && $answer === '') {
            continue;
        }

        $history[] = [
            'prompt' => $prompt !== '' ? $prompt : 'Eval-Kontext',
            'answer' => $answer,
        ];
    }

    return $history;
}
/**
 * Returns the path of the NDJSON case file for the given eval type below the project directory.
 *
 * The type is passed through assertSupportedType() first and the value it returns
 * is the one used in the file name.
 */
private function caseFilePath(string $type): string
{
    $supportedType = $this->assertSupportedType($type);
    $projectRoot = $this->projectDir;

    return sprintf('%s/tests/evals/cases/%s.ndjson', $projectRoot, $supportedType);
}
private function statusFromReport(?array $report): string
{
if ($report === null) {