p101
This commit is contained in:
@@ -67,4 +67,45 @@ final class AdminEvalController extends AbstractController
|
||||
'type' => $type,
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Handles the POST of the "create eval case" admin form.
 *
 * Requires ROLE_KNOWLEDGE_ADMIN and a valid "admin_eval_case_create" CSRF token,
 * hands the submitted fields to EvalAdminService::createCase(), reports the
 * outcome as a flash message and redirects to the "admin_evals_index" route.
 * Any failure raised while creating the case is shown as a "danger" flash
 * instead of aborting the request.
 */
#[Route('/case/create', name: 'admin_evals_case_create', methods: ['POST'])]
public function createCase(Request $request, EvalAdminService $evals): Response
{
    $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

    $form = $request->request;
    $csrfToken = (string) $form->get('_token');

    if (!$this->isCsrfTokenValid('admin_eval_case_create', $csrfToken)) {
        throw $this->createAccessDeniedException();
    }

    $type = trim((string) $form->get('type', 'retrieval'));

    try {
        $result = $evals->createCase(
            type: $type,
            id: (string) $form->get('id', ''),
            prompt: (string) $form->get('prompt', ''),
            assertJson: (string) $form->get('assert_json', ''),
            historyJson: (string) $form->get('history_json', ''),
            requestContextHint: (string) $form->get('request_context_hint', ''),
        );

        // Prefer the type reported back by the service for the message and the redirect.
        $type = (string) ($result['type'] ?? $type);
        $caseId = (string) ($result['id'] ?? '');

        $this->addFlash('success', sprintf('Eval-Case "%s" wurde in %s.ndjson gespeichert.', $caseId, $type));
    } catch (\Throwable $e) {
        $this->addFlash('danger', $e->getMessage());
    }

    // Never redirect with a type the service does not know; fall back to the default tab.
    $redirectType = in_array($type, $evals->supportedTypeNames(), true) ? $type : 'retrieval';

    return $this->redirectToRoute('admin_evals_index', ['type' => $redirectType]);
}
|
||||
|
||||
}
|
||||
|
||||
@@ -74,6 +74,7 @@ final readonly class ShopQueryEvalRunner
|
||||
details: [
|
||||
'prompt' => $case->prompt,
|
||||
'history_turns' => count($case->history),
|
||||
'history' => $this->buildHistoryPreview($case->history),
|
||||
'has_request_context_hint' => $case->requestContextHint !== '',
|
||||
'query' => $shopMeta['query'],
|
||||
'individual_queries' => $shopMeta['individual_queries'],
|
||||
@@ -82,6 +83,31 @@ final readonly class ShopQueryEvalRunner
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds a shortened view of the conversation history for the result details.
 *
 * Turns whose prompt and answer are both empty after trimming are left out.
 * An empty prompt is replaced by the placeholder "Eval-Kontext"; the answer is
 * passed through previewText() with a limit of 260.
 *
 * @param array<int, array{prompt:string,answer:string}> $history
 * @return array<int, array{prompt:string,answer_preview:string}>
 */
private function buildHistoryPreview(array $history): array
{
    $rows = [];

    foreach ($history as $entry) {
        $question = trim((string) ($entry['prompt'] ?? ''));
        $reply = trim((string) ($entry['answer'] ?? ''));

        $isEmptyTurn = $question === '' && $reply === '';

        if (!$isEmptyTurn) {
            $rows[] = [
                'prompt' => $question === '' ? 'Eval-Kontext' : $question,
                'answer_preview' => $this->previewText($reply, 260),
            ];
        }
    }

    return $rows;
}
|
||||
|
||||
private function buildUserId(EvalCase $case): string
|
||||
{
|
||||
$safeId = preg_replace('/[^a-zA-Z0-9_-]+/', '_', $case->id) ?? $case->id;
|
||||
@@ -349,14 +375,15 @@ final readonly class ShopQueryEvalRunner
|
||||
return array_values(array_unique($out));
|
||||
}
|
||||
|
||||
/**
 * Returns a shortened preview of a text.
 *
 * The value is first passed through normalizeOneLine(). If the result is longer
 * than the limit, it is cut to the limit (multibyte-safe, UTF-8), trailing
 * whitespace is removed and "..." is appended.
 *
 * @param string $value     Text to shorten.
 * @param int    $maxLength Maximum number of characters kept before the ellipsis;
 *                          values below 40 are raised to 40. The default of 1200
 *                          matches the limit that was previously hard-coded.
 */
private function previewText(string $value, int $maxLength = 1200): string
{
    $value = $this->normalizeOneLine($value);
    $maxLength = max(40, $maxLength);

    if (mb_strlen($value, 'UTF-8') <= $maxLength) {
        return $value;
    }

    return rtrim(mb_substr($value, 0, $maxLength, 'UTF-8')) . '...';
}
|
||||
}
|
||||
|
||||
@@ -145,6 +145,83 @@ final readonly class EvalAdminService
|
||||
}
|
||||
|
||||
|
||||
/**
 * Validates a new eval case and appends it as one NDJSON line to the case file of its type.
 *
 * Steps: validate type and ID, reject an empty prompt and an already used ID,
 * decode the assert and history JSON, run the row through EvalCase::fromArray()
 * as a final validation, then append the encoded row to the case file.
 * "history" and "request_context_hint" are only written when non-empty.
 *
 * @return array{type:string,id:string,path:string,row:array<string,mixed>,case_count:int}
 *
 * @throws \InvalidArgumentException When the prompt is empty or one of the inputs fails validation.
 * @throws \RuntimeException         When the ID already exists or the case file cannot be written.
 * @throws \JsonException            When the row cannot be encoded as JSON.
 */
public function createCase(
    string $type,
    string $id,
    string $prompt,
    string $assertJson,
    string $historyJson = '',
    string $requestContextHint = '',
): array {
    $type = $this->assertSupportedType($type);
    $id = $this->normalizeNewCaseId($id);
    $prompt = trim($prompt);
    $requestContextHint = trim($requestContextHint);

    if ($prompt === '') {
        throw new \InvalidArgumentException('Der Eval-Prompt darf nicht leer sein.');
    }

    if ($this->caseIdExists($id)) {
        throw new \RuntimeException(sprintf(
            'Ein Eval-Case mit der ID "%s" existiert bereits. Bitte eine neue ID verwenden.',
            $id
        ));
    }

    $assert = $this->decodeJsonObject($assertJson, 'Assert-JSON');
    $history = $this->decodeHistoryJson($historyJson);

    $row = [
        'id' => $id,
        'type' => $type,
        'prompt' => $prompt,
        'assert' => $assert,
    ];

    if ($history !== []) {
        $row['history'] = $history;
    }

    if ($requestContextHint !== '') {
        $row['request_context_hint'] = $requestContextHint;
    }

    // Reuse the regular DTO validation before writing the case file.
    EvalCase::fromArray($row);

    $path = $this->caseFilePath($type);
    $line = json_encode(
        $row,
        JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR
    );

    // Only the last byte decides whether a separator is needed, so read just that
    // byte instead of loading the whole case file. The stat cache is cleared first
    // so is_file()/filesize() do not report a stale state for this path.
    clearstatcache(true, $path);

    $prefix = '';
    if (is_file($path) && filesize($path) > 0) {
        $lastByte = file_get_contents($path, offset: -1);
        if (is_string($lastByte) && $lastByte !== '' && $lastByte !== "\n") {
            $prefix = "\n";
        }
    }

    // Terminate the record with "\n" on every platform: the separator check above
    // looks for "\n", and PHP_EOL would write "\r\n" on Windows.
    $written = file_put_contents($path, $prefix . $line . "\n", FILE_APPEND | LOCK_EX);
    if ($written === false) {
        throw new \RuntimeException(sprintf('Eval-Case-Datei konnte nicht geschrieben werden: %s', $path));
    }

    return [
        'type' => $type,
        'id' => $id,
        'path' => $path,
        'row' => $row,
        // Re-read the file so the count reflects what is actually stored now.
        'case_count' => count($this->loadCases($type)),
    ];
}
|
||||
|
||||
|
||||
/**
|
||||
* @param array<int, EvalCase> $cases
|
||||
* @return array<int, EvalCase>
|
||||
@@ -249,6 +326,123 @@ final readonly class EvalAdminService
|
||||
return $decoded;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Trims and validates the ID of a case that is about to be created.
 *
 * The trimmed ID must be non-empty, start with an ASCII letter or digit and
 * otherwise consist only of ASCII letters, digits, underscores and hyphens.
 *
 * @throws \InvalidArgumentException When the ID is empty or contains other characters.
 */
private function normalizeNewCaseId(string $id): string
{
    $candidate = trim($id);

    if ($candidate === '') {
        throw new \InvalidArgumentException('Die Eval-Case-ID darf nicht leer sein.');
    }

    $isWellFormed = preg_match('/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/', $candidate) === 1;

    if (!$isWellFormed) {
        throw new \InvalidArgumentException(
            'Die Eval-Case-ID darf nur Buchstaben, Zahlen, Unterstriche und Bindestriche enthalten und muss mit einem Buchstaben oder einer Zahl beginnen.'
        );
    }

    return $candidate;
}
|
||||
|
||||
/**
 * Reports whether any case of any known type already uses the given ID.
 *
 * All types listed in self::TYPES are searched, so the check applies across
 * case files and not only within the target type.
 */
private function caseIdExists(string $id): bool
{
    foreach (array_keys(self::TYPES) as $typeName) {
        $knownIds = array_map(
            static fn ($existing) => $existing->id,
            $this->loadCases($typeName)
        );

        if (in_array($id, $knownIds, true)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Decodes a JSON object entered in the admin form into an associative array.
 *
 * An empty (or whitespace-only) input yields an empty array. Top-level JSON
 * lists and scalars are rejected; the empty list "[]" is still accepted as an
 * empty value, as it was before.
 *
 * @param string $json  Raw JSON text.
 * @param string $label Field name used as prefix in error messages.
 *
 * @return array<string, mixed>
 *
 * @throws \InvalidArgumentException When the text is not valid JSON or not a JSON object.
 */
private function decodeJsonObject(string $json, string $label): array
{
    $json = trim($json);

    if ($json === '') {
        return [];
    }

    try {
        $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        // Keep the original exception attached so the parse error stays traceable.
        throw new \InvalidArgumentException(sprintf('%s ist ungültig: %s', $label, $e->getMessage()), 0, $e);
    }

    // After decoding to arrays, objects and lists look alike, so the leading
    // character of the (already trimmed, valid) JSON text decides: an object
    // always starts with "{".
    $isObject = is_array($decoded) && ($decoded === [] || str_starts_with($json, '{'));

    if (!$isObject) {
        throw new \InvalidArgumentException(sprintf('%s muss ein JSON-Objekt sein.', $label));
    }

    return $decoded;
}
|
||||
|
||||
/**
 * Decodes the optional history JSON from the admin form into history turns.
 *
 * Accepted list entries:
 *  - a non-empty string: stored as answer with the placeholder prompt "Eval-Kontext";
 *  - an object: "prompt" plus the first present key of "answer", "response",
 *    "answer_preview"; an empty prompt becomes "Eval-Kontext".
 * Entries of any other kind, and entries whose prompt and answer are both empty,
 * are skipped. Non-scalar prompt/answer values are treated as missing.
 *
 * @return array<int, array{prompt:string,answer:string}>
 *
 * @throws \InvalidArgumentException When the text is not valid JSON or not a JSON list.
 */
private function decodeHistoryJson(string $json): array
{
    $json = trim($json);

    if ($json === '') {
        return [];
    }

    try {
        $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        // Keep the original exception attached so the parse error stays traceable.
        throw new \InvalidArgumentException(sprintf('History-JSON ist ungültig: %s', $e->getMessage()), 0, $e);
    }

    // A JSON object would otherwise be iterated value by value, turning e.g.
    // {"prompt": "...", "answer": "..."} into two unrelated context entries.
    if (!is_array($decoded) || !array_is_list($decoded)) {
        throw new \InvalidArgumentException('History-JSON muss eine JSON-Liste sein.');
    }

    // Nested arrays cannot be cast to a meaningful string (PHP yields "Array"
    // plus a warning), so only scalar values are used as text.
    $toText = static fn (mixed $value): string => is_scalar($value) ? trim((string) $value) : '';

    $history = [];

    foreach ($decoded as $entry) {
        if (is_string($entry)) {
            $entry = trim($entry);
            if ($entry !== '') {
                $history[] = [
                    'prompt' => 'Eval-Kontext',
                    'answer' => $entry,
                ];
            }

            continue;
        }

        if (!is_array($entry)) {
            continue;
        }

        $prompt = $toText($entry['prompt'] ?? null);
        $answer = $toText($entry['answer'] ?? $entry['response'] ?? $entry['answer_preview'] ?? null);

        if ($prompt === '' && $answer === '') {
            continue;
        }

        $history[] = [
            'prompt' => $prompt !== '' ? $prompt : 'Eval-Kontext',
            'answer' => $answer,
        ];
    }

    return $history;
}
|
||||
|
||||
/**
 * Returns the path of the NDJSON case file for a type.
 *
 * The type is passed through assertSupportedType() first and the value it
 * returns is what ends up in the file name below
 * "<projectDir>/tests/evals/cases/".
 */
private function caseFilePath(string $type): string
{
    $validatedType = $this->assertSupportedType($type);

    return sprintf(
        '%s/tests/evals/cases/%s.ndjson',
        $this->projectDir,
        $validatedType
    );
}
|
||||
|
||||
private function statusFromReport(?array $report): string
|
||||
{
|
||||
if ($report === null) {
|
||||
|
||||
Reference in New Issue
Block a user