p101a
This commit is contained in:
@@ -68,7 +68,35 @@ final class AdminEvalController extends AbstractController
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/case/create', name: 'admin_evals_case_create', methods: ['POST'])]
|
||||
/**
 * Renders the "new eval case" form, optionally pre-filled from a report result.
 *
 * Reads three query parameters: `type` (replaced by "retrieval" when it is not
 * one of the service's supported type names), `source_type` and
 * `source_case_id`. When both source parameters are non-empty the draft is
 * built from the matching report result; otherwise an empty draft for the
 * selected type is used. Any failure while building the draft is shown as a
 * "warning" flash message and the form falls back to the empty draft.
 */
#[Route('/cases/new', name: 'admin_evals_case_new', methods: ['GET'])]
public function newCase(Request $request, EvalAdminService $evals): Response
{
    $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

    $query = $request->query;

    $type = trim((string) $query->get('type', 'retrieval'));
    $isSupported = in_array($type, $evals->supportedTypeNames(), true);
    if (!$isSupported) {
        $type = 'retrieval';
    }

    $sourceType = trim((string) $query->get('source_type', ''));
    $sourceCaseId = trim((string) $query->get('source_case_id', ''));

    try {
        if ($sourceType === '' || $sourceCaseId === '') {
            // No complete source reference: start from a blank draft.
            $draft = $evals->emptyCaseDraft($type);
        } else {
            $draft = $evals->caseDraftFromReportResult($sourceType, $sourceCaseId);
        }
    } catch (\Throwable $e) {
        // Best effort: report the problem, but still show a usable form.
        $this->addFlash('warning', $e->getMessage());
        $draft = $evals->emptyCaseDraft($type);
    }

    $context = [
        'types' => $evals->supportedTypes(),
        'case_draft' => $draft,
    ];

    return $this->render('admin/evals/case_new.html.twig', $context);
}
|
||||
|
||||
#[Route('/cases', name: 'admin_evals_case_create', methods: ['POST'])]
|
||||
public function createCase(Request $request, EvalAdminService $evals): Response
|
||||
{
|
||||
$this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);
|
||||
@@ -78,6 +106,15 @@ final class AdminEvalController extends AbstractController
|
||||
}
|
||||
|
||||
$type = trim((string) $request->request->get('type', 'retrieval'));
|
||||
$draft = [
|
||||
'type' => $type,
|
||||
'id' => (string) $request->request->get('id', ''),
|
||||
'prompt' => (string) $request->request->get('prompt', ''),
|
||||
'assert_json' => (string) $request->request->get('assert_json', ''),
|
||||
'history_json' => (string) $request->request->get('history_json', ''),
|
||||
'request_context_hint' => (string) $request->request->get('request_context_hint', ''),
|
||||
'source_label' => '',
|
||||
];
|
||||
|
||||
try {
|
||||
$created = $evals->createCase(
|
||||
@@ -95,17 +132,21 @@ final class AdminEvalController extends AbstractController
|
||||
'success',
|
||||
sprintf('Eval-Case "%s" wurde in %s.ndjson gespeichert.', (string) ($created['id'] ?? ''), $type)
|
||||
);
|
||||
|
||||
return $this->redirectToRoute('admin_evals_index', [
|
||||
'type' => $type,
|
||||
]);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $e->getMessage());
|
||||
}
|
||||
|
||||
if (!in_array($type, $evals->supportedTypeNames(), true)) {
|
||||
$type = 'retrieval';
|
||||
$draft['type'] = 'retrieval';
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_evals_index', [
|
||||
'type' => $type,
|
||||
]);
|
||||
return $this->render('admin/evals/case_new.html.twig', [
|
||||
'types' => $evals->supportedTypes(),
|
||||
'case_draft' => $draft,
|
||||
], new Response('', Response::HTTP_UNPROCESSABLE_ENTITY));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -144,6 +144,75 @@ final readonly class EvalAdminService
|
||||
return $report;
|
||||
}
|
||||
|
||||
/**
 * Builds a blank case draft for the given eval type.
 *
 * The type is passed through assertSupportedType() and the returned value is
 * used from then on. The assert JSON is pre-filled with the pretty-printed
 * default assertion for that type; every other field is an empty string.
 *
 * @return array{type:string,id:string,prompt:string,assert_json:string,history_json:string,request_context_hint:string,source_label:string}
 */
public function emptyCaseDraft(string $type = 'retrieval'): array
{
    $supportedType = $this->assertSupportedType($type);
    $defaultAssert = $this->defaultAssertForType($supportedType);
    $assertJson = $this->encodePrettyJson($defaultAssert);

    return [
        'type' => $supportedType,
        'id' => '',
        'prompt' => '',
        'assert_json' => $assertJson,
        'history_json' => '',
        'request_context_hint' => '',
        'source_label' => '',
    ];
}
|
||||
|
||||
/**
 * Builds a pre-filled case draft from one result of the stored report of an eval type.
 *
 * The first entry in the report's `results` whose `case_id` equals the trimmed
 * $caseId is used. Its prompt, history and a suggested assertion are copied
 * into the draft, and a new case ID is suggested from "<type>_<caseId>_new".
 *
 * @return array{type:string,id:string,prompt:string,assert_json:string,history_json:string,request_context_hint:string,source_label:string}
 *
 * @throws \InvalidArgumentException when the trimmed case ID is empty
 * @throws \RuntimeException         when no report is available for the type, or the
 *                                   report has no result with the given case ID
 */
public function caseDraftFromReportResult(string $type, string $caseId): array
{
    $type = $this->assertSupportedType($type);
    $caseId = trim($caseId);

    if ($caseId === '') {
        throw new \InvalidArgumentException('Es wurde keine Quell-Case-ID übergeben.');
    }

    $report = $this->readTypeReport($type);
    if ($report === null) {
        throw new \RuntimeException(sprintf(
            'Für den Eval-Typ "%s" liegt kein Report vor. Bitte den Eval zuerst ausführen.',
            $type
        ));
    }

    // A report whose "results" entry is present but not an array (e.g. a
    // hand-edited or truncated file) must not be handed to foreach: that
    // raises a PHP warning instead of the intended "no such case" error below.
    $candidates = is_array($report['results'] ?? null) ? $report['results'] : [];

    $result = null;
    foreach ($candidates as $candidate) {
        if (is_array($candidate) && (string) ($candidate['case_id'] ?? '') === $caseId) {
            $result = $candidate;
            break;
        }
    }

    if (!is_array($result)) {
        throw new \RuntimeException(sprintf(
            'Der Report enthält keinen Case "%s" für Eval-Typ "%s".',
            $caseId,
            $type
        ));
    }

    $details = is_array($result['details'] ?? null) ? $result['details'] : [];
    // The prompt on the result itself wins; the one inside details is the fallback.
    $prompt = trim((string) ($result['prompt'] ?? $details['prompt'] ?? ''));
    $history = $this->historyDraftFromDetails($details);
    $assert = $this->suggestAssertFromReportResult($type, $result, $details);

    return [
        'type' => $type,
        'id' => $this->suggestUniqueCaseId($type . '_' . $caseId . '_new'),
        'prompt' => $prompt,
        'assert_json' => $this->encodePrettyJson($assert),
        // An empty history is rendered as an empty field rather than "[]".
        'history_json' => $history === [] ? '' : $this->encodePrettyJson($history),
        'request_context_hint' => '',
        // NOTE(review): self::TYPES[$type] is presumably the display label of the type — confirm.
        'source_label' => sprintf('Vorlage aus Report-Case %s (%s)', $caseId, self::TYPES[$type]),
    ];
}
|
||||
|
||||
/**
|
||||
* @return array{type:string,id:string,path:string,row:array<string,mixed>,case_count:int}
|
||||
@@ -190,7 +259,7 @@ final readonly class EvalAdminService
|
||||
$row['request_context_hint'] = $requestContextHint;
|
||||
}
|
||||
|
||||
// Reuse the regular DTO validation before writing the case file.
|
||||
// Validate with the same DTO that the eval runner uses.
|
||||
EvalCase::fromArray($row);
|
||||
|
||||
$path = $this->caseFilePath($type);
|
||||
@@ -221,7 +290,6 @@ final readonly class EvalAdminService
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param array<int, EvalCase> $cases
|
||||
* @return array<int, EvalCase>
|
||||
@@ -326,7 +394,6 @@ final readonly class EvalAdminService
|
||||
return $decoded;
|
||||
}
|
||||
|
||||
|
||||
private function normalizeNewCaseId(string $id): string
|
||||
{
|
||||
$id = trim($id);
|
||||
@@ -374,7 +441,7 @@ final readonly class EvalAdminService
|
||||
throw new \InvalidArgumentException(sprintf('%s ist ungültig: %s', $label, $e->getMessage()));
|
||||
}
|
||||
|
||||
if (!is_array($decoded)) {
|
||||
if (!is_array($decoded) || !str_starts_with($json, '{') || ($decoded !== [] && array_is_list($decoded))) {
|
||||
throw new \InvalidArgumentException(sprintf('%s muss ein JSON-Objekt sein.', $label));
|
||||
}
|
||||
|
||||
@@ -398,7 +465,7 @@ final readonly class EvalAdminService
|
||||
throw new \InvalidArgumentException(sprintf('History-JSON ist ungültig: %s', $e->getMessage()));
|
||||
}
|
||||
|
||||
if (!is_array($decoded)) {
|
||||
if (!is_array($decoded) || !str_starts_with($json, '[') || !array_is_list($decoded)) {
|
||||
throw new \InvalidArgumentException('History-JSON muss eine JSON-Liste sein.');
|
||||
}
|
||||
|
||||
@@ -458,4 +525,162 @@ final readonly class EvalAdminService
|
||||
|
||||
return $failed === 0 ? 'green' : 'red';
|
||||
}
|
||||
|
||||
/**
 * Returns the starter assertion offered for a new case of the given type.
 *
 * "retrieval" and "answer_guard" start with `min_results: 1`, "shop_query"
 * and "followup" start with an empty `expected_query`; any other type gets
 * an empty assertion.
 *
 * @return array<string, mixed>
 */
private function defaultAssertForType(string $type): array
{
    if ($type === 'retrieval' || $type === 'answer_guard') {
        return ['min_results' => 1];
    }

    if ($type === 'shop_query' || $type === 'followup') {
        return ['expected_query' => ''];
    }

    return [];
}
|
||||
|
||||
/**
 * Suggests an assertion for a new case based on what a report result observed.
 *
 * The first rule that yields data wins:
 *  1. "shop_query"/"followup" with a non-empty string `details.query`
 *     -> `expected_query`.
 *  2. "shop_query"/"followup" with a non-empty `details.individual_queries`
 *     list -> exact `expected_individual_queries`.
 *  3. `details.document_refs` entries with a non-empty `id`
 *     -> `min_results: 1` plus `must_include_one_of_document_ids`.
 *  4. Non-empty `details.document_ids` -> same shape as rule 3.
 *  5. A numeric `details.result_count` of zero -> `max_results: 0`.
 *  6. Otherwise the default assertion for the type.
 *
 * @param array<string, mixed> $result  The full report result; not read by the current rules.
 * @param array<string, mixed> $details The result's `details` section.
 *
 * @return array<string, mixed>
 */
private function suggestAssertFromReportResult(string $type, array $result, array $details): array
{
    // Trims every string-like entry and keeps the non-empty ones. The explicit
    // `!== ''` test keeps the value "0", which a callback-less array_filter()
    // would discard, and nested arrays are skipped instead of being cast to
    // the literal string "Array".
    $nonEmptyStrings = static function (array $values): array {
        $clean = [];
        foreach ($values as $value) {
            if (!is_scalar($value) && !$value instanceof \Stringable) {
                continue;
            }

            $text = trim((string) $value);
            if ($text !== '') {
                $clean[] = $text;
            }
        }

        return $clean;
    };

    $isQueryType = $type === 'shop_query' || $type === 'followup';

    if ($isQueryType && is_string($details['query'] ?? null)) {
        $query = trim($details['query']);
        if ($query !== '') {
            return [
                'expected_query' => $query,
            ];
        }
    }

    if ($isQueryType && is_array($details['individual_queries'] ?? null)) {
        $queries = $nonEmptyStrings($details['individual_queries']);
        if ($queries !== []) {
            return [
                'expected_individual_queries' => $queries,
                'expected_individual_queries_exact' => true,
            ];
        }
    }

    if (is_array($details['document_refs'] ?? null)) {
        $documentIds = [];
        foreach ($details['document_refs'] as $documentRef) {
            if (!is_array($documentRef)) {
                continue;
            }

            $documentId = trim((string) ($documentRef['id'] ?? ''));
            if ($documentId !== '') {
                $documentIds[] = $documentId;
            }
        }

        if ($documentIds !== []) {
            return [
                'min_results' => 1,
                'must_include_one_of_document_ids' => array_values(array_unique($documentIds)),
            ];
        }
    }

    if (is_array($details['document_ids'] ?? null)) {
        $documentIds = $nonEmptyStrings($details['document_ids']);
        if ($documentIds !== []) {
            return [
                'min_results' => 1,
                'must_include_one_of_document_ids' => array_values(array_unique($documentIds)),
            ];
        }
    }

    // Only a numeric zero means "no results were found". A missing, empty or
    // otherwise non-numeric value must not be coerced to 0 by an int cast.
    $rawResultCount = $details['result_count'] ?? null;
    if (is_numeric($rawResultCount) && (int) $rawResultCount === 0) {
        return [
            'max_results' => 0,
        ];
    }

    return $this->defaultAssertForType($type);
}
|
||||
|
||||
/**
 * Extracts a prompt/answer history list from a report result's details.
 *
 * Returns an empty list when `details.history` is not an array. Non-array
 * entries and entries whose prompt and answer are both empty after trimming
 * are skipped. The answer is taken from `answer`, falling back to
 * `answer_preview`; an empty prompt is replaced by the placeholder
 * "Eval-Kontext".
 *
 * @param array<string, mixed> $details
 *
 * @return array<int, array{prompt:string,answer:string}>
 */
private function historyDraftFromDetails(array $details): array
{
    $entries = $details['history'] ?? null;
    if (!is_array($entries)) {
        return [];
    }

    $turns = [];
    foreach ($entries as $entry) {
        if (!is_array($entry)) {
            continue;
        }

        $question = trim((string) ($entry['prompt'] ?? ''));
        $reply = trim((string) ($entry['answer'] ?? $entry['answer_preview'] ?? ''));

        if ($question !== '' || $reply !== '') {
            $turns[] = [
                'prompt' => $question === '' ? 'Eval-Kontext' : $question,
                'answer' => $reply,
            ];
        }
    }

    return $turns;
}
|
||||
|
||||
/**
 * Derives a case ID from $base that caseIdExists() does not report as taken.
 *
 * The base is lower-cased and trimmed, every run of characters outside
 * [a-z0-9_-] becomes a single "_", and leading/trailing "_" and "-" are
 * stripped; an empty result becomes "eval_case". If that ID is taken, the
 * suffixes "_2" … "_999" are tried in order. If all of those are taken too,
 * the ID is suffixed with the current timestamp (YmdHis) and returned
 * without a further existence check.
 */
private function suggestUniqueCaseId(string $base): string
{
    $slug = preg_replace('/[^a-z0-9_-]+/', '_', strtolower(trim($base)));
    $slug = trim($slug ?? 'eval_case', '_-');
    if ($slug === '') {
        $slug = 'eval_case';
    }

    $suffix = 1;
    $candidate = $slug;
    while ($this->caseIdExists($candidate)) {
        ++$suffix;
        if ($suffix > 999) {
            // Numbered suffixes are exhausted: fall back to a timestamp.
            return sprintf('%s_%s', $slug, (new \DateTimeImmutable())->format('YmdHis'));
        }

        $candidate = sprintf('%s_%d', $slug, $suffix);
    }

    return $candidate;
}
|
||||
|
||||
/**
 * Encodes an array as pretty-printed JSON for display in a form field.
 *
 * Unicode characters and slashes are left unescaped; an encoding failure
 * raises a \JsonException because JSON_THROW_ON_ERROR is set.
 *
 * @param array<mixed> $value
 */
private function encodePrettyJson(array $value): string
{
    $flags = JSON_THROW_ON_ERROR
        | JSON_PRETTY_PRINT
        | JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES;

    return json_encode($value, $flags);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user