diff --git a/patch_history/RETRIEX_PATCH_100B_ADMIN_EVAL_CASE_SELECTION_FIX_README.md b/patch_history/RETRIEX_PATCH_100B_ADMIN_EVAL_CASE_SELECTION_FIX_README.md new file mode 100644 index 0000000..980462c --- /dev/null +++ b/patch_history/RETRIEX_PATCH_100B_ADMIN_EVAL_CASE_SELECTION_FIX_README.md @@ -0,0 +1,37 @@ +# RetrieX Patch p100b - Admin Eval Case Selection Fix + +## Ziel + +Behebt die Admin-Eval-UX, wenn ein einzelner Case ausgewaehlt wird und der Request mit `No eval cases selected.` endet. + +## Ursache + +Die p100/p100a-Seite nutzte ein freies `datalist`-Feld fuer Case-IDs, das Cases aller Eval-Typen enthielt. Dadurch konnte ein Case aus `shop_query` ausgewaehlt werden, waehrend das Formular noch einen anderen Eval-Typ sendete. Der Admin-Service suchte dann nur in der Case-Datei des gesendeten Typs und fand keine passenden Cases. + +## Aenderungen + +- Das freie Case-ID-Feld wurde durch ein gefiltertes Select ersetzt. +- Die Case-Liste wird clientseitig passend zum gewaehlten Eval-Typ gefiltert. +- Beim Wechsel des Eval-Typs wird eine nicht passende Case-Auswahl automatisch geleert. +- Der Admin-Service ist robuster: Wenn eine Case-ID nicht im gesendeten Typ gefunden wird, wird sie ueber alle unterstuetzten Eval-Typen gesucht und mit dem richtigen Typ ausgefuehrt. +- Der Controller redirectet nach dem Run auf den effektiv ausgefuehrten Eval-Typ. +- Die alte unklare Meldung `No eval cases selected.` wird durch konkrete Fehlertexte ersetzt. 
+ +## Scope + +Keine Aenderungen an: + +- Retrieval-Logik +- Shopquery-Logik +- Follow-up-Logik +- Answer-Guard-Logik +- Eval-Cases +- YAML-Konfiguration +- Modellparametern +- Datenbank/Migrationen + +## Geaenderte Dateien + +- `src/Controller/Admin/AdminEvalController.php` +- `src/Service/Admin/EvalAdminService.php` +- `templates/admin/evals/index.html.twig` diff --git a/patch_history/RETRIEX_PATCH_100C_ADMIN_EVAL_DOCUMENT_LABELS_README.md b/patch_history/RETRIEX_PATCH_100C_ADMIN_EVAL_DOCUMENT_LABELS_README.md new file mode 100644 index 0000000..4183f38 --- /dev/null +++ b/patch_history/RETRIEX_PATCH_100C_ADMIN_EVAL_DOCUMENT_LABELS_README.md @@ -0,0 +1,45 @@ +# RetrieX Patch p100c - Admin Eval Document Labels + +## Ziel + +Die Admin-Eval-Resultate sollen bei Retrieval-/Answer-Guard-Fällen nicht nur technische `document_id`- und `chunk_id`-Werte anzeigen, sondern auch menschenlesbare Dokumentinformationen, damit ein gefundenes Dokument im Admin/Dateibestand leichter identifiziert werden kann. 
+ +## Änderungen + +- `NdjsonHybridRetriever::retrieveDebug()` gibt pro Debug-Treffer zusätzlich aus: + - `document_title` + - `file_path` + - `version_number` +- `RetrievalDebugRunner` schreibt in Eval-Reports zusätzlich: + - `document_refs`: eindeutige Dokumentübersicht mit Titel, Datei, Version, Ranks und Chunk-IDs + - `result_rows`: rankgenaue Trefferliste mit Titel, Datei, Chunk-ID und Text-Preview +- Admin-Eval-Template zeigt diese Informationen direkt in den Result-Details: + - Tabelle "Gefundene Dokumente" + - aufklappbare Tabelle "Treffer / Chunks anzeigen" + - JSON-Details bleiben weiterhin verfügbar + +## Nicht geändert + +- Keine Eval-Assertions geändert +- Keine Retrieval-Gewichte geändert +- Keine Shopquery-/Follow-up-/Answer-Logik geändert +- Keine YAML-/Parameteränderung +- Keine Datenbankmigration + +## Prüfung + +Nach Einspielen: + +```bash +php bin/console mto:agent:config:validate +php bin/console mto:agent:eval:run retrieval +php bin/console mto:agent:eval:run answer_guard +``` + +Danach im Admin: + +```text +/admin/evals/ +``` + +Einen Retrieval- oder Answer-Guard-Eval öffnen und prüfen, ob bei den Resultaten Titel/Datei zusätzlich zur Doc-ID sichtbar sind. diff --git a/src/Controller/Admin/AdminEvalController.php b/src/Controller/Admin/AdminEvalController.php index 611b335..db7d2e8 100644 --- a/src/Controller/Admin/AdminEvalController.php +++ b/src/Controller/Admin/AdminEvalController.php @@ -48,6 +48,8 @@ final class AdminEvalController extends AbstractController try { $report = $evals->run($type, $caseId !== '' ? $caseId : null); + $type = trim((string) ($report['type'] ?? $type)); + $this->addFlash( ((int) ($report['failed'] ?? 0)) === 0 ? 
'success' : 'danger', sprintf( diff --git a/src/Eval/RetrievalDebugRunner.php b/src/Eval/RetrievalDebugRunner.php index 63b2129..4cf473d 100644 --- a/src/Eval/RetrievalDebugRunner.php +++ b/src/Eval/RetrievalDebugRunner.php @@ -33,6 +33,8 @@ final readonly class RetrievalDebugRunner $documentIds = $this->extractUniqueStringValues($rows, 'document_id'); $chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id'); + $documentRefs = $this->buildDocumentRefs($rows); + $resultRows = $this->buildResultRows($rows); $joinedText = $this->extractJoinedText($rows); $assert = $case->assert; @@ -220,6 +222,8 @@ final readonly class RetrievalDebugRunner 'intent' => $intent, 'document_ids' => $documentIds, 'chunk_ids' => $chunkIds, + 'document_refs' => $documentRefs, + 'result_rows' => $resultRows, 'matched_any_terms' => $matchedAnyTerms, 'matched_all_terms' => $matchedAllTerms, 'forbidden_terms_checked' => $this->normalizeStringList($assert['must_not_include_terms'] ?? []), @@ -268,6 +272,122 @@ final readonly class RetrievalDebugRunner return array_keys($values); } + /** + * @param array<int, array<string, mixed>> $rows + * @return array<int, array{id:string,title:string,file_path:string,version_number:string,chunk_ids:array<int,string>,ranks:array<int,int>}> + */ + private function buildDocumentRefs(array $rows): array + { + $refs = []; + + foreach ($rows as $row) { + $documentId = $this->extractNullableString($row, 'document_id'); + + if ($documentId === '') { + continue; + } + + if (!isset($refs[$documentId])) { + $refs[$documentId] = [ + 'id' => $documentId, + 'title' => $this->extractNullableString($row, 'document_title'), + 'file_path' => $this->extractNullableString($row, 'file_path'), + 'version_number' => $this->extractNullableString($row, 'version_number'), + 'chunk_ids' => [], + 'ranks' => [], + ]; + } + + $chunkId = $this->extractNullableString($row, 'chunk_id'); + if ($chunkId !== '' && !in_array($chunkId, $refs[$documentId]['chunk_ids'], true)) { + $refs[$documentId]['chunk_ids'][] = $chunkId; + } + + $rank = $this->extractNullableInt($row, 'rank'); + if ($rank !== null && !in_array($rank, 
$refs[$documentId]['ranks'], true)) { + $refs[$documentId]['ranks'][] = $rank; + } + } + + return array_values($refs); + } + + /** + * @param array<int, array<string, mixed>> $rows + * @return array<int, array<string, mixed>> + */ + private function buildResultRows(array $rows): array + { + $out = []; + + foreach ($rows as $row) { + $out[] = [ + 'rank' => $this->extractNullableInt($row, 'rank'), + 'document_id' => $this->extractNullableString($row, 'document_id'), + 'document_title' => $this->extractNullableString($row, 'document_title'), + 'file_path' => $this->extractNullableString($row, 'file_path'), + 'chunk_id' => $this->extractNullableString($row, 'chunk_id'), + 'chunk_index' => $this->extractNullableInt($row, 'chunk_index'), + 'raw_score' => $row['raw_score'] ?? null, + 'rrf_score' => $row['rrf_score'] ?? null, + 'text_preview' => $this->previewText($this->extractNullableString($row, 'text')), + ]; + } + + return $out; + } + + /** + * @param array<string, mixed> $row + */ + private function extractNullableString(array $row, string $key): string + { + $value = $row[$key] ?? null; + + if ($value === null || is_array($value) || is_object($value)) { + return ''; + } + + return trim((string)$value); + } + + /** + * @param array<string, mixed> $row + */ + private function extractNullableInt(array $row, string $key): ?int + { + $value = $row[$key] ?? null; + + if ($value === null || $value === '') { + return null; + } + + if (is_int($value)) { + return $value; + } + + if (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) { + return (int)$value; + } + + return null; + } + + private function previewText(string $text, int $limit = 240): string + { + $text = preg_replace('/\s+/u', ' ', trim($text)) ?? trim($text); + + if ($text === '') { + return ''; + } + + if (mb_strlen($text, 'UTF-8') <= $limit) { + return $text; + } + + return mb_substr($text, 0, $limit, 'UTF-8') . 
'...'; + } + /** * @param array<int, array<string, mixed>> $rows */ diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index 9b3f2db..9cc1178 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -133,13 +133,17 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface continue; } + $row = $result['rows'][$chunkId]; $rank++; $out[] = [ 'rank' => $rank, 'chunk_id' => $chunkId, - 'document_id' => $result['rows'][$chunkId]['document_id'] ?? null, - 'chunk_index' => $result['rows'][$chunkId]['chunk_index'] ?? null, + 'document_id' => $row['document_id'] ?? null, + 'document_title' => $this->extractDocumentTitle($row), + 'file_path' => $this->extractMetadataString($row, 'file_path'), + 'version_number' => $this->extractMetadataString($row, 'version_number'), + 'chunk_index' => $row['chunk_index'] ?? null, 'raw_score' => $result['rawScores'][$chunkId] ?? null, 'rrf_score' => $result['rrfScores'][$chunkId] ?? null, 'threshold' => $result['threshold'], @@ -148,7 +152,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface 'entity_label' => $result['entityLabel'], 'is_list_query' => $result['isListQuery'], 'selection_mode' => $result['selectionMode'], - 'text' => trim((string)$result['rows'][$chunkId]['text']), + 'text' => trim((string)($row['text'] ?? '')), ]; } @@ -1683,6 +1687,20 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface return ''; } + /** + * Extracts the trimmed scalar value at $row['metadata'][$key] for debug/eval output; returns '' when it is missing or not scalar. + */ + private function extractMetadataString(array $row, string $key): string + { + $value = $row['metadata'][$key] ?? null; + + if (is_scalar($value)) { + return trim((string)$value); + } + + return ''; + } + /** * Normalizes text for token-safe product comparisons. 
*/ diff --git a/src/Service/Admin/EvalAdminService.php b/src/Service/Admin/EvalAdminService.php index 4c67c8b..008693a 100644 --- a/src/Service/Admin/EvalAdminService.php +++ b/src/Service/Admin/EvalAdminService.php @@ -111,14 +111,25 @@ final readonly class EvalAdminService $cases = $this->loadCases($type); if ($caseId !== '') { - $cases = array_values(array_filter( - $cases, - static fn (EvalCase $case): bool => $case->id === $caseId - )); + $cases = $this->filterCasesById($cases, $caseId); + + if ($cases === []) { + [$type, $cases] = $this->findCasesByIdAcrossTypes($caseId); + } } if ($cases === []) { - throw new \RuntimeException('No eval cases selected.'); + if ($caseId !== '') { + throw new \RuntimeException(sprintf( + 'Eval case "%s" was not found. Please select a case from the list for the chosen eval type.', + $caseId + )); + } + + throw new \RuntimeException(sprintf( + 'No eval cases available for eval type "%s".', + $type + )); } $results = $this->runner->runAll($cases); @@ -133,6 +144,35 @@ final readonly class EvalAdminService return $report; } + + /** + * @param array<int, EvalCase> $cases + * @return array<int, EvalCase> + */ + private function filterCasesById(array $cases, string $caseId): array + { + return array_values(array_filter( + $cases, + static fn (EvalCase $case): bool => $case->id === $caseId + )); + } + + /** + * @return array{0:string,1:array<int, EvalCase>} First type key whose cases contain the ID, with those cases; ['', []] when none match. + */ + private function findCasesByIdAcrossTypes(string $caseId): array + { + foreach (array_keys(self::TYPES) as $candidateType) { + $cases = $this->filterCasesById($this->loadCases($candidateType), $caseId); + + if ($cases !== []) { + return [$candidateType, $cases]; + } + } + + return ['', []]; + } + /** * @return array|null */ diff --git a/templates/admin/evals/index.html.twig b/templates/admin/evals/index.html.twig index 2fb5046..9265281 100644 --- a/templates/admin/evals/index.html.twig +++ b/templates/admin/evals/index.html.twig @@ -146,18 +146,23 @@
- - - + + +
+ Die Case-Liste wird passend zum Eval-Typ gefiltert. Leer lassen, um alle Cases des Typs auszuführen. +