This commit is contained in:
team 1
2026-05-12 09:16:09 +02:00
parent 0d55c0a439
commit feaec9bbaf
7 changed files with 405 additions and 17 deletions

View File

@@ -0,0 +1,37 @@
# RetrieX Patch p100b - Admin Eval Case Selection Fix
## Ziel
Behebt einen Fehler in der Admin-Eval-UX: Wurde ein einzelner Case ausgewaehlt, dessen Eval-Typ nicht zum gesendeten Eval-Typ passte, endete der Request mit `No eval cases selected.`.
## Ursache
Die p100/p100a-Seite nutzte ein freies `datalist`-Feld fuer Case-IDs, das Cases aller Eval-Typen enthielt. Dadurch konnte ein Case aus `shop_query` ausgewaehlt werden, waehrend das Formular noch einen anderen Eval-Typ sendete. Der Admin-Service suchte dann nur in der Case-Datei des gesendeten Typs und fand keine passenden Cases.
## Aenderungen
- Das freie Case-ID-Feld wurde durch ein gefiltertes Select ersetzt.
- Die Case-Liste wird clientseitig passend zum gewaehlten Eval-Typ gefiltert.
- Beim Wechsel des Eval-Typs wird eine nicht passende Case-Auswahl automatisch geleert.
- Der Admin-Service ist robuster: Wenn eine Case-ID nicht im gesendeten Typ gefunden wird, wird sie ueber alle unterstuetzten Eval-Typen gesucht und mit dem richtigen Typ ausgefuehrt.
- Der Controller redirectet nach dem Run auf den effektiv ausgefuehrten Eval-Typ.
- Die alte unklare Meldung `No eval cases selected.` wird durch konkrete Fehlertexte ersetzt.
## Scope
Keine Aenderungen an:
- Retrieval-Logik
- Shopquery-Logik
- Follow-up-Logik
- Answer-Guard-Logik
- Eval-Cases
- YAML-Konfiguration
- Modellparametern
- Datenbank/Migrationen
## Geaenderte Dateien
- `src/Controller/Admin/AdminEvalController.php`
- `src/Service/Admin/EvalAdminService.php`
- `templates/admin/evals/index.html.twig`

View File

@@ -0,0 +1,45 @@
# RetrieX Patch p100c - Admin Eval Document Labels
## Ziel
Die Admin-Eval-Resultate sollen bei Retrieval-/Answer-Guard-Fällen nicht nur technische `document_id`- und `chunk_id`-Werte anzeigen, sondern auch menschenlesbare Dokumentinformationen, damit ein gefundenes Dokument im Admin/Dateibestand leichter identifiziert werden kann.
## Änderungen
- `NdjsonHybridRetriever::retrieveDebug()` gibt pro Debug-Treffer zusätzlich aus:
  - `document_title`
  - `file_path`
  - `version_number`
- `RetrievalDebugRunner` schreibt in Eval-Reports zusätzlich:
  - `document_refs`: eindeutige Dokumentübersicht mit Titel, Datei, Version, Ranks und Chunk-IDs
  - `result_rows`: rankgenaue Trefferliste mit Titel, Datei, Chunk-ID und Text-Preview
- Admin-Eval-Template zeigt diese Informationen direkt in den Result-Details:
  - Tabelle "Gefundene Dokumente"
  - aufklappbare Tabelle "Treffer / Chunks anzeigen"
  - JSON-Details bleiben weiterhin verfügbar
## Nicht geändert
- Keine Eval-Assertions geändert
- Keine Retrieval-Gewichte geändert
- Keine Shopquery-/Follow-up-/Answer-Logik geändert
- Keine YAML-/Parameteränderung
- Keine Datenbankmigration
## Prüfung
Nach Einspielen:
```bash
php bin/console mto:agent:config:validate
php bin/console mto:agent:eval:run retrieval
php bin/console mto:agent:eval:run answer_guard
```
Danach im Admin:
```text
/admin/evals/
```
Einen Retrieval- oder Answer-Guard-Eval öffnen und prüfen, ob bei den Resultaten Titel/Datei zusätzlich zur Doc-ID sichtbar sind.

View File

@@ -48,6 +48,8 @@ final class AdminEvalController extends AbstractController
try {
$report = $evals->run($type, $caseId !== '' ? $caseId : null);
$type = trim((string) ($report['type'] ?? $type));
$this->addFlash(
((int) ($report['failed'] ?? 0)) === 0 ? 'success' : 'danger',
sprintf(

View File

@@ -33,6 +33,8 @@ final readonly class RetrievalDebugRunner
$documentIds = $this->extractUniqueStringValues($rows, 'document_id');
$chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id');
$documentRefs = $this->buildDocumentRefs($rows);
$resultRows = $this->buildResultRows($rows);
$joinedText = $this->extractJoinedText($rows);
$assert = $case->assert;
@@ -220,6 +222,8 @@ final readonly class RetrievalDebugRunner
'intent' => $intent,
'document_ids' => $documentIds,
'chunk_ids' => $chunkIds,
'document_refs' => $documentRefs,
'result_rows' => $resultRows,
'matched_any_terms' => $matchedAnyTerms,
'matched_all_terms' => $matchedAllTerms,
'forbidden_terms_checked' => $this->normalizeStringList($assert['must_not_include_terms'] ?? []),
@@ -268,6 +272,122 @@ final readonly class RetrievalDebugRunner
return array_keys($values);
}
/**
 * Groups retrieval rows by document and collects the chunk IDs and ranks seen per document.
 *
 * Rows whose `document_id` resolves to an empty string are skipped. Title, file path and
 * version number are taken from the first row encountered for a document; chunk IDs and
 * ranks are appended in row order without duplicates.
 *
 * @param array<int, array<string, mixed>> $rows
 * @return array<int, array{id:string,title:string,file_path:string,version_number:string,chunk_ids:array<int,string>,ranks:array<int,int>}>
 */
private function buildDocumentRefs(array $rows): array
{
    $byDocument = [];

    foreach ($rows as $hit) {
        $docId = $this->extractNullableString($hit, 'document_id');
        if ($docId === '') {
            continue;
        }

        // Only the first row of a document initialises its descriptive fields.
        $byDocument[$docId] ??= [
            'id' => $docId,
            'title' => $this->extractNullableString($hit, 'document_title'),
            'file_path' => $this->extractNullableString($hit, 'file_path'),
            'version_number' => $this->extractNullableString($hit, 'version_number'),
            'chunk_ids' => [],
            'ranks' => [],
        ];

        $chunk = $this->extractNullableString($hit, 'chunk_id');
        $isNewChunk = $chunk !== '' && !in_array($chunk, $byDocument[$docId]['chunk_ids'], true);
        if ($isNewChunk) {
            $byDocument[$docId]['chunk_ids'][] = $chunk;
        }

        $position = $this->extractNullableInt($hit, 'rank');
        $isNewRank = $position !== null && !in_array($position, $byDocument[$docId]['ranks'], true);
        if ($isNewRank) {
            $byDocument[$docId]['ranks'][] = $position;
        }
    }

    // Drop the document-ID keys so callers receive a plain list.
    return array_values($byDocument);
}
/**
 * Projects each retrieval row onto the fixed set of fields written to the eval report.
 *
 * One output entry is produced per input row, in the same order, as a re-indexed list.
 * Scores are passed through unchanged (or null when absent); the text is reduced to a preview.
 *
 * @param array<int, array<string, mixed>> $rows
 * @return array<int, array<string, mixed>>
 */
private function buildResultRows(array $rows): array
{
    // array_values() first: array_map() would otherwise keep the input keys.
    return array_map(
        fn (array $hit): array => [
            'rank' => $this->extractNullableInt($hit, 'rank'),
            'document_id' => $this->extractNullableString($hit, 'document_id'),
            'document_title' => $this->extractNullableString($hit, 'document_title'),
            'file_path' => $this->extractNullableString($hit, 'file_path'),
            'chunk_id' => $this->extractNullableString($hit, 'chunk_id'),
            'chunk_index' => $this->extractNullableInt($hit, 'chunk_index'),
            'raw_score' => $hit['raw_score'] ?? null,
            'rrf_score' => $hit['rrf_score'] ?? null,
            'text_preview' => $this->previewText($this->extractNullableString($hit, 'text')),
        ],
        array_values($rows),
    );
}
/**
 * Reads `$row[$key]` as a trimmed string.
 *
 * Returns an empty string when the key is missing, the value is null, or the value is an
 * array or object; every other value is cast to string and trimmed.
 *
 * @param array<string, mixed> $row
 */
private function extractNullableString(array $row, string $key): string
{
    // isset() is false for both a missing key and a null value.
    if (!isset($row[$key])) {
        return '';
    }

    $raw = $row[$key];

    return (is_array($raw) || is_object($raw)) ? '' : trim((string)$raw);
}
/**
 * Reads `$row[$key]` as an integer, or null when it is not an integer-like value.
 *
 * Native ints are returned as-is. Strings are accepted only when, after trimming, they
 * consist of an optional minus sign followed by digits. Anything else (missing key, null,
 * empty string, floats, booleans, arrays, ...) yields null.
 *
 * @param array<string, mixed> $row
 */
private function extractNullableInt(array $row, string $key): ?int
{
    $raw = $row[$key] ?? null;

    return match (true) {
        is_int($raw) => $raw,
        is_string($raw) && preg_match('/^-?\d+$/', trim($raw)) === 1 => (int)$raw,
        default => null,
    };
}
/**
 * Builds a single-line preview of a text.
 *
 * The text is trimmed and every run of whitespace is collapsed to one space. Results of at
 * most `$limit` characters are returned unchanged; longer results are cut to `$limit`
 * characters and suffixed with '...'.
 */
private function previewText(string $text, int $limit = 240): string
{
    $trimmed = trim($text);

    // preg_replace() returns null on failure; fall back to the merely trimmed text then.
    $collapsed = preg_replace('/\s+/u', ' ', $trimmed);
    if ($collapsed === null) {
        $collapsed = $trimmed;
    }

    if ($collapsed === '' || mb_strlen($collapsed, 'UTF-8') <= $limit) {
        return $collapsed;
    }

    return mb_substr($collapsed, 0, $limit, 'UTF-8') . '...';
}
/**
* @param array<int, array<string, mixed>> $rows
*/

View File

@@ -133,13 +133,17 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
$row = $result['rows'][$chunkId];
$rank++;
$out[] = [
'rank' => $rank,
'chunk_id' => $chunkId,
'document_id' => $result['rows'][$chunkId]['document_id'] ?? null,
'chunk_index' => $result['rows'][$chunkId]['chunk_index'] ?? null,
'document_id' => $row['document_id'] ?? null,
'document_title' => $this->extractDocumentTitle($row),
'file_path' => $this->extractMetadataString($row, 'file_path'),
'version_number' => $this->extractMetadataString($row, 'version_number'),
'chunk_index' => $row['chunk_index'] ?? null,
'raw_score' => $result['rawScores'][$chunkId] ?? null,
'rrf_score' => $result['rrfScores'][$chunkId] ?? null,
'threshold' => $result['threshold'],
@@ -148,7 +152,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
'entity_label' => $result['entityLabel'],
'is_list_query' => $result['isListQuery'],
'selection_mode' => $result['selectionMode'],
'text' => trim((string)$result['rows'][$chunkId]['text']),
'text' => trim((string)($row['text'] ?? '')),
];
}
@@ -1683,6 +1687,20 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return '';
}
/**
 * Reads `$row['metadata'][$key]` for debug/eval output.
 *
 * Scalar values are cast to string and trimmed; a missing entry or any non-scalar value
 * yields an empty string.
 */
private function extractMetadataString(array $row, string $key): string
{
    $candidate = $row['metadata'][$key] ?? null;

    return is_scalar($candidate) ? trim((string)$candidate) : '';
}
/**
* Normalizes text for token-safe product comparisons.
*/

View File

@@ -111,14 +111,25 @@ final readonly class EvalAdminService
$cases = $this->loadCases($type);
if ($caseId !== '') {
$cases = array_values(array_filter(
$cases,
static fn (EvalCase $case): bool => $case->id === $caseId
));
$cases = $this->filterCasesById($cases, $caseId);
if ($cases === []) {
[$type, $cases] = $this->findCasesByIdAcrossTypes($caseId);
}
}
if ($cases === []) {
throw new \RuntimeException('No eval cases selected.');
if ($caseId !== '') {
throw new \RuntimeException(sprintf(
'Eval case "%s" was not found. Please select a case from the list for the chosen eval type.',
$caseId
));
}
throw new \RuntimeException(sprintf(
'No eval cases available for eval type "%s".',
$type
));
}
$results = $this->runner->runAll($cases);
@@ -133,6 +144,35 @@ final readonly class EvalAdminService
return $report;
}
/**
 * Keeps only the cases whose `id` is identical to the given case ID.
 *
 * The result is re-indexed, so it is an empty list when nothing matches.
 *
 * @param array<int, EvalCase> $cases
 * @return array<int, EvalCase>
 */
private function filterCasesById(array $cases, string $caseId): array
{
    $hasRequestedId = static function (EvalCase $candidate) use ($caseId): bool {
        return $candidate->id === $caseId;
    };

    $matching = array_filter($cases, $hasRequestedId);

    // array_filter() preserves keys; callers expect a plain list.
    return array_values($matching);
}
/**
 * Looks for a case ID in the case sets of every type listed in `self::TYPES`.
 *
 * Types are tried in the order of `self::TYPES`; the first type with at least one matching
 * case wins. When no type contains the ID, the result is an empty type string and an empty
 * case list.
 *
 * @return array{0:string,1:array<int, EvalCase>}
 */
private function findCasesByIdAcrossTypes(string $caseId): array
{
    foreach (self::TYPES as $candidateType => $_) {
        $matches = $this->filterCasesById($this->loadCases($candidateType), $caseId);
        if ($matches === []) {
            continue;
        }

        return [$candidateType, $matches];
    }

    return ['', []];
}
/**
* @return array<string, mixed>|null
*/

View File

@@ -146,18 +146,23 @@
</div>
<div class="mb-3">
<label class="form-label">Optional: Case-ID</label>
<input name="case_id"
list="evalCaseIds"
class="form-control bg-dark text-light border-secondary"
placeholder="z. B. followup_indicator_price_001">
<datalist id="evalCaseIds">
<label class="form-label">Optional: Case</label>
<select name="case_id"
class="form-select bg-dark text-light border-secondary js-admin-eval-case-select">
<option value="">Alle Cases des ausgewählten Typs</option>
{% for type, cases in cases_by_type %}
{% for case in cases %}
<option value="{{ case.id }}">{{ type }} - {{ case.prompt }}</option>
{# One option per case; data-eval-type drives the client-side filter. Options of other types start hidden and disabled. #}
<option value="{{ case.id }}"
data-eval-type="{{ type }}"
{% if type != selected_type %}hidden disabled{% endif %}>
{# Separator keeps the case ID and prompt from running together in the label. #}
{{ case.id }} - {{ case.prompt }}
</option>
{% endfor %}
{% endfor %}
</datalist>
</select>
<div class="form-text text-secondary">
Die Case-Liste wird passend zum Eval-Typ gefiltert. Leer lassen, um alle Cases des Typs auszuführen.
</div>
</div>
<button type="submit" class="btn btn-outline-warning js-admin-eval-run-button">
@@ -294,9 +299,94 @@
<div class="small text-success mb-2">Keine Fehler.</div>
{% endif %}
{% set documentRefs = result.details.document_refs|default([]) %}
{% if documentRefs is not empty %}
<div class="mb-2">
<div class="small text-secondary mb-1">Gefundene Dokumente</div>
<div class="table-responsive">
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-2">
<thead>
<tr class="small text-secondary">
<th style="width: 90px;">Ranks</th>
<th>Titel / Datei</th>
<th style="width: 170px;">Doc-ID</th>
<th style="width: 220px;">Chunks</th>
</tr>
</thead>
<tbody>
{% for doc in documentRefs %}
<tr>
<td class="small">{{ doc.ranks|default([])|join(', ') }}</td>
<td>
<div class="fw-semibold">{{ doc.title|default('Ohne Titel') }}</div>
{% if doc.file_path|default('') %}
<div class="small text-secondary" style="word-break: break-all;">
{{ doc.file_path }}
</div>
{% endif %}
{% if doc.version_number|default('') %}
<div class="small text-secondary">Version: {{ doc.version_number }}</div>
{% endif %}
</td>
<td><code class="small">{{ doc.id|default('') }}</code></td>
<td class="small" style="word-break: break-all;">
{% for chunkId in doc.chunk_ids|default([]) %}
<code>{{ chunkId }}</code>{% if not loop.last %}<br>{% endif %}
{% endfor %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
{% set resultRows = result.details.result_rows|default([]) %}
{% if resultRows is not empty %}
<details class="mb-2">
<summary class="small text-info" style="cursor:pointer;">
Treffer / Chunks anzeigen
</summary>
<div class="table-responsive mt-2">
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-0">
<thead>
<tr class="small text-secondary">
<th style="width: 60px;">Rank</th>
<th>Titel / Datei</th>
<th style="width: 180px;">Chunk</th>
<th>Preview</th>
</tr>
</thead>
<tbody>
{% for row in resultRows %}
<tr>
<td>{{ row.rank|default('') }}</td>
<td>
<div class="fw-semibold">{{ row.document_title|default('Ohne Titel') }}</div>
{% if row.file_path|default('') %}
<div class="small text-secondary" style="word-break: break-all;">{{ row.file_path }}</div>
{% endif %}
<div class="small text-secondary">Doc-ID: <code>{{ row.document_id|default('') }}</code></div>
</td>
<td class="small" style="word-break: break-all;">
<code>{{ row.chunk_id|default('') }}</code>
{% if row.chunk_index is defined and row.chunk_index is not same as(null) %}
<div class="text-secondary">Index: {{ row.chunk_index }}</div>
{% endif %}
</td>
<td class="small text-secondary">{{ row.text_preview|default('') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</details>
{% endif %}
<details>
<summary class="small text-info" style="cursor:pointer;">
Details anzeigen
JSON-Details anzeigen
</summary>
<pre class="bg-dark border border-secondary rounded p-2 mt-2 small text-light" style="white-space: pre-wrap; max-height: 260px; overflow: auto;">{{ result.details|default({})|json_encode(constant('JSON_PRETTY_PRINT')) }}</pre>
</details>
@@ -337,6 +427,33 @@
return (form.dataset.evalTypeLabel || 'Eval').trim();
}
/**
 * Shows only the case options that belong to the eval type currently chosen in the form.
 *
 * The empty-value option is always shown and enabled. Every other option is hidden and
 * disabled unless its data-eval-type equals the type select's value; if such a hidden option
 * is currently selected, the case select is reset to the empty value.
 * Does nothing when the form lacks either select.
 */
function syncCaseSelect(form) {
    const typeField = form.querySelector('.js-admin-eval-type-select');
    const caseField = form.querySelector('.js-admin-eval-case-select');

    if (!(typeField && caseField)) {
        return;
    }

    const activeType = typeField.value;

    for (const option of Array.from(caseField.options)) {
        // The placeholder ("all cases") stays available regardless of type.
        const visible = option.value === '' || option.dataset.evalType === activeType;

        option.hidden = !visible;
        option.disabled = !visible;

        if (!visible && option.selected) {
            caseField.value = '';
        }
    }
}
function setAllRunButtonsDisabled() {
document.querySelectorAll('.js-admin-eval-run-button').forEach(function (button) {
button.disabled = true;
@@ -345,6 +462,15 @@
}
forms.forEach(function (form) {
syncCaseSelect(form);
const typeSelect = form.querySelector('.js-admin-eval-type-select');
if (typeSelect) {
typeSelect.addEventListener('change', function () {
syncCaseSelect(form);
});
}
form.addEventListener('submit', function (event) {
const button = event.submitter && event.submitter.classList.contains('js-admin-eval-run-button')
? event.submitter