p100c
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
# RetrieX Patch p100b - Admin Eval Case Selection Fix
|
||||
|
||||
## Ziel
|
||||
|
||||
Behebt den Fehler in der Admin-Eval-Oberfläche, bei dem die Auswahl eines einzelnen Cases dazu führt, dass der Request mit `No eval cases selected.` endet.
|
||||
|
||||
## Ursache
|
||||
|
||||
Die p100/p100a-Seite nutzte ein freies `datalist`-Feld für Case-IDs, das Cases aller Eval-Typen enthielt. Dadurch konnte ein Case aus `shop_query` ausgewählt werden, während das Formular noch einen anderen Eval-Typ sendete. Der Admin-Service suchte dann nur in der Case-Datei des gesendeten Typs und fand keine passenden Cases.
|
||||
|
||||
## Änderungen
|
||||
|
||||
- Das freie Case-ID-Feld wurde durch ein gefiltertes Select ersetzt.
|
||||
- Die Case-Liste wird clientseitig passend zum gewählten Eval-Typ gefiltert.
|
||||
- Beim Wechsel des Eval-Typs wird eine nicht zum neuen Typ passende Case-Auswahl automatisch geleert.
|
||||
- Der Admin-Service ist robuster: Wenn eine Case-ID nicht im gesendeten Typ gefunden wird, wird sie über alle unterstützten Eval-Typen hinweg gesucht und mit dem richtigen Typ ausgeführt.
|
||||
- Der Controller leitet nach dem Run auf den tatsächlich ausgeführten Eval-Typ weiter.
|
||||
- Die alte unklare Meldung `No eval cases selected.` wird durch konkrete Fehlertexte ersetzt.
|
||||
|
||||
## Scope
|
||||
|
||||
Keine Änderungen an:
|
||||
|
||||
- Retrieval-Logik
|
||||
- Shopquery-Logik
|
||||
- Follow-up-Logik
|
||||
- Answer-Guard-Logik
|
||||
- Eval-Cases
|
||||
- YAML-Konfiguration
|
||||
- Modellparametern
|
||||
- Datenbank/Migrationen
|
||||
|
||||
## Geänderte Dateien
|
||||
|
||||
- `src/Controller/Admin/AdminEvalController.php`
|
||||
- `src/Service/Admin/EvalAdminService.php`
|
||||
- `templates/admin/evals/index.html.twig`
|
||||
@@ -0,0 +1,45 @@
|
||||
# RetrieX Patch p100c - Admin Eval Document Labels
|
||||
|
||||
## Ziel
|
||||
|
||||
Die Admin-Eval-Resultate sollen bei Retrieval-/Answer-Guard-Fällen nicht nur technische `document_id`- und `chunk_id`-Werte anzeigen, sondern auch menschenlesbare Dokumentinformationen, damit ein gefundenes Dokument im Admin/Dateibestand leichter identifiziert werden kann.
|
||||
|
||||
## Änderungen
|
||||
|
||||
- `NdjsonHybridRetriever::retrieveDebug()` gibt pro Debug-Treffer zusätzlich aus:
|
||||
- `document_title`
|
||||
- `file_path`
|
||||
- `version_number`
|
||||
- `RetrievalDebugRunner` schreibt in Eval-Reports zusätzlich:
|
||||
- `document_refs`: eindeutige Dokumentübersicht mit Titel, Datei, Version, Ranks und Chunk-IDs
|
||||
- `result_rows`: rankgenaue Trefferliste mit Titel, Datei, Chunk-ID und Text-Preview
|
||||
- Admin-Eval-Template zeigt diese Informationen direkt in den Result-Details:
|
||||
- Tabelle "Gefundene Dokumente"
|
||||
- aufklappbare Tabelle "Treffer / Chunks anzeigen"
|
||||
- JSON-Details bleiben weiterhin verfügbar
|
||||
|
||||
## Nicht geändert
|
||||
|
||||
- Keine Eval-Assertions geändert
|
||||
- Keine Retrieval-Gewichte geändert
|
||||
- Keine Shopquery-/Follow-up-/Answer-Logik geändert
|
||||
- Keine YAML-/Parameteränderung
|
||||
- Keine Datenbankmigration
|
||||
|
||||
## Prüfung
|
||||
|
||||
Nach Einspielen:
|
||||
|
||||
```bash
|
||||
php bin/console mto:agent:config:validate
|
||||
php bin/console mto:agent:eval:run retrieval
|
||||
php bin/console mto:agent:eval:run answer_guard
|
||||
```
|
||||
|
||||
Danach im Admin:
|
||||
|
||||
```text
|
||||
/admin/evals/
|
||||
```
|
||||
|
||||
Einen Retrieval- oder Answer-Guard-Eval öffnen und prüfen, ob bei den Resultaten Titel/Datei zusätzlich zur Doc-ID sichtbar sind.
|
||||
@@ -48,6 +48,8 @@ final class AdminEvalController extends AbstractController
|
||||
|
||||
try {
|
||||
$report = $evals->run($type, $caseId !== '' ? $caseId : null);
|
||||
$type = trim((string) ($report['type'] ?? $type));
|
||||
|
||||
$this->addFlash(
|
||||
((int) ($report['failed'] ?? 0)) === 0 ? 'success' : 'danger',
|
||||
sprintf(
|
||||
|
||||
@@ -33,6 +33,8 @@ final readonly class RetrievalDebugRunner
|
||||
|
||||
$documentIds = $this->extractUniqueStringValues($rows, 'document_id');
|
||||
$chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id');
|
||||
$documentRefs = $this->buildDocumentRefs($rows);
|
||||
$resultRows = $this->buildResultRows($rows);
|
||||
$joinedText = $this->extractJoinedText($rows);
|
||||
|
||||
$assert = $case->assert;
|
||||
@@ -220,6 +222,8 @@ final readonly class RetrievalDebugRunner
|
||||
'intent' => $intent,
|
||||
'document_ids' => $documentIds,
|
||||
'chunk_ids' => $chunkIds,
|
||||
'document_refs' => $documentRefs,
|
||||
'result_rows' => $resultRows,
|
||||
'matched_any_terms' => $matchedAnyTerms,
|
||||
'matched_all_terms' => $matchedAllTerms,
|
||||
'forbidden_terms_checked' => $this->normalizeStringList($assert['must_not_include_terms'] ?? []),
|
||||
@@ -268,6 +272,122 @@ final readonly class RetrievalDebugRunner
|
||||
return array_keys($values);
|
||||
}
|
||||
|
||||
/**
 * Groups retrieval rows into one reference entry per document.
 *
 * Rows without a usable `document_id` are skipped. Entries are returned in the
 * order in which each document first appears in $rows. Chunk IDs and ranks are
 * collected per document without duplicates, in first-seen order.
 *
 * The label fields (title, file path, version number) are filled from the first
 * row of a document that carries a non-empty value. A document therefore stays
 * identifiable even when its first row lacks that piece of metadata.
 *
 * @param array<int, array<string, mixed>> $rows
 * @return array<int, array{id:string,title:string,file_path:string,version_number:string,chunk_ids:array<int,string>,ranks:array<int,int>}>
 */
private function buildDocumentRefs(array $rows): array
{
    // Reference-entry key => row key it is read from.
    $labelFields = [
        'title' => 'document_title',
        'file_path' => 'file_path',
        'version_number' => 'version_number',
    ];

    $refs = [];

    foreach ($rows as $row) {
        $documentId = $this->extractNullableString($row, 'document_id');

        if ($documentId === '') {
            continue;
        }

        if (!isset($refs[$documentId])) {
            $refs[$documentId] = [
                'id' => $documentId,
                'title' => '',
                'file_path' => '',
                'version_number' => '',
                'chunk_ids' => [],
                'ranks' => [],
            ];
        }

        // Backfill labels that are still empty; a value that was already
        // found on an earlier row is never overwritten.
        foreach ($labelFields as $refKey => $rowKey) {
            if ($refs[$documentId][$refKey] === '') {
                $refs[$documentId][$refKey] = $this->extractNullableString($row, $rowKey);
            }
        }

        $chunkId = $this->extractNullableString($row, 'chunk_id');
        if ($chunkId !== '' && !in_array($chunkId, $refs[$documentId]['chunk_ids'], true)) {
            $refs[$documentId]['chunk_ids'][] = $chunkId;
        }

        $rank = $this->extractNullableInt($row, 'rank');
        if ($rank !== null && !in_array($rank, $refs[$documentId]['ranks'], true)) {
            $refs[$documentId]['ranks'][] = $rank;
        }
    }

    // Drop the document-ID keys so the result is a plain list.
    return array_values($refs);
}
|
||||
|
||||
/**
 * Converts every retrieval row into a flat, rank-level entry for the report.
 *
 * Each entry carries the rank, document and chunk identifiers, the document
 * title and file path, both scores as found on the row, and a shortened
 * single-line preview of the row text. The output is a re-indexed list with
 * one entry per input row, in input order.
 *
 * @param array<int, array<string, mixed>> $rows
 * @return array<int, array<string, mixed>>
 */
private function buildResultRows(array $rows): array
{
    $toResultRow = fn (array $hit): array => [
        'rank' => $this->extractNullableInt($hit, 'rank'),
        'document_id' => $this->extractNullableString($hit, 'document_id'),
        'document_title' => $this->extractNullableString($hit, 'document_title'),
        'file_path' => $this->extractNullableString($hit, 'file_path'),
        'chunk_id' => $this->extractNullableString($hit, 'chunk_id'),
        'chunk_index' => $this->extractNullableInt($hit, 'chunk_index'),
        // Scores are passed through untouched; missing scores become null.
        'raw_score' => $hit['raw_score'] ?? null,
        'rrf_score' => $hit['rrf_score'] ?? null,
        'text_preview' => $this->previewText($this->extractNullableString($hit, 'text')),
    ];

    // array_map keeps the input keys, so re-index to get a plain list.
    return array_values(array_map($toResultRow, $rows));
}
|
||||
|
||||
/**
 * Reads $key from $row as a trimmed string.
 *
 * A missing key, a null value, an array or an object all yield an empty
 * string; every other value is cast to string and trimmed.
 *
 * @param array<string, mixed> $row
 */
private function extractNullableString(array $row, string $key): string
{
    $raw = $row[$key] ?? null;

    $isCastable = $raw !== null && !is_array($raw) && !is_object($raw);

    return $isCastable ? trim((string) $raw) : '';
}
|
||||
|
||||
/**
 * Reads $key from $row as an integer, or null when it is not one.
 *
 * Native integers are returned as-is. Strings are accepted when, after
 * trimming, they consist only of an optional minus sign and decimal digits.
 * Everything else (missing key, null, empty string, floats, booleans, arrays,
 * objects, non-numeric strings) yields null.
 *
 * @param array<string, mixed> $row
 */
private function extractNullableInt(array $row, string $key): ?int
{
    $value = $row[$key] ?? null;

    if (is_int($value)) {
        return $value;
    }

    if (!is_string($value)) {
        return null;
    }

    $digits = trim($value);

    // Cast the validated, trimmed string rather than the raw value: trim()
    // also strips NUL bytes, which an int cast of the raw string would not
    // skip, turning an accepted value into 0.
    return preg_match('/^-?\d+$/', $digits) === 1 ? (int) $digits : null;
}
|
||||
|
||||
/**
 * Builds a single-line preview of $text with at most $limit characters.
 *
 * Every run of whitespace is collapsed to one space and the result is trimmed.
 * Text longer than $limit characters is cut at $limit and suffixed with "...".
 * An empty or whitespace-only text yields an empty string.
 */
private function previewText(string $text, int $limit = 240): string
{
    // preg_replace() returns null on failure (for example invalid UTF-8 under
    // the /u modifier); fall back to the unmodified text in that case.
    $collapsed = preg_replace('/\s+/u', ' ', $text) ?? $text;

    // Trim after collapsing: whitespace at the edges that trim() alone does
    // not strip has been turned into a plain space by now and is removed too.
    $text = trim($collapsed);

    if ($text === '') {
        return '';
    }

    if (mb_strlen($text, 'UTF-8') <= $limit) {
        return $text;
    }

    return mb_substr($text, 0, $limit, 'UTF-8') . '...';
}
|
||||
|
||||
/**
|
||||
* @param array<int, array<string, mixed>> $rows
|
||||
*/
|
||||
|
||||
@@ -133,13 +133,17 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
continue;
|
||||
}
|
||||
|
||||
$row = $result['rows'][$chunkId];
|
||||
$rank++;
|
||||
|
||||
$out[] = [
|
||||
'rank' => $rank,
|
||||
'chunk_id' => $chunkId,
|
||||
'document_id' => $result['rows'][$chunkId]['document_id'] ?? null,
|
||||
'chunk_index' => $result['rows'][$chunkId]['chunk_index'] ?? null,
|
||||
'document_id' => $row['document_id'] ?? null,
|
||||
'document_title' => $this->extractDocumentTitle($row),
|
||||
'file_path' => $this->extractMetadataString($row, 'file_path'),
|
||||
'version_number' => $this->extractMetadataString($row, 'version_number'),
|
||||
'chunk_index' => $row['chunk_index'] ?? null,
|
||||
'raw_score' => $result['rawScores'][$chunkId] ?? null,
|
||||
'rrf_score' => $result['rrfScores'][$chunkId] ?? null,
|
||||
'threshold' => $result['threshold'],
|
||||
@@ -148,7 +152,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
'entity_label' => $result['entityLabel'],
|
||||
'is_list_query' => $result['isListQuery'],
|
||||
'selection_mode' => $result['selectionMode'],
|
||||
'text' => trim((string)$result['rows'][$chunkId]['text']),
|
||||
'text' => trim((string)($row['text'] ?? '')),
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1683,6 +1687,20 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Reads a scalar entry from the row's `metadata` array for debug/eval output.
 *
 * Returns the value cast to string and trimmed. A missing `metadata` entry,
 * a missing key, null, or any non-scalar value yields an empty string.
 *
 * @param array<string, mixed> $row
 */
private function extractMetadataString(array $row, string $key): string
{
    $candidate = $row['metadata'][$key] ?? null;

    return is_scalar($candidate) ? trim((string) $candidate) : '';
}
|
||||
|
||||
/**
|
||||
* Normalizes text for token-safe product comparisons.
|
||||
*/
|
||||
|
||||
@@ -111,14 +111,25 @@ final readonly class EvalAdminService
|
||||
$cases = $this->loadCases($type);
|
||||
|
||||
if ($caseId !== '') {
|
||||
$cases = array_values(array_filter(
|
||||
$cases,
|
||||
static fn (EvalCase $case): bool => $case->id === $caseId
|
||||
));
|
||||
$cases = $this->filterCasesById($cases, $caseId);
|
||||
|
||||
if ($cases === []) {
|
||||
[$type, $cases] = $this->findCasesByIdAcrossTypes($caseId);
|
||||
}
|
||||
}
|
||||
|
||||
if ($cases === []) {
|
||||
throw new \RuntimeException('No eval cases selected.');
|
||||
if ($caseId !== '') {
|
||||
throw new \RuntimeException(sprintf(
|
||||
'Eval case "%s" was not found. Please select a case from the list for the chosen eval type.',
|
||||
$caseId
|
||||
));
|
||||
}
|
||||
|
||||
throw new \RuntimeException(sprintf(
|
||||
'No eval cases available for eval type "%s".',
|
||||
$type
|
||||
));
|
||||
}
|
||||
|
||||
$results = $this->runner->runAll($cases);
|
||||
@@ -133,6 +144,35 @@ final readonly class EvalAdminService
|
||||
return $report;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Keeps only the cases whose ID is exactly $caseId.
 *
 * The result is re-indexed from zero; it is empty when no case matches.
 *
 * @param array<int, EvalCase> $cases
 * @return array<int, EvalCase>
 */
private function filterCasesById(array $cases, string $caseId): array
{
    $hasRequestedId = static function (EvalCase $case) use ($caseId): bool {
        return $case->id === $caseId;
    };

    $matches = array_filter($cases, $hasRequestedId);

    // array_filter keeps the original keys; callers expect a plain list.
    return array_values($matches);
}
|
||||
|
||||
/**
 * Looks for $caseId in the cases of every eval type listed in self::TYPES.
 *
 * Types are tried in the key order of self::TYPES and the first type that
 * yields at least one matching case wins. When no type contains the case,
 * an empty type name and an empty case list are returned.
 *
 * @return array{0:string,1:array<int, EvalCase>}
 */
private function findCasesByIdAcrossTypes(string $caseId): array
{
    $typeNames = array_keys(self::TYPES);

    foreach ($typeNames as $typeName) {
        $hits = $this->filterCasesById($this->loadCases($typeName), $caseId);

        if ($hits === []) {
            continue;
        }

        return [$typeName, $hits];
    }

    return ['', []];
}
|
||||
|
||||
/**
|
||||
* @return array<string, mixed>|null
|
||||
*/
|
||||
|
||||
@@ -146,18 +146,23 @@
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label class="form-label">Optional: Case-ID</label>
|
||||
<input name="case_id"
|
||||
list="evalCaseIds"
|
||||
class="form-control bg-dark text-light border-secondary"
|
||||
placeholder="z. B. followup_indicator_price_001">
|
||||
<datalist id="evalCaseIds">
|
||||
<label class="form-label">Optional: Case</label>
|
||||
<select name="case_id"
|
||||
class="form-select bg-dark text-light border-secondary js-admin-eval-case-select">
|
||||
<option value="">Alle Cases des ausgewählten Typs</option>
|
||||
{% for type, cases in cases_by_type %}
|
||||
{% for case in cases %}
|
||||
<option value="{{ case.id }}">{{ type }} - {{ case.prompt }}</option>
|
||||
<option value="{{ case.id }}"
|
||||
data-eval-type="{{ type }}"
|
||||
{% if type != selected_type %}hidden disabled{% endif %}>
|
||||
{{ case.id }} — {{ case.prompt }}
|
||||
</option>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
</datalist>
|
||||
</select>
|
||||
<div class="form-text text-secondary">
|
||||
Die Case-Liste wird passend zum Eval-Typ gefiltert. Leer lassen, um alle Cases des Typs auszuführen.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<button type="submit" class="btn btn-outline-warning js-admin-eval-run-button">
|
||||
@@ -294,9 +299,94 @@
|
||||
<div class="small text-success mb-2">Keine Fehler.</div>
|
||||
{% endif %}
|
||||
|
||||
{% set documentRefs = result.details.document_refs|default([]) %}
|
||||
{% if documentRefs is not empty %}
|
||||
<div class="mb-2">
|
||||
<div class="small text-secondary mb-1">Gefundene Dokumente</div>
|
||||
<div class="table-responsive">
|
||||
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-2">
|
||||
<thead>
|
||||
<tr class="small text-secondary">
|
||||
<th style="width: 90px;">Ranks</th>
|
||||
<th>Titel / Datei</th>
|
||||
<th style="width: 170px;">Doc-ID</th>
|
||||
<th style="width: 220px;">Chunks</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for doc in documentRefs %}
|
||||
<tr>
|
||||
<td class="small">{{ doc.ranks|default([])|join(', ') }}</td>
|
||||
<td>
|
||||
<div class="fw-semibold">{{ doc.title|default('Ohne Titel') }}</div>
|
||||
{% if doc.file_path|default('') %}
|
||||
<div class="small text-secondary" style="word-break: break-all;">
|
||||
{{ doc.file_path }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if doc.version_number|default('') %}
|
||||
<div class="small text-secondary">Version: {{ doc.version_number }}</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td><code class="small">{{ doc.id|default('') }}</code></td>
|
||||
<td class="small" style="word-break: break-all;">
|
||||
{% for chunkId in doc.chunk_ids|default([]) %}
|
||||
<code>{{ chunkId }}</code>{% if not loop.last %}<br>{% endif %}
|
||||
{% endfor %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% set resultRows = result.details.result_rows|default([]) %}
|
||||
{% if resultRows is not empty %}
|
||||
<details class="mb-2">
|
||||
<summary class="small text-info" style="cursor:pointer;">
|
||||
Treffer / Chunks anzeigen
|
||||
</summary>
|
||||
<div class="table-responsive mt-2">
|
||||
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-0">
|
||||
<thead>
|
||||
<tr class="small text-secondary">
|
||||
<th style="width: 60px;">Rank</th>
|
||||
<th>Titel / Datei</th>
|
||||
<th style="width: 180px;">Chunk</th>
|
||||
<th>Preview</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for row in resultRows %}
|
||||
<tr>
|
||||
<td>{{ row.rank|default('') }}</td>
|
||||
<td>
|
||||
<div class="fw-semibold">{{ row.document_title|default('Ohne Titel') }}</div>
|
||||
{% if row.file_path|default('') %}
|
||||
<div class="small text-secondary" style="word-break: break-all;">{{ row.file_path }}</div>
|
||||
{% endif %}
|
||||
<div class="small text-secondary">Doc-ID: <code>{{ row.document_id|default('') }}</code></div>
|
||||
</td>
|
||||
<td class="small" style="word-break: break-all;">
|
||||
<code>{{ row.chunk_id|default('') }}</code>
|
||||
{% if row.chunk_index is defined and row.chunk_index is not same as(null) %}
|
||||
<div class="text-secondary">Index: {{ row.chunk_index }}</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="small text-secondary">{{ row.text_preview|default('') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<details>
|
||||
<summary class="small text-info" style="cursor:pointer;">
|
||||
Details anzeigen
|
||||
JSON-Details anzeigen
|
||||
</summary>
|
||||
<pre class="bg-dark border border-secondary rounded p-2 mt-2 small text-light" style="white-space: pre-wrap; max-height: 260px; overflow: auto;">{{ result.details|default({})|json_encode(constant('JSON_PRETTY_PRINT')) }}</pre>
|
||||
</details>
|
||||
@@ -337,6 +427,33 @@
|
||||
return (form.dataset.evalTypeLabel || 'Eval').trim();
|
||||
}
|
||||
|
||||
/**
 * Aligns the case dropdown of an eval form with its eval-type dropdown.
 *
 * Options whose data-eval-type differs from the chosen type are hidden and
 * disabled. The empty-value option always stays available. If a mismatching
 * option is currently selected, the case selection is reset to the empty value.
 * Does nothing when the form lacks either dropdown.
 */
function syncCaseSelect(form) {
    const typeField = form.querySelector('.js-admin-eval-type-select');
    const caseField = form.querySelector('.js-admin-eval-case-select');

    if (!(typeField && caseField)) {
        return;
    }

    const activeType = typeField.value;

    for (const entry of Array.from(caseField.options)) {
        // The empty-value entry is never treated as a mismatch.
        const isPlaceholder = entry.value === '';
        const mismatch = !isPlaceholder && entry.dataset.evalType !== activeType;

        entry.hidden = mismatch;
        entry.disabled = mismatch;

        if (mismatch && entry.selected) {
            caseField.value = '';
        }
    }
}
|
||||
|
||||
function setAllRunButtonsDisabled() {
|
||||
document.querySelectorAll('.js-admin-eval-run-button').forEach(function (button) {
|
||||
button.disabled = true;
|
||||
@@ -345,6 +462,15 @@
|
||||
}
|
||||
|
||||
forms.forEach(function (form) {
|
||||
syncCaseSelect(form);
|
||||
|
||||
const typeSelect = form.querySelector('.js-admin-eval-type-select');
|
||||
if (typeSelect) {
|
||||
typeSelect.addEventListener('change', function () {
|
||||
syncCaseSelect(form);
|
||||
});
|
||||
}
|
||||
|
||||
form.addEventListener('submit', function (event) {
|
||||
const button = event.submitter && event.submitter.classList.contains('js-admin-eval-run-button')
|
||||
? event.submitter
|
||||
|
||||
Reference in New Issue
Block a user