This commit is contained in:
team 1
2026-05-12 11:08:34 +02:00
parent 6dced1c4df
commit 6e2ca15e97
5 changed files with 542 additions and 287 deletions

View File

@@ -0,0 +1,54 @@
# RetrieX Patch p101a - Admin Eval Case Creator Separate Page
## Ziel
Der Eval-Case-Creator wird als eigene Admin-Seite geführt, damit die Eval-Suite-Übersicht schlank bleibt und nicht durch das komplette Case-Erstellformular aufgeblasen wirkt.
## Neue / geänderte Admin-Routen
- `GET /admin/evals/` bleibt die fokussierte Eval-Suite-Übersicht für Runs und Reports.
- `GET /admin/evals/cases/new` zeigt das separate Formular zum Anlegen neuer Eval-Cases.
- `POST /admin/evals/cases` speichert neue Eval-Cases in `tests/evals/cases/<type>.ndjson`.
## UX-Änderungen
- Die Eval-Suite-Übersicht erhält nur einen kompakten Button `Eval-Case erstellen`.
- Report-Ergebnisse erhalten den Button `Als neuen Case vorbereiten`.
- Die neue Seite übernimmt bei vorbereiteten Cases:
  - Eval-Typ
  - Prompt
  - History/Kontext, sofern im Report vorhanden
  - vorgeschlagene Assertions aus Query, Einzelqueries oder Dokument-IDs
- Die eigentliche Case-Erstellung liegt außerhalb der Report-/Run-Übersicht.
## Validierung
Beim Speichern wird Folgendes geprüft:
- CSRF-Token
- `ROLE_KNOWLEDGE_ADMIN`
- unterstützter Eval-Typ
- eindeutige Case-ID über alle Eval-Typen
- erlaubtes Case-ID-Format
- nicht leerer Prompt
- gültiges Assert-JSON-Objekt
- gültige History-JSON-Liste
- DTO-Validierung über `EvalCase::fromArray()`
## Nicht geändert
- Keine Retrieval-Logik
- Keine Shopquery-Logik
- Keine Follow-up-Logik
- Keine Answer-Guard-Logik
- Keine bestehenden Eval-Cases
- Keine YAML-/Parameteränderung
- Keine Migration
## Betroffene Dateien
- `src/Controller/Admin/AdminEvalController.php`
- `src/Service/Admin/EvalAdminService.php`
- `templates/admin/evals/index.html.twig`
- `templates/admin/evals/case_new.html.twig`
- `patch_history/RETRIEX_PATCH_101A_ADMIN_EVAL_CASE_CREATOR_PAGE_README.md`

View File

@@ -68,7 +68,35 @@ final class AdminEvalController extends AbstractController
]);
}
#[Route('/case/create', name: 'admin_evals_case_create', methods: ['POST'])]
/**
 * Shows the standalone form for creating a new eval case.
 *
 * Query parameters read here:
 *  - `type`: eval type to preselect. When it is missing or unsupported, a
 *    supported `source_type` is used instead, otherwise "retrieval".
 *  - `source_type` and `source_case_id`: when both are non-empty, the draft is
 *    prefilled from the matching report result.
 *
 * Any error raised while building the report-based draft is shown as a warning
 * flash message and the form falls back to an empty draft.
 */
#[Route('/cases/new', name: 'admin_evals_case_new', methods: ['GET'])]
public function newCase(Request $request, EvalAdminService $evals): Response
{
    $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

    $supportedTypes = $evals->supportedTypeNames();
    $sourceType = trim((string) $request->query->get('source_type', ''));
    $sourceCaseId = trim((string) $request->query->get('source_case_id', ''));

    $type = trim((string) $request->query->get('type', ''));
    if (!in_array($type, $supportedTypes, true)) {
        // Requests that only carry source_type should stay on that eval type
        // (also in the fallback draft below) instead of silently switching to
        // the hard-coded default.
        $type = in_array($sourceType, $supportedTypes, true) ? $sourceType : 'retrieval';
    }

    try {
        $draft = $sourceType !== '' && $sourceCaseId !== ''
            ? $evals->caseDraftFromReportResult($sourceType, $sourceCaseId)
            : $evals->emptyCaseDraft($type);
    } catch (\Throwable $e) {
        // Deliberate best effort: a broken or missing report must not block
        // manual case creation, so report the problem and start empty.
        $this->addFlash('warning', $e->getMessage());
        $draft = $evals->emptyCaseDraft($type);
    }

    return $this->render('admin/evals/case_new.html.twig', [
        'types' => $evals->supportedTypes(),
        'case_draft' => $draft,
    ]);
}
#[Route('/cases', name: 'admin_evals_case_create', methods: ['POST'])]
public function createCase(Request $request, EvalAdminService $evals): Response
{
$this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);
@@ -78,6 +106,15 @@ final class AdminEvalController extends AbstractController
}
$type = trim((string) $request->request->get('type', 'retrieval'));
$draft = [
'type' => $type,
'id' => (string) $request->request->get('id', ''),
'prompt' => (string) $request->request->get('prompt', ''),
'assert_json' => (string) $request->request->get('assert_json', ''),
'history_json' => (string) $request->request->get('history_json', ''),
'request_context_hint' => (string) $request->request->get('request_context_hint', ''),
'source_label' => '',
];
try {
$created = $evals->createCase(
@@ -95,17 +132,21 @@ final class AdminEvalController extends AbstractController
'success',
sprintf('Eval-Case "%s" wurde in %s.ndjson gespeichert.', (string) ($created['id'] ?? ''), $type)
);
return $this->redirectToRoute('admin_evals_index', [
'type' => $type,
]);
} catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage());
}
if (!in_array($type, $evals->supportedTypeNames(), true)) {
$type = 'retrieval';
$draft['type'] = 'retrieval';
}
return $this->redirectToRoute('admin_evals_index', [
'type' => $type,
]);
return $this->render('admin/evals/case_new.html.twig', [
'types' => $evals->supportedTypes(),
'case_draft' => $draft,
], new Response('', Response::HTTP_UNPROCESSABLE_ENTITY));
}
}

View File

@@ -144,6 +144,75 @@ final readonly class EvalAdminService
return $report;
}
/**
 * Creates a blank form draft for the given eval type.
 *
 * Every text field is empty except `assert_json`, which is prefilled with the
 * pretty-printed default assertion for the type.
 *
 * @return array{type:string,id:string,prompt:string,assert_json:string,history_json:string,request_context_hint:string,source_label:string}
 */
public function emptyCaseDraft(string $type = 'retrieval'): array
{
    $supportedType = $this->assertSupportedType($type);

    // Start with all fields blank (in form order), then fill in the one
    // field that carries a default.
    $draft = ['type' => $supportedType] + array_fill_keys(
        ['id', 'prompt', 'assert_json', 'history_json', 'request_context_hint', 'source_label'],
        ''
    );
    $draft['assert_json'] = $this->encodePrettyJson($this->defaultAssertForType($supportedType));

    return $draft;
}
/**
 * Builds a prefilled form draft from one result of the stored report for an
 * eval type.
 *
 * The draft takes over the prompt, the history (if the result details contain
 * one) and a suggested assertion, and proposes a case ID that is not in use yet.
 *
 * @param string $type   Eval type whose report is searched; checked via assertSupportedType().
 * @param string $caseId Case ID of the report result to use as the template.
 *
 * @return array{type:string,id:string,prompt:string,assert_json:string,history_json:string,request_context_hint:string,source_label:string}
 *
 * @throws \InvalidArgumentException when the case ID is empty after trimming
 * @throws \RuntimeException         when no report exists for the type or the report does not contain the case
 */
public function caseDraftFromReportResult(string $type, string $caseId): array
{
    $type = $this->assertSupportedType($type);
    $caseId = trim($caseId);
    if ($caseId === '') {
        throw new \InvalidArgumentException('Es wurde keine Quell-Case-ID übergeben.');
    }

    $report = $this->readTypeReport($type);
    if ($report === null) {
        throw new \RuntimeException(sprintf(
            'Für den Eval-Typ "%s" liegt kein Report vor. Bitte den Eval zuerst ausführen.',
            $type
        ));
    }

    // A malformed report (results missing or not a list) is treated like a
    // report without results instead of triggering a foreach warning.
    $results = is_array($report['results'] ?? null) ? $report['results'] : [];
    $result = null;
    foreach ($results as $candidate) {
        if (is_array($candidate) && (string) ($candidate['case_id'] ?? '') === $caseId) {
            $result = $candidate;
            break;
        }
    }
    if (!is_array($result)) {
        throw new \RuntimeException(sprintf(
            'Der Report enthält keinen Case "%s" für Eval-Typ "%s".',
            $caseId,
            $type
        ));
    }

    $details = is_array($result['details'] ?? null) ? $result['details'] : [];

    // Only scalar prompts can be turned into text; anything else becomes an
    // empty prompt the admin has to fill in.
    $rawPrompt = $result['prompt'] ?? $details['prompt'] ?? '';
    $prompt = is_scalar($rawPrompt) ? trim((string) $rawPrompt) : '';

    $history = $this->historyDraftFromDetails($details);
    $assert = $this->suggestAssertFromReportResult($type, $result, $details);

    // Do not repeat the type prefix when the source ID already carries it
    // (e.g. "retrieval_foo" must not become "retrieval_retrieval_foo_new").
    $idBase = str_starts_with(strtolower($caseId), $type . '_')
        ? $caseId . '_new'
        : $type . '_' . $caseId . '_new';

    return [
        'type' => $type,
        'id' => $this->suggestUniqueCaseId($idBase),
        'prompt' => $prompt,
        'assert_json' => $this->encodePrettyJson($assert),
        'history_json' => $history === [] ? '' : $this->encodePrettyJson($history),
        'request_context_hint' => '',
        'source_label' => sprintf('Vorlage aus Report-Case %s (%s)', $caseId, self::TYPES[$type]),
    ];
}
/**
* @return array{type:string,id:string,path:string,row:array<string,mixed>,case_count:int}
@@ -190,7 +259,7 @@ final readonly class EvalAdminService
$row['request_context_hint'] = $requestContextHint;
}
// Reuse the regular DTO validation before writing the case file.
// Validate with the same DTO that the eval runner uses.
EvalCase::fromArray($row);
$path = $this->caseFilePath($type);
@@ -221,7 +290,6 @@ final readonly class EvalAdminService
];
}
/**
* @param array<int, EvalCase> $cases
* @return array<int, EvalCase>
@@ -326,7 +394,6 @@ final readonly class EvalAdminService
return $decoded;
}
private function normalizeNewCaseId(string $id): string
{
$id = trim($id);
@@ -374,7 +441,7 @@ final readonly class EvalAdminService
throw new \InvalidArgumentException(sprintf('%s ist ungültig: %s', $label, $e->getMessage()));
}
if (!is_array($decoded)) {
if (!is_array($decoded) || !str_starts_with($json, '{') || ($decoded !== [] && array_is_list($decoded))) {
throw new \InvalidArgumentException(sprintf('%s muss ein JSON-Objekt sein.', $label));
}
@@ -398,7 +465,7 @@ final readonly class EvalAdminService
throw new \InvalidArgumentException(sprintf('History-JSON ist ungültig: %s', $e->getMessage()));
}
if (!is_array($decoded)) {
if (!is_array($decoded) || !str_starts_with($json, '[') || !array_is_list($decoded)) {
throw new \InvalidArgumentException('History-JSON muss eine JSON-Liste sein.');
}
@@ -458,4 +525,162 @@ final readonly class EvalAdminService
return $failed === 0 ? 'green' : 'red';
}
/**
 * Returns the starter assertion shown in the form for an eval type.
 *
 * Result-based types start with a minimum result count, query-based types
 * with an empty expected query; any other type gets no assertion.
 *
 * @return array<string, mixed>
 */
private function defaultAssertForType(string $type): array
{
    if (in_array($type, ['retrieval', 'answer_guard'], true)) {
        return ['min_results' => 1];
    }

    if (in_array($type, ['shop_query', 'followup'], true)) {
        return ['expected_query' => ''];
    }

    return [];
}
/**
 * Suggests an assertion for a new case based on the details of a report result.
 *
 * Order of precedence:
 *  1. shop_query / followup: non-empty `query` -> `expected_query`
 *  2. shop_query / followup: `individual_queries` -> exact individual queries
 *  3. `document_refs[*].id`, then `document_ids` -> `must_include_one_of_document_ids`
 *  4. numeric `result_count` of 0 -> `max_results: 0`
 *  5. otherwise the default assertion of the type
 *
 * @param array<string, mixed> $result  Report result row; not read at the moment, kept so the signature stays stable.
 * @param array<string, mixed> $details The `details` part of the report result.
 *
 * @return array<string, mixed>
 */
private function suggestAssertFromReportResult(string $type, array $result, array $details): array
{
    // Trims scalar entries and drops blanks. Compares against '' explicitly so
    // that a legitimate value such as "0" survives (a callback-less
    // array_filter() would remove it) and skips arrays/objects, which cannot
    // be cast to a meaningful string.
    $cleanStrings = static function (array $values): array {
        $clean = [];
        foreach ($values as $value) {
            if (!is_scalar($value)) {
                continue;
            }
            $text = trim((string) $value);
            if ($text !== '') {
                $clean[] = $text;
            }
        }

        return $clean;
    };

    $isQueryType = $type === 'shop_query' || $type === 'followup';

    if ($isQueryType && is_string($details['query'] ?? null)) {
        $query = trim($details['query']);
        if ($query !== '') {
            return [
                'expected_query' => $query,
            ];
        }
    }

    if ($isQueryType && is_array($details['individual_queries'] ?? null)) {
        $queries = $cleanStrings($details['individual_queries']);
        if ($queries !== []) {
            return [
                'expected_individual_queries' => $queries,
                'expected_individual_queries_exact' => true,
            ];
        }
    }

    if (is_array($details['document_refs'] ?? null)) {
        $refIds = [];
        foreach ($details['document_refs'] as $documentRef) {
            if (is_array($documentRef)) {
                $refIds[] = $documentRef['id'] ?? null;
            }
        }
        $documentIds = $cleanStrings($refIds);
        if ($documentIds !== []) {
            return [
                'min_results' => 1,
                'must_include_one_of_document_ids' => array_values(array_unique($documentIds)),
            ];
        }
    }

    if (is_array($details['document_ids'] ?? null)) {
        $documentIds = $cleanStrings($details['document_ids']);
        if ($documentIds !== []) {
            return [
                'min_results' => 1,
                'must_include_one_of_document_ids' => array_values(array_unique($documentIds)),
            ];
        }
    }

    // Only a real numeric zero means "no results expected"; values such as
    // false or a non-numeric string must not be cast to 0 by accident.
    $resultCount = $details['result_count'] ?? null;
    if (is_numeric($resultCount) && (int) $resultCount === 0) {
        return [
            'max_results' => 0,
        ];
    }

    return $this->defaultAssertForType($type);
}
/**
 * Converts the `history` entries of report details into history rows for the form.
 *
 * Non-array entries and entries without prompt and answer are skipped. The
 * answer falls back to `answer_preview` when `answer` is not set, and an
 * empty prompt is replaced by the placeholder "Eval-Kontext".
 *
 * @param array<string, mixed> $details
 * @return array<int, array{prompt:string,answer:string}>
 */
private function historyDraftFromDetails(array $details): array
{
    $rawHistory = $details['history'] ?? null;
    if (!is_array($rawHistory)) {
        return [];
    }

    $turns = [];
    foreach ($rawHistory as $turn) {
        if (!is_array($turn)) {
            continue;
        }

        $question = trim((string) ($turn['prompt'] ?? ''));
        $reply = trim((string) ($turn['answer'] ?? $turn['answer_preview'] ?? ''));

        // Keep the turn as soon as either side carries text.
        if ($question !== '' || $reply !== '') {
            $turns[] = [
                'prompt' => $question === '' ? 'Eval-Kontext' : $question,
                'answer' => $reply,
            ];
        }
    }

    return $turns;
}
/**
 * Derives a case ID from $base that is not reported as existing by caseIdExists().
 *
 * The base is lower-cased, every run of characters outside [a-z0-9_-] becomes
 * one underscore, and leading/trailing "_" / "-" are removed ("eval_case" if
 * nothing remains). If the ID is taken, the suffixes _2 to _999 are tried; as
 * a last resort a timestamp suffix is appended without a further check.
 */
private function suggestUniqueCaseId(string $base): string
{
    $slug = preg_replace('/[^a-z0-9_-]+/', '_', strtolower(trim($base))) ?? 'eval_case';
    $slug = trim($slug, '_-');
    if ($slug === '') {
        $slug = 'eval_case';
    }

    if (!$this->caseIdExists($slug)) {
        return $slug;
    }

    $suffix = 2;
    while ($suffix <= 999) {
        $numbered = sprintf('%s_%d', $slug, $suffix);
        if (!$this->caseIdExists($numbered)) {
            return $numbered;
        }
        ++$suffix;
    }

    return sprintf('%s_%s', $slug, (new \DateTimeImmutable())->format('YmdHis'));
}
/**
 * Encodes an array as human-readable JSON for display in a form textarea.
 *
 * Output is pretty-printed, keeps Unicode characters and slashes unescaped
 * and throws a \JsonException instead of returning false on failure.
 *
 * @param array<mixed> $value
 */
private function encodePrettyJson(array $value): string
{
    $flags = JSON_THROW_ON_ERROR
        | JSON_PRETTY_PRINT
        | JSON_UNESCAPED_SLASHES
        | JSON_UNESCAPED_UNICODE;

    return json_encode($value, $flags);
}
}

View File

@@ -0,0 +1,194 @@
{% extends 'admin/base.html.twig' %}
{% block title %}Eval-Case erstellen{% endblock %}
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
</h1>
<div class="small text-secondary">
Neue Regression-Cases separat anlegen, ohne die Eval-Suite-Übersicht aufzublähen.
</div>
</div>
<a href="{{ path('admin_evals_index', {type: case_draft.type|default('retrieval')}) }}"
class="btn btn-sm btn-outline-secondary">
Zurück zur Eval Suite
</a>
</div>
{# Render all pending flash messages; the flash type is reused as the Bootstrap alert variant (alert-<type>). #}
{% for label in ['success', 'danger', 'warning', 'info'] %}
{% for message in app.flashes(label) %}
<div class="alert alert-{{ label }} shadow-sm">
{{ message }}
</div>
{% endfor %}
{% endfor %}
{# Notice shown only when the draft carries a non-empty source_label, i.e. the form was prefilled from a template. #}
{% if case_draft.source_label|default('') %}
<div class="alert alert-info border-info bg-black text-light shadow-sm">
<strong>Vorlage geladen:</strong> {{ case_draft.source_label }}<br>
<span class="small text-secondary">
Bitte Case-ID, Prompt und Assertions prüfen, bevor du den Case speicherst.
</span>
</div>
{% endif %}
<div class="row g-4">
<div class="col-xl-8">
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<h5 class="text-warning mb-3">
<i class="bi bi-pencil-square"></i> Neuer Eval-Case
</h5>
<form method="post" action="{{ path('admin_evals_case_create') }}">
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">
<div class="mb-3">
<label class="form-label">Eval-Typ</label>
<select name="type" class="form-select bg-dark text-light border-secondary">
{% for type, label in types %}
<option value="{{ type }}" {% if type == case_draft.type|default('retrieval') %}selected{% endif %}>
{{ label }}
</option>
{% endfor %}
</select>
<div class="form-text text-secondary">
Der Typ entscheidet, in welche Datei geschrieben wird: <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
</div>
</div>
<div class="mb-3">
<label class="form-label">Neue Case-ID</label>
<input type="text"
name="id"
value="{{ case_draft.id|default('') }}"
class="form-control bg-dark text-light border-secondary"
placeholder="followup_testomat808_device_price_001"
required>
<div class="form-text text-secondary">
Eindeutig über alle Eval-Typen. Erlaubt: Buchstaben, Zahlen, <code>_</code> und <code>-</code>.
</div>
</div>
<div class="mb-3">
<label class="form-label">Prompt</label>
<textarea name="prompt"
rows="3"
class="form-control bg-dark text-light border-secondary"
placeholder="und was kostet das gerät selber"
required>{{ case_draft.prompt|default('') }}</textarea>
<div class="form-text text-secondary">
Exakt der Nutzerprompt, der abgesichert werden soll. Tippfehler bewusst so eintragen, wenn sie Teil des Tests sind.
</div>
</div>
<div class="mb-3">
<label class="form-label">Assert-JSON</label>
<textarea name="assert_json"
rows="9"
class="form-control bg-dark text-light border-secondary font-monospace"
spellcheck="false">{{ case_draft.assert_json|default('{}') }}</textarea>
<div class="form-text text-secondary">
Muss ein gültiges JSON-Objekt sein. Beispiel: <code>{"expected_query":"testomat 808"}</code>.
</div>
</div>
<div class="mb-3">
<label class="form-label">History-JSON <span class="text-secondary">optional</span></label>
<textarea name="history_json"
rows="8"
class="form-control bg-dark text-light border-secondary font-monospace"
spellcheck="false"
placeholder='[{"prompt":"vorherige Frage","answer":"vorherige Antwort"}]'>{{ case_draft.history_json|default('') }}</textarea>
<div class="form-text text-secondary">
Für Follow-up-Cases empfohlen. Muss eine JSON-Liste sein. Leer lassen für direkte Prompts.
</div>
</div>
<div class="mb-4">
<label class="form-label">Request Context Hint <span class="text-secondary">optional</span></label>
<textarea name="request_context_hint"
rows="3"
class="form-control bg-dark text-light border-secondary"
placeholder="Nur für Spezialfälle, wenn History nicht ausreicht.">{{ case_draft.request_context_hint|default('') }}</textarea>
<div class="form-text text-secondary">
Normalerweise leer lassen. Für reguläre Regressionen lieber History-JSON verwenden.
</div>
</div>
<div class="d-flex flex-wrap gap-2">
<button type="submit" class="btn btn-warning">
<i class="bi bi-save"></i> Eval-Case speichern
</button>
<a href="{{ path('admin_evals_index', {type: case_draft.type|default('retrieval')}) }}"
class="btn btn-outline-secondary">
Abbrechen
</a>
</div>
</form>
</div>
</div>
</div>
<div class="col-xl-4">
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
<div class="card-body">
<h5 class="text-info mb-3">
<i class="bi bi-info-circle"></i> Feld-Checkliste
</h5>
<ul class="small text-secondary mb-0">
<li><strong class="text-light">retrieval</strong>: richtiges Dokument / richtige Chunks prüfen.</li>
<li><strong class="text-light">shop_query</strong>: direkte Shopquery prüfen.</li>
<li><strong class="text-light">followup</strong>: Prompt plus History prüfen.</li>
<li><strong class="text-light">answer_guard</strong>: No-Answer- oder Evidenzfälle prüfen.</li>
</ul>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
<div class="card-body">
<h5 class="text-info mb-3">
<i class="bi bi-braces"></i> Häufige Assertions
</h5>
<div class="small text-secondary mb-2">Exakte Query:</div>
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
"expected_query": "testomat 808"
}</code></pre>
<div class="small text-secondary mb-2">Begriffe müssen enthalten sein:</div>
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
"must_include_terms": [
"testomat",
"808"
]
}</code></pre>
<div class="small text-secondary mb-2">Dokument muss enthalten sein:</div>
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
"min_results": 1,
"must_include_one_of_document_ids": [
"DOKUMENT-ID"
]
}</code></pre>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">
<i class="bi bi-lightbulb"></i> Empfehlung
</h5>
<p class="small text-secondary mb-0">
Ein guter Eval-Case prüft genau einen Zweck. Lieber mehrere kleine Cases anlegen als einen großen, empfindlichen Case.
</p>
</div>
</div>
</div>
</div>
{% endblock %}

View File

@@ -14,10 +14,16 @@
</div>
</div>
<a href="{{ path('admin_model_config_list') }}"
class="btn btn-sm btn-outline-secondary">
Zurück zum KI-/LLM-Setup
</a>
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_evals_case_new', {type: selected_type|default('retrieval')}) }}"
class="btn btn-sm btn-outline-warning">
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
</a>
<a href="{{ path('admin_model_config_list') }}"
class="btn btn-sm btn-outline-secondary">
Zurück zum KI-/LLM-Setup
</a>
</div>
</div>
{% for label in ['success', 'danger', 'warning', 'info'] %}
@@ -212,100 +218,6 @@
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm mb-4" id="adminEvalCaseCreator">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start flex-wrap gap-2 mb-3">
<div>
<h5 class="text-warning mb-1">
<i class="bi bi-plus-square"></i> Eval-Case erstellen
</h5>
<div class="small text-secondary">
Speichert neue Regression-Cases direkt in <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
Aus Report-Ergebnissen kannst du Prompt, History, Query oder Dokument-IDs als Vorlage übernehmen.
</div>
</div>
</div>
<form method="post" action="{{ path('admin_evals_case_create') }}" class="row g-3">
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">
<div class="col-md-4">
<label class="form-label">Eval-Typ</label>
<select name="type" class="form-select bg-dark text-light border-secondary js-admin-eval-create-type">
{% for type, label in types %}
<option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</div>
<div class="col-md-8">
<label class="form-label">Neue Case-ID</label>
<input type="text"
name="id"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-id"
placeholder="z. B. retrieval_semantic_new_001"
autocomplete="off"
required>
<div class="form-text text-secondary">
Erlaubt: Buchstaben, Zahlen, Unterstrich, Bindestrich. IDs müssen eindeutig sein.
</div>
</div>
<div class="col-12">
<label class="form-label">Prompt</label>
<textarea name="prompt"
rows="2"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-prompt"
placeholder="Testprompt, der abgesichert werden soll"
required></textarea>
</div>
<div class="col-lg-6">
<label class="form-label">Assert-JSON</label>
<textarea name="assert_json"
rows="8"
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-assert"
spellcheck="false">{
"min_results": 1
}</textarea>
<div class="form-text text-secondary">
Beispiel: <code>expected_query</code>, <code>must_include_one_of_document_ids</code>, <code>must_not_include_terms</code>.
</div>
</div>
<div class="col-lg-6">
<label class="form-label">Optional: History-JSON</label>
<textarea name="history_json"
rows="8"
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-history"
spellcheck="false"
placeholder='[{"prompt":"...","answer":"..."}]'></textarea>
<div class="form-text text-secondary">
Für Follow-up-Cases: Liste vorheriger Chat-Turns mit <code>prompt</code> und <code>answer</code>.
</div>
</div>
<div class="col-12">
<label class="form-label">Optional: Request Context Hint</label>
<textarea name="request_context_hint"
rows="2"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-context"
placeholder="Nur nutzen, wenn ein Case explizit Zusatzkontext braucht."></textarea>
</div>
<div class="col-12 d-flex gap-2 flex-wrap">
<button type="submit" class="btn btn-outline-warning">
<i class="bi bi-save"></i> Case speichern
</button>
<button type="button" class="btn btn-outline-secondary js-admin-eval-create-clear">
Formular leeren
</button>
</div>
</form>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
@@ -387,6 +299,13 @@
</div>
{% endif %}
<div class="mt-2">
<a href="{{ path('admin_evals_case_new', {source_type: selected_type, source_case_id: result.case_id|default('')}) }}"
class="btn btn-sm btn-outline-warning">
<i class="bi bi-journal-plus"></i> Als neuen Case vorbereiten
</a>
</div>
{% set historyRows = result.details.history|default([]) %}
{% if historyRows is not empty %}
<details class="small">
@@ -407,17 +326,6 @@
</div>
</details>
{% endif %}
<button type="button"
class="btn btn-sm btn-outline-warning mt-2 js-admin-eval-prefill-case"
data-result-type="{{ result.type|default(selected_type)|e('html_attr') }}"
data-result-prompt="{{ casePrompt|default('')|e('html_attr') }}"
data-result-history="{{ historyRows|default([])|json_encode|e('html_attr') }}"
data-result-query="{{ result.details.query|default('')|e('html_attr') }}"
data-result-individual-queries="{{ result.details.individual_queries|default([])|json_encode|e('html_attr') }}"
data-result-document-ids="{{ result.details.document_ids|default([])|json_encode|e('html_attr') }}">
Als neuen Case vorbereiten
</button>
</td>
<td style="width: 120px;">
{{ result.duration_ms|default(0) }} ms
@@ -595,173 +503,6 @@
});
}
// Element with the id "adminEvalCaseCreator" that hosts the inline case form; null when it is not on the page.
const creator = document.getElementById('adminEvalCaseCreator');
/**
 * Parses a JSON string taken from a data attribute.
 *
 * @param {string} value JSON text; a falsy value (missing attribute, empty string) is not parsed.
 * @param {*} fallback Returned when value is falsy or is not valid JSON.
 * @returns {*} The parsed value or the fallback.
 */
function parseJsonData(value, fallback) {
    if (value) {
        try {
            return JSON.parse(value);
        } catch (error) {
            // Invalid JSON is not an error for the caller; use the fallback.
            return fallback;
        }
    }

    return fallback;
}
/**
 * Turns a prompt into a short ASCII slug for use inside a case ID.
 *
 * German umlauts and sharp s are transliterated (ae, oe, ue, ss), other
 * diacritics are stripped, every run of non-alphanumeric characters becomes
 * one underscore, and the result is limited to 44 characters.
 *
 * @param {string} prompt Prompt text; falsy values are treated as empty.
 * @returns {string} The slug, or "case" when nothing usable remains.
 */
function slugifyPrompt(prompt) {
    const normalized = String(prompt || '')
        // Compose first so that "a" + combining diaeresis is seen as "ä" below.
        .normalize('NFC')
        .toLowerCase()
        // Transliterate BEFORE stripping diacritics; after NFD decomposition
        // the precomposed umlauts no longer exist and would degrade to a/o/u.
        .replace(/ä/g, 'ae')
        .replace(/ö/g, 'oe')
        .replace(/ü/g, 'ue')
        .replace(/ß/g, 'ss')
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9]+/g, '_')
        .replace(/^_+|_+$/g, '')
        .slice(0, 44)
        // The cut may end right after a separator; do not leave it dangling.
        .replace(/_+$/, '');

    return normalized || 'case';
}
/**
 * Picks a starter assertion for a new case from the data of a report result.
 *
 * Query types (shop_query, followup) prefer the individual queries, then the
 * single query. Document types (retrieval, answer_guard) pin the first
 * document ID. Without usable data, answer_guard expects no results and every
 * other type expects at least one.
 *
 * @param {string} type Eval type name.
 * @param {string} query Single query of the result, may be empty.
 * @param {Array} individualQueries Individual queries of the result.
 * @param {Array} documentIds Document IDs of the result.
 * @returns {Object} Assertion object for the Assert-JSON field.
 */
function buildAssertTemplate(type, query, individualQueries, documentIds) {
    const isQueryType = type === 'shop_query' || type === 'followup';
    const isDocumentType = type === 'retrieval' || type === 'answer_guard';

    if (isQueryType) {
        if (individualQueries.length > 0) {
            return {
                expected_individual_queries: individualQueries,
                expected_individual_queries_exact: true
            };
        }

        if (query) {
            return {expected_query: query};
        }
    }

    if (isDocumentType && documentIds.length > 0) {
        return {
            min_results: 1,
            must_include_one_of_document_ids: [documentIds[0]]
        };
    }

    return type === 'answer_guard' ? {max_results: 0} : {min_results: 1};
}
/**
 * Normalizes report history rows into {prompt, answer} turns for the History-JSON field.
 *
 * The answer is taken from the first non-empty value of `answer`, `response`
 * and `answer_preview`. Turns without prompt and answer are dropped; a turn
 * that only has an answer gets the placeholder prompt "Eval-Kontext".
 *
 * @param {Array} historyRows Raw history rows; anything that is not an array yields an empty list.
 * @returns {Array<{prompt: string, answer: string}>} Cleaned turns.
 */
function normalizeHistoryForForm(historyRows) {
    if (!Array.isArray(historyRows)) {
        return [];
    }

    // Only strings and numbers have a sensible text form; everything else counts as empty.
    function toText(value) {
        return typeof value === 'string' || typeof value === 'number' ? String(value).trim() : '';
    }

    return historyRows
        .filter(function (turn) {
            return turn !== null && typeof turn === 'object';
        })
        .map(function (turn) {
            return {
                prompt: toText(turn.prompt),
                answer: toText(turn.answer) || toText(turn.response) || toText(turn.answer_preview)
            };
        })
        // Filter BEFORE applying the placeholder, otherwise empty turns can never be detected.
        .filter(function (turn) {
            return turn.prompt !== '' || turn.answer !== '';
        })
        .map(function (turn) {
            return {
                prompt: turn.prompt || 'Eval-Kontext',
                answer: turn.answer
            };
        });
}
/**
 * Prefills the inline case creator form from the data attributes of a
 * "prepare as new case" button and scrolls the form into view.
 *
 * Does nothing when the creator element is not on the page. Fields missing
 * from the form are skipped.
 *
 * @param {HTMLElement} button Button carrying the data-result-* attributes.
 */
function fillCreatorFormFromResult(button) {
    if (!creator) {
        return;
    }

    const data = button.dataset;
    const type = data.resultType || 'retrieval';
    const prompt = data.resultPrompt || '';
    const history = normalizeHistoryForForm(parseJsonData(data.resultHistory, []));
    const query = data.resultQuery || '';
    const individualQueries = parseJsonData(data.resultIndividualQueries, []);
    const documentIds = parseJsonData(data.resultDocumentIds, []);

    // Timestamp suffix in the form YYMMDD_HHMMSS (local time) to keep suggested IDs distinct.
    const now = new Date();
    const twoDigits = function (number) {
        return String(number).padStart(2, '0');
    };
    const suffix = String(now.getFullYear()).slice(2)
        + twoDigits(now.getMonth() + 1)
        + twoDigits(now.getDate())
        + '_'
        + twoDigits(now.getHours())
        + twoDigits(now.getMinutes())
        + twoDigits(now.getSeconds());

    // Writes a value into the field matching the selector; the value is only
    // computed when the field exists.
    const applyValue = function (selector, produceValue) {
        const field = creator.querySelector(selector);
        if (field) {
            field.value = produceValue();
        }
    };

    applyValue('.js-admin-eval-create-type', function () {
        return type;
    });
    applyValue('.js-admin-eval-create-id', function () {
        return type + '_' + slugifyPrompt(prompt) + '_' + suffix;
    });
    applyValue('.js-admin-eval-create-prompt', function () {
        return prompt;
    });
    applyValue('.js-admin-eval-create-assert', function () {
        return JSON.stringify(
            buildAssertTemplate(type, query, individualQueries, documentIds),
            null,
            2
        );
    });
    applyValue('.js-admin-eval-create-history', function () {
        return history.length > 0 ? JSON.stringify(history, null, 2) : '';
    });
    applyValue('.js-admin-eval-create-context', function () {
        return '';
    });

    creator.scrollIntoView({behavior: 'smooth', block: 'start'});
}
// Wire up the "clear form" buttons; only possible when the creator element exists on the page.
if (creator) {
creator.querySelectorAll('.js-admin-eval-create-clear').forEach(function (button) {
button.addEventListener('click', function () {
// Look the fields up on every click; each one is optional and skipped when missing.
const idField = creator.querySelector('.js-admin-eval-create-id');
const promptField = creator.querySelector('.js-admin-eval-create-prompt');
const assertField = creator.querySelector('.js-admin-eval-create-assert');
const historyField = creator.querySelector('.js-admin-eval-create-history');
const contextField = creator.querySelector('.js-admin-eval-create-context');
if (idField) {
idField.value = '';
}
if (promptField) {
promptField.value = '';
}
if (assertField) {
// Reset to a minimal "min_results: 1" assertion rather than leaving the field empty.
assertField.value = '{\n "min_results": 1\n}';
}
if (historyField) {
historyField.value = '';
}
if (contextField) {
contextField.value = '';
}
});
});
}
// Every prefill button copies its report result into the creator form when clicked.
document.querySelectorAll('.js-admin-eval-prefill-case').forEach(function (button) {
button.addEventListener('click', function () {
fillCreatorFormFromResult(button);
});
});
forms.forEach(function (form) {
syncCaseSelect(form);