807 lines
41 KiB
Twig
807 lines
41 KiB
Twig
{% extends 'admin/base.html.twig' %}
|
|
|
|
{% block title %}RetrieX Eval Suite{% endblock %}
|
|
|
|
{% block body %}
|
|
|
|
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
|
|
<div>
|
|
<h1 class="h3 mb-1">
|
|
<i class="bi bi-clipboard2-check"></i> RetrieX Eval Suite
|
|
</h1>
|
|
<div class="small text-secondary">
|
|
Regressionen für Retrieval, Shopquery, Follow-up und Answer-Guard direkt im Admin prüfen.
|
|
</div>
|
|
</div>
|
|
|
|
<a href="{{ path('admin_model_config_list') }}"
|
|
class="btn btn-sm btn-outline-secondary">
|
|
Zurück zum KI-/LLM-Setup
|
|
</a>
|
|
</div>
|
|
|
|
{% for label in ['success', 'danger', 'warning', 'info'] %}
|
|
{% for message in app.flashes(label) %}
|
|
<div class="alert alert-{{ label }} shadow-sm">
|
|
{{ message }}
|
|
</div>
|
|
{% endfor %}
|
|
{% endfor %}
|
|
|
|
|
|
|
|
<div id="adminEvalRunOverlay"
|
|
class="position-fixed top-0 start-0 w-100 h-100 d-none"
|
|
style="background: rgba(0, 0, 0, .72); z-index: 1080;">
|
|
<div class="h-100 d-flex align-items-center justify-content-center px-3">
|
|
<div class="card bg-black border-warning text-light shadow-lg" style="max-width: 520px; width: 100%;">
|
|
<div class="card-body text-center py-5">
|
|
<div class="spinner-border text-warning mb-3" role="status" aria-hidden="true"></div>
|
|
<h5 class="text-warning mb-2" id="adminEvalRunOverlayLabel">Eval läuft ...</h5>
|
|
<div class="small text-secondary">
|
|
Die Regressionstests werden ausgeführt. Bitte die Seite nicht neu laden.
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="row g-4 mb-4">
|
|
{% for item in overview %}
|
|
{% set report = item.report %}
|
|
{% set status = item.status %}
|
|
{% set badgeClass = status == 'green'
|
|
? 'bg-success'
|
|
: (status == 'red' ? 'bg-danger' : 'bg-secondary')
|
|
%}
|
|
<div class="col-md-6 col-xl-3">
|
|
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
|
|
<div class="card-body">
|
|
<div class="d-flex justify-content-between align-items-start gap-2 mb-2">
|
|
<h5 class="text-info mb-0">{{ item.label }}</h5>
|
|
<span class="badge {{ badgeClass }}">
|
|
{% if status == 'green' %}
|
|
grün
|
|
{% elseif status == 'red' %}
|
|
rot
|
|
{% elseif status == 'empty' %}
|
|
leer
|
|
{% else %}
|
|
nicht gelaufen
|
|
{% endif %}
|
|
</span>
|
|
</div>
|
|
|
|
<div class="small text-secondary mb-3">
|
|
{{ item.case_count }} Cases
|
|
</div>
|
|
|
|
{% if report %}
|
|
<div class="small">
|
|
<div><strong>Total:</strong> {{ report.total|default(0) }}</div>
|
|
<div><strong>Passed:</strong> {{ report.passed|default(0) }}</div>
|
|
<div><strong>Failed:</strong> {{ report.failed|default(0) }}</div>
|
|
<div class="text-secondary mt-2">
|
|
{{ report.generated_at|default('') }}
|
|
</div>
|
|
</div>
|
|
{% else %}
|
|
<div class="small text-secondary">
|
|
Für diesen Typ liegt noch kein Admin-Report vor.
|
|
</div>
|
|
{% endif %}
|
|
|
|
<div class="d-flex flex-wrap gap-2 mt-3">
|
|
<form method="post"
|
|
action="{{ path('admin_evals_run') }}"
|
|
class="d-inline js-admin-eval-run-form"
|
|
data-eval-type-label="{{ item.label|e('html_attr') }}">
|
|
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_run') }}">
|
|
<input type="hidden" name="type" value="{{ item.type }}">
|
|
<button type="submit" class="btn btn-sm btn-outline-warning js-admin-eval-run-button">
|
|
<span class="js-admin-eval-button-label">Run</span>
|
|
<span class="spinner-border spinner-border-sm ms-2 d-none js-admin-eval-button-spinner"
|
|
role="status"
|
|
aria-hidden="true"></span>
|
|
</button>
|
|
</form>
|
|
|
|
<a class="btn btn-sm btn-outline-info"
|
|
href="{{ path('admin_evals_index', {type: item.type}) }}">
|
|
Details
|
|
</a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
{% endfor %}
|
|
</div>
|
|
|
|
<div class="row g-4 mb-4">
|
|
<div class="col-xl-5">
|
|
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
|
|
<div class="card-body">
|
|
<h5 class="text-warning mb-3">
|
|
<i class="bi bi-play-circle"></i> Eval ausführen
|
|
</h5>
|
|
|
|
{#
    Manual eval run form. Posts the selected eval type and, optionally, a single
    case id to the admin_evals_run route. The inline script on this page filters
    the case options by their data-eval-type attribute whenever the type changes.
    Each visible label is bound to its control via for/id and the help texts are
    linked through aria-describedby so assistive technology announces them.
#}
<form method="post"
      action="{{ path('admin_evals_run') }}"
      class="js-admin-eval-run-form"
      data-eval-type-label="Ausgewählter Eval">
    <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_run') }}">

    <div class="mb-3">
        <label class="form-label" for="adminEvalRunType">Eval-Typ</label>
        <select id="adminEvalRunType"
                name="type"
                class="form-select bg-dark text-light border-secondary js-admin-eval-type-select"
                aria-describedby="adminEvalRunTypeHelp">
            {% for type, label in types %}
                <option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>
                    {{ label }}
                </option>
            {% endfor %}
        </select>
        <div class="form-text text-secondary" id="adminEvalRunTypeHelp">
            Ohne Case-ID wird der komplette Typ ausgeführt.
        </div>
    </div>

    <div class="mb-3">
        <label class="form-label" for="adminEvalRunCase">Optional: Case</label>
        <select id="adminEvalRunCase"
                name="case_id"
                class="form-select bg-dark text-light border-secondary js-admin-eval-case-select"
                aria-describedby="adminEvalRunCaseHelp">
            <option value="">Alle Cases des ausgewählten Typs</option>
            {% for type, cases in cases_by_type %}
                {% for case in cases %}
                    {# Options of other types start hidden/disabled; the script re-syncs them on type change. #}
                    <option value="{{ case.id }}"
                            data-eval-type="{{ type }}"
                            {% if type != selected_type %}hidden disabled{% endif %}>
                        {{ case.id }} — {{ case.prompt }}
                    </option>
                {% endfor %}
            {% endfor %}
        </select>
        <div class="form-text text-secondary" id="adminEvalRunCaseHelp">
            Die Case-Liste wird passend zum Eval-Typ gefiltert. Leer lassen, um alle Cases des Typs auszuführen.
        </div>
    </div>

    <button type="submit" class="btn btn-outline-warning js-admin-eval-run-button">
        <span class="js-admin-eval-button-label">Eval starten</span>
        <span class="spinner-border spinner-border-sm ms-2 d-none js-admin-eval-button-spinner"
              role="status"
              aria-hidden="true"></span>
    </button>
</form>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="col-xl-7">
|
|
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
|
|
<div class="card-body">
|
|
<h5 class="text-info mb-3">
|
|
<i class="bi bi-terminal"></i> CLI-Referenz
|
|
</h5>
|
|
|
|
<p class="small text-secondary mb-3">
|
|
Die Admin-Runs schreiben typspezifische Reports nach
|
|
<code>tests/evals/reports/&lt;type&gt;-last-run.json</code>
|
|
und zusätzlich den bekannten <code>last-run.json</code>.
|
|
</p>
|
|
|
|
<div class="small">
|
|
{% for type, label in types %}
|
|
<div class="mb-2">
|
|
<span class="text-info">{{ label }}</span><br>
|
|
<code>php bin/console mto:agent:eval:run {{ type }}</code>
|
|
</div>
|
|
{% endfor %}
|
|
</div>
|
|
|
|
{% if last_report %}
|
|
<hr class="border-secondary">
|
|
<div class="small text-secondary">
|
|
Letzter generischer Report:
|
|
<span class="text-light">{{ last_report.type|default('unknown') }}</span>,
|
|
{{ last_report.passed|default(0) }}/{{ last_report.total|default(0) }} bestanden,
|
|
{{ last_report.generated_at|default('') }}
|
|
</div>
|
|
{% endif %}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div class="card bg-black border-secondary text-light shadow-sm mb-4" id="adminEvalCaseCreator">
|
|
<div class="card-body">
|
|
<div class="d-flex justify-content-between align-items-start flex-wrap gap-2 mb-3">
|
|
<div>
|
|
<h5 class="text-warning mb-1">
|
|
<i class="bi bi-plus-square"></i> Eval-Case erstellen
|
|
</h5>
|
|
<div class="small text-secondary">
|
|
Speichert neue Regression-Cases direkt in <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
|
|
Aus Report-Ergebnissen kannst du Prompt, History, Query oder Dokument-IDs als Vorlage übernehmen.
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
{#
    Case creator form. Posts a new regression case (type, id, prompt, assert JSON,
    optional history JSON and context hint) to the admin_evals_case_create route.
    The js-admin-eval-create-* classes are the hooks the inline script uses to
    prefill and clear the fields. Each visible label is bound to its control via
    for/id and help texts are linked through aria-describedby.
#}
<form method="post" action="{{ path('admin_evals_case_create') }}" class="row g-3">
    <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">

    <div class="col-md-4">
        <label class="form-label" for="adminEvalCreateType">Eval-Typ</label>
        <select id="adminEvalCreateType"
                name="type"
                class="form-select bg-dark text-light border-secondary js-admin-eval-create-type">
            {% for type, label in types %}
                <option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>{{ label }}</option>
            {% endfor %}
        </select>
    </div>

    <div class="col-md-8">
        <label class="form-label" for="adminEvalCreateId">Neue Case-ID</label>
        <input type="text"
               id="adminEvalCreateId"
               name="id"
               class="form-control bg-dark text-light border-secondary js-admin-eval-create-id"
               placeholder="z. B. retrieval_semantic_new_001"
               autocomplete="off"
               aria-describedby="adminEvalCreateIdHelp"
               required>
        <div class="form-text text-secondary" id="adminEvalCreateIdHelp">
            Erlaubt: Buchstaben, Zahlen, Unterstrich, Bindestrich. IDs müssen eindeutig sein.
        </div>
    </div>

    <div class="col-12">
        <label class="form-label" for="adminEvalCreatePrompt">Prompt</label>
        <textarea id="adminEvalCreatePrompt"
                  name="prompt"
                  rows="2"
                  class="form-control bg-dark text-light border-secondary js-admin-eval-create-prompt"
                  placeholder="Testprompt, der abgesichert werden soll"
                  required></textarea>
    </div>

    <div class="col-lg-6">
        <label class="form-label" for="adminEvalCreateAssert">Assert-JSON</label>
        {# Textarea content is whitespace-sensitive: keep the default JSON flush left. #}
        <textarea id="adminEvalCreateAssert"
                  name="assert_json"
                  rows="8"
                  class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-assert"
                  aria-describedby="adminEvalCreateAssertHelp"
                  spellcheck="false">{
  "min_results": 1
}</textarea>
        <div class="form-text text-secondary" id="adminEvalCreateAssertHelp">
            Beispiel: <code>expected_query</code>, <code>must_include_one_of_document_ids</code>, <code>must_not_include_terms</code>.
        </div>
    </div>

    <div class="col-lg-6">
        <label class="form-label" for="adminEvalCreateHistory">Optional: History-JSON</label>
        <textarea id="adminEvalCreateHistory"
                  name="history_json"
                  rows="8"
                  class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-history"
                  aria-describedby="adminEvalCreateHistoryHelp"
                  spellcheck="false"
                  placeholder='[{"prompt":"...","answer":"..."}]'></textarea>
        <div class="form-text text-secondary" id="adminEvalCreateHistoryHelp">
            Für Follow-up-Cases: Liste vorheriger Chat-Turns mit <code>prompt</code> und <code>answer</code>.
        </div>
    </div>

    <div class="col-12">
        <label class="form-label" for="adminEvalCreateContext">Optional: Request Context Hint</label>
        <textarea id="adminEvalCreateContext"
                  name="request_context_hint"
                  rows="2"
                  class="form-control bg-dark text-light border-secondary js-admin-eval-create-context"
                  placeholder="Nur nutzen, wenn ein Case explizit Zusatzkontext braucht."></textarea>
    </div>

    <div class="col-12 d-flex gap-2 flex-wrap">
        <button type="submit" class="btn btn-outline-warning">
            <i class="bi bi-save"></i> Case speichern
        </button>
        <button type="button" class="btn btn-outline-secondary js-admin-eval-create-clear">
            Formular leeren
        </button>
    </div>
</form>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="card bg-black border-secondary text-light shadow-sm">
|
|
<div class="card-body">
|
|
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
|
|
<h5 class="text-warning mb-0">
|
|
<i class="bi bi-list-check"></i>
|
|
Report-Details: {{ types[selected_type]|default(selected_type) }}
|
|
</h5>
|
|
|
|
<div class="btn-group btn-group-sm" role="group" aria-label="Eval report types">
|
|
{% for type, label in types %}
|
|
<a class="btn {{ type == selected_type ? 'btn-info' : 'btn-outline-info' }}"
|
|
href="{{ path('admin_evals_index', {type: type}) }}">
|
|
{{ label }}
|
|
</a>
|
|
{% endfor %}
|
|
</div>
|
|
</div>
|
|
|
|
{% if selected_report %}
|
|
{% set selectedFailed = selected_report.failed|default(0) %}
|
|
<div class="row g-3 mb-3 small">
|
|
<div class="col-md-3">
|
|
<div class="border border-secondary rounded p-3 h-100">
|
|
<div class="text-secondary">Total</div>
|
|
<div class="h5 mb-0">{{ selected_report.total|default(0) }}</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-3">
|
|
<div class="border border-secondary rounded p-3 h-100">
|
|
<div class="text-secondary">Passed</div>
|
|
<div class="h5 text-success mb-0">{{ selected_report.passed|default(0) }}</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-3">
|
|
<div class="border border-secondary rounded p-3 h-100">
|
|
<div class="text-secondary">Failed</div>
|
|
<div class="h5 {{ selectedFailed == 0 ? 'text-success' : 'text-danger' }} mb-0">
|
|
{{ selectedFailed }}
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-3">
|
|
<div class="border border-secondary rounded p-3 h-100">
|
|
<div class="text-secondary">Generated</div>
|
|
<div class="small text-light">{{ selected_report.generated_at|default('') }}</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="table-responsive">
|
|
<table class="table table-dark table-striped table-hover align-middle mb-0">
|
|
<thead class="table-secondary text-dark">
|
|
<tr>
|
|
<th>Status</th>
|
|
<th>Case</th>
|
|
<th>Dauer</th>
|
|
<th>Failures / Details</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
{% for result in selected_report.results|default([]) %}
|
|
<tr>
|
|
<td style="width: 110px;">
|
|
{% if result.passed|default(false) %}
|
|
<span class="badge bg-success">PASS</span>
|
|
{% else %}
|
|
<span class="badge bg-danger">FAIL</span>
|
|
{% endif %}
|
|
</td>
|
|
<td style="min-width: 260px;">
|
|
<code>{{ result.case_id|default('') }}</code>
|
|
<div class="small text-secondary mb-2">{{ result.type|default('') }}</div>
|
|
|
|
{% set casePrompt = result.prompt|default(result.details.prompt|default('')) %}
|
|
{% if casePrompt %}
|
|
<div class="small mb-2">
|
|
<span class="text-secondary">Prompt:</span><br>
|
|
<span class="text-light">{{ casePrompt }}</span>
|
|
</div>
|
|
{% endif %}
|
|
|
|
{% set historyRows = result.details.history|default([]) %}
|
|
{% if historyRows is not empty %}
|
|
<details class="small">
|
|
<summary class="text-info" style="cursor:pointer;">
|
|
Kontext / History anzeigen
|
|
</summary>
|
|
<div class="mt-2 ps-2 border-start border-secondary">
|
|
{% for turn in historyRows %}
|
|
<div class="mb-2">
|
|
<div class="text-secondary">Vorheriger Prompt:</div>
|
|
<div class="text-light">{{ turn.prompt|default('') }}</div>
|
|
{% if turn.answer_preview|default('') %}
|
|
<div class="text-secondary mt-1">Antwort-Auszug:</div>
|
|
<div class="text-secondary">{{ turn.answer_preview }}</div>
|
|
{% endif %}
|
|
</div>
|
|
{% endfor %}
|
|
</div>
|
|
</details>
|
|
{% endif %}
|
|
|
|
<button type="button"
|
|
class="btn btn-sm btn-outline-warning mt-2 js-admin-eval-prefill-case"
|
|
data-result-type="{{ result.type|default(selected_type)|e('html_attr') }}"
|
|
data-result-prompt="{{ casePrompt|default('')|e('html_attr') }}"
|
|
data-result-history="{{ historyRows|default([])|json_encode|e('html_attr') }}"
|
|
data-result-query="{{ result.details.query|default('')|e('html_attr') }}"
|
|
data-result-individual-queries="{{ result.details.individual_queries|default([])|json_encode|e('html_attr') }}"
|
|
data-result-document-ids="{{ result.details.document_ids|default([])|json_encode|e('html_attr') }}">
|
|
Als neuen Case vorbereiten
|
|
</button>
|
|
</td>
|
|
<td style="width: 120px;">
|
|
{{ result.duration_ms|default(0) }} ms
|
|
</td>
|
|
<td>
|
|
{% if result.failures|default([]) is not empty %}
|
|
<ul class="mb-2 small text-danger">
|
|
{% for failure in result.failures %}
|
|
<li>{{ failure }}</li>
|
|
{% endfor %}
|
|
</ul>
|
|
{% else %}
|
|
<div class="small text-success mb-2">Keine Fehler.</div>
|
|
{% endif %}
|
|
|
|
{% set documentRefs = result.details.document_refs|default([]) %}
|
|
{% if documentRefs is not empty %}
|
|
<div class="mb-2">
|
|
<div class="small text-secondary mb-1">Gefundene Dokumente</div>
|
|
<div class="table-responsive">
|
|
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-2">
|
|
<thead>
|
|
<tr class="small text-secondary">
|
|
<th style="width: 90px;">Ranks</th>
|
|
<th>Titel / Datei</th>
|
|
<th style="width: 170px;">Doc-ID</th>
|
|
<th style="width: 220px;">Chunks</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
{% for doc in documentRefs %}
|
|
<tr>
|
|
<td class="small">{{ doc.ranks|default([])|join(', ') }}</td>
|
|
<td>
|
|
<div class="fw-semibold">{{ doc.title|default('Ohne Titel') }}</div>
|
|
{% if doc.file_path|default('') %}
|
|
<div class="small text-secondary" style="word-break: break-all;">
|
|
{{ doc.file_path }}
|
|
</div>
|
|
{% endif %}
|
|
{% if doc.version_number|default('') %}
|
|
<div class="small text-secondary">Version: {{ doc.version_number }}</div>
|
|
{% endif %}
|
|
</td>
|
|
<td><code class="small">{{ doc.id|default('') }}</code></td>
|
|
<td class="small" style="word-break: break-all;">
|
|
{% for chunkId in doc.chunk_ids|default([]) %}
|
|
<code>{{ chunkId }}</code>{% if not loop.last %}<br>{% endif %}
|
|
{% endfor %}
|
|
</td>
|
|
</tr>
|
|
{% endfor %}
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
{% endif %}
|
|
|
|
{% set resultRows = result.details.result_rows|default([]) %}
|
|
{% if resultRows is not empty %}
|
|
<details class="mb-2">
|
|
<summary class="small text-info" style="cursor:pointer;">
|
|
Treffer / Chunks anzeigen
|
|
</summary>
|
|
<div class="table-responsive mt-2">
|
|
<table class="table table-dark table-sm table-bordered border-secondary align-middle mb-0">
|
|
<thead>
|
|
<tr class="small text-secondary">
|
|
<th style="width: 60px;">Rank</th>
|
|
<th>Titel / Datei</th>
|
|
<th style="width: 180px;">Chunk</th>
|
|
<th>Preview</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
{% for row in resultRows %}
|
|
<tr>
|
|
<td>{{ row.rank|default('') }}</td>
|
|
<td>
|
|
<div class="fw-semibold">{{ row.document_title|default('Ohne Titel') }}</div>
|
|
{% if row.file_path|default('') %}
|
|
<div class="small text-secondary" style="word-break: break-all;">{{ row.file_path }}</div>
|
|
{% endif %}
|
|
<div class="small text-secondary">Doc-ID: <code>{{ row.document_id|default('') }}</code></div>
|
|
</td>
|
|
<td class="small" style="word-break: break-all;">
|
|
<code>{{ row.chunk_id|default('') }}</code>
|
|
{% if row.chunk_index is defined and row.chunk_index is not same as(null) %}
|
|
<div class="text-secondary">Index: {{ row.chunk_index }}</div>
|
|
{% endif %}
|
|
</td>
|
|
<td class="small text-secondary">{{ row.text_preview|default('') }}</td>
|
|
</tr>
|
|
{% endfor %}
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</details>
|
|
{% endif %}
|
|
|
|
<details>
|
|
<summary class="small text-info" style="cursor:pointer;">
|
|
JSON-Details anzeigen
|
|
</summary>
|
|
<pre class="bg-dark border border-secondary rounded p-2 mt-2 small text-light" style="white-space: pre-wrap; max-height: 260px; overflow: auto;">{{ result.details|default({})|json_encode(constant('JSON_PRETTY_PRINT')) }}</pre>
|
|
</details>
|
|
</td>
|
|
</tr>
|
|
{% else %}
|
|
<tr>
|
|
<td colspan="4" class="text-center text-secondary py-4">
|
|
Dieser Report enthält keine Resultate.
|
|
</td>
|
|
</tr>
|
|
{% endfor %}
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
{% else %}
|
|
<div class="alert alert-secondary mb-0">
|
|
Für {{ types[selected_type]|default(selected_type) }} liegt noch kein typspezifischer Admin-Report vor.
|
|
Starte den Eval oben oder per CLI.
|
|
</div>
|
|
{% endif %}
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<script>
|
|
document.addEventListener('DOMContentLoaded', function () {
|
|
const forms = Array.from(document.querySelectorAll('.js-admin-eval-run-form'));
|
|
const overlay = document.getElementById('adminEvalRunOverlay');
|
|
const overlayLabel = document.getElementById('adminEvalRunOverlayLabel');
|
|
|
|
/**
 * Work out the display name of the eval a run form is about to start.
 *
 * The text of the currently selected option of the form's type select wins;
 * forms without such a select (or without a selection) fall back to their
 * data-eval-type-label attribute and finally to the literal "Eval".
 *
 * @param {HTMLFormElement} form Run form being submitted.
 * @returns {string} Trimmed label.
 */
function resolveEvalLabel(form) {
    const typeSelect = form.querySelector('.js-admin-eval-type-select');
    const hasSelection = typeSelect && typeSelect.selectedOptions.length > 0;

    if (!hasSelection) {
        const fallbackLabel = form.dataset.evalTypeLabel || 'Eval';
        return fallbackLabel.trim();
    }

    const chosenOption = typeSelect.selectedOptions[0];
    return chosenOption.textContent.trim();
}
|
|
|
|
/**
 * Restrict the case dropdown of a run form to the cases of the selected type.
 *
 * The empty-valued placeholder option always stays available. Every other
 * option is shown and enabled only when its data-eval-type equals the current
 * value of the type select. If a selected option gets filtered out, the case
 * select is reset to the empty value. Forms lacking either select are ignored.
 *
 * @param {HTMLFormElement} form Form that may contain the type and case selects.
 * @returns {void}
 */
function syncCaseSelect(form) {
    const typeSelect = form.querySelector('.js-admin-eval-type-select');
    const caseSelect = form.querySelector('.js-admin-eval-case-select');

    if (!(typeSelect && caseSelect)) {
        return;
    }

    const activeType = typeSelect.value;

    for (const option of Array.from(caseSelect.options)) {
        // The placeholder (empty value) is never filtered.
        const available = option.value === '' || option.dataset.evalType === activeType;

        option.hidden = !available;
        option.disabled = !available;

        if (!available && option.selected) {
            caseSelect.value = '';
        }
    }
}
|
|
|
|
/**
 * Lock every run button on the page: sets the disabled property and adds the
 * "disabled" CSS class to each element matching .js-admin-eval-run-button.
 *
 * @returns {void}
 */
function setAllRunButtonsDisabled() {
    const runButtons = document.querySelectorAll('.js-admin-eval-run-button');

    for (const runButton of runButtons) {
        runButton.disabled = true;
        runButton.classList.add('disabled');
    }
}
|
|
|
|
const creator = document.getElementById('adminEvalCaseCreator');
|
|
|
|
/**
 * Decode a JSON string, returning a fallback instead of throwing.
 *
 * @param {string|undefined} value    Raw JSON text; falsy values are not parsed.
 * @param {*}                fallback Returned when value is falsy or invalid JSON.
 * @returns {*} The parsed value or the fallback.
 */
function parseJsonData(value, fallback) {
    let decoded = fallback;

    if (value) {
        try {
            decoded = JSON.parse(value);
        } catch (parseError) {
            // Malformed JSON: keep the caller-supplied fallback.
            decoded = fallback;
        }
    }

    return decoded;
}
|
|
|
|
/**
 * Turn a prompt into a short ASCII slug used inside generated case ids.
 *
 * Steps: compose to NFC and lowercase, transliterate German umlauts and sharp s
 * (ä→ae, ö→oe, ü→ue, ß→ss), strip remaining diacritics, collapse every run of
 * non [a-z0-9] characters into "_", trim underscores, and cap at 44 characters.
 *
 * The umlaut transliteration has to run BEFORE the NFD decomposition: once the
 * string is decomposed and its combining marks are removed, "ä" has already
 * become "a" and could no longer be mapped to "ae".
 *
 * @param {string} prompt Free-text prompt; falsy values are treated as empty.
 * @returns {string} Slug of at most 44 characters, or "case" when nothing usable remains.
 */
function slugifyPrompt(prompt) {
    const slug = String(prompt || '')
        // NFC first so decomposed input ("u" + U+0308) is matched by the umlaut rules too.
        .normalize('NFC')
        .toLowerCase()
        .replace(/\u00e4/g, 'ae')
        .replace(/\u00f6/g, 'oe')
        .replace(/\u00fc/g, 'ue')
        .replace(/\u00df/g, 'ss')
        // Strip the accents of all other letters (é → e, ñ → n, ...).
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9]+/g, '_')
        .replace(/^_+|_+$/g, '')
        .slice(0, 44)
        // Cutting at 44 characters may end on a separator; drop it again.
        .replace(/_+$/g, '');

    return slug || 'case';
}
|
|
|
|
/**
 * Build the starter assert object for a new case from a report result.
 *
 * Precedence:
 *   1. shop_query / followup with individual queries → exact individual-query match
 *   2. shop_query / followup with a query            → expected_query
 *   3. retrieval / answer_guard with document ids    → first document id must be included
 *   4. answer_guard without document ids             → max_results: 0
 *   5. anything else                                 → min_results: 1
 *
 * @param {string}   type              Eval type of the result.
 * @param {string}   query             Query taken from the result details (may be empty).
 * @param {string[]} individualQueries Individual queries from the result details.
 * @param {Array}    documentIds       Document ids from the result details.
 * @returns {Object} Assert template to be serialized into the Assert-JSON field.
 */
function buildAssertTemplate(type, query, individualQueries, documentIds) {
    const isQueryType = type === 'shop_query' || type === 'followup';
    const isDocumentType = type === 'retrieval' || type === 'answer_guard';

    if (isQueryType) {
        if (individualQueries.length > 0) {
            return {
                expected_individual_queries: individualQueries,
                expected_individual_queries_exact: true
            };
        }

        if (query) {
            return { expected_query: query };
        }
    }

    if (isDocumentType && documentIds.length > 0) {
        const firstDocumentId = documentIds[0];

        return {
            min_results: 1,
            must_include_one_of_document_ids: [firstDocumentId]
        };
    }

    return type === 'answer_guard' ? { max_results: 0 } : { min_results: 1 };
}
|
|
|
|
/**
 * Convert history rows from a report into the {prompt, answer} shape expected
 * by the History-JSON field.
 *
 * - prompt: the turn's prompt, or "Eval-Kontext" when it is missing/empty.
 * - answer: the first non-empty of answer, response, answer_preview.
 * Both values are trimmed; turns where both end up empty are dropped.
 *
 * The input comes from a JSON data attribute, so it is not trusted to be
 * well-formed: a non-array payload yields an empty list, entries that are not
 * objects are skipped, and non-string values are coerced with String() instead
 * of throwing on .trim().
 *
 * @param {*} historyRows Decoded history payload (expected: array of turn objects).
 * @returns {Array<{prompt: string, answer: string}>} Normalized turns.
 */
function normalizeHistoryForForm(historyRows) {
    if (!Array.isArray(historyRows)) {
        return [];
    }

    return historyRows
        .filter(function (turn) {
            return turn !== null && typeof turn === 'object';
        })
        .map(function (turn) {
            return {
                prompt: String(turn.prompt || 'Eval-Kontext').trim(),
                answer: String(turn.answer || turn.response || turn.answer_preview || '').trim()
            };
        })
        .filter(function (turn) {
            return turn.prompt !== '' || turn.answer !== '';
        });
}
|
|
|
|
/**
 * Prefill the case-creator form from the data-result-* attributes of a
 * "Als neuen Case vorbereiten" button and scroll the creator card into view.
 *
 * Sets, for each field that exists inside the creator card:
 *   type    ← data-result-type (defaults to "retrieval")
 *   id      ← "<type>_<slug of prompt>_<YYMMDD_HHMMSS>" using the local clock
 *   prompt  ← data-result-prompt
 *   assert  ← pretty-printed buildAssertTemplate(...) result
 *   history ← pretty-printed normalized history, or empty when there is none
 *   context ← always cleared
 *
 * Does nothing when the creator card is not on the page.
 *
 * @param {HTMLElement} button Button carrying the result data attributes.
 * @returns {void}
 */
function fillCreatorFormFromResult(button) {
    if (!creator) {
        return;
    }

    const source = button.dataset;
    const type = source.resultType || 'retrieval';
    const prompt = source.resultPrompt || '';
    const history = normalizeHistoryForForm(parseJsonData(source.resultHistory, []));
    const query = source.resultQuery || '';
    const individualQueries = parseJsonData(source.resultIndividualQueries, []);
    const documentIds = parseJsonData(source.resultDocumentIds, []);

    // Timestamp suffix (YYMMDD_HHMMSS) appended to the generated case id.
    const twoDigits = function (number) {
        return String(number).padStart(2, '0');
    };
    const now = new Date();
    const datePart = [
        String(now.getFullYear()).slice(2),
        twoDigits(now.getMonth() + 1),
        twoDigits(now.getDate())
    ].join('');
    const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()]
        .map(function (part) {
            return twoDigits(part);
        })
        .join('');
    const suffix = datePart + '_' + timePart;

    // Values are produced lazily so nothing is computed for a field that is absent.
    const fieldValues = [
        ['.js-admin-eval-create-type', function () {
            return type;
        }],
        ['.js-admin-eval-create-id', function () {
            return type + '_' + slugifyPrompt(prompt) + '_' + suffix;
        }],
        ['.js-admin-eval-create-prompt', function () {
            return prompt;
        }],
        ['.js-admin-eval-create-assert', function () {
            return JSON.stringify(buildAssertTemplate(type, query, individualQueries, documentIds), null, 2);
        }],
        ['.js-admin-eval-create-history', function () {
            return history.length > 0 ? JSON.stringify(history, null, 2) : '';
        }],
        ['.js-admin-eval-create-context', function () {
            return '';
        }]
    ];

    fieldValues.forEach(function (entry) {
        const field = creator.querySelector(entry[0]);

        if (field) {
            field.value = entry[1]();
        }
    });

    creator.scrollIntoView({behavior: 'smooth', block: 'start'});
}
|
|
|
|
// "Formular leeren": reset the creator fields to their blank state on click.
// The id, prompt, history and context fields are emptied and the assert field
// gets the default template back; the type select is not touched.
if (creator) {
    const blankFieldValues = [
        ['.js-admin-eval-create-id', ''],
        ['.js-admin-eval-create-prompt', ''],
        ['.js-admin-eval-create-assert', '{\n "min_results": 1\n}'],
        ['.js-admin-eval-create-history', ''],
        ['.js-admin-eval-create-context', '']
    ];

    for (const clearButton of creator.querySelectorAll('.js-admin-eval-create-clear')) {
        clearButton.addEventListener('click', function () {
            // Fields are looked up at click time; missing ones are skipped.
            for (const [selector, blankValue] of blankFieldValues) {
                const field = creator.querySelector(selector);

                if (field) {
                    field.value = blankValue;
                }
            }
        });
    }
}
|
|
|
|
// Each "Als neuen Case vorbereiten" button in the report table copies its own
// result data into the creator form when clicked.
for (const prefillButton of document.querySelectorAll('.js-admin-eval-prefill-case')) {
    prefillButton.addEventListener('click', function () {
        fillCreatorFormFromResult(prefillButton);
    });
}
|
|
|
|
// Wire up every run form:
//  - keep the case dropdown in sync with the selected eval type,
//  - on submit, show the blocking overlay, switch the pressed button to its
//    "running" state and lock all run buttons so a run cannot be started twice.
forms.forEach(function (form) {
    syncCaseSelect(form);

    const typeSelect = form.querySelector('.js-admin-eval-type-select');
    if (typeSelect) {
        typeSelect.addEventListener('change', function () {
            syncCaseSelect(form);
        });
    }

    form.addEventListener('submit', function (event) {
        // Prefer the button that actually triggered the submit; fall back to the
        // form's first run button (e.g. when event.submitter is unavailable).
        const button = event.submitter && event.submitter.classList.contains('js-admin-eval-run-button')
            ? event.submitter
            : form.querySelector('.js-admin-eval-run-button');
        const label = resolveEvalLabel(form);

        if (overlay && overlayLabel) {
            overlayLabel.textContent = label + ' läuft ...';
            overlay.classList.remove('d-none');
        }

        if (button) {
            const buttonLabel = button.querySelector('.js-admin-eval-button-label');
            const spinner = button.querySelector('.js-admin-eval-button-spinner');

            if (buttonLabel) {
                // Remember the idle caption so it can be restored if this page is
                // later brought back from the back/forward cache.
                if (buttonLabel.dataset.idleLabel === undefined) {
                    buttonLabel.dataset.idleLabel = buttonLabel.textContent;
                }
                buttonLabel.textContent = 'Läuft ...';
            }

            if (spinner) {
                spinner.classList.remove('d-none');
            }
        }

        setAllRunButtonsDisabled();
        document.body.style.cursor = 'progress';
    });
});

// When the user navigates back after a run, browsers may restore this page from
// the back/forward cache exactly as it was left: overlay visible, buttons
// disabled. DOMContentLoaded does not fire again in that case, so undo the
// "running" state here. event.persisted is only true for such cache restores.
window.addEventListener('pageshow', function (event) {
    if (!event.persisted) {
        return;
    }

    if (overlay) {
        overlay.classList.add('d-none');
    }

    document.querySelectorAll('.js-admin-eval-run-button').forEach(function (button) {
        const buttonLabel = button.querySelector('.js-admin-eval-button-label');
        const spinner = button.querySelector('.js-admin-eval-button-spinner');

        button.disabled = false;
        button.classList.remove('disabled');

        if (buttonLabel && buttonLabel.dataset.idleLabel !== undefined) {
            buttonLabel.textContent = buttonLabel.dataset.idleLabel;
            delete buttonLabel.dataset.idleLabel;
        }

        if (spinner) {
            spinner.classList.add('d-none');
        }
    });

    document.body.style.cursor = '';

    // Restored form controls may hold a different type selection; re-filter the cases.
    forms.forEach(function (form) {
        syncCaseSelect(form);
    });
});
|
|
});
|
|
</script>
|
|
|
|
{% endblock %}
|