Files
MtoRagSystem/templates/admin/evals/index.html.twig
team 1 0d55c0a439 p100
2026-05-12 08:57:57 +02:00

380 lines
18 KiB
Twig

{% extends 'admin/base.html.twig' %}
{% block title %}RetrieX Eval Suite{% endblock %}
{% block body %}
{# Page header: suite title with a one-line description, plus a link to the admin_model_config_list route. #}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
  <div>
    <h1 class="h3 mb-1">
      {# The icon-font glyph is purely decorative; hide it from assistive technology so only the title text is read. #}
      <i class="bi bi-clipboard2-check" aria-hidden="true"></i> RetrieX Eval Suite
    </h1>
    <div class="small text-secondary">
      Regressionen für Retrieval, Shopquery, Follow-up und Answer-Guard direkt im Admin prüfen.
    </div>
  </div>
  <a class="btn btn-sm btn-outline-secondary"
     href="{{ path('admin_model_config_list') }}">
    Zurück zum KI-/LLM-Setup
  </a>
</div>
{# Flash messages: one Bootstrap alert per message, for each of the four levels listed below. #}
{% for label in ['success', 'danger', 'warning', 'info'] %}
  {% for message in app.flashes(label) %}
    {# role="alert" matches the Bootstrap alert markup so assistive technology treats the box as a notification. #}
    <div class="alert alert-{{ label }} shadow-sm" role="alert">
      {{ message }}
    </div>
  {% endfor %}
{% endfor %}
{# Full-screen busy overlay for eval runs. It is rendered hidden (d-none); the inline script in this
   template removes d-none and rewrites the heading text when one of the run forms is submitted. #}
<div class="position-fixed top-0 start-0 w-100 h-100 d-none"
     id="adminEvalRunOverlay"
     style="background: rgba(0, 0, 0, .72); z-index: 1080;">
  {# Flex wrapper that centres the status card inside the viewport. #}
  <div class="h-100 d-flex align-items-center justify-content-center px-3">
    <div class="card bg-black border-warning text-light shadow-lg"
         style="max-width: 520px; width: 100%;">
      <div class="card-body text-center py-5">
        <div class="spinner-border text-warning mb-3"
             aria-hidden="true"
             role="status"></div>
        {# Default heading; the script replaces it with the label of the eval that was started. #}
        <h5 class="text-warning mb-2" id="adminEvalRunOverlayLabel">Eval läuft ...</h5>
        <div class="small text-secondary">
          Die Regressionstests werden ausgeführt. Bitte die Seite nicht neu laden.
        </div>
      </div>
    </div>
  </div>
</div>
{# Overview grid: one card per entry in `overview`, showing the status badge, the case count,
   the figures of the latest report (if there is one) and the Run / Details actions for that type. #}
<div class="row g-4 mb-4">
  {% for item in overview %}
    {% set report = item.report %}
    {% set status = item.status %}
    {# Badge colour: 'green' -> bg-success, 'red' -> bg-danger, every other status -> bg-secondary. #}
    {% set badgeClass = status == 'green'
      ? 'bg-success'
      : (status == 'red' ? 'bg-danger' : 'bg-secondary')
    %}
    <div class="col-md-6 col-xl-3">
      <div class="card bg-black border-secondary text-light h-100 shadow-sm">
        <div class="card-body">
          <div class="d-flex justify-content-between align-items-start gap-2 mb-2">
            <h5 class="text-info mb-0">{{ item.label }}</h5>
            <span class="badge {{ badgeClass }}">
              {% if status == 'green' %}
                grün
              {% elseif status == 'red' %}
                rot
              {% elseif status == 'empty' %}
                leer
              {% else %}
                nicht gelaufen
              {% endif %}
            </span>
          </div>
          <div class="small text-secondary mb-3">
            {{ item.case_count }} Cases
          </div>
          {% if report %}
            <div class="small">
              <div><strong>Total:</strong> {{ report.total|default(0) }}</div>
              <div><strong>Passed:</strong> {{ report.passed|default(0) }}</div>
              <div><strong>Failed:</strong> {{ report.failed|default(0) }}</div>
              <div class="text-secondary mt-2">
                {{ report.generated_at|default('') }}
              </div>
            </div>
          {% else %}
            <div class="small text-secondary">
              Für diesen Typ liegt noch kein Admin-Report vor.
            </div>
          {% endif %}
          <div class="d-flex flex-wrap gap-2 mt-3">
            {# Per-card run form: posts this card's type to admin_evals_run. The data attribute carries
               the label that the inline script shows in the busy overlay. #}
            <form method="post"
                  action="{{ path('admin_evals_run') }}"
                  class="d-inline js-admin-eval-run-form"
                  data-eval-type-label="{{ item.label|e('html_attr') }}">
              <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_run') }}">
              <input type="hidden" name="type" value="{{ item.type }}">
              <button type="submit" class="btn btn-sm btn-outline-warning js-admin-eval-run-button">
                {# Every card has a "Run" button; the visually hidden suffix gives each one a distinct
                   accessible name without changing the visible text. #}
                <span class="js-admin-eval-button-label">Run</span><span class="visually-hidden">: {{ item.label }}</span>
                <span class="spinner-border spinner-border-sm ms-2 d-none js-admin-eval-button-spinner"
                      role="status"
                      aria-hidden="true"></span>
              </button>
            </form>
            {# Same idea for the repeated "Details" links: add the eval type for screen-reader users. #}
            <a class="btn btn-sm btn-outline-info"
               href="{{ path('admin_evals_index', {type: item.type}) }}">
              Details<span class="visually-hidden">: {{ item.label }}</span>
            </a>
          </div>
        </div>
      </div>
    </div>
  {% endfor %}
</div>
<div class="row g-4 mb-4">
{# Run form card: choose an eval type and, optionally, a single case id, then post to admin_evals_run. #}
<div class="col-xl-5">
  <div class="card bg-black border-secondary text-light h-100 shadow-sm">
    <div class="card-body">
      <h5 class="text-warning mb-3">
        {# Decorative glyph, hidden from assistive technology. #}
        <i class="bi bi-play-circle" aria-hidden="true"></i> Eval ausführen
      </h5>
      <form method="post"
            action="{{ path('admin_evals_run') }}"
            class="js-admin-eval-run-form"
            data-eval-type-label="Ausgewählter Eval">
        <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_run') }}">
        <div class="mb-3">
          {# for/id tie the label to the select so it has an accessible name and the label is clickable. #}
          <label class="form-label" for="adminEvalTypeSelect">Eval-Typ</label>
          <select class="form-select bg-dark text-light border-secondary js-admin-eval-type-select"
                  id="adminEvalTypeSelect"
                  name="type"
                  aria-describedby="adminEvalTypeHelp">
            {% for type, label in types %}
              <option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>
                {{ label }}
              </option>
            {% endfor %}
          </select>
          <div class="form-text text-secondary" id="adminEvalTypeHelp">
            Ohne Case-ID wird der komplette Typ ausgeführt.
          </div>
        </div>
        <div class="mb-3">
          <label class="form-label" for="adminEvalCaseId">Optional: Case-ID</label>
          {# Free-text input; the datalist below only offers suggestions built from cases_by_type. #}
          <input class="form-control bg-dark text-light border-secondary"
                 id="adminEvalCaseId"
                 name="case_id"
                 type="text"
                 list="evalCaseIds"
                 placeholder="z. B. followup_indicator_price_001">
          <datalist id="evalCaseIds">
            {% for type, cases in cases_by_type %}
              {% for case in cases %}
                <option value="{{ case.id }}">{{ type }} - {{ case.prompt }}</option>
              {% endfor %}
            {% endfor %}
          </datalist>
        </div>
        <button type="submit" class="btn btn-outline-warning js-admin-eval-run-button">
          <span class="js-admin-eval-button-label">Eval starten</span>
          <span class="spinner-border spinner-border-sm ms-2 d-none js-admin-eval-button-spinner"
                role="status"
                aria-hidden="true"></span>
        </button>
      </form>
    </div>
  </div>
</div>
{# CLI reference card: lists the console command for every eval type and, when available,
   a one-line summary of the generic last report. #}
<div class="col-xl-7">
  <div class="card bg-black border-secondary text-light h-100 shadow-sm">
    <div class="card-body">
      <h5 class="text-info mb-3">
        {# Decorative glyph, hidden from assistive technology. #}
        <i class="bi bi-terminal" aria-hidden="true"></i> CLI-Referenz
      </h5>
      <p class="small text-secondary mb-3">
        Die Admin-Runs schreiben typspezifische Reports nach
        <code>tests/evals/reports/&lt;type&gt;-last-run.json</code>
        und zusätzlich den bekannten <code>last-run.json</code>.
      </p>
      <div class="small">
        {% for type, label in types %}
          <div class="mb-2">
            <span class="text-info">{{ label }}</span><br>
            <code>php bin/console mto:agent:eval:run {{ type }}</code>
          </div>
        {% endfor %}
      </div>
      {% if last_report %}
        <hr class="border-secondary">
        <div class="small text-secondary">
          Letzter generischer Report:
          <span class="text-light">{{ last_report.type|default('unknown') }}</span>,
          {{ last_report.passed|default(0) }}/{{ last_report.total|default(0) }} bestanden,
          {{ last_report.generated_at|default('') }}
        </div>
      {% endif %}
    </div>
  </div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
{# Report section header: title for the selected type plus a button group to switch between types. #}
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
  <h5 class="text-warning mb-0">
    {# Decorative glyph, hidden from assistive technology. #}
    <i class="bi bi-list-check" aria-hidden="true"></i>
    Report-Details: {{ types[selected_type]|default(selected_type) }}
  </h5>
  <div class="btn-group btn-group-sm" role="group" aria-label="Eval report types">
    {% for type, label in types %}
      {# The active type is otherwise marked by colour only (btn-info vs. btn-outline-info);
         aria-current exposes the same state to assistive technology. #}
      <a class="btn {{ type == selected_type ? 'btn-info' : 'btn-outline-info' }}"
         href="{{ path('admin_evals_index', {type: type}) }}"
         {% if type == selected_type %}aria-current="page"{% endif %}>
        {{ label }}
      </a>
    {% endfor %}
  </div>
</div>
{% if selected_report %}
{% set selectedFailed = selected_report.failed|default(0) %}
{# Summary tiles for the selected report: total, passed, failed and generation timestamp.
   The failed count is shown in the success colour when it is 0 and in the danger colour otherwise. #}
{% set failedToneClass = selectedFailed == 0 ? 'text-success' : 'text-danger' %}
<div class="row g-3 mb-3 small">
  <div class="col-md-3">
    <div class="border border-secondary rounded p-3 h-100">
      <div class="text-secondary">Total</div>
      <div class="h5 mb-0">{{ selected_report.total|default(0) }}</div>
    </div>
  </div>
  <div class="col-md-3">
    <div class="border border-secondary rounded p-3 h-100">
      <div class="text-secondary">Passed</div>
      <div class="h5 text-success mb-0">{{ selected_report.passed|default(0) }}</div>
    </div>
  </div>
  <div class="col-md-3">
    <div class="border border-secondary rounded p-3 h-100">
      <div class="text-secondary">Failed</div>
      <div class="h5 {{ failedToneClass }} mb-0">
        {{ selectedFailed }}
      </div>
    </div>
  </div>
  <div class="col-md-3">
    <div class="border border-secondary rounded p-3 h-100">
      <div class="text-secondary">Generated</div>
      <div class="small text-light">{{ selected_report.generated_at|default('') }}</div>
    </div>
  </div>
</div>
{# Result table for the selected report: one row per entry in selected_report.results with the
   pass/fail badge, case id and type, duration, the failure list and a collapsible JSON dump of
   the result details. An empty result list renders a single placeholder row. #}
<div class="table-responsive">
  <table class="table table-dark table-striped table-hover align-middle mb-0">
    <thead class="table-secondary text-dark">
      <tr>
        {# scope="col" marks these cells as column headers for assistive technology. #}
        <th scope="col">Status</th>
        <th scope="col">Case</th>
        <th scope="col">Dauer</th>
        <th scope="col">Failures / Details</th>
      </tr>
    </thead>
    <tbody>
      {% for result in selected_report.results|default([]) %}
        <tr>
          <td style="width: 110px;">
            {% if result.passed|default(false) %}
              <span class="badge bg-success">PASS</span>
            {% else %}
              <span class="badge bg-danger">FAIL</span>
            {% endif %}
          </td>
          <td>
            <code>{{ result.case_id|default('') }}</code>
            <div class="small text-secondary">{{ result.type|default('') }}</div>
          </td>
          <td style="width: 120px;">
            {{ result.duration_ms|default(0) }} ms
          </td>
          <td>
            {% if result.failures|default([]) is not empty %}
              <ul class="mb-2 small text-danger">
                {% for failure in result.failures %}
                  <li>{{ failure }}</li>
                {% endfor %}
              </ul>
            {% else %}
              <div class="small text-success mb-2">Keine Fehler.</div>
            {% endif %}
            <details>
              <summary class="small text-info" style="cursor:pointer;">
                Details anzeigen
              </summary>
              {# Pretty-printed JSON. UNESCAPED_UNICODE / UNESCAPED_SLASHES keep umlauts and paths readable
                 instead of \uXXXX and \/ sequences; the output is still HTML-escaped by Twig.
                 The pre content must start directly after the tag, since whitespace inside pre is rendered. #}
              <pre class="bg-dark border border-secondary rounded p-2 mt-2 small text-light" style="white-space: pre-wrap; max-height: 260px; overflow: auto;">{{ result.details|default({})|json_encode(constant('JSON_PRETTY_PRINT') b-or constant('JSON_UNESCAPED_UNICODE') b-or constant('JSON_UNESCAPED_SLASHES')) }}</pre>
            </details>
          </td>
        </tr>
      {% else %}
        <tr>
          <td colspan="4" class="text-center text-secondary py-4">
            Dieser Report enthält keine Resultate.
          </td>
        </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
{% else %}
{# Shown when no report exists for the selected type; falls back to the raw type key
   when the type has no entry in `types`. #}
{% set missingReportLabel = types[selected_type]|default(selected_type) %}
<div class="alert alert-secondary mb-0">
  Für {{ missingReportLabel }} liegt noch kein typspezifischer Admin-Report vor.
  Starte den Eval oben oder per CLI.
</div>
{% endif %}
</div>
</div>
<script>
/*
 * Busy-state handling for the eval run forms (.js-admin-eval-run-form).
 *
 * On submit: show the full-screen overlay with the label of the eval being started, switch the
 * submitting button to its "running" look and disable every run button on the page.
 * On restore from the back/forward cache: undo all of the above. A page restored that way keeps
 * its DOM and script state, so without the reset it would come back with the overlay still
 * covering the page and every run button disabled.
 */
document.addEventListener('DOMContentLoaded', function () {
    const RUN_BUTTON_SELECTOR = '.js-admin-eval-run-button';
    const BUTTON_LABEL_SELECTOR = '.js-admin-eval-button-label';
    const BUTTON_SPINNER_SELECTOR = '.js-admin-eval-button-spinner';

    const forms = Array.from(document.querySelectorAll('.js-admin-eval-run-form'));
    const overlay = document.getElementById('adminEvalRunOverlay');
    const overlayLabel = document.getElementById('adminEvalRunOverlayLabel');
    // Inline cursor value before any run was started, so the reset can put it back.
    const idleBodyCursor = document.body.style.cursor;

    /**
     * Returns the label to show in the overlay for the given form: the text of the selected
     * option when the form has a type select, otherwise the form's data-eval-type-label
     * attribute, otherwise the generic "Eval".
     */
    function resolveEvalLabel(form) {
        const select = form.querySelector('.js-admin-eval-type-select');
        if (select && select.selectedOptions.length > 0) {
            return select.selectedOptions[0].textContent.trim();
        }
        return (form.dataset.evalTypeLabel || 'Eval').trim();
    }

    /** Disables every run button on the page. */
    function setAllRunButtonsDisabled() {
        document.querySelectorAll(RUN_BUTTON_SELECTOR).forEach(function (button) {
            button.disabled = true;
            button.classList.add('disabled');
        });
    }

    /** Switches one button to its "running" look, remembering the idle label for a later reset. */
    function markButtonBusy(button) {
        const buttonLabel = button.querySelector(BUTTON_LABEL_SELECTOR);
        const spinner = button.querySelector(BUTTON_SPINNER_SELECTOR);
        if (buttonLabel) {
            if (buttonLabel.dataset.idleLabel === undefined) {
                buttonLabel.dataset.idleLabel = buttonLabel.textContent;
            }
            buttonLabel.textContent = 'Läuft ...';
        }
        if (spinner) {
            spinner.classList.remove('d-none');
        }
    }

    /** Puts overlay, buttons and cursor back into the state the template renders them in. */
    function resetRunUi() {
        if (overlay) {
            overlay.classList.add('d-none');
        }
        document.querySelectorAll(RUN_BUTTON_SELECTOR).forEach(function (button) {
            const buttonLabel = button.querySelector(BUTTON_LABEL_SELECTOR);
            const spinner = button.querySelector(BUTTON_SPINNER_SELECTOR);
            button.disabled = false;
            button.classList.remove('disabled');
            if (buttonLabel && buttonLabel.dataset.idleLabel !== undefined) {
                buttonLabel.textContent = buttonLabel.dataset.idleLabel;
                delete buttonLabel.dataset.idleLabel;
            }
            if (spinner) {
                spinner.classList.add('d-none');
            }
        });
        document.body.style.cursor = idleBodyCursor;
    }

    forms.forEach(function (form) {
        form.addEventListener('submit', function (event) {
            // Prefer the button that actually triggered the submit; fall back to the form's run button.
            const button = event.submitter && event.submitter.classList.contains('js-admin-eval-run-button')
                ? event.submitter
                : form.querySelector(RUN_BUTTON_SELECTOR);
            const label = resolveEvalLabel(form);

            if (overlay && overlayLabel) {
                overlayLabel.textContent = label + ' läuft ...';
                overlay.classList.remove('d-none');
            }
            if (button) {
                markButtonBusy(button);
            }
            setAllRunButtonsDisabled();
            document.body.style.cursor = 'progress';
        });
    });

    // event.persisted is true only when the page is shown from the back/forward cache,
    // so a normal first load is left untouched.
    window.addEventListener('pageshow', function (event) {
        if (event.persisted) {
            resetRunUi();
        }
    });
});
</script>
{% endblock %}