p101a
This commit is contained in:
194
templates/admin/evals/case_new.html.twig
Normal file
194
templates/admin/evals/case_new.html.twig
Normal file
@@ -0,0 +1,194 @@
|
||||
{% extends 'admin/base.html.twig' %}
|
||||
|
||||
{% block title %}Eval-Case erstellen{% endblock %}
|
||||
|
||||
{% block body %}
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
|
||||
<div>
|
||||
<h1 class="h3 mb-1">
|
||||
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
|
||||
</h1>
|
||||
<div class="small text-secondary">
|
||||
Neue Regression-Cases separat anlegen, ohne die Eval-Suite-Übersicht aufzublähen.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a href="{{ path('admin_evals_index', {type: case_draft.type|default('retrieval')}) }}"
|
||||
class="btn btn-sm btn-outline-secondary">
|
||||
Zurück zur Eval Suite
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{% for label in ['success', 'danger', 'warning', 'info'] %}
|
||||
{% for message in app.flashes(label) %}
|
||||
<div class="alert alert-{{ label }} shadow-sm">
|
||||
{{ message }}
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
|
||||
{% if case_draft.source_label|default('') %}
|
||||
<div class="alert alert-info border-info bg-black text-light shadow-sm">
|
||||
<strong>Vorlage geladen:</strong> {{ case_draft.source_label }}<br>
|
||||
<span class="small text-secondary">
|
||||
Bitte Case-ID, Prompt und Assertions prüfen, bevor du den Case speicherst.
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="row g-4">
|
||||
<div class="col-xl-8">
|
||||
<div class="card bg-black border-secondary text-light shadow-sm">
|
||||
<div class="card-body">
|
||||
<h5 class="text-warning mb-3">
|
||||
<i class="bi bi-pencil-square"></i> Neuer Eval-Case
|
||||
</h5>
|
||||
|
||||
{#
  Form for creating a new eval case; posts to the admin_evals_case_create route.
  Field values are prefilled from case_draft when it is set.
  Every control is tied to its visible label via for/id and to its help text
  via aria-describedby, so assistive technology announces both.
#}
<form method="post" action="{{ path('admin_evals_case_create') }}">
    <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">

    <div class="mb-3">
        <label class="form-label" for="eval-case-type">Eval-Typ</label>
        <select id="eval-case-type"
                name="type"
                class="form-select bg-dark text-light border-secondary"
                aria-describedby="eval-case-type-help">
            {% for type, label in types %}
                <option value="{{ type }}" {% if type == case_draft.type|default('retrieval') %}selected{% endif %}>
                    {{ label }}
                </option>
            {% endfor %}
        </select>
        <div id="eval-case-type-help" class="form-text text-secondary">
            {# The placeholder must be entity-escaped, otherwise the browser parses <type> as a tag and hides it. #}
            Der Typ entscheidet, in welche Datei geschrieben wird: <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
        </div>
    </div>

    <div class="mb-3">
        <label class="form-label" for="eval-case-id">Neue Case-ID</label>
        <input type="text"
               id="eval-case-id"
               name="id"
               value="{{ case_draft.id|default('') }}"
               class="form-control bg-dark text-light border-secondary"
               placeholder="followup_testomat808_device_price_001"
               aria-describedby="eval-case-id-help"
               required>
        <div id="eval-case-id-help" class="form-text text-secondary">
            Eindeutig über alle Eval-Typen. Erlaubt: Buchstaben, Zahlen, <code>_</code> und <code>-</code>.
        </div>
    </div>

    <div class="mb-3">
        <label class="form-label" for="eval-case-prompt">Prompt</label>
        <textarea id="eval-case-prompt"
                  name="prompt"
                  rows="3"
                  class="form-control bg-dark text-light border-secondary"
                  placeholder="und was kostet das gerät selber"
                  aria-describedby="eval-case-prompt-help"
                  required>{{ case_draft.prompt|default('') }}</textarea>
        <div id="eval-case-prompt-help" class="form-text text-secondary">
            Exakt der Nutzerprompt, der abgesichert werden soll. Tippfehler bewusst so eintragen, wenn sie Teil des Tests sind.
        </div>
    </div>

    <div class="mb-3">
        <label class="form-label" for="eval-case-assert-json">Assert-JSON</label>
        <textarea id="eval-case-assert-json"
                  name="assert_json"
                  rows="9"
                  class="form-control bg-dark text-light border-secondary font-monospace"
                  spellcheck="false"
                  aria-describedby="eval-case-assert-json-help">{{ case_draft.assert_json|default('{}') }}</textarea>
        <div id="eval-case-assert-json-help" class="form-text text-secondary">
            Muss ein gültiges JSON-Objekt sein. Beispiel: <code>{"expected_query":"testomat 808"}</code>.
        </div>
    </div>

    <div class="mb-3">
        <label class="form-label" for="eval-case-history-json">History-JSON <span class="text-secondary">optional</span></label>
        <textarea id="eval-case-history-json"
                  name="history_json"
                  rows="8"
                  class="form-control bg-dark text-light border-secondary font-monospace"
                  spellcheck="false"
                  aria-describedby="eval-case-history-json-help"
                  placeholder='[{"prompt":"vorherige Frage","answer":"vorherige Antwort"}]'>{{ case_draft.history_json|default('') }}</textarea>
        <div id="eval-case-history-json-help" class="form-text text-secondary">
            Für Follow-up-Cases empfohlen. Muss eine JSON-Liste sein. Leer lassen für direkte Prompts.
        </div>
    </div>

    <div class="mb-4">
        <label class="form-label" for="eval-case-context-hint">Request Context Hint <span class="text-secondary">optional</span></label>
        <textarea id="eval-case-context-hint"
                  name="request_context_hint"
                  rows="3"
                  class="form-control bg-dark text-light border-secondary"
                  aria-describedby="eval-case-context-hint-help"
                  placeholder="Nur für Spezialfälle, wenn History nicht ausreicht.">{{ case_draft.request_context_hint|default('') }}</textarea>
        <div id="eval-case-context-hint-help" class="form-text text-secondary">
            Normalerweise leer lassen. Für reguläre Regressionen lieber History-JSON verwenden.
        </div>
    </div>

    <div class="d-flex flex-wrap gap-2">
        <button type="submit" class="btn btn-warning">
            {# Purely decorative icon: hidden from screen readers, the button text is the accessible name. #}
            <i class="bi bi-save" aria-hidden="true"></i> Eval-Case speichern
        </button>
        <a href="{{ path('admin_evals_index', {type: case_draft.type|default('retrieval')}) }}"
           class="btn btn-outline-secondary">
            Abbrechen
        </a>
    </div>
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-xl-4">
|
||||
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
|
||||
<div class="card-body">
|
||||
<h5 class="text-info mb-3">
|
||||
<i class="bi bi-info-circle"></i> Feld-Checkliste
|
||||
</h5>
|
||||
<ul class="small text-secondary mb-0">
|
||||
<li><strong class="text-light">retrieval</strong>: richtiges Dokument / richtige Chunks prüfen.</li>
|
||||
<li><strong class="text-light">shop_query</strong>: direkte Shopquery prüfen.</li>
|
||||
<li><strong class="text-light">followup</strong>: Prompt plus History prüfen.</li>
|
||||
<li><strong class="text-light">answer_guard</strong>: No-Answer- oder Evidenzfälle prüfen.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
|
||||
<div class="card-body">
|
||||
<h5 class="text-info mb-3">
|
||||
<i class="bi bi-braces"></i> Häufige Assertions
|
||||
</h5>
|
||||
<div class="small text-secondary mb-2">Exakte Query:</div>
|
||||
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
|
||||
"expected_query": "testomat 808"
|
||||
}</code></pre>
|
||||
|
||||
<div class="small text-secondary mb-2">Begriffe müssen enthalten sein:</div>
|
||||
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
|
||||
"must_include_terms": [
|
||||
"testomat",
|
||||
"808"
|
||||
]
|
||||
}</code></pre>
|
||||
|
||||
<div class="small text-secondary mb-2">Dokument muss enthalten sein:</div>
|
||||
<pre class="bg-dark border border-secondary rounded p-2 small text-light"><code>{
|
||||
"min_results": 1,
|
||||
"must_include_one_of_document_ids": [
|
||||
"DOKUMENT-ID"
|
||||
]
|
||||
}</code></pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card bg-black border-secondary text-light shadow-sm">
|
||||
<div class="card-body">
|
||||
<h5 class="text-info mb-3">
|
||||
<i class="bi bi-lightbulb"></i> Empfehlung
|
||||
</h5>
|
||||
<p class="small text-secondary mb-0">
|
||||
Ein guter Eval-Case prüft genau einen Zweck. Lieber mehrere kleine Cases anlegen als einen großen, empfindlichen Case.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
@@ -14,10 +14,16 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a href="{{ path('admin_model_config_list') }}"
|
||||
class="btn btn-sm btn-outline-secondary">
|
||||
Zurück zum KI-/LLM-Setup
|
||||
</a>
|
||||
<div class="d-flex flex-wrap gap-2">
|
||||
<a href="{{ path('admin_evals_case_new', {type: selected_type|default('retrieval')}) }}"
|
||||
class="btn btn-sm btn-outline-warning">
|
||||
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
|
||||
</a>
|
||||
<a href="{{ path('admin_model_config_list') }}"
|
||||
class="btn btn-sm btn-outline-secondary">
|
||||
Zurück zum KI-/LLM-Setup
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% for label in ['success', 'danger', 'warning', 'info'] %}
|
||||
@@ -212,100 +218,6 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="card bg-black border-secondary text-light shadow-sm mb-4" id="adminEvalCaseCreator">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-start flex-wrap gap-2 mb-3">
|
||||
<div>
|
||||
<h5 class="text-warning mb-1">
|
||||
<i class="bi bi-plus-square"></i> Eval-Case erstellen
|
||||
</h5>
|
||||
<div class="small text-secondary">
    {# The placeholder must be entity-escaped, otherwise the browser parses <type> as a tag and hides it. #}
    Speichert neue Regression-Cases direkt in <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
    Aus Report-Ergebnissen kannst du Prompt, History, Query oder Dokument-IDs als Vorlage übernehmen.
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<form method="post" action="{{ path('admin_evals_case_create') }}" class="row g-3">
|
||||
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">
|
||||
|
||||
<div class="col-md-4">
|
||||
<label class="form-label">Eval-Typ</label>
|
||||
<select name="type" class="form-select bg-dark text-light border-secondary js-admin-eval-create-type">
|
||||
{% for type, label in types %}
|
||||
<option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>{{ label }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="col-md-8">
|
||||
<label class="form-label">Neue Case-ID</label>
|
||||
<input type="text"
|
||||
name="id"
|
||||
class="form-control bg-dark text-light border-secondary js-admin-eval-create-id"
|
||||
placeholder="z. B. retrieval_semantic_new_001"
|
||||
autocomplete="off"
|
||||
required>
|
||||
<div class="form-text text-secondary">
|
||||
Erlaubt: Buchstaben, Zahlen, Unterstrich, Bindestrich. IDs müssen eindeutig sein.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-12">
|
||||
<label class="form-label">Prompt</label>
|
||||
<textarea name="prompt"
|
||||
rows="2"
|
||||
class="form-control bg-dark text-light border-secondary js-admin-eval-create-prompt"
|
||||
placeholder="Testprompt, der abgesichert werden soll"
|
||||
required></textarea>
|
||||
</div>
|
||||
|
||||
<div class="col-lg-6">
|
||||
<label class="form-label">Assert-JSON</label>
|
||||
<textarea name="assert_json"
|
||||
rows="8"
|
||||
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-assert"
|
||||
spellcheck="false">{
|
||||
"min_results": 1
|
||||
}</textarea>
|
||||
<div class="form-text text-secondary">
|
||||
Beispiel: <code>expected_query</code>, <code>must_include_one_of_document_ids</code>, <code>must_not_include_terms</code>.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-lg-6">
|
||||
<label class="form-label">Optional: History-JSON</label>
|
||||
<textarea name="history_json"
|
||||
rows="8"
|
||||
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-history"
|
||||
spellcheck="false"
|
||||
placeholder='[{"prompt":"...","answer":"..."}]'></textarea>
|
||||
<div class="form-text text-secondary">
|
||||
Für Follow-up-Cases: Liste vorheriger Chat-Turns mit <code>prompt</code> und <code>answer</code>.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col-12">
|
||||
<label class="form-label">Optional: Request Context Hint</label>
|
||||
<textarea name="request_context_hint"
|
||||
rows="2"
|
||||
class="form-control bg-dark text-light border-secondary js-admin-eval-create-context"
|
||||
placeholder="Nur nutzen, wenn ein Case explizit Zusatzkontext braucht."></textarea>
|
||||
</div>
|
||||
|
||||
<div class="col-12 d-flex gap-2 flex-wrap">
|
||||
<button type="submit" class="btn btn-outline-warning">
|
||||
<i class="bi bi-save"></i> Case speichern
|
||||
</button>
|
||||
<button type="button" class="btn btn-outline-secondary js-admin-eval-create-clear">
|
||||
Formular leeren
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card bg-black border-secondary text-light shadow-sm">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
|
||||
@@ -387,6 +299,13 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="mt-2">
|
||||
<a href="{{ path('admin_evals_case_new', {source_type: selected_type, source_case_id: result.case_id|default('')}) }}"
|
||||
class="btn btn-sm btn-outline-warning">
|
||||
<i class="bi bi-journal-plus"></i> Als neuen Case vorbereiten
|
||||
</a>
|
||||
</div>
|
||||
|
||||
{% set historyRows = result.details.history|default([]) %}
|
||||
{% if historyRows is not empty %}
|
||||
<details class="small">
|
||||
@@ -407,17 +326,6 @@
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<button type="button"
|
||||
class="btn btn-sm btn-outline-warning mt-2 js-admin-eval-prefill-case"
|
||||
data-result-type="{{ result.type|default(selected_type)|e('html_attr') }}"
|
||||
data-result-prompt="{{ casePrompt|default('')|e('html_attr') }}"
|
||||
data-result-history="{{ historyRows|default([])|json_encode|e('html_attr') }}"
|
||||
data-result-query="{{ result.details.query|default('')|e('html_attr') }}"
|
||||
data-result-individual-queries="{{ result.details.individual_queries|default([])|json_encode|e('html_attr') }}"
|
||||
data-result-document-ids="{{ result.details.document_ids|default([])|json_encode|e('html_attr') }}">
|
||||
Als neuen Case vorbereiten
|
||||
</button>
|
||||
</td>
|
||||
<td style="width: 120px;">
|
||||
{{ result.duration_ms|default(0) }} ms
|
||||
@@ -595,173 +503,6 @@
|
||||
});
|
||||
}
|
||||
|
||||
// Element with id "adminEvalCaseCreator"; null when no such element is on the page.
const creator = document.getElementById('adminEvalCaseCreator');
|
||||
|
||||
/**
 * Parse a JSON string (typically the value of a data-* attribute).
 *
 * @param {string|undefined|null} value  Raw JSON text; empty/missing yields the fallback.
 * @param {*} fallback                   Value returned when nothing usable can be parsed.
 * @returns {*} The parsed value, or `fallback` when `value` is empty, is not valid
 *              JSON, or (for an array fallback) does not decode to an array.
 */
function parseJsonData(value, fallback) {
    if (!value) {
        return fallback;
    }

    let parsed;

    try {
        parsed = JSON.parse(value);
    } catch (error) {
        // Malformed attribute content is treated like "no data".
        return fallback;
    }

    // A caller that passes an array fallback goes on to use .map()/.length on the
    // result; valid JSON such as "null" or "{}" must not reach that code as a non-array.
    if (Array.isArray(fallback) && !Array.isArray(parsed)) {
        return fallback;
    }

    return parsed;
}
|
||||
|
||||
/**
 * Turn a prompt into a short ASCII slug usable as part of a case ID.
 *
 * German umlauts and sharp s are transliterated (ae, oe, ue, ss), other
 * diacritics are stripped, every run of non [a-z0-9] characters becomes a
 * single underscore, and the result is capped at 44 characters.
 *
 * @param {string|undefined|null} prompt  Prompt text; empty/missing is allowed.
 * @returns {string} The slug, or 'case' when nothing usable remains.
 */
function slugifyPrompt(prompt) {
    const normalized = (prompt || '')
        .toLowerCase()
        // Compose first so that "a" + combining diaeresis also matches the umlaut rules.
        .normalize('NFC')
        // Transliterate BEFORE stripping diacritics; afterwards "ä" would already be "a".
        .replace(/\u00e4/g, 'ae')
        .replace(/\u00f6/g, 'oe')
        .replace(/\u00fc/g, 'ue')
        .replace(/\u00df/g, 'ss')
        // Decompose the remaining accented letters and drop the combining marks.
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9]+/g, '_')
        .replace(/^_+|_+$/g, '')
        .slice(0, 44)
        // The cut can land right after a separator; do not end the slug with "_".
        .replace(/_+$/g, '');

    return normalized || 'case';
}
|
||||
|
||||
/**
 * Build a starter assertion object for a new eval case from a report result.
 *
 * Precedence:
 *  1. shop_query/followup with individual queries -> exact individual-query match
 *  2. shop_query/followup with a query            -> expected_query
 *  3. retrieval/answer_guard with document IDs     -> first document ID must be found
 *  4. answer_guard without documents               -> max_results: 0
 *  5. anything else                                -> min_results: 1
 *
 * @param {string} type                 Eval type of the result.
 * @param {string} query                Query string of the result (may be empty).
 * @param {Array}  individualQueries    Individual queries; non-arrays count as empty.
 * @param {Array}  documentIds          Document IDs; non-arrays count as empty.
 * @returns {Object} Plain object suitable for JSON.stringify.
 */
function buildAssertTemplate(type, query, individualQueries, documentIds) {
    // Inputs come from parsed data attributes; tolerate null or non-list values
    // instead of throwing on `.length`.
    const queries = Array.isArray(individualQueries) ? individualQueries : [];
    const ids = Array.isArray(documentIds) ? documentIds : [];

    const isQueryType = type === 'shop_query' || type === 'followup';
    const isDocumentType = type === 'retrieval' || type === 'answer_guard';

    if (isQueryType && queries.length > 0) {
        return {
            expected_individual_queries: queries,
            expected_individual_queries_exact: true
        };
    }

    if (isQueryType && query) {
        return {
            expected_query: query
        };
    }

    if (isDocumentType && ids.length > 0) {
        return {
            min_results: 1,
            must_include_one_of_document_ids: [ids[0]]
        };
    }

    if (type === 'answer_guard') {
        return {
            max_results: 0
        };
    }

    return {
        min_results: 1
    };
}
|
||||
|
||||
/**
 * Reduce raw history rows to the {prompt, answer} pairs written into the
 * History-JSON field.
 *
 * The answer is taken from `answer`, then `response`, then `answer_preview`.
 * Turns that carry neither a prompt nor an answer are dropped; a turn that has
 * an answer but no prompt gets the placeholder prompt 'Eval-Kontext'.
 *
 * @param {Array} historyRows  Raw turns; a non-array yields an empty list.
 * @returns {Array<{prompt: string, answer: string}>}
 */
function normalizeHistoryForForm(historyRows) {
    if (!Array.isArray(historyRows)) {
        return [];
    }

    // Missing values become '', everything else is stringified before trimming,
    // so a numeric or null field cannot throw on .trim().
    function toTrimmedText(value) {
        if (value === undefined || value === null) {
            return '';
        }

        return String(value).trim();
    }

    return historyRows
        .filter(function (turn) {
            return turn !== null && typeof turn === 'object';
        })
        .map(function (turn) {
            return {
                prompt: toTrimmedText(turn.prompt),
                answer: toTrimmedText(turn.answer || turn.response || turn.answer_preview)
            };
        })
        // Filter on the real content first; applying the placeholder prompt
        // earlier would make every turn look non-empty.
        .filter(function (turn) {
            return turn.prompt !== '' || turn.answer !== '';
        })
        .map(function (turn) {
            return {
                prompt: turn.prompt || 'Eval-Kontext',
                answer: turn.answer
            };
        });
}
|
||||
|
||||
/**
 * Prefill the inline "Eval-Case erstellen" form from the data-result-*
 * attributes of a result button, then scroll the form into view.
 *
 * Sets the type select, a generated case ID (type + prompt slug + timestamp),
 * the prompt, an assertion template and the normalized history; the request
 * context hint is always cleared. Does nothing when the creator card is absent.
 *
 * @param {HTMLElement} button  Element carrying the data-result-* attributes.
 * @returns {void}
 */
function fillCreatorFormFromResult(button) {
    if (!creator) {
        return;
    }

    // The data attributes hold JSON; anything that does not decode to a list
    // (null, object, string) is treated as "no entries" so that .map()/.length
    // below cannot throw.
    function asList(value) {
        return Array.isArray(value) ? value : [];
    }

    function pad2(number) {
        return String(number).padStart(2, '0');
    }

    const type = button.dataset.resultType || 'retrieval';
    const prompt = button.dataset.resultPrompt || '';
    const history = normalizeHistoryForForm(asList(parseJsonData(button.dataset.resultHistory, [])));
    const query = button.dataset.resultQuery || '';
    const individualQueries = asList(parseJsonData(button.dataset.resultIndividualQueries, []));
    const documentIds = asList(parseJsonData(button.dataset.resultDocumentIds, []));

    // Local-time suffix "YYMMDD_HHMMSS" appended to the generated case ID.
    const now = new Date();
    const suffix = String(now.getFullYear()).slice(2)
        + pad2(now.getMonth() + 1)
        + pad2(now.getDate())
        + '_'
        + pad2(now.getHours())
        + pad2(now.getMinutes())
        + pad2(now.getSeconds());

    const typeField = creator.querySelector('.js-admin-eval-create-type');
    const idField = creator.querySelector('.js-admin-eval-create-id');
    const promptField = creator.querySelector('.js-admin-eval-create-prompt');
    const assertField = creator.querySelector('.js-admin-eval-create-assert');
    const historyField = creator.querySelector('.js-admin-eval-create-history');
    const contextField = creator.querySelector('.js-admin-eval-create-context');

    if (typeField) {
        typeField.value = type;
    }

    if (idField) {
        idField.value = type + '_' + slugifyPrompt(prompt) + '_' + suffix;
    }

    if (promptField) {
        promptField.value = prompt;
    }

    if (assertField) {
        assertField.value = JSON.stringify(
            buildAssertTemplate(type, query, individualQueries, documentIds),
            null,
            2
        );
    }

    if (historyField) {
        historyField.value = history.length > 0 ? JSON.stringify(history, null, 2) : '';
    }

    if (contextField) {
        contextField.value = '';
    }

    creator.scrollIntoView({behavior: 'smooth', block: 'start'});
}
|
||||
|
||||
// Wire every "Formular leeren" button inside the creator card: a click resets
// the text fields to their initial values. The type select is left untouched.
if (creator) {
    // [field selector, value written on clear]
    const clearedFieldValues = [
        ['.js-admin-eval-create-id', ''],
        ['.js-admin-eval-create-prompt', ''],
        ['.js-admin-eval-create-assert', '{\n "min_results": 1\n}'],
        ['.js-admin-eval-create-history', ''],
        ['.js-admin-eval-create-context', '']
    ];

    creator.querySelectorAll('.js-admin-eval-create-clear').forEach(function (clearButton) {
        clearButton.addEventListener('click', function () {
            clearedFieldValues.forEach(function (entry) {
                const field = creator.querySelector(entry[0]);

                // Fields missing from the markup are skipped silently.
                if (field) {
                    field.value = entry[1];
                }
            });
        });
    });
}
|
||||
|
||||
// Each "Als neuen Case vorbereiten" button copies its own result data into the creator form.
for (const prefillButton of document.querySelectorAll('.js-admin-eval-prefill-case')) {
    prefillButton.addEventListener('click', function () {
        fillCreatorFormFromResult(prefillButton);
    });
}
|
||||
|
||||
forms.forEach(function (form) {
|
||||
syncCaseSelect(form);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user