This commit is contained in:
team 1
2026-05-12 11:08:34 +02:00
parent 6dced1c4df
commit 6e2ca15e97
5 changed files with 542 additions and 287 deletions

View File

@@ -14,10 +14,16 @@
</div>
</div>
<a href="{{ path('admin_model_config_list') }}"
class="btn btn-sm btn-outline-secondary">
Zurück zum KI-/LLM-Setup
</a>
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_evals_case_new', {type: selected_type|default('retrieval')}) }}"
class="btn btn-sm btn-outline-warning">
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
</a>
<a href="{{ path('admin_model_config_list') }}"
class="btn btn-sm btn-outline-secondary">
Zurück zum KI-/LLM-Setup
</a>
</div>
</div>
{% for label in ['success', 'danger', 'warning', 'info'] %}
@@ -212,100 +218,6 @@
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm mb-4" id="adminEvalCaseCreator">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start flex-wrap gap-2 mb-3">
<div>
<h5 class="text-warning mb-1">
<i class="bi bi-plus-square"></i> Eval-Case erstellen
</h5>
<div class="small text-secondary">
Speichert neue Regression-Cases direkt in <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
Aus Report-Ergebnissen kannst du Prompt, History, Query oder Dokument-IDs als Vorlage übernehmen.
</div>
</div>
</div>
<form method="post" action="{{ path('admin_evals_case_create') }}" class="row g-3">
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">
<div class="col-md-4">
<label class="form-label">Eval-Typ</label>
<select name="type" class="form-select bg-dark text-light border-secondary js-admin-eval-create-type">
{% for type, label in types %}
<option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>{{ label }}</option>
{% endfor %}
</select>
</div>
<div class="col-md-8">
<label class="form-label">Neue Case-ID</label>
<input type="text"
name="id"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-id"
placeholder="z. B. retrieval_semantic_new_001"
autocomplete="off"
required>
<div class="form-text text-secondary">
Erlaubt: Buchstaben, Zahlen, Unterstrich, Bindestrich. IDs müssen eindeutig sein.
</div>
</div>
<div class="col-12">
<label class="form-label">Prompt</label>
<textarea name="prompt"
rows="2"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-prompt"
placeholder="Testprompt, der abgesichert werden soll"
required></textarea>
</div>
<div class="col-lg-6">
<label class="form-label">Assert-JSON</label>
<textarea name="assert_json"
rows="8"
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-assert"
spellcheck="false">{
"min_results": 1
}</textarea>
<div class="form-text text-secondary">
Beispiel: <code>expected_query</code>, <code>must_include_one_of_document_ids</code>, <code>must_not_include_terms</code>.
</div>
</div>
<div class="col-lg-6">
<label class="form-label">Optional: History-JSON</label>
<textarea name="history_json"
rows="8"
class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-history"
spellcheck="false"
placeholder='[{"prompt":"...","answer":"..."}]'></textarea>
<div class="form-text text-secondary">
Für Follow-up-Cases: Liste vorheriger Chat-Turns mit <code>prompt</code> und <code>answer</code>.
</div>
</div>
<div class="col-12">
<label class="form-label">Optional: Request Context Hint</label>
<textarea name="request_context_hint"
rows="2"
class="form-control bg-dark text-light border-secondary js-admin-eval-create-context"
placeholder="Nur nutzen, wenn ein Case explizit Zusatzkontext braucht."></textarea>
</div>
<div class="col-12 d-flex gap-2 flex-wrap">
<button type="submit" class="btn btn-outline-warning">
<i class="bi bi-save"></i> Case speichern
</button>
<button type="button" class="btn btn-outline-secondary js-admin-eval-create-clear">
Formular leeren
</button>
</div>
</form>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
@@ -387,6 +299,13 @@
</div>
{% endif %}
<div class="mt-2">
<a href="{{ path('admin_evals_case_new', {source_type: selected_type, source_case_id: result.case_id|default('')}) }}"
class="btn btn-sm btn-outline-warning">
<i class="bi bi-journal-plus"></i> Als neuen Case vorbereiten
</a>
</div>
{% set historyRows = result.details.history|default([]) %}
{% if historyRows is not empty %}
<details class="small">
@@ -407,17 +326,6 @@
</div>
</details>
{% endif %}
<button type="button"
class="btn btn-sm btn-outline-warning mt-2 js-admin-eval-prefill-case"
data-result-type="{{ result.type|default(selected_type)|e('html_attr') }}"
data-result-prompt="{{ casePrompt|default('')|e('html_attr') }}"
data-result-history="{{ historyRows|default([])|json_encode|e('html_attr') }}"
data-result-query="{{ result.details.query|default('')|e('html_attr') }}"
data-result-individual-queries="{{ result.details.individual_queries|default([])|json_encode|e('html_attr') }}"
data-result-document-ids="{{ result.details.document_ids|default([])|json_encode|e('html_attr') }}">
Als neuen Case vorbereiten
</button>
</td>
<td style="width: 120px;">
{{ result.duration_ms|default(0) }} ms
@@ -595,173 +503,6 @@
});
}
// Element with the id "adminEvalCaseCreator" (the case-creation card); null when
// that element is not present in the document, which the code below checks for.
const creator = document.getElementById('adminEvalCaseCreator');
/**
 * Parses a JSON string and degrades to a caller-supplied default.
 *
 * @param {string|null|undefined} value - Raw JSON text; may be missing or empty.
 * @param {*} fallback - Returned whenever no usable value can be parsed.
 * @returns {*} The parsed value, or `fallback` when `value` is empty, is not
 *     valid JSON, or encodes JSON `null`.
 */
function parseJsonData(value, fallback) {
    if (!value) {
        return fallback;
    }

    try {
        const parsed = JSON.parse(value);

        // A literal JSON "null" carries no data. Returning it would hand callers
        // a value they cannot read `.length` or `.map` from, so treat it like a
        // missing payload.
        return parsed === null ? fallback : parsed;
    } catch (error) {
        // Malformed input is deliberately not fatal: fall back to the default.
        return fallback;
    }
}
/**
 * Builds a short ASCII slug (a-z, 0-9, underscore) from a prompt.
 *
 * German umlauts and sharp s are transliterated (ae, oe, ue, ss); any other
 * accented letter loses its diacritic. The result is at most 44 characters
 * long and never starts or ends with an underscore.
 *
 * @param {string|null|undefined} prompt - Source text for the slug.
 * @returns {string} The slug, or 'case' when nothing usable remains.
 */
function slugifyPrompt(prompt) {
    const transliterations = {
        '\u00e4': 'ae', // a with diaeresis
        '\u00f6': 'oe', // o with diaeresis
        '\u00fc': 'ue', // u with diaeresis
        '\u00df': 'ss' // sharp s
    };

    const slug = String(prompt || '')
        .toLowerCase()
        // Compose first so umlauts typed as base letter + combining diaeresis
        // are matched by the transliteration step as well.
        .normalize('NFC')
        // Must run BEFORE the diacritic strip below; once decomposed and
        // stripped, an umlaut is a plain vowel and can no longer be told apart.
        .replace(/[\u00e4\u00f6\u00fc\u00df]/g, function (character) {
            return transliterations[character];
        })
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9]+/g, '_')
        .replace(/^_+|_+$/g, '')
        .slice(0, 44)
        // Truncation can cut right after a separator; drop it again.
        .replace(/_+$/, '');

    return slug || 'case';
}
/**
 * Chooses a starter assertion object for a new eval case.
 *
 * Precedence:
 *   1. shop_query / followup with individual queries: exact list match
 *   2. shop_query / followup with a query: expected query
 *   3. retrieval / answer_guard with document ids: first id must be included
 *   4. answer_guard without document ids: no results allowed
 *   5. anything else: at least one result
 *
 * @param {string} type - Eval type identifier.
 * @param {string} query - Query string recorded for the result, may be empty.
 * @param {Array} individualQueries - Individual queries recorded for the result.
 * @param {Array} documentIds - Document ids recorded for the result.
 * @returns {Object} Assertion template.
 */
function buildAssertTemplate(type, query, individualQueries, documentIds) {
    const isQueryType = type === 'shop_query' || type === 'followup';
    const isDocumentType = type === 'retrieval' || type === 'answer_guard';

    if (isQueryType) {
        if (individualQueries.length > 0) {
            return {
                expected_individual_queries: individualQueries,
                expected_individual_queries_exact: true
            };
        }

        if (query) {
            return {expected_query: query};
        }
    }

    if (isDocumentType && documentIds.length > 0) {
        const firstDocumentId = documentIds[0];

        return {
            min_results: 1,
            must_include_one_of_document_ids: [firstDocumentId]
        };
    }

    return type === 'answer_guard' ? {max_results: 0} : {min_results: 1};
}
/**
 * Reduces raw history turns to `{prompt, answer}` pairs.
 *
 * For each turn the prompt falls back to 'Eval-Kontext' when missing, and the
 * answer is taken from the first non-empty of `answer`, `response` and
 * `answer_preview`. Both values are trimmed, and turns where both end up
 * empty are dropped.
 *
 * @param {*} historyRows - Expected to be an array of turn objects. Any other
 *     value (null, a plain object, a string, ...) yields an empty list.
 * @returns {Array<{prompt: string, answer: string}>} Normalized turns.
 */
function normalizeHistoryForForm(historyRows) {
    // The rows come out of parsed JSON, which may be an object instead of a
    // list; calling .map on that would throw.
    if (!Array.isArray(historyRows)) {
        return [];
    }

    return historyRows
        .filter(function (turn) {
            // Skip entries that cannot carry prompt/answer properties.
            return turn !== null && typeof turn === 'object';
        })
        .map(function (turn) {
            return {
                // String() keeps .trim() from throwing on numeric or boolean values.
                prompt: String(turn.prompt || 'Eval-Kontext').trim(),
                answer: String(turn.answer || turn.response || turn.answer_preview || '').trim()
            };
        })
        .filter(function (turn) {
            return turn.prompt !== '' || turn.answer !== '';
        });
}
/**
 * Copies a result's data attributes into the case-creation form and scrolls
 * the form into view. Does nothing when the creator element is absent.
 *
 * The generated case id has the shape `<type>_<prompt slug>_<YYMMDD_HHMMSS>`,
 * using the browser's local time.
 *
 * @param {HTMLElement} button - Element carrying the `data-result-*` attributes.
 * @returns {void}
 */
function fillCreatorFormFromResult(button) {
    if (!creator) {
        return;
    }

    const data = button.dataset;
    const type = data.resultType || 'retrieval';
    const prompt = data.resultPrompt || '';
    const history = normalizeHistoryForForm(parseJsonData(data.resultHistory, []));
    const query = data.resultQuery || '';
    const individualQueries = parseJsonData(data.resultIndividualQueries, []);
    const documentIds = parseJsonData(data.resultDocumentIds, []);

    const twoDigits = function (number) {
        return String(number).padStart(2, '0');
    };
    const now = new Date();
    const suffix = [
        String(now.getFullYear()).slice(2),
        twoDigits(now.getMonth() + 1),
        twoDigits(now.getDate()),
        '_',
        twoDigits(now.getHours()),
        twoDigits(now.getMinutes()),
        twoDigits(now.getSeconds())
    ].join('');

    // Selector paired with a lazy value producer. A value is only computed
    // when its field exists, and fields are filled in this order.
    const assignments = [
        ['.js-admin-eval-create-type', function () {
            return type;
        }],
        ['.js-admin-eval-create-id', function () {
            return type + '_' + slugifyPrompt(prompt) + '_' + suffix;
        }],
        ['.js-admin-eval-create-prompt', function () {
            return prompt;
        }],
        ['.js-admin-eval-create-assert', function () {
            const template = buildAssertTemplate(type, query, individualQueries, documentIds);

            return JSON.stringify(template, null, 2);
        }],
        ['.js-admin-eval-create-history', function () {
            if (history.length > 0) {
                return JSON.stringify(history, null, 2);
            }

            return '';
        }],
        ['.js-admin-eval-create-context', function () {
            return '';
        }]
    ];

    assignments.forEach(function (assignment) {
        const field = creator.querySelector(assignment[0]);

        if (field) {
            field.value = assignment[1]();
        }
    });

    creator.scrollIntoView({behavior: 'smooth', block: 'start'});
}
// Wire up the "clear form" buttons inside the creator element. The handler
// resets the id, prompt, assert, history and context fields; the type select
// is not touched.
if (creator) {
    // Selector paired with the value the field is reset to.
    const resetValues = [
        ['.js-admin-eval-create-id', ''],
        ['.js-admin-eval-create-prompt', ''],
        ['.js-admin-eval-create-assert', '{\n "min_results": 1\n}'],
        ['.js-admin-eval-create-history', ''],
        ['.js-admin-eval-create-context', '']
    ];

    creator.querySelectorAll('.js-admin-eval-create-clear').forEach(function (clearButton) {
        clearButton.addEventListener('click', function () {
            resetValues.forEach(function (entry) {
                const field = creator.querySelector(entry[0]);

                if (field) {
                    field.value = entry[1];
                }
            });
        });
    });
}
// Each prefill button fills the creator form from its own data attributes.
for (const prefillButton of document.querySelectorAll('.js-admin-eval-prefill-case')) {
    prefillButton.addEventListener('click', function () {
        fillCreatorFormFromResult(prefillButton);
    });
}
forms.forEach(function (form) {
syncCaseSelect(form);