p101
This commit is contained in:
@@ -212,6 +212,100 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{#
    Creator card for new regression eval cases.
    The form posts to the admin_evals_case_create route and is protected by the
    admin_eval_case_create CSRF token. The js-admin-eval-create-* classes are hooks
    used by the page script to prefill and clear the fields.
#}
<div class="card bg-black border-secondary text-light shadow-sm mb-4" id="adminEvalCaseCreator">
    <div class="card-body">
        <div class="d-flex justify-content-between align-items-start flex-wrap gap-2 mb-3">
            <div>
                <h5 class="text-warning mb-1">
                    <i class="bi bi-plus-square" aria-hidden="true"></i> Eval-Case erstellen
                </h5>
                <div class="small text-secondary">
                    {# The angle brackets must be escaped, otherwise the browser parses <type> as an element and drops it. #}
                    Speichert neue Regression-Cases direkt in <code>tests/evals/cases/&lt;type&gt;.ndjson</code>.
                    Aus Report-Ergebnissen kannst du Prompt, History, Query oder Dokument-IDs als Vorlage übernehmen.
                </div>
            </div>
        </div>

        <form method="post" action="{{ path('admin_evals_case_create') }}" class="row g-3">
            <input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_create') }}">

            <div class="col-md-4">
                <label class="form-label" for="adminEvalCaseCreateType">Eval-Typ</label>
                <select name="type" id="adminEvalCaseCreateType" class="form-select bg-dark text-light border-secondary js-admin-eval-create-type">
                    {% for type, label in types %}
                        <option value="{{ type }}" {% if type == selected_type %}selected{% endif %}>{{ label }}</option>
                    {% endfor %}
                </select>
            </div>

            <div class="col-md-8">
                <label class="form-label" for="adminEvalCaseCreateId">Neue Case-ID</label>
                <input type="text"
                       name="id"
                       id="adminEvalCaseCreateId"
                       class="form-control bg-dark text-light border-secondary js-admin-eval-create-id"
                       placeholder="z. B. retrieval_semantic_new_001"
                       autocomplete="off"
                       aria-describedby="adminEvalCaseCreateIdHelp"
                       required>
                <div class="form-text text-secondary" id="adminEvalCaseCreateIdHelp">
                    Erlaubt: Buchstaben, Zahlen, Unterstrich, Bindestrich. IDs müssen eindeutig sein.
                </div>
            </div>

            <div class="col-12">
                <label class="form-label" for="adminEvalCaseCreatePrompt">Prompt</label>
                <textarea name="prompt"
                          id="adminEvalCaseCreatePrompt"
                          rows="2"
                          class="form-control bg-dark text-light border-secondary js-admin-eval-create-prompt"
                          placeholder="Testprompt, der abgesichert werden soll"
                          required></textarea>
            </div>

            <div class="col-lg-6">
                <label class="form-label" for="adminEvalCaseCreateAssert">Assert-JSON</label>
                {# The textarea content is the default assert template; it is kept flush left because leading whitespace would become part of the value. #}
                <textarea name="assert_json"
                          id="adminEvalCaseCreateAssert"
                          rows="8"
                          class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-assert"
                          aria-describedby="adminEvalCaseCreateAssertHelp"
                          spellcheck="false">{
"min_results": 1
}</textarea>
                <div class="form-text text-secondary" id="adminEvalCaseCreateAssertHelp">
                    Beispiel: <code>expected_query</code>, <code>must_include_one_of_document_ids</code>, <code>must_not_include_terms</code>.
                </div>
            </div>

            <div class="col-lg-6">
                <label class="form-label" for="adminEvalCaseCreateHistory">Optional: History-JSON</label>
                <textarea name="history_json"
                          id="adminEvalCaseCreateHistory"
                          rows="8"
                          class="form-control bg-dark text-light border-secondary font-monospace small js-admin-eval-create-history"
                          aria-describedby="adminEvalCaseCreateHistoryHelp"
                          spellcheck="false"
                          placeholder='[{"prompt":"...","answer":"..."}]'></textarea>
                <div class="form-text text-secondary" id="adminEvalCaseCreateHistoryHelp">
                    Für Follow-up-Cases: Liste vorheriger Chat-Turns mit <code>prompt</code> und <code>answer</code>.
                </div>
            </div>

            <div class="col-12">
                <label class="form-label" for="adminEvalCaseCreateContext">Optional: Request Context Hint</label>
                <textarea name="request_context_hint"
                          id="adminEvalCaseCreateContext"
                          rows="2"
                          class="form-control bg-dark text-light border-secondary js-admin-eval-create-context"
                          placeholder="Nur nutzen, wenn ein Case explizit Zusatzkontext braucht."></textarea>
            </div>

            <div class="col-12 d-flex gap-2 flex-wrap">
                <button type="submit" class="btn btn-outline-warning">
                    <i class="bi bi-save" aria-hidden="true"></i> Case speichern
                </button>
                <button type="button" class="btn btn-outline-secondary js-admin-eval-create-clear">
                    Formular leeren
                </button>
            </div>
        </form>
    </div>
</div>
|
||||
|
||||
<div class="card bg-black border-secondary text-light shadow-sm">
|
||||
<div class="card-body">
|
||||
<div class="d-flex justify-content-between align-items-center flex-wrap gap-2 mb-3">
|
||||
@@ -281,9 +375,49 @@
|
||||
<span class="badge bg-danger">FAIL</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
{#
    Result cell: shows the case ID and type, the prompt and optional chat history,
    and a button whose data-result-* attributes carry the values the page script
    copies into the creator form.
#}
<td style="min-width: 260px;">
    <code>{{ result.case_id|default('') }}</code>
    <div class="small text-secondary mb-2">{{ result.type|default('') }}</div>

    {# Prefer the prompt on the result itself; fall back to the one stored in its details. #}
    {% set casePrompt = result.prompt|default(result.details.prompt|default('')) %}
    {% if casePrompt %}
        <div class="small mb-2">
            <span class="text-secondary">Prompt:</span><br>
            <span class="text-light">{{ casePrompt }}</span>
        </div>
    {% endif %}

    {% set historyRows = result.details.history|default([]) %}
    {% if historyRows is not empty %}
        <details class="small">
            <summary class="text-info" style="cursor:pointer;">
                Kontext / History anzeigen
            </summary>
            <div class="mt-2 ps-2 border-start border-secondary">
                {% for turn in historyRows %}
                    <div class="mb-2">
                        <div class="text-secondary">Vorheriger Prompt:</div>
                        <div class="text-light">{{ turn.prompt|default('') }}</div>
                        {% if turn.answer_preview|default('') %}
                            <div class="text-secondary mt-1">Antwort-Auszug:</div>
                            <div class="text-secondary">{{ turn.answer_preview }}</div>
                        {% endif %}
                    </div>
                {% endfor %}
            </div>
        </details>
    {% endif %}

    {# List-valued data is JSON-encoded first and then escaped for the attribute context. #}
    <button type="button"
            class="btn btn-sm btn-outline-warning mt-2 js-admin-eval-prefill-case"
            data-result-type="{{ result.type|default(selected_type)|e('html_attr') }}"
            data-result-prompt="{{ casePrompt|default('')|e('html_attr') }}"
            data-result-history="{{ historyRows|default([])|json_encode|e('html_attr') }}"
            data-result-query="{{ result.details.query|default('')|e('html_attr') }}"
            data-result-individual-queries="{{ result.details.individual_queries|default([])|json_encode|e('html_attr') }}"
            data-result-document-ids="{{ result.details.document_ids|default([])|json_encode|e('html_attr') }}">
        Als neuen Case vorbereiten
    </button>
</td>
|
||||
<td style="width: 120px;">
|
||||
{{ result.duration_ms|default(0) }} ms
|
||||
@@ -461,6 +595,173 @@
|
||||
});
|
||||
}
|
||||
|
||||
// Root element of the "create eval case" card; getElementById yields null when the card is not on the page.
const creator = document.getElementById('adminEvalCaseCreator');
|
||||
|
||||
/**
 * Parses a JSON string taken from a data attribute.
 *
 * The fallback is returned when the value is empty, is not valid JSON, parses
 * to null, or parses to a non-array although the fallback is an array. The
 * last two cases keep callers that use `.length` / `.map` on the result from
 * receiving `null` or a plain object.
 *
 * @param {string|undefined|null} value Raw JSON text.
 * @param {*} fallback Value returned when nothing usable can be parsed.
 * @returns {*} The parsed data or the fallback.
 */
function parseJsonData(value, fallback) {
    if (!value) {
        return fallback;
    }

    let parsed;

    try {
        parsed = JSON.parse(value);
    } catch (error) {
        // Malformed JSON is treated like missing data.
        return fallback;
    }

    if (parsed === null || parsed === undefined) {
        return fallback;
    }

    // A list was requested: anything that is not a list is unusable for the caller.
    if (Array.isArray(fallback) && !Array.isArray(parsed)) {
        return fallback;
    }

    return parsed;
}
|
||||
|
||||
/**
 * Turns a prompt into a short ASCII slug for use inside a generated case ID.
 *
 * German umlauts and sharp s are transliterated (ae, oe, ue, ss) BEFORE the
 * remaining diacritics are stripped; stripping first would reduce "ä" to "a"
 * and the transliteration would never match. The input is composed (NFC)
 * first so that decomposed umlauts (base letter + U+0308) are handled too.
 *
 * @param {string|undefined|null} prompt Source text.
 * @returns {string} Lowercase slug of at most 44 characters made of a-z, 0-9
 *                   and underscores, or "case" when nothing usable remains.
 */
function slugifyPrompt(prompt) {
    const normalized = String(prompt || '')
        .toLowerCase()
        .normalize('NFC')
        .replace(/ä/g, 'ae')
        .replace(/ö/g, 'oe')
        .replace(/ü/g, 'ue')
        .replace(/ß/g, 'ss')
        // Decompose what is left and drop the combining marks (é -> e, ñ -> n, ...).
        .normalize('NFD')
        .replace(/[\u0300-\u036f]/g, '')
        .replace(/[^a-z0-9]+/g, '_')
        .replace(/^_+|_+$/g, '')
        .slice(0, 44)
        // Cutting at 44 characters can end right after a separator.
        .replace(/_+$/, '');

    return normalized || 'case';
}
|
||||
|
||||
/**
 * Builds the starter assert object for a new case.
 *
 * Precedence:
 *  1. shop_query / followup with individual queries -> exact individual-query expectation
 *  2. shop_query / followup with a query            -> expected_query
 *  3. retrieval / answer_guard with document IDs    -> first document ID must be included
 *  4. answer_guard                                  -> max_results: 0
 *  5. anything else                                 -> min_results: 1
 *
 * @param {string} type Eval type of the case.
 * @param {string} query Query string from the result, may be empty.
 * @param {Array} individualQueries Individual queries from the result.
 * @param {Array} documentIds Document IDs from the result.
 * @returns {Object} Assert template.
 */
function buildAssertTemplate(type, query, individualQueries, documentIds) {
    const isQueryType = type === 'shop_query' || type === 'followup';

    if (isQueryType) {
        if (individualQueries.length > 0) {
            return {
                expected_individual_queries: individualQueries,
                expected_individual_queries_exact: true
            };
        }

        if (query) {
            return {expected_query: query};
        }
    }

    const isDocumentType = type === 'retrieval' || type === 'answer_guard';

    if (isDocumentType && documentIds.length > 0) {
        return {
            min_results: 1,
            must_include_one_of_document_ids: [documentIds[0]]
        };
    }

    return type === 'answer_guard' ? {max_results: 0} : {min_results: 1};
}
|
||||
|
||||
/**
 * Converts history rows from a report result into the {prompt, answer} shape
 * used by the History-JSON field.
 *
 * A missing prompt becomes "Eval-Kontext"; the answer is taken from `answer`,
 * `response` or `answer_preview`, in that order. Input that is not an array
 * yields an empty list, entries that are not objects are skipped, and
 * non-string values are converted to strings before trimming.
 *
 * @param {*} historyRows Parsed history data.
 * @returns {Array<{prompt: string, answer: string}>} Normalized turns.
 */
function normalizeHistoryForForm(historyRows) {
    if (!Array.isArray(historyRows)) {
        return [];
    }

    function toTrimmedText(value) {
        return String(value).trim();
    }

    return historyRows
        .filter(function (turn) {
            return turn !== null && typeof turn === 'object';
        })
        .map(function (turn) {
            return {
                prompt: toTrimmedText(turn.prompt || 'Eval-Kontext'),
                answer: toTrimmedText(turn.answer || turn.response || turn.answer_preview || '')
            };
        })
        // Because of the prompt default above, this only drops turns whose
        // prompt was whitespace-only and whose answer is empty.
        .filter(function (turn) {
            return turn.prompt !== '' || turn.answer !== '';
        });
}
|
||||
|
||||
/**
 * Copies the data-result-* values of a report-row button into the creator form
 * and scrolls the form into view.
 *
 * The case ID is generated as "<type>_<prompt slug>_<YYMMDD_HHMMSS>" from the
 * browser's local time; the assert field receives a template derived from the
 * result data; the context hint is always emptied. Does nothing when the
 * creator card is not on the page.
 *
 * @param {HTMLElement} button Element carrying the data-result-* attributes.
 * @returns {void}
 */
function fillCreatorFormFromResult(button) {
    if (!creator) {
        return;
    }

    const source = button.dataset;
    const type = source.resultType || 'retrieval';
    const prompt = source.resultPrompt || '';
    const history = normalizeHistoryForForm(parseJsonData(source.resultHistory, []));
    const query = source.resultQuery || '';
    const individualQueries = parseJsonData(source.resultIndividualQueries, []);
    const documentIds = parseJsonData(source.resultDocumentIds, []);

    const stamp = new Date();
    const twoDigits = function (number) {
        return String(number).padStart(2, '0');
    };
    const suffix = [
        String(stamp.getFullYear()).slice(2),
        twoDigits(stamp.getMonth() + 1),
        twoDigits(stamp.getDate()),
        '_',
        twoDigits(stamp.getHours()),
        twoDigits(stamp.getMinutes()),
        twoDigits(stamp.getSeconds())
    ].join('');

    const find = function (selector) {
        return creator.querySelector(selector);
    };

    // Each entry pairs a field with a function producing its new value; the
    // value is only computed when the field actually exists.
    const assignments = [
        [find('.js-admin-eval-create-type'), function () {
            return type;
        }],
        [find('.js-admin-eval-create-id'), function () {
            return type + '_' + slugifyPrompt(prompt) + '_' + suffix;
        }],
        [find('.js-admin-eval-create-prompt'), function () {
            return prompt;
        }],
        [find('.js-admin-eval-create-assert'), function () {
            const template = buildAssertTemplate(type, query, individualQueries, documentIds);

            return JSON.stringify(template, null, 2);
        }],
        [find('.js-admin-eval-create-history'), function () {
            return history.length > 0 ? JSON.stringify(history, null, 2) : '';
        }],
        [find('.js-admin-eval-create-context'), function () {
            return '';
        }]
    ];

    assignments.forEach(function (assignment) {
        const field = assignment[0];
        const produceValue = assignment[1];

        if (field) {
            field.value = produceValue();
        }
    });

    creator.scrollIntoView({behavior: 'smooth', block: 'start'});
}
|
||||
|
||||
// "Formular leeren": restores the ID, prompt, assert, history and context
// fields to their initial values. The type select is not changed here.
if (creator) {
    const clearedValues = [
        ['.js-admin-eval-create-id', ''],
        ['.js-admin-eval-create-prompt', ''],
        ['.js-admin-eval-create-assert', '{\n "min_results": 1\n}'],
        ['.js-admin-eval-create-history', ''],
        ['.js-admin-eval-create-context', '']
    ];

    const resetCreatorFields = function () {
        // Fields are looked up on every click; a missing field is skipped.
        clearedValues.forEach(function (entry) {
            const field = creator.querySelector(entry[0]);

            if (field) {
                field.value = entry[1];
            }
        });
    };

    creator.querySelectorAll('.js-admin-eval-create-clear').forEach(function (clearButton) {
        clearButton.addEventListener('click', resetCreatorFields);
    });
}
|
||||
|
||||
// Every "Als neuen Case vorbereiten" button prefills the creator form from its own data attributes.
document.querySelectorAll('.js-admin-eval-prefill-case').forEach(function (prefillButton) {
    prefillButton.addEventListener('click', function (event) {
        // currentTarget is the button the listener is attached to.
        fillCreatorFormFromResult(event.currentTarget);
    });
});
|
||||
|
||||
forms.forEach(function (form) {
|
||||
syncCaseSelect(form);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user