p101d
This commit is contained in:
@@ -0,0 +1,50 @@
|
|||||||
|
# RetrieX Patch p101c - Admin Eval Case Delete
|
||||||
|
|
||||||
|
## Ziel
|
||||||
|
|
||||||
|
Ergänzt die Admin-Eval-Case-Verwaltung um eine sichere Löschfunktion für einzelne Eval-Cases.
|
||||||
|
|
||||||
|
Damit können falsch angelegte oder nicht mehr benötigte Cases direkt im Admin entfernt werden, ohne die Eval-Suite-Übersicht weiter aufzublähen.
|
||||||
|
|
||||||
|
## Umfang
|
||||||
|
|
||||||
|
- Neue POST-Route `admin_evals_case_delete` unter `/admin/evals/cases/delete`
|
||||||
|
- CSRF-Schutz pro Eval-Typ und Case-ID
|
||||||
|
- Rollenprüfung über `ROLE_KNOWLEDGE_ADMIN`
|
||||||
|
- Entfernen genau des ausgewählten Cases aus `tests/evals/cases/<type>.ndjson`
|
||||||
|
- Abbruch ohne Änderung, wenn die NDJSON-Datei ungültig ist oder der Case nicht gefunden wird
|
||||||
|
- Löschbereich auf der separaten Case-Seite `/admin/evals/cases/new`
|
||||||
|
- Bestätigungsdialog vor dem Löschen
|
||||||
|
- Hinweis, dass nach dem Löschen der betroffene Eval-Typ erneut ausgeführt werden sollte
|
||||||
|
|
||||||
|
## Nicht geändert
|
||||||
|
|
||||||
|
- Keine Retrieval-Logik
|
||||||
|
- Keine Shopquery-Logik
|
||||||
|
- Keine Follow-up-Logik
|
||||||
|
- Keine Answer-Guard-Logik
|
||||||
|
- Keine Eval-Assertions
|
||||||
|
- Keine bestehenden Cases automatisch gelöscht
|
||||||
|
- Keine YAML-/Parameteränderung
|
||||||
|
- Keine Migration
|
||||||
|
|
||||||
|
## Prüfung
|
||||||
|
|
||||||
|
Nach Einspielen:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
php bin/console mto:agent:config:validate
|
||||||
|
php bin/console mto:agent:eval:run retrieval
|
||||||
|
php bin/console mto:agent:eval:run shop_query
|
||||||
|
php bin/console mto:agent:eval:run followup
|
||||||
|
php bin/console mto:agent:eval:run answer_guard
|
||||||
|
```
|
||||||
|
|
||||||
|
Im Admin:
|
||||||
|
|
||||||
|
1. `/admin/evals/cases/new` öffnen.
|
||||||
|
2. Einen Test-Case anlegen oder einen bestehenden Test-Case auswählen.
|
||||||
|
3. `Case löschen` klicken.
|
||||||
|
4. Bestätigungsdialog bestätigen.
|
||||||
|
5. Prüfen, dass der Case aus der Liste verschwindet.
|
||||||
|
6. Den betroffenen Eval-Typ erneut laufen lassen.
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
# RetrieX Patch p101d - Admin Eval Case Delete Hotfix
|
||||||
|
|
||||||
|
## Ziel
|
||||||
|
|
||||||
|
Behebt einen Fehler aus p101c, bei dem beim Löschen eines Eval-Cases folgende Exception auftreten konnte:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Call to undefined method App\Service\Admin\EvalAdminService::normalizeExistingCaseId()
|
||||||
|
```
|
||||||
|
|
||||||
|
## Ursache
|
||||||
|
|
||||||
|
`EvalAdminService::deleteCase()` ruft eine Validierungs-Hilfsmethode für bestehende Case-IDs auf. Diese Methode wurde in p101c referenziert, aber nicht in die Service-Klasse aufgenommen.
|
||||||
|
|
||||||
|
## Änderung
|
||||||
|
|
||||||
|
Ergänzt `normalizeExistingCaseId()` in `EvalAdminService`.
|
||||||
|
|
||||||
|
Die Methode:
|
||||||
|
|
||||||
|
- trimmt die übergebene Case-ID,
|
||||||
|
- verhindert leere IDs,
|
||||||
|
- erlaubt nur ASCII-Buchstaben, Ziffern, Unterstriche und Bindestriche; das erste Zeichen muss ein Buchstabe oder eine Ziffer sein,
|
||||||
|
- wirft bei leeren oder ungültigen IDs eine `InvalidArgumentException` mit verständlicher Fehlermeldung.
|
||||||
|
|
||||||
|
## Geänderte Dateien
|
||||||
|
|
||||||
|
```text
|
||||||
|
src/Service/Admin/EvalAdminService.php
|
||||||
|
patch_history/RETRIEX_PATCH_101D_ADMIN_EVAL_CASE_DELETE_HOTFIX_README.md
|
||||||
|
```
|
||||||
|
|
||||||
|
## Nicht geändert
|
||||||
|
|
||||||
|
```text
|
||||||
|
keine Eval-Logik
|
||||||
|
keine Retrieval-Logik
|
||||||
|
keine Shopquery-Logik
|
||||||
|
keine Follow-up-Logik
|
||||||
|
keine Answer-Guard-Logik
|
||||||
|
keine YAML-/Parameteränderung
|
||||||
|
keine bestehenden Eval-Cases
|
||||||
|
keine Migration
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prüfung
|
||||||
|
|
||||||
|
```bash
|
||||||
|
php -l src/Service/Admin/EvalAdminService.php
|
||||||
|
php bin/console mto:agent:config:validate
|
||||||
|
```
|
||||||
|
|
||||||
|
Danach im Admin einen Eval-Case löschen.
|
||||||
@@ -92,6 +92,7 @@ final class AdminEvalController extends AbstractController
|
|||||||
|
|
||||||
return $this->render('admin/evals/case_new.html.twig', [
|
return $this->render('admin/evals/case_new.html.twig', [
|
||||||
'types' => $evals->supportedTypes(),
|
'types' => $evals->supportedTypes(),
|
||||||
|
'cases_by_type' => $evals->casesByType(),
|
||||||
'case_draft' => $draft,
|
'case_draft' => $draft,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
@@ -146,7 +147,46 @@ final class AdminEvalController extends AbstractController
|
|||||||
|
|
||||||
return $this->render('admin/evals/case_new.html.twig', [
|
return $this->render('admin/evals/case_new.html.twig', [
|
||||||
'types' => $evals->supportedTypes(),
|
'types' => $evals->supportedTypes(),
|
||||||
|
'cases_by_type' => $evals->casesByType(),
|
||||||
'case_draft' => $draft,
|
'case_draft' => $draft,
|
||||||
], new Response('', Response::HTTP_UNPROCESSABLE_ENTITY));
|
], new Response('', Response::HTTP_UNPROCESSABLE_ENTITY));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Handles the POST request that deletes a single eval case.
 *
 * Access requires ROLE_KNOWLEDGE_ADMIN. The CSRF token id is built from the
 * submitted eval type and case id, so a token is only valid for exactly one
 * type/case combination. Anything thrown by the service is shown as a
 * "danger" flash message instead of aborting the request. The user is always
 * redirected back to the case page; if the type is not one of the names
 * reported by supportedTypeNames(), the redirect uses 'retrieval'.
 */
#[Route('/cases/delete', name: 'admin_evals_case_delete', methods: ['POST'])]
public function deleteCase(Request $request, EvalAdminService $evals): Response
{
    $this->denyAccessUnlessGranted(ApplicationRoles::ROLE_KNOWLEDGE_ADMIN);

    $form = $request->request;
    $type = trim((string) $form->get('type', 'retrieval'));
    $caseId = trim((string) $form->get('case_id', ''));

    // The token id must match the one rendered into the delete form for this case.
    $tokenId = sprintf('admin_eval_case_delete_%s_%s', $type, $caseId);
    $submittedToken = (string) $form->get('_token');
    if (!$this->isCsrfTokenValid($tokenId, $submittedToken)) {
        throw $this->createAccessDeniedException();
    }

    try {
        $result = $evals->deleteCase($type, $caseId);

        // Prefer the values reported back by the service over the raw form input.
        $type = (string) ($result['type'] ?? $type);
        $removedId = (string) ($result['id'] ?? $caseId);

        $this->addFlash(
            'success',
            sprintf('Eval-Case "%s" wurde aus %s.ndjson entfernt.', $removedId, $type)
        );
    } catch (\Throwable $e) {
        $this->addFlash('danger', $e->getMessage());
    }

    // Never redirect with an unsupported type; fall back to the default tab.
    $redirectType = in_array($type, $evals->supportedTypeNames(), true) ? $type : 'retrieval';

    return $this->redirectToRoute('admin_evals_case_new', [
        'type' => $redirectType,
    ]);
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -290,6 +290,77 @@ final readonly class EvalAdminService
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Removes one eval case from the NDJSON case file of the given eval type.
 *
 * The whole file is read and validated before anything is written. If a
 * non-empty line is not valid JSON, does not decode to an array, or no line
 * carries the requested id, an exception is thrown and the file is not
 * rewritten. Blank lines are dropped; the remaining lines are written back
 * trimmed, one per line. Every line whose "id" equals the requested id is
 * removed.
 *
 * @param string $type   Eval type, passed through assertSupportedType()
 *                       (presumably rejects unknown types - confirm there).
 * @param string $caseId Case id, validated by normalizeExistingCaseId().
 *
 * @return array{type:string,id:string,path:string,case_count:int}
 *
 * @throws \InvalidArgumentException If the case id is empty or malformed.
 * @throws \RuntimeException         If the file is missing, unreadable, contains an invalid
 *                                   NDJSON line, does not contain the case, or cannot be
 *                                   written completely.
 */
public function deleteCase(string $type, string $caseId): array
{
    $type = $this->assertSupportedType($type);
    $caseId = $this->normalizeExistingCaseId($caseId);
    $path = $this->caseFilePath($type);

    if (!is_file($path)) {
        throw new \RuntimeException(sprintf('Eval-Case-Datei wurde nicht gefunden: %s', $path));
    }

    $lines = file($path, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        throw new \RuntimeException(sprintf('Eval-Case-Datei konnte nicht gelesen werden: %s', $path));
    }

    $keptLines = [];
    $deleted = false;

    foreach ($lines as $line) {
        $trimmed = trim((string) $line);
        if ($trimmed === '') {
            continue;
        }

        try {
            $decoded = json_decode($trimmed, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // Keep the original exception attached so the root cause stays visible in logs.
            throw new \RuntimeException(
                sprintf(
                    'Eval-Case-Datei enthält ungültiges JSON und wurde nicht verändert: %s',
                    $e->getMessage()
                ),
                previous: $e,
            );
        }

        if (!is_array($decoded)) {
            throw new \RuntimeException('Eval-Case-Datei enthält eine ungültige NDJSON-Zeile und wurde nicht verändert.');
        }

        // Only scalar ids are compared: casting an array id to string would raise an
        // "Array to string conversion" warning and yield "Array", which could match
        // a case that is literally named "Array".
        $lineId = $decoded['id'] ?? null;
        if (is_scalar($lineId) && (string) $lineId === $caseId) {
            $deleted = true;
            continue;
        }

        $keptLines[] = $trimmed;
    }

    if (!$deleted) {
        throw new \RuntimeException(sprintf(
            'Eval-Case "%s" wurde im Typ "%s" nicht gefunden.',
            $caseId,
            $type
        ));
    }

    $contents = $keptLines === [] ? '' : implode(PHP_EOL, $keptLines) . PHP_EOL;

    // file_put_contents() returns the number of bytes written; a short write is a
    // failure too, not only a literal false.
    $written = file_put_contents($path, $contents, LOCK_EX);
    if ($written !== strlen($contents)) {
        throw new \RuntimeException(sprintf('Eval-Case-Datei konnte nicht geschrieben werden: %s', $path));
    }

    return [
        'type' => $type,
        'id' => $caseId,
        'path' => $path,
        // Re-read through loadCases() so the count reflects what the loader actually accepts.
        'case_count' => count($this->loadCases($type)),
    ];
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param array<int, EvalCase> $cases
|
* @param array<int, EvalCase> $cases
|
||||||
* @return array<int, EvalCase>
|
* @return array<int, EvalCase>
|
||||||
@@ -411,6 +482,23 @@ final readonly class EvalAdminService
|
|||||||
return $id;
|
return $id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalizes and validates the id of an eval case that is expected to exist.
 *
 * Surrounding whitespace is stripped. The remaining id must be non-empty,
 * start with an ASCII letter or digit, and otherwise consist only of ASCII
 * letters, digits, underscores and hyphens.
 *
 * @param string $id Raw case id as received from the caller.
 *
 * @return string The trimmed id.
 *
 * @throws \InvalidArgumentException If the id is empty or contains other characters.
 */
private function normalizeExistingCaseId(string $id): string
{
    $normalized = trim($id);

    if ('' === $normalized) {
        throw new \InvalidArgumentException('Es wurde keine Eval-Case-ID zum Löschen übergeben.');
    }

    $isWellFormed = preg_match('/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/', $normalized) === 1;
    if ($isWellFormed) {
        return $normalized;
    }

    throw new \InvalidArgumentException(
        'Die Eval-Case-ID ist ungültig. Erlaubt sind nur Buchstaben, Zahlen, Unterstriche und Bindestriche.'
    );
}
|
||||||
|
|
||||||
private function caseIdExists(string $id): bool
|
private function caseIdExists(string $id): bool
|
||||||
{
|
{
|
||||||
foreach (array_keys(self::TYPES) as $type) {
|
foreach (array_keys(self::TYPES) as $type) {
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
{% extends 'admin/base.html.twig' %}
|
{% extends 'admin/base.html.twig' %}
|
||||||
|
|
||||||
{% block title %}Eval-Case erstellen{% endblock %}
|
{% block title %}Eval-Cases verwalten{% endblock %}
|
||||||
|
|
||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
|
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
|
||||||
<div>
|
<div>
|
||||||
<h1 class="h3 mb-1">
|
<h1 class="h3 mb-1">
|
||||||
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
|
<i class="bi bi-journal-plus"></i> Eval-Cases verwalten
|
||||||
</h1>
|
</h1>
|
||||||
<div class="small text-secondary">
|
<div class="small text-secondary">
|
||||||
Neue Regression-Cases separat anlegen, ohne die Eval-Suite-Übersicht aufzublähen.
|
Neue Regression-Cases separat anlegen oder bestehende Cases entfernen, ohne die Eval-Suite-Übersicht aufzublähen.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -203,6 +203,59 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="col-xl-4">
|
<div class="col-xl-4">
|
||||||
|
<div class="card bg-black border-danger text-light shadow-sm mb-4">
|
||||||
|
<div class="card-body">
|
||||||
|
<h5 class="text-danger mb-3">
|
||||||
|
<i class="bi bi-trash3"></i> Bestehende Eval-Cases entfernen
|
||||||
|
</h5>
|
||||||
|
<p class="small text-secondary mb-3">
|
||||||
|
Hier kannst du falsch angelegte oder nicht mehr benötigte Cases aus den
|
||||||
|
<code>tests/evals/cases/*.ndjson</code>-Dateien entfernen. Das Löschen betrifft nur den Eval-Case,
|
||||||
|
nicht das RAG-Wissen, nicht den Shop und nicht die bestehenden Reports.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{% for type, label in types %}
|
||||||
|
{% set cases = cases_by_type[type]|default([]) %}
|
||||||
|
<details class="border border-secondary rounded p-3 mb-3" {% if type == case_draft.type|default('retrieval') %}open{% endif %}>
|
||||||
|
<summary class="text-info" style="cursor:pointer;">
|
||||||
|
{{ label }} <span class="text-secondary">({{ cases|length }} Cases)</span>
|
||||||
|
</summary>
|
||||||
|
|
||||||
|
{% if cases is empty %}
|
||||||
|
<div class="small text-secondary mt-3">
|
||||||
|
Für diesen Typ gibt es aktuell keine Cases.
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="mt-3">
|
||||||
|
{% for case in cases %}
|
||||||
|
<div class="border-top border-secondary pt-3 mt-3">
|
||||||
|
<div class="small mb-2">
|
||||||
|
<code>{{ case.id }}</code>
|
||||||
|
<div class="text-secondary mt-1">{{ case.prompt }}</div>
|
||||||
|
</div>
|
||||||
|
<form method="post"
|
||||||
|
action="{{ path('admin_evals_case_delete') }}"
|
||||||
|
onsubmit="return confirm('Eval-Case {{ case.id }} wirklich löschen? Diese Änderung entfernt die NDJSON-Zeile dauerhaft.');">
|
||||||
|
<input type="hidden" name="_token" value="{{ csrf_token('admin_eval_case_delete_' ~ type ~ '_' ~ case.id) }}">
|
||||||
|
<input type="hidden" name="type" value="{{ type }}">
|
||||||
|
<input type="hidden" name="case_id" value="{{ case.id }}">
|
||||||
|
<button type="submit" class="btn btn-sm btn-outline-danger">
|
||||||
|
<i class="bi bi-trash3"></i> Case löschen
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</details>
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<div class="small text-secondary">
|
||||||
|
Nach dem Löschen solltest du den betroffenen Eval-Typ einmal ausführen, damit der Report zum neuen Case-Bestand passt.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
|
<div class="card bg-black border-secondary text-light shadow-sm mb-4">
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
<h5 class="text-info mb-3">
|
<h5 class="text-info mb-3">
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
<div class="d-flex flex-wrap gap-2">
|
<div class="d-flex flex-wrap gap-2">
|
||||||
<a href="{{ path('admin_evals_case_new', {type: selected_type|default('retrieval')}) }}"
|
<a href="{{ path('admin_evals_case_new', {type: selected_type|default('retrieval')}) }}"
|
||||||
class="btn btn-sm btn-outline-warning">
|
class="btn btn-sm btn-outline-warning">
|
||||||
<i class="bi bi-journal-plus"></i> Eval-Case erstellen
|
<i class="bi bi-journal-plus"></i> Eval-Cases verwalten
|
||||||
</a>
|
</a>
|
||||||
<a href="{{ path('admin_model_config_list') }}"
|
<a href="{{ path('admin_model_config_list') }}"
|
||||||
class="btn btn-sm btn-outline-secondary">
|
class="btn btn-sm btn-outline-secondary">
|
||||||
|
|||||||
@@ -16,4 +16,4 @@
|
|||||||
{"id":"retrieval_negative_003","type":"retrieval","prompt":"testomat 2000 self clean reinigungsloesung","assert":{"min_results":1,"must_include_one_of_document_ids":["51589532-a1a1-46e0-94b2-a139dce78543","b8c3343b-931e-4994-9d53-a2130efc846f"],"must_include_any_terms":["reinigungslösung","self clean"],"must_not_include_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"]}}
|
{"id":"retrieval_negative_003","type":"retrieval","prompt":"testomat 2000 self clean reinigungsloesung","assert":{"min_results":1,"must_include_one_of_document_ids":["51589532-a1a1-46e0-94b2-a139dce78543","b8c3343b-931e-4994-9d53-a2130efc846f"],"must_include_any_terms":["reinigungslösung","self clean"],"must_not_include_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"]}}
|
||||||
{"id":"retrieval_short_001","type":"retrieval","prompt":"evo th","assert":{"min_results":1,"must_include_one_of_document_ids":["eb91c1be-4546-4ed5-8b01-f075519d675b","74fdad85-5e4e-4f08-8d95-402f3180ed55"],"must_include_any_terms":["evo"]}}
|
{"id":"retrieval_short_001","type":"retrieval","prompt":"evo th","assert":{"min_results":1,"must_include_one_of_document_ids":["eb91c1be-4546-4ed5-8b01-f075519d675b","74fdad85-5e4e-4f08-8d95-402f3180ed55"],"must_include_any_terms":["evo"]}}
|
||||||
{"id":"retrieval_short_002","type":"retrieval","prompt":"808","assert":{"min_results":1,"must_include_one_of_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"],"must_include_any_terms":["808"]}}
|
{"id":"retrieval_short_002","type":"retrieval","prompt":"808","assert":{"min_results":1,"must_include_one_of_document_ids":["26129c01-c09f-4c71-9c80-7ddffb6c77fb"],"must_include_any_terms":["808"]}}
|
||||||
{"id":"retrieval_noise_001","type":"retrieval","prompt":"dsgfsdgfsdgf","assert":{"max_results":0}}
|
{"id":"retrieval_notfound_doc","type":"retrieval","prompt":"hdfghdfghdfhg","assert":{"min_results":0}}
|
||||||
|
|||||||
Reference in New Issue
Block a user