first test suite retrieval
This commit is contained in:
184
src/Eval/RetrievalDebugRunner.php
Normal file
184
src/Eval/RetrievalDebugRunner.php
Normal file
@@ -0,0 +1,184 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Eval;
|
||||
|
||||
use App\Eval\Dto\EvalCase;
|
||||
use App\Eval\Dto\EvalResult;
|
||||
use App\Knowledge\Retrieval\NdjsonHybridRetriever;
|
||||
|
||||
/**
 * Runs one retrieval eval case through NdjsonHybridRetriever::retrieveDebug()
 * and checks the returned debug rows against the assertions of the case.
 */
final readonly class RetrievalDebugRunner
{
    public function __construct(
        private NdjsonHybridRetriever $retriever,
    ) {
    }

    /**
     * Executes the case prompt and evaluates every assertion present on the case.
     *
     * Assertion keys read from $case->assert:
     *  - selection_mode, route, intent: compared (as strings) with the trimmed
     *    values found on the first returned row;
     *  - min_results / max_results: inclusive bounds on the number of rows;
     *  - must_include_document_ids / must_include_chunk_ids: IDs that must occur
     *    in at least one returned row.
     *
     * Only the retrieveDebug() call itself is timed.
     *
     * @return EvalResult passed is true only when no assertion failed
     */
    public function run(EvalCase $case): EvalResult
    {
        $start = microtime(true);
        $rows = $this->retriever->retrieveDebug($case->prompt);
        $durationMs = round((microtime(true) - $start) * 1000, 2);

        $resultCount = count($rows);
        $first = $rows[0] ?? [];

        // Routing metadata is read from the first row only.
        $observed = [
            'selection_mode' => $this->extractString($first, 'selection_mode'),
            'route' => $this->extractString($first, 'route'),
            'intent' => $this->extractString($first, 'intent'),
        ];

        $foundIds = [
            'document_id' => $this->extractUniqueStringValues($rows, 'document_id'),
            'chunk_id' => $this->extractUniqueStringValues($rows, 'chunk_id'),
        ];

        $assert = $case->assert;
        $failures = [];

        foreach ($observed as $field => $actual) {
            if (isset($assert[$field]) && (string) $assert[$field] !== $actual) {
                $failures[] = sprintf(
                    '%s mismatch: expected "%s", got "%s".',
                    $field,
                    (string) $assert[$field],
                    $actual,
                );
            }
        }

        if (isset($assert['min_results']) && $resultCount < (int) $assert['min_results']) {
            $failures[] = sprintf(
                'result_count too low: expected >= %d, got %d.',
                (int) $assert['min_results'],
                $resultCount,
            );
        }

        if (isset($assert['max_results']) && $resultCount > (int) $assert['max_results']) {
            $failures[] = sprintf(
                'result_count too high: expected <= %d, got %d.',
                (int) $assert['max_results'],
                $resultCount,
            );
        }

        // Maps the assertion key to the row field whose values must contain each expected ID.
        $inclusionChecks = [
            'must_include_document_ids' => 'document_id',
            'must_include_chunk_ids' => 'chunk_id',
        ];

        foreach ($inclusionChecks as $assertKey => $field) {
            foreach ($this->normalizeStringList($assert[$assertKey] ?? []) as $expectedId) {
                if (!in_array($expectedId, $foundIds[$field], true)) {
                    $failures[] = sprintf('missing expected %s "%s".', $field, $expectedId);
                }
            }
        }

        return new EvalResult(
            caseId: $case->id,
            type: $case->type,
            passed: $failures === [],
            durationMs: $durationMs,
            failures: $failures,
            details: [
                'prompt' => $case->prompt,
                'result_count' => $resultCount,
                'selection_mode' => $observed['selection_mode'],
                'route' => $observed['route'],
                'intent' => $observed['intent'],
                'document_ids' => $foundIds['document_id'],
                'chunk_ids' => $foundIds['chunk_id'],
            ],
        );
    }

    /**
     * Returns the trimmed string stored under $key, or '' when the key is
     * missing or its value is not a string.
     *
     * @param array<string, mixed> $row
     */
    private function extractString(array $row, string $key): string
    {
        $value = $row[$key] ?? null;

        return is_string($value) ? trim($value) : '';
    }

    /**
     * Collects the distinct, trimmed, non-empty string values stored under $key
     * across all rows, in first-seen order.
     *
     * Values are deliberately NOT deduplicated by using them as array keys:
     * PHP casts decimal-integer string keys such as "123" to int, which would
     * make the strict in_array() comparisons in run() fail for numeric IDs.
     *
     * @param array<int, array<string, mixed>> $rows
     * @return array<int, string>
     */
    private function extractUniqueStringValues(array $rows, string $key): array
    {
        $candidates = [];

        foreach ($rows as $row) {
            $candidates[] = $row[$key] ?? null;
        }

        // Filtering, trimming and deduplication are shared with assertion lists.
        return $this->normalizeStringList($candidates);
    }

    /**
     * Normalizes an arbitrary value into a list of distinct, trimmed, non-empty
     * strings. Non-array input yields an empty list; non-string items are skipped.
     *
     * @return array<int, string>
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];

        foreach ($value as $item) {
            if (!is_string($item)) {
                continue;
            }

            $item = trim($item);

            if ($item !== '') {
                $out[] = $item;
            }
        }

        return array_values(array_unique($out));
    }
}
|
||||
Reference in New Issue
Block a user