Files
MtoRagSystem/src/Eval/RetrievalDebugRunner.php
2026-04-22 22:22:27 +02:00

434 lines
12 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Eval;
use App\Eval\Dto\EvalCase;
use App\Eval\Dto\EvalResult;
use App\Knowledge\Retrieval\NdjsonHybridRetriever;
/**
 * Executes one retrieval eval case: asks the retriever for its debug rows for
 * the case prompt and checks those rows against the assertions declared on the
 * case.
 *
 * Supported assertion keys (all optional, read from `$case->assert`):
 *  - selection_mode / route / intent                     exact match against the first row
 *  - selection_mode_in / route_in / intent_in            first-row value must be in the list
 *  - min_results / max_results                           bounds on the number of rows
 *  - must_include_document_ids / must_include_chunk_ids  every listed ID must be present
 *  - must_include_one_of_document_ids / ..._chunk_ids    at least one listed ID must be present
 *  - must_not_include_document_ids / ..._chunk_ids       none of the listed IDs may be present
 *  - must_include_any_terms / must_include_all_terms     case-insensitive substring checks
 *                                                        against the joined row texts
 *
 * NOTE(review): rows are presumably associative arrays carrying string values
 * under `selection_mode`, `route`, `intent`, `document_id`, `chunk_id` and
 * `text` — confirm against NdjsonHybridRetriever::retrieveDebug().
 */
final readonly class RetrievalDebugRunner
{
    public function __construct(
        private NdjsonHybridRetriever $retriever,
    ) {
    }

    /**
     * Runs the case and returns its result.
     *
     * The case passes when no assertion produced a failure message. The
     * measured duration covers only the retriever call, not the assertions.
     */
    public function run(EvalCase $case): EvalResult
    {
        $start = microtime(true);
        $rows = $this->retriever->retrieveDebug($case->prompt);
        $durationMs = round((microtime(true) - $start) * 1000, 2);

        $failures = [];
        $assert = $case->assert;

        $resultCount = count($rows);

        // Routing metadata is read from the first row only.
        $first = $rows[0] ?? [];
        $selectionMode = $this->extractString($first, 'selection_mode');
        $route = $this->extractString($first, 'route');
        $intent = $this->extractString($first, 'intent');

        $documentIds = $this->extractUniqueStringValues($rows, 'document_id');
        $chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id');
        $joinedText = $this->extractJoinedText($rows);

        // ---------------------------------------------------------
        // Strict single-value assertions
        // ---------------------------------------------------------
        $this->assertValueEquals(
            failures: $failures,
            actual: $selectionMode,
            expected: $assert['selection_mode'] ?? null,
            label: 'selection_mode',
        );
        $this->assertValueEquals(
            failures: $failures,
            actual: $route,
            expected: $assert['route'] ?? null,
            label: 'route',
        );
        $this->assertValueEquals(
            failures: $failures,
            actual: $intent,
            expected: $assert['intent'] ?? null,
            label: 'intent',
        );

        // ---------------------------------------------------------
        // Flexible multi-value assertions
        // ---------------------------------------------------------
        $this->assertValueInList(
            failures: $failures,
            actual: $selectionMode,
            expectedList: $assert['selection_mode_in'] ?? [],
            label: 'selection_mode',
        );
        $this->assertValueInList(
            failures: $failures,
            actual: $route,
            expectedList: $assert['route_in'] ?? [],
            label: 'route',
        );
        $this->assertValueInList(
            failures: $failures,
            actual: $intent,
            expectedList: $assert['intent_in'] ?? [],
            label: 'intent',
        );

        // ---------------------------------------------------------
        // Result count assertions
        // ---------------------------------------------------------
        if (isset($assert['min_results']) && $resultCount < (int) $assert['min_results']) {
            $failures[] = sprintf(
                'result_count too low: expected >= %d, got %d.',
                (int) $assert['min_results'],
                $resultCount
            );
        }

        if (isset($assert['max_results']) && $resultCount > (int) $assert['max_results']) {
            $failures[] = sprintf(
                'result_count too high: expected <= %d, got %d.',
                (int) $assert['max_results'],
                $resultCount
            );
        }

        // ---------------------------------------------------------
        // ID assertions
        // ---------------------------------------------------------
        $this->assertContainsAll(
            failures: $failures,
            actualValues: $documentIds,
            expectedList: $assert['must_include_document_ids'] ?? [],
            label: 'document_id',
        );
        $this->assertContainsAll(
            failures: $failures,
            actualValues: $chunkIds,
            expectedList: $assert['must_include_chunk_ids'] ?? [],
            label: 'chunk_id',
        );
        $this->assertContainsAtLeastOne(
            failures: $failures,
            actualValues: $documentIds,
            expectedList: $assert['must_include_one_of_document_ids'] ?? [],
            label: 'document_id',
        );
        $this->assertContainsAtLeastOne(
            failures: $failures,
            actualValues: $chunkIds,
            expectedList: $assert['must_include_one_of_chunk_ids'] ?? [],
            label: 'chunk_id',
        );
        $this->assertContainsNone(
            failures: $failures,
            actualValues: $documentIds,
            forbiddenList: $assert['must_not_include_document_ids'] ?? [],
            label: 'document_id',
        );
        $this->assertContainsNone(
            failures: $failures,
            actualValues: $chunkIds,
            forbiddenList: $assert['must_not_include_chunk_ids'] ?? [],
            label: 'chunk_id',
        );

        // ---------------------------------------------------------
        // Text / term assertions
        // ---------------------------------------------------------
        $requiredAnyTerms = $this->normalizeStringList($assert['must_include_any_terms'] ?? []);
        $requiredAllTerms = $this->normalizeStringList($assert['must_include_all_terms'] ?? []);

        $matchedAnyTerms = $this->findMatchingTerms(haystack: $joinedText, terms: $requiredAnyTerms);
        $matchedAllTerms = $this->findMatchingTerms(haystack: $joinedText, terms: $requiredAllTerms);

        if ($requiredAnyTerms !== [] && $matchedAnyTerms === []) {
            $failures[] = sprintf(
                'none of the required any-terms were found in the retrieval text: [%s].',
                implode(', ', $requiredAnyTerms)
            );
        }

        // Every required all-term that did not match is reported individually,
        // in the order the case declared them.
        foreach (array_diff($requiredAllTerms, $matchedAllTerms) as $missingTerm) {
            $failures[] = sprintf(
                'required all-term "%s" was not found in the retrieval text.',
                $missingTerm
            );
        }

        return new EvalResult(
            caseId: $case->id,
            type: $case->type,
            passed: $failures === [],
            durationMs: $durationMs,
            failures: $failures,
            details: [
                'prompt' => $case->prompt,
                'result_count' => $resultCount,
                'selection_mode' => $selectionMode,
                'route' => $route,
                'intent' => $intent,
                'document_ids' => $documentIds,
                'chunk_ids' => $chunkIds,
                'matched_any_terms' => $matchedAnyTerms,
                'matched_all_terms' => $matchedAllTerms,
            ],
        );
    }

    /**
     * Returns the trimmed string stored under $key, or '' when the key is
     * missing or its value is not a string.
     *
     * @param array<string, mixed> $row
     */
    private function extractString(array $row, string $key): string
    {
        $value = $row[$key] ?? null;

        return is_string($value) ? trim($value) : '';
    }

    /**
     * Collects the distinct, trimmed, non-empty string values stored under
     * $key across all rows, in first-seen order.
     *
     * The values are gathered in a list rather than read back from array keys:
     * PHP casts decimal-integer string keys ("42") to int, which would turn
     * numeric IDs into ints and make every strict comparison against the
     * string expectations fail.
     *
     * @param array<int, array<string, mixed>> $rows
     * @return array<int, string>
     */
    private function extractUniqueStringValues(array $rows, string $key): array
    {
        $seen = [];
        $values = [];

        foreach ($rows as $row) {
            $value = $row[$key] ?? null;
            if (!is_string($value)) {
                continue;
            }

            $value = trim($value);
            if ($value === '' || isset($seen[$value])) {
                continue;
            }

            // The key map is only a membership set; the original string is kept in $values.
            $seen[$value] = true;
            $values[] = $value;
        }

        return $values;
    }

    /**
     * Joins the trimmed, non-empty `text` values of all rows, separated by a
     * blank line. Rows without a string `text` are skipped.
     *
     * @param array<int, array<string, mixed>> $rows
     */
    private function extractJoinedText(array $rows): string
    {
        $parts = [];

        foreach ($rows as $row) {
            $text = $row['text'] ?? null;
            if (!is_string($text)) {
                continue;
            }

            $text = trim($text);
            if ($text !== '') {
                $parts[] = $text;
            }
        }

        return implode("\n\n", $parts);
    }

    /**
     * Records a failure when an expected value is configured (non-null) and
     * its string form differs from the actual value.
     *
     * @param array<int, string> $failures
     */
    private function assertValueEquals(
        array &$failures,
        string $actual,
        mixed $expected,
        string $label
    ): void {
        if ($expected === null) {
            return;
        }

        $expected = (string) $expected;
        if ($expected !== $actual) {
            $failures[] = sprintf(
                '%s mismatch: expected "%s", got "%s".',
                $label,
                $expected,
                $actual
            );
        }
    }

    /**
     * Records a failure when a non-empty expected list is configured and the
     * actual value is not one of its entries.
     *
     * @param array<int, string> $failures
     */
    private function assertValueInList(
        array &$failures,
        string $actual,
        mixed $expectedList,
        string $label
    ): void {
        $expected = $this->normalizeStringList($expectedList);
        if ($expected === [] || in_array($actual, $expected, true)) {
            return;
        }

        $failures[] = sprintf(
            '%s mismatch: expected one of [%s], got "%s".',
            $label,
            implode(', ', $expected),
            $actual
        );
    }

    /**
     * Records one failure per expected value that is absent from the actual
     * values.
     *
     * @param array<int, string> $failures
     * @param array<int, string> $actualValues
     */
    private function assertContainsAll(
        array &$failures,
        array $actualValues,
        mixed $expectedList,
        string $label
    ): void {
        foreach ($this->normalizeStringList($expectedList) as $expectedValue) {
            if (!in_array($expectedValue, $actualValues, true)) {
                $failures[] = sprintf(
                    'missing expected %s "%s".',
                    $label,
                    $expectedValue
                );
            }
        }
    }

    /**
     * Records a single failure when a non-empty expected list is configured
     * and none of its entries appears among the actual values.
     *
     * @param array<int, string> $failures
     * @param array<int, string> $actualValues
     */
    private function assertContainsAtLeastOne(
        array &$failures,
        array $actualValues,
        mixed $expectedList,
        string $label
    ): void {
        $expected = $this->normalizeStringList($expectedList);
        if ($expected === []) {
            return;
        }

        foreach ($expected as $candidate) {
            if (in_array($candidate, $actualValues, true)) {
                return;
            }
        }

        $failures[] = sprintf(
            'none of the expected %s values were found. Expected one of [%s], got [%s].',
            $label,
            implode(', ', $expected),
            implode(', ', $actualValues)
        );
    }

    /**
     * Records one failure per forbidden value that appears among the actual
     * values.
     *
     * @param array<int, string> $failures
     * @param array<int, string> $actualValues
     */
    private function assertContainsNone(
        array &$failures,
        array $actualValues,
        mixed $forbiddenList,
        string $label
    ): void {
        foreach ($this->normalizeStringList($forbiddenList) as $forbiddenValue) {
            if (in_array($forbiddenValue, $actualValues, true)) {
                $failures[] = sprintf(
                    'forbidden %s "%s" was present in the retrieval results.',
                    $label,
                    $forbiddenValue
                );
            }
        }
    }

    /**
     * Returns the terms that occur in the haystack as case-insensitive
     * substrings, in their original spelling and order. Terms that are empty
     * after trimming never match.
     *
     * @param array<int, string> $terms
     * @return array<int, string>
     */
    private function findMatchingTerms(string $haystack, array $terms): array
    {
        if ($terms === []) {
            return [];
        }

        // Normalize the haystack once; it is the same for every term.
        $normalizedHaystack = $this->normalizeText($haystack);
        $matches = [];

        foreach ($terms as $term) {
            $needle = $this->normalizeText($term);
            if ($needle !== '' && str_contains($normalizedHaystack, $needle)) {
                $matches[] = $term;
            }
        }

        return array_values(array_unique($matches));
    }

    /**
     * Trims and lowercases a string for case-insensitive comparison, using
     * the multibyte-aware lowercase when the mbstring extension is available.
     */
    private function normalizeText(string $value): string
    {
        $value = trim($value);
        if ($value === '') {
            return '';
        }

        return function_exists('mb_strtolower')
            ? mb_strtolower($value)
            : strtolower($value);
    }

    /**
     * Reduces an arbitrary value to a list of distinct, trimmed, non-empty
     * strings. Non-array input yields an empty list; non-string items are
     * dropped.
     *
     * @return array<int, string>
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_string($item)) {
                continue;
            }

            $item = trim($item);
            if ($item !== '') {
                $out[] = $item;
            }
        }

        return array_values(array_unique($out));
    }
}