retriever->retrieveDebug($case->prompt); $durationMs = round((microtime(true) - $start) * 1000, 2); $resultCount = count($rows); $first = $rows[0] ?? []; $selectionMode = $this->extractString($first, 'selection_mode'); $route = $this->extractString($first, 'route'); $intent = $this->extractString($first, 'intent'); $documentIds = $this->extractUniqueStringValues($rows, 'document_id'); $chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id'); $joinedText = $this->extractJoinedText($rows); $assert = $case->assert; // --------------------------------------------------------- // Strict single-value assertions // --------------------------------------------------------- if (isset($assert['selection_mode']) && (string) $assert['selection_mode'] !== $selectionMode) { $failures[] = sprintf( 'selection_mode mismatch: expected "%s", got "%s".', (string) $assert['selection_mode'], $selectionMode ); } if (isset($assert['route']) && (string) $assert['route'] !== $route) { $failures[] = sprintf( 'route mismatch: expected "%s", got "%s".', (string) $assert['route'], $route ); } if (isset($assert['intent']) && (string) $assert['intent'] !== $intent) { $failures[] = sprintf( 'intent mismatch: expected "%s", got "%s".', (string) $assert['intent'], $intent ); } // --------------------------------------------------------- // Flexible multi-value assertions // --------------------------------------------------------- $this->assertValueInList( failures: $failures, actual: $selectionMode, expectedList: $assert['selection_mode_in'] ?? [], label: 'selection_mode' ); $this->assertValueInList( failures: $failures, actual: $route, expectedList: $assert['route_in'] ?? [], label: 'route' ); $this->assertValueInList( failures: $failures, actual: $intent, expectedList: $assert['intent_in'] ?? 
[], label: 'intent' ); // --------------------------------------------------------- // Result count assertions // --------------------------------------------------------- if (isset($assert['min_results']) && $resultCount < (int) $assert['min_results']) { $failures[] = sprintf( 'result_count too low: expected >= %d, got %d.', (int) $assert['min_results'], $resultCount ); } if (isset($assert['max_results']) && $resultCount > (int) $assert['max_results']) { $failures[] = sprintf( 'result_count too high: expected <= %d, got %d.', (int) $assert['max_results'], $resultCount ); } // --------------------------------------------------------- // ID assertions // --------------------------------------------------------- foreach ($this->normalizeStringList($assert['must_include_document_ids'] ?? []) as $expectedDocumentId) { if (!in_array($expectedDocumentId, $documentIds, true)) { $failures[] = sprintf( 'missing expected document_id "%s".', $expectedDocumentId ); } } foreach ($this->normalizeStringList($assert['must_include_chunk_ids'] ?? []) as $expectedChunkId) { if (!in_array($expectedChunkId, $chunkIds, true)) { $failures[] = sprintf( 'missing expected chunk_id "%s".', $expectedChunkId ); } } $this->assertContainsAtLeastOne( failures: $failures, actualValues: $documentIds, expectedList: $assert['must_include_one_of_document_ids'] ?? [], label: 'document_id' ); $this->assertContainsAtLeastOne( failures: $failures, actualValues: $chunkIds, expectedList: $assert['must_include_one_of_chunk_ids'] ?? [], label: 'chunk_id' ); $this->assertContainsNone( failures: $failures, actualValues: $documentIds, forbiddenList: $assert['must_not_include_document_ids'] ?? [], label: 'document_id' ); $this->assertContainsNone( failures: $failures, actualValues: $chunkIds, forbiddenList: $assert['must_not_include_chunk_ids'] ?? 
[],
            label: 'chunk_id'
        );

        // ---------------------------------------------------------
        // Text / term assertions
        // ---------------------------------------------------------
        $matchedAnyTerms = $this->findMatchingTerms(
            haystack: $joinedText,
            terms: $this->normalizeStringList($assert['must_include_any_terms'] ?? [])
        );

        $matchedAllTerms = $this->findMatchingTerms(
            haystack: $joinedText,
            terms: $this->normalizeStringList($assert['must_include_all_terms'] ?? [])
        );

        $requiredAnyTerms = $this->normalizeStringList($assert['must_include_any_terms'] ?? []);
        if ($requiredAnyTerms !== [] && $matchedAnyTerms === []) {
            $failures[] = sprintf(
                'none of the required any-terms were found in the retrieval text: [%s].',
                implode(', ', $requiredAnyTerms)
            );
        }

        $requiredAllTerms = $this->normalizeStringList($assert['must_include_all_terms'] ?? []);
        foreach ($requiredAllTerms as $requiredTerm) {
            if (!$this->containsTerm($joinedText, $requiredTerm)) {
                $failures[] = sprintf(
                    'required all-term "%s" was not found in the retrieval text.',
                    $requiredTerm
                );
            }
        }

        return new EvalResult(
            caseId: $case->id,
            type: $case->type,
            passed: $failures === [],
            durationMs: $durationMs,
            failures: $failures,
            details: [
                'prompt' => $case->prompt,
                'result_count' => $resultCount,
                'selection_mode' => $selectionMode,
                'route' => $route,
                'intent' => $intent,
                'document_ids' => $documentIds,
                'chunk_ids' => $chunkIds,
                'matched_any_terms' => $matchedAnyTerms,
                'matched_all_terms' => $matchedAllTerms,
            ],
        );
    }

    /**
     * Reads $key from a single row and returns it trimmed.
     *
     * A missing key or a non-string value yields an empty string.
     *
     * @param array<string, mixed> $row
     */
    private function extractString(array $row, string $key): string
    {
        $value = $row[$key] ?? null;

        if (!is_string($value)) {
            return '';
        }

        return trim($value);
    }

    /**
     * Collects the distinct, trimmed, non-empty string values stored under $key
     * across all rows, in first-seen order.
     *
     * Rows where the value is missing, not a string, or blank are skipped.
     *
     * @param array<int, array<string, mixed>> $rows
     * @return list<string>
     */
    private function extractUniqueStringValues(array $rows, string $key): array
    {
        $seen = [];
        $values = [];

        foreach ($rows as $row) {
            $value = $row[$key] ?? null;

            if (!is_string($value)) {
                continue;
            }

            $value = trim($value);

            if ($value === '' || isset($seen[$value])) {
                continue;
            }

            // $seen is only a membership set. PHP casts integer-like string keys
            // ("123") to int, so returning array_keys($seen) would hand back ints
            // and break the strict in_array() comparisons performed on the result.
            // The original strings are therefore collected separately.
            $seen[$value] = true;
            $values[] = $value;
        }

        return $values;
    }

    /**
     * Joins the trimmed, non-empty "text" field of every row with a blank line
     * between entries.
     *
     * Rows without a string "text" value are ignored.
     *
     * @param array<int, array<string, mixed>> $rows
     */
    private function extractJoinedText(array $rows): string
    {
        $parts = [];

        foreach ($rows as $row) {
            $text = $row['text'] ?? null;

            if (!is_string($text)) {
                continue;
            }

            $text = trim($text);

            if ($text === '') {
                continue;
            }

            $parts[] = $text;
        }

        return implode("\n\n", $parts);
    }

    /**
     * Appends a failure when $actual is not one of the allowed values.
     *
     * The check is skipped when $expectedList normalizes to an empty list
     * (not an array, or no non-blank string entries).
     *
     * @param array<int, string> $failures Failure messages, appended to by reference.
     * @param mixed $expectedList Raw assertion value; only non-blank strings are used.
     */
    private function assertValueInList(
        array &$failures,
        string $actual,
        mixed $expectedList,
        string $label
    ): void {
        $expected = $this->normalizeStringList($expectedList);

        if ($expected === []) {
            return;
        }

        if (!in_array($actual, $expected, true)) {
            $failures[] = sprintf(
                '%s mismatch: expected one of [%s], got "%s".',
                $label,
                implode(', ', $expected),
                $actual
            );
        }
    }

    /**
     * Appends a failure when none of the expected values occurs in $actualValues.
     *
     * The check is skipped when $expectedList normalizes to an empty list.
     *
     * @param array<int, string> $failures Failure messages, appended to by reference.
     * @param array<int, string> $actualValues
     * @param mixed $expectedList Raw assertion value; only non-blank strings are used.
     */
    private function assertContainsAtLeastOne(
        array &$failures,
        array $actualValues,
        mixed $expectedList,
        string $label
    ): void {
        $expected = $this->normalizeStringList($expectedList);

        if ($expected === []) {
            return;
        }

        foreach ($expected as $candidate) {
            if (in_array($candidate, $actualValues, true)) {
                // One hit is enough to satisfy the assertion.
                return;
            }
        }

        $failures[] = sprintf(
            'none of the expected %s values were found. Expected one of [%s], got [%s].',
            $label,
            implode(', ', $expected),
            implode(', ', $actualValues)
        );
    }

    /**
     * Appends one failure per forbidden value that occurs in $actualValues.
     *
     * The check is skipped when $forbiddenList normalizes to an empty list.
     *
     * @param array<int, string> $failures Failure messages, appended to by reference.
     * @param array<int, string> $actualValues
     * @param mixed $forbiddenList Raw assertion value; only non-blank strings are used.
     */
    private function assertContainsNone(
        array &$failures,
        array $actualValues,
        mixed $forbiddenList,
        string $label
    ): void {
        $forbidden = $this->normalizeStringList($forbiddenList);

        if ($forbidden === []) {
            return;
        }

        foreach ($forbidden as $forbiddenValue) {
            if (in_array($forbiddenValue, $actualValues, true)) {
                $failures[] = sprintf(
                    'forbidden %s "%s" was present in the retrieval results.',
                    $label,
                    $forbiddenValue
                );
            }
        }
    }

    /**
     * Returns the distinct terms that occur in $haystack according to
     * containsTerm(), in the order they were given.
     *
     * @param array<int, string> $terms
     * @return list<string>
     */
    private function findMatchingTerms(string $haystack, array $terms): array
    {
        $matches = [];

        foreach ($terms as $term) {
            if ($this->containsTerm($haystack, $term)) {
                $matches[] = $term;
            }
        }

        return array_values(array_unique($matches));
    }

    /**
     * Case-insensitive substring test on trimmed, lower-cased text.
     *
     * A term that is empty after normalization never matches.
     */
    private function containsTerm(string $haystack, string $term): bool
    {
        $haystack = $this->normalizeText($haystack);
        $term = $this->normalizeText($term);

        if ($term === '') {
            return false;
        }

        return str_contains($haystack, $term);
    }

    /**
     * Trims and lower-cases $value.
     *
     * Uses mb_strtolower() when the mbstring extension is loaded and falls back
     * to strtolower() otherwise.
     */
    private function normalizeText(string $value): string
    {
        $value = trim($value);

        if ($value === '') {
            return '';
        }

        if (function_exists('mb_strtolower')) {
            return mb_strtolower($value);
        }

        return strtolower($value);
    }

    /**
     * Reduces an arbitrary value to a de-duplicated list of trimmed, non-empty
     * strings.
     *
     * Anything that is not an array yields an empty list; non-string and blank
     * items inside the array are dropped.
     *
     * @param mixed $value
     * @return list<string>
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];

        foreach ($value as $item) {
            if (!is_string($item)) {
                continue;
            }

            $item = trim($item);

            if ($item === '') {
                continue;
            }

            $out[] = $item;
        }

        return array_values(array_unique($out));
    }
}