diff --git a/src/Command/AgentEvalRunCommand.php b/src/Command/AgentEvalRunCommand.php
new file mode 100644
index 0000000..d34a67b
--- /dev/null
+++ b/src/Command/AgentEvalRunCommand.php
@@ -0,0 +1,179 @@
+addArgument(
+ 'type',
+ InputArgument::OPTIONAL,
+ 'Eval type to run',
+ 'retrieval'
+ )
+ ->addOption(
+ 'case',
+ null,
+ InputOption::VALUE_OPTIONAL,
+ 'Run only a single case by id'
+ )
+ ->addOption(
+ 'json',
+ null,
+ InputOption::VALUE_NONE,
+ 'Print the full report as JSON'
+ )
+ ->addOption(
+ 'no-write',
+ null,
+ InputOption::VALUE_NONE,
+ 'Do not write the report file'
+ );
+ }
+
+ /**
+  * Loads the eval cases for the requested type, runs them and reports the outcome.
+  *
+  * Returns Command::SUCCESS when every selected case passed, or when the case file
+  * yields no cases and no --case filter was given. Returns Command::FAILURE when at
+  * least one case failed, when the loader, runner or report writer threw, when JSON
+  * encoding of the report failed, or when a --case id matched no case.
+  *
+  * With --json the full report (plus "written_to" when a file was written) is printed
+  * instead of the human-readable summary. With --no-write no report file is written.
+  */
+ protected function execute(InputInterface $input, OutputInterface $output): int
+ {
+     $io = new SymfonyStyle($input, $output);
+
+     $type = trim((string) $input->getArgument('type'));
+     $caseId = trim((string) $input->getOption('case'));
+     $asJson = (bool) $input->getOption('json');
+     $noWrite = (bool) $input->getOption('no-write');
+
+     try {
+         $cases = $this->loader->load($type);
+     } catch (\Throwable $e) {
+         $io->error($e->getMessage());
+
+         return Command::FAILURE;
+     }
+
+     if ($caseId !== '') {
+         $cases = array_values(array_filter(
+             $cases,
+             static fn (EvalCase $case): bool => $case->id === $caseId
+         ));
+     }
+
+     if ($cases === []) {
+         if ($caseId !== '') {
+             // An explicit filter that selects nothing is almost certainly a typo.
+             // Exiting 0 here would let an automated run "pass" without executing
+             // a single case, so treat it as a failure.
+             $io->error(sprintf(
+                 'No eval case with id "%s" found for type "%s".',
+                 $caseId,
+                 $type
+             ));
+
+             return Command::FAILURE;
+         }
+
+         $io->warning('No eval cases selected.');
+
+         return Command::SUCCESS;
+     }
+
+     try {
+         $results = $this->runner->runAll($cases);
+     } catch (\Throwable $e) {
+         $io->error($e->getMessage());
+
+         return Command::FAILURE;
+     }
+
+     $total = count($results);
+     $passed = count(array_filter(
+         $results,
+         static fn (EvalResult $result): bool => $result->passed
+     ));
+     $failed = $total - $passed;
+
+     // The exit code mirrors the eval outcome in both output modes.
+     $exitCode = $failed > 0 ? Command::FAILURE : Command::SUCCESS;
+
+     $report = [
+         'type' => $type,
+         'case_filter' => $caseId !== '' ? $caseId : null,
+         'total' => $total,
+         'passed' => $passed,
+         'failed' => $failed,
+         'generated_at' => (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
+         'results' => array_map(
+             static fn (EvalResult $result): array => $result->toArray(),
+             $results
+         ),
+     ];
+
+     $writtenPath = null;
+
+     if (!$noWrite) {
+         try {
+             $writtenPath = $this->reportWriter->write($report);
+         } catch (\Throwable $e) {
+             $io->error($e->getMessage());
+
+             return Command::FAILURE;
+         }
+     }
+
+     if ($asJson) {
+         $jsonReport = $report;
+
+         // "written_to" is only part of the printed report, not of the file itself.
+         if ($writtenPath !== null) {
+             $jsonReport['written_to'] = $writtenPath;
+         }
+
+         $json = json_encode(
+             $jsonReport,
+             JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
+         );
+
+         if (!is_string($json)) {
+             $io->error('json_encode failed.');
+
+             return Command::FAILURE;
+         }
+
+         $output->writeln($json);
+
+         return $exitCode;
+     }
+
+     $io->title('RetrieX Eval Run');
+     $io->definitionList(
+         ['type' => $type],
+         ['total' => (string) $total],
+         ['passed' => (string) $passed],
+         ['failed' => (string) $failed],
+         ['report_file' => $writtenPath ?? 'disabled (--no-write)']
+     );
+
+     foreach ($results as $result) {
+         if ($result->passed) {
+             $io->writeln(sprintf('PASS %s', $result->caseId));
+             continue;
+         }
+
+         $io->writeln(sprintf('FAIL %s', $result->caseId));
+
+         foreach ($result->failures as $failure) {
+             $io->writeln(' - ' . $failure);
+         }
+     }
+
+     return $exitCode;
+ }
+}
\ No newline at end of file
diff --git a/src/Config/NdjsonHybridRetrieverConfig.php b/src/Config/NdjsonHybridRetrieverConfig.php
index ab83943..80129c5 100644
--- a/src/Config/NdjsonHybridRetrieverConfig.php
+++ b/src/Config/NdjsonHybridRetrieverConfig.php
@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
* - the system now has more safeguards:
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
*/
- public const VECTOR_SCORE_THRESHOLD = 0.83;
+ public const VECTOR_SCORE_THRESHOLD = 0.82;
/**
* Lower safety boundary for dynamic threshold adjustments.
@@ -79,7 +79,7 @@ final class NdjsonHybridRetrieverConfig
* - slightly larger safety net for the richer hybrid stack
* - helps no-tag and low-signal cases without exploding context
*/
- public const EMPTY_RRF_FALLBACK_TOPN = 5;
+ public const EMPTY_RRF_FALLBACK_TOPN = 1;
/**
* Maximum number of chunks allowed from one document in spread mode.
diff --git a/src/Eval/AgentEvalRunner.php b/src/Eval/AgentEvalRunner.php
new file mode 100644
index 0000000..1358357
--- /dev/null
+++ b/src/Eval/AgentEvalRunner.php
@@ -0,0 +1,43 @@
+isRetrievalCase()) {
+ return $this->retrievalDebugRunner->run($case);
+ }
+
+ throw new \InvalidArgumentException(sprintf(
+ 'Unsupported eval case type: %s',
+ $case->type
+ ));
+ }
+
+ /**
+  * Runs every given case in iteration order and collects the results.
+  *
+  * The returned array is indexed from 0 regardless of the input keys. An exception
+  * thrown by run() for any case aborts the batch and propagates to the caller.
+  *
+  * @param array<array-key, EvalCase> $cases
+  * @return list<EvalResult>
+  */
+ public function runAll(array $cases): array
+ {
+     return array_map(
+         fn ($case) => $this->run($case),
+         array_values($cases)
+     );
+ }
+}
\ No newline at end of file
diff --git a/src/Eval/Dto/EvalCase.php b/src/Eval/Dto/EvalCase.php
new file mode 100644
index 0000000..e5ce645
--- /dev/null
+++ b/src/Eval/Dto/EvalCase.php
@@ -0,0 +1,60 @@
+ $assert
+ */
+ public function __construct(
+ // Case identifier; the eval command's --case filter compares against it.
+ public string $id,
+ // Case kind; isRetrievalCase() checks it against "retrieval".
+ public string $type,
+ // Query text handed to the retriever when the case is run.
+ public string $prompt,
+ // Expectations keyed by assertion name (e.g. "min_results"); empty means nothing is asserted.
+ public array $assert = [],
+ ) {
+ }
+
+ /**
+  * Builds a case from one decoded record of an eval case file.
+  *
+  * "id", "type" and "prompt" are cast to string and trimmed. A missing, blank or
+  * non-scalar value (for example a nested array) counts as empty and is rejected.
+  * "assert" falls back to an empty array when it is absent or not an array.
+  *
+  * @param array<string, mixed> $row
+  *
+  * @throws \InvalidArgumentException when id, type or prompt is empty
+  */
+ public static function fromArray(array $row): self
+ {
+     // Casting an array to string yields the literal "Array" (plus a warning), which
+     // would pass the emptiness checks below. Only scalars and Stringable objects
+     // are converted; everything else is treated as missing.
+     $readString = static function (string $key) use ($row): string {
+         $value = $row[$key] ?? '';
+
+         if (is_scalar($value) || $value instanceof \Stringable) {
+             return trim((string) $value);
+         }
+
+         return '';
+     };
+
+     $id = $readString('id');
+     $type = $readString('type');
+     $prompt = $readString('prompt');
+     $assert = is_array($row['assert'] ?? null) ? $row['assert'] : [];
+
+     if ($id === '') {
+         throw new \InvalidArgumentException('Eval case id must not be empty.');
+     }
+
+     if ($type === '') {
+         throw new \InvalidArgumentException(sprintf(
+             'Eval case "%s" has an empty type.',
+             $id
+         ));
+     }
+
+     if ($prompt === '') {
+         throw new \InvalidArgumentException(sprintf(
+             'Eval case "%s" has an empty prompt.',
+             $id
+         ));
+     }
+
+     return new self(
+         id: $id,
+         type: $type,
+         prompt: $prompt,
+         assert: $assert,
+     );
+ }
+
+ /**
+  * Tells whether this case's type is exactly "retrieval".
+  */
+ public function isRetrievalCase(): bool
+ {
+     return 'retrieval' === $this->type;
+ }
+}
\ No newline at end of file
diff --git a/src/Eval/Dto/EvalResult.php b/src/Eval/Dto/EvalResult.php
new file mode 100644
index 0000000..7215941
--- /dev/null
+++ b/src/Eval/Dto/EvalResult.php
@@ -0,0 +1,37 @@
+ $failures
+ * @param array $details
+ */
+ public function __construct(
+ // Id of the eval case this result belongs to.
+ public string $caseId,
+ // Type of the eval case (copied from the case by the runner).
+ public string $type,
+ // Overall verdict; the retrieval runner sets it to true only when no failure was recorded.
+ public bool $passed,
+ // Measured run time in milliseconds.
+ public float $durationMs,
+ // Human-readable failure messages; printed one per line by the eval command.
+ public array $failures = [],
+ // Additional diagnostic data included in the serialized report.
+ public array $details = [],
+ ) {
+ }
+
+ /**
+  * Serializes the result into the snake_case structure used in eval reports.
+  *
+  * @return array<string, mixed>
+  */
+ public function toArray(): array
+ {
+     $payload = [];
+
+     // Keys are added in the order they appear in the encoded report.
+     $payload['case_id'] = $this->caseId;
+     $payload['type'] = $this->type;
+     $payload['passed'] = $this->passed;
+     $payload['duration_ms'] = $this->durationMs;
+     $payload['failures'] = $this->failures;
+     $payload['details'] = $this->details;
+
+     return $payload;
+ }
+}
\ No newline at end of file
diff --git a/src/Eval/EvalCaseLoader.php b/src/Eval/EvalCaseLoader.php
new file mode 100644
index 0000000..e71259b
--- /dev/null
+++ b/src/Eval/EvalCaseLoader.php
@@ -0,0 +1,67 @@
+
+ */
+ public function load(string $type = 'retrieval'): array
+ {
+     // Reads <projectDir>/tests/evals/cases/<type>.ndjson and returns one EvalCase
+     // per JSON record, in file order. A record is normally a single line, but a
+     // pretty-printed object spread over several lines is accepted as well.
+     //
+     // Throws \InvalidArgumentException for an unusable $type (and, via
+     // EvalCase::fromArray(), for an invalid record) and \RuntimeException when the
+     // file is missing, unreadable or contains text that is not valid JSON.
+
+     // $type is interpolated into a filesystem path, so refuse anything that could
+     // point outside the cases directory (path separators, "..", empty names).
+     if (preg_match('/\A[A-Za-z0-9_-]+\z/', $type) !== 1) {
+         throw new \InvalidArgumentException(sprintf(
+             'Invalid eval type "%s": only letters, digits, "_" and "-" are allowed.',
+             $type
+         ));
+     }
+
+     $path = sprintf(
+         '%s/tests/evals/cases/%s.ndjson',
+         $this->projectDir,
+         $type
+     );
+
+     if (!is_file($path)) {
+         throw new \RuntimeException(sprintf(
+             'Eval case file not found: %s',
+             $path
+         ));
+     }
+
+     // FILE_SKIP_EMPTY_LINES is deliberately not used: it would renumber the array
+     // and make "index + 1" differ from the real line number in error messages.
+     $lines = file($path, FILE_IGNORE_NEW_LINES);
+
+     if ($lines === false) {
+         throw new \RuntimeException(sprintf(
+             'Failed to read eval case file: %s',
+             $path
+         ));
+     }
+
+     $cases = [];
+     $buffer = '';
+     $recordStartLine = 0;
+
+     foreach ($lines as $index => $line) {
+         $line = trim($line);
+
+         if ($line === '') {
+             continue;
+         }
+
+         if ($buffer === '') {
+             $recordStartLine = $index + 1;
+             $buffer = $line;
+         } else {
+             $buffer .= "\n" . $line;
+         }
+
+         $decoded = json_decode($buffer, true);
+
+         if (!is_array($decoded)) {
+             // Either an incomplete multi-line record or invalid JSON: keep
+             // accumulating; leftovers are reported after the loop. Re-decoding
+             // the growing buffer is quadratic in record length, which is
+             // acceptable for small case files.
+             continue;
+         }
+
+         $cases[] = EvalCase::fromArray($decoded);
+         $buffer = '';
+     }
+
+     if ($buffer !== '') {
+         // Text that never became a JSON array/object; point at where it started.
+         throw new \RuntimeException(sprintf(
+             'Invalid JSON in %s on line %d.',
+             $path,
+             $recordStartLine
+         ));
+     }
+
+     return $cases;
+ }
+}
\ No newline at end of file
diff --git a/src/Eval/EvalReportWriter.php b/src/Eval/EvalReportWriter.php
new file mode 100644
index 0000000..f889847
--- /dev/null
+++ b/src/Eval/EvalReportWriter.php
@@ -0,0 +1,59 @@
+ $report
+ */
+ public function write(array $report, string $filename = 'last-run.json'): string
+ {
+     // Encodes $report as pretty-printed JSON and stores it as
+     // <projectDir>/tests/evals/reports/<filename>, returning the final path.
+     // Every failure along the way surfaces as a \RuntimeException.
+     $reportsDir = $this->projectDir . '/tests/evals/reports';
+
+     // The trailing is_dir() covers the case where the directory appeared
+     // between the first check and the mkdir() attempt.
+     $dirReady = is_dir($reportsDir) || mkdir($reportsDir, 0775, true) || is_dir($reportsDir);
+
+     if (!$dirReady) {
+         throw new \RuntimeException(sprintf(
+             'Failed to create eval report directory: %s',
+             $reportsDir
+         ));
+     }
+
+     $target = $reportsDir . '/' . ltrim($filename, '/');
+
+     $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;
+     $encoded = json_encode($report, $flags);
+
+     if ($encoded === false) {
+         throw new \RuntimeException('json_encode failed for eval report.');
+     }
+
+     // Write to a sibling temp file first, then move it over the target so the
+     // target is never left half-written.
+     $staging = $target . '.tmp';
+     $bytesWritten = file_put_contents($staging, $encoded);
+
+     if ($bytesWritten === false) {
+         throw new \RuntimeException(sprintf(
+             'Failed to write temporary eval report file: %s',
+             $staging
+         ));
+     }
+
+     if (rename($staging, $target)) {
+         return $target;
+     }
+
+     // Best-effort cleanup of the orphaned temp file before reporting the failure.
+     @unlink($staging);
+
+     throw new \RuntimeException(sprintf(
+         'Failed to move temporary eval report into place: %s',
+         $target
+     ));
+ }
+}
\ No newline at end of file
diff --git a/src/Eval/RetrievalDebugRunner.php b/src/Eval/RetrievalDebugRunner.php
new file mode 100644
index 0000000..c07c28a
--- /dev/null
+++ b/src/Eval/RetrievalDebugRunner.php
@@ -0,0 +1,434 @@
+retriever->retrieveDebug($case->prompt);
+
+ $durationMs = round((microtime(true) - $start) * 1000, 2);
+
+ $resultCount = count($rows);
+ $first = $rows[0] ?? [];
+
+ $selectionMode = $this->extractString($first, 'selection_mode');
+ $route = $this->extractString($first, 'route');
+ $intent = $this->extractString($first, 'intent');
+
+ $documentIds = $this->extractUniqueStringValues($rows, 'document_id');
+ $chunkIds = $this->extractUniqueStringValues($rows, 'chunk_id');
+ $joinedText = $this->extractJoinedText($rows);
+
+ $assert = $case->assert;
+
+ // ---------------------------------------------------------
+ // Strict single-value assertions
+ // ---------------------------------------------------------
+ if (isset($assert['selection_mode']) && (string) $assert['selection_mode'] !== $selectionMode) {
+ $failures[] = sprintf(
+ 'selection_mode mismatch: expected "%s", got "%s".',
+ (string) $assert['selection_mode'],
+ $selectionMode
+ );
+ }
+
+ if (isset($assert['route']) && (string) $assert['route'] !== $route) {
+ $failures[] = sprintf(
+ 'route mismatch: expected "%s", got "%s".',
+ (string) $assert['route'],
+ $route
+ );
+ }
+
+ if (isset($assert['intent']) && (string) $assert['intent'] !== $intent) {
+ $failures[] = sprintf(
+ 'intent mismatch: expected "%s", got "%s".',
+ (string) $assert['intent'],
+ $intent
+ );
+ }
+
+ // ---------------------------------------------------------
+ // Flexible multi-value assertions
+ // ---------------------------------------------------------
+ $this->assertValueInList(
+ failures: $failures,
+ actual: $selectionMode,
+ expectedList: $assert['selection_mode_in'] ?? [],
+ label: 'selection_mode'
+ );
+
+ $this->assertValueInList(
+ failures: $failures,
+ actual: $route,
+ expectedList: $assert['route_in'] ?? [],
+ label: 'route'
+ );
+
+ $this->assertValueInList(
+ failures: $failures,
+ actual: $intent,
+ expectedList: $assert['intent_in'] ?? [],
+ label: 'intent'
+ );
+
+ // ---------------------------------------------------------
+ // Result count assertions
+ // ---------------------------------------------------------
+ if (isset($assert['min_results']) && $resultCount < (int) $assert['min_results']) {
+ $failures[] = sprintf(
+ 'result_count too low: expected >= %d, got %d.',
+ (int) $assert['min_results'],
+ $resultCount
+ );
+ }
+
+ if (isset($assert['max_results']) && $resultCount > (int) $assert['max_results']) {
+ $failures[] = sprintf(
+ 'result_count too high: expected <= %d, got %d.',
+ (int) $assert['max_results'],
+ $resultCount
+ );
+ }
+
+ // ---------------------------------------------------------
+ // ID assertions
+ // ---------------------------------------------------------
+ foreach ($this->normalizeStringList($assert['must_include_document_ids'] ?? []) as $expectedDocumentId) {
+ if (!in_array($expectedDocumentId, $documentIds, true)) {
+ $failures[] = sprintf(
+ 'missing expected document_id "%s".',
+ $expectedDocumentId
+ );
+ }
+ }
+
+ foreach ($this->normalizeStringList($assert['must_include_chunk_ids'] ?? []) as $expectedChunkId) {
+ if (!in_array($expectedChunkId, $chunkIds, true)) {
+ $failures[] = sprintf(
+ 'missing expected chunk_id "%s".',
+ $expectedChunkId
+ );
+ }
+ }
+
+ $this->assertContainsAtLeastOne(
+ failures: $failures,
+ actualValues: $documentIds,
+ expectedList: $assert['must_include_one_of_document_ids'] ?? [],
+ label: 'document_id'
+ );
+
+ $this->assertContainsAtLeastOne(
+ failures: $failures,
+ actualValues: $chunkIds,
+ expectedList: $assert['must_include_one_of_chunk_ids'] ?? [],
+ label: 'chunk_id'
+ );
+
+ $this->assertContainsNone(
+ failures: $failures,
+ actualValues: $documentIds,
+ forbiddenList: $assert['must_not_include_document_ids'] ?? [],
+ label: 'document_id'
+ );
+
+ $this->assertContainsNone(
+ failures: $failures,
+ actualValues: $chunkIds,
+ forbiddenList: $assert['must_not_include_chunk_ids'] ?? [],
+ label: 'chunk_id'
+ );
+
+ // ---------------------------------------------------------
+ // Text / term assertions
+ // ---------------------------------------------------------
+ $matchedAnyTerms = $this->findMatchingTerms(
+ haystack: $joinedText,
+ terms: $this->normalizeStringList($assert['must_include_any_terms'] ?? [])
+ );
+
+ $matchedAllTerms = $this->findMatchingTerms(
+ haystack: $joinedText,
+ terms: $this->normalizeStringList($assert['must_include_all_terms'] ?? [])
+ );
+
+ $requiredAnyTerms = $this->normalizeStringList($assert['must_include_any_terms'] ?? []);
+ if ($requiredAnyTerms !== [] && $matchedAnyTerms === []) {
+ $failures[] = sprintf(
+ 'none of the required any-terms were found in the retrieval text: [%s].',
+ implode(', ', $requiredAnyTerms)
+ );
+ }
+
+ $requiredAllTerms = $this->normalizeStringList($assert['must_include_all_terms'] ?? []);
+ foreach ($requiredAllTerms as $requiredTerm) {
+ if (!$this->containsTerm($joinedText, $requiredTerm)) {
+ $failures[] = sprintf(
+ 'required all-term "%s" was not found in the retrieval text.',
+ $requiredTerm
+ );
+ }
+ }
+
+ return new EvalResult(
+ caseId: $case->id,
+ type: $case->type,
+ passed: $failures === [],
+ durationMs: $durationMs,
+ failures: $failures,
+ details: [
+ 'prompt' => $case->prompt,
+ 'result_count' => $resultCount,
+ 'selection_mode' => $selectionMode,
+ 'route' => $route,
+ 'intent' => $intent,
+ 'document_ids' => $documentIds,
+ 'chunk_ids' => $chunkIds,
+ 'matched_any_terms' => $matchedAnyTerms,
+ 'matched_all_terms' => $matchedAllTerms,
+ ],
+ );
+ }
+
+ /**
+  * Returns the trimmed string stored under $key, or '' when the key is missing
+  * or holds anything other than a string.
+  *
+  * @param array<string, mixed> $row
+  */
+ private function extractString(array $row, string $key): string
+ {
+     $candidate = $row[$key] ?? null;
+
+     return is_string($candidate) ? trim($candidate) : '';
+ }
+
+ /**
+  * Collects the distinct, trimmed, non-empty string values found under $key
+  * across all rows, in order of first appearance.
+  *
+  * Rows where the key is missing or not a string are ignored.
+  *
+  * @param array<int, array<string, mixed>> $rows
+  * @return list<string>
+  */
+ private function extractUniqueStringValues(array $rows, string $key): array
+ {
+     $seen = [];
+
+     foreach ($rows as $row) {
+         $value = $row[$key] ?? null;
+
+         if (!is_string($value)) {
+             continue;
+         }
+
+         $value = trim($value);
+
+         if ($value === '') {
+             continue;
+         }
+
+         // Array keys give de-duplication while preserving first-seen order.
+         $seen[$value] = true;
+     }
+
+     // PHP silently converts integer-like string keys ("42") to ints, so
+     // array_keys() alone would hand back int 42. Cast back to string so the
+     // strict in_array() checks against string ids keep matching.
+     return array_map(
+         static fn (int|string $id): string => (string) $id,
+         array_keys($seen)
+     );
+ }
+
+ /**
+  * Concatenates the trimmed, non-empty "text" values of all rows, separated by
+  * a blank line. Rows without a string "text" contribute nothing.
+  *
+  * @param array<int, array<string, mixed>> $rows
+  */
+ private function extractJoinedText(array $rows): string
+ {
+     $chunks = [];
+
+     foreach ($rows as $row) {
+         $raw = $row['text'] ?? null;
+         $clean = is_string($raw) ? trim($raw) : '';
+
+         if ($clean !== '') {
+             $chunks[] = $clean;
+         }
+     }
+
+     return implode("\n\n", $chunks);
+ }
+
+ /**
+  * Records a failure when $actual is not one of the expected values.
+  *
+  * Nothing is checked when the expected list normalizes to an empty list
+  * (not an array, or no non-empty strings in it).
+  *
+  * @param list<string> $failures failure messages, appended to by reference
+  * @param mixed $expectedList raw value taken from the case's assert section
+  */
+ private function assertValueInList(
+     array &$failures,
+     string $actual,
+     mixed $expectedList,
+     string $label
+ ): void {
+     $allowed = $this->normalizeStringList($expectedList);
+
+     if ($allowed === [] || in_array($actual, $allowed, true)) {
+         return;
+     }
+
+     $failures[] = sprintf(
+         '%s mismatch: expected one of [%s], got "%s".',
+         $label,
+         implode(', ', $allowed),
+         $actual
+     );
+ }
+
+ /**
+  * Records a failure when none of the expected values occurs in $actualValues.
+  *
+  * Nothing is checked when the expected list normalizes to an empty list.
+  *
+  * @param list<string> $failures failure messages, appended to by reference
+  * @param array<int, string> $actualValues values observed in the retrieval results
+  * @param mixed $expectedList raw value taken from the case's assert section
+  */
+ private function assertContainsAtLeastOne(
+     array &$failures,
+     array $actualValues,
+     mixed $expectedList,
+     string $label
+ ): void {
+     $candidates = $this->normalizeStringList($expectedList);
+
+     if ($candidates === []) {
+         return;
+     }
+
+     $present = array_filter(
+         $candidates,
+         static fn (string $candidate): bool => in_array($candidate, $actualValues, true)
+     );
+
+     if ($present !== []) {
+         return;
+     }
+
+     $failures[] = sprintf(
+         'none of the expected %s values were found. Expected one of [%s], got [%s].',
+         $label,
+         implode(', ', $candidates),
+         implode(', ', $actualValues)
+     );
+ }
+
+ /**
+  * Records one failure for every forbidden value that occurs in $actualValues.
+  *
+  * @param list<string> $failures failure messages, appended to by reference
+  * @param array<int, string> $actualValues values observed in the retrieval results
+  * @param mixed $forbiddenList raw value taken from the case's assert section
+  */
+ private function assertContainsNone(
+     array &$failures,
+     array $actualValues,
+     mixed $forbiddenList,
+     string $label
+ ): void {
+     // An empty normalized list simply yields zero iterations.
+     foreach ($this->normalizeStringList($forbiddenList) as $banned) {
+         if (!in_array($banned, $actualValues, true)) {
+             continue;
+         }
+
+         $failures[] = sprintf(
+             'forbidden %s "%s" was present in the retrieval results.',
+             $label,
+             $banned
+         );
+     }
+ }
+
+ /**
+  * Returns the distinct terms that occur in $haystack according to
+  * containsTerm(), in their original order.
+  *
+  * @param array<int, string> $terms
+  * @return list<string>
+  */
+ private function findMatchingTerms(string $haystack, array $terms): array
+ {
+     $hits = array_filter(
+         $terms,
+         fn ($term) => $this->containsTerm($haystack, $term)
+     );
+
+     return array_values(array_unique($hits));
+ }
+
+ /**
+  * Substring check after both sides went through normalizeText().
+  * A term that normalizes to '' never matches.
+  */
+ private function containsTerm(string $haystack, string $term): bool
+ {
+     $needle = $this->normalizeText($term);
+
+     return $needle !== ''
+         && str_contains($this->normalizeText($haystack), $needle);
+ }
+
+ /**
+  * Trims the value and lower-cases it, using mb_strtolower() when that
+  * function exists and strtolower() otherwise.
+  */
+ private function normalizeText(string $value): string
+ {
+     $trimmed = trim($value);
+
+     if ('' === $trimmed) {
+         return '';
+     }
+
+     return function_exists('mb_strtolower')
+         ? mb_strtolower($trimmed)
+         : strtolower($trimmed);
+ }
+
+ /**
+  * Turns an arbitrary value into a list of distinct, trimmed, non-empty strings.
+  *
+  * Anything that is not an array yields an empty list; non-string items are
+  * dropped; the order of first occurrence is kept.
+  *
+  * @param mixed $value
+  * @return list<string>
+  */
+ private function normalizeStringList(mixed $value): array
+ {
+     if (!is_array($value)) {
+         return [];
+     }
+
+     $strings = array_filter(
+         $value,
+         static fn (mixed $item): bool => is_string($item)
+     );
+
+     $trimmed = array_map(
+         static fn (string $item): string => trim($item),
+         $strings
+     );
+
+     $nonEmpty = array_filter(
+         $trimmed,
+         static fn (string $item): bool => $item !== ''
+     );
+
+     return array_values(array_unique($nonEmpty));
+ }
+}
\ No newline at end of file
diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
index bc4ef2c..175c68d 100644
--- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
+++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
@@ -366,7 +366,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$rawScores = $fused['raw_scores'];
if ($rrfScores === [] && $globalHits !== []) {
- $rrfScores = $this->fallbackRrfFromHits($globalHits);
+ // $rrfScores = $this->fallbackRrfFromHits($globalHits);
}
if ($rrfScores === []) {
diff --git a/tests/evals/cases/retrieval.ndjson b/tests/evals/cases/retrieval.ndjson
new file mode 100644
index 0000000..629e1aa
--- /dev/null
+++ b/tests/evals/cases/retrieval.ndjson
@@ -0,0 +1,117 @@
+{
+ "id": "retrieval_exact_doc_001",
+ "type": "retrieval",
+ "prompt": "phaseaaudit-md",
+ "assert": {
+ "selection_mode_in": [
+ "exact_document_title"
+ ],
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "5914508a-5930-4f04-892b-323881d0daa7"
+ ],
+ "must_include_any_terms": [
+ "enterprise",
+ "governance",
+ "vector-service"
+ ]
+ }
+}
+{
+ "id": "retrieval_exact_doc_002",
+ "type": "retrieval",
+ "prompt": "ragsystemoverview-md",
+ "assert": {
+ "selection_mode_in": [
+ "exact_document_title"
+ ],
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "7513fd82-eec6-4bfa-a730-41820b38b6b4"
+ ],
+ "must_include_all_terms": [
+ "rag-system",
+ "dokumente"
+ ]
+ }
+}
+{
+ "id": "retrieval_exact_doc_003",
+ "type": "retrieval",
+ "prompt": "matrixparams-md",
+ "assert": {
+ "selection_mode_in": [
+ "exact_document_title"
+ ],
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "25276f4c-32bb-47a5-98b3-9d81aa722d2b"
+ ],
+ "must_include_any_terms": [
+ "retrievalmaxchunks",
+ "retrievalvectortopk",
+ "hard_max_chunks"
+ ]
+ }
+}
+{
+ "id": "retrieval_exact_doc_004",
+ "type": "retrieval",
+ "prompt": "readme-md",
+ "assert": {
+ "selection_mode_in": [
+ "exact_document_title"
+ ],
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "8abe1f0d-54e6-41ad-967a-9ce8a0efc6da"
+ ],
+ "must_include_any_terms": [
+ "deterministisches",
+ "faiss",
+ "vector-service"
+ ]
+ }
+}
+{
+ "id": "retrieval_semantic_001",
+ "type": "retrieval",
+ "prompt": "wie funktioniert das system",
+ "assert": {
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "7513fd82-eec6-4bfa-a730-41820b38b6b4",
+ "8abe1f0d-54e6-41ad-967a-9ce8a0efc6da"
+ ],
+ "must_include_any_terms": [
+ "rag-system",
+ "dokumente",
+ "indexierung"
+ ]
+ }
+}
+{
+ "id": "retrieval_semantic_002",
+ "type": "retrieval",
+ "prompt": "welche parameter beeinflussen retrieval",
+ "assert": {
+ "min_results": 1,
+ "must_include_one_of_document_ids": [
+ "25276f4c-32bb-47a5-98b3-9d81aa722d2b",
+ "7513fd82-eec6-4bfa-a730-41820b38b6b4"
+ ],
+ "must_include_any_terms": [
+ "retrievalmaxchunks",
+ "vectortopk",
+ "chunk"
+ ]
+ }
+}
+{
+ "id": "retrieval_noise_001",
+ "type": "retrieval",
+ "prompt": "dsgfsdgfsdgf",
+ "assert": {
+ "max_results": 0
+ }
+}
\ No newline at end of file
diff --git a/tests/evals/reports/.gitignore b/tests/evals/reports/.gitignore
new file mode 100644
index 0000000..c96a04f
--- /dev/null
+++ b/tests/evals/reports/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file