diff --git a/composer.json b/composer.json
index f155307..ff6de92 100644
--- a/composer.json
+++ b/composer.json
@@ -28,7 +28,8 @@
         "symfony/security-bundle": "7.4.*",
         "symfony/twig-bundle": "7.4.*",
         "symfony/uid": "7.4.*",
-        "symfony/yaml": "^7.4"
+        "symfony/yaml": "^7.4",
+        "ext-sqlite3": "*"
     },
     "config": {
         "optimize-autoloader": true,
diff --git a/src/Command/TestHybridRetrievalCommand.php b/src/Command/TestHybridRetrievalCommand.php
new file mode 100644
index 0000000..e691664
--- /dev/null
+++ b/src/Command/TestHybridRetrievalCommand.php
@@ -0,0 +1,298 @@
+<?php
+
+declare(strict_types=1);
+
+namespace App\Command;
+
+use App\Knowledge\Retrieval\NdjsonHybridRetriever;
+use Symfony\Component\Console\Attribute\AsCommand;
+use Symfony\Component\Console\Command\Command;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+use Symfony\Component\Console\Style\SymfonyStyle;
+
+#[AsCommand(
+    name: 'mto:agent:retrieval:test',
+    description: 'Test the real hybrid retrieval path with debug output'
+)]
+final class TestHybridRetrievalCommand extends Command
+{
+    public function __construct(
+        private readonly NdjsonHybridRetriever $retriever,
+    ) {
+        parent::__construct(); // No explicit name is passed here; the #[AsCommand] attribute on the class declares it.
+    }
+
+    protected function configure(): void
+    {
+        // Input contract: one required prompt plus two output-shaping flags.
+        $this->addArgument(
+            'prompt',
+            InputArgument::REQUIRED,
+            'Prompt to test against the real hybrid retrieval pipeline'
+        );
+        $this->addOption(
+            'json',
+            null,
+            InputOption::VALUE_NONE,
+            'Return the raw retrieval debug result as JSON'
+        );
+        $this->addOption(
+            'show-text',
+            null,
+            InputOption::VALUE_NONE,
+            'Show full chunk text instead of a shortened preview'
+        );
+    }
+
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        // Runs one retrieval for the prompt and prints the debug rows either as JSON or as formatted text.
+        $io = new SymfonyStyle($input, $output);
+        $prompt = trim((string) $input->getArgument('prompt'));
+        $asJson = (bool) $input->getOption('json');
+        $showText = (bool) $input->getOption('show-text');
+
+        if ($prompt === '') {
+            $io->error('Prompt must not be empty.');
+
+            return Command::FAILURE;
+        }
+        // Measure only the retrieval call itself, not input handling or rendering.
+        $start = microtime(true);
+
+        try {
+            $results = $this->retriever->retrieveDebug($prompt);
+        } catch (\Throwable $e) {
+            $io->error($e->getMessage());
+
+            return Command::FAILURE;
+        }
+
+        $durationMs = round((microtime(true) - $start) * 1000, 2);
+
+        if ($asJson) {
+            $payload = [
+                'prompt' => $prompt,
+                'duration_ms' => $durationMs,
+                'result_count' => count($results),
+                'results' => $results,
+            ];
+
+            $json = json_encode(
+                $payload,
+                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
+            );
+
+            if (!is_string($json)) {
+                $io->error('json_encode failed.');
+
+                return Command::FAILURE;
+            }
+            // OUTPUT_RAW: the payload may contain "<...>" or "\<" sequences that the console formatter would rewrite.
+            $output->writeln($json, OutputInterface::OUTPUT_RAW);
+
+            return Command::SUCCESS;
+        }
+
+        $io->title('Hybrid Retrieval Test');
+        $io->definitionList(
+            ['prompt' => $prompt],
+            ['duration_ms' => (string) $durationMs],
+            ['result_count' => (string) count($results)]
+        );
+
+        if ($results === []) {
+            $io->warning('No retrieval results returned.');
+
+            return Command::SUCCESS;
+        }
+        // Pipeline-level fields for the summary sections are read from the first row only.
+        $first = $results[0];
+
+        $io->section('Pipeline Summary');
+        $io->definitionList(
+            ['scope_mode' => $this->stringValue($first, 'scope_mode')],
+            ['selection_mode' => $this->stringValue($first, 'selection_mode')],
+            ['intent' => $this->stringValue($first, 'intent')],
+            ['route' => $this->stringValue($first, 'route')],
+            ['entity_label' => $this->stringValue($first, 'entity_label')],
+            ['is_list_query' => $this->boolishValue($first, 'is_list_query')],
+            ['clean_query' => $this->stringValue($first, 'clean_query')],
+            ['semantic_query' => $this->stringValue($first, 'semantic_query')],
+            ['secondary_vector_query' => $this->stringValue($first, 'secondary_vector_query')],
+            ['lexical_query' => $this->stringValue($first, 'lexical_query')],
+            ['threshold' => $this->scalarValue($first, 'threshold')],
+            ['lexical_threshold' => $this->scalarValue($first, 'lexical_threshold')]
+        );
+
+        $io->section('Scope Candidates');
+        $io->definitionList(
+            ['tag_candidate_doc_ids' => $this->jsonValue($first, 'tag_candidate_doc_ids')],
+            ['soft_document_candidate_doc_ids' => $this->jsonValue($first, 'soft_document_candidate_doc_ids')],
+            ['pseudo_scope_doc_ids' => $this->jsonValue($first, 'pseudo_scope_doc_ids')],
+            ['title_metadata_doc_boosts' => $this->jsonObjectValue($first, 'title_metadata_doc_boosts')]
+        );
+
+        $io->section('Hit Counts');
+        $io->definitionList(
+            ['global_hit_count' => $this->scalarValue($first, 'global_hit_count')],
+            ['scoped_hit_count' => $this->scalarValue($first, 'scoped_hit_count')],
+            ['global_vector_hit_count' => $this->scalarValue($first, 'global_vector_hit_count')],
+            ['global_primary_vector_hit_count' => $this->scalarValue($first, 'global_primary_vector_hit_count')],
+            ['global_secondary_vector_hit_count' => $this->scalarValue($first, 'global_secondary_vector_hit_count')],
+            ['global_keyword_hit_count' => $this->scalarValue($first, 'global_keyword_hit_count')],
+            ['scoped_vector_hit_count' => $this->scalarValue($first, 'scoped_vector_hit_count')],
+            ['scoped_primary_vector_hit_count' => $this->scalarValue($first, 'scoped_primary_vector_hit_count')],
+            ['scoped_secondary_vector_hit_count' => $this->scalarValue($first, 'scoped_secondary_vector_hit_count')],
+            ['scoped_keyword_hit_count' => $this->scalarValue($first, 'scoped_keyword_hit_count')]
+        );
+
+        $io->section('Boosts');
+        $io->definitionList(
+            ['scoped_boost_factor' => $this->scalarValue($first, 'scoped_boost_factor')],
+            ['scoped_vector_boost_factor' => $this->scalarValue($first, 'scoped_vector_boost_factor')],
+            ['secondary_scoped_vector_boost_factor' => $this->scalarValue($first, 'secondary_scoped_vector_boost_factor')],
+            ['scoped_keyword_boost_factor' => $this->scalarValue($first, 'scoped_keyword_boost_factor')]
+        );
+
+        $io->section('Selected Chunks');
+
+        foreach ($results as $row) {
+            $rank = $this->scalarValue($row, 'rank');
+            $chunkId = $this->stringValue($row, 'chunk_id');
+            $documentId = $this->stringValue($row, 'document_id');
+            $chunkIndex = $this->scalarValue($row, 'chunk_index');
+            $rrfScore = $this->scalarValue($row, 'rrf_score');
+            $rawVectorScore = $this->scalarValue($row, 'raw_vector_score');
+            $rawKeywordScore = $this->scalarValue($row, 'raw_keyword_score');
+            $titleMetadataBoost = $this->scalarValue($row, 'title_metadata_boost');
+            $text = (string) ($row['text'] ?? '');
+
+            if (!$showText) {
+                $text = $this->shortenText($text, 500);
+            }
+
+            $io->writeln(sprintf(
+                '<info>#%s</info> chunk=%s doc=%s idx=%s rrf=%s vector=%s keyword=%s title_meta=%s',
+                $rank,
+                $chunkId,
+                $documentId !== '' ? $documentId : '-',
+                $chunkIndex !== '' ? $chunkIndex : '-',
+                $rrfScore !== '' ? $rrfScore : '-',
+                $rawVectorScore !== '' ? $rawVectorScore : '-',
+                $rawKeywordScore !== '' ? $rawKeywordScore : '-',
+                $titleMetadataBoost !== '' ? $titleMetadataBoost : '-'
+            ));
+            $io->writeln($text, OutputInterface::OUTPUT_RAW); // Chunk text is document content: print verbatim, not as console markup.
+            $io->writeln('');
+        }
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Returns the trimmed string form of a row value.
+     *
+     * Missing keys, null, arrays and non-stringable objects yield ''.
+     *
+     * @param array<string, mixed> $row
+     */
+    private function stringValue(array $row, string $key): string
+    {
+        $value = $row[$key] ?? null;
+        // Casting an array or a plain object would warn or throw, so guard the cast.
+        return is_scalar($value) || $value instanceof \Stringable ? trim((string) $value) : '';
+    }
+
+    /**
+     * Formats a scalar row value for console display.
+     *
+     * - booleans are spelled out as 'true' / 'false'
+     * - other scalars are cast to string
+     * - missing keys, null and non-scalar values produce ''
+     *
+     * @param array<string, mixed> $row
+     */
+    private function scalarValue(array $row, string $key): string
+    {
+        $candidate = $row[$key] ?? null;
+
+        // is_scalar() also accepts booleans, so the bool arm must come first
+        // to keep the 'true' / 'false' spelling.
+        return match (true) {
+            is_bool($candidate) => $candidate ? 'true' : 'false',
+            is_scalar($candidate) => (string) $candidate,
+            default => '',
+        };
+    }
+
+    /**
+     * Renders a flag-like row value: real booleans as 'true' / 'false',
+     * any other scalar via string cast, everything else as ''.
+     *
+     * @param array<string, mixed> $row
+     */
+    private function boolishValue(array $row, string $key): string
+    {
+        $flag = $row[$key] ?? null;
+
+        // Null (including a missing key), arrays and objects have no display form.
+        if (!is_scalar($flag)) {
+            return '';
+        }
+
+        return is_bool($flag) ? ($flag ? 'true' : 'false') : (string) $flag;
+    }
+
+    /**
+     * Encodes a list-like row value as a compact JSON array.
+     *
+     * Keys are discarded via array_values(); anything that is not an array,
+     * or that fails to encode, is rendered as '[]'.
+     *
+     * @param array<string, mixed> $row
+     */
+    private function jsonValue(array $row, string $key): string
+    {
+        $list = $row[$key] ?? null;
+        if (!is_array($list)) {
+            return '[]';
+        }
+
+        $encoded = json_encode(array_values($list), JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
+        return $encoded === false ? '[]' : $encoded;
+    }
+
+    /**
+     * Encodes a map-like row value as a compact JSON object.
+     *
+     * The array is cast to an object first, so an empty or list-shaped array
+     * still renders as a JSON object, consistent with the '{}' fallback.
+     *
+     * @param array<string, mixed> $row
+     */
+    private function jsonObjectValue(array $row, string $key): string
+    {
+        $value = $row[$key] ?? null;
+        if (!is_array($value)) {
+            return '{}';
+        }
+        $json = json_encode((object) $value, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
+
+        return is_string($json) ? $json : '{}';
+    }
+
+    private function shortenText(string $text, int $maxLength): string
+    {
+        // Collapse whitespace runs first; keep the input unchanged if the regex fails.
+        $collapsed = trim(preg_replace('/\s+/u', ' ', $text) ?? $text);
+        $fits = mb_strlen($collapsed, 'UTF-8') <= $maxLength;
+        // Cut by characters (not bytes) and mark the truncation.
+        return $fits
+            ? $collapsed
+            : mb_substr($collapsed, 0, $maxLength, 'UTF-8') . ' …';
+    }
+}
\ No newline at end of file
diff --git a/src/Config/NdjsonHybridRetrieverConfig.php b/src/Config/NdjsonHybridRetrieverConfig.php
index 845b656..2b8d66b 100644
--- a/src/Config/NdjsonHybridRetrieverConfig.php
+++ b/src/Config/NdjsonHybridRetrieverConfig.php
@@ -7,62 +7,96 @@ namespace App\Config;
 final class NdjsonHybridRetrieverConfig
 {
     /**
-     * Default semantic similarity threshold for vector hits.
+     * Maximum number of chunks the retriever may finally hand to the model.
      *
-     * Chosen to stay selective enough for product-family-heavy data
-     * while not cutting off too many useful fallback hits.
+     * Rationale:
+     * - enough room for the stronger hybrid pipeline
+     * - still conservative enough to avoid prompt bloat
      */
-    public const VECTOR_SCORE_THRESHOLD = 0.83;
+    public const HARD_MAX_CHUNKS = 6;
 
     /**
-     * Absolute safety caps.
+     * Hard upper bound for vector retrieval candidate size.
      *
-     * These limits protect the retriever from overly large candidate sets
-     * even if runtime config values are set too high.
+     * Rationale:
+     * - the pipeline now combines primary vector, secondary vector,
+     *   lexical, scoped retrieval and re-ranking
+     * - the old limit would constrain recall too early (NOTE(review): this change lowers the cap from 180 to 18 — confirm intended)
+     * - still capped to keep latency controlled
      */
-    public const HARD_MAX_CHUNKS = 72;
-    public const HARD_MAX_VECTORK = 180;
+    public const HARD_MAX_VECTORK = 18;
 
     /**
-     * List-style queries benefit from a slightly wider candidate pool
-     * before de-duplication and final selection.
+     * Default semantic score threshold for vector hits.
+     *
+     * Rationale:
+     * - slightly relaxed compared to stricter pure-vector setups
+     * - the system now has more safeguards:
+     *   lexical cross-signals, scoped retrieval, title/meta boost, selection rules
      */
-    public const LIST_BONUS = 1.25;
+    public const VECTOR_SCORE_THRESHOLD = 0.81;
 
     /**
-     * Selection rules for cross-document semantic retrieval.
+     * Lower safety boundary for dynamic threshold adjustments.
      *
-     * MAX_CHUNKS_PER_DOC:
-     * Keeps one document from dominating the final result in normal
-     * semantic retrieval mode.
-     *
-     * MIN_CHUNK_DISTANCE:
-     * Allows nearby chunks to be selected when they are still meaningfully
-     * distinct, which is important for compact product sheets.
+     * Rationale:
+     * - prevents the system from getting too noisy in fallback cases
+     * - still allows recovery when exact signals are sparse
      */
-    public const MAX_CHUNKS_PER_DOC = 3;
-    public const MIN_CHUNK_DISTANCE = 1.0;
+    public const THRESHOLD_FLOOR = 0.75;
+
+    /**
+     * Upper safety boundary for dynamic threshold adjustments.
+     *
+     * Rationale:
+     * - protects objection/pricing/list adjustments from becoming too strict
+     * - keeps retrieval from collapsing into empty result sets too easily
+     */
+    public const THRESHOLD_CEIL = 0.90;
+
+    /**
+     * Additional candidate expansion factor for list-like prompts.
+     *
+     * Rationale:
+     * - list requests benefit from wider candidate recall
+     * - too high would create noise across multiple retrieval channels
+     */
+    public const LIST_BONUS = 1.35;
 
     /**
      * Reciprocal Rank Fusion constant.
      *
-     * Slightly lower than classic defaults so top-ranked hits matter more.
+     * Rationale:
+     * - keep rank importance meaningful
+     * - but not so aggressive that one retrieval source dominates too hard
      */
     public const RRF_K = 50;
 
     /**
-     * Dynamic threshold clamp boundaries.
+     * Fallback size when thresholded fusion yields no candidates.
      *
-     * The floor must stay below the default threshold, otherwise the
-     * configured base threshold becomes ineffective.
+     * Rationale:
+     * - slightly larger safety net for the richer hybrid stack
+     * - helps no-tag and low-signal cases without exploding context
      */
-    public const THRESHOLD_FLOOR = 0.78;
-    public const THRESHOLD_CEIL = 0.90;
+    public const EMPTY_RRF_FALLBACK_TOPN = 5;
 
     /**
-     * Fallback breadth when strict thresholding removes all fused hits.
+     * Maximum number of chunks allowed from one document in spread mode.
      *
-     * More than one fallback result makes the retriever less brittle.
+     * Rationale:
+     * - preserve diversity across documents
+     * - still allow coherent multi-chunk retrieval from strong sources
      */
-    public const EMPTY_RRF_FALLBACK_TOPN = 3;
+    public const MAX_CHUNKS_PER_DOC = 2;
+
+    /**
+     * Minimum distance between chunk indices from the same document
+     * during spread-style selection.
+     *
+     * Rationale:
+     * - reduce near-duplicate neighboring chunks
+     * - still allow relevant continuation when needed
+     */
+    public const MIN_CHUNK_DISTANCE = 2;
 }
\ No newline at end of file
diff --git a/src/Config/QueryEnricherConfig.php b/src/Config/QueryEnricherConfig.php
index 8d79fb7..f4f2837 100644
--- a/src/Config/QueryEnricherConfig.php
+++ b/src/Config/QueryEnricherConfig.php
@@ -1,22 +1,180 @@
 <?php
 
+declare(strict_types=1);
+
 namespace App\Config;
 
-class QueryEnricherConfig
+final readonly class QueryEnricherConfig
 {
+    /**
+     * Keep the enrichment vocabulary in the class for now.
+     *
+     * Important:
+     * - This is intentionally NOT externalized yet.
+     * - Add or maintain the current project-specific mappings here.
+     * - The later move to external config/files can happen separately.
+     *
+     * Supported shapes:
+     *
+     * 1) Simple mapping:
+     * [
+     *     'water hardness' => 'residual hardness',
+     *     'device' => 'instrument',
+     * ]
+     *
+     * 2) Small synonym groups:
+     * [
+     *     ['water hardness', 'residual hardness', 'hardness'],
+     *     ['device', 'instrument', 'meter'],
+     * ]
+     *
+     * The public API stays intentionally simple:
+     * - getEnrichQueryList(): array<string,string>
+     *
+     * This keeps QueryEnricher generic while the domain vocabulary
+     * deliberately remains inside this class for now.
+     *
+     * Replace the example entries below with your real project mappings.
+     *
+     * @var array<int|string, mixed>
+     */
+    private const ENRICH_QUERY_LIST = [
+        // Example vocabulary only — replace / extend with the real project mappings.
+        // NOTE(review): the previous German rules (e.g. 'Wasserhärte' => 'Resthärte')
+        // are removed by this change; confirm whether they should be carried over.
+        // Simple one-to-one mappings (string key => string value).
+        'water hardness' => 'residual hardness',
+        'device' => 'instrument',
+        'gerät' => 'produkt',
+        'rebuild' => 'reindex',
+
+        // Synonym group (list entry); expanded into a chain by ingestGroup().
+        ['measuring device', 'meter', 'instrument'],
+    ];
+
+    /**
+     * Returns a normalized, deduplicated mapping for the QueryEnricher.
+     *
+     * Output format:
+     * [
+     *     'term a' => 'term b',
+     *     'term c' => 'term d',
+     * ]
+     *
+     * Rules:
+     * - ignore empty / invalid values
+     * - trim and normalize whitespace
+     * - ignore self-mappings
+     * - preserve first valid rule if duplicates normalize to the same key
+     *
+     * @return array<string, string>
+     */
     public function getEnrichQueryList(): array
     {
-        return [
-            'Wasserhärte' => 'Resthärte',
-            'Gerät' => 'Modell',
-            'Indikator' => 'Chemie',
-            'Seminar' => 'Webinar',
-            'Schulung' => 'Seminar',
-            'Indikatoren' => 'Indikator',
-            'Wasserhärte-Grenzwert' => 'Resthärte',
-            'Resthärte-Grenzwert' => 'Wasserhärte',
-            'Grenzwert' => 'Überwachungsbereich',
-            'store'=>'shop'
-        ];
+        // Built fresh on every call; the class holds no state.
+        $rules = [];
+
+        foreach (self::ENRICH_QUERY_LIST as $source => $target) {
+            // List-shaped entries are synonym groups and are expanded separately.
+            if (is_array($target)) {
+                $this->ingestGroup($rules, $target);
+                continue;
+            }
+
+            // Non-string keys/values normalize to '' and are rejected below.
+            $from = $this->normalizePhrase(is_string($source) ? $source : '');
+            $to = $this->normalizePhrase(is_string($target) ? $target : '');
+
+            // First valid rule wins; `??=` never overwrites an existing entry.
+            if ($this->isValidPair($from, $to)) {
+                $rules[$from] ??= $to;
+            }
+        }
+
+        return $rules;
+    }
+
+    /**
+     * Tells whether the normalized mapping contains at least one rule.
+     */
+    public function hasRules(): bool
+    {
+        return count($this->getEnrichQueryList()) > 0;
+    }
+
+    /**
+     * Expands one synonym group into chained rules and merges them into the mapping.
+     *
+     * Members are normalized and de-duplicated first; a group with fewer than
+     * two distinct phrases contributes nothing.
+     *
+     * Example: ['a', 'b', 'c'] contributes a => b and b => c. A rule whose left
+     * side already exists in $normalized is not overwritten.
+     *
+     * @param array<string, string> $normalized mapping being built; modified in place
+     * @param array<int|string, mixed> $group raw group members; non-strings are ignored
+     */
+    private function ingestGroup(array &$normalized, array $group): void
+    {
+        // Collect the usable members of the group.
+        // Keying by the normalized phrase removes duplicates while keeping
+        // the position of the first occurrence.
+        $unique = [];
+
+        foreach ($group as $member) {
+            $phrase = is_string($member) ? $this->normalizePhrase($member) : '';
+
+            if ($phrase !== '') {
+                $unique[$phrase] = $phrase;
+            }
+        }
+
+        $chain = array_values($unique);
+
+        // A single phrase (or none) cannot form a pair.
+        if (count($chain) < 2) {
+            return;
+        }
+
+        // Link each phrase to its direct successor only. Per the original
+        // author's note, QueryEnricher builds a bidirectional lookup later,
+        // so the config output is intentionally kept this small.
+        foreach (array_slice($chain, 1) as $index => $successor) {
+            $current = $chain[$index];
+
+            if (!$this->isValidPair($current, $successor)) {
+                continue;
+            }
+
+            // First rule for a given left side wins.
+            $normalized[$current] ??= $successor;
+        }
+    }
+
+    /**
+     * A pair is usable when both sides are non-empty and differ from each other.
+     *
+     * Callers in this class pass already-normalized phrases, so plain string
+     * comparison is sufficient here.
+     */
+    private function isValidPair(string $left, string $right): bool
+    {
+        $bothPresent = $left !== '' && $right !== '';
+
+        return $bothPresent && $left !== $right;
+    }
+
+    /**
+     * Canonical phrase form: trimmed, UTF-8 lower-cased, whitespace runs collapsed to one space.
+     */
+    private function normalizePhrase(string $value): string
+    {
+        $trimmed = trim($value);
+        if ($trimmed === '') {
+            return '';
+        }
+        $lowered = mb_strtolower($trimmed, 'UTF-8');
+
+        return trim(preg_replace('/\s+/u', ' ', $lowered) ?? $lowered);
+    }
 }
\ No newline at end of file
diff --git a/src/Ingest/VectorRebuildService.php b/src/Ingest/VectorRebuildService.php
index a79498f..0975ee8 100644
--- a/src/Ingest/VectorRebuildService.php
+++ b/src/Ingest/VectorRebuildService.php
@@ -6,36 +6,50 @@ namespace App\Ingest;
 
 use App\Index\IndexMetaManager;
 use App\Knowledge\ChunkManager;
+use App\Knowledge\Retrieval\NdjsonLexicalIndexBuilder;
 use App\Vector\VectorIndexBuilder;
 
 final readonly class VectorRebuildService
 {
     public function __construct(
         private VectorIndexBuilder $vectorBuilder,
-        private IndexMetaManager   $metaManager,
-        private ChunkManager       $chunkManager,
-    ) {}
+        private NdjsonLexicalIndexBuilder $lexicalIndexBuilder,
+        private IndexMetaManager $metaManager,
+        private ChunkManager $chunkManager,
+    ) {
+    }
 
     /**
-     * Führt einen vollständigen, deterministischen FAISS-Rebuild aus.
+     * Executes a full deterministic rebuild of all derived retrieval artifacts.
      *
-     * Ablauf:
-     * 1. Rebuild des Vector Index aus index.ndjson
-     * 2. Chunk-Zählung via ChunkManager
-     * 3. Runtime-Stats atomar aktualisieren
+     * Flow:
+     * 1. Ensure index_meta.json exists
+     * 2. Rebuild vector index from index.ndjson
+     * 3. Rebuild lexical index from index.ndjson
+     * 4. Count chunks streaming-safe
+     * 5. Update runtime stats atomically
+     *
+     * Important:
+     * - Vector and lexical index are both derived from the same NDJSON source
+     * - rebuilding both here prevents drift between semantic and lexical retrieval layers
+     * - failures in either derived artifact should fail the rebuild as a whole
+     * @throws \Throwable
      */
     public function rebuild(?string $logPath = null): void
     {
-        // ✅ Stelle sicher, dass index_meta.json existiert
+        // 1) Ensure index_meta.json exists before derived index work starts.
         $this->metaManager->ensureExists();
 
-        // 1️⃣ Vector Index neu bauen
+        // 2) Rebuild semantic vector index from index.ndjson.
         $this->vectorBuilder->rebuildFromNdjson($logPath);
 
-        // 2️⃣ Chunk Count streaming-safe zählen
+        // 3) Rebuild generic lexical index from the same NDJSON source.
+        $this->lexicalIndexBuilder->build();
+
+        // 4) Count chunks streaming-safe.
         $chunkCount = $this->chunkManager->countAllChunks();
 
-        // 3️⃣ Runtime-Stats aktualisieren (atomar)
+        // 5) Update runtime stats atomically.
         $this->metaManager->updateRuntimeStats($chunkCount);
     }
 }
\ No newline at end of file
diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
index 2b7ca25..a29e86f 100644
--- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
+++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
@@ -25,7 +25,8 @@ use RuntimeException;
  * - optionally short-circuit to catalog list output
  * - resolve exact document-title matches before semantic retrieval
  * - run vector retrieval globally and optionally document-scoped
- * - fuse both result sets with RRF-style scoring
+ * - run lexical retrieval globally and optionally document-scoped
+ * - fuse all result sets with RRF-style scoring
  * - apply selection rules for list queries vs. sales-style queries
  * - return either plain chunk texts or debug metadata
  */
@@ -40,35 +41,82 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     private const DOMINANT_DOC_MAX_CHUNKS = 4;
     private const EXACT_DOCUMENT_MAX_CHUNKS = 6;
 
-    public function __construct(
-        private NdjsonChunkLookup               $lookup,
-        private VectorSearchClient              $vectorClient,
-        private TagRoutingService               $tagRouting,
-        private ModelGenerationConfigRepository $configRepository,
-        private QueryCleaner                    $queryCleaner,
-        private IntentLite                      $intentLite,
-        private SalesIntentLite                 $salesIntentLite,
-        private CatalogIntentLite               $catalogIntent,
-        private IntentRouteResolver             $routeResolver,
-        private EntityCatalogService            $entityCatalogService,
-        private QueryEnricher                   $queryEnricher,
-    )
-    {
-    }
+    /**
+     * Conservative no-tag fallback:
+     * derive a temporary document scope only when the top global vector hits
+     * show repeated evidence for the same document(s).
+     */
+    private const PSEUDO_SCOPE_GLOBAL_WINDOW = 10;
+    private const PSEUDO_SCOPE_MIN_DOC_HITS = 2;
+    private const PSEUDO_SCOPE_MAX_DOCS = 3;
 
-    // =========================================================
-    // PUBLIC API
-    // =========================================================
+    /**
+     * Soft document candidates are derived from global lexical hits first.
+     * This stage is placed between tag-routing and vector-based pseudo scope.
+     */
+    private const SOFT_DOC_CANDIDATE_WINDOW = 8;
+    private const SOFT_DOC_CANDIDATE_MIN_DOC_HITS = 2;
+    private const SOFT_DOC_CANDIDATE_MAX_DOCS = 3;
+    private const SOFT_DOC_TOP_SCORE_MIN = 0.98;
+
+    /**
+     * Scoped retrieval is useful in both cases, but true tag-routing should
+     * stay stronger than soft candidates and pseudo-scoping.
+     */
+    private const TAG_SCOPED_VECTOR_BOOST = 1.20;
+    private const SOFT_DOC_SCOPED_VECTOR_BOOST = 1.12;
+    private const PSEUDO_SCOPED_VECTOR_BOOST = 1.08;
+
+    /**
+     * Secondary vector query should help recall/robustness, but must not
+     * overpower the primary enriched semantic query.
+     */
+    private const SECONDARY_GLOBAL_VECTOR_BOOST = 0.93;
+    private const SECONDARY_SCOPED_VECTOR_MULTIPLIER = 0.95;
+
+    /**
+     * Lexical retrieval should support precision, but not overpower vector routing.
+     */
+    private const LEXICAL_SCORE_THRESHOLD = 0.18;
+    private const GLOBAL_LEXICAL_BOOST = 0.90;
+    private const TAG_SCOPED_LEXICAL_BOOST = 1.04;
+    private const SOFT_DOC_SCOPED_LEXICAL_BOOST = 1.02;
+    private const PSEUDO_SCOPED_LEXICAL_BOOST = 1.00;
+
+    /**
+     * Conservative re-rank stage based on document title / metadata alignment.
+     *
+     * This is intentionally applied after fusion so it sharpens ranking
+     * without replacing the underlying retrieval sources.
+     */
+    private const TITLE_MATCH_BASE_BOOST = 0.04;
+    private const TITLE_MATCH_MAX_BOOST = 0.18;
+    private const FILE_MATCH_BASE_BOOST = 0.02;
+    private const FILE_MATCH_MAX_BOOST = 0.08;
+    private const META_MATCH_MAX_BOOST = 0.04;
+    private const EXACT_TITLE_PHRASE_BOOST = 0.08;
+    private const EXACT_FILE_PHRASE_BOOST = 0.04;
+    private const MAX_TITLE_METADATA_BOOST = 0.22;
+
+    public function __construct(
+        private NdjsonChunkLookup $lookup,
+        private VectorSearchClient $vectorClient,
+        private NdjsonKeywordRetriever $keywordRetriever,
+        private TagRoutingService $tagRouting,
+        private ModelGenerationConfigRepository $configRepository,
+        private QueryCleaner $queryCleaner,
+        private IntentLite $intentLite,
+        private SalesIntentLite $salesIntentLite,
+        private CatalogIntentLite $catalogIntent,
+        private IntentRouteResolver $routeResolver,
+        private EntityCatalogService $entityCatalogService,
+        private QueryEnricher $queryEnricher,
+    ) {
+    }
 
     /**
      * Returns the final retrieval payload as plain text chunks.
      *
-     * Behaviour:
-     * - loads active retrieval config
-     * - executes the full orchestration pipeline
-     * - if the route resolves to a catalog list, returns the catalog block only
-     * - otherwise returns the selected chunk texts
-     *
      * @throws Exception
      */
     public function retrieve(string $prompt): array
@@ -93,13 +141,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     /**
      * Returns a debug-friendly retrieval result with scoring/meta information.
      *
-     * This method is used for inspection and tuning:
-     * - selected chunk ids
-     * - raw vector scores
-     * - fused RRF scores
-     * - intent / route information
-     * - threshold and list-query flags
-     *
      * @throws Exception
      */
     public function retrieveDebug(string $prompt, ?ModelGenerationConfig $config = null): array
@@ -114,13 +155,40 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'document_id' => null,
                 'chunk_index' => null,
                 'raw_score' => null,
+                'raw_vector_score' => null,
+                'raw_keyword_score' => null,
                 'rrf_score' => null,
                 'threshold' => 0.0,
+                'lexical_threshold' => self::LEXICAL_SCORE_THRESHOLD,
                 'intent' => $result['intent'],
                 'route' => $result['route'],
                 'entity_label' => $result['entityLabel'],
                 'is_list_query' => true,
                 'selection_mode' => 'catalog_list',
+                'scope_mode' => 'catalog_list',
+                'clean_query' => null,
+                'semantic_query' => null,
+                'secondary_vector_query' => null,
+                'lexical_query' => null,
+                'tag_candidate_doc_ids' => [],
+                'soft_document_candidate_doc_ids' => [],
+                'pseudo_scope_doc_ids' => [],
+                'global_hit_count' => 0,
+                'scoped_hit_count' => 0,
+                'global_vector_hit_count' => 0,
+                'global_primary_vector_hit_count' => 0,
+                'global_secondary_vector_hit_count' => 0,
+                'global_keyword_hit_count' => 0,
+                'scoped_vector_hit_count' => 0,
+                'scoped_primary_vector_hit_count' => 0,
+                'scoped_secondary_vector_hit_count' => 0,
+                'scoped_keyword_hit_count' => 0,
+                'scoped_boost_factor' => 0.0,
+                'scoped_vector_boost_factor' => 0.0,
+                'secondary_scoped_vector_boost_factor' => 0.0,
+                'scoped_keyword_boost_factor' => 0.0,
+                'title_metadata_boost' => 0.0,
+                'title_metadata_doc_boosts' => [],
                 'text' => $result['catalogBlock'],
             ]];
         }
@@ -139,19 +207,49 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
             $rank++;
 
+            $rawVectorScore = $result['rawVectorScores'][$chunkId] ?? null;
+            $rawKeywordScore = $result['rawKeywordScores'][$chunkId] ?? null;
+
             $out[] = [
                 'rank' => $rank,
                 'chunk_id' => $chunkId,
                 'document_id' => $result['rows'][$chunkId]['document_id'] ?? null,
                 'chunk_index' => $result['rows'][$chunkId]['chunk_index'] ?? null,
-                'raw_score' => $result['rawScores'][$chunkId] ?? null,
+                'raw_score' => $this->maxNullableFloat($rawVectorScore, $rawKeywordScore),
+                'raw_vector_score' => $rawVectorScore,
+                'raw_keyword_score' => $rawKeywordScore,
                 'rrf_score' => $result['rrfScores'][$chunkId] ?? null,
                 'threshold' => $result['threshold'],
+                'lexical_threshold' => self::LEXICAL_SCORE_THRESHOLD,
                 'intent' => $result['intent'],
                 'route' => $result['route'],
                 'entity_label' => $result['entityLabel'],
                 'is_list_query' => $result['isListQuery'],
                 'selection_mode' => $result['selectionMode'],
+                'scope_mode' => $result['scopeMode'],
+                'clean_query' => $result['cleanQuery'],
+                'semantic_query' => $result['semanticQuery'],
+                'secondary_vector_query' => $result['secondaryVectorQuery'],
+                'lexical_query' => $result['lexicalQuery'],
+                'tag_candidate_doc_ids' => $result['tagCandidateDocIds'],
+                'soft_document_candidate_doc_ids' => $result['softDocumentCandidateDocIds'],
+                'pseudo_scope_doc_ids' => $result['pseudoScopeDocIds'],
+                'global_hit_count' => $result['globalHitCount'],
+                'scoped_hit_count' => $result['scopedHitCount'],
+                'global_vector_hit_count' => $result['globalVectorHitCount'],
+                'global_primary_vector_hit_count' => $result['globalPrimaryVectorHitCount'],
+                'global_secondary_vector_hit_count' => $result['globalSecondaryVectorHitCount'],
+                'global_keyword_hit_count' => $result['globalKeywordHitCount'],
+                'scoped_vector_hit_count' => $result['scopedVectorHitCount'],
+                'scoped_primary_vector_hit_count' => $result['scopedPrimaryVectorHitCount'],
+                'scoped_secondary_vector_hit_count' => $result['scopedSecondaryVectorHitCount'],
+                'scoped_keyword_hit_count' => $result['scopedKeywordHitCount'],
+                'scoped_boost_factor' => $result['scopedBoostFactor'],
+                'scoped_vector_boost_factor' => $result['scopedVectorBoostFactor'],
+                'secondary_scoped_vector_boost_factor' => $result['secondaryScopedVectorBoostFactor'],
+                'scoped_keyword_boost_factor' => $result['scopedKeywordBoostFactor'],
+                'title_metadata_boost' => $result['titleMetadataBoosts'][$chunkId] ?? 0.0,
+                'title_metadata_doc_boosts' => $result['titleMetadataDocBoosts'],
                 'text' => trim((string)$result['rows'][$chunkId]['text']),
             ];
         }
@@ -159,29 +257,16 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
-    // =========================================================
-    // CENTRAL ORCHESTRATION
-    // =========================================================
-
     /**
      * Central orchestration entrypoint.
      *
-     * Pipeline:
-     * 1. Detect catalog entity and sales intent
-     * 2. Resolve route
-     * 3. If route is a catalog list route, try direct catalog output
-     * 4. If prompt matches one exact document title, use exact-document fast path
-     * 5. Otherwise, run the normal hybrid retrieval core
-     * 6. Select final chunk ids depending on query type
-     *
      * @throws Exception
      */
     private function execute(
-        string                $prompt,
+        string $prompt,
         ModelGenerationConfig $config,
-        bool                  $withScores
-    ): array
-    {
+        bool $withScores
+    ): array {
         $entityLabel = $this->catalogIntent->detect($prompt);
         $salesIntent = $this->detectSalesIntent($prompt);
         $route = $this->routeResolver->resolve($salesIntent, $entityLabel);
@@ -196,10 +281,35 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     'intent' => $salesIntent,
                     'isListQuery' => true,
                     'selectionMode' => 'catalog_list',
+                    'scopeMode' => 'catalog_list',
+                    'cleanQuery' => null,
+                    'semanticQuery' => null,
+                    'secondaryVectorQuery' => null,
+                    'lexicalQuery' => null,
+                    'tagCandidateDocIds' => [],
+                    'softDocumentCandidateDocIds' => [],
+                    'pseudoScopeDocIds' => [],
+                    'globalHitCount' => 0,
+                    'scopedHitCount' => 0,
+                    'globalVectorHitCount' => 0,
+                    'globalPrimaryVectorHitCount' => 0,
+                    'globalSecondaryVectorHitCount' => 0,
+                    'globalKeywordHitCount' => 0,
+                    'scopedVectorHitCount' => 0,
+                    'scopedPrimaryVectorHitCount' => 0,
+                    'scopedSecondaryVectorHitCount' => 0,
+                    'scopedKeywordHitCount' => 0,
+                    'scopedBoostFactor' => 0.0,
+                    'scopedVectorBoostFactor' => 0.0,
+                    'secondaryScopedVectorBoostFactor' => 0.0,
+                    'scopedKeywordBoostFactor' => 0.0,
                     'selectedChunkIds' => [],
                     'rows' => [],
                     'rrfScores' => [],
-                    'rawScores' => [],
+                    'rawVectorScores' => [],
+                    'rawKeywordScores' => [],
+                    'titleMetadataBoosts' => [],
+                    'titleMetadataDocBoosts' => [],
                     'threshold' => 0.0,
                     'catalogBlock' => trim($catalogBlock),
                 ];
@@ -221,10 +331,35 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     'intent' => $salesIntent,
                     'isListQuery' => false,
                     'selectionMode' => 'exact_document_title',
+                    'scopeMode' => 'exact_document_title',
+                    'cleanQuery' => null,
+                    'semanticQuery' => null,
+                    'secondaryVectorQuery' => null,
+                    'lexicalQuery' => null,
+                    'tagCandidateDocIds' => [],
+                    'softDocumentCandidateDocIds' => [],
+                    'pseudoScopeDocIds' => [],
+                    'globalHitCount' => 0,
+                    'scopedHitCount' => 0,
+                    'globalVectorHitCount' => 0,
+                    'globalPrimaryVectorHitCount' => 0,
+                    'globalSecondaryVectorHitCount' => 0,
+                    'globalKeywordHitCount' => 0,
+                    'scopedVectorHitCount' => 0,
+                    'scopedPrimaryVectorHitCount' => 0,
+                    'scopedSecondaryVectorHitCount' => 0,
+                    'scopedKeywordHitCount' => 0,
+                    'scopedBoostFactor' => 0.0,
+                    'scopedVectorBoostFactor' => 0.0,
+                    'secondaryScopedVectorBoostFactor' => 0.0,
+                    'scopedKeywordBoostFactor' => 0.0,
                     'selectedChunkIds' => $selectedChunkIds,
                     'rows' => $exactDocumentMatch['rows'],
                     'rrfScores' => $this->buildExactDocumentScores($selectedChunkIds),
-                    'rawScores' => [],
+                    'rawVectorScores' => [],
+                    'rawKeywordScores' => [],
+                    'titleMetadataBoosts' => [],
+                    'titleMetadataDocBoosts' => [],
                     'threshold' => 1.0,
                     'catalogBlock' => null,
                 ];
@@ -240,10 +375,39 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'intent' => $salesIntent,
                 'isListQuery' => $core['is_list_query'],
                 'selectionMode' => null,
+                'scopeMode' => $core['scope_mode'],
+                'cleanQuery' => $core['clean_query'],
+                'semanticQuery' => $core['semantic_query'],
+                'secondaryVectorQuery' => $core['secondary_vector_query'],
+                'lexicalQuery' => $core['lexical_query'],
+                'tagCandidateDocIds' => $core['tag_candidate_doc_ids'],
+                'softDocumentCandidateDocIds' => $core['soft_document_candidate_doc_ids'],
+                'pseudoScopeDocIds' => $core['pseudo_scope_doc_ids'],
+                'globalHitCount' => $core['global_hit_count'],
+                'scopedHitCount' => $core['scoped_hit_count'],
+                'globalVectorHitCount' => $core['global_vector_hit_count'],
+                'globalPrimaryVectorHitCount' => $core['global_primary_vector_hit_count'],
+                'globalSecondaryVectorHitCount' => $core['global_secondary_vector_hit_count'],
+                'globalKeywordHitCount' => $core['global_keyword_hit_count'],
+                'scopedVectorHitCount' => $core['scoped_vector_hit_count'],
+                'scopedPrimaryVectorHitCount' => $core['scoped_primary_vector_hit_count'],
+                'scopedSecondaryVectorHitCount' => $core['scoped_secondary_vector_hit_count'],
+                'scopedKeywordHitCount' => $core['scoped_keyword_hit_count'],
+                'scopedBoostFactor' => max(
+                    $core['scoped_vector_boost_factor'],
+                    $core['secondary_scoped_vector_boost_factor'],
+                    $core['scoped_keyword_boost_factor']
+                ),
+                'scopedVectorBoostFactor' => $core['scoped_vector_boost_factor'],
+                'secondaryScopedVectorBoostFactor' => $core['secondary_scoped_vector_boost_factor'],
+                'scopedKeywordBoostFactor' => $core['scoped_keyword_boost_factor'],
                 'selectedChunkIds' => [],
                 'rows' => [],
                 'rrfScores' => [],
-                'rawScores' => [],
+                'rawVectorScores' => [],
+                'rawKeywordScores' => [],
+                'titleMetadataBoosts' => $core['title_metadata_boosts'],
+                'titleMetadataDocBoosts' => $core['title_metadata_doc_boosts'],
                 'threshold' => $core['threshold'],
                 'catalogBlock' => null,
             ];
@@ -273,107 +437,272 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             'intent' => $salesIntent,
             'isListQuery' => $core['is_list_query'],
             'selectionMode' => $selectionMode,
+            'scopeMode' => $core['scope_mode'],
+            'cleanQuery' => $core['clean_query'],
+            'semanticQuery' => $core['semantic_query'],
+            'secondaryVectorQuery' => $core['secondary_vector_query'],
+            'lexicalQuery' => $core['lexical_query'],
+            'tagCandidateDocIds' => $core['tag_candidate_doc_ids'],
+            'softDocumentCandidateDocIds' => $core['soft_document_candidate_doc_ids'],
+            'pseudoScopeDocIds' => $core['pseudo_scope_doc_ids'],
+            'globalHitCount' => $core['global_hit_count'],
+            'scopedHitCount' => $core['scoped_hit_count'],
+            'globalVectorHitCount' => $core['global_vector_hit_count'],
+            'globalPrimaryVectorHitCount' => $core['global_primary_vector_hit_count'],
+            'globalSecondaryVectorHitCount' => $core['global_secondary_vector_hit_count'],
+            'globalKeywordHitCount' => $core['global_keyword_hit_count'],
+            'scopedVectorHitCount' => $core['scoped_vector_hit_count'],
+            'scopedPrimaryVectorHitCount' => $core['scoped_primary_vector_hit_count'],
+            'scopedSecondaryVectorHitCount' => $core['scoped_secondary_vector_hit_count'],
+            'scopedKeywordHitCount' => $core['scoped_keyword_hit_count'],
+            'scopedBoostFactor' => max(
+                $core['scoped_vector_boost_factor'],
+                $core['secondary_scoped_vector_boost_factor'],
+                $core['scoped_keyword_boost_factor']
+            ),
+            'scopedVectorBoostFactor' => $core['scoped_vector_boost_factor'],
+            'secondaryScopedVectorBoostFactor' => $core['secondary_scoped_vector_boost_factor'],
+            'scopedKeywordBoostFactor' => $core['scoped_keyword_boost_factor'],
             'selectedChunkIds' => $selectedChunkIds,
             'rows' => $core['rows'],
             'rrfScores' => $core['rrf_scores'],
-            'rawScores' => $core['raw_scores'],
+            'rawVectorScores' => $core['raw_vector_scores'],
+            'rawKeywordScores' => $core['raw_keyword_scores'],
+            'titleMetadataBoosts' => $core['title_metadata_boosts'],
+            'titleMetadataDocBoosts' => $core['title_metadata_doc_boosts'],
             'threshold' => $core['threshold'],
             'catalogBlock' => null,
         ];
     }
 
-    // =========================================================
-    // CORE PIPELINE
-    // =========================================================
-
     /**
      * Executes the actual hybrid retrieval logic.
      *
-     * Steps:
-     * - derive limits from config within hard safety caps
-     * - detect whether the prompt is a "list query"
-     * - clean and enrich the prompt
-     * - compute threshold + vector topK based on intent/query type
-     * - route query into candidate document ids via tag routing
-     * - run global and optional scoped vector search
-     * - fuse hits
-     * - resolve chunk ids to chunk rows
-     *
      * @throws Exception
      */
     private function runCore(
-        string                $prompt,
+        string $prompt,
         ModelGenerationConfig $config,
-        bool                  $withScores,
-        string                $salesIntent
-    ): array
-    {
+        bool $withScores,
+        string $salesIntent
+    ): array {
         $limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS));
         $vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
 
         $isListQuery = $this->intentLite->isListQuery($prompt);
 
         $cleanQuery = $this->queryCleaner->clean($prompt);
-        $cleanQuery = $this->queryEnricher->enrichPrompt($cleanQuery);
 
         if ($cleanQuery === '') {
             return [
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
+                'clean_query' => '',
+                'semantic_query' => '',
+                'secondary_vector_query' => '',
+                'lexical_query' => '',
+                'scope_mode' => 'none',
+                'tag_candidate_doc_ids' => [],
+                'soft_document_candidate_doc_ids' => [],
+                'pseudo_scope_doc_ids' => [],
+                'global_hit_count' => 0,
+                'scoped_hit_count' => 0,
+                'global_vector_hit_count' => 0,
+                'global_primary_vector_hit_count' => 0,
+                'global_secondary_vector_hit_count' => 0,
+                'global_keyword_hit_count' => 0,
+                'scoped_vector_hit_count' => 0,
+                'scoped_primary_vector_hit_count' => 0,
+                'scoped_secondary_vector_hit_count' => 0,
+                'scoped_keyword_hit_count' => 0,
+                'scoped_vector_boost_factor' => 0.0,
+                'secondary_scoped_vector_boost_factor' => 0.0,
+                'scoped_keyword_boost_factor' => 0.0,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_scores' => [],
+                'raw_vector_scores' => [],
+                'raw_keyword_scores' => [],
+                'title_metadata_boosts' => [],
+                'title_metadata_doc_boosts' => [],
             ];
         }
 
+        $semanticQuery = $this->queryEnricher->enrichPrompt($cleanQuery);
+        $secondaryVectorQuery = $cleanQuery !== $semanticQuery ? $cleanQuery : '';
+        $lexicalQuery = $cleanQuery;
+
         [$threshold, $topK] = $this->computeThresholdAndTopK(
             $salesIntent,
             $isListQuery,
             $vectorTopKBase
         );
 
-        $candidateDocIds = $this->tagRouting->route($cleanQuery);
-        $candidateDocIds = is_array($candidateDocIds)
+        $tagCandidateDocIds = $this->tagRouting->route($semanticQuery);
+        $tagCandidateDocIds = is_array($tagCandidateDocIds)
             ? array_values(array_unique(array_filter(
-                $candidateDocIds,
+                $tagCandidateDocIds,
                 static fn(mixed $value): bool => is_string($value) && $value !== ''
             )))
             : [];
 
-        $globalHits = $this->vectorClient->search($cleanQuery, $topK);
+        $globalPrimaryVectorHits = $this->vectorClient->search($semanticQuery, $topK);
+        $globalSecondaryVectorHits = $secondaryVectorQuery !== ''
+            ? $this->vectorClient->search($secondaryVectorQuery, $topK)
+            : [];
+        $globalKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK);
 
-        $scopedHits = [];
-        if ($candidateDocIds !== []) {
-            $scopedHits = $this->vectorClient->searchScoped($cleanQuery, $topK, $candidateDocIds);
+        $softDocumentCandidateDocIds = [];
+        $pseudoScopeDocIds = [];
+        $scopeMode = 'none';
+
+        $scopedVectorBoostFactor = 0.0;
+        $secondaryScopedVectorBoostFactor = 0.0;
+        $scopedKeywordBoostFactor = 0.0;
+
+        $scopedPrimaryVectorHits = [];
+        $scopedSecondaryVectorHits = [];
+        $scopedKeywordHits = [];
+
+        if ($tagCandidateDocIds !== []) {
+            $scopeMode = 'tag_routing';
+            $scopedVectorBoostFactor = self::TAG_SCOPED_VECTOR_BOOST;
+            $secondaryScopedVectorBoostFactor = self::TAG_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
+            $scopedKeywordBoostFactor = self::TAG_SCOPED_LEXICAL_BOOST;
+
+            $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $tagCandidateDocIds);
+            $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
+                ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $tagCandidateDocIds)
+                : [];
+            $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $tagCandidateDocIds);
+        } else {
+            $softDocumentCandidateDocIds = $this->deriveSoftDocumentCandidateDocIds($globalKeywordHits);
+
+            if ($softDocumentCandidateDocIds !== []) {
+                $scopeMode = 'soft_document_candidate';
+                $scopedVectorBoostFactor = self::SOFT_DOC_SCOPED_VECTOR_BOOST;
+                $secondaryScopedVectorBoostFactor = self::SOFT_DOC_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
+                $scopedKeywordBoostFactor = self::SOFT_DOC_SCOPED_LEXICAL_BOOST;
+
+                $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $softDocumentCandidateDocIds);
+                $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
+                    ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $softDocumentCandidateDocIds)
+                    : [];
+                $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $softDocumentCandidateDocIds);
+            } else {
+                $pseudoScopeDocIds = $this->derivePseudoScopeDocumentIds($globalPrimaryVectorHits);
+
+                if ($pseudoScopeDocIds !== []) {
+                    $scopeMode = 'pseudo_scope';
+                    $scopedVectorBoostFactor = self::PSEUDO_SCOPED_VECTOR_BOOST;
+                    $secondaryScopedVectorBoostFactor = self::PSEUDO_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
+                    $scopedKeywordBoostFactor = self::PSEUDO_SCOPED_LEXICAL_BOOST;
+
+                    $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $pseudoScopeDocIds);
+                    $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
+                        ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $pseudoScopeDocIds)
+                        : [];
+                    $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $pseudoScopeDocIds);
+                }
+            }
         }
 
-        if ($globalHits === [] && $scopedHits === []) {
+        if (
+            $globalPrimaryVectorHits === []
+            && $globalSecondaryVectorHits === []
+            && $globalKeywordHits === []
+            && $scopedPrimaryVectorHits === []
+            && $scopedSecondaryVectorHits === []
+            && $scopedKeywordHits === []
+        ) {
             return [
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => $threshold,
+                'clean_query' => $cleanQuery,
+                'semantic_query' => $semanticQuery,
+                'secondary_vector_query' => $secondaryVectorQuery,
+                'lexical_query' => $lexicalQuery,
+                'scope_mode' => $scopeMode,
+                'tag_candidate_doc_ids' => $tagCandidateDocIds,
+                'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
+                'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
+                'global_hit_count' => 0,
+                'scoped_hit_count' => 0,
+                'global_vector_hit_count' => 0,
+                'global_primary_vector_hit_count' => 0,
+                'global_secondary_vector_hit_count' => 0,
+                'global_keyword_hit_count' => 0,
+                'scoped_vector_hit_count' => 0,
+                'scoped_primary_vector_hit_count' => 0,
+                'scoped_secondary_vector_hit_count' => 0,
+                'scoped_keyword_hit_count' => 0,
+                'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
+                'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
+                'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_scores' => [],
+                'raw_vector_scores' => [],
+                'raw_keyword_scores' => [],
+                'title_metadata_boosts' => [],
+                'title_metadata_doc_boosts' => [],
             ];
         }
 
-        $fused = $this->fuseHits(
-            $globalHits,
-            $scopedHits,
-            $threshold,
-            $scopedHits !== [],
-            $withScores
-        );
+        $fused = $this->fuseHitSources([
+            [
+                'hits' => $globalPrimaryVectorHits,
+                'threshold' => $threshold,
+                'boost' => 1.0,
+                'bucket' => 'vector',
+            ],
+            [
+                'hits' => $globalSecondaryVectorHits,
+                'threshold' => $threshold,
+                'boost' => self::SECONDARY_GLOBAL_VECTOR_BOOST,
+                'bucket' => 'vector',
+            ],
+            [
+                'hits' => $globalKeywordHits,
+                'threshold' => self::LEXICAL_SCORE_THRESHOLD,
+                'boost' => self::GLOBAL_LEXICAL_BOOST,
+                'bucket' => 'keyword',
+            ],
+            [
+                'hits' => $scopedPrimaryVectorHits,
+                'threshold' => $threshold,
+                'boost' => $scopedVectorBoostFactor,
+                'bucket' => 'vector',
+            ],
+            [
+                'hits' => $scopedSecondaryVectorHits,
+                'threshold' => $threshold,
+                'boost' => $secondaryScopedVectorBoostFactor,
+                'bucket' => 'vector',
+            ],
+            [
+                'hits' => $scopedKeywordHits,
+                'threshold' => self::LEXICAL_SCORE_THRESHOLD,
+                'boost' => $scopedKeywordBoostFactor,
+                'bucket' => 'keyword',
+            ],
+        ], $withScores);
 
         $rrfScores = $fused['rrf_scores'];
-        $rawScores = $fused['raw_scores'];
+        $rawVectorScores = $fused['raw_vector_scores'];
+        $rawKeywordScores = $fused['raw_keyword_scores'];
 
-        if ($rrfScores === [] && $globalHits !== []) {
-            $rrfScores = $this->fallbackRrfFromHits($globalHits);
+        if ($rrfScores === []) {
+            $rrfScores = $this->fallbackRrfFromSources(
+                $globalPrimaryVectorHits,
+                $globalSecondaryVectorHits,
+                $globalKeywordHits,
+                $scopedPrimaryVectorHits,
+                $scopedSecondaryVectorHits,
+                $scopedKeywordHits
+            );
         }
 
         if ($rrfScores === []) {
@@ -381,13 +710,45 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => $threshold,
+                'clean_query' => $cleanQuery,
+                'semantic_query' => $semanticQuery,
+                'secondary_vector_query' => $secondaryVectorQuery,
+                'lexical_query' => $lexicalQuery,
+                'scope_mode' => $scopeMode,
+                'tag_candidate_doc_ids' => $tagCandidateDocIds,
+                'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
+                'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
+                'global_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits) + count($globalKeywordHits),
+                'scoped_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits) + count($scopedKeywordHits),
+                'global_vector_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits),
+                'global_primary_vector_hit_count' => count($globalPrimaryVectorHits),
+                'global_secondary_vector_hit_count' => count($globalSecondaryVectorHits),
+                'global_keyword_hit_count' => count($globalKeywordHits),
+                'scoped_vector_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits),
+                'scoped_primary_vector_hit_count' => count($scopedPrimaryVectorHits),
+                'scoped_secondary_vector_hit_count' => count($scopedSecondaryVectorHits),
+                'scoped_keyword_hit_count' => count($scopedKeywordHits),
+                'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
+                'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
+                'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_scores' => $rawScores,
+                'raw_vector_scores' => $rawVectorScores,
+                'raw_keyword_scores' => $rawKeywordScores,
+                'title_metadata_boosts' => [],
+                'title_metadata_doc_boosts' => [],
             ];
         }
 
+        $rows = $this->lookup->findByChunkIds(array_keys($rrfScores));
+
+        [$rrfScores, $titleMetadataBoosts, $titleMetadataDocBoosts] = $this->applyTitleMetadataBoosts(
+            $rrfScores,
+            $rows,
+            $lexicalQuery
+        );
+
         arsort($rrfScores);
         $rankedChunkIds = array_keys($rrfScores);
 
@@ -397,22 +758,38 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             'limit' => $limit,
             'is_list_query' => $isListQuery,
             'threshold' => $threshold,
+            'clean_query' => $cleanQuery,
+            'semantic_query' => $semanticQuery,
+            'secondary_vector_query' => $secondaryVectorQuery,
+            'lexical_query' => $lexicalQuery,
+            'scope_mode' => $scopeMode,
+            'tag_candidate_doc_ids' => $tagCandidateDocIds,
+            'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
+            'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
+            'global_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits) + count($globalKeywordHits),
+            'scoped_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits) + count($scopedKeywordHits),
+            'global_vector_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits),
+            'global_primary_vector_hit_count' => count($globalPrimaryVectorHits),
+            'global_secondary_vector_hit_count' => count($globalSecondaryVectorHits),
+            'global_keyword_hit_count' => count($globalKeywordHits),
+            'scoped_vector_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits),
+            'scoped_primary_vector_hit_count' => count($scopedPrimaryVectorHits),
+            'scoped_secondary_vector_hit_count' => count($scopedSecondaryVectorHits),
+            'scoped_keyword_hit_count' => count($scopedKeywordHits),
+            'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
+            'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
+            'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
             'ranked_chunk_ids' => $rankedChunkIds,
             'rows' => $rows,
             'rrf_scores' => $rrfScores,
-            'raw_scores' => $rawScores,
+            'raw_vector_scores' => $rawVectorScores,
+            'raw_keyword_scores' => $rawKeywordScores,
+            'title_metadata_boosts' => $titleMetadataBoosts,
+            'title_metadata_doc_boosts' => $titleMetadataDocBoosts,
         ];
     }
 
-    // =========================================================
-    // SUPPORT
-    // =========================================================
 
-    /**
-     * Loads the active model generation config.
-     *
-     * Retrieval is not allowed to proceed without an active config.
-     */
     private function requireConfig(): ModelGenerationConfig
     {
         $config = $this->configRepository->findActiveForModel();
@@ -424,32 +801,18 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $config;
     }
 
-    /**
-     * Extracts the normalized sales intent string from the intent detector.
-     *
-     * Falls back to DISCOVERY when the detector payload is incomplete.
-     */
     private function detectSalesIntent(string $prompt): string
     {
         $data = $this->salesIntentLite->detect($prompt);
 
-        return (string)($data['intent'] ?? SalesIntentLite::DISCOVERY);
+        return (string) ($data['intent'] ?? SalesIntentLite::DISCOVERY);
     }
 
-    /**
-     * Computes retrieval threshold and vector topK.
-     *
-     * Rules:
-     * - objection/pricing intents are slightly stricter
-     * - list queries are allowed to retrieve a wider candidate set
-     * - all values are clamped to global hard limits
-     */
     private function computeThresholdAndTopK(
         string $salesIntent,
-        bool   $isListQuery,
-        int    $vectorTopKBase
-    ): array
-    {
+        bool $isListQuery,
+        int $vectorTopKBase
+    ): array {
         $threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD;
         $topK = $vectorTopKBase;
 
@@ -461,7 +824,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         }
 
         if ($isListQuery) {
-            $topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
+            $topK = (int) round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
         }
 
         $topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
@@ -474,26 +837,175 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     }
 
     /**
-     * Fuses multiple hit lists into one RRF-style score map.
-     *
-     * Notes:
-     * - only hits above threshold are considered
-     * - rank position within each hit list contributes to the final score
-     * - scoped hits can be boosted
-     * - raw scores are optionally captured for debug output
+     * @param array<int, array<string,mixed>> $globalKeywordHits
+     * @return string[]
      */
-    private function fuseHits(
-        array $globalHits,
-        array $scopedHits,
-        float $threshold,
-        bool  $boostScoped,
-        bool  $captureRaw
-    ): array
+    private function deriveSoftDocumentCandidateDocIds(array $globalKeywordHits): array
+    {
+        // Only the leading window of the keyword ranking votes for candidate documents.
+        $leadingHits = array_slice($globalKeywordHits, 0, self::SOFT_DOC_CANDIDATE_WINDOW);
+        $perDocument = [];
+
+        foreach ($leadingHits as $position => $entry) {
+            $docId = $entry['document_id'] ?? null;
+            if (!is_string($docId) || $docId === '') {
+                continue;
+            }
+
+            $hitScore = 0.0;
+            if (isset($entry['score']) && is_numeric($entry['score'])) {
+                $hitScore = (float) $entry['score'];
+            }
+
+            $perDocument[$docId] ??= [
+                'document_id' => $docId,
+                'count' => 0,
+                'best_rank' => $position,
+                'best_score' => $hitScore,
+            ];
+
+            $perDocument[$docId]['count']++;
+            $perDocument[$docId]['best_rank'] = min($perDocument[$docId]['best_rank'], $position);
+            $perDocument[$docId]['best_score'] = max($perDocument[$docId]['best_score'], $hitScore);
+        }
+
+        if ($perDocument === []) {
+            return [];
+        }
+
+        // Order: hit count desc, best score desc (beyond a 1e-6 tolerance), best rank asc.
+        uasort($perDocument, static function (array $left, array $right): int {
+            $byCount = $right['count'] <=> $left['count'];
+            if ($byCount !== 0) {
+                return $byCount;
+            }
+
+            $leftScore = (float) $left['best_score'];
+            $rightScore = (float) $right['best_score'];
+
+            return abs($leftScore - $rightScore) > 0.000001
+                ? ($rightScore <=> $leftScore)
+                : ($left['best_rank'] <=> $right['best_rank']);
+        });
+
+        $candidates = [];
+
+        foreach ($perDocument as $aggregate) {
+            $hasEnoughHits = (int) $aggregate['count'] >= self::SOFT_DOC_CANDIDATE_MIN_DOC_HITS;
+            $isStrongTopHit = (int) $aggregate['best_rank'] === 0
+                && (float) $aggregate['best_score'] >= self::SOFT_DOC_TOP_SCORE_MIN;
+
+            // Qualify through repeated hits, or by owning a sufficiently strong first-place hit.
+            if (!$hasEnoughHits && !$isStrongTopHit) {
+                continue;
+            }
+
+            $candidates[] = (string) $aggregate['document_id'];
+
+            if (count($candidates) >= self::SOFT_DOC_CANDIDATE_MAX_DOCS) {
+                break;
+            }
+        }
+
+        return $candidates;
+    }
+
+    /**
+     * Picks the documents that recur among the leading global vector hits.
+     *
+     * @param array<int, array<string,mixed>> $globalPrimaryVectorHits
+     * @return string[] Document ids ordered by hit count, then best score, then best rank
+     */
+    private function derivePseudoScopeDocumentIds(array $globalPrimaryVectorHits): array
+    {
+        $leadingHits = array_slice($globalPrimaryVectorHits, 0, self::PSEUDO_SCOPE_GLOBAL_WINDOW);
+        $perDocument = [];
+
+        foreach ($leadingHits as $position => $entry) {
+            $docId = $entry['document_id'] ?? null;
+            if (!is_string($docId) || $docId === '') {
+                continue;
+            }
+
+            $hitScore = 0.0;
+            if (isset($entry['score']) && is_numeric($entry['score'])) {
+                $hitScore = (float) $entry['score'];
+            }
+
+            $perDocument[$docId] ??= [
+                'document_id' => $docId,
+                'count' => 0,
+                'best_rank' => $position,
+                'best_score' => $hitScore,
+            ];
+
+            $perDocument[$docId]['count']++;
+            $perDocument[$docId]['best_rank'] = min($perDocument[$docId]['best_rank'], $position);
+            $perDocument[$docId]['best_score'] = max($perDocument[$docId]['best_score'], $hitScore);
+        }
+
+        if ($perDocument === []) {
+            return [];
+        }
+
+        uasort($perDocument, static function (array $left, array $right): int {
+            $byCount = $right['count'] <=> $left['count'];
+            if ($byCount !== 0) {
+                return $byCount;
+            }
+
+            $leftScore = (float) $left['best_score'];
+            $rightScore = (float) $right['best_score'];
+            return abs($leftScore - $rightScore) > 0.000001
+                ? ($rightScore <=> $leftScore)
+                : ($left['best_rank'] <=> $right['best_rank']);
+        });
+
+        $scopeDocIds = [];
+
+        foreach ($perDocument as $aggregate) {
+            if ((int) $aggregate['count'] < self::PSEUDO_SCOPE_MIN_DOC_HITS) {
+                continue;
+            }
+
+            $scopeDocIds[] = (string) $aggregate['document_id'];
+            if (count($scopeDocIds) >= self::PSEUDO_SCOPE_MAX_DOCS) {
+                break;
+            }
+        }
+
+        return $scopeDocIds;
+    }
+
+    /**
+     * @param array<int, array{
+     *   hits: array<int, array<string,mixed>>,
+     *   threshold: float,
+     *   boost: float,
+     *   bucket: string
+     * }> $sources
+     * @return array{
+     *   rrf_scores: array<string,float>,
+     *   raw_vector_scores: array<string,float>,
+     *   raw_keyword_scores: array<string,float>
+     * }
+     */
+    private function fuseHitSources(array $sources, bool $captureRaw): array
     {
         $rrfScores = [];
-        $rawScores = [];
+        $rawVectorScores = [];
+        $rawKeywordScores = [];
+
+        foreach ($sources as $source) {
+            $hits = $source['hits'];
+            $threshold = (float) $source['threshold'];
+            $boost = max(0.0, (float) $source['boost']);
+            $bucket = (string) $source['bucket'];
+
+            if ($hits === [] || $boost <= 0.0) {
+                continue;
+            }
 
-        $apply = function (array $hits, bool $boost) use (&$rrfScores, &$rawScores, $threshold, $captureRaw): void {
             $rank = 0;
 
             foreach ($hits as $hit) {
@@ -501,85 +1013,83 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     continue;
                 }
 
-                $raw = (float)$hit['score'];
+                $raw = (float) $hit['score'];
 
                 if ($raw < $threshold) {
                     continue;
                 }
 
-                $chunkId = (string)$hit['chunk_id'];
+                $chunkId = (string) $hit['chunk_id'];
 
                 if ($captureRaw) {
-                    $rawScores[$chunkId] = max($rawScores[$chunkId] ?? 0.0, $raw);
+                    if ($bucket === 'vector') {
+                        $rawVectorScores[$chunkId] = max($rawVectorScores[$chunkId] ?? 0.0, $raw);
+                    } elseif ($bucket === 'keyword') {
+                        $rawKeywordScores[$chunkId] = max($rawKeywordScores[$chunkId] ?? 0.0, $raw);
+                    }
                 }
 
                 $rank++;
                 $rrf = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
-
-                if ($boost) {
-                    $rrf *= 1.2;
-                }
+                $rrf *= $boost;
 
                 $rrfScores[$chunkId] = ($rrfScores[$chunkId] ?? 0.0) + $rrf;
             }
-        };
-
-        $apply($globalHits, false);
-        $apply($scopedHits, $boostScoped);
+        }
 
         return [
             'rrf_scores' => $rrfScores,
-            'raw_scores' => $rawScores,
+            'raw_vector_scores' => $rawVectorScores,
+            'raw_keyword_scores' => $rawKeywordScores,
         ];
     }
 
     /**
-     * Builds a fallback RRF ranking purely from hit order.
-     *
-     * Used when thresholding removed all fused candidates but
-     * the global hit list itself still exists.
+     * @param array<int, array<string,mixed>> ...$sourceLists
+     * @return array<string,float>
      */
-    private function fallbackRrfFromHits(array $hits): array
+    private function fallbackRrfFromSources(array ...$sourceLists): array
     {
-        $rrf = [];
-        $rank = 0;
+        foreach ($sourceLists as $hits) {
+            $rrf = [];
+            $rank = 0;
 
-        foreach ($hits as $hit) {
-            if (!isset($hit['chunk_id'])) {
-                continue;
+            foreach ($hits as $hit) {
+                if (!isset($hit['chunk_id'])) {
+                    continue;
+                }
+
+                $rank++;
+                $rrf[(string) $hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
+
+                if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
+                    break;
+                }
             }
 
-            $rank++;
-            $rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
-
-            if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
-                break;
+            if ($rrf !== []) {
+                return $rrf;
             }
         }
 
-        return $rrf;
+        return [];
     }
 
     /**
-     * Selects a coherent chunk window from one exact document title match.
-     *
-     * For exact product questions we prefer a pure document slice over
-     * cross-document fusion to avoid mixing neighbouring product families.
-     *
      * @param array<string,array<string,mixed>> $rows
      * @return string[]
      */
     private function selectExactDocumentChunkIds(array $rows, int $limit): array
     {
         uasort($rows, static function (array $a, array $b): int {
-            $aIndex = is_int($a['chunk_index'] ?? null) ? (int)$a['chunk_index'] : PHP_INT_MAX;
-            $bIndex = is_int($b['chunk_index'] ?? null) ? (int)$b['chunk_index'] : PHP_INT_MAX;
+            $aIndex = is_int($a['chunk_index'] ?? null) ? (int) $a['chunk_index'] : PHP_INT_MAX;
+            $bIndex = is_int($b['chunk_index'] ?? null) ? (int) $b['chunk_index'] : PHP_INT_MAX;
 
             if ($aIndex !== $bIndex) {
                 return $aIndex <=> $bIndex;
             }
 
-            return strcmp((string)($a['chunk_id'] ?? ''), (string)($b['chunk_id'] ?? ''));
+            return strcmp((string) ($a['chunk_id'] ?? ''), (string) ($b['chunk_id'] ?? ''));
         });
 
         $selected = [];
@@ -587,7 +1097,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
         foreach ($rows as $row) {
             $chunkId = $row['chunk_id'] ?? null;
-            $text = trim((string)($row['text'] ?? ''));
+            $text = trim((string) ($row['text'] ?? ''));
 
             if (!is_string($chunkId) || $chunkId === '' || $text === '') {
                 continue;
@@ -604,10 +1114,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     }
 
     /**
-     * Builds synthetic scores for exact-title fast-path selections.
-     *
-     * These scores are only used for debug output consistency.
-     *
      * @param string[] $chunkIds
      * @return array<string,float>
      */
@@ -616,20 +1122,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         $scores = [];
 
         foreach (array_values($chunkIds) as $rank => $chunkId) {
-            $scores[(string)$chunkId] = 1.0 / (1 + $rank);
+            $scores[(string) $chunkId] = 1.0 / (1 + $rank);
         }
 
         return $scores;
     }
 
-    /**
-     * Selection strategy for list-style queries.
-     *
-     * Goal:
-     * - avoid near-identical chunks
-     * - prefer diverse list entries
-     * - stop once the configured limit is reached
-     */
     private function selectListChunkIds(array $chunkIds, array $rows, int $limit): array
     {
         $seen = [];
@@ -640,19 +1138,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $chunk = trim((string)$rows[$id]['text']);
+            $chunk = trim((string) $rows[$id]['text']);
             if ($chunk === '') {
                 continue;
             }
 
-            $key = md5(mb_strtolower((string)preg_replace('/\s+/u', ' ', $chunk)));
+            $key = md5(mb_strtolower((string) (preg_replace('/\s+/u', ' ', $chunk) ?? $chunk)));
 
             if (isset($seen[$key])) {
                 continue;
             }
 
             $seen[$key] = true;
-            $out[] = (string)$id;
+            $out[] = (string) $id;
 
             if (count($out) >= $limit) {
                 break;
@@ -662,23 +1160,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
-    /**
-     * Selection strategy for sales-oriented queries.
-     *
-     * Modes:
-     * - exact_document_title:
-     *   used when the prompt clearly contains one exact document title
-     *   and the answer should stay strictly within that document
-     *
-     * - sales_dominant_document:
-     *   used when one document clearly dominates the top hit window
-     *   and coherent neighbouring chunks from that document are more
-     *   useful than cross-document spread
-     *
-     * - sales_spread:
-     *   default mode that spreads chunks across documents and enforces
-     *   distance between chunk positions of the same document
-     */
     private function selectSalesChunkIds(array $chunkIds, array $rows, int $limit): array
     {
         $dominantDocId = $this->detectDominantTopDocument($chunkIds, $rows);
@@ -710,13 +1191,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         ];
     }
 
-    /**
-     * Detects whether one document clearly dominates the first ranked window.
-     *
-     * This is especially useful for product-sheet style documents where
-     * several adjacent chunks belong together and should be passed to the model
-     * as one coherent factual block.
-     */
     private function detectDominantTopDocument(array $chunkIds, array $rows): ?string
     {
         $docWindow = [];
@@ -726,7 +1200,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string)$rows[$chunkId]['text']);
+            $text = trim((string) $rows[$chunkId]['text']);
             $docId = $rows[$chunkId]['document_id'] ?? null;
 
             if ($text === '' || !is_string($docId) || $docId === '') {
@@ -749,7 +1223,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             return null;
         }
 
-        $dominantCount = (int)($counts[$dominantDocId] ?? 0);
+        $dominantCount = (int) ($counts[$dominantDocId] ?? 0);
 
         if ($dominantCount >= self::DOMINANT_DOC_MIN_HITS) {
             return $dominantDocId;
@@ -765,21 +1239,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return null;
     }
 
-    /**
-     * Selects a coherent chunk window from the dominant document.
-     *
-     * Strategy:
-     * - use the highest-ranked chunk of that document as anchor
-     * - prefer neighbouring chunk indices around that anchor
-     * - sort the final selection by chunk index for prompt coherence
-     */
     private function selectDominantDocumentChunkIds(
         string $documentId,
-        array  $chunkIds,
-        array  $rows,
-        int    $limit
-    ): array
-    {
+        array $chunkIds,
+        array $rows,
+        int $limit
+    ): array {
         $docHits = [];
         $anchorChunkIndex = null;
 
@@ -788,7 +1253,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string)$rows[$chunkId]['text']);
+            $text = trim((string) $rows[$chunkId]['text']);
             $docId = $rows[$chunkId]['document_id'] ?? null;
 
             if ($text === '' || $docId !== $documentId) {
@@ -803,7 +1268,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             }
 
             $docHits[] = [
-                'id' => (string)$chunkId,
+                'id' => (string) $chunkId,
                 'rank' => $rank,
                 'chunk_index' => $chunkIndex,
             ];
@@ -861,19 +1326,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         );
     }
 
-    /**
-     * Fills the remaining sales slots after a dominant document selection.
-     *
-     * The already selected dominant-document chunks stay fixed.
-     * Remaining slots are filled with the normal spread strategy.
-     */
     private function fillRemainingSalesChunkIds(
         array $seedChunkIds,
         array $chunkIds,
         array $rows,
-        int   $limit
-    ): array
-    {
+        int $limit
+    ): array {
         $out = array_values(array_unique(array_map('strval', $seedChunkIds)));
 
         if (count($out) >= $limit) {
@@ -925,12 +1383,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 }
             }
 
-            $text = trim((string)$rows[$chunkId]['text']);
+            $text = trim((string) $rows[$chunkId]['text']);
             if ($text === '') {
                 continue;
             }
 
-            $out[] = (string)$chunkId;
+            $out[] = (string) $chunkId;
             $selected[$chunkId] = true;
             $docCounter[$docId] = ($docCounter[$docId] ?? 0) + 1;
 
@@ -946,14 +1404,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
-    /**
-     * Default spread selection for sales-oriented queries.
-     *
-     * Goal:
-     * - avoid overloading the result with chunks from the same document
-     * - avoid chunks that are too close to each other in the same document
-     * - preserve top-ranked relevance while improving contextual spread
-     */
     private function selectSalesChunkIdsSpread(array $chunkIds, array $rows, int $limit): array
     {
         $out = [];
@@ -986,12 +1436,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 $docChunkPositions[$docId][] = $chunkIndex;
             }
 
-            $text = trim((string)$rows[$chunkId]['text']);
+            $text = trim((string) $rows[$chunkId]['text']);
             if ($text === '') {
                 continue;
             }
 
-            $out[] = (string)$chunkId;
+            $out[] = (string) $chunkId;
             $docCounter[$docId] = ($docCounter[$docId] ?? 0) + 1;
 
             if (count($out) >= $limit) {
@@ -1002,9 +1452,6 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
-    /**
-     * Converts selected chunk ids into the final plain text result list.
-     */
     private function collectTextsFromIds(array $chunkIds, array $rows): array
     {
         $out = [];
@@ -1014,7 +1461,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string)$rows[$id]['text']);
+            $text = trim((string) $rows[$id]['text']);
 
             if ($text !== '') {
                 $out[] = $text;
@@ -1023,4 +1470,233 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
         return $out;
     }
+
+    /**
+     * Scales fused chunk scores by a per-document title / metadata match boost.
+     *
+     * Each document is scored once, from the first of its rows encountered in $rows. Every fused
+     * chunk whose document has a positive boost b gets its score multiplied by (1 + b); chunks
+     * without a usable row or document id keep their fused score. When the normalized query, its
+     * tokens, or either input is empty, the scores come back unchanged with two empty maps.
+     *
+     * @param array<string,float> $rrfScores
+     * @param array<string,array<string,mixed>> $rows
+     * @return array{0: array<string,float>, 1: array<string,float>, 2: array<string,float>}
+     *   Boosted scores, applied boost per chunk id, computed boost per document id
+     */
+    private function applyTitleMetadataBoosts(array $rrfScores, array $rows, string $lexicalQuery): array
+    {
+        $query = $this->normalizeForMatching($lexicalQuery);
+        $tokens = $this->tokenizeNormalizedQuery($query);
+
+        // Nothing to match with or against: hand the scores back untouched.
+        if ($query === '' || $tokens === [] || $rrfScores === [] || $rows === []) {
+            return [$rrfScores, [], []];
+        }
+
+        // One boost per document; further rows of an already scored document are skipped.
+        $boostByDocument = [];
+
+        foreach ($rows as $candidateRow) {
+            $docId = $candidateRow['document_id'] ?? null;
+            $isUsableId = is_string($docId) && $docId !== '';
+
+            if ($isUsableId && !isset($boostByDocument[$docId])) {
+                $boostByDocument[$docId] = $this->computeDocumentMetadataBoost(
+                    $candidateRow,
+                    $query,
+                    $tokens
+                );
+            }
+        }
+
+        if ($boostByDocument === []) {
+            return [$rrfScores, [], []];
+        }
+
+        // Read from the input map and write into a copy of it.
+        $boostedScores = $rrfScores;
+        $boostByChunk = [];
+
+        foreach ($rrfScores as $chunkId => $fusedScore) {
+            $chunkRow = $rows[$chunkId] ?? null;
+            $docId = is_array($chunkRow) ? ($chunkRow['document_id'] ?? null) : null;
+
+            if (!is_string($docId) || $docId === '') {
+                continue;
+            }
+
+            $factor = $boostByDocument[$docId] ?? 0.0;
+
+            // Zero (or unknown) boosts leave the fused score as it is.
+            if ($factor > 0.0) {
+                $boostedScores[$chunkId] = $fusedScore * (1.0 + $factor);
+                $boostByChunk[$chunkId] = $factor;
+            }
+        }
+
+        return [$boostedScores, $boostByChunk, $boostByDocument];
+    }
+
+    /**
+     * Sums the title, file-name and generic metadata match boosts of one row.
+     *
+     * Each field is located via extractMetadataString() and normalized before matching.
+     *
+     * @param array<string,mixed> $row
+     * @param string[] $queryTokens
+     * @return float Combined boost, never above MAX_TITLE_METADATA_BOOST
+     */
+    private function computeDocumentMetadataBoost(array $row, string $normalizedQuery, array $queryTokens): float
+    {
+        $title = $this->normalizeForMatching(
+            $this->extractMetadataString($row, ['document_title', 'title'])
+        );
+
+        $file = $this->normalizeForMatching(
+            $this->extractMetadataString(
+                $row,
+                ['file_name', 'filename', 'original_filename', 'source_name', 'document_name']
+            )
+        );
+
+        $meta = $this->normalizeForMatching(
+            $this->extractMetadataString(
+                $row,
+                ['source_path', 'path', 'heading', 'section_title', 'category']
+            )
+        );
+
+        $total = 0.0;
+
+        // Title: base boost plus a coverage-scaled share, limited to the title maximum.
+        $coverage = $this->computeNormalizedTokenCoverage($queryTokens, $title);
+        if ($coverage > 0.0) {
+            $scaled = self::TITLE_MATCH_BASE_BOOST + ($coverage * self::TITLE_MATCH_MAX_BOOST);
+            $total += min(self::TITLE_MATCH_MAX_BOOST, $scaled);
+        }
+
+        // File name: same scheme with the file-specific constants.
+        $coverage = $this->computeNormalizedTokenCoverage($queryTokens, $file);
+        if ($coverage > 0.0) {
+            $scaled = self::FILE_MATCH_BASE_BOOST + ($coverage * self::FILE_MATCH_MAX_BOOST);
+            $total += min(self::FILE_MATCH_MAX_BOOST, $scaled);
+        }
+
+        // Other metadata: purely coverage-scaled, no base boost.
+        $coverage = $this->computeNormalizedTokenCoverage($queryTokens, $meta);
+        if ($coverage > 0.0) {
+            $total += min(self::META_MATCH_MAX_BOOST, $coverage * self::META_MATCH_MAX_BOOST);
+        }
+
+        // Whole-query phrase bonuses apply to multi-word queries only.
+        if (str_contains($normalizedQuery, ' ')) {
+            $paddedQuery = ' ' . $normalizedQuery . ' ';
+            if ($title !== '' && str_contains(' ' . $title . ' ', $paddedQuery)) {
+                $total += self::EXACT_TITLE_PHRASE_BOOST;
+            }
+
+            if ($file !== '' && str_contains(' ' . $file . ' ', $paddedQuery)) {
+                $total += self::EXACT_FILE_PHRASE_BOOST;
+            }
+        }
+
+        return min(self::MAX_TITLE_METADATA_BOOST, $total);
+    }
+
+    /**
+     * Returns the first non-blank string stored under one of the preferred keys, trimmed.
+     *
+     * Keys are tried in the given order; for each key the row's top level is consulted before
+     * its nested 'metadata' array. An empty string means nothing matched.
+     *
+     * @param array<string,mixed> $row
+     * @param string[] $preferredKeys
+     */
+    private function extractMetadataString(array $row, array $preferredKeys): string
+    {
+        $nested = is_array($row['metadata'] ?? null) ? $row['metadata'] : [];
+
+        foreach ($preferredKeys as $key) {
+            foreach ([$row[$key] ?? null, $nested[$key] ?? null] as $candidate) {
+                if (is_string($candidate) && trim($candidate) !== '') {
+                    return trim($candidate);
+                }
+            }
+        }
+
+        return '';
+    }
+
+    /**
+     * Fraction of query tokens that occur as whole space-delimited words in the haystack (0.0 if none).
+     *
+     * @param string[] $queryTokens
+     */
+    private function computeNormalizedTokenCoverage(array $queryTokens, string $normalizedHaystack): float
+    {
+        if ($queryTokens === [] || $normalizedHaystack === '') {
+            return 0.0;
+        }
+
+        // Padding both sides lets a plain substring test act as a whole-word test.
+        $padded = ' ' . $normalizedHaystack . ' ';
+        $hits = 0;
+
+        foreach ($queryTokens as $token) {
+            if ($token !== '' && str_contains($padded, ' ' . $token . ' ')) {
+                $hits++;
+            }
+        }
+
+        if ($hits === 0) {
+            return 0.0;
+        }
+
+        return $hits / max(1, count($queryTokens));
+    }
+
+    /**
+     * Splits on whitespace and keeps each distinct token of two or more characters, in first-seen order.
+     *
+     * @return string[]
+     */
+    private function tokenizeNormalizedQuery(string $normalizedQuery): array
+    {
+        $parts = $normalizedQuery === '' ? [] : (preg_split('/\s+/u', $normalizedQuery, -1, PREG_SPLIT_NO_EMPTY) ?: []);
+        $distinct = [];
+        foreach ($parts as $part) {
+            if (mb_strlen($part, 'UTF-8') >= 2 && !in_array($part, $distinct, true)) {
+                $distinct[] = $part;
+            }
+        }
+
+        return $distinct;
+    }
+
+    private function normalizeForMatching(string $value): string
+    {
+        // Lower-case, then squash non-letter/digit runs to one space; a failed regex keeps its input.
+        $lowered = mb_strtolower(trim($value), 'UTF-8');
+        $spaced = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $lowered) ?? $lowered;
+        $collapsed = preg_replace('/\s+/u', ' ', $spaced) ?? $spaced;
+        return trim($collapsed);
+    }
+
+    /**
+     * Null-tolerant maximum of two floats.
+     *
+     * Example: (null, 2.0) yields 2.0, (1.5, 2.0) yields 2.0, (null, null) yields null.
+     *
+     * @return float|null Null only when both inputs are null
+     */
+    private function maxNullableFloat(?float $a, ?float $b): ?float
+    {
+        if ($a !== null && $b !== null) {
+            return max($a, $b);
+        }
+
+        // At most one side is present here; ?? yields it, or null when both are missing.
+        return $a ?? $b;
+    }
 }
\ No newline at end of file
diff --git a/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php b/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php
new file mode 100644
index 0000000..1d4c16a
--- /dev/null
+++ b/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php
@@ -0,0 +1,451 @@
+<?php
+
+declare(strict_types=1);
+
+namespace App\Knowledge\Retrieval;
+
+use App\Knowledge\StopWords;
+use Psr\Log\LoggerInterface;
+use SQLite3;
+
+final readonly class NdjsonKeywordRetriever
+{
+    private const DEFAULT_RELATIVE_INDEX_PATH = '/var/knowledge/lexical.index.sqlite';
+    private const MAX_LIMIT = 100;
+    private const MAX_QUERY_TOKENS = 12;
+
+    /**
+     * @param string $projectDir Base directory; the index file path is this value (trailing "/" removed)
+     *                           followed by DEFAULT_RELATIVE_INDEX_PATH
+     * @param LoggerInterface $agentLogger Receives a warning when the sqlite3 extension is missing and
+     *                                     errors when the index cannot be opened or a search fails
+     */
+    public function __construct(
+        private string $projectDir,
+        private LoggerInterface $agentLogger,
+    ) {
+    }
+
+    /**
+     * Generic lexical retrieval against a prebuilt SQLite index.
+     *
+     * Expected DB schema (to be created by the lexical index builder):
+     *
+     * lexical_meta(
+     *   key TEXT PRIMARY KEY,
+     *   value TEXT NOT NULL
+     * )
+     *
+     * lexical_terms(
+     *   token TEXT PRIMARY KEY,
+     *   df INTEGER NOT NULL
+     * )
+     *
+     * lexical_postings(
+     *   token TEXT NOT NULL,
+     *   chunk_id TEXT NOT NULL,
+     *   document_id TEXT NOT NULL,
+     *   chunk_index INTEGER,
+     *   tf INTEGER NOT NULL,
+     *   title_tf INTEGER NOT NULL DEFAULT 0,
+     *   PRIMARY KEY(token, chunk_id)
+     * )
+     *
+     * This retriever contains no domain-specific keyword logic.
+     * It only uses generic token overlap, rarity, title hits, and numeric/code emphasis.
+     *
+     * @param string[] $docIds Optional document scope
+     *
+     * @return array<int, array{
+     *     chunk_id:string,
+     *     score:float,
+     *     document_id:?string,
+     *     chunk_index:?int
+     * }>
+     */
+    public function search(string $query, int $limit = 10, array $docIds = []): array
+    {
+        $limit = $this->clampLimit($limit);
+        ['tokens' => $tokens, 'numeric_tokens' => $numericTokens] = $this->analyzeQuery($query);
+
+        // Nothing searchable survived normalization and token filtering.
+        if ($tokens === []) {
+            return [];
+        }
+
+        $db = $this->openReadOnlyDb();
+
+        // No usable index: the opener already logged whatever it considers worth logging.
+        if ($db === null) {
+            return [];
+        }
+
+        try {
+            $totalChunks = $this->loadTotalChunks($db);
+            $postings = $this->loadPostings($db, $tokens, $docIds);
+
+            return $postings === []
+                ? []
+                : $this->scoreRows($postings, $tokens, $numericTokens, $totalChunks, $limit);
+        } catch (\Throwable $failure) {
+            // Retrieval is best-effort: log the failure and report no hits.
+            $this->agentLogger->error('Keyword retriever failed', [
+                'error' => $failure->getMessage(),
+            ]);
+
+            return [];
+        } finally {
+            $db->close();
+        }
+    }
+
+    /**
+     * @return array{
+     *   normalized_query:string,
+     *   tokens:string[],
+     *   numeric_tokens:string[]
+     * }
+     */
+    private function analyzeQuery(string $query): array
+    {
+        $normalized = $this->normalizeText($query);
+        $tokens = [];
+        $numericTokens = [];
+
+        if ($normalized !== '') {
+            $parts = preg_split('/\s+/u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+
+            // Keep only the first occurrence of every non-ignored token, in query order.
+            $tokens = array_values(array_unique(array_filter(
+                $parts,
+                fn (string $part): bool => !$this->shouldIgnoreToken($part)
+            )));
+
+            // Collected before the cap below, so this list may contain tokens the cap removes.
+            $numericTokens = array_values(array_filter(
+                $tokens,
+                static fn (string $token): bool => preg_match('/\d/u', $token) === 1
+            ));
+
+            $tokens = array_slice($tokens, 0, self::MAX_QUERY_TOKENS);
+        }
+
+        return [
+            'normalized_query' => $normalized,
+            'tokens' => $tokens,
+            'numeric_tokens' => $numericTokens,
+        ];
+    }
+
+    /**
+     * Decides whether a query token is excluded from lexical matching.
+     *
+     * Tokens containing a digit are always kept, even when they are one character long.
+     * Digit-free tokens are dropped when shorter than two characters or when
+     * StopWords::isStopWord() reports them as stop words.
+     */
+    private function shouldIgnoreToken(string $token): bool
+    {
+        // Arms are evaluated top to bottom; the first true condition decides.
+        return match (true) {
+            $token === '' => true,
+            preg_match('/\d/u', $token) === 1 => false,
+            mb_strlen($token, 'UTF-8') < 2 => true,
+            default => StopWords::isStopWord($token),
+        };
+    }
+
+    /**
+     * Lowercases the input and reduces it to letters and digits separated by single spaces.
+     *
+     * "-", "/" and "_" are turned into spaces first, then every remaining run of characters
+     * that is not a Unicode letter, a Unicode number or whitespace becomes one space.
+     */
+    private function normalizeText(string $value): string
+    {
+        $lowered = mb_strtolower(trim($value), 'UTF-8');
+        $separated = strtr($lowered, '-/_', '   ');
+
+        // preg_replace yields null on a PCRE error; in that case keep the previous stage.
+        $stripped = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $separated) ?? $separated;
+        $collapsed = preg_replace('/\s+/u', ' ', $stripped) ?? $stripped;
+
+        return trim($collapsed);
+    }
+
+    /**
+     * Opens the lexical index in read-only mode.
+     *
+     * @return SQLite3|null Null when the sqlite3 extension is missing (logged as a warning), when the
+     *                      index file does not exist (not logged), or when opening fails (logged as an error)
+     */
+    private function openReadOnlyDb(): ?SQLite3
+    {
+        if (!class_exists(SQLite3::class)) {
+            $this->agentLogger->warning('Keyword retriever unavailable: sqlite3 extension missing.');
+
+            return null;
+        }
+
+        $indexFile = $this->getIndexPath();
+
+        if (!is_file($indexFile)) {
+            return null;
+        }
+
+        try {
+            $connection = new SQLite3($indexFile, SQLITE3_OPEN_READONLY);
+            $connection->busyTimeout(1000);
+        } catch (\Throwable $failure) {
+            $this->agentLogger->error('Unable to open lexical index', [
+                'path' => $indexFile,
+                'error' => $failure->getMessage(),
+            ]);
+
+            return null;
+        }
+
+        return $connection;
+    }
+
+    /**
+     * Builds the index file path from the project directory and the fixed relative location.
+     */
+    private function getIndexPath(): string
+    {
+        $baseDir = rtrim($this->projectDir, '/');
+
+        return $baseDir . self::DEFAULT_RELATIVE_INDEX_PATH;
+    }
+
+    /**
+     * Reads the "total_chunks" entry from lexical_meta.
+     *
+     * @return int The stored value, never less than 1; 1 is also returned when the statement
+     *             cannot be prepared or executed, or when the entry is absent
+     */
+    private function loadTotalChunks(SQLite3 $db): int
+    {
+        $statement = $db->prepare('SELECT value FROM lexical_meta WHERE key = :key');
+
+        if (!$statement) {
+            return 1;
+        }
+
+        $statement->bindValue(':key', 'total_chunks', SQLITE3_TEXT);
+        $result = $statement->execute();
+
+        if ($result === false) {
+            return 1;
+        }
+
+        $record = $result->fetchArray(SQLITE3_ASSOC);
+        $result->finalize();
+
+        // fetchArray() gives false when there is no row; the null-coalesce turns that into 0.
+        $stored = (int) ($record['value'] ?? 0);
+
+        return max(1, $stored);
+    }
+
+    /**
+     * @param string[] $tokens
+     * @param string[] $docIds
+     * @return array<int, array{
+     *   token:string,
+     *   chunk_id:string,
+     *   document_id:string,
+     *   chunk_index:?int,
+     *   tf:int,
+     *   title_tf:int,
+     *   df:int
+     * }>
+     */
+    private function loadPostings(SQLite3 $db, array $tokens, array $docIds): array
+    {
+        if ($tokens === []) {
+            return [];
+        }
+
+        // One named placeholder per token, keyed by the token's array index.
+        $tokenPlaceholders = array_map(
+            static fn (int|string $index): string => ':t' . $index,
+            array_keys($tokens)
+        );
+
+        $sql = '
+            SELECT
+                p.token,
+                p.chunk_id,
+                p.document_id,
+                p.chunk_index,
+                p.tf,
+                p.title_tf,
+                lt.df
+            FROM lexical_postings p
+            INNER JOIN lexical_terms lt ON lt.token = p.token
+            WHERE p.token IN (' . implode(', ', $tokenPlaceholders) . ')
+        ';
+
+        // Only non-empty string ids take part in the document scope; duplicates are removed.
+        $usableDocIds = array_filter(
+            $docIds,
+            static fn (mixed $value): bool => is_string($value) && $value !== ''
+        );
+        $docIds = array_values(array_unique($usableDocIds));
+
+        if ($docIds !== []) {
+            $docPlaceholders = array_map(
+                static fn (int $index): string => ':d' . $index,
+                array_keys($docIds)
+            );
+
+            $sql .= ' AND p.document_id IN (' . implode(', ', $docPlaceholders) . ')';
+        }
+
+        $statement = $db->prepare($sql);
+
+        if ($statement === false) {
+            return [];
+        }
+
+        foreach ($tokens as $index => $token) {
+            $statement->bindValue(':t' . $index, $token, SQLITE3_TEXT);
+        }
+
+        foreach ($docIds as $index => $docId) {
+            $statement->bindValue(':d' . $index, $docId, SQLITE3_TEXT);
+        }
+
+        $result = $statement->execute();
+
+        if ($result === false) {
+            return [];
+        }
+
+        $postings = [];
+
+        while (($record = $result->fetchArray(SQLITE3_ASSOC)) !== false) {
+            $token = (string) ($record['token'] ?? '');
+            $chunkId = (string) ($record['chunk_id'] ?? '');
+            $documentId = (string) ($record['document_id'] ?? '');
+
+            // Rows lacking any identifying column are dropped.
+            if ($token === '' || $chunkId === '' || $documentId === '') {
+                continue;
+            }
+
+            $rawChunkIndex = $record['chunk_index'] ?? null;
+
+            $postings[] = [
+                'token' => $token,
+                'chunk_id' => $chunkId,
+                'document_id' => $documentId,
+                'chunk_index' => is_numeric($rawChunkIndex) ? (int) $rawChunkIndex : null,
+                // Frequencies are floored so later scoring never sees zero or negative counts.
+                'tf' => max(1, (int) ($record['tf'] ?? 1)),
+                'title_tf' => max(0, (int) ($record['title_tf'] ?? 0)),
+                'df' => max(1, (int) ($record['df'] ?? 1)),
+            ];
+        }
+
+        $result->finalize();
+
+        return $postings;
+    }
+
+    /**
+     * @param array<int, array{
+     *   token:string,
+     *   chunk_id:string,
+     *   document_id:string,
+     *   chunk_index:?int,
+     *   tf:int,
+     *   title_tf:int,
+     *   df:int
+     * }> $rows
+     * @param string[] $queryTokens
+     * @param string[] $numericTokens
+     *
+     * @return array<int, array{
+     *   chunk_id:string,
+     *   score:float,
+     *   document_id:?string,
+     *   chunk_index:?int
+     * }>
+     */
+    private function scoreRows(
+        array $rows,
+        array $queryTokens,
+        array $numericTokens,
+        int $totalChunks,
+        int $limit
+    ): array {
+        if ($rows === []) {
+            return [];
+        }
+
+        $numericLookup = array_fill_keys($numericTokens, true);
+        $queryTokenCount = max(1, count($queryTokens));
+
+        $scores = [];
+        $meta = [];
+        $matchedTokens = [];
+
+        foreach ($rows as $row) {
+            $chunkId = $row['chunk_id'];
+            $token = $row['token'];
+
+            // Rarer tokens (small df relative to the corpus size) weigh more.
+            $idf = log(1.0 + ($totalChunks / max(1.0, (float) (1 + $row['df']))));
+            // Term frequency contributes up to three occurrences, 20% each.
+            $tfBoost = 1.0 + (min(3, $row['tf']) * 0.20);
+            // Tokens containing digits (codes, model numbers) get extra emphasis.
+            $numericBoost = isset($numericLookup[$token]) ? 1.60 : 1.0;
+            $titleBonus = $row['title_tf'] > 0 ? ($idf * 0.75) : 0.0;
+
+            $scores[$chunkId] = ($scores[$chunkId] ?? 0.0)
+                + ($idf * $tfBoost * $numericBoost)
+                + $titleBonus;
+
+            $matchedTokens[$chunkId][$token] = true;
+
+            // Metadata is taken from the first posting seen for a chunk.
+            $meta[$chunkId] ??= [
+                'document_id' => $row['document_id'],
+                'chunk_index' => $row['chunk_index'],
+            ];
+        }
+
+        // Scale by query coverage: a chunk matching every query token keeps its full score,
+        // a chunk matching few tokens keeps at least 65% of it.
+        foreach ($scores as $chunkId => $score) {
+            $coverage = count($matchedTokens[$chunkId] ?? []) / $queryTokenCount;
+            $scores[$chunkId] = $score * (0.65 + (0.35 * $coverage));
+        }
+
+        arsort($scores);
+
+        $topScore = (float) reset($scores);
+        if ($topScore <= 0.0) {
+            return [];
+        }
+
+        $out = [];
+
+        foreach ($scores as $chunkId => $score) {
+            $out[] = [
+                // PHP turns numeric-string array keys such as "123" into integers; cast back so
+                // the documented chunk_id:string contract holds for every id.
+                'chunk_id' => (string) $chunkId,
+                // Scores are reported relative to the best hit, which therefore scores 1.0.
+                'score' => round($score / $topScore, 6),
+                'document_id' => $meta[$chunkId]['document_id'] ?? null,
+                'chunk_index' => $meta[$chunkId]['chunk_index'] ?? null,
+            ];
+
+            if (count($out) >= $limit) {
+                break;
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * Restricts the requested result count to the range 1..MAX_LIMIT.
+     */
+    private function clampLimit(int $limit): int
+    {
+        return max(1, min(self::MAX_LIMIT, $limit));
+    }
+}
\ No newline at end of file
diff --git a/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php b/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php
new file mode 100644
index 0000000..c83f1f7
--- /dev/null
+++ b/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php
@@ -0,0 +1,528 @@
+<?php
+
+
+declare(strict_types=1);
+
+namespace App\Knowledge\Retrieval;
+
+use App\Knowledge\StopWords;
+use Psr\Log\LoggerInterface;
+use SQLite3;
+
+final readonly class NdjsonLexicalIndexBuilder
+{
+    private const DEFAULT_RELATIVE_NDJSON_PATH = '/var/knowledge/index.ndjson';
+    private const DEFAULT_RELATIVE_INDEX_PATH = '/var/knowledge/lexical.index.sqlite';
+
+    /**
+     * Upper bound to avoid pathological chunks exploding the lexical index.
+     * This stays generic and does not encode any domain-specific assumption.
+     */
+    private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
+
+    /**
+     * @param string $projectDir Base directory; the NDJSON source and the SQLite target paths are this
+     *                           value (trailing "/" removed) followed by the DEFAULT_RELATIVE_* constants
+     * @param LoggerInterface $agentLogger Receives build progress (info) and build failures (error)
+     */
+    public function __construct(
+        private string          $projectDir,
+        private LoggerInterface $agentLogger,
+    )
+    {
+    }
+
+    /**
+     * Build a generic lexical SQLite index from index.ndjson.
+     *
+     * Output DB schema:
+     *
+     * lexical_meta(
+     *   key TEXT PRIMARY KEY,
+     *   value TEXT NOT NULL
+     * )
+     *
+     * lexical_terms(
+     *   token TEXT PRIMARY KEY,
+     *   df INTEGER NOT NULL
+     * )
+     *
+     * lexical_postings(
+     *   token TEXT NOT NULL,
+     *   chunk_id TEXT NOT NULL,
+     *   document_id TEXT NOT NULL,
+     *   chunk_index INTEGER,
+     *   tf INTEGER NOT NULL,
+     *   title_tf INTEGER NOT NULL DEFAULT 0,
+     *   PRIMARY KEY(token, chunk_id)
+     * )
+     *
+     * Design goals:
+     * - generic, data-driven lexical retrieval base
+     * - no domain keywords in core code
+     * - no full scan per request later
+     * - duplicate chunk_id lines in index.ndjson must not inflate the index
+     */
+    public function build(): void
+    {
+        $this->assertSqliteAvailable();
+
+        $sourcePath = $this->getIndexNdjsonPath();
+        $targetPath = $this->getLexicalIndexPath();
+        $stagingPath = $targetPath . '.tmp';
+
+        $hasSource = is_file($sourcePath) && filesize($sourcePath) !== 0;
+
+        if (!$hasSource) {
+            // Without source data any existing index is removed as well.
+            $this->removeFileIfExists($targetPath);
+            $this->removeFileIfExists($stagingPath);
+
+            $this->agentLogger->info('Lexical index skipped because index.ndjson is missing or empty.', [
+                'index_ndjson' => $sourcePath,
+            ]);
+
+            return;
+        }
+
+        $this->ensureTargetDirectoryExists($targetPath);
+        // Start from a clean staging file in case a previous build left one behind.
+        $this->removeFileIfExists($stagingPath);
+
+        $db = $this->openWritableDb($stagingPath);
+
+        try {
+            $this->initializeSchema($db);
+            $this->buildFromNdjson($db, $sourcePath);
+            // The handle is closed before the staging file is moved into place.
+            $db->close();
+
+            $this->atomicReplace($stagingPath, $targetPath);
+
+            $this->agentLogger->info('Lexical index build completed.', [
+                'path' => $targetPath,
+            ]);
+        } catch (\Throwable $failure) {
+            try {
+                $db->close();
+            } catch (\Throwable) {
+                // Ignore close failures during cleanup.
+            }
+
+            $this->removeFileIfExists($stagingPath);
+
+            $this->agentLogger->error('Lexical index build failed.', [
+                'path' => $targetPath,
+                'error' => $failure->getMessage(),
+            ]);
+
+            throw $failure;
+        }
+    }
+
+    /**
+     * Streams index.ndjson line by line into the lexical tables inside one transaction.
+     *
+     * Blank lines, lines that do not decode to a JSON object/array, lines missing chunk_id,
+     * document_id or text, and repeated chunk ids are skipped. Every SQL failure aborts the
+     * build: the transaction is rolled back and the error is rethrown.
+     *
+     * The source handle is closed exactly once, in a finally block, so a failure after the
+     * read loop (meta write, commit) can no longer trigger a second fclose() on a closed
+     * resource and hide the real error.
+     *
+     * @param SQLite3 $db Writable database whose schema has already been created
+     * @param string $indexNdjsonPath Path of the NDJSON source file
+     *
+     * @throws \RuntimeException When the source cannot be opened or an SQL step fails
+     */
+    private function buildFromNdjson(SQLite3 $db, string $indexNdjsonPath): void
+    {
+        $handle = @fopen($indexNdjsonPath, 'rb');
+
+        if ($handle === false) {
+            throw new \RuntimeException('Unable to read index.ndjson: ' . $indexNdjsonPath);
+        }
+
+        $totalChunks = 0;
+
+        try {
+            if ($db->exec('BEGIN IMMEDIATE TRANSACTION') === false) {
+                throw new \RuntimeException('Failed to begin lexical index transaction.');
+            }
+
+            try {
+                $seenChunkStmt = $db->prepare(
+                    'INSERT OR IGNORE INTO lexical_seen_chunks (chunk_id) VALUES (:chunk_id)'
+                );
+                $termStmt = $db->prepare(
+                    'INSERT INTO lexical_terms (token, df)
+                     VALUES (:token, 1)
+                     ON CONFLICT(token) DO UPDATE SET df = df + 1'
+                );
+                $postingStmt = $db->prepare(
+                    'INSERT INTO lexical_postings (
+                        token,
+                        chunk_id,
+                        document_id,
+                        chunk_index,
+                        tf,
+                        title_tf
+                     ) VALUES (
+                        :token,
+                        :chunk_id,
+                        :document_id,
+                        :chunk_index,
+                        :tf,
+                        :title_tf
+                     )'
+                );
+
+                if (!$seenChunkStmt || !$termStmt || !$postingStmt) {
+                    throw new \RuntimeException('Failed to prepare lexical index SQL statements.');
+                }
+
+                // Arrays/objects in a scalar field would otherwise raise "Array to string
+                // conversion"; treat them as missing so the line is skipped like other bad input.
+                $toTrimmedString = static fn (mixed $value): string => is_scalar($value)
+                    ? trim((string) $value)
+                    : '';
+
+                $lineNumber = 0;
+
+                while (($line = fgets($handle)) !== false) {
+                    $lineNumber++;
+                    $line = trim($line);
+
+                    if ($line === '') {
+                        continue;
+                    }
+
+                    $row = json_decode($line, true);
+
+                    if (!is_array($row)) {
+                        continue;
+                    }
+
+                    $chunkId = $toTrimmedString($row['chunk_id'] ?? null);
+                    $documentId = $toTrimmedString($row['document_id'] ?? null);
+                    $chunkIndex = $this->normalizeChunkIndex($row['chunk_index'] ?? null);
+                    $text = $toTrimmedString($row['text'] ?? null);
+
+                    if ($chunkId === '' || $documentId === '' || $text === '') {
+                        continue;
+                    }
+
+                    $seenChunkStmt->reset();
+                    $seenChunkStmt->clear();
+                    $seenChunkStmt->bindValue(':chunk_id', $chunkId, SQLITE3_TEXT);
+                    $seenResult = $seenChunkStmt->execute();
+
+                    // A failed insert would leave changes() reporting an earlier statement.
+                    if ($seenResult === false) {
+                        throw new \RuntimeException(
+                            'Failed to record lexical chunk id at line ' . $lineNumber . '.'
+                        );
+                    }
+
+                    $seenResult->finalize();
+
+                    // INSERT OR IGNORE changed nothing: this chunk id was already indexed.
+                    if ($db->changes() < 1) {
+                        continue;
+                    }
+
+                    $title = $this->extractDocumentTitle($row);
+                    $tokenStats = $this->buildTokenStats($text, $title);
+
+                    if ($tokenStats === []) {
+                        continue;
+                    }
+
+                    $totalChunks++;
+
+                    foreach ($tokenStats as $token => $stats) {
+                        // Numeric tokens arrive as integer array keys; bind and report them as text.
+                        $token = (string) $token;
+
+                        $termStmt->reset();
+                        $termStmt->clear();
+                        $termStmt->bindValue(':token', $token, SQLITE3_TEXT);
+                        $termResult = $termStmt->execute();
+
+                        // A silently failed upsert would leave df inconsistent with the postings.
+                        if ($termResult === false) {
+                            throw new \RuntimeException(
+                                'Failed to update document frequency for token: ' . $token
+                            );
+                        }
+
+                        $termResult->finalize();
+
+                        $postingStmt->reset();
+                        $postingStmt->clear();
+                        $postingStmt->bindValue(':token', $token, SQLITE3_TEXT);
+                        $postingStmt->bindValue(':chunk_id', $chunkId, SQLITE3_TEXT);
+                        $postingStmt->bindValue(':document_id', $documentId, SQLITE3_TEXT);
+
+                        if ($chunkIndex === null) {
+                            $postingStmt->bindValue(':chunk_index', null, SQLITE3_NULL);
+                        } else {
+                            $postingStmt->bindValue(':chunk_index', $chunkIndex, SQLITE3_INTEGER);
+                        }
+
+                        $postingStmt->bindValue(':tf', $stats['tf'], SQLITE3_INTEGER);
+                        $postingStmt->bindValue(':title_tf', $stats['title_tf'], SQLITE3_INTEGER);
+
+                        $postingResult = $postingStmt->execute();
+
+                        if ($postingResult === false) {
+                            throw new \RuntimeException('Failed to insert lexical posting for token: ' . $token);
+                        }
+
+                        $postingResult->finalize();
+                    }
+                }
+
+                $this->writeMeta($db, $totalChunks);
+
+                if ($db->exec('COMMIT') === false) {
+                    throw new \RuntimeException('Failed to commit lexical index transaction.');
+                }
+            } catch (\Throwable $e) {
+                try {
+                    $db->exec('ROLLBACK');
+                } catch (\Throwable) {
+                    // A failing rollback must not hide the error that caused it.
+                }
+
+                throw $e;
+            }
+        } finally {
+            fclose($handle);
+        }
+
+        // Logged after the commit so a logger failure cannot trigger a rollback attempt.
+        $this->agentLogger->info('Lexical index streaming pass completed.', [
+            'indexed_chunks' => $totalChunks,
+            'source' => $indexNdjsonPath,
+        ]);
+    }
+
+    /**
+     * @return array<string, array{tf:int, title_tf:int}>
+     */
+    private function buildTokenStats(string $text, string $title): array
+    {
+        $textTokens = $this->tokenize($text);
+        $titleTokens = $this->tokenize($title);
+
+        if ($textTokens === [] && $titleTokens === []) {
+            return [];
+        }
+
+        $textTf = array_count_values($textTokens);
+        $titleTf = array_count_values($titleTokens);
+
+        $stats = [];
+
+        // Body tokens come first, then title-only tokens, each in order of first appearance.
+        foreach ([...array_keys($textTf), ...array_keys($titleTf)] as $token) {
+            if (isset($stats[$token])) {
+                continue;
+            }
+
+            // Cap reached: the remaining distinct tokens are left out of the index.
+            if (count($stats) >= self::MAX_UNIQUE_TOKENS_PER_CHUNK) {
+                break;
+            }
+
+            $stats[$token] = [
+                'tf' => $textTf[$token] ?? 0,
+                'title_tf' => $titleTf[$token] ?? 0,
+            ];
+        }
+
+        return $stats;
+    }
+
+    /**
+     * Generic tokenizer:
+     * - lowercases
+     * - removes punctuation
+     * - preserves alphanumeric codes
+     * - keeps numeric/code-like tokens even if short
+     * - drops generic stop words for non-numeric tokens
+     *
+     * @return string[]
+     */
+    private function tokenize(string $value): array
+    {
+        $normalized = $this->normalizeText($value);
+
+        if ($normalized === '') {
+            return [];
+        }
+
+        $parts = preg_split('/\s+/u', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];
+
+        // Duplicates are kept on purpose: callers count occurrences per token.
+        return array_values(array_filter(
+            $parts,
+            fn (string $part): bool => !$this->shouldIgnoreToken($part)
+        ));
+    }
+
+    /**
+     * Decides whether a token is left out of the lexical index.
+     *
+     * Tokens containing a digit are always kept, even when they are one character long.
+     * Digit-free tokens are dropped when shorter than two characters or when
+     * StopWords::isStopWord() reports them as stop words.
+     */
+    private function shouldIgnoreToken(string $token): bool
+    {
+        // Arms are evaluated top to bottom; the first true condition decides.
+        return match (true) {
+            $token === '' => true,
+            preg_match('/\d/u', $token) === 1 => false,
+            mb_strlen($token, 'UTF-8') < 2 => true,
+            default => StopWords::isStopWord($token),
+        };
+    }
+
+    /**
+     * Lowercases the input and reduces it to letters and digits separated by single spaces.
+     *
+     * "-", "/" and "_" are turned into spaces first, then every remaining run of characters
+     * that is not a Unicode letter, a Unicode number or whitespace becomes one space.
+     */
+    private function normalizeText(string $value): string
+    {
+        $lowered = mb_strtolower(trim($value), 'UTF-8');
+        $separated = strtr($lowered, '-/_', '   ');
+
+        // preg_replace yields null on a PCRE error; in that case keep the previous stage.
+        $stripped = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $separated) ?? $separated;
+        $collapsed = preg_replace('/\s+/u', ' ', $stripped) ?? $stripped;
+
+        return trim($collapsed);
+    }
+
+    /**
+     * Reads the document title from the row's "metadata.document_title" entry.
+     *
+     * @param array $row Decoded NDJSON line
+     *
+     * @return string The trimmed title, or '' when metadata is not an array or the title is
+     *                missing or not a scalar value
+     */
+    private function extractDocumentTitle(array $row): string
+    {
+        $metadata = $row['metadata'] ?? null;
+
+        if (!is_array($metadata)) {
+            return '';
+        }
+
+        $title = $metadata['document_title'] ?? '';
+
+        // A nested array here would raise "Array to string conversion" and index the word "Array".
+        return is_scalar($title) ? trim((string) $title) : '';
+    }
+
+    /**
+     * Converts a raw chunk index into an integer when it is one.
+     *
+     * @return int|null The value itself for integers, the parsed number for strings made up
+     *                  solely of digits, and null for anything else
+     */
+    private function normalizeChunkIndex(mixed $value): ?int
+    {
+        return match (true) {
+            is_int($value) => $value,
+            is_string($value) && ctype_digit($value) => (int)$value,
+            default => null,
+        };
+    }
+
+    /**
+     * Stores the schema version, the build timestamp and the indexed chunk count in lexical_meta.
+     *
+     * @throws \RuntimeException When the statement cannot be prepared or an entry cannot be written
+     */
+    private function writeMeta(SQLite3 $db, int $totalChunks): void
+    {
+        $statement = $db->prepare(
+            'INSERT OR REPLACE INTO lexical_meta (key, value) VALUES (:key, :value)'
+        );
+
+        if ($statement === false) {
+            throw new \RuntimeException('Failed to prepare lexical meta statement.');
+        }
+
+        $entries = [
+            'schema_version' => '1',
+            'built_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
+            'total_chunks' => (string)$totalChunks,
+        ];
+
+        foreach ($entries as $metaKey => $metaValue) {
+            // The same prepared statement is reused for every entry.
+            $statement->reset();
+            $statement->clear();
+            $statement->bindValue(':key', $metaKey, SQLITE3_TEXT);
+            $statement->bindValue(':value', $metaValue, SQLITE3_TEXT);
+
+            $written = $statement->execute();
+
+            if ($written === false) {
+                throw new \RuntimeException('Failed to write lexical meta key: ' . $metaKey);
+            }
+
+            $written->finalize();
+        }
+    }
+
+    /**
+     * Applies connection pragmas and creates the lexical index tables and indexes.
+     *
+     * Besides the three tables described on build(), a helper table lexical_seen_chunks is
+     * created; buildFromNdjson() inserts every chunk id into it to detect repeated chunk ids.
+     *
+     * @throws \RuntimeException When the schema script fails to execute
+     */
+    private function initializeSchema(SQLite3 $db): void
+    {
+        // Pragma results are not checked; only the schema script below is verified.
+        $db->exec('PRAGMA journal_mode = DELETE');
+        $db->exec('PRAGMA synchronous = NORMAL');
+        $db->exec('PRAGMA temp_store = MEMORY');
+        $db->exec('PRAGMA foreign_keys = OFF');
+
+        // Nowdoc: the SQL is passed to SQLite verbatim, without PHP interpolation.
+        $schema = <<<'SQL'
+CREATE TABLE IF NOT EXISTS lexical_meta (
+    key TEXT PRIMARY KEY,
+    value TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS lexical_terms (
+    token TEXT PRIMARY KEY,
+    df INTEGER NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS lexical_postings (
+    token TEXT NOT NULL,
+    chunk_id TEXT NOT NULL,
+    document_id TEXT NOT NULL,
+    chunk_index INTEGER NULL,
+    tf INTEGER NOT NULL,
+    title_tf INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (token, chunk_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_lexical_postings_document_token
+    ON lexical_postings (document_id, token);
+
+CREATE INDEX IF NOT EXISTS idx_lexical_postings_chunk
+    ON lexical_postings (chunk_id);
+
+CREATE TABLE IF NOT EXISTS lexical_seen_chunks (
+    chunk_id TEXT PRIMARY KEY
+);
+SQL;
+
+        if ($db->exec($schema) === false) {
+            throw new \RuntimeException('Failed to initialize lexical index schema.');
+        }
+    }
+
+    /**
+     * Opens (creating it when absent) the SQLite database at the given path for writing.
+     *
+     * @throws \RuntimeException When the database cannot be opened; the cause is attached as previous
+     */
+    private function openWritableDb(string $path): SQLite3
+    {
+        try {
+            $connection = new SQLite3($path, SQLITE3_OPEN_READWRITE | SQLITE3_OPEN_CREATE);
+        } catch (\Throwable $cause) {
+            throw new \RuntimeException('Unable to open lexical index DB: ' . $path, 0, $cause);
+        }
+
+        // Wait up to five seconds on a locked database before failing.
+        $connection->busyTimeout(5000);
+
+        return $connection;
+    }
+
+    /**
+     * Builds the NDJSON source path from the project directory and the fixed relative location.
+     */
+    private function getIndexNdjsonPath(): string
+    {
+        $baseDir = rtrim($this->projectDir, '/');
+
+        return $baseDir . self::DEFAULT_RELATIVE_NDJSON_PATH;
+    }
+
+    /**
+     * Builds the SQLite index path from the project directory and the fixed relative location.
+     */
+    private function getLexicalIndexPath(): string
+    {
+        $baseDir = rtrim($this->projectDir, '/');
+
+        return $baseDir . self::DEFAULT_RELATIVE_INDEX_PATH;
+    }
+
+    /**
+     * Makes sure the directory that will contain the index file exists.
+     *
+     * @throws \RuntimeException When the directory is missing and cannot be created
+     */
+    private function ensureTargetDirectoryExists(string $finalIndexPath): void
+    {
+        $directory = dirname($finalIndexPath);
+
+        // The trailing is_dir() covers the case where mkdir() failed because the
+        // directory appeared in the meantime.
+        if (is_dir($directory) || @mkdir($directory, 0775, true) || is_dir($directory)) {
+            return;
+        }
+
+        throw new \RuntimeException('Unable to create lexical index directory: ' . $directory);
+    }
+
+    /**
+     * Moves the freshly built file over the final index path.
+     *
+     * rename() is tried first. When it fails the file is copied instead and the temporary
+     * file is deleted; note that this fallback is a plain copy. Permissions of the final
+     * file are set to 0664 on a best-effort basis.
+     *
+     * @throws \RuntimeException When neither rename nor copy succeeds
+     */
+    private function atomicReplace(string $tmpPath, string $finalPath): void
+    {
+        if (is_file($finalPath)) {
+            @chmod($finalPath, 0664);
+        }
+
+        $moved = @rename($tmpPath, $finalPath);
+
+        if (!$moved) {
+            $copied = @copy($tmpPath, $finalPath);
+
+            // The temporary file is removed whether or not the copy worked.
+            @unlink($tmpPath);
+
+            if (!$copied) {
+                throw new \RuntimeException('Atomic replace failed for lexical index: ' . $finalPath);
+            }
+        }
+
+        @chmod($finalPath, 0664);
+    }
+
+    /**
+     * Deletes the given path when it is a regular file; deletion failures are suppressed.
+     */
+    private function removeFileIfExists(string $path): void
+    {
+        if (!is_file($path)) {
+            return;
+        }
+
+        @unlink($path);
+    }
+
+    /**
+     * Verifies that the SQLite3 class is available.
+     *
+     * @throws \RuntimeException When the class does not exist
+     */
+    private function assertSqliteAvailable(): void
+    {
+        if (class_exists(SQLite3::class)) {
+            return;
+        }
+
+        throw new \RuntimeException('The sqlite3 PHP extension is required for lexical index building.');
+    }
+}
\ No newline at end of file
diff --git a/src/Knowledge/Retrieval/QueryEnricher.php b/src/Knowledge/Retrieval/QueryEnricher.php
index 87faf88..ffe66dc 100644
--- a/src/Knowledge/Retrieval/QueryEnricher.php
+++ b/src/Knowledge/Retrieval/QueryEnricher.php
@@ -8,6 +8,14 @@ use App\Config\QueryEnricherConfig;
 
 final readonly class QueryEnricher
 {
+    /**
+     * Keep enrichment conservative.
+     *
+     * The enriched semantic query should help vector retrieval,
+     * but must not become bloated enough to dilute the original user intent.
+     */
+    private const MAX_EXPANSIONS = 4;
+
     public function __construct(
         private QueryEnricherConfig $config
     ) {
@@ -16,6 +24,12 @@ final readonly class QueryEnricher
     /**
      * Enriches the query with mapped counterpart terms.
      *
+     * Design goals:
+     * - preserve the original query unchanged at the front
+     * - only append counterpart terms that are not already present
+     * - prefer longer / more specific phrase matches over short generic matches
+     * - keep the number of appended terms intentionally small
+     *
      * Example:
      * - input:  "water hardness device"
      * - output: "water hardness device residual hardness model"
@@ -29,26 +43,63 @@ final readonly class QueryEnricher
         }
 
         $mapping = $this->config->getEnrichQueryList();
+
+        if ($mapping === []) {
+            return $originalQuery;
+        }
+
         $lookup = $this->buildBidirectionalLookup($mapping);
+
+        if ($lookup === []) {
+            return $originalQuery;
+        }
+
+        $lookup = $this->sortLookupBySpecificity($lookup);
         $normalizedQuery = $this->normalizeForMatching($originalQuery);
 
-        $matches = [];
+        if ($normalizedQuery === '') {
+            return $originalQuery;
+        }
 
-        foreach ($lookup as $needle => $mappedValue) {
-            if ($needle === '') {
+        $matches = [];
+        $seenNormalizedExpansions = [];
+
+        foreach ($lookup as $normalizedNeedle => $mappedValue) {
+            if ($normalizedNeedle === '') {
                 continue;
             }
 
-            if ($this->containsWholePhrase($normalizedQuery, $needle)) {
-                $matches[] = $mappedValue;
+            if (!$this->containsWholePhrase($normalizedQuery, $normalizedNeedle)) {
+                continue;
+            }
+
+            $mappedValue = trim($mappedValue);
+            if ($mappedValue === '') {
+                continue;
+            }
+
+            $normalizedMappedValue = $this->normalizeForMatching($mappedValue);
+            if ($normalizedMappedValue === '') {
+                continue;
+            }
+
+            // Do not re-add information that is already present in the query.
+            if ($this->containsWholePhrase($normalizedQuery, $normalizedMappedValue)) {
+                continue;
+            }
+
+            if (isset($seenNormalizedExpansions[$normalizedMappedValue])) {
+                continue;
+            }
+
+            $matches[] = $mappedValue;
+            $seenNormalizedExpansions[$normalizedMappedValue] = true;
+
+            if (count($matches) >= self::MAX_EXPANSIONS) {
+                break;
             }
         }
 
-        $matches = array_values(array_unique(array_filter(
-            $matches,
-            static fn(string $value): bool => trim($value) !== ''
-        )));
-
         if ($matches === []) {
             return $originalQuery;
         }
@@ -106,6 +157,11 @@ final readonly class QueryEnricher
      *     'jacket'   => 'coat',
      *     'coat'     => 'jacket',
      * ]
+     *
+     * Returned format:
+     * [
+     *     '<normalized needle>' => '<original mapped value>',
+     * ]
      */
     private function buildBidirectionalLookup(array $mapping): array
     {
@@ -122,15 +178,49 @@ final readonly class QueryEnricher
             $normalizedKey = $this->normalizeForMatching($key);
             $normalizedValue = $this->normalizeForMatching($value);
 
-            if ($normalizedKey !== '') {
+            if ($normalizedKey !== '' && !isset($lookup[$normalizedKey])) {
                 $lookup[$normalizedKey] = $value;
             }
 
-            if ($normalizedValue !== '') {
+            if ($normalizedValue !== '' && !isset($lookup[$normalizedValue])) {
                 $lookup[$normalizedValue] = $key;
             }
         }
 
         return $lookup;
     }
+
+    /**
+     * Sorts phrase rules by specificity so longer / more precise phrases win first.
+     *
+     * Priority:
+     * 1. more words
+     * 2. longer character length
+     * 3. lexical order for deterministic output
+     *
+     * @param array<string, string> $lookup
+     * @return array<string, string>
+     */
+    private function sortLookupBySpecificity(array $lookup): array
+    {
+        $countWords = static fn (string $phrase): int => substr_count($phrase, ' ') + 1;
+
+        uksort(
+            $lookup,
+            // Each comparison yields 0 on a tie, which hands the decision to the next criterion.
+            static fn (string $a, string $b): int => ($countWords($b) <=> $countWords($a))
+                ?: (mb_strlen($b, 'UTF-8') <=> mb_strlen($a, 'UTF-8'))
+                ?: strcmp($a, $b)
+        );
+
+        return $lookup;
+    }
 }
\ No newline at end of file