From ce859b9662b9d10ab8f57f8900263d6d4e0f8f76 Mon Sep 17 00:00:00 2001
From: team 1 <team1@mitho-media.de>
Date: Tue, 21 Apr 2026 17:20:16 +0200
Subject: [PATCH] Fine-tune RAG retrieval: drop lexical retriever, fuse vector hits only

---
 .../Retrieval/NdjsonHybridRetriever.php       | 1631 +++++++----------
 1 file changed, 699 insertions(+), 932 deletions(-)

diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
index a29e86f..fa55f3f 100644
--- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
+++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
@@ -25,8 +25,7 @@ use RuntimeException;
  * - optionally short-circuit to catalog list output
  * - resolve exact document-title matches before semantic retrieval
  * - run vector retrieval globally and optionally document-scoped
- * - run lexical retrieval globally and optionally document-scoped
- * - fuse all result sets with RRF-style scoring
+ * - fuse global and document-scoped vector results with RRF-style scoring
  * - apply selection rules for list queries vs. sales-style queries
  * - return either plain chunk texts or debug metadata
  */
@@ -40,83 +39,40 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     private const DOMINANT_DOC_MIN_HITS = 3;
     private const DOMINANT_DOC_MAX_CHUNKS = 4;
     private const EXACT_DOCUMENT_MAX_CHUNKS = 6;
-
-    /**
-     * Conservative no-tag fallback:
-     * derive a temporary document scope only when the top global vector hits
-     * show repeated evidence for the same document(s).
-     */
-    private const PSEUDO_SCOPE_GLOBAL_WINDOW = 10;
-    private const PSEUDO_SCOPE_MIN_DOC_HITS = 2;
-    private const PSEUDO_SCOPE_MAX_DOCS = 3;
-
-    /**
-     * Soft document candidates are derived from global lexical hits first.
-     * This stage is placed between tag-routing and vector-based pseudo scope.
-     */
-    private const SOFT_DOC_CANDIDATE_WINDOW = 8;
-    private const SOFT_DOC_CANDIDATE_MIN_DOC_HITS = 2;
-    private const SOFT_DOC_CANDIDATE_MAX_DOCS = 3;
-    private const SOFT_DOC_TOP_SCORE_MIN = 0.98;
-
-    /**
-     * Scoped retrieval is useful in both cases, but true tag-routing should
-     * stay stronger than soft candidates and pseudo-scoping.
-     */
-    private const TAG_SCOPED_VECTOR_BOOST = 1.20;
-    private const SOFT_DOC_SCOPED_VECTOR_BOOST = 1.12;
-    private const PSEUDO_SCOPED_VECTOR_BOOST = 1.08;
-
-    /**
-     * Secondary vector query should help recall/robustness, but must not
-     * overpower the primary enriched semantic query.
-     */
-    private const SECONDARY_GLOBAL_VECTOR_BOOST = 0.93;
-    private const SECONDARY_SCOPED_VECTOR_MULTIPLIER = 0.95;
-
-    /**
-     * Lexical retrieval should support precision, but not overpower vector routing.
-     */
-    private const LEXICAL_SCORE_THRESHOLD = 0.18;
-    private const GLOBAL_LEXICAL_BOOST = 0.90;
-    private const TAG_SCOPED_LEXICAL_BOOST = 1.04;
-    private const SOFT_DOC_SCOPED_LEXICAL_BOOST = 1.02;
-    private const PSEUDO_SCOPED_LEXICAL_BOOST = 1.00;
-
-    /**
-     * Conservative re-rank stage based on document title / metadata alignment.
-     *
-     * This is intentionally applied after fusion so it sharpens ranking
-     * without replacing the underlying retrieval sources.
-     */
-    private const TITLE_MATCH_BASE_BOOST = 0.04;
-    private const TITLE_MATCH_MAX_BOOST = 0.18;
-    private const FILE_MATCH_BASE_BOOST = 0.02;
-    private const FILE_MATCH_MAX_BOOST = 0.08;
-    private const META_MATCH_MAX_BOOST = 0.04;
-    private const EXACT_TITLE_PHRASE_BOOST = 0.08;
-    private const EXACT_FILE_PHRASE_BOOST = 0.04;
-    private const MAX_TITLE_METADATA_BOOST = 0.22;
+    private const FOCUSED_PRODUCT_WINDOW = 8;
+    private const FOCUSED_PRODUCT_MIN_SCORE = 10.0;
+    private const FOCUSED_PRODUCT_MIN_GAP = 4.0;
+    private const FOCUSED_PRODUCT_MAX_CHUNKS = 4;
 
     public function __construct(
-        private NdjsonChunkLookup $lookup,
-        private VectorSearchClient $vectorClient,
-        private NdjsonKeywordRetriever $keywordRetriever,
-        private TagRoutingService $tagRouting,
+        private NdjsonChunkLookup               $lookup,
+        private VectorSearchClient              $vectorClient,
+        private TagRoutingService               $tagRouting,
         private ModelGenerationConfigRepository $configRepository,
-        private QueryCleaner $queryCleaner,
-        private IntentLite $intentLite,
-        private SalesIntentLite $salesIntentLite,
-        private CatalogIntentLite $catalogIntent,
-        private IntentRouteResolver $routeResolver,
-        private EntityCatalogService $entityCatalogService,
-        private QueryEnricher $queryEnricher,
-    ) {
+        private QueryCleaner                    $queryCleaner,
+        private IntentLite                      $intentLite,
+        private SalesIntentLite                 $salesIntentLite,
+        private CatalogIntentLite               $catalogIntent,
+        private IntentRouteResolver             $routeResolver,
+        private EntityCatalogService            $entityCatalogService,
+        private QueryEnricher                   $queryEnricher,
+    )
+    {
     }
 
+    // =========================================================
+    // PUBLIC API
+    // =========================================================
+
     /**
      * Returns the final retrieval payload as plain text chunks.
      *
+     * Behaviour:
+     * - loads active retrieval config
+     * - executes the full orchestration pipeline
+     * - if the route resolves to a catalog list, returns the catalog block only
+     * - otherwise returns the selected chunk texts
+     *
      * @throws Exception
      */
     public function retrieve(string $prompt): array
@@ -141,6 +97,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     /**
      * Returns a debug-friendly retrieval result with scoring/meta information.
      *
+     * Intended for inspection and tuning; each returned entry exposes:
+     * - selected chunk ids
+     * - raw vector scores
+     * - fused RRF scores
+     * - intent / route information
+     * - threshold and list-query flags
+     *
      * @throws Exception
      */
     public function retrieveDebug(string $prompt, ?ModelGenerationConfig $config = null): array
@@ -155,40 +118,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'document_id' => null,
                 'chunk_index' => null,
                 'raw_score' => null,
-                'raw_vector_score' => null,
-                'raw_keyword_score' => null,
                 'rrf_score' => null,
                 'threshold' => 0.0,
-                'lexical_threshold' => self::LEXICAL_SCORE_THRESHOLD,
                 'intent' => $result['intent'],
                 'route' => $result['route'],
                 'entity_label' => $result['entityLabel'],
                 'is_list_query' => true,
                 'selection_mode' => 'catalog_list',
-                'scope_mode' => 'catalog_list',
-                'clean_query' => null,
-                'semantic_query' => null,
-                'secondary_vector_query' => null,
-                'lexical_query' => null,
-                'tag_candidate_doc_ids' => [],
-                'soft_document_candidate_doc_ids' => [],
-                'pseudo_scope_doc_ids' => [],
-                'global_hit_count' => 0,
-                'scoped_hit_count' => 0,
-                'global_vector_hit_count' => 0,
-                'global_primary_vector_hit_count' => 0,
-                'global_secondary_vector_hit_count' => 0,
-                'global_keyword_hit_count' => 0,
-                'scoped_vector_hit_count' => 0,
-                'scoped_primary_vector_hit_count' => 0,
-                'scoped_secondary_vector_hit_count' => 0,
-                'scoped_keyword_hit_count' => 0,
-                'scoped_boost_factor' => 0.0,
-                'scoped_vector_boost_factor' => 0.0,
-                'secondary_scoped_vector_boost_factor' => 0.0,
-                'scoped_keyword_boost_factor' => 0.0,
-                'title_metadata_boost' => 0.0,
-                'title_metadata_doc_boosts' => [],
                 'text' => $result['catalogBlock'],
             ]];
         }
@@ -207,49 +143,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
             $rank++;
 
-            $rawVectorScore = $result['rawVectorScores'][$chunkId] ?? null;
-            $rawKeywordScore = $result['rawKeywordScores'][$chunkId] ?? null;
-
             $out[] = [
                 'rank' => $rank,
                 'chunk_id' => $chunkId,
                 'document_id' => $result['rows'][$chunkId]['document_id'] ?? null,
                 'chunk_index' => $result['rows'][$chunkId]['chunk_index'] ?? null,
-                'raw_score' => $this->maxNullableFloat($rawVectorScore, $rawKeywordScore),
-                'raw_vector_score' => $rawVectorScore,
-                'raw_keyword_score' => $rawKeywordScore,
+                'raw_score' => $result['rawScores'][$chunkId] ?? null,
                 'rrf_score' => $result['rrfScores'][$chunkId] ?? null,
                 'threshold' => $result['threshold'],
-                'lexical_threshold' => self::LEXICAL_SCORE_THRESHOLD,
                 'intent' => $result['intent'],
                 'route' => $result['route'],
                 'entity_label' => $result['entityLabel'],
                 'is_list_query' => $result['isListQuery'],
                 'selection_mode' => $result['selectionMode'],
-                'scope_mode' => $result['scopeMode'],
-                'clean_query' => $result['cleanQuery'],
-                'semantic_query' => $result['semanticQuery'],
-                'secondary_vector_query' => $result['secondaryVectorQuery'],
-                'lexical_query' => $result['lexicalQuery'],
-                'tag_candidate_doc_ids' => $result['tagCandidateDocIds'],
-                'soft_document_candidate_doc_ids' => $result['softDocumentCandidateDocIds'],
-                'pseudo_scope_doc_ids' => $result['pseudoScopeDocIds'],
-                'global_hit_count' => $result['globalHitCount'],
-                'scoped_hit_count' => $result['scopedHitCount'],
-                'global_vector_hit_count' => $result['globalVectorHitCount'],
-                'global_primary_vector_hit_count' => $result['globalPrimaryVectorHitCount'],
-                'global_secondary_vector_hit_count' => $result['globalSecondaryVectorHitCount'],
-                'global_keyword_hit_count' => $result['globalKeywordHitCount'],
-                'scoped_vector_hit_count' => $result['scopedVectorHitCount'],
-                'scoped_primary_vector_hit_count' => $result['scopedPrimaryVectorHitCount'],
-                'scoped_secondary_vector_hit_count' => $result['scopedSecondaryVectorHitCount'],
-                'scoped_keyword_hit_count' => $result['scopedKeywordHitCount'],
-                'scoped_boost_factor' => $result['scopedBoostFactor'],
-                'scoped_vector_boost_factor' => $result['scopedVectorBoostFactor'],
-                'secondary_scoped_vector_boost_factor' => $result['secondaryScopedVectorBoostFactor'],
-                'scoped_keyword_boost_factor' => $result['scopedKeywordBoostFactor'],
-                'title_metadata_boost' => $result['titleMetadataBoosts'][$chunkId] ?? 0.0,
-                'title_metadata_doc_boosts' => $result['titleMetadataDocBoosts'],
                 'text' => trim((string)$result['rows'][$chunkId]['text']),
             ];
         }
@@ -257,16 +163,29 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
+    // =========================================================
+    // CENTRAL ORCHESTRATION
+    // =========================================================
+
     /**
      * Central orchestration entrypoint.
      *
+     * Pipeline:
+     * 1. Detect catalog entity and sales intent
+     * 2. Resolve route
+     * 3. If route is a catalog list route, try direct catalog output
+     * 4. If prompt matches one exact document title, use exact-document fast path
+     * 5. Otherwise, run the normal hybrid retrieval core
+     * 6. Select final chunk ids depending on query type
+     *
      * @throws Exception
      */
     private function execute(
-        string $prompt,
+        string                $prompt,
         ModelGenerationConfig $config,
-        bool $withScores
-    ): array {
+        bool                  $withScores
+    ): array
+    {
         $entityLabel = $this->catalogIntent->detect($prompt);
         $salesIntent = $this->detectSalesIntent($prompt);
         $route = $this->routeResolver->resolve($salesIntent, $entityLabel);
@@ -281,35 +200,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     'intent' => $salesIntent,
                     'isListQuery' => true,
                     'selectionMode' => 'catalog_list',
-                    'scopeMode' => 'catalog_list',
-                    'cleanQuery' => null,
-                    'semanticQuery' => null,
-                    'secondaryVectorQuery' => null,
-                    'lexicalQuery' => null,
-                    'tagCandidateDocIds' => [],
-                    'softDocumentCandidateDocIds' => [],
-                    'pseudoScopeDocIds' => [],
-                    'globalHitCount' => 0,
-                    'scopedHitCount' => 0,
-                    'globalVectorHitCount' => 0,
-                    'globalPrimaryVectorHitCount' => 0,
-                    'globalSecondaryVectorHitCount' => 0,
-                    'globalKeywordHitCount' => 0,
-                    'scopedVectorHitCount' => 0,
-                    'scopedPrimaryVectorHitCount' => 0,
-                    'scopedSecondaryVectorHitCount' => 0,
-                    'scopedKeywordHitCount' => 0,
-                    'scopedBoostFactor' => 0.0,
-                    'scopedVectorBoostFactor' => 0.0,
-                    'secondaryScopedVectorBoostFactor' => 0.0,
-                    'scopedKeywordBoostFactor' => 0.0,
                     'selectedChunkIds' => [],
                     'rows' => [],
                     'rrfScores' => [],
-                    'rawVectorScores' => [],
-                    'rawKeywordScores' => [],
-                    'titleMetadataBoosts' => [],
-                    'titleMetadataDocBoosts' => [],
+                    'rawScores' => [],
                     'threshold' => 0.0,
                     'catalogBlock' => trim($catalogBlock),
                 ];
@@ -331,35 +225,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     'intent' => $salesIntent,
                     'isListQuery' => false,
                     'selectionMode' => 'exact_document_title',
-                    'scopeMode' => 'exact_document_title',
-                    'cleanQuery' => null,
-                    'semanticQuery' => null,
-                    'secondaryVectorQuery' => null,
-                    'lexicalQuery' => null,
-                    'tagCandidateDocIds' => [],
-                    'softDocumentCandidateDocIds' => [],
-                    'pseudoScopeDocIds' => [],
-                    'globalHitCount' => 0,
-                    'scopedHitCount' => 0,
-                    'globalVectorHitCount' => 0,
-                    'globalPrimaryVectorHitCount' => 0,
-                    'globalSecondaryVectorHitCount' => 0,
-                    'globalKeywordHitCount' => 0,
-                    'scopedVectorHitCount' => 0,
-                    'scopedPrimaryVectorHitCount' => 0,
-                    'scopedSecondaryVectorHitCount' => 0,
-                    'scopedKeywordHitCount' => 0,
-                    'scopedBoostFactor' => 0.0,
-                    'scopedVectorBoostFactor' => 0.0,
-                    'secondaryScopedVectorBoostFactor' => 0.0,
-                    'scopedKeywordBoostFactor' => 0.0,
                     'selectedChunkIds' => $selectedChunkIds,
                     'rows' => $exactDocumentMatch['rows'],
                     'rrfScores' => $this->buildExactDocumentScores($selectedChunkIds),
-                    'rawVectorScores' => [],
-                    'rawKeywordScores' => [],
-                    'titleMetadataBoosts' => [],
-                    'titleMetadataDocBoosts' => [],
+                    'rawScores' => [],
                     'threshold' => 1.0,
                     'catalogBlock' => null,
                 ];
@@ -375,39 +244,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'intent' => $salesIntent,
                 'isListQuery' => $core['is_list_query'],
                 'selectionMode' => null,
-                'scopeMode' => $core['scope_mode'],
-                'cleanQuery' => $core['clean_query'],
-                'semanticQuery' => $core['semantic_query'],
-                'secondaryVectorQuery' => $core['secondary_vector_query'],
-                'lexicalQuery' => $core['lexical_query'],
-                'tagCandidateDocIds' => $core['tag_candidate_doc_ids'],
-                'softDocumentCandidateDocIds' => $core['soft_document_candidate_doc_ids'],
-                'pseudoScopeDocIds' => $core['pseudo_scope_doc_ids'],
-                'globalHitCount' => $core['global_hit_count'],
-                'scopedHitCount' => $core['scoped_hit_count'],
-                'globalVectorHitCount' => $core['global_vector_hit_count'],
-                'globalPrimaryVectorHitCount' => $core['global_primary_vector_hit_count'],
-                'globalSecondaryVectorHitCount' => $core['global_secondary_vector_hit_count'],
-                'globalKeywordHitCount' => $core['global_keyword_hit_count'],
-                'scopedVectorHitCount' => $core['scoped_vector_hit_count'],
-                'scopedPrimaryVectorHitCount' => $core['scoped_primary_vector_hit_count'],
-                'scopedSecondaryVectorHitCount' => $core['scoped_secondary_vector_hit_count'],
-                'scopedKeywordHitCount' => $core['scoped_keyword_hit_count'],
-                'scopedBoostFactor' => max(
-                    $core['scoped_vector_boost_factor'],
-                    $core['secondary_scoped_vector_boost_factor'],
-                    $core['scoped_keyword_boost_factor']
-                ),
-                'scopedVectorBoostFactor' => $core['scoped_vector_boost_factor'],
-                'secondaryScopedVectorBoostFactor' => $core['secondary_scoped_vector_boost_factor'],
-                'scopedKeywordBoostFactor' => $core['scoped_keyword_boost_factor'],
                 'selectedChunkIds' => [],
                 'rows' => [],
                 'rrfScores' => [],
-                'rawVectorScores' => [],
-                'rawKeywordScores' => [],
-                'titleMetadataBoosts' => $core['title_metadata_boosts'],
-                'titleMetadataDocBoosts' => $core['title_metadata_doc_boosts'],
+                'rawScores' => [],
                 'threshold' => $core['threshold'],
                 'catalogBlock' => null,
             ];
@@ -422,6 +262,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             $selectionMode = 'list_deduplicated';
         } else {
             $salesSelection = $this->selectSalesChunkIds(
+                $prompt,
                 $core['ranked_chunk_ids'],
                 $core['rows'],
                 $core['limit']
@@ -437,272 +278,107 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             'intent' => $salesIntent,
             'isListQuery' => $core['is_list_query'],
             'selectionMode' => $selectionMode,
-            'scopeMode' => $core['scope_mode'],
-            'cleanQuery' => $core['clean_query'],
-            'semanticQuery' => $core['semantic_query'],
-            'secondaryVectorQuery' => $core['secondary_vector_query'],
-            'lexicalQuery' => $core['lexical_query'],
-            'tagCandidateDocIds' => $core['tag_candidate_doc_ids'],
-            'softDocumentCandidateDocIds' => $core['soft_document_candidate_doc_ids'],
-            'pseudoScopeDocIds' => $core['pseudo_scope_doc_ids'],
-            'globalHitCount' => $core['global_hit_count'],
-            'scopedHitCount' => $core['scoped_hit_count'],
-            'globalVectorHitCount' => $core['global_vector_hit_count'],
-            'globalPrimaryVectorHitCount' => $core['global_primary_vector_hit_count'],
-            'globalSecondaryVectorHitCount' => $core['global_secondary_vector_hit_count'],
-            'globalKeywordHitCount' => $core['global_keyword_hit_count'],
-            'scopedVectorHitCount' => $core['scoped_vector_hit_count'],
-            'scopedPrimaryVectorHitCount' => $core['scoped_primary_vector_hit_count'],
-            'scopedSecondaryVectorHitCount' => $core['scoped_secondary_vector_hit_count'],
-            'scopedKeywordHitCount' => $core['scoped_keyword_hit_count'],
-            'scopedBoostFactor' => max(
-                $core['scoped_vector_boost_factor'],
-                $core['secondary_scoped_vector_boost_factor'],
-                $core['scoped_keyword_boost_factor']
-            ),
-            'scopedVectorBoostFactor' => $core['scoped_vector_boost_factor'],
-            'secondaryScopedVectorBoostFactor' => $core['secondary_scoped_vector_boost_factor'],
-            'scopedKeywordBoostFactor' => $core['scoped_keyword_boost_factor'],
             'selectedChunkIds' => $selectedChunkIds,
             'rows' => $core['rows'],
             'rrfScores' => $core['rrf_scores'],
-            'rawVectorScores' => $core['raw_vector_scores'],
-            'rawKeywordScores' => $core['raw_keyword_scores'],
-            'titleMetadataBoosts' => $core['title_metadata_boosts'],
-            'titleMetadataDocBoosts' => $core['title_metadata_doc_boosts'],
+            'rawScores' => $core['raw_scores'],
             'threshold' => $core['threshold'],
             'catalogBlock' => null,
         ];
     }
 
+    // =========================================================
+    // CORE PIPELINE
+    // =========================================================
+
     /**
      * Executes the actual hybrid retrieval logic.
      *
+     * Steps:
+     * - derive limits from config within hard safety caps
+     * - detect whether the prompt is a "list query"
+     * - clean and enrich the prompt
+     * - compute threshold + vector topK based on intent/query type
+     * - route query into candidate document ids via tag routing
+     * - run global and optional scoped vector search
+     * - fuse hits
+     * - resolve chunk ids to chunk rows
+     *
      * @throws Exception
      */
     private function runCore(
-        string $prompt,
+        string                $prompt,
         ModelGenerationConfig $config,
-        bool $withScores,
-        string $salesIntent
-    ): array {
+        bool                  $withScores,
+        string                $salesIntent
+    ): array
+    {
         $limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS));
         $vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
 
         $isListQuery = $this->intentLite->isListQuery($prompt);
 
         $cleanQuery = $this->queryCleaner->clean($prompt);
+        $cleanQuery = $this->queryEnricher->enrichPrompt($cleanQuery);
 
         if ($cleanQuery === '') {
             return [
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
-                'clean_query' => '',
-                'semantic_query' => '',
-                'secondary_vector_query' => '',
-                'lexical_query' => '',
-                'scope_mode' => 'none',
-                'tag_candidate_doc_ids' => [],
-                'soft_document_candidate_doc_ids' => [],
-                'pseudo_scope_doc_ids' => [],
-                'global_hit_count' => 0,
-                'scoped_hit_count' => 0,
-                'global_vector_hit_count' => 0,
-                'global_primary_vector_hit_count' => 0,
-                'global_secondary_vector_hit_count' => 0,
-                'global_keyword_hit_count' => 0,
-                'scoped_vector_hit_count' => 0,
-                'scoped_primary_vector_hit_count' => 0,
-                'scoped_secondary_vector_hit_count' => 0,
-                'scoped_keyword_hit_count' => 0,
-                'scoped_vector_boost_factor' => 0.0,
-                'secondary_scoped_vector_boost_factor' => 0.0,
-                'scoped_keyword_boost_factor' => 0.0,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_vector_scores' => [],
-                'raw_keyword_scores' => [],
-                'title_metadata_boosts' => [],
-                'title_metadata_doc_boosts' => [],
+                'raw_scores' => [],
             ];
         }
 
-        $semanticQuery = $this->queryEnricher->enrichPrompt($cleanQuery);
-        $secondaryVectorQuery = $cleanQuery !== $semanticQuery ? $cleanQuery : '';
-        $lexicalQuery = $cleanQuery;
-
         [$threshold, $topK] = $this->computeThresholdAndTopK(
             $salesIntent,
             $isListQuery,
             $vectorTopKBase
         );
 
-        $tagCandidateDocIds = $this->tagRouting->route($semanticQuery);
-        $tagCandidateDocIds = is_array($tagCandidateDocIds)
+        $candidateDocIds = $this->tagRouting->route($cleanQuery);
+        $candidateDocIds = is_array($candidateDocIds)
             ? array_values(array_unique(array_filter(
-                $tagCandidateDocIds,
+                $candidateDocIds,
                 static fn(mixed $value): bool => is_string($value) && $value !== ''
             )))
             : [];
 
-        $globalPrimaryVectorHits = $this->vectorClient->search($semanticQuery, $topK);
-        $globalSecondaryVectorHits = $secondaryVectorQuery !== ''
-            ? $this->vectorClient->search($secondaryVectorQuery, $topK)
-            : [];
-        $globalKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK);
+        $globalHits = $this->vectorClient->search($cleanQuery, $topK);
 
-        $softDocumentCandidateDocIds = [];
-        $pseudoScopeDocIds = [];
-        $scopeMode = 'none';
-
-        $scopedVectorBoostFactor = 0.0;
-        $secondaryScopedVectorBoostFactor = 0.0;
-        $scopedKeywordBoostFactor = 0.0;
-
-        $scopedPrimaryVectorHits = [];
-        $scopedSecondaryVectorHits = [];
-        $scopedKeywordHits = [];
-
-        if ($tagCandidateDocIds !== []) {
-            $scopeMode = 'tag_routing';
-            $scopedVectorBoostFactor = self::TAG_SCOPED_VECTOR_BOOST;
-            $secondaryScopedVectorBoostFactor = self::TAG_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
-            $scopedKeywordBoostFactor = self::TAG_SCOPED_LEXICAL_BOOST;
-
-            $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $tagCandidateDocIds);
-            $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
-                ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $tagCandidateDocIds)
-                : [];
-            $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $tagCandidateDocIds);
-        } else {
-            $softDocumentCandidateDocIds = $this->deriveSoftDocumentCandidateDocIds($globalKeywordHits);
-
-            if ($softDocumentCandidateDocIds !== []) {
-                $scopeMode = 'soft_document_candidate';
-                $scopedVectorBoostFactor = self::SOFT_DOC_SCOPED_VECTOR_BOOST;
-                $secondaryScopedVectorBoostFactor = self::SOFT_DOC_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
-                $scopedKeywordBoostFactor = self::SOFT_DOC_SCOPED_LEXICAL_BOOST;
-
-                $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $softDocumentCandidateDocIds);
-                $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
-                    ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $softDocumentCandidateDocIds)
-                    : [];
-                $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $softDocumentCandidateDocIds);
-            } else {
-                $pseudoScopeDocIds = $this->derivePseudoScopeDocumentIds($globalPrimaryVectorHits);
-
-                if ($pseudoScopeDocIds !== []) {
-                    $scopeMode = 'pseudo_scope';
-                    $scopedVectorBoostFactor = self::PSEUDO_SCOPED_VECTOR_BOOST;
-                    $secondaryScopedVectorBoostFactor = self::PSEUDO_SCOPED_VECTOR_BOOST * self::SECONDARY_SCOPED_VECTOR_MULTIPLIER;
-                    $scopedKeywordBoostFactor = self::PSEUDO_SCOPED_LEXICAL_BOOST;
-
-                    $scopedPrimaryVectorHits = $this->vectorClient->searchScoped($semanticQuery, $topK, $pseudoScopeDocIds);
-                    $scopedSecondaryVectorHits = $secondaryVectorQuery !== ''
-                        ? $this->vectorClient->searchScoped($secondaryVectorQuery, $topK, $pseudoScopeDocIds)
-                        : [];
-                    $scopedKeywordHits = $this->keywordRetriever->search($lexicalQuery, $topK, $pseudoScopeDocIds);
-                }
-            }
+        $scopedHits = [];
+        if ($candidateDocIds !== []) {
+            $scopedHits = $this->vectorClient->searchScoped($cleanQuery, $topK, $candidateDocIds);
         }
 
-        if (
-            $globalPrimaryVectorHits === []
-            && $globalSecondaryVectorHits === []
-            && $globalKeywordHits === []
-            && $scopedPrimaryVectorHits === []
-            && $scopedSecondaryVectorHits === []
-            && $scopedKeywordHits === []
-        ) {
+        if ($globalHits === [] && $scopedHits === []) {
             return [
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => $threshold,
-                'clean_query' => $cleanQuery,
-                'semantic_query' => $semanticQuery,
-                'secondary_vector_query' => $secondaryVectorQuery,
-                'lexical_query' => $lexicalQuery,
-                'scope_mode' => $scopeMode,
-                'tag_candidate_doc_ids' => $tagCandidateDocIds,
-                'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
-                'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
-                'global_hit_count' => 0,
-                'scoped_hit_count' => 0,
-                'global_vector_hit_count' => 0,
-                'global_primary_vector_hit_count' => 0,
-                'global_secondary_vector_hit_count' => 0,
-                'global_keyword_hit_count' => 0,
-                'scoped_vector_hit_count' => 0,
-                'scoped_primary_vector_hit_count' => 0,
-                'scoped_secondary_vector_hit_count' => 0,
-                'scoped_keyword_hit_count' => 0,
-                'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
-                'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
-                'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_vector_scores' => [],
-                'raw_keyword_scores' => [],
-                'title_metadata_boosts' => [],
-                'title_metadata_doc_boosts' => [],
+                'raw_scores' => [],
             ];
         }
 
-        $fused = $this->fuseHitSources([
-            [
-                'hits' => $globalPrimaryVectorHits,
-                'threshold' => $threshold,
-                'boost' => 1.0,
-                'bucket' => 'vector',
-            ],
-            [
-                'hits' => $globalSecondaryVectorHits,
-                'threshold' => $threshold,
-                'boost' => self::SECONDARY_GLOBAL_VECTOR_BOOST,
-                'bucket' => 'vector',
-            ],
-            [
-                'hits' => $globalKeywordHits,
-                'threshold' => self::LEXICAL_SCORE_THRESHOLD,
-                'boost' => self::GLOBAL_LEXICAL_BOOST,
-                'bucket' => 'keyword',
-            ],
-            [
-                'hits' => $scopedPrimaryVectorHits,
-                'threshold' => $threshold,
-                'boost' => $scopedVectorBoostFactor,
-                'bucket' => 'vector',
-            ],
-            [
-                'hits' => $scopedSecondaryVectorHits,
-                'threshold' => $threshold,
-                'boost' => $secondaryScopedVectorBoostFactor,
-                'bucket' => 'vector',
-            ],
-            [
-                'hits' => $scopedKeywordHits,
-                'threshold' => self::LEXICAL_SCORE_THRESHOLD,
-                'boost' => $scopedKeywordBoostFactor,
-                'bucket' => 'keyword',
-            ],
-        ], $withScores);
+        $fused = $this->fuseHits(
+            $globalHits,
+            $scopedHits,
+            $threshold,
+            $scopedHits !== [],
+            $withScores
+        );
 
         $rrfScores = $fused['rrf_scores'];
-        $rawVectorScores = $fused['raw_vector_scores'];
-        $rawKeywordScores = $fused['raw_keyword_scores'];
+        $rawScores = $fused['raw_scores'];
 
-        if ($rrfScores === []) {
-            $rrfScores = $this->fallbackRrfFromSources(
-                $globalPrimaryVectorHits,
-                $globalSecondaryVectorHits,
-                $globalKeywordHits,
-                $scopedPrimaryVectorHits,
-                $scopedSecondaryVectorHits,
-                $scopedKeywordHits
-            );
+        if ($rrfScores === [] && $globalHits !== []) {
+            $rrfScores = $this->fallbackRrfFromHits($globalHits);
         }
 
         if ($rrfScores === []) {
@@ -710,45 +386,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 'limit' => $limit,
                 'is_list_query' => $isListQuery,
                 'threshold' => $threshold,
-                'clean_query' => $cleanQuery,
-                'semantic_query' => $semanticQuery,
-                'secondary_vector_query' => $secondaryVectorQuery,
-                'lexical_query' => $lexicalQuery,
-                'scope_mode' => $scopeMode,
-                'tag_candidate_doc_ids' => $tagCandidateDocIds,
-                'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
-                'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
-                'global_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits) + count($globalKeywordHits),
-                'scoped_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits) + count($scopedKeywordHits),
-                'global_vector_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits),
-                'global_primary_vector_hit_count' => count($globalPrimaryVectorHits),
-                'global_secondary_vector_hit_count' => count($globalSecondaryVectorHits),
-                'global_keyword_hit_count' => count($globalKeywordHits),
-                'scoped_vector_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits),
-                'scoped_primary_vector_hit_count' => count($scopedPrimaryVectorHits),
-                'scoped_secondary_vector_hit_count' => count($scopedSecondaryVectorHits),
-                'scoped_keyword_hit_count' => count($scopedKeywordHits),
-                'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
-                'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
-                'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
                 'ranked_chunk_ids' => [],
                 'rows' => [],
                 'rrf_scores' => [],
-                'raw_vector_scores' => $rawVectorScores,
-                'raw_keyword_scores' => $rawKeywordScores,
-                'title_metadata_boosts' => [],
-                'title_metadata_doc_boosts' => [],
+                'raw_scores' => $rawScores,
             ];
         }
 
-        $rows = $this->lookup->findByChunkIds(array_keys($rrfScores));
-
-        [$rrfScores, $titleMetadataBoosts, $titleMetadataDocBoosts] = $this->applyTitleMetadataBoosts(
-            $rrfScores,
-            $rows,
-            $lexicalQuery
-        );
-
         arsort($rrfScores);
         $rankedChunkIds = array_keys($rrfScores);
 
@@ -758,38 +402,22 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             'limit' => $limit,
             'is_list_query' => $isListQuery,
             'threshold' => $threshold,
-            'clean_query' => $cleanQuery,
-            'semantic_query' => $semanticQuery,
-            'secondary_vector_query' => $secondaryVectorQuery,
-            'lexical_query' => $lexicalQuery,
-            'scope_mode' => $scopeMode,
-            'tag_candidate_doc_ids' => $tagCandidateDocIds,
-            'soft_document_candidate_doc_ids' => $softDocumentCandidateDocIds,
-            'pseudo_scope_doc_ids' => $pseudoScopeDocIds,
-            'global_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits) + count($globalKeywordHits),
-            'scoped_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits) + count($scopedKeywordHits),
-            'global_vector_hit_count' => count($globalPrimaryVectorHits) + count($globalSecondaryVectorHits),
-            'global_primary_vector_hit_count' => count($globalPrimaryVectorHits),
-            'global_secondary_vector_hit_count' => count($globalSecondaryVectorHits),
-            'global_keyword_hit_count' => count($globalKeywordHits),
-            'scoped_vector_hit_count' => count($scopedPrimaryVectorHits) + count($scopedSecondaryVectorHits),
-            'scoped_primary_vector_hit_count' => count($scopedPrimaryVectorHits),
-            'scoped_secondary_vector_hit_count' => count($scopedSecondaryVectorHits),
-            'scoped_keyword_hit_count' => count($scopedKeywordHits),
-            'scoped_vector_boost_factor' => $scopedVectorBoostFactor,
-            'secondary_scoped_vector_boost_factor' => $secondaryScopedVectorBoostFactor,
-            'scoped_keyword_boost_factor' => $scopedKeywordBoostFactor,
             'ranked_chunk_ids' => $rankedChunkIds,
             'rows' => $rows,
             'rrf_scores' => $rrfScores,
-            'raw_vector_scores' => $rawVectorScores,
-            'raw_keyword_scores' => $rawKeywordScores,
-            'title_metadata_boosts' => $titleMetadataBoosts,
-            'title_metadata_doc_boosts' => $titleMetadataDocBoosts,
+            'raw_scores' => $rawScores,
         ];
     }
 
+    // =========================================================
+    // SUPPORT
+    // =========================================================
 
+    /**
+     * Loads the active model generation config.
+     *
+     * Retrieval is not allowed to proceed without an active config.
+     */
     private function requireConfig(): ModelGenerationConfig
     {
         $config = $this->configRepository->findActiveForModel();
@@ -801,18 +429,32 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $config;
     }
 
+    /**
+     * Extracts the normalized sales intent string from the intent detector.
+     *
+     * Falls back to DISCOVERY when the detector payload is incomplete.
+     */
     private function detectSalesIntent(string $prompt): string
     {
         $data = $this->salesIntentLite->detect($prompt);
 
-        return (string) ($data['intent'] ?? SalesIntentLite::DISCOVERY);
+        return (string)($data['intent'] ?? SalesIntentLite::DISCOVERY);
     }
 
+    /**
+     * Computes retrieval threshold and vector topK.
+     *
+     * Rules:
+     * - objection/pricing intents are slightly stricter
+     * - list queries are allowed to retrieve a wider candidate set
+     * - all values are clamped to global hard limits
+     */
     private function computeThresholdAndTopK(
         string $salesIntent,
-        bool $isListQuery,
-        int $vectorTopKBase
-    ): array {
+        bool   $isListQuery,
+        int    $vectorTopKBase
+    ): array
+    {
         $threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD;
         $topK = $vectorTopKBase;
 
@@ -824,7 +466,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         }
 
         if ($isListQuery) {
-            $topK = (int) round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
+            $topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
         }
 
         $topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
@@ -837,175 +479,26 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     }
 
     /**
-     * @param array<int, array<string,mixed>> $globalKeywordHits
-     * @return string[]
+     * Fuses multiple hit lists into one RRF-style score map.
+     *
+     * Notes:
+     * - only hits above threshold are considered
+     * - rank position within each hit list contributes to the final score
+     * - scoped hits can be boosted
+     * - raw scores are optionally captured for debug output
      */
-    private function deriveSoftDocumentCandidateDocIds(array $globalKeywordHits): array
-    {
-        $window = array_slice($globalKeywordHits, 0, self::SOFT_DOC_CANDIDATE_WINDOW);
-        $stats = [];
-
-        foreach ($window as $rank => $hit) {
-            $documentId = $hit['document_id'] ?? null;
-
-            if (!is_string($documentId) || $documentId === '') {
-                continue;
-            }
-
-            $score = isset($hit['score']) && is_numeric($hit['score'])
-                ? (float) $hit['score']
-                : 0.0;
-
-            if (!isset($stats[$documentId])) {
-                $stats[$documentId] = [
-                    'document_id' => $documentId,
-                    'count' => 0,
-                    'best_rank' => $rank,
-                    'best_score' => $score,
-                ];
-            }
-
-            $stats[$documentId]['count']++;
-            $stats[$documentId]['best_rank'] = min($stats[$documentId]['best_rank'], $rank);
-            $stats[$documentId]['best_score'] = max($stats[$documentId]['best_score'], $score);
-        }
-
-        if ($stats === []) {
-            return [];
-        }
-
-        uasort($stats, static function (array $a, array $b): int {
-            if ($a['count'] !== $b['count']) {
-                return $b['count'] <=> $a['count'];
-            }
-
-            if (abs((float) $a['best_score'] - (float) $b['best_score']) > 0.000001) {
-                return ((float) $b['best_score'] <=> (float) $a['best_score']);
-            }
-
-            return $a['best_rank'] <=> $b['best_rank'];
-        });
-
-        $selected = [];
-
-        foreach ($stats as $row) {
-            $count = (int) $row['count'];
-            $bestRank = (int) $row['best_rank'];
-            $bestScore = (float) $row['best_score'];
-
-            if (
-                $count < self::SOFT_DOC_CANDIDATE_MIN_DOC_HITS
-                && !($bestRank === 0 && $bestScore >= self::SOFT_DOC_TOP_SCORE_MIN)
-            ) {
-                continue;
-            }
-
-            $selected[] = (string) $row['document_id'];
-
-            if (count($selected) >= self::SOFT_DOC_CANDIDATE_MAX_DOCS) {
-                break;
-            }
-        }
-
-        return $selected;
-    }
-
-    /**
-     * @param array<int, array<string,mixed>> $globalPrimaryVectorHits
-     * @return string[]
-     */
-    private function derivePseudoScopeDocumentIds(array $globalPrimaryVectorHits): array
-    {
-        $window = array_slice($globalPrimaryVectorHits, 0, self::PSEUDO_SCOPE_GLOBAL_WINDOW);
-        $stats = [];
-
-        foreach ($window as $rank => $hit) {
-            $documentId = $hit['document_id'] ?? null;
-
-            if (!is_string($documentId) || $documentId === '') {
-                continue;
-            }
-
-            $score = isset($hit['score']) && is_numeric($hit['score'])
-                ? (float) $hit['score']
-                : 0.0;
-
-            if (!isset($stats[$documentId])) {
-                $stats[$documentId] = [
-                    'document_id' => $documentId,
-                    'count' => 0,
-                    'best_rank' => $rank,
-                    'best_score' => $score,
-                ];
-            }
-
-            $stats[$documentId]['count']++;
-            $stats[$documentId]['best_rank'] = min($stats[$documentId]['best_rank'], $rank);
-            $stats[$documentId]['best_score'] = max($stats[$documentId]['best_score'], $score);
-        }
-
-        if ($stats === []) {
-            return [];
-        }
-
-        uasort($stats, static function (array $a, array $b): int {
-            if ($a['count'] !== $b['count']) {
-                return $b['count'] <=> $a['count'];
-            }
-
-            if (abs((float) $a['best_score'] - (float) $b['best_score']) > 0.000001) {
-                return ((float) $b['best_score'] <=> (float) $a['best_score']);
-            }
-
-            return $a['best_rank'] <=> $b['best_rank'];
-        });
-
-        $selected = [];
-
-        foreach ($stats as $row) {
-            if ((int) $row['count'] < self::PSEUDO_SCOPE_MIN_DOC_HITS) {
-                continue;
-            }
-
-            $selected[] = (string) $row['document_id'];
-
-            if (count($selected) >= self::PSEUDO_SCOPE_MAX_DOCS) {
-                break;
-            }
-        }
-
-        return $selected;
-    }
-
-    /**
-     * @param array<int, array{
-     *   hits: array<int, array<string,mixed>>,
-     *   threshold: float,
-     *   boost: float,
-     *   bucket: string
-     * }> $sources
-     * @return array{
-     *   rrf_scores: array<string,float>,
-     *   raw_vector_scores: array<string,float>,
-     *   raw_keyword_scores: array<string,float>
-     * }
-     */
-    private function fuseHitSources(array $sources, bool $captureRaw): array
+    private function fuseHits(
+        array $globalHits,
+        array $scopedHits,
+        float $threshold,
+        bool  $boostScoped,
+        bool  $captureRaw
+    ): array
     {
         $rrfScores = [];
-        $rawVectorScores = [];
-        $rawKeywordScores = [];
-
-        foreach ($sources as $source) {
-            $hits = $source['hits'];
-            $threshold = (float) $source['threshold'];
-            $boost = max(0.0, (float) $source['boost']);
-            $bucket = (string) $source['bucket'];
-
-            if ($hits === [] || $boost <= 0.0) {
-                continue;
-            }
+        $rawScores = [];
 
+        $apply = function (array $hits, bool $boost) use (&$rrfScores, &$rawScores, $threshold, $captureRaw): void {
             $rank = 0;
 
             foreach ($hits as $hit) {
@@ -1013,83 +506,85 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                     continue;
                 }
 
-                $raw = (float) $hit['score'];
+                $raw = (float)$hit['score'];
 
                 if ($raw < $threshold) {
                     continue;
                 }
 
-                $chunkId = (string) $hit['chunk_id'];
+                $chunkId = (string)$hit['chunk_id'];
 
                 if ($captureRaw) {
-                    if ($bucket === 'vector') {
-                        $rawVectorScores[$chunkId] = max($rawVectorScores[$chunkId] ?? 0.0, $raw);
-                    } elseif ($bucket === 'keyword') {
-                        $rawKeywordScores[$chunkId] = max($rawKeywordScores[$chunkId] ?? 0.0, $raw);
-                    }
+                    $rawScores[$chunkId] = max($rawScores[$chunkId] ?? 0.0, $raw);
                 }
 
                 $rank++;
                 $rrf = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
-                $rrf *= $boost;
+
+                if ($boost) {
+                    $rrf *= 1.2;
+                }
 
                 $rrfScores[$chunkId] = ($rrfScores[$chunkId] ?? 0.0) + $rrf;
             }
-        }
+        };
+
+        $apply($globalHits, false);
+        $apply($scopedHits, $boostScoped);
 
         return [
             'rrf_scores' => $rrfScores,
-            'raw_vector_scores' => $rawVectorScores,
-            'raw_keyword_scores' => $rawKeywordScores,
+            'raw_scores' => $rawScores,
         ];
     }
 
     /**
-     * @param array<int, array<string,mixed>> ...$sourceLists
-     * @return array<string,float>
+     * Builds a fallback RRF ranking purely from hit order.
+     *
+     * Used when thresholding removed all fused candidates but
+     * the global hit list itself still exists.
      */
-    private function fallbackRrfFromSources(array ...$sourceLists): array
+    private function fallbackRrfFromHits(array $hits): array
     {
-        foreach ($sourceLists as $hits) {
-            $rrf = [];
-            $rank = 0;
+        $rrf = [];
+        $rank = 0;
 
-            foreach ($hits as $hit) {
-                if (!isset($hit['chunk_id'])) {
-                    continue;
-                }
-
-                $rank++;
-                $rrf[(string) $hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
-
-                if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
-                    break;
-                }
+        foreach ($hits as $hit) {
+            if (!isset($hit['chunk_id'])) {
+                continue;
             }
 
-            if ($rrf !== []) {
-                return $rrf;
+            $rank++;
+            $rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
+
+            if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
+                break;
             }
         }
 
-        return [];
+        return $rrf;
     }
 
     /**
+     * Selects a coherent chunk window from one exact document title match.
+     *
+     * For exact product questions we prefer a pure document slice over
+     * cross-document fusion to avoid mixing neighbouring product families.
+     *
      * @param array<string,array<string,mixed>> $rows
      * @return string[]
      */
     private function selectExactDocumentChunkIds(array $rows, int $limit): array
     {
         uasort($rows, static function (array $a, array $b): int {
-            $aIndex = is_int($a['chunk_index'] ?? null) ? (int) $a['chunk_index'] : PHP_INT_MAX;
-            $bIndex = is_int($b['chunk_index'] ?? null) ? (int) $b['chunk_index'] : PHP_INT_MAX;
+            $aIndex = is_int($a['chunk_index'] ?? null) ? (int)$a['chunk_index'] : PHP_INT_MAX;
+            $bIndex = is_int($b['chunk_index'] ?? null) ? (int)$b['chunk_index'] : PHP_INT_MAX;
 
             if ($aIndex !== $bIndex) {
                 return $aIndex <=> $bIndex;
             }
 
-            return strcmp((string) ($a['chunk_id'] ?? ''), (string) ($b['chunk_id'] ?? ''));
+            return strcmp((string)($a['chunk_id'] ?? ''), (string)($b['chunk_id'] ?? ''));
         });
 
         $selected = [];
@@ -1097,7 +592,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
         foreach ($rows as $row) {
             $chunkId = $row['chunk_id'] ?? null;
-            $text = trim((string) ($row['text'] ?? ''));
+            $text = trim((string)($row['text'] ?? ''));
 
             if (!is_string($chunkId) || $chunkId === '' || $text === '') {
                 continue;
@@ -1114,6 +609,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
     }
 
     /**
+     * Builds synthetic scores for exact-title fast-path selections.
+     *
+     * These scores are only used for debug output consistency.
+     *
      * @param string[] $chunkIds
      * @return array<string,float>
      */
@@ -1122,12 +621,20 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         $scores = [];
 
         foreach (array_values($chunkIds) as $rank => $chunkId) {
-            $scores[(string) $chunkId] = 1.0 / (1 + $rank);
+            $scores[(string)$chunkId] = 1.0 / (1 + $rank);
         }
 
         return $scores;
     }
 
+    /**
+     * Selection strategy for list-style queries.
+     *
+     * Goal:
+     * - avoid near-identical chunks
+     * - prefer diverse list entries
+     * - stop once the configured limit is reached
+     */
     private function selectListChunkIds(array $chunkIds, array $rows, int $limit): array
     {
         $seen = [];
@@ -1138,19 +645,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $chunk = trim((string) $rows[$id]['text']);
+            $chunk = trim((string)$rows[$id]['text']);
             if ($chunk === '') {
                 continue;
             }
 
-            $key = md5(mb_strtolower((string) (preg_replace('/\s+/u', ' ', $chunk) ?? $chunk)));
+            $key = md5(mb_strtolower((string)(preg_replace('/\s+/u', ' ', $chunk) ?? $chunk)));
 
             if (isset($seen[$key])) {
                 continue;
             }
 
             $seen[$key] = true;
-            $out[] = (string) $id;
+            $out[] = (string)$id;
 
             if (count($out) >= $limit) {
                 break;
@@ -1160,8 +667,43 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
-    private function selectSalesChunkIds(array $chunkIds, array $rows, int $limit): array
+    /**
+     * Selection strategy for sales-oriented queries.
+     *
+     * Modes:
+     * - exact_document_title:
+     *   used when the prompt clearly contains one exact document title
+     *   and the answer should stay strictly within that document
+     * - sales_product_dominant_document:
+     *   used when the prompt anchors resolve to one focused product
+     *   document; only chunks of that document are returned
+     * - sales_dominant_document:
+     *   used when one document clearly dominates the top hit window and
+     *   coherent neighbouring chunks beat cross-document spread
+     * - sales_spread:
+     *   default mode that spreads chunks across documents and enforces
+     *   distance between chunk positions of the same document
+     */
+    private function selectSalesChunkIds(string $prompt, array $chunkIds, array $rows, int $limit): array
     {
+        $focusedDocId = $this->resolveFocusedSalesDocumentId($prompt, $chunkIds, $rows);
+
+        if ($focusedDocId !== null) {
+            $focusedChunkIds = $this->selectFocusedProductChunkIds(
+                $focusedDocId,
+                $chunkIds,
+                $rows,
+                $limit
+            );
+
+            if ($focusedChunkIds !== []) {
+                return [
+                    'ids' => $focusedChunkIds,
+                    'mode' => 'sales_product_dominant_document',
+                ];
+            }
+        }
+
         $dominantDocId = $this->detectDominantTopDocument($chunkIds, $rows);
 
         if ($dominantDocId !== null) {
@@ -1191,6 +733,265 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         ];
     }
 
+
+    /**
+     * Resolves a strongly focused product document before normal sales spreading.
+     *
+     * This protects against classic false positives where neighbouring products,
+     * indicators or safety sheets outrank the actually requested device.
+     *
+     * Each document in the top FOCUSED_PRODUCT_WINDOW ranks is scored once, at its
+     * best-ranked chunk that carries a title. The winner is returned only if it
+     * reaches FOCUSED_PRODUCT_MIN_SCORE and beats the runner-up by at least
+     * FOCUSED_PRODUCT_MIN_GAP; a sole candidate has an infinite gap.
+     *
+     * @return string|null focused document id, or null when no document qualifies
+     */
+    private function resolveFocusedSalesDocumentId(string $prompt, array $chunkIds, array $rows): ?string
+    {
+        $promptProfile = $this->buildPromptProductProfile($prompt);
+
+        if ($promptProfile['anchors'] === []) {
+            return null;
+        }
+
+        $candidates = [];
+        $seenDocs = [];
+
+        foreach (array_slice($chunkIds, 0, self::FOCUSED_PRODUCT_WINDOW) as $rank => $chunkId) {
+            $row = $rows[$chunkId] ?? null;
+            if (!is_array($row)) {
+                continue;
+            }
+
+            $documentId = $row['document_id'] ?? null;
+            if (!is_string($documentId) || $documentId === '' || isset($seenDocs[$documentId])) {
+                continue;
+            }
+
+            $title = $this->extractDocumentTitle($row);
+            if ($title === '') {
+                continue;
+            }
+
+            // Mark only after a usable title was found, so a later titled chunk
+            // of the same document can still represent it.
+            $seenDocs[$documentId] = true;
+
+            $candidates[] = [
+                'document_id' => $documentId,
+                'score' => $this->scoreFocusedProductCandidate($promptProfile, $title, $row, $rank),
+            ];
+        }
+
+        if ($candidates === []) {
+            return null;
+        }
+
+        // Highest score first; ties are broken by document id for determinism.
+        usort($candidates, static function (array $a, array $b): int {
+            return ($b['score'] <=> $a['score'])
+                ?: strcmp((string)$a['document_id'], (string)$b['document_id']);
+        });
+
+        // A non-empty list always has index 0; its document_id was validated above.
+        $bestScore = (float)$candidates[0]['score'];
+        $runnerUpScore = (float)($candidates[1]['score'] ?? -INF);
+        $gap = $bestScore - $runnerUpScore;
+
+        if ($bestScore < self::FOCUSED_PRODUCT_MIN_SCORE || $gap < self::FOCUSED_PRODUCT_MIN_GAP) {
+            return null;
+        }
+
+        return $candidates[0]['document_id'];
+    }
+
+    /**
+     * Derives the prompt-side signals used by focused product selection.
+     *
+     * The prompt is normalized and tokenized; tokens then yield intent flags
+     * (keyword lists below) and matching anchors. asks_device is also true
+     * when no reagent, document or safety keyword is present.
+     *
+     * @return array{
+     *     normalized:string,
+     *     anchors:string[],
+     *     family_tokens:string[],
+     *     number_tokens:string[],
+     *     asks_reagent:bool,
+     *     asks_document:bool,
+     *     asks_safety:bool,
+     *     asks_device:bool
+     * }
+     */
+    private function buildPromptProductProfile(string $prompt): array
+    {
+        $normalized = $this->normalizeText($prompt);
+        $tokens = $this->tokenizeText($normalized);
+
+        $asksReagent = $this->containsAnyToken($tokens, [
+            'indikator', 'reagenz', 'reagens', 'chemie', 'chemikalie', 'sdb',
+            'sicherheitsdatenblatt', 'msds', 'flasche', 'gebinde',
+        ]);
+        $asksDocument = $this->containsAnyToken($tokens, [
+            'datenblatt', 'dokument', 'pdf', 'handbuch', 'manual', 'beschreibung',
+            'sdb', 'sicherheitsdatenblatt', 'msds',
+        ]);
+        $asksSafety = $this->containsAnyToken($tokens, [
+            'gefahr', 'gefahrgut', 'clp', 'h290', 'sicherheit', 'kennzeichnung',
+            'transport', 'lagerung', 'piktogramm',
+        ]);
+        // Device intent: an explicit device keyword, or no other intent at all.
+        $asksDevice = $this->containsAnyToken($tokens, [
+            'geraet', 'gerät', 'messgeraet', 'messgerät', 'analysator', 'automat',
+            'messung', 'messen', 'ueberwachung', 'überwachung', 'online', 'monitor',
+        ]) || !($asksReagent || $asksDocument || $asksSafety);
+
+        $anchorTokens = [];
+        $familyCandidates = [];
+        $numericTokens = [];
+
+        foreach ($tokens as $token) {
+            if ($this->isGenericProductToken($token)) {
+                continue;
+            }
+
+            // Digit-bearing tokens and short model tokens count as both anchor
+            // and family token regardless of their length.
+            $hasDigit = preg_match('/\d/u', $token) === 1;
+            if ($hasDigit || $this->isImportantShortModelToken($token)) {
+                $anchorTokens[] = $token;
+                $familyCandidates[] = $token;
+                if ($hasDigit) {
+                    $numericTokens[] = $token;
+                }
+                continue;
+            }
+
+            if (mb_strlen($token, 'UTF-8') < 3) {
+                continue;
+            }
+
+            $anchorTokens[] = $token;
+            if ($this->isFamilyDescriptorToken($token)) {
+                $familyCandidates[] = $token;
+            }
+        }
+
+        return [
+            'normalized' => $normalized,
+            'anchors' => array_values(array_unique($anchorTokens)),
+            'family_tokens' => array_values(array_unique($familyCandidates)),
+            'number_tokens' => array_values(array_unique($numericTokens)),
+            'asks_reagent' => $asksReagent,
+            'asks_document' => $asksDocument,
+            'asks_safety' => $asksSafety,
+            'asks_device' => $asksDevice,
+        ];
+    }
+
+    /**
+     * Scores one candidate document for focused product selection.
+     *
+     * Starts from a small rank bonus (5 at rank 0, floored at 0), then rewards
+     * prompt anchors, number tokens and family tokens present in the title and
+     * penalises missing ones. Document-type checks adjust the score by intent.
+     *
+     * @param array<string,mixed> $promptProfile result of buildPromptProductProfile()
+     * @param array<string,mixed> $row chunk row the title was taken from
+     */
+    private function scoreFocusedProductCandidate(array $promptProfile, string $title, array $row, int $rank): float
+    {
+        $titleNormalized = $this->normalizeText($title);
+        $titleTokens = $this->tokenizeText($titleNormalized);
+        $titleTokenMap = array_fill_keys($titleTokens, true);
+        $textNormalized = $this->normalizeText((string)($row['text'] ?? ''));
+
+        $score = max(0.0, 5.0 - $rank);
+
+        // Whole normalized title appears word-bounded inside the prompt.
+        if ($titleNormalized !== '' && str_contains(' ' . $promptProfile['normalized'] . ' ', ' ' . $titleNormalized . ' ')) {
+            $score += 24.0;
+        }
+
+        $matchedAnchors = 0;
+        foreach ($promptProfile['anchors'] as $anchor) {
+            if (isset($titleTokenMap[$anchor])) {
+                $matchedAnchors++;
+                $score += $this->isImportantShortModelToken($anchor) ? 4.0 : 3.5;
+            } elseif (str_contains(' ' . $titleNormalized . ' ', ' ' . $anchor . ' ')) {
+                $matchedAnchors++;
+                $score += 3.0;
+            } else {
+                $score -= $this->isFamilyDescriptorToken($anchor) ? 3.5 : 2.0;
+            }
+        }
+
+        foreach ($promptProfile['number_tokens'] as $numberToken) {
+            $score += isset($titleTokenMap[$numberToken]) ? 4.0 : -5.0;
+        }
+
+        foreach ($promptProfile['family_tokens'] as $familyToken) {
+            $score += isset($titleTokenMap[$familyToken]) ? 4.0 : -4.5;
+        }
+
+        // Evaluate each document-type check at most once and only when needed.
+        $asksDevice = (bool)$promptProfile['asks_device'];
+        $asksReagent = (bool)$promptProfile['asks_reagent'];
+        $asksPaperwork = $promptProfile['asks_document'] || $promptProfile['asks_safety'];
+        $isReagentDoc = ($asksDevice || $asksReagent)
+            && $this->looksLikeReagentOrAccessoryDocument($row, $titleNormalized, $textNormalized);
+        $isSafetyDoc = ($asksDevice || $asksPaperwork)
+            && $this->looksLikeSafetyDocument($row, $titleNormalized, $textNormalized);
+
+        if ($asksDevice && $isReagentDoc) {
+            $score -= 12.0;
+        }
+        if ($asksDevice && $isSafetyDoc) {
+            $score -= 8.0;
+        }
+        if ($asksReagent && $isReagentDoc) {
+            $score += 6.0;
+        }
+        if ($asksPaperwork && $isSafetyDoc) {
+            $score += 4.0;
+        }
+
+        if ($matchedAnchors === 0) {
+            $score -= 10.0;
+        }
+
+        return $score;
+    }
+
+    /**
+     * Returns chunks of the focused product document only.
+     *
+     * Delegates to the dominant-document window selection with the limit
+     * capped at FOCUSED_PRODUCT_MAX_CHUNKS.
+     *
+     * Free slots are deliberately not filled with chunks of neighbouring
+     * products: mixing them in is what this strict mode exists to prevent.
+     */
+    private function selectFocusedProductChunkIds(
+        string $documentId,
+        array $chunkIds,
+        array $rows,
+        int $limit
+    ): array
+    {
+        $maxChunks = min($limit, self::FOCUSED_PRODUCT_MAX_CHUNKS);
+
+        return $this->selectDominantDocumentChunkIds($documentId, $chunkIds, $rows, $maxChunks);
+    }
+
+    /**
+     * Detects whether one document clearly dominates the first ranked window.
+     *
+     * This is especially useful for product-sheet style documents where
+     * several adjacent chunks belong together and should be passed to the model
+     * as one coherent factual block.
+     */
     private function detectDominantTopDocument(array $chunkIds, array $rows): ?string
     {
         $docWindow = [];
@@ -1200,7 +1001,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string) $rows[$chunkId]['text']);
+            $text = trim((string)$rows[$chunkId]['text']);
             $docId = $rows[$chunkId]['document_id'] ?? null;
 
             if ($text === '' || !is_string($docId) || $docId === '') {
@@ -1223,7 +1024,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             return null;
         }
 
-        $dominantCount = (int) ($counts[$dominantDocId] ?? 0);
+        $dominantCount = (int)($counts[$dominantDocId] ?? 0);
 
         if ($dominantCount >= self::DOMINANT_DOC_MIN_HITS) {
             return $dominantDocId;
@@ -1239,12 +1040,21 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return null;
     }
 
+    /**
+     * Selects a coherent chunk window from the dominant document.
+     *
+     * Strategy:
+     * - use the highest-ranked chunk of that document as anchor
+     * - prefer neighbouring chunk indices around that anchor
+     * - sort the final selection by chunk index for prompt coherence
+     */
     private function selectDominantDocumentChunkIds(
         string $documentId,
-        array $chunkIds,
-        array $rows,
-        int $limit
-    ): array {
+        array  $chunkIds,
+        array  $rows,
+        int    $limit
+    ): array
+    {
         $docHits = [];
         $anchorChunkIndex = null;
 
@@ -1253,7 +1063,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string) $rows[$chunkId]['text']);
+            $text = trim((string)$rows[$chunkId]['text']);
             $docId = $rows[$chunkId]['document_id'] ?? null;
 
             if ($text === '' || $docId !== $documentId) {
@@ -1268,7 +1078,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
             }
 
             $docHits[] = [
-                'id' => (string) $chunkId,
+                'id' => (string)$chunkId,
                 'rank' => $rank,
                 'chunk_index' => $chunkIndex,
             ];
@@ -1326,12 +1136,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         );
     }
 
+    /**
+     * Fills the remaining sales slots after a dominant document selection.
+     *
+     * The already selected dominant-document chunks stay fixed.
+     * Remaining slots are filled with the normal spread strategy.
+     */
     private function fillRemainingSalesChunkIds(
         array $seedChunkIds,
         array $chunkIds,
         array $rows,
-        int $limit
-    ): array {
+        int   $limit
+    ): array
+    {
         $out = array_values(array_unique(array_map('strval', $seedChunkIds)));
 
         if (count($out) >= $limit) {
@@ -1383,12 +1200,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 }
             }
 
-            $text = trim((string) $rows[$chunkId]['text']);
+            $text = trim((string)$rows[$chunkId]['text']);
             if ($text === '') {
                 continue;
             }
 
-            $out[] = (string) $chunkId;
+            $out[] = (string)$chunkId;
             $selected[$chunkId] = true;
             $docCounter[$docId] = ($docCounter[$docId] ?? 0) + 1;
 
@@ -1404,6 +1221,14 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
+    /**
+     * Default spread selection for sales-oriented queries.
+     *
+     * Goal:
+     * - avoid overloading the result with chunks from the same document
+     * - avoid chunks that are too close to each other in the same document
+     * - preserve top-ranked relevance while improving contextual spread
+     */
     private function selectSalesChunkIdsSpread(array $chunkIds, array $rows, int $limit): array
     {
         $out = [];
@@ -1436,12 +1261,12 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 $docChunkPositions[$docId][] = $chunkIndex;
             }
 
-            $text = trim((string) $rows[$chunkId]['text']);
+            $text = trim((string)$rows[$chunkId]['text']);
             if ($text === '') {
                 continue;
             }
 
-            $out[] = (string) $chunkId;
+            $out[] = (string)$chunkId;
             $docCounter[$docId] = ($docCounter[$docId] ?? 0) + 1;
 
             if (count($out) >= $limit) {
@@ -1452,6 +1277,177 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
         return $out;
     }
 
+
+    /**
+     * Resolves the document title for a chunk row: a non-blank
+     * `metadata.document_title` wins, otherwise the first "# Produkt Titel:"
+     * heading in the chunk text is used; '' when neither is available.
+     */
+    private function extractDocumentTitle(array $row): string
+    {
+        $fromMetadata = $row['metadata']['document_title'] ?? null;
+        $fromMetadata = is_string($fromMetadata) ? trim($fromMetadata) : '';
+
+        if ($fromMetadata !== '') {
+            return $fromMetadata;
+        }
+
+        $chunkText = (string)($row['text'] ?? '');
+        $headingPattern = '/^#\s*Produkt\s+Titel:\s*`?([^`\n]+)`?/imu';
+        if ($chunkText === '' || preg_match($headingPattern, $chunkText, $hit) !== 1) {
+            return '';
+        }
+
+        return trim((string)($hit[1] ?? ''));
+    }
+
+    /**
+     * Canonicalises text for token-based comparisons: lower-cases it, turns
+     * separators and punctuation into spaces and collapses whitespace runs.
+     */
+    private function normalizeText(string $value): string
+    {
+        $lowered = strtr(mb_strtolower(trim($value), 'UTF-8'), '-/_', '   ');
+        $lettersOnly = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $lowered) ?? $lowered;
+        $collapsed = preg_replace('/\s+/u', ' ', $lettersOnly) ?? $lettersOnly;
+
+        return trim($collapsed);
+    }
+
+    /**
+     * Splits already-normalized text into its whitespace-separated tokens.
+     *
+     * @return string[]
+     */
+    private function tokenizeText(string $value): array
+    {
+        // preg_split() reports a PCRE failure (e.g. malformed UTF-8 under /u)
+        // by returning false; that case is mapped to "no tokens".
+        $tokens = $value === '' ? [] : preg_split('/\s+/u', $value, -1, PREG_SPLIT_NO_EMPTY);
+
+        return $tokens === false ? [] : $tokens;
+    }
+
+    /**
+     * Checks whether two token lists overlap.
+     *
+     * True when at least one entry of $needles also occurs in $tokens
+     * (whole-token equality, no substring matching). An empty list on
+     * either side always yields false.
+     *
+     * @param string[] $tokens  Tokens of the text being inspected.
+     * @param string[] $needles Words to look for.
+     */
+    private function containsAnyToken(array $tokens, array $needles): bool
+    {
+        if ($tokens === [] || $needles === []) {
+            return false;
+        }
+
+        // array_intersect() compares entries by their string value.
+        return array_intersect($needles, $tokens) !== [];
+    }
+
+    /**
+     * Tells whether a token is a stop word or generic product-domain word; such
+     * words must not drive product dominance decisions. Matching is case-sensitive.
+     */
+    private function isGenericProductToken(string $token): bool
+    {
+        static $generic = [
+            'der', 'die', 'das', 'ein', 'eine', 'einen', 'einem', 'und', 'oder', 'mit', 'gegen',
+            'fuer', 'für', 'von', 'im', 'in', 'am', 'an', 'auf', 'zu', 'zum', 'zur',
+            'produkt', 'produkte', 'produktkarte', 'titel', 'geraet', 'gerät', 'messgeraet', 'messgerät',
+            'wasser', 'haerte', 'härte', 'resthaerte', 'resthärte', 'analyse', 'analysator', 'automat',
+            'online', 'messung', 'messen', 'preis', 'preise', 'kosten', 'info', 'infos', 'passend',
+            'richtige', 'richtiges', 'geeignet', 'geeignete', 'welche', 'welcher', 'welches',
+            'brauche', 'suche', 'bitte',
+        ];
+
+        return in_array($token, $generic, true);
+    }
+
+    /**
+     * Whitelists short technical model codes (TH, TC, ...) that may act as anchors.
+     */
+    private function isImportantShortModelToken(string $token): bool
+    {
+        // Strict comparison against lower-case codes: only lower-cased tokens can match.
+        $shortModelCodes = ['th', 'tc', 'tp', 'tm', 'ph', 'rx'];
+        return in_array($token, $shortModelCodes, true);
+    }
+
+    /**
+     * Flags tokens that differentiate product families: a known family word,
+     * a whitelisted short model code, or any token containing a digit.
+     */
+    private function isFamilyDescriptorToken(string $token): bool
+    {
+        $familyWords = [
+            'evo', 'eco', 'self', 'clean', 'mini', 'pro', 'plus', 'basic', 'lab',
+            'inline', 'compact', 'panel', 'sc',
+        ];
+        $isFamilyWord = in_array($token, $familyWords, true);
+
+        return $isFamilyWord || $this->isImportantShortModelToken($token) || preg_match('/\d/u', $token) === 1;
+    }
+
+    /**
+     * Heuristic classifier for indicator, reagent, accessory and spare-part documents.
+     *
+     * Matching is case-sensitive against lower-case keywords. $row is not inspected.
+     */
+    private function looksLikeReagentOrAccessoryDocument(array $row, string $titleNormalized, string $textNormalized): bool
+    {
+        $haystack = trim($titleNormalized . ' ' . $textNormalized);
+
+        // Long keywords match as substrings so that German compounds still hit.
+        $stems = [
+            'indikator', 'reagenz', 'reagens', 'laborchemikalie', 'chemikalie',
+            'sicherheitsdatenblatt', 'gebinde', 'flasche', 'ersatzteil',
+            'zubehoer', 'zubehör', 'service set', 'filtereinsatz',
+            'kerzenfilter', 'druckregler',
+        ];
+
+        foreach ($stems as $stem) {
+            if (str_contains($haystack, $stem)) {
+                return true;
+            }
+        }
+
+        // Short codes must be whole words; as substrings "ufi" would match "häufig".
+        return preg_match('/(?<![\p{L}\p{N}])(?:sdb|msds|ufi)(?![\p{L}\p{N}])/u', $haystack) === 1;
+    }
+
+    /**
+     * Heuristic classifier for safety-style documents.
+     *
+     * Matching is case-sensitive against lower-case keywords. $row is not inspected.
+     */
+    private function looksLikeSafetyDocument(array $row, string $titleNormalized, string $textNormalized): bool
+    {
+        $haystack = trim($titleNormalized . ' ' . $textNormalized);
+
+        // Long keywords match as substrings so that German compounds still hit.
+        // NOTE(review): "lagerung" therefore also matches "ablagerung" - confirm this is intended.
+        $stems = [
+            'sicherheitsdatenblatt', 'gefahrenbewertung', 'gefahrenpiktogramm',
+            'signalwort', 'lagerung', 'transport', 'kennzeichnung',
+        ];
+
+        foreach ($stems as $stem) {
+            if (str_contains($haystack, $stem)) {
+                return true;
+            }
+        }
+
+        // Short codes must be whole words; as substrings "h290" would match a token like "th2901".
+        return preg_match('/(?<![\p{L}\p{N}])(?:sdb|msds|clp|h290|pbt|vpvb)(?![\p{L}\p{N}])/u', $haystack) === 1;
+    }
+
+    /**
+     * Converts selected chunk ids into the final plain text result list.
+     */
     private function collectTextsFromIds(array $chunkIds, array $rows): array
     {
         $out = [];
@@ -1461,7 +1457,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
 
-            $text = trim((string) $rows[$id]['text']);
+            $text = trim((string)$rows[$id]['text']);
 
             if ($text !== '') {
                 $out[] = $text;
@@ -1470,233 +1466,4 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
 
         return $out;
     }
-
-    /**
-     * Applies a conservative document-level re-rank based on title / metadata matching.
-     *
-     * This is intentionally executed after source fusion. It should sharpen ranking
-     * for clearly matching documents, but never replace the underlying retrieval logic.
-     *
-     * @param array<string,float> $rrfScores
-     * @param array<string,array<string,mixed>> $rows
-     * @return array{0: array<string,float>, 1: array<string,float>, 2: array<string,float>}
-     */
-    private function applyTitleMetadataBoosts(array $rrfScores, array $rows, string $lexicalQuery): array
-    {
-        $normalizedQuery = $this->normalizeForMatching($lexicalQuery);
-        $queryTokens = $this->tokenizeNormalizedQuery($normalizedQuery);
-
-        if ($normalizedQuery === '' || $queryTokens === [] || $rrfScores === [] || $rows === []) {
-            return [$rrfScores, [], []];
-        }
-
-        $documentBoosts = [];
-
-        foreach ($rows as $row) {
-            $documentId = $row['document_id'] ?? null;
-
-            if (!is_string($documentId) || $documentId === '' || isset($documentBoosts[$documentId])) {
-                continue;
-            }
-
-            $documentBoosts[$documentId] = $this->computeDocumentMetadataBoost(
-                $row,
-                $normalizedQuery,
-                $queryTokens
-            );
-        }
-
-        if ($documentBoosts === []) {
-            return [$rrfScores, [], []];
-        }
-
-        $chunkBoosts = [];
-
-        foreach ($rrfScores as $chunkId => $score) {
-            $row = $rows[$chunkId] ?? null;
-
-            if (!is_array($row)) {
-                continue;
-            }
-
-            $documentId = $row['document_id'] ?? null;
-
-            if (!is_string($documentId) || $documentId === '') {
-                continue;
-            }
-
-            $boost = $documentBoosts[$documentId] ?? 0.0;
-
-            if ($boost <= 0.0) {
-                continue;
-            }
-
-            $rrfScores[$chunkId] = $score * (1.0 + $boost);
-            $chunkBoosts[$chunkId] = $boost;
-        }
-
-        return [$rrfScores, $chunkBoosts, $documentBoosts];
-    }
-
-    /**
-     * @param array<string,mixed> $row
-     * @param string[] $queryTokens
-     */
-    private function computeDocumentMetadataBoost(array $row, string $normalizedQuery, array $queryTokens): float
-    {
-        $documentTitle = $this->normalizeForMatching($this->extractMetadataString($row, [
-            'document_title',
-            'title',
-        ]));
-
-        $fileName = $this->normalizeForMatching($this->extractMetadataString($row, [
-            'file_name',
-            'filename',
-            'original_filename',
-            'source_name',
-            'document_name',
-        ]));
-
-        $metaText = $this->normalizeForMatching($this->extractMetadataString($row, [
-            'source_path',
-            'path',
-            'heading',
-            'section_title',
-            'category',
-        ]));
-
-        $boost = 0.0;
-
-        $titleCoverage = $this->computeNormalizedTokenCoverage($queryTokens, $documentTitle);
-        if ($titleCoverage > 0.0) {
-            $boost += min(
-                self::TITLE_MATCH_MAX_BOOST,
-                self::TITLE_MATCH_BASE_BOOST + ($titleCoverage * self::TITLE_MATCH_MAX_BOOST)
-            );
-        }
-
-        $fileCoverage = $this->computeNormalizedTokenCoverage($queryTokens, $fileName);
-        if ($fileCoverage > 0.0) {
-            $boost += min(
-                self::FILE_MATCH_MAX_BOOST,
-                self::FILE_MATCH_BASE_BOOST + ($fileCoverage * self::FILE_MATCH_MAX_BOOST)
-            );
-        }
-
-        $metaCoverage = $this->computeNormalizedTokenCoverage($queryTokens, $metaText);
-        if ($metaCoverage > 0.0) {
-            $boost += min(
-                self::META_MATCH_MAX_BOOST,
-                $metaCoverage * self::META_MATCH_MAX_BOOST
-            );
-        }
-
-        if (str_contains($normalizedQuery, ' ')) {
-            if ($documentTitle !== '' && str_contains(' ' . $documentTitle . ' ', ' ' . $normalizedQuery . ' ')) {
-                $boost += self::EXACT_TITLE_PHRASE_BOOST;
-            }
-
-            if ($fileName !== '' && str_contains(' ' . $fileName . ' ', ' ' . $normalizedQuery . ' ')) {
-                $boost += self::EXACT_FILE_PHRASE_BOOST;
-            }
-        }
-
-        return min(self::MAX_TITLE_METADATA_BOOST, $boost);
-    }
-
-    /**
-     * @param array<string,mixed> $row
-     * @param string[] $preferredKeys
-     */
-    private function extractMetadataString(array $row, array $preferredKeys): string
-    {
-        foreach ($preferredKeys as $key) {
-            $topLevel = $row[$key] ?? null;
-            if (is_string($topLevel) && trim($topLevel) !== '') {
-                return trim($topLevel);
-            }
-
-            $metadata = $row['metadata'] ?? null;
-            if (is_array($metadata)) {
-                $value = $metadata[$key] ?? null;
-                if (is_string($value) && trim($value) !== '') {
-                    return trim($value);
-                }
-            }
-        }
-
-        return '';
-    }
-
-    /**
-     * @param string[] $queryTokens
-     */
-    private function computeNormalizedTokenCoverage(array $queryTokens, string $normalizedHaystack): float
-    {
-        if ($queryTokens === [] || $normalizedHaystack === '') {
-            return 0.0;
-        }
-
-        $matched = 0;
-
-        foreach ($queryTokens as $token) {
-            if ($token === '') {
-                continue;
-            }
-
-            if (str_contains(' ' . $normalizedHaystack . ' ', ' ' . $token . ' ')) {
-                $matched++;
-            }
-        }
-
-        if ($matched < 1) {
-            return 0.0;
-        }
-
-        return $matched / max(1, count($queryTokens));
-    }
-
-    /**
-     * @return string[]
-     */
-    private function tokenizeNormalizedQuery(string $normalizedQuery): array
-    {
-        if ($normalizedQuery === '') {
-            return [];
-        }
-
-        $tokens = preg_split('/\s+/u', $normalizedQuery, -1, PREG_SPLIT_NO_EMPTY) ?: [];
-        $tokens = array_values(array_unique(array_filter(
-            $tokens,
-            static fn (string $token): bool => mb_strlen($token, 'UTF-8') >= 2
-        )));
-
-        return $tokens;
-    }
-
-    private function normalizeForMatching(string $value): string
-    {
-        $value = mb_strtolower(trim($value), 'UTF-8');
-        $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value;
-        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;
-
-        return trim($value);
-    }
-
-    private function maxNullableFloat(?float $a, ?float $b): ?float
-    {
-        if ($a === null && $b === null) {
-            return null;
-        }
-
-        if ($a === null) {
-            return $b;
-        }
-
-        if ($b === null) {
-            return $a;
-        }
-
-        return max($a, $b);
-    }
 }
\ No newline at end of file