This commit is contained in:
team 1
2026-04-24 18:54:25 +02:00
parent 372a6797fa
commit c439fb99d6
12 changed files with 1126 additions and 336 deletions

View File

@@ -45,6 +45,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
private IntentRouteResolver $routeResolver,
private EntityCatalogService $entityCatalogService,
private QueryEnricher $queryEnricher,
private NdjsonHybridRetrieverConfig $retrieverConfig,
)
{
}
@@ -211,7 +212,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
if ($exactDocumentMatch !== null) {
$selectedChunkIds = $this->selectExactDocumentChunkIds(
$exactDocumentMatch['rows'],
max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS)),
max(1, min($config->getRetrievalMaxChunks(), $this->retrieverConfig->hardMaxChunks())),
$prompt
);
@@ -310,8 +311,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
string $salesIntent
): array
{
$limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS));
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
$limit = max(1, min($config->getRetrievalMaxChunks(), $this->retrieverConfig->hardMaxChunks()));
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), $this->retrieverConfig->hardMaxVectorK()));
$isListQuery = $this->intentLite->isListQuery($prompt);
@@ -322,7 +323,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return [
'limit' => $limit,
'is_list_query' => $isListQuery,
'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
'threshold' => $this->retrieverConfig->vectorScoreThreshold(),
'ranked_chunk_ids' => [],
'rows' => [],
'rrf_scores' => [],
@@ -501,9 +502,9 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function computeKeywordTopK(int $vectorTopK): int
{
    // Scale the vector top-k by the configured keyword multiplier; ceil() rounds
    // a fractional product up before the integer cast.
    $scaledTopK = (int) ceil($vectorTopK * $this->retrieverConfig->keywordTopKMultiplier());

    // Clamp the result to [1, hardMaxKeywordK()]. The upper bound is read from the
    // injected retriever config, the same source as the multiplier above, rather
    // than from the NdjsonHybridRetrieverConfig::HARD_MAX_KEYWORDK class constant.
    return max(1, min($scaledTopK, $this->retrieverConfig->hardMaxKeywordK()));
}
/**
@@ -520,7 +521,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
int $vectorTopKBase
): array
{
$threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD;
$threshold = $this->retrieverConfig->vectorScoreThreshold();
$topK = $vectorTopKBase;
if (
@@ -531,13 +532,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
}
if ($isListQuery) {
$topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
$topK = (int)round($topK * $this->retrieverConfig->listBonus());
}
$topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
$topK = max(1, min($topK, $this->retrieverConfig->hardMaxVectorK()));
$threshold = max(
NdjsonHybridRetrieverConfig::THRESHOLD_FLOOR,
min(NdjsonHybridRetrieverConfig::THRESHOLD_CEIL, $threshold)
$this->retrieverConfig->thresholdFloor(),
min($this->retrieverConfig->thresholdCeil(), $threshold)
);
return [$threshold, $topK];
@@ -587,16 +588,16 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
}
$rank++;
$rrf = (1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank)) * $weight;
$rrf = (1.0 / ($this->retrieverConfig->rrfK() + $rank)) * $weight;
$rrfScores[$chunkId] = ($rrfScores[$chunkId] ?? 0.0) + $rrf;
}
};
$apply($globalHits, $vectorThreshold, 1.0);
$apply($scopedHits, $vectorThreshold, $boostScopedVector ? NdjsonHybridRetrieverConfig::SCOPED_VECTOR_RRF_WEIGHT : 1.0);
$apply($keywordHits, NdjsonHybridRetrieverConfig::KEYWORD_SCORE_THRESHOLD, NdjsonHybridRetrieverConfig::KEYWORD_RRF_WEIGHT);
$apply($scopedKeywordHits, NdjsonHybridRetrieverConfig::KEYWORD_SCORE_THRESHOLD, $boostScopedKeyword ? NdjsonHybridRetrieverConfig::SCOPED_KEYWORD_RRF_WEIGHT : NdjsonHybridRetrieverConfig::KEYWORD_RRF_WEIGHT);
$apply($scopedHits, $vectorThreshold, $boostScopedVector ? $this->retrieverConfig->scopedVectorRrfWeight() : 1.0);
$apply($keywordHits, $this->retrieverConfig->keywordScoreThreshold(), $this->retrieverConfig->keywordRrfWeight());
$apply($scopedKeywordHits, $this->retrieverConfig->keywordScoreThreshold(), $boostScopedKeyword ? $this->retrieverConfig->scopedKeywordRrfWeight() : $this->retrieverConfig->keywordRrfWeight());
return [
'rrf_scores' => $rrfScores,
@@ -621,9 +622,9 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
}
$rank++;
$rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
$rrf[(string)$hit['chunk_id']] = 1.0 / ($this->retrieverConfig->rrfK() + $rank);
if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
if ($rank >= $this->retrieverConfig->emptyRrfFallbackTopN()) {
break;
}
}
@@ -649,7 +650,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
private function selectExactDocumentChunkIds(array $rows, int $limit, string $prompt): array
{
$orderedRows = $this->sortRowsByChunkIndex($rows);
$max = min($limit, NdjsonHybridRetrieverConfig::EXACT_DOCUMENT_MAX_CHUNKS);
$max = min($limit, $this->retrieverConfig->exactDocumentMaxChunks());
if ($orderedRows === [] || $max <= 0) {
return [];