fix 3
This commit is contained in:
@@ -45,6 +45,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private IntentRouteResolver $routeResolver,
|
||||
private EntityCatalogService $entityCatalogService,
|
||||
private QueryEnricher $queryEnricher,
|
||||
private NdjsonHybridRetrieverConfig $retrieverConfig,
|
||||
)
|
||||
{
|
||||
}
|
||||
@@ -211,7 +212,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
if ($exactDocumentMatch !== null) {
|
||||
$selectedChunkIds = $this->selectExactDocumentChunkIds(
|
||||
$exactDocumentMatch['rows'],
|
||||
max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS)),
|
||||
max(1, min($config->getRetrievalMaxChunks(), $this->retrieverConfig->hardMaxChunks())),
|
||||
$prompt
|
||||
);
|
||||
|
||||
@@ -310,8 +311,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
string $salesIntent
|
||||
): array
|
||||
{
|
||||
$limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS));
|
||||
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
|
||||
$limit = max(1, min($config->getRetrievalMaxChunks(), $this->retrieverConfig->hardMaxChunks()));
|
||||
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), $this->retrieverConfig->hardMaxVectorK()));
|
||||
|
||||
$isListQuery = $this->intentLite->isListQuery($prompt);
|
||||
|
||||
@@ -322,7 +323,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return [
|
||||
'limit' => $limit,
|
||||
'is_list_query' => $isListQuery,
|
||||
'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
|
||||
'threshold' => $this->retrieverConfig->vectorScoreThreshold(),
|
||||
'ranked_chunk_ids' => [],
|
||||
'rows' => [],
|
||||
'rrf_scores' => [],
|
||||
@@ -501,9 +502,9 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
*/
|
||||
private function computeKeywordTopK(int $vectorTopK): int
|
||||
{
|
||||
$topK = (int) ceil($vectorTopK * NdjsonHybridRetrieverConfig::KEYWORD_TOPK_MULTIPLIER);
|
||||
$topK = (int) ceil($vectorTopK * $this->retrieverConfig->keywordTopKMultiplier());
|
||||
|
||||
return max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_KEYWORDK));
|
||||
return max(1, min($topK, $this->retrieverConfig->hardMaxKeywordK()));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -520,7 +521,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
int $vectorTopKBase
|
||||
): array
|
||||
{
|
||||
$threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD;
|
||||
$threshold = $this->retrieverConfig->vectorScoreThreshold();
|
||||
$topK = $vectorTopKBase;
|
||||
|
||||
if (
|
||||
@@ -531,13 +532,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
if ($isListQuery) {
|
||||
$topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
|
||||
$topK = (int)round($topK * $this->retrieverConfig->listBonus());
|
||||
}
|
||||
|
||||
$topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
|
||||
$topK = max(1, min($topK, $this->retrieverConfig->hardMaxVectorK()));
|
||||
$threshold = max(
|
||||
NdjsonHybridRetrieverConfig::THRESHOLD_FLOOR,
|
||||
min(NdjsonHybridRetrieverConfig::THRESHOLD_CEIL, $threshold)
|
||||
$this->retrieverConfig->thresholdFloor(),
|
||||
min($this->retrieverConfig->thresholdCeil(), $threshold)
|
||||
);
|
||||
|
||||
return [$threshold, $topK];
|
||||
@@ -587,16 +588,16 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
$rank++;
|
||||
$rrf = (1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank)) * $weight;
|
||||
$rrf = (1.0 / ($this->retrieverConfig->rrfK() + $rank)) * $weight;
|
||||
|
||||
$rrfScores[$chunkId] = ($rrfScores[$chunkId] ?? 0.0) + $rrf;
|
||||
}
|
||||
};
|
||||
|
||||
$apply($globalHits, $vectorThreshold, 1.0);
|
||||
$apply($scopedHits, $vectorThreshold, $boostScopedVector ? NdjsonHybridRetrieverConfig::SCOPED_VECTOR_RRF_WEIGHT : 1.0);
|
||||
$apply($keywordHits, NdjsonHybridRetrieverConfig::KEYWORD_SCORE_THRESHOLD, NdjsonHybridRetrieverConfig::KEYWORD_RRF_WEIGHT);
|
||||
$apply($scopedKeywordHits, NdjsonHybridRetrieverConfig::KEYWORD_SCORE_THRESHOLD, $boostScopedKeyword ? NdjsonHybridRetrieverConfig::SCOPED_KEYWORD_RRF_WEIGHT : NdjsonHybridRetrieverConfig::KEYWORD_RRF_WEIGHT);
|
||||
$apply($scopedHits, $vectorThreshold, $boostScopedVector ? $this->retrieverConfig->scopedVectorRrfWeight() : 1.0);
|
||||
$apply($keywordHits, $this->retrieverConfig->keywordScoreThreshold(), $this->retrieverConfig->keywordRrfWeight());
|
||||
$apply($scopedKeywordHits, $this->retrieverConfig->keywordScoreThreshold(), $boostScopedKeyword ? $this->retrieverConfig->scopedKeywordRrfWeight() : $this->retrieverConfig->keywordRrfWeight());
|
||||
|
||||
return [
|
||||
'rrf_scores' => $rrfScores,
|
||||
@@ -621,9 +622,9 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
$rank++;
|
||||
$rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
|
||||
$rrf[(string)$hit['chunk_id']] = 1.0 / ($this->retrieverConfig->rrfK() + $rank);
|
||||
|
||||
if ($rank >= NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN) {
|
||||
if ($rank >= $this->retrieverConfig->emptyRrfFallbackTopN()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -649,7 +650,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private function selectExactDocumentChunkIds(array $rows, int $limit, string $prompt): array
|
||||
{
|
||||
$orderedRows = $this->sortRowsByChunkIndex($rows);
|
||||
$max = min($limit, NdjsonHybridRetrieverConfig::EXACT_DOCUMENT_MAX_CHUNKS);
|
||||
$max = min($limit, $this->retrieverConfig->exactDocumentMaxChunks());
|
||||
|
||||
if ($orderedRows === [] || $max <= 0) {
|
||||
return [];
|
||||
|
||||
Reference in New Issue
Block a user