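Add new config classes (e.g. NdjsonHybridRetriever now reads its tuning constants from NdjsonHybridRetrieverConfig)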

This commit is contained in:
team 1
2026-04-15 08:46:26 +02:00
parent 8cac77ed31
commit 1815a42035
18 changed files with 508 additions and 309 deletions

View File

@@ -5,6 +5,7 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Config\NdjsonHybridRetrieverConfig;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite;
@@ -14,35 +15,20 @@ use App\Routing\IntentRouteResolver;
use App\Tag\TagRoutingService;
use App\Vector\VectorSearchClient;
final class NdjsonHybridRetriever implements RetrieverInterface
final readonly class NdjsonHybridRetriever implements RetrieverInterface
{
private const VECTOR_SCORE_THRESHOLD = 0.75;
private const HARD_MAX_CHUNKS = 90;
private const HARD_MAX_VECTORK = 250;
private const LIST_BONUS = 1.25;
private const MAX_CHUNKS_PER_DOC = 2;
private const MIN_CHUNK_DISTANCE = 2.5;
private const RRF_K = 60;
private const THRESHOLD_FLOOR = 0.83;
private const THRESHOLD_CEIL = 0.92;
private const EMPTY_RRF_FALLBACK_TOPN = 1;
public function __construct(
private readonly NdjsonChunkLookup $lookup,
private readonly VectorSearchClient $vectorClient,
private readonly TagRoutingService $tagRouting,
private readonly ModelGenerationConfigRepository $configRepository,
private readonly QueryCleaner $queryCleaner,
private readonly IntentLite $intentLite,
private readonly SalesIntentLite $salesIntentLite,
private readonly CatalogIntentLite $catalogIntent,
private readonly IntentRouteResolver $routeResolver,
private readonly EntityCatalogService $entityCatalogService,
private readonly QueryEnricher $queryEnricher,
private NdjsonChunkLookup $lookup,
private VectorSearchClient $vectorClient,
private TagRoutingService $tagRouting,
private ModelGenerationConfigRepository $configRepository,
private QueryCleaner $queryCleaner,
private IntentLite $intentLite,
private SalesIntentLite $salesIntentLite,
private CatalogIntentLite $catalogIntent,
private IntentRouteResolver $routeResolver,
private EntityCatalogService $entityCatalogService,
private QueryEnricher $queryEnricher,
)
{
}
@@ -206,8 +192,8 @@ final class NdjsonHybridRetriever implements RetrieverInterface
): array
{
$limit = max(1, min($config->getRetrievalMaxChunks(), self::HARD_MAX_CHUNKS));
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), self::HARD_MAX_VECTORK));
$limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS));
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
$isListQuery = $this->intentLite->isListQuery($prompt);
@@ -218,7 +204,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [
'limit' => $limit,
'is_list_query' => $isListQuery,
'threshold' => self::VECTOR_SCORE_THRESHOLD,
'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
'ranked_chunk_ids' => [],
'rows' => [],
'rrf_scores' => [],
@@ -270,7 +256,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
if ($rrfScores === [] && $globalHits !== []) {
$rrfScores = $this->fallbackRrfFromHits(
$globalHits,
self::EMPTY_RRF_FALLBACK_TOPN
NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN
);
}
@@ -327,7 +313,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
): array
{
$threshold = self::VECTOR_SCORE_THRESHOLD;
$threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD;
$topK = $vectorTopKBase;
if (
@@ -338,11 +324,11 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
if ($isListQuery) {
$topK = (int)round($topK * self::LIST_BONUS);
$topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS);
}
$topK = max(1, min($topK, self::HARD_MAX_VECTORK));
$threshold = max(self::THRESHOLD_FLOOR, min(self::THRESHOLD_CEIL, $threshold));
$topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK));
$threshold = max(NdjsonHybridRetrieverConfig::THRESHOLD_FLOOR, min(NdjsonHybridRetrieverConfig::THRESHOLD_CEIL, $threshold));
return [$threshold, $topK];
}
@@ -382,7 +368,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
$rank++;
$rrf = 1.0 / (self::RRF_K + $rank);
$rrf = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
if ($boost) {
$rrf *= 1.2;
@@ -413,7 +399,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
$rank++;
$rrf[(string)$hit['chunk_id']] = 1.0 / (self::RRF_K + $rank);
$rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank);
if ($rank >= $topN) {
break;
@@ -475,13 +461,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
if (($docCounter[$docId] ?? 0) >= self::MAX_CHUNKS_PER_DOC) {
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
continue;
}
if (is_int($chunkIndex)) {
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
if (abs($prevIdx - $chunkIndex) < self::MIN_CHUNK_DISTANCE) {
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
continue 2;
}
}