optimize system and clean up

This commit is contained in:
team2
2026-03-02 21:27:20 +01:00
parent 6b8d1b1936
commit e7047cd885
10 changed files with 459 additions and 346 deletions

View File

@@ -57,6 +57,10 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [$result['catalogBlock']];
}
if ($result['selectedChunkIds'] === []) {
return [];
}
return $this->collectTextsFromIds(
$result['selectedChunkIds'],
$result['rows']
@@ -84,10 +88,15 @@ final class NdjsonHybridRetriever implements RetrieverInterface
]];
}
if ($result['selectedChunkIds'] === []) {
return [];
}
$out = [];
$rank = 0;
foreach ($result['selectedChunkIds'] as $chunkId) {
if (!isset($result['rows'][$chunkId])) {
continue;
}
@@ -127,6 +136,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$route = $this->routeResolver->resolve($salesIntent, $entityLabel);
if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityLabel);
if ($catalogBlock !== null) {
@@ -147,6 +157,21 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$core = $this->runCore($prompt, $config, $withScores, $salesIntent);
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
return [
'route' => $route,
'entityLabel' => $entityLabel,
'intent' => $salesIntent,
'isListQuery' => $core['is_list_query'],
'selectedChunkIds' => [],
'rows' => [],
'rrfScores' => [],
'rawScores' => [],
'threshold' => $core['threshold'],
'catalogBlock' => null,
];
}
$selectedChunkIds = $core['is_list_query']
? $this->selectListChunkIds($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
: $this->selectSalesChunkIds($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
@@ -182,8 +207,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$isListQuery = $this->intentLite->isListQuery($prompt);
$cleanQuery = $this->queryCleaner->clean($prompt);
if ($cleanQuery === '') {
$cleanQuery = $prompt;
return [
'limit' => $limit,
'is_list_query' => $isListQuery,
'threshold' => self::VECTOR_SCORE_THRESHOLD,
'ranked_chunk_ids' => [],
'rows' => [],
'rrf_scores' => [],
'raw_scores' => [],
];
}
[$threshold, $topK] = $this->computeThresholdAndTopK(
@@ -200,10 +234,22 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$globalHits = $this->vectorClient->search($cleanQuery, $topK);
$scopedHits = [];
if (!empty($candidateDocIds)) {
if ($candidateDocIds !== []) {
$scopedHits = $this->vectorClient->searchScoped($cleanQuery, $topK, $candidateDocIds);
}
if ($globalHits === [] && $scopedHits === []) {
return [
'limit' => $limit,
'is_list_query' => $isListQuery,
'threshold' => $threshold,
'ranked_chunk_ids' => [],
'rows' => [],
'rrf_scores' => [],
'raw_scores' => [],
];
}
$fused = $this->fuseHits(
$globalHits,
$scopedHits,
@@ -216,11 +262,25 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$rawScores = $fused['raw_scores'];
if ($rrfScores === [] && $globalHits !== []) {
$rrfScores = $this->fallbackRrfFromHits($globalHits, self::EMPTY_RRF_FALLBACK_TOPN);
$rrfScores = $this->fallbackRrfFromHits(
$globalHits,
self::EMPTY_RRF_FALLBACK_TOPN
);
}
if ($rrfScores === []) {
return [
'limit' => $limit,
'is_list_query' => $isListQuery,
'threshold' => $threshold,
'ranked_chunk_ids' => [],
'rows' => [],
'rrf_scores' => [],
'raw_scores' => $rawScores,
];
}
arsort($rrfScores);
$rankedChunkIds = array_keys($rrfScores);
$rows = $this->lookup->findByChunkIds($rankedChunkIds);
@@ -254,13 +314,19 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return (string)($data['intent'] ?? SalesIntentLite::DISCOVERY);
}
private function computeThresholdAndTopK(string $salesIntent, bool $isListQuery, int $vectorTopKBase): array
{
private function computeThresholdAndTopK(
string $salesIntent,
bool $isListQuery,
int $vectorTopKBase
): array {
$threshold = self::VECTOR_SCORE_THRESHOLD;
$topK = $vectorTopKBase;
if ($salesIntent === SalesIntentLite::OBJECTION ||
$salesIntent === SalesIntentLite::PRICING) {
if (
$salesIntent === SalesIntentLite::OBJECTION ||
$salesIntent === SalesIntentLite::PRICING
) {
$threshold += 0.02;
}
@@ -333,6 +399,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$rank = 0;
foreach ($hits as $hit) {
if (!isset($hit['chunk_id'])) {
continue;
}
@@ -354,6 +421,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$out = [];
foreach ($chunkIds as $id) {
if (!isset($rows[$id]['text'])) {
continue;
}
@@ -433,11 +501,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$out = [];
foreach ($chunkIds as $id) {
if (!isset($rows[$id]['text'])) {
continue;
}
$text = trim((string)$rows[$id]['text']);
if ($text !== '') {
$out[] = $text;
}