optimize system and cleanup
This commit is contained in:
@@ -57,6 +57,10 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return [$result['catalogBlock']];
|
||||
}
|
||||
|
||||
if ($result['selectedChunkIds'] === []) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return $this->collectTextsFromIds(
|
||||
$result['selectedChunkIds'],
|
||||
$result['rows']
|
||||
@@ -84,10 +88,15 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
]];
|
||||
}
|
||||
|
||||
if ($result['selectedChunkIds'] === []) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$out = [];
|
||||
$rank = 0;
|
||||
|
||||
foreach ($result['selectedChunkIds'] as $chunkId) {
|
||||
|
||||
if (!isset($result['rows'][$chunkId])) {
|
||||
continue;
|
||||
}
|
||||
@@ -127,6 +136,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$route = $this->routeResolver->resolve($salesIntent, $entityLabel);
|
||||
|
||||
if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) {
|
||||
|
||||
$catalogBlock = $this->entityCatalogService->listByTerm($entityLabel);
|
||||
|
||||
if ($catalogBlock !== null) {
|
||||
@@ -147,6 +157,21 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
$core = $this->runCore($prompt, $config, $withScores, $salesIntent);
|
||||
|
||||
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
|
||||
return [
|
||||
'route' => $route,
|
||||
'entityLabel' => $entityLabel,
|
||||
'intent' => $salesIntent,
|
||||
'isListQuery' => $core['is_list_query'],
|
||||
'selectedChunkIds' => [],
|
||||
'rows' => [],
|
||||
'rrfScores' => [],
|
||||
'rawScores' => [],
|
||||
'threshold' => $core['threshold'],
|
||||
'catalogBlock' => null,
|
||||
];
|
||||
}
|
||||
|
||||
$selectedChunkIds = $core['is_list_query']
|
||||
? $this->selectListChunkIds($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
|
||||
: $this->selectSalesChunkIds($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
|
||||
@@ -182,8 +207,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$isListQuery = $this->intentLite->isListQuery($prompt);
|
||||
|
||||
$cleanQuery = $this->queryCleaner->clean($prompt);
|
||||
|
||||
if ($cleanQuery === '') {
|
||||
$cleanQuery = $prompt;
|
||||
return [
|
||||
'limit' => $limit,
|
||||
'is_list_query' => $isListQuery,
|
||||
'threshold' => self::VECTOR_SCORE_THRESHOLD,
|
||||
'ranked_chunk_ids' => [],
|
||||
'rows' => [],
|
||||
'rrf_scores' => [],
|
||||
'raw_scores' => [],
|
||||
];
|
||||
}
|
||||
|
||||
[$threshold, $topK] = $this->computeThresholdAndTopK(
|
||||
@@ -200,10 +234,22 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$globalHits = $this->vectorClient->search($cleanQuery, $topK);
|
||||
|
||||
$scopedHits = [];
|
||||
if (!empty($candidateDocIds)) {
|
||||
if ($candidateDocIds !== []) {
|
||||
$scopedHits = $this->vectorClient->searchScoped($cleanQuery, $topK, $candidateDocIds);
|
||||
}
|
||||
|
||||
if ($globalHits === [] && $scopedHits === []) {
|
||||
return [
|
||||
'limit' => $limit,
|
||||
'is_list_query' => $isListQuery,
|
||||
'threshold' => $threshold,
|
||||
'ranked_chunk_ids' => [],
|
||||
'rows' => [],
|
||||
'rrf_scores' => [],
|
||||
'raw_scores' => [],
|
||||
];
|
||||
}
|
||||
|
||||
$fused = $this->fuseHits(
|
||||
$globalHits,
|
||||
$scopedHits,
|
||||
@@ -216,11 +262,25 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$rawScores = $fused['raw_scores'];
|
||||
|
||||
if ($rrfScores === [] && $globalHits !== []) {
|
||||
$rrfScores = $this->fallbackRrfFromHits($globalHits, self::EMPTY_RRF_FALLBACK_TOPN);
|
||||
$rrfScores = $this->fallbackRrfFromHits(
|
||||
$globalHits,
|
||||
self::EMPTY_RRF_FALLBACK_TOPN
|
||||
);
|
||||
}
|
||||
|
||||
if ($rrfScores === []) {
|
||||
return [
|
||||
'limit' => $limit,
|
||||
'is_list_query' => $isListQuery,
|
||||
'threshold' => $threshold,
|
||||
'ranked_chunk_ids' => [],
|
||||
'rows' => [],
|
||||
'rrf_scores' => [],
|
||||
'raw_scores' => $rawScores,
|
||||
];
|
||||
}
|
||||
|
||||
arsort($rrfScores);
|
||||
|
||||
$rankedChunkIds = array_keys($rrfScores);
|
||||
$rows = $this->lookup->findByChunkIds($rankedChunkIds);
|
||||
|
||||
@@ -254,13 +314,19 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return (string)($data['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
}
|
||||
|
||||
private function computeThresholdAndTopK(string $salesIntent, bool $isListQuery, int $vectorTopKBase): array
|
||||
{
|
||||
private function computeThresholdAndTopK(
|
||||
string $salesIntent,
|
||||
bool $isListQuery,
|
||||
int $vectorTopKBase
|
||||
): array {
|
||||
|
||||
$threshold = self::VECTOR_SCORE_THRESHOLD;
|
||||
$topK = $vectorTopKBase;
|
||||
|
||||
if ($salesIntent === SalesIntentLite::OBJECTION ||
|
||||
$salesIntent === SalesIntentLite::PRICING) {
|
||||
if (
|
||||
$salesIntent === SalesIntentLite::OBJECTION ||
|
||||
$salesIntent === SalesIntentLite::PRICING
|
||||
) {
|
||||
$threshold += 0.02;
|
||||
}
|
||||
|
||||
@@ -333,6 +399,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$rank = 0;
|
||||
|
||||
foreach ($hits as $hit) {
|
||||
|
||||
if (!isset($hit['chunk_id'])) {
|
||||
continue;
|
||||
}
|
||||
@@ -354,6 +421,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$out = [];
|
||||
|
||||
foreach ($chunkIds as $id) {
|
||||
|
||||
if (!isset($rows[$id]['text'])) {
|
||||
continue;
|
||||
}
|
||||
@@ -433,11 +501,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$out = [];
|
||||
|
||||
foreach ($chunkIds as $id) {
|
||||
|
||||
if (!isset($rows[$id]['text'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = trim((string)$rows[$id]['text']);
|
||||
|
||||
if ($text !== '') {
|
||||
$out[] = $text;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user