optimize catalog semantic matches by tags

This commit is contained in:
team2
2026-02-28 16:10:47 +01:00
parent d3294464ea
commit 0d3f6e21d6
13 changed files with 329 additions and 151 deletions

View File

@@ -4,7 +4,6 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite;
@@ -13,6 +12,9 @@ use App\Knowledge\QueryCleaner;
use App\Repository\ModelGenerationConfigRepository;
use App\Tag\TagRoutingService;
use App\Vector\VectorSearchClient;
use App\Catalog\EntityCatalogService;
use App\Knowledge\Retrieval\NdjsonChunkLookup;
use App\Knowledge\Retrieval\RetrieverInterface;
final class NdjsonHybridRetriever implements RetrieverInterface
{
@@ -146,15 +148,15 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$text = trim((string)($core['rows'][$chunkId]['text'] ?? ''));
$out[] = [
'rank' => $rank,
'chunk_id' => $chunkId,
'document_id' => isset($core['rows'][$chunkId]['document_id']) ? (string)$core['rows'][$chunkId]['document_id'] : null,
'raw_score' => isset($core['raw_scores'][$chunkId]) ? (float)$core['raw_scores'][$chunkId] : null,
'rrf_score' => isset($core['rrf_scores'][$chunkId]) ? (float)$core['rrf_scores'][$chunkId] : null,
'threshold' => (float)$core['threshold'],
'intent' => (string)$core['sales_intent'],
'is_list_query'=> (bool)$core['is_list_query'],
'text' => $text,
'rank' => $rank,
'chunk_id' => $chunkId,
'document_id' => isset($core['rows'][$chunkId]['document_id']) ? (string)$core['rows'][$chunkId]['document_id'] : null,
'raw_score' => isset($core['raw_scores'][$chunkId]) ? (float)$core['raw_scores'][$chunkId] : null,
'rrf_score' => isset($core['rrf_scores'][$chunkId]) ? (float)$core['rrf_scores'][$chunkId] : null,
'threshold' => (float)$core['threshold'],
'intent' => (string)$core['sales_intent'],
'is_list_query' => (bool)$core['is_list_query'],
'text' => $text,
];
}
@@ -305,9 +307,10 @@ final class NdjsonHybridRetriever implements RetrieverInterface
array &$rrfScores,
array &$rawScores,
float $threshold,
bool $boost = false,
bool $captureRaw = false
): void {
bool $boost = false,
bool $captureRaw = false
): void
{
$rank = 0;
foreach ($hits as $hit) {