add catalog mode
This commit is contained in:
@@ -4,7 +4,9 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Catalog\EntityCatalogService;
|
||||
use App\Entity\ModelGenerationConfig;
|
||||
use App\Intent\CatalogIntentLite;
|
||||
use App\Intent\IntentLite;
|
||||
use App\Intent\SalesIntentLite;
|
||||
use App\Knowledge\QueryCleaner;
|
||||
@@ -32,7 +34,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private readonly ModelGenerationConfigRepository $configRepository,
|
||||
private readonly QueryCleaner $queryCleaner,
|
||||
private readonly IntentLite $intentLite,
|
||||
private readonly SalesIntentLite $salesIntentLite
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
private readonly CatalogIntentLite $catalogIntent,
|
||||
private readonly EntityCatalogService $entityCatalogService
|
||||
)
|
||||
{
|
||||
}
|
||||
@@ -54,6 +58,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
|
||||
{
|
||||
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
|
||||
$entityTerm = $this->catalogIntent->detect($prompt);
|
||||
|
||||
if ($entityTerm !== null) {
|
||||
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
|
||||
|
||||
if ($catalogBlock !== null) {
|
||||
return [$catalogBlock];
|
||||
}
|
||||
}
|
||||
|
||||
$core = $this->runCore($prompt, $config, false);
|
||||
|
||||
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
|
||||
@@ -111,8 +126,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return [];
|
||||
}
|
||||
|
||||
// 1) Production-like selection: wir selektieren Texte,
|
||||
// aber in Debug brauchen wir die ChunkIds dazu.
|
||||
$selectedChunkIds = $core['is_list_query']
|
||||
? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
|
||||
: $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
|
||||
@@ -121,7 +134,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return [];
|
||||
}
|
||||
|
||||
// 2) Ausgabe inklusive Scores
|
||||
$out = [];
|
||||
$rank = 0;
|
||||
|
||||
@@ -179,7 +191,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$cleanQuery = $prompt;
|
||||
}
|
||||
|
||||
// Intent-based adjustments (identisch zur Produktionslogik)
|
||||
$threshold = self::VECTOR_SCORE_THRESHOLD;
|
||||
$topK = $vectorTopKBase;
|
||||
|
||||
@@ -216,7 +227,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
$topK = max(1, min($topK, self::HARD_MAX_VECTORK));
|
||||
|
||||
// Tag routing (identisch)
|
||||
$candidateDocIds = $this->tagRouting->route($cleanQuery);
|
||||
$candidateSet = null;
|
||||
|
||||
@@ -224,7 +234,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$candidateSet = array_fill_keys($candidateDocIds, true);
|
||||
}
|
||||
|
||||
// Dual search (identisch)
|
||||
$globalHits = $this->vectorClient->search($cleanQuery, $topK);
|
||||
|
||||
$scopedHits = [];
|
||||
@@ -249,7 +258,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$rrfScores = [];
|
||||
$rawScores = [];
|
||||
|
||||
// RRF (identisch) + optional raw capture
|
||||
$this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores);
|
||||
$this->applyRrfWithOptionalRaw(
|
||||
$scopedHits,
|
||||
@@ -292,13 +300,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Gleiche Logik wie applyRrf(), aber optional mit raw-score capture.
|
||||
*
|
||||
* @param array<int, array{chunk_id:string, score:float}> $hits
|
||||
* @param array<string,float> $rrfScores
|
||||
* @param array<string,float> $rawScores
|
||||
*/
|
||||
private function applyRrfWithOptionalRaw(
|
||||
array $hits,
|
||||
array &$rrfScores,
|
||||
@@ -322,7 +323,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$chunkId = (string)$hit['chunk_id'];
|
||||
|
||||
if ($captureRaw) {
|
||||
// wenn global+scoped vorkommt: bestes raw behalten
|
||||
if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) {
|
||||
$rawScores[$chunkId] = $raw;
|
||||
}
|
||||
@@ -343,15 +343,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// DEBUG SELECTION HELPERS (identisch zu Produktionsregeln)
|
||||
// =========================================================
|
||||
|
||||
/**
|
||||
* List-Mode nutzt exakt collectTexts() Regeln, aber gibt ChunkIds zurück.
|
||||
*
|
||||
* @return string[]
|
||||
*/
|
||||
private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array
|
||||
{
|
||||
$seen = [];
|
||||
@@ -384,11 +375,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normal-Mode nutzt exakt collectSalesOptimized() Regeln, aber gibt ChunkIds zurück.
|
||||
*
|
||||
* @return string[]
|
||||
*/
|
||||
private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array
|
||||
{
|
||||
$out = [];
|
||||
@@ -437,10 +423,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return $out;
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// ORIGINAL METHODS (UNVERÄNDERT)
|
||||
// =========================================================
|
||||
|
||||
private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void
|
||||
{
|
||||
$rank = 0;
|
||||
|
||||
Reference in New Issue
Block a user