add catalog mode

This commit is contained in:
team2
2026-02-28 13:51:54 +01:00
parent 47a3c9cca2
commit d3294464ea
7 changed files with 484 additions and 201 deletions

View File

@@ -4,7 +4,9 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite;
use App\Intent\SalesIntentLite;
use App\Knowledge\QueryCleaner;
@@ -32,7 +34,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
private readonly ModelGenerationConfigRepository $configRepository,
private readonly QueryCleaner $queryCleaner,
private readonly IntentLite $intentLite,
private readonly SalesIntentLite $salesIntentLite
private readonly SalesIntentLite $salesIntentLite,
private readonly CatalogIntentLite $catalogIntent,
private readonly EntityCatalogService $entityCatalogService
)
{
}
@@ -54,6 +58,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
{
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
$entityTerm = $this->catalogIntent->detect($prompt);
if ($entityTerm !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
if ($catalogBlock !== null) {
return [$catalogBlock];
}
}
$core = $this->runCore($prompt, $config, false);
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
@@ -111,8 +126,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
// 1) Production-like selection: wir selektieren Texte,
// aber in Debug brauchen wir die ChunkIds dazu.
$selectedChunkIds = $core['is_list_query']
? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
: $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
@@ -121,7 +134,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
// 2) Ausgabe inklusive Scores
$out = [];
$rank = 0;
@@ -179,7 +191,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$cleanQuery = $prompt;
}
// Intent-based adjustments (identisch zur Produktionslogik)
$threshold = self::VECTOR_SCORE_THRESHOLD;
$topK = $vectorTopKBase;
@@ -216,7 +227,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$topK = max(1, min($topK, self::HARD_MAX_VECTORK));
// Tag routing (identisch)
$candidateDocIds = $this->tagRouting->route($cleanQuery);
$candidateSet = null;
@@ -224,7 +234,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$candidateSet = array_fill_keys($candidateDocIds, true);
}
// Dual search (identisch)
$globalHits = $this->vectorClient->search($cleanQuery, $topK);
$scopedHits = [];
@@ -249,7 +258,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$rrfScores = [];
$rawScores = [];
// RRF (identisch) + optional raw capture
$this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores);
$this->applyRrfWithOptionalRaw(
$scopedHits,
@@ -292,13 +300,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
];
}
/**
* Same logic as applyRrf(), but can optionally capture the raw scores as well.
*
* @param array<int, array{chunk_id:string, score:float}> $hits
* @param array<string,float> $rrfScores
* @param array<string,float> $rawScores
*/
private function applyRrfWithOptionalRaw(
array $hits,
array &$rrfScores,
@@ -322,7 +323,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$chunkId = (string)$hit['chunk_id'];
if ($captureRaw) {
// wenn global+scoped vorkommt: bestes raw behalten
if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) {
$rawScores[$chunkId] = $raw;
}
@@ -343,15 +343,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
}
// =========================================================
// DEBUG SELECTION HELPERS (identical to the production rules)
// =========================================================
/**
* List mode applies exactly the collectTexts() rules, but returns chunk IDs.
*
* @return string[]
*/
private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array
{
$seen = [];
@@ -384,11 +375,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out;
}
/**
* Normal mode applies exactly the collectSalesOptimized() rules, but returns chunk IDs.
*
* @return string[]
*/
private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array
{
$out = [];
@@ -437,10 +423,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out;
}
// =========================================================
// ORIGINAL METHODS (UNCHANGED)
// =========================================================
private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void
{
$rank = 0;