diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index 2219f33..196a6df 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -7,51 +7,52 @@ namespace App\Intent; use App\Tag\TagVectorSearchClient; use App\Tag\TagTypes; +/** + * CatalogIntentLite + * + * Reiner Entity-Detector. + * + * Verantwortlich nur für: + * - Vector-Tag-Erkennung + * - Score-Gate + * - Ambiguity-Check + * - Sicherstellen, dass TagType = catalog_entity + * + * KEIN: + * - Listen-Signal + * - SalesIntent + * - Routing + */ final class CatalogIntentLite { - private const LIST_SIGNALS = [ - 'liste', - 'auflisten', - 'aufzaehl', - 'aufzähl', - 'übersicht', - 'uebersicht', - 'welche gibt es', - 'welche sind', - 'zeig mir alle', - 'zeige mir alle', - 'alle', - ]; - - // Realistischer Gate-Wert + /** + * Minimaler Similarity-Score. + * Verhindert Rauschen. + */ private const MIN_SCORE = 0.50; - // Ambiguity darf nicht zu aggressiv sein + /** + * Differenz zwischen Top1 und Top2, + * damit kein unsicherer Treffer akzeptiert wird. + */ private const AMBIGUITY_DELTA = 0.01; public function __construct( - private readonly SalesIntentLite $salesIntentLite, private readonly TagVectorSearchClient $tagVectorClient, ) {} + /** + * Gibt das canonical Label der erkannten catalog_entity zurück + * oder null, wenn kein sauberer Treffer. + */ public function detect(string $prompt): ?string { - $normalizedPrompt = mb_strtolower($prompt); - - // 1) Listen-Signal prüfen - if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) { + $prompt = trim($prompt); + if ($prompt === '') { return null; } - // 2) Nur DISCOVERY zulassen - $sales = $this->salesIntentLite->detect($prompt); - $intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY); - - if ($intent !== SalesIntentLite::DISCOVERY) { - return null; - } - - // 3) Vector-Search + // 1) Vector-Suche $hits = $this->tagVectorClient->search($prompt, 3); if ($hits === []) { @@ -61,23 +62,26 @@ final class CatalogIntentLite $best = $hits[0]; $bestScore = (float)($best['score'] ?? 0.0); + // 2) Score-Gate if ($bestScore < self::MIN_SCORE) { return null; } - // Ambiguity-Prüfung + // 3) Ambiguity-Check if (isset($hits[1])) { $secondScore = (float)($hits[1]['score'] ?? 0.0); + if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { return null; } } - // Nur catalog_entity zulassen + // 4) Nur catalog_entity zulassen if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) { return null; } + // 5) Canonical Label $label = trim((string)($best['label'] ?? '')); if ($label === '') { @@ -86,15 +90,4 @@ final class CatalogIntentLite return mb_strtolower($label); } - - private function containsAny(string $haystack, array $needles): bool - { - foreach ($needles as $needle) { - if ($needle !== '' && str_contains($haystack, $needle)) { - return true; - } - } - - return false; - } } \ No newline at end of file diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index 16b4219..538f48b 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -4,17 +4,15 @@ declare(strict_types=1); namespace App\Knowledge\Retrieval; +use App\Catalog\EntityCatalogService; use App\Entity\ModelGenerationConfig; use App\Intent\CatalogIntentLite; use App\Intent\IntentLite; use App\Intent\SalesIntentLite; -use App\Knowledge\QueryCleaner; use App\Repository\ModelGenerationConfigRepository; +use App\Routing\IntentRouteResolver; use App\Tag\TagRoutingService; use App\Vector\VectorSearchClient; -use App\Catalog\EntityCatalogService; -use App\Knowledge\Retrieval\NdjsonChunkLookup; -use App\Knowledge\Retrieval\RetrieverInterface; final class NdjsonHybridRetriever implements RetrieverInterface { @@ -38,13 +36,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface private readonly IntentLite $intentLite, private readonly SalesIntentLite $salesIntentLite, private readonly CatalogIntentLite $catalogIntent, + private readonly IntentRouteResolver $routeResolver, private readonly EntityCatalogService $entityCatalogService - ) - { + ) { } // ========================================================= - // PRODUCTION (UNVERÄNDERTES VERHALTEN) + // PRODUCTION // ========================================================= public function retrieve(string $prompt): array @@ -60,17 +58,32 @@ final class NdjsonHybridRetriever implements RetrieverInterface public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array { - // 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv) - $entityTerm = $this->catalogIntent->detect($prompt); + // ------------------------------------------------------------ + // ROUTING-MATRIX (minimal, ohne Core zu zerlegen) + // ------------------------------------------------------------ - if ($entityTerm !== null) { - $catalogBlock = $this->entityCatalogService->listByTerm($entityTerm); + // 1) Entity (semantisch über Tag-Vektor) + $entityLabel = $this->catalogIntent->detect($prompt); + + // 2) Intent (regelbasiert) + $intent = (string)($this->salesIntentLite->detect($prompt)['intent'] ?? SalesIntentLite::DISCOVERY); + + // 3) Route bestimmen (Intent + Entity) + $route = $this->routeResolver->resolve($intent, $entityLabel); + + // 4) Early Exit nur für catalog_list + if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) { + $catalogBlock = $this->entityCatalogService->listByTerm($entityLabel); if ($catalogBlock !== null) { return [$catalogBlock]; } } + // ------------------------------------------------------------ + // NORMALER CORE + // ------------------------------------------------------------ + $core = $this->runCore($prompt, $config, false); if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) { @@ -95,7 +108,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface } // ========================================================= - // DEBUG (NEU, ABER NICHT IM PRODUKTIONS-PFAD) + // DEBUG (unverändert, kein Early-Exit damit Debug immer Core zeigt) // ========================================================= /** diff --git a/src/Knowledge/QueryCleaner.php b/src/Knowledge/Retrieval/QueryCleaner.php similarity index 96% rename from src/Knowledge/QueryCleaner.php rename to src/Knowledge/Retrieval/QueryCleaner.php index c6c2f1d..ef2942c 100644 --- a/src/Knowledge/QueryCleaner.php +++ b/src/Knowledge/Retrieval/QueryCleaner.php @@ -2,7 +2,9 @@ declare(strict_types=1); -namespace App\Knowledge; +namespace App\Knowledge\Retrieval; + +use App\Knowledge\StopWords; final class QueryCleaner { diff --git a/src/Routing/IntentRouteResolver.php b/src/Routing/IntentRouteResolver.php new file mode 100644 index 0000000..6304135 --- /dev/null +++ b/src/Routing/IntentRouteResolver.php @@ -0,0 +1,70 @@ + self::ROUTE_CATALOG_LIST, + + SalesIntentLite::PRICING + => self::ROUTE_ENTITY_PRICING, + + SalesIntentLite::COMPARISON + => self::ROUTE_ENTITY_COMPARISON, + + SalesIntentLite::IMPLEMENTATION + => self::ROUTE_ENTITY_IMPLEMENTATION, + + SalesIntentLite::ROI + => self::ROUTE_ENTITY_ROI, + + default + => self::ROUTE_NORMAL, + }; + } +} \ No newline at end of file