harden semantic matches by tags

This commit is contained in:
team2
2026-02-28 19:04:01 +01:00
parent ff01919b30
commit 1b5cff1c53
4 changed files with 134 additions and 56 deletions

View File

@@ -7,51 +7,52 @@ namespace App\Intent;
use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes;
/**
* CatalogIntentLite
*
* Reiner Entity-Detector.
*
* Verantwortlich nur für:
* - Vector-Tag-Erkennung
* - Score-Gate
* - Ambiguity-Check
* - Sicherstellen, dass TagType = catalog_entity
*
* KEIN:
* - Listen-Signal
* - SalesIntent
* - Routing
*/
final class CatalogIntentLite
{
private const LIST_SIGNALS = [
'liste',
'auflisten',
'aufzaehl',
'aufzähl',
'übersicht',
'uebersicht',
'welche gibt es',
'welche sind',
'zeig mir alle',
'zeige mir alle',
'alle',
];
// Realistischer Gate-Wert
/**
* Minimaler Similarity-Score.
* Verhindert Rauschen.
*/
private const MIN_SCORE = 0.50;
// Ambiguity darf nicht zu aggressiv sein
/**
* Differenz zwischen Top1 und Top2,
* damit kein unsicherer Treffer akzeptiert wird.
*/
private const AMBIGUITY_DELTA = 0.01;
public function __construct(
private readonly SalesIntentLite $salesIntentLite,
private readonly TagVectorSearchClient $tagVectorClient,
) {}
/**
* Gibt das canonical Label der erkannten catalog_entity zurück
* oder null, wenn kein sauberer Treffer.
*/
public function detect(string $prompt): ?string
{
$normalizedPrompt = mb_strtolower($prompt);
// 1) Listen-Signal prüfen
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
$prompt = trim($prompt);
if ($prompt === '') {
return null;
}
// 2) Nur DISCOVERY zulassen
$sales = $this->salesIntentLite->detect($prompt);
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
if ($intent !== SalesIntentLite::DISCOVERY) {
return null;
}
// 3) Vector-Search
// 1) Vector-Suche
$hits = $this->tagVectorClient->search($prompt, 3);
if ($hits === []) {
@@ -61,23 +62,26 @@ final class CatalogIntentLite
$best = $hits[0];
$bestScore = (float)($best['score'] ?? 0.0);
// 2) Score-Gate
if ($bestScore < self::MIN_SCORE) {
return null;
}
// Ambiguity-Prüfung
// 3) Ambiguity-Check
if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
return null;
}
}
// Nur catalog_entity zulassen
// 4) Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null;
}
// 5) Canonical Label
$label = trim((string)($best['label'] ?? ''));
if ($label === '') {
@@ -86,15 +90,4 @@ final class CatalogIntentLite
return mb_strtolower($label);
}
private function containsAny(string $haystack, array $needles): bool
{
foreach ($needles as $needle) {
if ($needle !== '' && str_contains($haystack, $needle)) {
return true;
}
}
return false;
}
}

View File

@@ -4,17 +4,15 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite;
use App\Intent\SalesIntentLite;
use App\Knowledge\QueryCleaner;
use App\Repository\ModelGenerationConfigRepository;
use App\Routing\IntentRouteResolver;
use App\Tag\TagRoutingService;
use App\Vector\VectorSearchClient;
use App\Catalog\EntityCatalogService;
use App\Knowledge\Retrieval\NdjsonChunkLookup;
use App\Knowledge\Retrieval\RetrieverInterface;
final class NdjsonHybridRetriever implements RetrieverInterface
{
@@ -38,13 +36,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
private readonly IntentLite $intentLite,
private readonly SalesIntentLite $salesIntentLite,
private readonly CatalogIntentLite $catalogIntent,
private readonly IntentRouteResolver $routeResolver,
private readonly EntityCatalogService $entityCatalogService
)
{
) {
}
// =========================================================
// PRODUCTION (UNVERÄNDERTES VERHALTEN)
// PRODUCTION
// =========================================================
public function retrieve(string $prompt): array
@@ -60,17 +58,32 @@ final class NdjsonHybridRetriever implements RetrieverInterface
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
{
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
$entityTerm = $this->catalogIntent->detect($prompt);
// ------------------------------------------------------------
// ROUTING-MATRIX (minimal, ohne Core zu zerlegen)
// ------------------------------------------------------------
if ($entityTerm !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
// 1) Entity (semantisch über Tag-Vektor)
$entityLabel = $this->catalogIntent->detect($prompt);
// 2) Intent (regelbasiert)
$intent = (string)($this->salesIntentLite->detect($prompt)['intent'] ?? SalesIntentLite::DISCOVERY);
// 3) Route bestimmen (Intent + Entity)
$route = $this->routeResolver->resolve($intent, $entityLabel);
// 4) Early Exit nur für catalog_list
if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityLabel);
if ($catalogBlock !== null) {
return [$catalogBlock];
}
}
// ------------------------------------------------------------
// NORMALER CORE
// ------------------------------------------------------------
$core = $this->runCore($prompt, $config, false);
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
@@ -95,7 +108,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
// =========================================================
// DEBUG (NEU, ABER NICHT IM PRODUKTIONS-PFAD)
// DEBUG (unverändert, kein Early-Exit damit Debug immer Core zeigt)
// =========================================================
/**

View File

@@ -2,7 +2,9 @@
declare(strict_types=1);
namespace App\Knowledge;
namespace App\Knowledge\Retrieval;
use App\Knowledge\StopWords;
final class QueryCleaner
{

View File

@@ -0,0 +1,70 @@
<?php
declare(strict_types=1);
namespace App\Routing;
use App\Intent\SalesIntentLite;
/**
 * IntentRouteResolver
 *
 * Deterministic routing matrix that combines two inputs:
 *   - the intent (expected to come from SalesIntentLite), and
 *   - the detected entity label (expected to come from CatalogIntentLite).
 *
 * The class contains no detection logic of its own; it only maps the
 * (intent, entity) pair to the route that should be taken.
 *
 * Supporting a further intent means adding another arm in resolve().
 */
final class IntentRouteResolver
{
    public const ROUTE_NORMAL = 'normal_rag';
    public const ROUTE_CATALOG_LIST = 'catalog_list';
    public const ROUTE_ENTITY_PRICING = 'entity_pricing';
    public const ROUTE_ENTITY_COMPARISON = 'entity_comparison';
    public const ROUTE_ENTITY_IMPLEMENTATION = 'entity_implementation';
    public const ROUTE_ENTITY_ROI = 'entity_roi';

    /**
     * Picks the route for a given intent / entity combination.
     *
     * @param string      $intent      Intent identifier, compared strictly against the SalesIntentLite constants
     * @param string|null $entityLabel Detected entity label; null or '' means "no entity"
     *
     * @return string One of the ROUTE_* constants of this class
     */
    public function resolve(string $intent, ?string $entityLabel): string
    {
        // Arms are evaluated top to bottom; the first one that is true wins.
        return match (true) {
            // Without an entity there is nothing to route on: plain RAG.
            $entityLabel === null,
            $entityLabel === '' => self::ROUTE_NORMAL,

            // With an entity, the intent alone selects the route.
            $intent === SalesIntentLite::DISCOVERY => self::ROUTE_CATALOG_LIST,
            $intent === SalesIntentLite::PRICING => self::ROUTE_ENTITY_PRICING,
            $intent === SalesIntentLite::COMPARISON => self::ROUTE_ENTITY_COMPARISON,
            $intent === SalesIntentLite::IMPLEMENTATION => self::ROUTE_ENTITY_IMPLEMENTATION,
            $intent === SalesIntentLite::ROI => self::ROUTE_ENTITY_ROI,

            // Unknown intent: fall back to plain RAG.
            default => self::ROUTE_NORMAL,
        };
    }
}