harden semantic matches by tags
This commit is contained in:
@@ -7,51 +7,52 @@ namespace App\Intent;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use App\Tag\TagTypes;
|
||||
|
||||
/**
|
||||
* CatalogIntentLite
|
||||
*
|
||||
* Reiner Entity-Detector.
|
||||
*
|
||||
* Verantwortlich nur für:
|
||||
* - Vector-Tag-Erkennung
|
||||
* - Score-Gate
|
||||
* - Ambiguity-Check
|
||||
* - Sicherstellen, dass TagType = catalog_entity
|
||||
*
|
||||
* KEIN:
|
||||
* - Listen-Signal
|
||||
* - SalesIntent
|
||||
* - Routing
|
||||
*/
|
||||
final class CatalogIntentLite
|
||||
{
|
||||
private const LIST_SIGNALS = [
|
||||
'liste',
|
||||
'auflisten',
|
||||
'aufzaehl',
|
||||
'aufzähl',
|
||||
'übersicht',
|
||||
'uebersicht',
|
||||
'welche gibt es',
|
||||
'welche sind',
|
||||
'zeig mir alle',
|
||||
'zeige mir alle',
|
||||
'alle',
|
||||
];
|
||||
|
||||
// Realistischer Gate-Wert
|
||||
/**
|
||||
* Minimaler Similarity-Score.
|
||||
* Verhindert Rauschen.
|
||||
*/
|
||||
private const MIN_SCORE = 0.50;
|
||||
|
||||
// Ambiguity darf nicht zu aggressiv sein
|
||||
/**
|
||||
* Differenz zwischen Top1 und Top2,
|
||||
* damit kein unsicherer Treffer akzeptiert wird.
|
||||
*/
|
||||
private const AMBIGUITY_DELTA = 0.01;
|
||||
|
||||
public function __construct(
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
private readonly TagVectorSearchClient $tagVectorClient,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Gibt das canonical Label der erkannten catalog_entity zurück
|
||||
* oder null, wenn kein sauberer Treffer.
|
||||
*/
|
||||
public function detect(string $prompt): ?string
|
||||
{
|
||||
$normalizedPrompt = mb_strtolower($prompt);
|
||||
|
||||
// 1) Listen-Signal prüfen
|
||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
||||
$prompt = trim($prompt);
|
||||
if ($prompt === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 2) Nur DISCOVERY zulassen
|
||||
$sales = $this->salesIntentLite->detect($prompt);
|
||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
|
||||
if ($intent !== SalesIntentLite::DISCOVERY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3) Vector-Search
|
||||
// 1) Vector-Suche
|
||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||
|
||||
if ($hits === []) {
|
||||
@@ -61,23 +62,26 @@ final class CatalogIntentLite
|
||||
$best = $hits[0];
|
||||
$bestScore = (float)($best['score'] ?? 0.0);
|
||||
|
||||
// 2) Score-Gate
|
||||
if ($bestScore < self::MIN_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Ambiguity-Prüfung
|
||||
// 3) Ambiguity-Check
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Nur catalog_entity zulassen
|
||||
// 4) Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 5) Canonical Label
|
||||
$label = trim((string)($best['label'] ?? ''));
|
||||
|
||||
if ($label === '') {
|
||||
@@ -86,15 +90,4 @@ final class CatalogIntentLite
|
||||
|
||||
return mb_strtolower($label);
|
||||
}
|
||||
|
||||
/**
 * Reports whether the haystack contains at least one of the given needles.
 *
 * Empty-string needles are skipped, so they can never produce a match.
 * The comparison is a plain substring check via str_contains(); it is
 * case-sensitive, so callers are responsible for any normalisation.
 *
 * @param string $haystack Text to search in.
 * @param array  $needles  Candidate substrings to look for.
 *
 * @return bool True as soon as one non-empty needle is found, false otherwise.
 */
private function containsAny(string $haystack, array $needles): bool
{
    foreach ($needles as $candidate) {
        // An empty needle would match every haystack, so it is ignored.
        if ($candidate === '') {
            continue;
        }

        if (str_contains($haystack, $candidate)) {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
@@ -4,17 +4,15 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Catalog\EntityCatalogService;
|
||||
use App\Entity\ModelGenerationConfig;
|
||||
use App\Intent\CatalogIntentLite;
|
||||
use App\Intent\IntentLite;
|
||||
use App\Intent\SalesIntentLite;
|
||||
use App\Knowledge\QueryCleaner;
|
||||
use App\Repository\ModelGenerationConfigRepository;
|
||||
use App\Routing\IntentRouteResolver;
|
||||
use App\Tag\TagRoutingService;
|
||||
use App\Vector\VectorSearchClient;
|
||||
use App\Catalog\EntityCatalogService;
|
||||
use App\Knowledge\Retrieval\NdjsonChunkLookup;
|
||||
use App\Knowledge\Retrieval\RetrieverInterface;
|
||||
|
||||
final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
{
|
||||
@@ -38,13 +36,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private readonly IntentLite $intentLite,
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
private readonly CatalogIntentLite $catalogIntent,
|
||||
private readonly IntentRouteResolver $routeResolver,
|
||||
private readonly EntityCatalogService $entityCatalogService
|
||||
)
|
||||
{
|
||||
) {
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// PRODUCTION (UNVERÄNDERTES VERHALTEN)
|
||||
// PRODUCTION
|
||||
// =========================================================
|
||||
|
||||
public function retrieve(string $prompt): array
|
||||
@@ -60,17 +58,32 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
|
||||
{
|
||||
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
|
||||
$entityTerm = $this->catalogIntent->detect($prompt);
|
||||
// ------------------------------------------------------------
|
||||
// ROUTING-MATRIX (minimal, ohne Core zu zerlegen)
|
||||
// ------------------------------------------------------------
|
||||
|
||||
if ($entityTerm !== null) {
|
||||
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
|
||||
// 1) Entity (semantisch über Tag-Vektor)
|
||||
$entityLabel = $this->catalogIntent->detect($prompt);
|
||||
|
||||
// 2) Intent (regelbasiert)
|
||||
$intent = (string)($this->salesIntentLite->detect($prompt)['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
|
||||
// 3) Route bestimmen (Intent + Entity)
|
||||
$route = $this->routeResolver->resolve($intent, $entityLabel);
|
||||
|
||||
// 4) Early Exit nur für catalog_list
|
||||
if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) {
|
||||
$catalogBlock = $this->entityCatalogService->listByTerm($entityLabel);
|
||||
|
||||
if ($catalogBlock !== null) {
|
||||
return [$catalogBlock];
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// NORMALER CORE
|
||||
// ------------------------------------------------------------
|
||||
|
||||
$core = $this->runCore($prompt, $config, false);
|
||||
|
||||
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
|
||||
@@ -95,7 +108,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// DEBUG (NEU, ABER NICHT IM PRODUKTIONS-PFAD)
|
||||
// DEBUG (unverändert, kein Early-Exit damit Debug immer Core zeigt)
|
||||
// =========================================================
|
||||
|
||||
/**
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge;
|
||||
namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Knowledge\StopWords;
|
||||
|
||||
final class QueryCleaner
|
||||
{
|
||||
70
src/Routing/IntentRouteResolver.php
Normal file
70
src/Routing/IntentRouteResolver.php
Normal file
@@ -0,0 +1,70 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Routing;
|
||||
|
||||
use App\Intent\SalesIntentLite;
|
||||
|
||||
/**
 * IntentRouteResolver
 *
 * Deterministic routing matrix that combines:
 *   the intent (result of SalesIntentLite)
 *   +
 *   the detected entity (result of CatalogIntentLite)
 *
 * This class contains NO detection logic.
 * It only decides which route/mode is taken.
 *
 * Extensible by adding further intent constants.
 */
final class IntentRouteResolver
{
    public const ROUTE_NORMAL = 'normal_rag';
    public const ROUTE_CATALOG_LIST = 'catalog_list';
    public const ROUTE_ENTITY_PRICING = 'entity_pricing';
    public const ROUTE_ENTITY_COMPARISON = 'entity_comparison';
    public const ROUTE_ENTITY_IMPLEMENTATION = 'entity_implementation';
    public const ROUTE_ENTITY_ROI = 'entity_roi';

    /**
     * Picks the route for a given intent / entity combination.
     *
     * @param string      $intent      Result of SalesIntentLite
     * @param string|null $entityLabel Result of CatalogIntentLite
     *
     * @return string One of the ROUTE_* constants of this class.
     */
    public function resolve(string $intent, ?string $entityLabel): string
    {
        // Arms are evaluated top to bottom and the first true condition wins,
        // so the "no entity" guard always takes precedence over the intent.
        return match (true) {
            // 1) No entity (null or empty label) → normal RAG
            $entityLabel === null,
            $entityLabel === '' => self::ROUTE_NORMAL,

            // 2) Entity present → decide by intent
            $intent === SalesIntentLite::DISCOVERY => self::ROUTE_CATALOG_LIST,
            $intent === SalesIntentLite::PRICING => self::ROUTE_ENTITY_PRICING,
            $intent === SalesIntentLite::COMPARISON => self::ROUTE_ENTITY_COMPARISON,
            $intent === SalesIntentLite::IMPLEMENTATION => self::ROUTE_ENTITY_IMPLEMENTATION,
            $intent === SalesIntentLite::ROI => self::ROUTE_ENTITY_ROI,

            // Any other intent falls back to normal RAG
            default => self::ROUTE_NORMAL,
        };
    }
}
|
||||
Reference in New Issue
Block a user