harden semantic matches by tags
This commit is contained in:
@@ -7,51 +7,52 @@ namespace App\Intent;
|
|||||||
use App\Tag\TagVectorSearchClient;
|
use App\Tag\TagVectorSearchClient;
|
||||||
use App\Tag\TagTypes;
|
use App\Tag\TagTypes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CatalogIntentLite
|
||||||
|
*
|
||||||
|
* Reiner Entity-Detector.
|
||||||
|
*
|
||||||
|
* Verantwortlich nur für:
|
||||||
|
* - Vector-Tag-Erkennung
|
||||||
|
* - Score-Gate
|
||||||
|
* - Ambiguity-Check
|
||||||
|
* - Sicherstellen, dass TagType = catalog_entity
|
||||||
|
*
|
||||||
|
* KEIN:
|
||||||
|
* - Listen-Signal
|
||||||
|
* - SalesIntent
|
||||||
|
* - Routing
|
||||||
|
*/
|
||||||
final class CatalogIntentLite
|
final class CatalogIntentLite
|
||||||
{
|
{
|
||||||
private const LIST_SIGNALS = [
|
/**
|
||||||
'liste',
|
* Minimaler Similarity-Score.
|
||||||
'auflisten',
|
* Verhindert Rauschen.
|
||||||
'aufzaehl',
|
*/
|
||||||
'aufzähl',
|
|
||||||
'übersicht',
|
|
||||||
'uebersicht',
|
|
||||||
'welche gibt es',
|
|
||||||
'welche sind',
|
|
||||||
'zeig mir alle',
|
|
||||||
'zeige mir alle',
|
|
||||||
'alle',
|
|
||||||
];
|
|
||||||
|
|
||||||
// Realistischer Gate-Wert
|
|
||||||
private const MIN_SCORE = 0.50;
|
private const MIN_SCORE = 0.50;
|
||||||
|
|
||||||
// Ambiguity darf nicht zu aggressiv sein
|
/**
|
||||||
|
* Differenz zwischen Top1 und Top2,
|
||||||
|
* damit kein unsicherer Treffer akzeptiert wird.
|
||||||
|
*/
|
||||||
private const AMBIGUITY_DELTA = 0.01;
|
private const AMBIGUITY_DELTA = 0.01;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private readonly SalesIntentLite $salesIntentLite,
|
|
||||||
private readonly TagVectorSearchClient $tagVectorClient,
|
private readonly TagVectorSearchClient $tagVectorClient,
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gibt das canonical Label der erkannten catalog_entity zurück
|
||||||
|
* oder null, wenn kein sauberer Treffer.
|
||||||
|
*/
|
||||||
public function detect(string $prompt): ?string
|
public function detect(string $prompt): ?string
|
||||||
{
|
{
|
||||||
$normalizedPrompt = mb_strtolower($prompt);
|
$prompt = trim($prompt);
|
||||||
|
if ($prompt === '') {
|
||||||
// 1) Listen-Signal prüfen
|
|
||||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2) Nur DISCOVERY zulassen
|
// 1) Vector-Suche
|
||||||
$sales = $this->salesIntentLite->detect($prompt);
|
|
||||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
|
||||||
|
|
||||||
if ($intent !== SalesIntentLite::DISCOVERY) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3) Vector-Search
|
|
||||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||||
|
|
||||||
if ($hits === []) {
|
if ($hits === []) {
|
||||||
@@ -61,23 +62,26 @@ final class CatalogIntentLite
|
|||||||
$best = $hits[0];
|
$best = $hits[0];
|
||||||
$bestScore = (float)($best['score'] ?? 0.0);
|
$bestScore = (float)($best['score'] ?? 0.0);
|
||||||
|
|
||||||
|
// 2) Score-Gate
|
||||||
if ($bestScore < self::MIN_SCORE) {
|
if ($bestScore < self::MIN_SCORE) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ambiguity-Prüfung
|
// 3) Ambiguity-Check
|
||||||
if (isset($hits[1])) {
|
if (isset($hits[1])) {
|
||||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||||
|
|
||||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Nur catalog_entity zulassen
|
// 4) Nur catalog_entity zulassen
|
||||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 5) Canonical Label
|
||||||
$label = trim((string)($best['label'] ?? ''));
|
$label = trim((string)($best['label'] ?? ''));
|
||||||
|
|
||||||
if ($label === '') {
|
if ($label === '') {
|
||||||
@@ -86,15 +90,4 @@ final class CatalogIntentLite
|
|||||||
|
|
||||||
return mb_strtolower($label);
|
return mb_strtolower($label);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function containsAny(string $haystack, array $needles): bool
|
|
||||||
{
|
|
||||||
foreach ($needles as $needle) {
|
|
||||||
if ($needle !== '' && str_contains($haystack, $needle)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
@@ -4,17 +4,15 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Knowledge\Retrieval;
|
namespace App\Knowledge\Retrieval;
|
||||||
|
|
||||||
|
use App\Catalog\EntityCatalogService;
|
||||||
use App\Entity\ModelGenerationConfig;
|
use App\Entity\ModelGenerationConfig;
|
||||||
use App\Intent\CatalogIntentLite;
|
use App\Intent\CatalogIntentLite;
|
||||||
use App\Intent\IntentLite;
|
use App\Intent\IntentLite;
|
||||||
use App\Intent\SalesIntentLite;
|
use App\Intent\SalesIntentLite;
|
||||||
use App\Knowledge\QueryCleaner;
|
|
||||||
use App\Repository\ModelGenerationConfigRepository;
|
use App\Repository\ModelGenerationConfigRepository;
|
||||||
|
use App\Routing\IntentRouteResolver;
|
||||||
use App\Tag\TagRoutingService;
|
use App\Tag\TagRoutingService;
|
||||||
use App\Vector\VectorSearchClient;
|
use App\Vector\VectorSearchClient;
|
||||||
use App\Catalog\EntityCatalogService;
|
|
||||||
use App\Knowledge\Retrieval\NdjsonChunkLookup;
|
|
||||||
use App\Knowledge\Retrieval\RetrieverInterface;
|
|
||||||
|
|
||||||
final class NdjsonHybridRetriever implements RetrieverInterface
|
final class NdjsonHybridRetriever implements RetrieverInterface
|
||||||
{
|
{
|
||||||
@@ -38,13 +36,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
private readonly IntentLite $intentLite,
|
private readonly IntentLite $intentLite,
|
||||||
private readonly SalesIntentLite $salesIntentLite,
|
private readonly SalesIntentLite $salesIntentLite,
|
||||||
private readonly CatalogIntentLite $catalogIntent,
|
private readonly CatalogIntentLite $catalogIntent,
|
||||||
|
private readonly IntentRouteResolver $routeResolver,
|
||||||
private readonly EntityCatalogService $entityCatalogService
|
private readonly EntityCatalogService $entityCatalogService
|
||||||
)
|
) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// =========================================================
|
// =========================================================
|
||||||
// PRODUCTION (UNVERÄNDERTES VERHALTEN)
|
// PRODUCTION
|
||||||
// =========================================================
|
// =========================================================
|
||||||
|
|
||||||
public function retrieve(string $prompt): array
|
public function retrieve(string $prompt): array
|
||||||
@@ -60,17 +58,32 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
|
|
||||||
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
|
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
|
||||||
{
|
{
|
||||||
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
|
// ------------------------------------------------------------
|
||||||
$entityTerm = $this->catalogIntent->detect($prompt);
|
// ROUTING-MATRIX (minimal, ohne Core zu zerlegen)
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
if ($entityTerm !== null) {
|
// 1) Entity (semantisch über Tag-Vektor)
|
||||||
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
|
$entityLabel = $this->catalogIntent->detect($prompt);
|
||||||
|
|
||||||
|
// 2) Intent (regelbasiert)
|
||||||
|
$intent = (string)($this->salesIntentLite->detect($prompt)['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||||
|
|
||||||
|
// 3) Route bestimmen (Intent + Entity)
|
||||||
|
$route = $this->routeResolver->resolve($intent, $entityLabel);
|
||||||
|
|
||||||
|
// 4) Early Exit nur für catalog_list
|
||||||
|
if ($route === IntentRouteResolver::ROUTE_CATALOG_LIST && $entityLabel !== null) {
|
||||||
|
$catalogBlock = $this->entityCatalogService->listByTerm($entityLabel);
|
||||||
|
|
||||||
if ($catalogBlock !== null) {
|
if ($catalogBlock !== null) {
|
||||||
return [$catalogBlock];
|
return [$catalogBlock];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
// NORMALER CORE
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
$core = $this->runCore($prompt, $config, false);
|
$core = $this->runCore($prompt, $config, false);
|
||||||
|
|
||||||
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
|
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
|
||||||
@@ -95,7 +108,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
// =========================================================
|
// =========================================================
|
||||||
// DEBUG (NEU, ABER NICHT IM PRODUKTIONS-PFAD)
|
// DEBUG (unverändert, kein Early-Exit damit Debug immer Core zeigt)
|
||||||
// =========================================================
|
// =========================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
namespace App\Knowledge;
|
namespace App\Knowledge\Retrieval;
|
||||||
|
|
||||||
|
use App\Knowledge\StopWords;
|
||||||
|
|
||||||
final class QueryCleaner
|
final class QueryCleaner
|
||||||
{
|
{
|
||||||
70
src/Routing/IntentRouteResolver.php
Normal file
70
src/Routing/IntentRouteResolver.php
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Routing;
|
||||||
|
|
||||||
|
use App\Intent\SalesIntentLite;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IntentRouteResolver
|
||||||
|
*
|
||||||
|
* Deterministic routing matrix for:
|
||||||
|
* intent (SalesIntentLite)
|
||||||
|
* +
|
||||||
|
* detected entity (CatalogIntentLite)
|
||||||
|
*
|
||||||
|
* This class contains NO detection logic.
|
||||||
|
* It only decides which route (mode) is taken.
|
||||||
|
*
|
||||||
|
* Extensible via new intent constants.
|
||||||
|
*/
|
||||||
|
final class IntentRouteResolver
|
||||||
|
{
|
||||||
|
public const ROUTE_NORMAL = 'normal_rag';
|
||||||
|
public const ROUTE_CATALOG_LIST = 'catalog_list';
|
||||||
|
public const ROUTE_ENTITY_PRICING = 'entity_pricing';
|
||||||
|
public const ROUTE_ENTITY_COMPARISON = 'entity_comparison';
|
||||||
|
public const ROUTE_ENTITY_IMPLEMENTATION = 'entity_implementation';
|
||||||
|
public const ROUTE_ENTITY_ROI = 'entity_roi';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maps intent + entity to one of the ROUTE_* constants; falls back to ROUTE_NORMAL without an entity or for an unlisted intent.
|
||||||
|
*
|
||||||
|
* @param string $intent Result from SalesIntentLite
|
||||||
|
* @param string|null $entityLabel Result from CatalogIntentLite
|
||||||
|
*/
|
||||||
|
public function resolve(string $intent, ?string $entityLabel): string
|
||||||
|
{
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
// 1) No entity (null or empty string) → normal RAG
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
if ($entityLabel === null || $entityLabel === '') {
|
||||||
|
return self::ROUTE_NORMAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
// 2) Intent-based decision
|
||||||
|
// ------------------------------------------------------------
|
||||||
|
return match ($intent) {
|
||||||
|
|
||||||
|
SalesIntentLite::DISCOVERY
|
||||||
|
=> self::ROUTE_CATALOG_LIST,
|
||||||
|
|
||||||
|
SalesIntentLite::PRICING
|
||||||
|
=> self::ROUTE_ENTITY_PRICING,
|
||||||
|
|
||||||
|
SalesIntentLite::COMPARISON
|
||||||
|
=> self::ROUTE_ENTITY_COMPARISON,
|
||||||
|
|
||||||
|
SalesIntentLite::IMPLEMENTATION
|
||||||
|
=> self::ROUTE_ENTITY_IMPLEMENTATION,
|
||||||
|
|
||||||
|
SalesIntentLite::ROI
|
||||||
|
=> self::ROUTE_ENTITY_ROI,
|
||||||
|
|
||||||
|
default
|
||||||
|
=> self::ROUTE_NORMAL,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user