harden semantic match by tags
This commit is contained in:
@@ -7,51 +7,52 @@ namespace App\Intent;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use App\Tag\TagTypes;
|
||||
|
||||
/**
|
||||
* CatalogIntentLite
|
||||
*
|
||||
* Reiner Entity-Detector.
|
||||
*
|
||||
* Verantwortlich nur für:
|
||||
* - Vector-Tag-Erkennung
|
||||
* - Score-Gate
|
||||
* - Ambiguity-Check
|
||||
* - Sicherstellen, dass TagType = catalog_entity
|
||||
*
|
||||
* KEIN:
|
||||
* - Listen-Signal
|
||||
* - SalesIntent
|
||||
* - Routing
|
||||
*/
|
||||
final class CatalogIntentLite
|
||||
{
|
||||
private const LIST_SIGNALS = [
|
||||
'liste',
|
||||
'auflisten',
|
||||
'aufzaehl',
|
||||
'aufzähl',
|
||||
'übersicht',
|
||||
'uebersicht',
|
||||
'welche gibt es',
|
||||
'welche sind',
|
||||
'zeig mir alle',
|
||||
'zeige mir alle',
|
||||
'alle',
|
||||
];
|
||||
|
||||
// Realistischer Gate-Wert
|
||||
/**
|
||||
* Minimaler Similarity-Score.
|
||||
* Verhindert Rauschen.
|
||||
*/
|
||||
private const MIN_SCORE = 0.50;
|
||||
|
||||
// Ambiguity darf nicht zu aggressiv sein
|
||||
/**
|
||||
* Differenz zwischen Top1 und Top2,
|
||||
* damit kein unsicherer Treffer akzeptiert wird.
|
||||
*/
|
||||
private const AMBIGUITY_DELTA = 0.01;
|
||||
|
||||
/**
 * Receives the collaborators through constructor property promotion and
 * keeps them as private readonly properties; the body itself is empty.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so
 * $salesIntentLite may exist on only one side of the change — confirm
 * against the actual file before relying on it.
 */
public function __construct(
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
private readonly TagVectorSearchClient $tagVectorClient,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Gibt das canonical Label der erkannten catalog_entity zurück
|
||||
* oder null, wenn kein sauberer Treffer.
|
||||
*/
|
||||
public function detect(string $prompt): ?string
|
||||
{
|
||||
$normalizedPrompt = mb_strtolower($prompt);
|
||||
|
||||
// 1) Listen-Signal prüfen
|
||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
||||
$prompt = trim($prompt);
|
||||
if ($prompt === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 2) Nur DISCOVERY zulassen
|
||||
$sales = $this->salesIntentLite->detect($prompt);
|
||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
|
||||
if ($intent !== SalesIntentLite::DISCOVERY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3) Vector-Search
|
||||
// 1) Vector-Suche
|
||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||
|
||||
if ($hits === []) {
|
||||
@@ -61,23 +62,26 @@ final class CatalogIntentLite
|
||||
$best = $hits[0];
|
||||
$bestScore = (float)($best['score'] ?? 0.0);
|
||||
|
||||
// 2) Score-Gate
|
||||
if ($bestScore < self::MIN_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Ambiguity-Prüfung
|
||||
// 3) Ambiguity-Check
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Nur catalog_entity zulassen
|
||||
// 4) Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 5) Canonical Label
|
||||
$label = trim((string)($best['label'] ?? ''));
|
||||
|
||||
if ($label === '') {
|
||||
@@ -86,15 +90,4 @@ final class CatalogIntentLite
|
||||
|
||||
return mb_strtolower($label);
|
||||
}
|
||||
|
||||
/**
 * Reports whether at least one non-empty needle occurs in the haystack.
 *
 * The comparison is a plain byte-wise substring check, so callers are
 * expected to normalise case on both sides beforehand.
 *
 * @param string $haystack Text to search in.
 * @param array  $needles  Candidate substrings; empty strings are skipped.
 *
 * @return bool True as soon as one needle is found, false otherwise.
 */
private function containsAny(string $haystack, array $needles): bool
{
    foreach ($needles as $candidate) {
        // An empty needle would match every haystack, so it is ignored.
        if ($candidate === '') {
            continue;
        }

        if (str_contains($haystack, $candidate)) {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
Reference in New Issue
Block a user