From 1f93238bf4f8504ee67fc085123af27f44742e23 Mon Sep 17 00:00:00 2001 From: team2 Date: Tue, 3 Mar 2026 07:55:03 +0100 Subject: [PATCH] cleanup ctalog intent --- src/Intent/CatalogIntentLite.php | 131 ++++++------------------------- 1 file changed, 26 insertions(+), 105 deletions(-) diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index 8ca2267..c85f144 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -11,7 +11,9 @@ use App\Tag\TagTypes; /** * CatalogIntentLite * - * Verantwortlich ausschließlich für: + * Reiner Entity-Detector. + * + * Verantwortlich nur für: * - Vector-Tag-Erkennung * - Score-Gate * - Ambiguity-Check @@ -24,14 +26,27 @@ use App\Tag\TagTypes; */ final class CatalogIntentLite { + /** + * Minimaler Similarity-Score. + * Verhindert Rauschen. + */ private const MIN_SCORE = 0.72; - private const AMBIGUITY_DELTA = 0.03; + + /** + * Differenz zwischen Top1 und Top2, + * damit kein unsicherer Treffer akzeptiert wird. + */ + private const AMBIGUITY_DELTA = 0.02; public function __construct( private readonly TagVectorSearchClient $tagVectorClient, - private readonly QueryCleaner $queryCleaner, + private readonly QueryCleaner $queryCleaner, ) {} + /** + * Gibt das canonical Label der erkannten catalog_entity zurück + * oder null, wenn kein sauberer Treffer. + */ public function detect(string $prompt): ?string { $prompt = trim($prompt); @@ -39,82 +54,10 @@ final class CatalogIntentLite return null; } - $clean = $this->queryCleaner->clean($prompt); - if ($clean === '') { - $clean = $prompt; - } + $promptTag = $this->queryCleaner->clean($prompt); - // ---------------------------------------------------- - // 1️⃣ Primär: Vollquery testen - // ---------------------------------------------------- - - $label = $this->detectFromQuery($clean); - if ($label !== null) { - return $label; - } - - // ---------------------------------------------------- - // 2️⃣ Fallback: Tokenweise testen - // (wichtig für "geräteliste testomat") - // ---------------------------------------------------- - - $tokens = $this->tokenize($clean); - - $bestLabel = null; - $bestScore = 0.0; - - foreach ($tokens as $token) { - - // sehr kurze Tokens ignorieren (Noise) - if (mb_strlen($token) < 3) { - continue; - } - - $hits = $this->tagVectorClient->search($token, 3); - - if ($hits === []) { - continue; - } - - $top = $hits[0] ?? null; - if (!is_array($top)) { - continue; - } - - $score = (float)($top['score'] ?? 0.0); - - if ($score < self::MIN_SCORE) { - continue; - } - - // Ambiguity-Check - if (isset($hits[1])) { - $secondScore = (float)($hits[1]['score'] ?? 0.0); - if (abs($score - $secondScore) < self::AMBIGUITY_DELTA) { - continue; - } - } - - if (($top['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) { - continue; - } - - if ($score > $bestScore) { - $bestScore = $score; - $bestLabel = trim((string)($top['label'] ?? '')); - } - } - - if ($bestLabel === null || $bestLabel === '') { - return null; - } - - return mb_strtolower($bestLabel); - } - - private function detectFromQuery(string $query): ?string - { - $hits = $this->tagVectorClient->search($query, 3); + // 1) Tag-Vector-Suche + $hits = $this->tagVectorClient->search($promptTag, 3); if ($hits === []) { return null; @@ -123,21 +66,26 @@ final class CatalogIntentLite $best = $hits[0]; $bestScore = (float)($best['score'] ?? 0.0); + // 2) Score-Tags if ($bestScore < self::MIN_SCORE) { return null; } + // 3) Ambiguity-Check if (isset($hits[1])) { $secondScore = (float)($hits[1]['score'] ?? 0.0); + if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { return null; } } + // 4) Nur catalog_entity zulassen if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) { return null; } + // 5) Canonical Label $label = trim((string)($best['label'] ?? '')); if ($label === '') { @@ -146,31 +94,4 @@ final class CatalogIntentLite return mb_strtolower($label); } - - private function tokenize(string $text): array - { - $parts = preg_split('/\s+/u', trim($text)); - if (!$parts) { - return []; - } - - $seen = []; - $out = []; - - foreach ($parts as $p) { - $p = trim($p); - if ($p === '') { - continue; - } - - if (isset($seen[$p])) { - continue; - } - - $seen[$p] = true; - $out[] = $p; - } - - return $out; - } } \ No newline at end of file