From ff01919b30d462a0111641ff50d2ce0c519f189d Mon Sep 17 00:00:00 2001 From: team2 Date: Sat, 28 Feb 2026 18:48:38 +0100 Subject: [PATCH] optimize catalog semantic matches by tags --- src/Intent/CatalogIntentLite.php | 23 +++--- src/Intent/SalesIntentLite.php | 134 ++++++++++++------------------- 2 files changed, 65 insertions(+), 92 deletions(-) diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index 59f83f7..2219f33 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -5,6 +5,7 @@ declare(strict_types=1); namespace App\Intent; use App\Tag\TagVectorSearchClient; +use App\Tag\TagTypes; final class CatalogIntentLite { @@ -22,8 +23,11 @@ final class CatalogIntentLite 'alle', ]; - private const MIN_SCORE = 0.60; - private const AMBIGUITY_DELTA = 0.05; + // Realistischer Gate-Wert + private const MIN_SCORE = 0.50; + + // Ambiguity darf nicht zu aggressiv sein + private const AMBIGUITY_DELTA = 0.01; public function __construct( private readonly SalesIntentLite $salesIntentLite, @@ -34,12 +38,12 @@ final class CatalogIntentLite { $normalizedPrompt = mb_strtolower($prompt); - // 1) Muss Listen-Signal enthalten + // 1) Listen-Signal prüfen if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) { return null; } - // 2) Guardrail: Nur DISCOVERY + // 2) Nur DISCOVERY zulassen $sales = $this->salesIntentLite->detect($prompt); $intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY); @@ -47,7 +51,7 @@ final class CatalogIntentLite return null; } - // 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check) + // 3) Vector-Search $hits = $this->tagVectorClient->search($prompt, 3); if ($hits === []) { @@ -61,7 +65,7 @@ final class CatalogIntentLite return null; } - // Ambiguity-Check + // Ambiguity-Prüfung if (isset($hits[1])) { $secondScore = (float)($hits[1]['score'] ?? 
0.0); if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { @@ -69,13 +73,12 @@ final class CatalogIntentLite } } - // 4) Nur catalog_entity zulassen - if (($best['tag_type'] ?? null) !== 'catalog_entity') { + // Nur catalog_entity zulassen + if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) { return null; } - // 5) Canonical Label zurückgeben - $label = (string)($best['label'] ?? ''); + $label = trim((string)($best['label'] ?? '')); if ($label === '') { return null; diff --git a/src/Intent/SalesIntentLite.php b/src/Intent/SalesIntentLite.php index 37d0f84..ae5e114 100644 --- a/src/Intent/SalesIntentLite.php +++ b/src/Intent/SalesIntentLite.php @@ -4,16 +4,6 @@ declare(strict_types=1); namespace App\Intent; -/** - * SalesIntentLite - * - * Deterministische Vertriebs-Intent-Erkennung. - * Kein LLM, kein ML, nur regelbasierte Klassifikation. - * - * WICHTIG: - * - Immer mit ORIGINAL-Prompt aufrufen. - * - Nicht mit gereinigter Query. - */ final class SalesIntentLite { public const DISCOVERY = 'discovery'; @@ -36,89 +26,78 @@ final class SalesIntentLite ]; // ------------------------------------------------------------ - // PRICING + // PRICING (stark gewichten) // ------------------------------------------------------------ - $pricingWords = [ - 'preis', 'preise', 'kosten', 'lizenz', 'lizenzmodell', - 'paket', 'pakete', 'tarif', 'tarife', - 'gebühr', 'gebuehr', 'monatlich', 'jährlich', 'jaehrlich', - 'abo', 'subscription' - ]; - - foreach ($pricingWords as $word) { - if (preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $p)) { - $scores[self::PRICING] += 2; + foreach ([ + 'preis','preise','kosten','lizenz','lizenzmodell', + 'tarif','tarife','gebuehr','gebühr', + 'monatlich','jaehrlich','jährlich','abo','subscription' + ] as $word) { + if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + $scores[self::PRICING] += 3; } } // ------------------------------------------------------------ - // COMPARISON + // COMPARISON (wichtiger für Katalog-Block) // ------------------------------------------------------------ - $comparisonPatterns = [ - '/\bvergleich\b/u', - '/\bvs\b/u', - '/\boder\b/u', - '/\balternative(n)?\b/u', - '/\bunterschied(e)?\b/u', - '/\bbesser\b/u', - ]; - - foreach ($comparisonPatterns as $pattern) { + foreach ([ + '/\bvergleich(en)?\b/u', + '/\bvs\b/u', + '/\bgegenueber\b/u', + '/\boder\b/u', + '/\balternative(n)?\b/u', + '/\bunterschied(e)?\b/u', + '/\bbesser\b/u' + ] as $pattern) { if (preg_match($pattern, $p)) { - $scores[self::COMPARISON] += 2; + $scores[self::COMPARISON] += 3; } } // ------------------------------------------------------------ // OBJECTION // ------------------------------------------------------------ - $objectionWords = [ - 'problem', 'risiko', 'nachteil', 'datenschutz', - 'dsgvo', 'sicherheit', 'compliance', - 'kritik', 'zweifel', 'unsicher' - ]; - - foreach ($objectionWords as $word) { - if (preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $p)) { - $scores[self::OBJECTION] += 2; + foreach ([ + 'problem','risiko','nachteil','datenschutz', + 'dsgvo','sicherheit','compliance', + 'kritik','zweifel','unsicher' + ] as $word) { + if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + $scores[self::OBJECTION] += 3; } } // ------------------------------------------------------------ - // IMPLEMENTATION + // IMPLEMENTATION (Intent-Verben stärker) // ------------------------------------------------------------ - $implementationWords = [ - 'implementierung', 'einführung', 'einfuehrung', - 'integration', 'aufwand', 'setup', - 'rollout', 'migration', 'installation', - 'technisch', 'api', 'schnittstelle' - ]; - - foreach ($implementationWords as $word) { - if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { - $scores[self::IMPLEMENTATION] += 2; + foreach ([ + 'implementierung','implementieren', + 'integration','integrieren', + 'einführung','einfuehrung', + 'aufwand','setup','rollout', + 'migration','installation', + 'api','schnittstelle' + ] as $word) { + if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + $scores[self::IMPLEMENTATION] += 3; } } // ------------------------------------------------------------ - // ROI / Business Case + // ROI (weniger generisch) // ------------------------------------------------------------ - $roiWords = [ - 'roi', 'rentabilität', 'rentabilitaet', - 'business case', 'nutzen', - 'effizienz', 'einsparung', 'umsatz', - 'wert', 'vorteil' - ]; - - foreach ($roiWords as $word) { - if (preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $p)) { - $scores[self::ROI] += 2; + foreach ([ + 'roi','rentabilitaet','rentabilität', + 'business case','einsparung', + 'kosten senken','umsatz steigern', + 'effizienz steigern' + ] as $word) { + if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + $scores[self::ROI] += 3; } } - // ------------------------------------------------------------ - // Entscheidung - // ------------------------------------------------------------ arsort($scores); $topIntent = array_key_first($scores); @@ -141,20 +120,11 @@ final class SalesIntentLite { $s = mb_strtolower($s); - $replacements = [ - 'ä' => 'ae', - 'ö' => 'oe', - 'ü' => 'ue', - 'ß' => 'ss', - ]; - - foreach ($replacements as $umlaut => $alt) { - if (str_contains($s, $umlaut)) { - $s .= ' ' . str_replace($umlaut, $alt, $s); - break; - } - } - - return $s; + return strtr($s, [ + 'ä'=>'ae', + 'ö'=>'oe', + 'ü'=>'ue', + 'ß'=>'ss' + ]); } } \ No newline at end of file