optimize catalog semantic matches by tags

This commit is contained in:
team2
2026-02-28 18:48:38 +01:00
parent 0d3f6e21d6
commit ff01919b30
2 changed files with 65 additions and 92 deletions

View File

@@ -5,6 +5,7 @@ declare(strict_types=1);
namespace App\Intent; namespace App\Intent;
use App\Tag\TagVectorSearchClient; use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes;
final class CatalogIntentLite final class CatalogIntentLite
{ {
@@ -22,8 +23,11 @@ final class CatalogIntentLite
'alle', 'alle',
]; ];
private const MIN_SCORE = 0.60; // Realistischer Gate-Wert
private const AMBIGUITY_DELTA = 0.05; private const MIN_SCORE = 0.50;
// Ambiguity darf nicht zu aggressiv sein
private const AMBIGUITY_DELTA = 0.01;
public function __construct( public function __construct(
private readonly SalesIntentLite $salesIntentLite, private readonly SalesIntentLite $salesIntentLite,
@@ -34,12 +38,12 @@ final class CatalogIntentLite
{ {
$normalizedPrompt = mb_strtolower($prompt); $normalizedPrompt = mb_strtolower($prompt);
// 1) Muss Listen-Signal enthalten // 1) Listen-Signal prüfen
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) { if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
return null; return null;
} }
// 2) Guardrail: Nur DISCOVERY // 2) Nur DISCOVERY zulassen
$sales = $this->salesIntentLite->detect($prompt); $sales = $this->salesIntentLite->detect($prompt);
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY); $intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
@@ -47,7 +51,7 @@ final class CatalogIntentLite
return null; return null;
} }
// 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check) // 3) Vector-Search
$hits = $this->tagVectorClient->search($prompt, 3); $hits = $this->tagVectorClient->search($prompt, 3);
if ($hits === []) { if ($hits === []) {
@@ -61,7 +65,7 @@ final class CatalogIntentLite
return null; return null;
} }
// Ambiguity-Check // Ambiguity-Prüfung
if (isset($hits[1])) { if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0); $secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
@@ -69,13 +73,12 @@ final class CatalogIntentLite
} }
} }
// 4) Nur catalog_entity zulassen // Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== 'catalog_entity') { if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null; return null;
} }
// 5) Canonical Label zurückgeben $label = trim((string)($best['label'] ?? ''));
$label = (string)($best['label'] ?? '');
if ($label === '') { if ($label === '') {
return null; return null;

View File

@@ -4,16 +4,6 @@ declare(strict_types=1);
namespace App\Intent; namespace App\Intent;
/**
* SalesIntentLite
*
* Deterministische Vertriebs-Intent-Erkennung.
* Kein LLM, kein ML, nur regelbasierte Klassifikation.
*
* WICHTIG:
* - Immer mit ORIGINAL-Prompt aufrufen.
* - Nicht mit gereinigter Query.
*/
final class SalesIntentLite final class SalesIntentLite
{ {
public const DISCOVERY = 'discovery'; public const DISCOVERY = 'discovery';
@@ -36,89 +26,78 @@ final class SalesIntentLite
]; ];
// ------------------------------------------------------------ // ------------------------------------------------------------
// PRICING // PRICING (stark gewichten)
// ------------------------------------------------------------ // ------------------------------------------------------------
$pricingWords = [ foreach ([
'preis', 'preise', 'kosten', 'lizenz', 'lizenzmodell', 'preis','preise','kosten','lizenz','lizenzmodell',
'paket', 'pakete', 'tarif', 'tarife', 'tarif','tarife','gebuehr','gebühr',
'gebühr', 'gebuehr', 'monatlich', 'jährlich', 'jaehrlich', 'monatlich','jaehrlich','jährlich','abo','subscription'
'abo', 'subscription' ] as $word) {
]; if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::PRICING] += 3;
foreach ($pricingWords as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::PRICING] += 2;
} }
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
// COMPARISON // COMPARISON (wichtiger für Katalog-Block)
// ------------------------------------------------------------ // ------------------------------------------------------------
$comparisonPatterns = [ foreach ([
'/\bvergleich\b/u', '/\bvergleich(en)?\b/u',
'/\bvs\b/u', '/\bvs\b/u',
'/\boder\b/u', '/\bgegenueber\b/u',
'/\balternative(n)?\b/u', '/\boder\b/u',
'/\bunterschied(e)?\b/u', '/\balternative(n)?\b/u',
'/\bbesser\b/u', '/\bunterschied(e)?\b/u',
]; '/\bbesser\b/u'
] as $pattern) {
foreach ($comparisonPatterns as $pattern) {
if (preg_match($pattern, $p)) { if (preg_match($pattern, $p)) {
$scores[self::COMPARISON] += 2; $scores[self::COMPARISON] += 3;
} }
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
// OBJECTION // OBJECTION
// ------------------------------------------------------------ // ------------------------------------------------------------
$objectionWords = [ foreach ([
'problem', 'risiko', 'nachteil', 'datenschutz', 'problem','risiko','nachteil','datenschutz',
'dsgvo', 'sicherheit', 'compliance', 'dsgvo','sicherheit','compliance',
'kritik', 'zweifel', 'unsicher' 'kritik','zweifel','unsicher'
]; ] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
foreach ($objectionWords as $word) { $scores[self::OBJECTION] += 3;
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::OBJECTION] += 2;
} }
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
// IMPLEMENTATION // IMPLEMENTATION (Intent-Verben stärker)
// ------------------------------------------------------------ // ------------------------------------------------------------
$implementationWords = [ foreach ([
'implementierung', 'einführung', 'einfuehrung', 'implementierung','implementieren',
'integration', 'aufwand', 'setup', 'integration','integrieren',
'rollout', 'migration', 'installation', 'einführung','einfuehrung',
'technisch', 'api', 'schnittstelle' 'aufwand','setup','rollout',
]; 'migration','installation',
'api','schnittstelle'
foreach ($implementationWords as $word) { ] as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::IMPLEMENTATION] += 2; $scores[self::IMPLEMENTATION] += 3;
} }
} }
// ------------------------------------------------------------ // ------------------------------------------------------------
// ROI / Business Case // ROI (weniger generisch)
// ------------------------------------------------------------ // ------------------------------------------------------------
$roiWords = [ foreach ([
'roi', 'rentabilität', 'rentabilitaet', 'roi','rentabilitaet','rentabilität',
'business case', 'nutzen', 'business case','einsparung',
'effizienz', 'einsparung', 'umsatz', 'kosten senken','umsatz steigern',
'wert', 'vorteil' 'effizienz steigern'
]; ] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
foreach ($roiWords as $word) { $scores[self::ROI] += 3;
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::ROI] += 2;
} }
} }
// ------------------------------------------------------------
// Entscheidung
// ------------------------------------------------------------
arsort($scores); arsort($scores);
$topIntent = array_key_first($scores); $topIntent = array_key_first($scores);
@@ -141,20 +120,11 @@ final class SalesIntentLite
{ {
$s = mb_strtolower($s); $s = mb_strtolower($s);
$replacements = [ return strtr($s, [
'ä' => 'ae', 'ä'=>'ae',
'ö' => 'oe', 'ö'=>'oe',
'ü' => 'ue', 'ü'=>'ue',
'ß' => 'ss', 'ß'=>'ss'
]; ]);
foreach ($replacements as $umlaut => $alt) {
if (str_contains($s, $umlaut)) {
$s .= ' ' . str_replace($umlaut, $alt, $s);
break;
}
}
return $s;
} }
} }