optimize catalog semantic match by tags

This commit is contained in:
team2
2026-02-28 18:48:38 +01:00
parent 0d3f6e21d6
commit ff01919b30
2 changed files with 65 additions and 92 deletions

View File

@@ -5,6 +5,7 @@ declare(strict_types=1);
namespace App\Intent;
use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes;
final class CatalogIntentLite
{
@@ -22,8 +23,11 @@ final class CatalogIntentLite
'alle',
];
private const MIN_SCORE = 0.60;
private const AMBIGUITY_DELTA = 0.05;
// Realistischer Gate-Wert
private const MIN_SCORE = 0.50;
// Ambiguity darf nicht zu aggressiv sein
private const AMBIGUITY_DELTA = 0.01;
public function __construct(
private readonly SalesIntentLite $salesIntentLite,
@@ -34,12 +38,12 @@ final class CatalogIntentLite
{
$normalizedPrompt = mb_strtolower($prompt);
// 1) Muss Listen-Signal enthalten
// 1) Listen-Signal prüfen
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
return null;
}
// 2) Guardrail: Nur DISCOVERY
// 2) Nur DISCOVERY zulassen
$sales = $this->salesIntentLite->detect($prompt);
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
@@ -47,7 +51,7 @@ final class CatalogIntentLite
return null;
}
// 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check)
// 3) Vector-Search
$hits = $this->tagVectorClient->search($prompt, 3);
if ($hits === []) {
@@ -61,7 +65,7 @@ final class CatalogIntentLite
return null;
}
// Ambiguity-Check
// Ambiguity-Prüfung
if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
@@ -69,13 +73,12 @@ final class CatalogIntentLite
}
}
// 4) Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== 'catalog_entity') {
// Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null;
}
// 5) Canonical Label zurückgeben
$label = (string)($best['label'] ?? '');
$label = trim((string)($best['label'] ?? ''));
if ($label === '') {
return null;

View File

@@ -4,16 +4,6 @@ declare(strict_types=1);
namespace App\Intent;
/**
* SalesIntentLite
*
* Deterministische Vertriebs-Intent-Erkennung.
* Kein LLM, kein ML, nur regelbasierte Klassifikation.
*
* WICHTIG:
* - Immer mit ORIGINAL-Prompt aufrufen.
* - Nicht mit gereinigter Query.
*/
final class SalesIntentLite
{
public const DISCOVERY = 'discovery';
@@ -36,89 +26,78 @@ final class SalesIntentLite
];
// ------------------------------------------------------------
// PRICING
// PRICING (stark gewichten)
// ------------------------------------------------------------
$pricingWords = [
'preis', 'preise', 'kosten', 'lizenz', 'lizenzmodell',
'paket', 'pakete', 'tarif', 'tarife',
'gebühr', 'gebuehr', 'monatlich', 'jährlich', 'jaehrlich',
'abo', 'subscription'
];
foreach ($pricingWords as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::PRICING] += 2;
foreach ([
'preis','preise','kosten','lizenz','lizenzmodell',
'tarif','tarife','gebuehr','gebühr',
'monatlich','jaehrlich','jährlich','abo','subscription'
] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::PRICING] += 3;
}
}
// ------------------------------------------------------------
// COMPARISON
// COMPARISON (wichtiger für Katalog-Block)
// ------------------------------------------------------------
$comparisonPatterns = [
'/\bvergleich\b/u',
'/\bvs\b/u',
'/\boder\b/u',
'/\balternative(n)?\b/u',
'/\bunterschied(e)?\b/u',
'/\bbesser\b/u',
];
foreach ($comparisonPatterns as $pattern) {
foreach ([
'/\bvergleich(en)?\b/u',
'/\bvs\b/u',
'/\bgegenueber\b/u',
'/\boder\b/u',
'/\balternative(n)?\b/u',
'/\bunterschied(e)?\b/u',
'/\bbesser\b/u'
] as $pattern) {
if (preg_match($pattern, $p)) {
$scores[self::COMPARISON] += 2;
$scores[self::COMPARISON] += 3;
}
}
// ------------------------------------------------------------
// OBJECTION
// ------------------------------------------------------------
$objectionWords = [
'problem', 'risiko', 'nachteil', 'datenschutz',
'dsgvo', 'sicherheit', 'compliance',
'kritik', 'zweifel', 'unsicher'
];
foreach ($objectionWords as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::OBJECTION] += 2;
foreach ([
'problem','risiko','nachteil','datenschutz',
'dsgvo','sicherheit','compliance',
'kritik','zweifel','unsicher'
] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::OBJECTION] += 3;
}
}
// ------------------------------------------------------------
// IMPLEMENTATION
// IMPLEMENTATION (Intent-Verben stärker)
// ------------------------------------------------------------
$implementationWords = [
'implementierung', 'einführung', 'einfuehrung',
'integration', 'aufwand', 'setup',
'rollout', 'migration', 'installation',
'technisch', 'api', 'schnittstelle'
];
foreach ($implementationWords as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::IMPLEMENTATION] += 2;
foreach ([
'implementierung','implementieren',
'integration','integrieren',
'einführung','einfuehrung',
'aufwand','setup','rollout',
'migration','installation',
'api','schnittstelle'
] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::IMPLEMENTATION] += 3;
}
}
// ------------------------------------------------------------
// ROI / Business Case
// ROI (weniger generisch)
// ------------------------------------------------------------
$roiWords = [
'roi', 'rentabilität', 'rentabilitaet',
'business case', 'nutzen',
'effizienz', 'einsparung', 'umsatz',
'wert', 'vorteil'
];
foreach ($roiWords as $word) {
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
$scores[self::ROI] += 2;
foreach ([
'roi','rentabilitaet','rentabilität',
'business case','einsparung',
'kosten senken','umsatz steigern',
'effizienz steigern'
] as $word) {
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
$scores[self::ROI] += 3;
}
}
// ------------------------------------------------------------
// Entscheidung
// ------------------------------------------------------------
arsort($scores);
$topIntent = array_key_first($scores);
@@ -141,20 +120,11 @@ final class SalesIntentLite
{
$s = mb_strtolower($s);
$replacements = [
'ä' => 'ae',
'ö' => 'oe',
'ü' => 'ue',
'ß' => 'ss',
];
foreach ($replacements as $umlaut => $alt) {
if (str_contains($s, $umlaut)) {
$s .= ' ' . str_replace($umlaut, $alt, $s);
break;
}
}
return $s;
return strtr($s, [
'ä'=>'ae',
'ö'=>'oe',
'ü'=>'ue',
'ß'=>'ss'
]);
}
}