optimize catalog semantic matches by tags
This commit is contained in:
@@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace App\Intent;
|
||||
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use App\Tag\TagTypes;
|
||||
|
||||
final class CatalogIntentLite
|
||||
{
|
||||
@@ -22,8 +23,11 @@ final class CatalogIntentLite
|
||||
'alle',
|
||||
];
|
||||
|
||||
private const MIN_SCORE = 0.60;
|
||||
private const AMBIGUITY_DELTA = 0.05;
|
||||
// Realistischer Gate-Wert
|
||||
private const MIN_SCORE = 0.50;
|
||||
|
||||
// Ambiguity darf nicht zu aggressiv sein
|
||||
private const AMBIGUITY_DELTA = 0.01;
|
||||
|
||||
public function __construct(
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
@@ -34,12 +38,12 @@ final class CatalogIntentLite
|
||||
{
|
||||
$normalizedPrompt = mb_strtolower($prompt);
|
||||
|
||||
// 1) Muss Listen-Signal enthalten
|
||||
// 1) Listen-Signal prüfen
|
||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 2) Guardrail: Nur DISCOVERY
|
||||
// 2) Nur DISCOVERY zulassen
|
||||
$sales = $this->salesIntentLite->detect($prompt);
|
||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
|
||||
@@ -47,7 +51,7 @@ final class CatalogIntentLite
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check)
|
||||
// 3) Vector-Search
|
||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||
|
||||
if ($hits === []) {
|
||||
@@ -61,7 +65,7 @@ final class CatalogIntentLite
|
||||
return null;
|
||||
}
|
||||
|
||||
// Ambiguity-Check
|
||||
// Ambiguity-Prüfung
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||
@@ -69,13 +73,12 @@ final class CatalogIntentLite
|
||||
}
|
||||
}
|
||||
|
||||
// 4) Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== 'catalog_entity') {
|
||||
// Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 5) Canonical Label zurückgeben
|
||||
$label = (string)($best['label'] ?? '');
|
||||
$label = trim((string)($best['label'] ?? ''));
|
||||
|
||||
if ($label === '') {
|
||||
return null;
|
||||
|
||||
@@ -4,16 +4,6 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Intent;
|
||||
|
||||
/**
|
||||
* SalesIntentLite
|
||||
*
|
||||
* Deterministische Vertriebs-Intent-Erkennung.
|
||||
* Kein LLM, kein ML, nur regelbasierte Klassifikation.
|
||||
*
|
||||
* WICHTIG:
|
||||
* - Immer mit ORIGINAL-Prompt aufrufen.
|
||||
* - Nicht mit gereinigter Query.
|
||||
*/
|
||||
final class SalesIntentLite
|
||||
{
|
||||
public const DISCOVERY = 'discovery';
|
||||
@@ -36,89 +26,78 @@ final class SalesIntentLite
|
||||
];
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// PRICING
|
||||
// PRICING (stark gewichten)
|
||||
// ------------------------------------------------------------
|
||||
$pricingWords = [
|
||||
'preis', 'preise', 'kosten', 'lizenz', 'lizenzmodell',
|
||||
'paket', 'pakete', 'tarif', 'tarife',
|
||||
'gebühr', 'gebuehr', 'monatlich', 'jährlich', 'jaehrlich',
|
||||
'abo', 'subscription'
|
||||
];
|
||||
|
||||
foreach ($pricingWords as $word) {
|
||||
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
|
||||
$scores[self::PRICING] += 2;
|
||||
foreach ([
|
||||
'preis','preise','kosten','lizenz','lizenzmodell',
|
||||
'tarif','tarife','gebuehr','gebühr',
|
||||
'monatlich','jaehrlich','jährlich','abo','subscription'
|
||||
] as $word) {
|
||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||
$scores[self::PRICING] += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// COMPARISON
|
||||
// COMPARISON (wichtiger für Katalog-Block)
|
||||
// ------------------------------------------------------------
|
||||
$comparisonPatterns = [
|
||||
'/\bvergleich\b/u',
|
||||
'/\bvs\b/u',
|
||||
'/\boder\b/u',
|
||||
'/\balternative(n)?\b/u',
|
||||
'/\bunterschied(e)?\b/u',
|
||||
'/\bbesser\b/u',
|
||||
];
|
||||
|
||||
foreach ($comparisonPatterns as $pattern) {
|
||||
foreach ([
|
||||
'/\bvergleich(en)?\b/u',
|
||||
'/\bvs\b/u',
|
||||
'/\bgegenueber\b/u',
|
||||
'/\boder\b/u',
|
||||
'/\balternative(n)?\b/u',
|
||||
'/\bunterschied(e)?\b/u',
|
||||
'/\bbesser\b/u'
|
||||
] as $pattern) {
|
||||
if (preg_match($pattern, $p)) {
|
||||
$scores[self::COMPARISON] += 2;
|
||||
$scores[self::COMPARISON] += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// OBJECTION
|
||||
// ------------------------------------------------------------
|
||||
$objectionWords = [
|
||||
'problem', 'risiko', 'nachteil', 'datenschutz',
|
||||
'dsgvo', 'sicherheit', 'compliance',
|
||||
'kritik', 'zweifel', 'unsicher'
|
||||
];
|
||||
|
||||
foreach ($objectionWords as $word) {
|
||||
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
|
||||
$scores[self::OBJECTION] += 2;
|
||||
foreach ([
|
||||
'problem','risiko','nachteil','datenschutz',
|
||||
'dsgvo','sicherheit','compliance',
|
||||
'kritik','zweifel','unsicher'
|
||||
] as $word) {
|
||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||
$scores[self::OBJECTION] += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// IMPLEMENTATION
|
||||
// IMPLEMENTATION (Intent-Verben stärker)
|
||||
// ------------------------------------------------------------
|
||||
$implementationWords = [
|
||||
'implementierung', 'einführung', 'einfuehrung',
|
||||
'integration', 'aufwand', 'setup',
|
||||
'rollout', 'migration', 'installation',
|
||||
'technisch', 'api', 'schnittstelle'
|
||||
];
|
||||
|
||||
foreach ($implementationWords as $word) {
|
||||
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
|
||||
$scores[self::IMPLEMENTATION] += 2;
|
||||
foreach ([
|
||||
'implementierung','implementieren',
|
||||
'integration','integrieren',
|
||||
'einführung','einfuehrung',
|
||||
'aufwand','setup','rollout',
|
||||
'migration','installation',
|
||||
'api','schnittstelle'
|
||||
] as $word) {
|
||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||
$scores[self::IMPLEMENTATION] += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// ROI / Business Case
|
||||
// ROI (weniger generisch)
|
||||
// ------------------------------------------------------------
|
||||
$roiWords = [
|
||||
'roi', 'rentabilität', 'rentabilitaet',
|
||||
'business case', 'nutzen',
|
||||
'effizienz', 'einsparung', 'umsatz',
|
||||
'wert', 'vorteil'
|
||||
];
|
||||
|
||||
foreach ($roiWords as $word) {
|
||||
if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) {
|
||||
$scores[self::ROI] += 2;
|
||||
foreach ([
|
||||
'roi','rentabilitaet','rentabilität',
|
||||
'business case','einsparung',
|
||||
'kosten senken','umsatz steigern',
|
||||
'effizienz steigern'
|
||||
] as $word) {
|
||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||
$scores[self::ROI] += 3;
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Entscheidung
|
||||
// ------------------------------------------------------------
|
||||
arsort($scores);
|
||||
|
||||
$topIntent = array_key_first($scores);
|
||||
@@ -141,20 +120,11 @@ final class SalesIntentLite
|
||||
{
|
||||
$s = mb_strtolower($s);
|
||||
|
||||
$replacements = [
|
||||
'ä' => 'ae',
|
||||
'ö' => 'oe',
|
||||
'ü' => 'ue',
|
||||
'ß' => 'ss',
|
||||
];
|
||||
|
||||
foreach ($replacements as $umlaut => $alt) {
|
||||
if (str_contains($s, $umlaut)) {
|
||||
$s .= ' ' . str_replace($umlaut, $alt, $s);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $s;
|
||||
return strtr($s, [
|
||||
'ä'=>'ae',
|
||||
'ö'=>'oe',
|
||||
'ü'=>'ue',
|
||||
'ß'=>'ss'
|
||||
]);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user