optimize catalog semantic matches by tags
This commit is contained in:
@@ -5,6 +5,7 @@ declare(strict_types=1);
|
|||||||
namespace App\Intent;
|
namespace App\Intent;
|
||||||
|
|
||||||
use App\Tag\TagVectorSearchClient;
|
use App\Tag\TagVectorSearchClient;
|
||||||
|
use App\Tag\TagTypes;
|
||||||
|
|
||||||
final class CatalogIntentLite
|
final class CatalogIntentLite
|
||||||
{
|
{
|
||||||
@@ -22,8 +23,11 @@ final class CatalogIntentLite
|
|||||||
'alle',
|
'alle',
|
||||||
];
|
];
|
||||||
|
|
||||||
private const MIN_SCORE = 0.60;
|
// Realistischer Gate-Wert
|
||||||
private const AMBIGUITY_DELTA = 0.05;
|
private const MIN_SCORE = 0.50;
|
||||||
|
|
||||||
|
// Ambiguity darf nicht zu aggressiv sein
|
||||||
|
private const AMBIGUITY_DELTA = 0.01;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private readonly SalesIntentLite $salesIntentLite,
|
private readonly SalesIntentLite $salesIntentLite,
|
||||||
@@ -34,12 +38,12 @@ final class CatalogIntentLite
|
|||||||
{
|
{
|
||||||
$normalizedPrompt = mb_strtolower($prompt);
|
$normalizedPrompt = mb_strtolower($prompt);
|
||||||
|
|
||||||
// 1) Muss Listen-Signal enthalten
|
// 1) Listen-Signal prüfen
|
||||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2) Guardrail: Nur DISCOVERY
|
// 2) Nur DISCOVERY zulassen
|
||||||
$sales = $this->salesIntentLite->detect($prompt);
|
$sales = $this->salesIntentLite->detect($prompt);
|
||||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||||
|
|
||||||
@@ -47,7 +51,7 @@ final class CatalogIntentLite
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check)
|
// 3) Vector-Search
|
||||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||||
|
|
||||||
if ($hits === []) {
|
if ($hits === []) {
|
||||||
@@ -61,7 +65,7 @@ final class CatalogIntentLite
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ambiguity-Check
|
// Ambiguity-Prüfung
|
||||||
if (isset($hits[1])) {
|
if (isset($hits[1])) {
|
||||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||||
@@ -69,13 +73,12 @@ final class CatalogIntentLite
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4) Nur catalog_entity zulassen
|
// Nur catalog_entity zulassen
|
||||||
if (($best['tag_type'] ?? null) !== 'catalog_entity') {
|
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5) Canonical Label zurückgeben
|
$label = trim((string)($best['label'] ?? ''));
|
||||||
$label = (string)($best['label'] ?? '');
|
|
||||||
|
|
||||||
if ($label === '') {
|
if ($label === '') {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -4,16 +4,6 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Intent;
|
namespace App\Intent;
|
||||||
|
|
||||||
/**
|
|
||||||
* SalesIntentLite
|
|
||||||
*
|
|
||||||
* Deterministische Vertriebs-Intent-Erkennung.
|
|
||||||
* Kein LLM, kein ML, nur regelbasierte Klassifikation.
|
|
||||||
*
|
|
||||||
* WICHTIG:
|
|
||||||
* - Immer mit ORIGINAL-Prompt aufrufen.
|
|
||||||
* - Nicht mit gereinigter Query.
|
|
||||||
*/
|
|
||||||
final class SalesIntentLite
|
final class SalesIntentLite
|
||||||
{
|
{
|
||||||
public const DISCOVERY = 'discovery';
|
public const DISCOVERY = 'discovery';
|
||||||
@@ -36,89 +26,78 @@ final class SalesIntentLite
|
|||||||
];
|
];
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// PRICING
|
// PRICING (stark gewichten)
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
$pricingWords = [
|
foreach ([
|
||||||
'preis','preise','kosten','lizenz','lizenzmodell',
|
'preis','preise','kosten','lizenz','lizenzmodell',
|
||||||
'paket', 'pakete', 'tarif', 'tarife',
|
'tarif','tarife','gebuehr','gebühr',
|
||||||
'gebühr', 'gebuehr', 'monatlich', 'jährlich', 'jaehrlich',
|
'monatlich','jaehrlich','jährlich','abo','subscription'
|
||||||
'abo', 'subscription'
|
] as $word) {
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($pricingWords as $word) {
|
|
||||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||||
$scores[self::PRICING] += 2;
|
$scores[self::PRICING] += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// COMPARISON
|
// COMPARISON (wichtiger für Katalog-Block)
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
$comparisonPatterns = [
|
foreach ([
|
||||||
'/\bvergleich\b/u',
|
'/\bvergleich(en)?\b/u',
|
||||||
'/\bvs\b/u',
|
'/\bvs\b/u',
|
||||||
|
'/\bgegenueber\b/u',
|
||||||
'/\boder\b/u',
|
'/\boder\b/u',
|
||||||
'/\balternative(n)?\b/u',
|
'/\balternative(n)?\b/u',
|
||||||
'/\bunterschied(e)?\b/u',
|
'/\bunterschied(e)?\b/u',
|
||||||
'/\bbesser\b/u',
|
'/\bbesser\b/u'
|
||||||
];
|
] as $pattern) {
|
||||||
|
|
||||||
foreach ($comparisonPatterns as $pattern) {
|
|
||||||
if (preg_match($pattern, $p)) {
|
if (preg_match($pattern, $p)) {
|
||||||
$scores[self::COMPARISON] += 2;
|
$scores[self::COMPARISON] += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// OBJECTION
|
// OBJECTION
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
$objectionWords = [
|
foreach ([
|
||||||
'problem','risiko','nachteil','datenschutz',
|
'problem','risiko','nachteil','datenschutz',
|
||||||
'dsgvo','sicherheit','compliance',
|
'dsgvo','sicherheit','compliance',
|
||||||
'kritik','zweifel','unsicher'
|
'kritik','zweifel','unsicher'
|
||||||
];
|
] as $word) {
|
||||||
|
|
||||||
foreach ($objectionWords as $word) {
|
|
||||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||||
$scores[self::OBJECTION] += 2;
|
$scores[self::OBJECTION] += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// IMPLEMENTATION
|
// IMPLEMENTATION (Intent-Verben stärker)
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
$implementationWords = [
|
foreach ([
|
||||||
'implementierung', 'einführung', 'einfuehrung',
|
'implementierung','implementieren',
|
||||||
'integration', 'aufwand', 'setup',
|
'integration','integrieren',
|
||||||
'rollout', 'migration', 'installation',
|
'einführung','einfuehrung',
|
||||||
'technisch', 'api', 'schnittstelle'
|
'aufwand','setup','rollout',
|
||||||
];
|
'migration','installation',
|
||||||
|
'api','schnittstelle'
|
||||||
foreach ($implementationWords as $word) {
|
] as $word) {
|
||||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||||
$scores[self::IMPLEMENTATION] += 2;
|
$scores[self::IMPLEMENTATION] += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// ROI / Business Case
|
// ROI (weniger generisch)
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
$roiWords = [
|
foreach ([
|
||||||
'roi', 'rentabilität', 'rentabilitaet',
|
'roi','rentabilitaet','rentabilität',
|
||||||
'business case', 'nutzen',
|
'business case','einsparung',
|
||||||
'effizienz', 'einsparung', 'umsatz',
|
'kosten senken','umsatz steigern',
|
||||||
'wert', 'vorteil'
|
'effizienz steigern'
|
||||||
];
|
] as $word) {
|
||||||
|
|
||||||
foreach ($roiWords as $word) {
|
|
||||||
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) {
|
||||||
$scores[self::ROI] += 2;
|
$scores[self::ROI] += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
|
||||||
// Entscheidung
|
|
||||||
// ------------------------------------------------------------
|
|
||||||
arsort($scores);
|
arsort($scores);
|
||||||
|
|
||||||
$topIntent = array_key_first($scores);
|
$topIntent = array_key_first($scores);
|
||||||
@@ -141,20 +120,11 @@ final class SalesIntentLite
|
|||||||
{
|
{
|
||||||
$s = mb_strtolower($s);
|
$s = mb_strtolower($s);
|
||||||
|
|
||||||
$replacements = [
|
return strtr($s, [
|
||||||
'ä'=>'ae',
|
'ä'=>'ae',
|
||||||
'ö'=>'oe',
|
'ö'=>'oe',
|
||||||
'ü'=>'ue',
|
'ü'=>'ue',
|
||||||
'ß' => 'ss',
|
'ß'=>'ss'
|
||||||
];
|
]);
|
||||||
|
|
||||||
foreach ($replacements as $umlaut => $alt) {
|
|
||||||
if (str_contains($s, $umlaut)) {
|
|
||||||
$s .= ' ' . str_replace($umlaut, $alt, $s);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $s;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user