optimize catalog semantic match by tags
This commit is contained in:
@@ -4,27 +4,10 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Intent;
|
||||
|
||||
/**
|
||||
* CatalogIntentLite
|
||||
*
|
||||
* Minimal, deterministische Erkennung von Katalog-/Entity-Listenanfragen.
|
||||
*
|
||||
* Ziel:
|
||||
* - "Liste aller Geräte" / "Welche Indikatoren gibt es?" / "Zeig mir alle Funktionen"
|
||||
*
|
||||
* Guardrails:
|
||||
* - Kein Catalog-Mode bei Sales-/Pricing-/Comparison-/ROI-/Implementation-/Objection-Intents.
|
||||
* - Kein Catalog-Mode ohne expliziten Entity-Term.
|
||||
*
|
||||
* WICHTIG:
|
||||
* - Immer mit ORIGINAL-Prompt aufrufen.
|
||||
* - Kein LLM, kein ML.
|
||||
*/
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
|
||||
final class CatalogIntentLite
|
||||
{
|
||||
/**
|
||||
* Listensignale (leichtgewichtig) – IntentLite bleibt weiterhin für "allgemeine" List Detection zuständig.
|
||||
*/
|
||||
private const LIST_SIGNALS = [
|
||||
'liste',
|
||||
'auflisten',
|
||||
@@ -39,100 +22,76 @@ final class CatalogIntentLite
|
||||
'alle',
|
||||
];
|
||||
|
||||
/**
|
||||
* Entity-Terms, die wir als Katalogtypen unterstützen.
|
||||
*
|
||||
* Left side: canonical term (für Tag-Suche)
|
||||
* Right side: Such-Synonyme, die im Prompt vorkommen dürfen.
|
||||
*/
|
||||
private const ENTITY_TERMS = [
|
||||
'geräte' => ['gerät', 'geräte', 'geraet', 'geraete', 'device', 'devices'],
|
||||
'indikatoren' => ['indikator', 'indikatoren', 'indicator', 'indicators'],
|
||||
'funktionen' => ['funktion', 'funktionen', 'feature', 'features', 'funktionalität', 'funktionalitaet'],
|
||||
'zubehör' => ['zubehör', 'zubehoer', 'accessory', 'accessories', 'zubehor'],
|
||||
];
|
||||
private const MIN_SCORE = 0.60;
|
||||
private const AMBIGUITY_DELTA = 0.05;
|
||||
|
||||
public function __construct(
|
||||
private readonly SalesIntentLite $salesIntentLite,
|
||||
private readonly TagVectorSearchClient $tagVectorClient,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* @return string|null canonical entity term (z. B. "geräte") oder null wenn kein Catalog-Intent.
|
||||
*/
|
||||
public function detect(string $originalPrompt): ?string
|
||||
public function detect(string $prompt): ?string
|
||||
{
|
||||
$p = $this->normalize($originalPrompt);
|
||||
$normalizedPrompt = mb_strtolower($prompt);
|
||||
|
||||
// 1) Muss ein Listen-Signal enthalten
|
||||
if (!$this->containsAny($p, self::LIST_SIGNALS)) {
|
||||
// 1) Muss Listen-Signal enthalten
|
||||
if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 2) Guardrail: Kein Catalog-Mode bei Sales-Intents
|
||||
$sales = $this->salesIntentLite->detect($originalPrompt);
|
||||
// 2) Guardrail: Nur DISCOVERY
|
||||
$sales = $this->salesIntentLite->detect($prompt);
|
||||
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
|
||||
|
||||
if ($intent !== SalesIntentLite::DISCOVERY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3) Expliziten Entity-Term extrahieren (sonst kein Catalog)
|
||||
foreach (self::ENTITY_TERMS as $canonical => $synonyms) {
|
||||
foreach ($synonyms as $syn) {
|
||||
if ($this->containsWord($p, $syn)) {
|
||||
return $canonical;
|
||||
}
|
||||
// 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check)
|
||||
$hits = $this->tagVectorClient->search($prompt, 3);
|
||||
|
||||
if ($hits === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$best = $hits[0];
|
||||
$bestScore = (float)($best['score'] ?? 0.0);
|
||||
|
||||
if ($bestScore < self::MIN_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Ambiguity-Check
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
// 4) Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== 'catalog_entity') {
|
||||
return null;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Helpers
|
||||
// ------------------------------------------------------------
|
||||
// 5) Canonical Label zurückgeben
|
||||
$label = (string)($best['label'] ?? '');
|
||||
|
||||
if ($label === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
return mb_strtolower($label);
|
||||
}
|
||||
|
||||
private function containsAny(string $haystack, array $needles): bool
|
||||
{
|
||||
foreach ($needles as $needle) {
|
||||
if ($needle === '') {
|
||||
continue;
|
||||
}
|
||||
if (str_contains($haystack, $needle)) {
|
||||
if ($needle !== '' && str_contains($haystack, $needle)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether $word occurs in $haystack delimited by word boundaries.
 *
 * @param string $haystack Text to search in.
 * @param string $word     Term to look for; surrounding whitespace is trimmed first.
 *
 * @return bool True only when preg_match() reports exactly one match (=== 1);
 *              an empty term, no match, or a preg_match() failure all yield false.
 */
private function containsWord(string $haystack, string $word): bool
|
||||
{
|
||||
$word = trim($word);
|
||||
// An empty term would produce the pattern /\b\b/u, so reject it up front.
if ($word === '') {
|
||||
return false;
|
||||
}
|
||||
// preg_quote() escapes regex metacharacters and the "/" delimiter in the term;
// the "u" modifier makes PCRE treat pattern and subject as UTF-8.
return preg_match('/\b' . preg_quote($word, '/') . '\b/u', $haystack) === 1;
|
||||
}
|
||||
|
||||
/**
 * Lower-cases the input and, if it contains a German special character,
 * appends a transliterated copy of the text after a single space.
 *
 * The original text is kept, so both spellings can be matched afterwards.
 *
 * NOTE(review): the loop stops at the first character from the table that is
 * present, so the appended copy has only that one character replaced
 * (e.g. a string containing both "ä" and "ü" gets "ä" -> "ae" only).
 * Confirm whether this is intended.
 *
 * @param string $s Raw text.
 *
 * @return string Lower-cased text, optionally followed by " " and the partially
 *                transliterated copy.
 */
private function normalize(string $s): string
|
||||
{
|
||||
$s = mb_strtolower($s);
|
||||
|
||||
// Cover umlaut spellings (analogous to IntentLite/SalesIntentLite)
|
||||
$replacements = [
|
||||
'ä' => 'ae',
|
||||
'ö' => 'oe',
|
||||
'ü' => 'ue',
|
||||
'ß' => 'ss',
|
||||
];
|
||||
|
||||
// Table order decides which character is handled: the first one found wins.
foreach ($replacements as $umlaut => $alt) {
|
||||
if (str_contains($s, $umlaut)) {
|
||||
// Append rather than replace, so the untouched lower-cased text stays in front.
$s .= ' ' . str_replace($umlaut, $alt, $s);
|
||||
// Only one transliteration pass is performed.
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $s;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user