cleanup catalog intent

This commit is contained in:
team2
2026-03-03 07:55:03 +01:00
parent e7047cd885
commit 1f93238bf4

View File

@@ -11,7 +11,9 @@ use App\Tag\TagTypes;
/** /**
* CatalogIntentLite * CatalogIntentLite
* *
* Verantwortlich ausschließlich für: * Reiner Entity-Detector.
*
* Verantwortlich nur für:
* - Vector-Tag-Erkennung * - Vector-Tag-Erkennung
* - Score-Gate * - Score-Gate
* - Ambiguity-Check * - Ambiguity-Check
@@ -24,14 +26,27 @@ use App\Tag\TagTypes;
*/ */
final class CatalogIntentLite final class CatalogIntentLite
{ {
/**
* Minimaler Similarity-Score.
* Verhindert Rauschen.
*/
private const MIN_SCORE = 0.72; private const MIN_SCORE = 0.72;
private const AMBIGUITY_DELTA = 0.03;
/**
* Differenz zwischen Top1 und Top2,
* damit kein unsicherer Treffer akzeptiert wird.
*/
private const AMBIGUITY_DELTA = 0.02;
public function __construct( public function __construct(
private readonly TagVectorSearchClient $tagVectorClient, private readonly TagVectorSearchClient $tagVectorClient,
private readonly QueryCleaner $queryCleaner, private readonly QueryCleaner $queryCleaner,
) {} ) {}
/**
* Gibt das canonical Label der erkannten catalog_entity zurück
* oder null, wenn kein sauberer Treffer.
*/
public function detect(string $prompt): ?string public function detect(string $prompt): ?string
{ {
$prompt = trim($prompt); $prompt = trim($prompt);
@@ -39,82 +54,10 @@ final class CatalogIntentLite
return null; return null;
} }
$clean = $this->queryCleaner->clean($prompt); $promptTag = $this->queryCleaner->clean($prompt);
if ($clean === '') {
$clean = $prompt;
}
// ---------------------------------------------------- // 1) Tag-Vector-Suche
// 1⃣ Primär: Vollquery testen $hits = $this->tagVectorClient->search($promptTag, 3);
// ----------------------------------------------------
$label = $this->detectFromQuery($clean);
if ($label !== null) {
return $label;
}
// ----------------------------------------------------
// 2⃣ Fallback: Tokenweise testen
// (wichtig für "geräteliste testomat")
// ----------------------------------------------------
$tokens = $this->tokenize($clean);
$bestLabel = null;
$bestScore = 0.0;
foreach ($tokens as $token) {
// sehr kurze Tokens ignorieren (Noise)
if (mb_strlen($token) < 3) {
continue;
}
$hits = $this->tagVectorClient->search($token, 3);
if ($hits === []) {
continue;
}
$top = $hits[0] ?? null;
if (!is_array($top)) {
continue;
}
$score = (float)($top['score'] ?? 0.0);
if ($score < self::MIN_SCORE) {
continue;
}
// Ambiguity-Check
if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($score - $secondScore) < self::AMBIGUITY_DELTA) {
continue;
}
}
if (($top['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
continue;
}
if ($score > $bestScore) {
$bestScore = $score;
$bestLabel = trim((string)($top['label'] ?? ''));
}
}
if ($bestLabel === null || $bestLabel === '') {
return null;
}
return mb_strtolower($bestLabel);
}
private function detectFromQuery(string $query): ?string
{
$hits = $this->tagVectorClient->search($query, 3);
if ($hits === []) { if ($hits === []) {
return null; return null;
@@ -123,21 +66,26 @@ final class CatalogIntentLite
$best = $hits[0]; $best = $hits[0];
$bestScore = (float)($best['score'] ?? 0.0); $bestScore = (float)($best['score'] ?? 0.0);
// 2) Score-Tags
if ($bestScore < self::MIN_SCORE) { if ($bestScore < self::MIN_SCORE) {
return null; return null;
} }
// 3) Ambiguity-Check
if (isset($hits[1])) { if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0); $secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
return null; return null;
} }
} }
// 4) Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) { if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null; return null;
} }
// 5) Canonical Label
$label = trim((string)($best['label'] ?? '')); $label = trim((string)($best['label'] ?? ''));
if ($label === '') { if ($label === '') {
@@ -146,31 +94,4 @@ final class CatalogIntentLite
return mb_strtolower($label); return mb_strtolower($label);
} }
/**
 * Splits a text into its distinct whitespace-separated tokens.
 *
 * The input is trimmed and split on runs of whitespace (the pattern carries
 * the `u` modifier). Empty fragments are discarded and repeated tokens are
 * dropped, so each token appears once, in order of first occurrence.
 * Tokens are compared as-is; no case folding is applied here.
 *
 * @param string $text Text to split.
 * @return array List of unique tokens; empty when the split fails or yields nothing.
 */
private function tokenize(string $text): array
{
$parts = preg_split('/\s+/u', trim($text));
// preg_split() yields false on failure; an empty result is treated the same way.
if (!$parts) {
return [];
}
// $seen is a lookup set keyed by token; $out collects tokens in first-seen order.
$seen = [];
$out = [];
foreach ($parts as $p) {
$p = trim($p);
// Skip empty fragments (e.g. the single '' produced for an empty input).
if ($p === '') {
continue;
}
// Skip tokens that were already emitted.
if (isset($seen[$p])) {
continue;
}
$seen[$p] = true;
$out[] = $p;
}
return $out;
}
} }