first commit
This commit is contained in:
@@ -6,82 +6,132 @@ namespace App\Intent;
|
||||
|
||||
use App\Config\CatalogIntentConfig;
|
||||
use App\Knowledge\Retrieval\QueryCleaner;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use App\Tag\TagTypes;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
|
||||
/**
 * Lightweight catalog entity detector.
 *
 * Responsibilities:
 * - clean the user query for tag lookup
 * - query the tag vector index
 * - keep only catalog_entity hits
 * - apply confidence and ambiguity gates
 * - return one canonical entity label or null
 */
final readonly class CatalogIntentLite
{
    /**
     * Number of hits requested from the tag vector index per lookup.
     *
     * Slightly wider than the old top-3 search so generic tags do not crowd out
     * relevant catalog_entity hits too easily.
     */
    private const SEARCH_LIMIT = 6;

    /**
     * @param TagVectorSearchClient $tagVectorClient Client used to search the tag vector index.
     * @param QueryCleaner          $queryCleaner    Cleans the raw prompt before it is sent to the index.
     */
    public function __construct(
        private TagVectorSearchClient $tagVectorClient,
        private QueryCleaner $queryCleaner,
    ) {
    }

    /**
     * Returns the canonical normalized label of the detected catalog entity,
     * or null when no safe entity match exists.
     *
     * The result is null when any of the following holds:
     * - the prompt, or the cleaned query derived from it, is empty
     * - the index returns no catalog_entity hit
     * - the best catalog_entity score is below CatalogIntentConfig::MIN_SCORE
     * - the runner-up catalog_entity score is closer to the best score than
     *   CatalogIntentConfig::AMBIGUITY_DELTA
     * - the best hit carries no usable label
     *
     * @param string $prompt Raw user input.
     *
     * @return string|null Lower-cased, whitespace-collapsed entity label, or null.
     */
    public function detect(string $prompt): ?string
    {
        $prompt = trim($prompt);

        if ($prompt === '') {
            return null;
        }

        // The cleaner may strip everything; an empty query must never reach the index.
        $cleanQuery = trim($this->queryCleaner->clean($prompt));

        if ($cleanQuery === '') {
            return null;
        }

        // Filter before gating so both gates compare catalog entities only.
        $catalogHits = $this->filterCatalogEntityHits(
            $this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT)
        );

        if ($catalogHits === []) {
            return null;
        }

        // Hits are sorted by descending score, so index 0 is the strongest candidate.
        $best = $catalogHits[0];
        $bestScore = $best['score'];

        // Confidence gate.
        if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
            return null;
        }

        // Ambiguity gate: two catalog entities scoring almost the same are not a safe match.
        if (isset($catalogHits[1])) {
            $secondScore = $catalogHits[1]['score'];

            if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
                return null;
            }
        }

        $label = $this->normalizeLabel($best['label'] ?? '');

        return $label !== '' ? $label : null;
    }

    /**
     * Keeps only the catalog_entity hits and orders them by descending score.
     *
     * Fields are read defensively: a hit without a tag_id is dropped, a missing
     * score counts as 0.0 and a missing tag_type is treated as TagTypes::GENERIC
     * before normalization. The label key is only present in the result when the
     * incoming hit carried one, so the returned shape matches the declaration.
     *
     * @param array<int, array{
     *   tag_id:string,
     *   score:float,
     *   label?:string,
     *   tag_type?:string
     * }> $hits
     *
     * @return list<array{
     *   tag_id:string,
     *   score:float,
     *   label?:string,
     *   tag_type:string
     * }>
     */
    private function filterCatalogEntityHits(array $hits): array
    {
        $filtered = [];

        foreach ($hits as $hit) {
            $tagId = trim((string) ($hit['tag_id'] ?? ''));
            $tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));

            if ($tagId === '' || $tagType !== TagTypes::CATALOG_ENTITY) {
                continue;
            }

            $entry = [
                'tag_id' => $tagId,
                'score' => (float) ($hit['score'] ?? 0.0),
                'tag_type' => $tagType,
            ];

            // Omit the key instead of storing null: the declared shape is label?:string.
            if (isset($hit['label'])) {
                $entry['label'] = (string) $hit['label'];
            }

            $filtered[] = $entry;
        }

        usort(
            $filtered,
            static fn (array $left, array $right): int => $right['score'] <=> $left['score'],
        );

        return $filtered;
    }

    /**
     * Lower-cases the label, collapses every whitespace run into a single
     * space and trims the result.
     */
    private function normalizeLabel(string $label): string
    {
        $label = mb_strtolower(trim($label));

        // preg_replace() returns null on failure; keep the lower-cased label in that case.
        $label = preg_replace('/\s+/u', ' ', $label) ?? $label;

        return trim($label);
    }
}
|
||||
Reference in New Issue
Block a user