Files
MtoRagSystem/src/Intent/CatalogIntentLite.php
2026-04-29 20:55:21 +02:00

132 lines
3.3 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Intent;
use App\Config\CatalogIntentConfig;
use App\Knowledge\Retrieval\QueryCleaner;
use App\Tag\TagTypes;
use App\Tag\TagVectorSearchClient;
/**
 * Lightweight catalog entity detector.
 *
 * Responsibilities:
 * - clean the user query for tag lookup
 * - query the tag vector index
 * - keep only catalog_entity hits
 * - apply confidence and ambiguity gates
 * - return one canonical entity label or null
 */
final readonly class CatalogIntentLite
{
    public function __construct(
        private TagVectorSearchClient $tagVectorClient,
        private QueryCleaner $queryCleaner,
        private CatalogIntentConfig $config,
    ) {
    }

    /**
     * Returns the canonical normalized label of the detected catalog entity,
     * or null when no safe entity match exists.
     *
     * Null is returned when any of the following holds:
     * - the prompt, or the cleaned query derived from it, is blank
     * - the tag search yields no usable catalog_entity hit
     * - the best score is not accepted by the configuration
     * - the configuration reports the two best scores as ambiguous
     * - the winning hit carries no usable label
     *
     * @param string $prompt Raw user input.
     *
     * @return string|null Lower-cased, whitespace-collapsed label, or null.
     */
    public function detect(string $prompt): ?string
    {
        $prompt = trim($prompt);
        if ($prompt === '') {
            return null;
        }

        $cleanQuery = trim($this->queryCleaner->clean($prompt));
        if ($cleanQuery === '') {
            return null;
        }

        $catalogHits = $this->filterCatalogEntityHits(
            $this->tagVectorClient->search($cleanQuery, $this->config->getIntentSearchLimit())
        );
        if ($catalogHits === []) {
            return null;
        }

        // Hits come back sorted by score descending, so index 0 is the best
        // candidate and index 1 (when present) is the runner-up.
        $best = $catalogHits[0];
        $bestScore = $best['score'];
        if (!$this->config->isScoreAccepted($bestScore)) {
            return null;
        }

        // The ambiguity gate only applies when there is a runner-up to compare with.
        if (isset($catalogHits[1]) && $this->config->isAmbiguous($bestScore, $catalogHits[1]['score'])) {
            return null;
        }

        $label = $this->normalizeLabel($best['label']);

        return $label !== '' ? $label : null;
    }

    /**
     * Keeps the hits typed as catalog entities and orders them best-first.
     *
     * Hits with a blank tag_id are dropped. A missing tag_type is treated as
     * TagTypes::GENERIC before normalization, a missing score as 0.0 and a
     * missing label as an empty string, so every returned row has the same
     * fully-populated shape.
     *
     * @param array<int, array{
     *     tag_id:string,
     *     score:float,
     *     label?:string,
     *     tag_type?:string
     * }> $hits
     *
     * @return list<array{
     *     tag_id:string,
     *     score:float,
     *     label:string,
     *     tag_type:string
     * }>
     */
    private function filterCatalogEntityHits(array $hits): array
    {
        $filtered = [];

        foreach ($hits as $hit) {
            $tagId = trim((string) ($hit['tag_id'] ?? ''));
            $score = (float) ($hit['score'] ?? 0.0);
            $tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));

            if ($tagId === '' || $tagType !== TagTypes::CATALOG_ENTITY) {
                continue;
            }

            $filtered[] = [
                'tag_id' => $tagId,
                'score' => $score,
                // Always a string: the declared shape has no room for null,
                // and the caller treats "missing" and "empty" the same way.
                'label' => (string) ($hit['label'] ?? ''),
                'tag_type' => $tagType,
            ];
        }

        // Highest score first. PHP >= 8.0 sorts are stable, so hits with equal
        // scores keep the order in which the search client returned them.
        usort(
            $filtered,
            static fn (array $left, array $right): int => $right['score'] <=> $left['score']
        );

        return $filtered;
    }

    /**
     * Lower-cases the label, collapses every whitespace run into a single
     * space and trims the result.
     */
    private function normalizeLabel(string $label): string
    {
        $label = mb_strtolower(trim($label));
        // preg_replace() returns null on failure (e.g. malformed UTF-8 under
        // the /u modifier); fall back to the un-collapsed label in that case.
        $label = preg_replace('/\s+/u', ' ', $label) ?? $label;

        return trim($label);
    }
}