132 lines
3.3 KiB
PHP
132 lines
3.3 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Intent;
|
|
|
|
use App\Config\CatalogIntentConfig;
|
|
use App\Knowledge\Retrieval\QueryCleaner;
|
|
use App\Tag\TagTypes;
|
|
use App\Tag\TagVectorSearchClient;
|
|
|
|
/**
 * Lightweight catalog entity detector.
 *
 * Responsibilities:
 * - clean the user query for tag lookup
 * - query the tag vector index
 * - keep only catalog_entity hits
 * - apply confidence and ambiguity gates
 * - return one canonical entity label or null
 */
final readonly class CatalogIntentLite
{
    public function __construct(
        private TagVectorSearchClient $tagVectorClient,
        private QueryCleaner $queryCleaner,
        private CatalogIntentConfig $config,
    ) {
    }

    /**
     * Returns the canonical normalized label of the detected catalog entity,
     * or null when no safe entity match exists.
     *
     * Null is returned when:
     * - the prompt, or the query produced by the cleaner, is empty after trimming;
     * - the tag search yields no catalog-entity hit;
     * - the config does not accept the best hit's score;
     * - the config reports the best and second-best scores as ambiguous;
     * - the best hit has no label, or its label normalizes to an empty string.
     *
     * @param string $prompt Raw user prompt.
     *
     * @return string|null Lower-cased, whitespace-collapsed label of the best hit, or null.
     */
    public function detect(string $prompt): ?string
    {
        $prompt = trim($prompt);

        if ($prompt === '') {
            return null;
        }

        $cleanQuery = trim($this->queryCleaner->clean($prompt));

        if ($cleanQuery === '') {
            return null;
        }

        $catalogHits = $this->filterCatalogEntityHits(
            $this->tagVectorClient->search($cleanQuery, $this->config->getIntentSearchLimit()),
        );

        if ($catalogHits === []) {
            return null;
        }

        // Hits are sorted by descending score and already carry float scores,
        // so index 0 is the best candidate and index 1 (if any) the runner-up.
        $best = $catalogHits[0];
        $bestScore = $best['score'];

        if (!$this->config->isScoreAccepted($bestScore)) {
            return null;
        }

        $runnerUp = $catalogHits[1] ?? null;

        if ($runnerUp !== null && $this->config->isAmbiguous($bestScore, $runnerUp['score'])) {
            return null;
        }

        // The label key is optional on a hit; a missing label is treated as empty.
        $label = $this->normalizeLabel($best['label'] ?? '');

        return $label !== '' ? $label : null;
    }

    /**
     * Keeps only hits whose normalized tag type is TagTypes::CATALOG_ENTITY and
     * whose tag id is non-blank, then orders them by score, highest first.
     *
     * Each kept hit is rebuilt with a trimmed tag id, a float score (0.0 when
     * the hit has none) and the normalized tag type. The label key is copied
     * only when the incoming hit carries a non-null label, so the result
     * matches the declared shape (label absent or string, never null).
     *
     * @param array<int, array{
     *     tag_id:string,
     *     score:float,
     *     label?:string,
     *     tag_type?:string
     * }> $hits
     *
     * @return list<array{
     *     tag_id:string,
     *     score:float,
     *     label?:string,
     *     tag_type:string
     * }>
     */
    private function filterCatalogEntityHits(array $hits): array
    {
        $filtered = [];

        foreach ($hits as $hit) {
            $tagId = trim((string) ($hit['tag_id'] ?? ''));

            if ($tagId === '') {
                continue;
            }

            // Hits without a tag type are classified as generic and therefore dropped below.
            $tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));

            if ($tagType !== TagTypes::CATALOG_ENTITY) {
                continue;
            }

            $entry = [
                'tag_id' => $tagId,
                'score' => (float) ($hit['score'] ?? 0.0),
                'tag_type' => $tagType,
            ];

            // Leave the optional key out instead of storing null.
            if (isset($hit['label'])) {
                $entry['label'] = (string) $hit['label'];
            }

            $filtered[] = $entry;
        }

        // usort is stable (PHP 8.0+), so hits with equal scores keep their incoming order.
        usort(
            $filtered,
            static fn (array $left, array $right): int => $right['score'] <=> $left['score'],
        );

        return $filtered;
    }

    /**
     * Lower-cases the label and collapses every run of whitespace into a
     * single space, with no leading or trailing whitespace.
     *
     * If preg_replace() fails and returns null, the lower-cased label is used
     * without whitespace collapsing.
     */
    private function normalizeLabel(string $label): string
    {
        $label = mb_strtolower(trim($label));
        $label = preg_replace('/\s+/u', ' ', $label) ?? $label;

        return trim($label);
    }
}