First update: move hard-coded tuning constants to external config values

This commit is contained in:
team 1
2026-04-24 13:13:56 +02:00
parent 868f9a8857
commit 26ec0afc5c
11 changed files with 292 additions and 187 deletions

View File

@@ -15,41 +15,16 @@ final class TagRoutingService
/**
* Number of raw tag hits requested from the vector service.
*/
private const DEFAULT_TOPK = 8;
/**
* Hard minimum confidence required to activate tag-based document routing.
*
* This intentionally aligns with the tag vector client gate to avoid
* misleading secondary thresholds in this class.
*/
private const MIN_BEST_SCORE = 0.72;
/**
* Only keep tag hits that stay reasonably close to the best hit.
* This reduces semantic spillover into weakly related document spaces.
*/
private const MAX_SCORE_DROP_FROM_BEST = 0.08;
/**
* Maximum number of tag hits that may influence routing.
*/
private const MAX_ROUTING_TAGS = 5;
/**
* Maximum number of candidate documents passed into scoped chunk search.
*/
private const MAX_CANDIDATE_DOCS = 80;
/**
* Small bonus for documents matched by multiple routed tags.
*/
private const MULTI_TAG_BONUS_PER_EXTRA_TAG = 0.05;
private const MAX_MULTI_TAG_BONUS = 0.15;
/**
* Wires the tag search client and the tunable routing thresholds.
*
* Every tuning parameter has a default, so the service can be constructed
* with only the two collaborators; any other value is expected to be
* injected by the caller.
*
* NOTE(review): none of the numeric values are validated here (the
* constructor body is empty) — confirm that the injecting configuration
* guarantees sensible ranges, e.g. positive limits.
*
* @param TagVectorSearchClient $tagSearch Client queried for raw tag hits.
* @param EntityManagerInterface $em Entity manager; its usage is not visible
*                                   in this excerpt — TODO confirm purpose.
* @param int $defaultTopK Number of raw tag hits requested from the vector
*                         service (passed as the search limit).
* @param float $minBestScore Minimum score the best hit must reach; below it
*                            no routing hits are returned. Also acts as the
*                            floor for the minimum accepted score.
* @param float $maxScoreDropFromBest Subtracted from the best score to derive
*                                    the minimum accepted score, so only hits
*                                    reasonably close to the best one are kept.
* @param int $maxRoutingTags Hit filtering stops once this many tag hits have
*                            been collected.
* @param int $maxCandidateDocs Maximum number of candidate document ids
*                              returned from scoring.
* @param float $multiTagBonusPerExtraTag Score bonus added per matched tag
*                                        beyond the first.
* @param float $maxMultiTagBonus Upper cap for the total multi-tag bonus.
*/
public function __construct(
private readonly TagVectorSearchClient $tagSearch,
private readonly EntityManagerInterface $em,
private readonly int $defaultTopK = 8,
private readonly float $minBestScore = 0.72,
private readonly float $maxScoreDropFromBest = 0.08,
private readonly int $maxRoutingTags = 5,
private readonly int $maxCandidateDocs = 80,
private readonly float $multiTagBonusPerExtraTag = 0.05,
private readonly float $maxMultiTagBonus = 0.15,
) {
}
@@ -71,7 +46,7 @@ final class TagRoutingService
}
$hits = $this->filterRoutingHits(
$this->tagSearch->search($query, self::DEFAULT_TOPK)
$this->tagSearch->search($query, $this->defaultTopK)
);
if ($hits === []) {
@@ -159,8 +134,8 @@ final class TagRoutingService
if ($matchedTagCount > 1) {
$documentScores[$documentId] += min(
self::MAX_MULTI_TAG_BONUS,
($matchedTagCount - 1) * self::MULTI_TAG_BONUS_PER_EXTRA_TAG
$this->maxMultiTagBonus,
($matchedTagCount - 1) * $this->multiTagBonusPerExtraTag
);
}
}
@@ -170,7 +145,7 @@ final class TagRoutingService
return array_slice(
array_keys($documentScores),
0,
self::MAX_CANDIDATE_DOCS
$this->maxCandidateDocs
);
}
@@ -196,13 +171,13 @@ final class TagRoutingService
$bestScore = (float) ($hits[0]['score'] ?? 0.0);
if ($bestScore < self::MIN_BEST_SCORE) {
if ($bestScore < $this->minBestScore) {
return [];
}
$minimumAcceptedScore = max(
self::MIN_BEST_SCORE,
$bestScore - self::MAX_SCORE_DROP_FROM_BEST
$this->minBestScore,
$bestScore - $this->maxScoreDropFromBest
);
$filtered = [];
@@ -230,7 +205,7 @@ final class TagRoutingService
'tag_type' => $tagType,
];
if (count($filtered) >= self::MAX_ROUTING_TAGS) {
if (count($filtered) >= $this->maxRoutingTags) {
break;
}
}

View File

@@ -9,46 +9,20 @@ use Symfony\Contracts\HttpClient\HttpClientInterface;
final readonly class TagVectorSearchClient
{
/**
* Minimum similarity score required for a tag to be considered.
*/
public const MIN_SCORE = 0.72;
/**
* Default result size when callers do not specify a limit.
*/
private const DEFAULT_LIMIT = 8;
/**
* Hard limit to prevent excessive requests.
*/
private const MAX_LIMIT = 50;
/**
* HTTP timeout for the Python vector service.
*/
private const TIMEOUT_SECONDS = 10;
/**
* Wires the HTTP client and the tunable search limits.
*
* The four numeric parameters have defaults, so only the collaborators and
* the service URL are mandatory.
*
* NOTE(review): the numeric values are not validated here (the constructor
* body is empty) — confirm the injecting configuration supplies sane values.
*
* @param HttpClientInterface $http HTTP client; presumably used for the
*                                  request to the vector service — the call
*                                  itself is not visible in this excerpt.
* @param string $serviceUrl Base URL of the vector service; it is trimmed and
*                           stripped of trailing slashes before use.
* @param LoggerInterface $agentLogger Logger; usage not visible in this
*                                     excerpt — TODO confirm.
* @param float $minScore Minimum similarity score required for a tag to be
*                        considered; rows scoring below it are skipped.
* @param int $defaultLimit Result size used when search() is called without
*                          a limit.
* @param int $maxLimit Upper bound applied to any requested limit.
* @param int $timeoutSeconds Value passed as the 'timeout' request option.
*/
public function __construct(
private HttpClientInterface $http,
private string $serviceUrl,
private LoggerInterface $agentLogger,
private float $minScore = 0.72,
private int $defaultLimit = 8,
private int $maxLimit = 50,
private int $timeoutSeconds = 10,
) {
}
/**
* Executes a vector search against the Python tag index.
*
* Expected response rows:
* [
* {
* "tag_id": "...",
* "score": 0.73,
* "label": "Geräte",
* "tag_type": "catalog_entity"
* }
* ]
*
* @return list<array{
* tag_id:string,
* score:float,
@@ -56,7 +30,7 @@ final readonly class TagVectorSearchClient
* tag_type:string
* }>
*/
public function search(string $query, int $limit = self::DEFAULT_LIMIT): array
public function search(string $query, ?int $limit = null): array
{
$query = trim($query);
@@ -64,7 +38,7 @@ final readonly class TagVectorSearchClient
return [];
}
$limit = max(1, min($limit, self::MAX_LIMIT));
$limit = $this->clampLimit($limit ?? $this->defaultLimit);
$serviceUrl = rtrim(trim($this->serviceUrl), '/');
if ($serviceUrl === '') {
@@ -82,7 +56,7 @@ final readonly class TagVectorSearchClient
'query' => $query,
'limit' => $limit,
],
'timeout' => self::TIMEOUT_SECONDS,
'timeout' => $this->timeoutSeconds,
]
);
@@ -141,7 +115,7 @@ final readonly class TagVectorSearchClient
$score = (float) $score;
if ($score < self::MIN_SCORE) {
if ($score < $this->minScore) {
continue;
}
@@ -186,4 +160,9 @@ final readonly class TagVectorSearchClient
return array_slice($hits, 0, $limit);
}
}
/**
* Bounds a requested result size.
*
* The value is first capped at the configured maximum and then raised to at
* least 1, so the result is never below 1 (even when the configured maximum
* itself is below 1).
*
* @param int $limit Requested number of results.
*
* @return int The bounded limit.
*/
private function clampLimit(int $limit): int
{
    $cappedAtMaximum = min($limit, $this->maxLimit);

    return max(1, $cappedAtMaximum);
}
}