fix history anchor

This commit is contained in:
team 1
2026-05-04 20:08:14 +02:00
parent c5aeac397c
commit b259b6cd2d
6 changed files with 660 additions and 5 deletions

View File

@@ -291,6 +291,26 @@ final readonly class AgentRunner
$optimizedShopQuery = '';
}
$ragAnchoredShopSearchQuery = $this->enrichShopSearchQueryWithRagAnchor(
prompt: $originalPrompt,
shopSearchQuery: $shopSearchQuery,
knowledgeChunks: $knowledgeChunks
);
if ($ragAnchoredShopSearchQuery !== $shopSearchQuery) {
$this->agentLogger->info('Enriched shop search query with RAG product anchor', [
'userId' => $userId,
'prompt' => $prompt,
'routingPrompt' => $routingPrompt,
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'ragAnchoredShopSearchQuery' => $ragAnchoredShopSearchQuery,
]);
$shopSearchQuery = $ragAnchoredShopSearchQuery;
$optimizedShopQuery = '';
}
if ($shopSearchQuery === '') {
$this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [
'userId' => $userId,
@@ -1042,7 +1062,7 @@ final readonly class AgentRunner
*/
private function tokenizeInputNormalizationGuardText(string $value): array
{
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) === false) {
return [];
}
@@ -1057,7 +1077,7 @@ final readonly class AgentRunner
*/
private function extractInputNormalizationNumbers(string $value): array
{
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) === false) {
return [];
}
@@ -1193,7 +1213,7 @@ final readonly class AgentRunner
return [];
}
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) === false) {
return [];
}
@@ -1685,6 +1705,302 @@ final readonly class AgentRunner
return trim($shopSearchQuery . ' ' . implode(' ', array_values($appendTokens)));
}
/**
 * Tries to extend a shop search query with a product title ("anchor") resolved from the
 * given knowledge chunks.
 *
 * The trimmed input query is returned unchanged when it is empty, when there are no
 * knowledge chunks, when the feature is disabled in the config, when the prompt yields no
 * numeric focus, when no anchor can be resolved, or when every anchor token already occurs
 * in the query. Otherwise the configured template is rendered with the `{anchor}`,
 * `{query}` and `{subject}` placeholders and capped to the configured number of terms.
 *
 * @param string[] $knowledgeChunks
 */
private function enrichShopSearchQueryWithRagAnchor(
    string $prompt,
    string $shopSearchQuery,
    array $knowledgeChunks
): string {
    $shopSearchQuery = trim($shopSearchQuery);

    if ($shopSearchQuery === '' || $knowledgeChunks === []) {
        return $shopSearchQuery;
    }

    if (!$this->agentRunnerConfig->isShopQueryRagAnchorEnrichmentEnabled()) {
        return $shopSearchQuery;
    }

    $numericFocuses = $this->extractShopQueryNumericFocuses($prompt);
    if ($numericFocuses === []) {
        return $shopSearchQuery;
    }

    $anchor = $this->resolveBestRagShopQueryAnchor($knowledgeChunks, $numericFocuses);
    if ($anchor === '') {
        return $shopSearchQuery;
    }

    $queryTokenLookup = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);
    $anchorTokens = $this->tokenizeShopQueryCandidate($anchor);
    if ($anchorTokens === []) {
        return $shopSearchQuery;
    }

    // Enrichment only makes sense when the anchor contributes at least one new token.
    $queryCoversAnchor = true;
    foreach ($anchorTokens as $token) {
        if (!isset($queryTokenLookup[$token])) {
            $queryCoversAnchor = false;
            break;
        }
    }

    if ($queryCoversAnchor) {
        return $shopSearchQuery;
    }

    $placeholders = [
        '{anchor}' => $anchor,
        '{query}' => $shopSearchQuery,
        '{subject}' => $this->extractRagAnchorSubjectTerms($prompt, $shopSearchQuery),
    ];
    $renderedQuery = strtr(
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentTemplate(),
        $placeholders
    );

    $limitedQuery = $this->limitShopQueryTerms(
        $renderedQuery,
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMaxQueryTerms()
    );

    // Fall back to the plain query if the rendered template produced no tokens at all.
    if ($limitedQuery === '') {
        return $shopSearchQuery;
    }

    return $limitedQuery;
}
/**
 * Collects the distinct value/unit pairs that the configured numeric-focus patterns find
 * in the prompt.
 *
 * Each match contributes the named groups `value` / `unit`, falling back to capture
 * groups 1 / 2. Both parts are normalized; pairs with an empty value or unit are dropped
 * and duplicates are reported once, in first-seen order. Patterns for which
 * preg_match_all() returns false are skipped.
 *
 * @return array<int, array{value:string, unit:string}>
 */
private function extractShopQueryNumericFocuses(string $prompt): array
{
    // Keyed by "value|unit" so a repeated pair keeps its first entry only.
    $focusesByKey = [];
    $patterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentNumericFocusPatterns();

    foreach ($patterns as $pattern) {
        $matchCount = @preg_match_all($pattern, $prompt, $hits, PREG_SET_ORDER);
        if ($matchCount === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $rawValue = $hit['value'] ?? ($hit[1] ?? '');
            $rawUnit = $hit['unit'] ?? ($hit[2] ?? '');
            if (!(is_scalar($rawValue) && is_scalar($rawUnit))) {
                continue;
            }

            $value = $this->normalizeShopQueryNumericFocusValue((string) $rawValue);
            $unit = $this->normalizeShopQueryNumericFocusUnit((string) $rawUnit);
            if ($value === '' || $unit === '') {
                continue;
            }

            $focusesByKey[$value . '|' . $unit] ??= [
                'value' => $value,
                'unit' => $unit,
            ];
        }
    }

    return array_values($focusesByKey);
}
/**
 * Picks the product-title anchor of the highest-scoring knowledge chunk.
 *
 * A chunk is considered only if a title anchor can be extracted from it and its
 * numeric-focus score is positive. The score is then raised by the configured anchor
 * bonus (when the anchor matches a bonus pattern) and by a position bonus that shrinks by
 * one per chunk index until it reaches zero. The winner must reach the configured minimum
 * score; on ties the earlier chunk wins. Returns '' when no chunk qualifies.
 *
 * @param string[] $knowledgeChunks
 * @param array<int, array{value:string, unit:string}> $focuses
 */
private function resolveBestRagShopQueryAnchor(array $knowledgeChunks, array $focuses): string
{
    $minimumScore = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMinScore();
    $earlyChunkBonusCap = max(0, $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax());

    $winningAnchor = '';
    $winningScore = 0;
    $position = -1;

    foreach ($knowledgeChunks as $rawChunk) {
        // Zero-based position in iteration order, independent of the array's own keys.
        $position++;
        $chunkText = (string) $rawChunk;

        $candidateAnchor = $this->extractRagProductTitleAnchor($chunkText);
        if ($candidateAnchor === '') {
            continue;
        }

        $candidateScore = $this->scoreRagChunkForShopQueryNumericFocus($chunkText, $focuses);
        if ($candidateScore <= 0) {
            continue;
        }

        if ($this->ragAnchorMatchesAnyBonusPattern($candidateAnchor)) {
            $candidateScore += $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusScore();
        }

        if ($earlyChunkBonusCap > 0) {
            // Full bonus for the first chunk, one less per following chunk, never negative.
            $candidateScore += max(0, $earlyChunkBonusCap - $position);
        }

        if ($candidateScore >= $minimumScore && $candidateScore > $winningScore) {
            $winningScore = $candidateScore;
            $winningAnchor = $candidateAnchor;
        }
    }

    return $winningAnchor;
}
/**
 * Extracts a product title from a knowledge chunk using the configured title patterns.
 *
 * Patterns are tried in order; the first one that matches and yields a non-empty title
 * (named group `title`, else capture group 1) wins. The "®" sign is removed and runs of
 * whitespace are collapsed to a single space. Returns '' when nothing usable is found.
 */
private function extractRagProductTitleAnchor(string $chunk): string
{
    $titlePatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentProductTitlePatterns();

    foreach ($titlePatterns as $titlePattern) {
        if (@preg_match($titlePattern, $chunk, $captures) !== 1) {
            continue;
        }

        $rawTitle = $captures['title'] ?? ($captures[1] ?? '');
        if (!is_scalar($rawTitle)) {
            continue;
        }

        $withoutTrademarkSign = str_replace('®', '', (string) $rawTitle);
        $collapsed = preg_replace('/\s+/u', ' ', $withoutTrademarkSign);
        $cleanTitle = trim($collapsed ?? '');

        if ($cleanTitle !== '') {
            return $cleanTitle;
        }
    }

    return '';
}
/**
 * Scores how well a knowledge chunk reflects the numeric focuses of the prompt.
 *
 * The chunk is normalized with the same routine as the focus values, then every focus is
 * checked by plain substring search: value and unit both present adds the configured
 * "exact value + unit" score, value alone adds the "exact value" score, anything else
 * adds nothing. A focus with an empty unit counts as having its unit present.
 *
 * NOTE(review): substring matching means a value such as "5" is also found inside "15" —
 * confirm this looseness is intended.
 *
 * @param array<int, array{value:string, unit:string}> $focuses
 */
private function scoreRagChunkForShopQueryNumericFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeShopQueryNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;
    foreach ($focuses as $focus) {
        $valueFound = $focus['value'] !== '' && str_contains($haystack, $focus['value']);
        $unitFound = $focus['unit'] === '' || str_contains($haystack, $focus['unit']);

        if (!$valueFound) {
            continue;
        }

        $total += $unitFound
            ? $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueUnitScore()
            : $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueScore();
    }

    return $total;
}
/**
 * Tells whether the anchor matches at least one of the configured bonus patterns.
 *
 * Patterns that do not match, or for which preg_match() does not return 1, are ignored.
 */
private function ragAnchorMatchesAnyBonusPattern(string $anchor): bool
{
    $bonusPatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns();

    foreach ($bonusPatterns as $bonusPattern) {
        if (@preg_match($bonusPattern, $anchor) !== 1) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Selects the configured subject terms that the prompt mentions but the query lacks.
 *
 * A term qualifies when all of its tokens occur in the prompt and at least one of them is
 * missing from the shop search query. Qualifying terms are de-duplicated and joined with
 * single spaces, in configuration order.
 */
private function extractRagAnchorSubjectTerms(string $prompt, string $shopSearchQuery): string
{
    $promptLookup = array_fill_keys($this->tokenizeShopQueryCandidate($prompt), true);
    $queryLookup = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);

    // True when every token is a key of the lookup table.
    $isCoveredBy = static function (array $tokens, array $lookup): bool {
        foreach ($tokens as $token) {
            if (!isset($lookup[$token])) {
                return false;
            }
        }

        return true;
    };

    $selectedTerms = [];
    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentSubjectTerms() as $term) {
        $tokens = $this->tokenizeShopQueryCandidate($term);
        if ($tokens === []) {
            continue;
        }

        if (!$isCoveredBy($tokens, $promptLookup)) {
            continue;
        }

        if ($isCoveredBy($tokens, $queryLookup)) {
            continue;
        }

        $selectedTerms[] = $term;
    }

    return implode(' ', array_values(array_unique($selectedTerms)));
}
/**
 * Tokenizes the query, removes repeated tokens and keeps at most $maxTerms of them.
 *
 * A limit below 1 is treated as 1. Tokens keep their first-seen order and are joined with
 * single spaces.
 */
private function limitShopQueryTerms(string $query, int $maxTerms): string
{
    $limit = max(1, $maxTerms);

    // String comparison keeps the first occurrence of every distinct token.
    $distinctTokens = array_unique($this->tokenizeShopQueryCandidate($query), SORT_STRING);

    return implode(' ', array_slice(array_values($distinctTokens), 0, $limit));
}
/**
 * Reduces a raw numeric match to digits and commas.
 *
 * The text is first passed through the shared search-text normalization, then everything
 * except 0-9 and "," is removed and leading/trailing commas are trimmed.
 */
private function normalizeShopQueryNumericFocusValue(string $value): string
{
    $normalized = $this->normalizeShopQueryNumericFocusSearchText($value);
    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $normalized);

    return trim($digitsAndCommas ?? $normalized, ',');
}
/**
 * Reduces a raw unit match to its letters.
 *
 * The text is first passed through the shared search-text normalization, then every
 * non-letter character is removed.
 */
private function normalizeShopQueryNumericFocusUnit(string $unit): string
{
    $normalized = $this->normalizeShopQueryNumericFocusSearchText($unit);

    return preg_replace('/[^\p{L}]+/u', '', $normalized) ?? $normalized;
}
/**
 * Produces the compact, lower-cased form used to compare numeric focuses with chunk text.
 *
 * Steps, in order: trim and lower-case, normalize dash equivalents via the language
 * cleanup config, turn "." into ",", strip all whitespace, then strip everything that is
 * not a letter, a number or a comma.
 */
private function normalizeShopQueryNumericFocusSearchText(string $value): string
{
    $lowered = mb_strtolower(trim($value), 'UTF-8');
    $text = str_replace('.', ',', $this->languageCleanupConfig->normalizeDashEquivalents($lowered));

    // If a replacement fails (preg_replace() returns null), the previous text is kept.
    foreach (['/\s+/u', '/[^\p{L}\p{N},]+/u'] as $stripPattern) {
        $text = preg_replace($stripPattern, '', $text) ?? $text;
    }

    return trim($text);
}
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
string $prompt,
string $optimizedShopQuery
@@ -2021,7 +2337,7 @@ final readonly class AgentRunner
$value = mb_strtolower(trim($value), 'UTF-8');
$value = $this->languageCleanupConfig->replaceWordSeparatorsWithSpace($value);
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) === false) {
return [];
}