fix history anchor

This commit is contained in:
team 1
2026-05-04 20:08:14 +02:00
parent c5aeac397c
commit b259b6cd2d
6 changed files with 660 additions and 5 deletions

View File

@@ -291,6 +291,26 @@ final readonly class AgentRunner
$optimizedShopQuery = '';
}
$ragAnchoredShopSearchQuery = $this->enrichShopSearchQueryWithRagAnchor(
prompt: $originalPrompt,
shopSearchQuery: $shopSearchQuery,
knowledgeChunks: $knowledgeChunks
);
if ($ragAnchoredShopSearchQuery !== $shopSearchQuery) {
$this->agentLogger->info('Enriched shop search query with RAG product anchor', [
'userId' => $userId,
'prompt' => $prompt,
'routingPrompt' => $routingPrompt,
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'ragAnchoredShopSearchQuery' => $ragAnchoredShopSearchQuery,
]);
$shopSearchQuery = $ragAnchoredShopSearchQuery;
$optimizedShopQuery = '';
}
if ($shopSearchQuery === '') {
$this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [
'userId' => $userId,
@@ -1042,7 +1062,7 @@ final readonly class AgentRunner
*/
private function tokenizeInputNormalizationGuardText(string $value): array
{
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) === false) {
return [];
}
@@ -1057,7 +1077,7 @@ final readonly class AgentRunner
*/
private function extractInputNormalizationNumbers(string $value): array
{
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) === false) {
return [];
}
@@ -1193,7 +1213,7 @@ final readonly class AgentRunner
return [];
}
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) === false) {
return [];
}
@@ -1685,6 +1705,302 @@ final readonly class AgentRunner
return trim($shopSearchQuery . ' ' . implode(' ', array_values($appendTokens)));
}
/**
 * Extends the shop search query with a product anchor taken from the RAG knowledge chunks.
 *
 * The trimmed query is returned as-is when any of the following holds:
 * the query is empty, there are no knowledge chunks, the feature is disabled in the
 * config, the prompt yields no numeric focus, no anchor can be resolved, the anchor
 * tokenizes to nothing, or every anchor token already occurs in the query.
 *
 * Otherwise the configured template is rendered with the placeholders `{anchor}`,
 * `{query}` and `{subject}`, and the result is cut down to the configured maximum
 * number of terms. If that leaves an empty string, the trimmed query is returned.
 *
 * @param string[] $knowledgeChunks
 */
private function enrichShopSearchQueryWithRagAnchor(
    string $prompt,
    string $shopSearchQuery,
    array $knowledgeChunks
): string {
    $baseQuery = trim($shopSearchQuery);

    // The guards are evaluated in this order so the config is only consulted
    // once there is something to enrich.
    if ($baseQuery === '') {
        return $baseQuery;
    }
    if ($knowledgeChunks === []) {
        return $baseQuery;
    }
    if (!$this->agentRunnerConfig->isShopQueryRagAnchorEnrichmentEnabled()) {
        return $baseQuery;
    }

    $numericFocuses = $this->extractShopQueryNumericFocuses($prompt);
    if ($numericFocuses === []) {
        return $baseQuery;
    }

    $ragAnchor = $this->resolveBestRagShopQueryAnchor($knowledgeChunks, $numericFocuses);
    if ($ragAnchor === '') {
        return $baseQuery;
    }

    $queryLookup = array_fill_keys($this->tokenizeShopQueryCandidate($baseQuery), true);
    $anchorParts = $this->tokenizeShopQueryCandidate($ragAnchor);
    if ($anchorParts === []) {
        return $baseQuery;
    }

    // Nothing to add when the query already carries every token of the anchor.
    $absentFromQuery = array_diff_key(array_fill_keys($anchorParts, true), $queryLookup);
    if ($absentFromQuery === []) {
        return $baseQuery;
    }

    $replacements = [
        '{anchor}' => $ragAnchor,
        '{query}' => $baseQuery,
        '{subject}' => $this->extractRagAnchorSubjectTerms($prompt, $baseQuery),
    ];
    $renderedTemplate = strtr(
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentTemplate(),
        $replacements
    );
    $limited = $this->limitShopQueryTerms(
        $renderedTemplate,
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMaxQueryTerms()
    );

    if ($limited === '') {
        return $baseQuery;
    }

    return $limited;
}
/**
 * Collects the distinct numeric value/unit pairs that the configured patterns find in the prompt.
 *
 * For every match the value is read from the named group `value` (falling back to
 * group 1) and the unit from the named group `unit` (falling back to group 2).
 * Both are normalized; a pair is dropped when either part normalizes to an empty
 * string or when the same normalized pair was already collected.
 * Patterns for which preg_match_all() reports failure are skipped.
 *
 * @return array<int, array{value:string, unit:string}>
 */
private function extractShopQueryNumericFocuses(string $prompt): array
{
    // Keyed by "value|unit" so that the first occurrence of a pair wins.
    $pairsByKey = [];
    $patterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentNumericFocusPatterns();

    foreach ($patterns as $focusPattern) {
        $hits = [];
        if (@preg_match_all($focusPattern, $prompt, $hits, PREG_SET_ORDER) === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $valueCapture = $hit['value'] ?? ($hit[1] ?? '');
            $unitCapture = $hit['unit'] ?? ($hit[2] ?? '');
            if (!is_scalar($valueCapture) || !is_scalar($unitCapture)) {
                continue;
            }

            $normalizedValue = $this->normalizeShopQueryNumericFocusValue((string) $valueCapture);
            $normalizedUnit = $this->normalizeShopQueryNumericFocusUnit((string) $unitCapture);
            if ($normalizedValue === '' || $normalizedUnit === '') {
                continue;
            }

            $pairKey = $normalizedValue . '|' . $normalizedUnit;
            if (!isset($pairsByKey[$pairKey])) {
                $pairsByKey[$pairKey] = [
                    'value' => $normalizedValue,
                    'unit' => $normalizedUnit,
                ];
            }
        }
    }

    return array_values($pairsByKey);
}
/**
 * Picks the product title anchor of the best-scoring knowledge chunk.
 *
 * A chunk only competes when a product title can be extracted from it and its
 * numeric focus score is positive. The score is then raised by the configured
 * anchor bonus (when the title matches a bonus pattern) and by an early-chunk
 * bonus that shrinks by one per chunk position and never drops below zero.
 * The winner must reach the configured minimum score and strictly beat the
 * best score seen so far, so on a tie the earlier chunk is kept.
 *
 * @param string[] $knowledgeChunks
 * @param array<int, array{value:string, unit:string}> $focuses
 *
 * @return string The winning anchor, or an empty string when no chunk qualifies.
 */
private function resolveBestRagShopQueryAnchor(array $knowledgeChunks, array $focuses): string
{
    $winner = '';
    $winnerScore = 0;
    $requiredScore = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMinScore();
    $positionBonusCap = max(0, $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax());

    foreach (array_values($knowledgeChunks) as $position => $rawChunk) {
        $chunkText = (string) $rawChunk;

        $candidate = $this->extractRagProductTitleAnchor($chunkText);
        if ($candidate !== '') {
            $points = $this->scoreRagChunkForShopQueryNumericFocus($chunkText, $focuses);

            if ($points > 0) {
                if ($this->ragAnchorMatchesAnyBonusPattern($candidate)) {
                    $points += $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusScore();
                }

                // Position 0 earns the full cap; each later chunk earns one less, floor 0.
                $points += max(0, $positionBonusCap - $position);

                if ($points >= $requiredScore && $points > $winnerScore) {
                    $winnerScore = $points;
                    $winner = $candidate;
                }
            }
        }
    }

    return $winner;
}
/**
 * Extracts a product title from a knowledge chunk using the configured title patterns.
 *
 * Patterns are tried in order. The title is taken from the named group `title`
 * (falling back to group 1), stripped of the "®" sign, has whitespace runs
 * collapsed to single spaces and is trimmed. The first non-empty result wins.
 *
 * @return string The cleaned title, or an empty string when no pattern yields one.
 */
private function extractRagProductTitleAnchor(string $chunk): string
{
    $titlePatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentProductTitlePatterns();

    foreach ($titlePatterns as $titlePattern) {
        $captures = [];
        if (@preg_match($titlePattern, $chunk, $captures) === 1) {
            $rawTitle = $captures['title'] ?? ($captures[1] ?? '');

            if (is_scalar($rawTitle)) {
                $withoutTrademark = str_replace('®', '', (string) $rawTitle);
                $collapsed = preg_replace('/\s+/u', ' ', $withoutTrademark);
                $cleanTitle = trim($collapsed ?? '');

                if ($cleanTitle !== '') {
                    return $cleanTitle;
                }
            }
        }
    }

    return '';
}
/**
 * Scores a knowledge chunk by how many numeric focuses it contains.
 *
 * The chunk is normalized first; value and unit are then looked up as plain
 * substrings of the normalized text, independently of each other. A focus whose
 * value and unit are both found adds the configured "value with unit" score;
 * a focus whose value alone is found adds the configured "value only" score.
 *
 * @param array<int, array{value:string, unit:string}> $focuses
 */
private function scoreRagChunkForShopQueryNumericFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeShopQueryNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;
    foreach ($focuses as ['value' => $wantedValue, 'unit' => $wantedUnit]) {
        $valueFound = $wantedValue !== '' && str_contains($haystack, $wantedValue);
        if (!$valueFound) {
            continue;
        }

        // An empty unit counts as "found".
        $unitFound = $wantedUnit === '' || str_contains($haystack, $wantedUnit);

        $total += $unitFound
            ? $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueUnitScore()
            : $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueScore();
    }

    return $total;
}
/**
 * Tells whether the anchor matches at least one of the configured anchor bonus patterns.
 *
 * Patterns that do not match, or for which preg_match() fails, are passed over.
 */
private function ragAnchorMatchesAnyBonusPattern(string $anchor): bool
{
    $bonusPatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns();

    foreach ($bonusPatterns as $bonusPattern) {
        $outcome = @preg_match($bonusPattern, $anchor);
        if ($outcome !== 1) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Builds the `{subject}` part of the enrichment template.
 *
 * A configured subject term is selected when all of its tokens occur in the
 * prompt and at least one of them is not yet part of the shop search query.
 * Terms that tokenize to nothing are ignored.
 *
 * @return string The selected terms (duplicates removed) joined by single spaces.
 */
private function extractRagAnchorSubjectTerms(string $prompt, string $shopSearchQuery): string
{
    $promptLookup = array_fill_keys($this->tokenizeShopQueryCandidate($prompt), true);
    $queryLookup = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);

    // True when at least one of the given tokens is not a key of the lookup table.
    $hasTokenOutside = static fn (array $tokens, array $lookup): bool
        => array_diff_key(array_fill_keys($tokens, true), $lookup) !== [];

    $selected = [];
    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentSubjectTerms() as $candidateTerm) {
        $candidateTokens = $this->tokenizeShopQueryCandidate($candidateTerm);
        if ($candidateTokens === []) {
            continue;
        }

        // The prompt has to mention the whole term.
        if ($hasTokenOutside($candidateTokens, $promptLookup)) {
            continue;
        }

        // Only add terms the query does not fully contain already.
        if ($hasTokenOutside($candidateTokens, $queryLookup)) {
            $selected[] = $candidateTerm;
        }
    }

    return implode(' ', array_unique($selected));
}
/**
 * Reduces a query to its first distinct tokens.
 *
 * The query is tokenized, repeated tokens are dropped (first occurrence kept),
 * and at most $maxTerms tokens are joined by single spaces. A limit below 1 is
 * treated as 1.
 */
private function limitShopQueryTerms(string $query, int $maxTerms): string
{
    $limit = max(1, $maxTerms);
    $distinctTokens = array_unique($this->tokenizeShopQueryCandidate($query));

    return implode(' ', array_slice($distinctTokens, 0, $limit));
}
/**
 * Normalizes a captured numeric value to ASCII digits and commas.
 *
 * The text is run through the shared search-text normalization, everything
 * except 0-9 and "," is removed, and leading/trailing commas are trimmed.
 */
private function normalizeShopQueryNumericFocusValue(string $value): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($value);
    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $searchText);

    // preg_replace() yields null on failure; fall back to the normalized text then.
    return trim($digitsAndCommas ?? $searchText, ',');
}
/**
 * Normalizes a captured unit to letters only.
 *
 * The text is run through the shared search-text normalization and every
 * character that is not a Unicode letter is removed.
 */
private function normalizeShopQueryNumericFocusUnit(string $unit): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($unit);

    // preg_replace() yields null on failure; fall back to the normalized text then.
    return preg_replace('/[^\p{L}]+/u', '', $searchText) ?? $searchText;
}
/**
 * Brings text into the compact form used for numeric focus substring lookups.
 *
 * Steps: trim and lowercase, normalize dash equivalents via the language cleanup
 * config, turn every "." into ",", remove all whitespace, then remove every
 * character that is not a Unicode letter, a Unicode number or a comma.
 */
private function normalizeShopQueryNumericFocusSearchText(string $value): string
{
    $text = mb_strtolower(trim($value), 'UTF-8');
    $text = $this->languageCleanupConfig->normalizeDashEquivalents($text);
    $text = str_replace('.', ',', $text);

    // Each removal keeps the previous text when preg_replace() fails (null).
    foreach (['/\s+/u', '/[^\p{L}\p{N},]+/u'] as $removalPattern) {
        $text = preg_replace($removalPattern, '', $text) ?? $text;
    }

    return trim($text);
}
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
string $prompt,
string $optimizedShopQuery
@@ -2021,7 +2337,7 @@ final readonly class AgentRunner
$value = mb_strtolower(trim($value), 'UTF-8');
$value = $this->languageCleanupConfig->replaceWordSeparatorsWithSpace($value);
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) !== 1) {
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) === false) {
return [];
}

View File

@@ -50,6 +50,7 @@ final readonly class PromptBuilder
$prompt = $this->normalizeBlockText($prompt);
$urlContent = $this->normalizeBlockText($urlContent);
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
$knowledgeChunks = $this->prioritizeKnowledgeChunksForNumericValueFocus($prompt, $knowledgeChunks);
$hasShopResults = $shopResults !== [];
$hasKnowledge = $knowledgeChunks !== [] || $urlContent !== '';
@@ -73,6 +74,7 @@ final readonly class PromptBuilder
shopResults: $shopResults,
requestedRole: $requestedProductRole
);
$numericValueFocusBlock = $this->buildNumericValueFocusBlock($prompt);
$outputPriorityBlock = $this->buildOutputPriorityBlock(
hasShopResults: $hasShopResults,
isTechnicalProductQuestion: $isTechnicalProductQuestion
@@ -101,6 +103,7 @@ final readonly class PromptBuilder
$systemBlock,
$shopBlock,
$measurementEvidenceBlock,
$numericValueFocusBlock,
$outputPriorityBlock,
$fallbackEscalationBlock,
$responseFormatBlock,
@@ -118,6 +121,7 @@ final readonly class PromptBuilder
$systemBlock,
$shopBlock,
$measurementEvidenceBlock,
$numericValueFocusBlock,
$outputPriorityBlock,
$fallbackEscalationBlock,
$responseFormatBlock,
@@ -250,6 +254,174 @@ final readonly class PromptBuilder
return $this->implodeBlocks($parts);
}
/**
 * Builds the prompt block that lists the numeric values found in the user prompt.
 *
 * Returns an empty string when the feature is disabled or the prompt contains no
 * numeric focus. Otherwise every configured rule is rendered with the template
 * variable `values` (the distinct focus labels joined by ", ") and the rules are
 * wrapped into a rule block under the configured section label.
 */
private function buildNumericValueFocusBlock(string $prompt): string
{
    if (!$this->config->isNumericValueFocusEnabled()) {
        return '';
    }

    $numericFocuses = $this->extractNumericValueFocuses($prompt);
    if ($numericFocuses === []) {
        return '';
    }

    $distinctLabels = array_unique(array_column($numericFocuses, 'label'));
    $templateVariables = ['values' => implode(', ', $distinctLabels)];

    $renderedRules = array_values(array_map(
        fn ($ruleTemplate) => $this->renderPromptTemplate($ruleTemplate, $templateVariables),
        $this->config->getNumericValueFocusRules()
    ));

    return $this->buildRuleBlock(
        $this->config->getNumericValueFocusSectionLabel(),
        $renderedRules
    );
}
/**
 * Moves knowledge chunks that contain the prompt's numeric values to the front.
 *
 * The input is returned untouched when the feature is disabled, when there are
 * fewer than two chunks, or when the prompt contains no numeric focus. Otherwise
 * the chunks are cast to strings and ordered by descending focus score; chunks
 * with equal scores keep their original relative order.
 *
 * @param string[] $knowledgeChunks
 * @return string[]
 */
private function prioritizeKnowledgeChunksForNumericValueFocus(string $prompt, array $knowledgeChunks): array
{
    if (!$this->config->isNumericValueFocusEnabled() || count($knowledgeChunks) < 2) {
        return $knowledgeChunks;
    }

    $numericFocuses = $this->extractNumericValueFocuses($prompt);
    if ($numericFocuses === []) {
        return $knowledgeChunks;
    }

    $ranked = [];
    $position = 0;
    foreach ($knowledgeChunks as $rawChunk) {
        $chunkText = (string) $rawChunk;
        $ranked[] = [
            'index' => $position++,
            'score' => $this->scoreKnowledgeChunkForNumericValueFocus($chunkText, $numericFocuses),
            'chunk' => $chunkText,
        ];
    }

    // Higher score first; the original position breaks ties.
    usort(
        $ranked,
        static fn (array $left, array $right): int
            => [$right['score'], $left['index']] <=> [$left['score'], $right['index']]
    );

    return array_column($ranked, 'chunk');
}
/**
 * Scores a knowledge chunk by how many numeric focuses it contains.
 *
 * The chunk is normalized first; value and unit are then looked up as plain
 * substrings of the normalized text, independently of each other. A focus whose
 * value and unit are both found adds 100; a focus whose value alone is found adds 20.
 *
 * @param array<int, array{value: string, unit: string, label: string}> $focuses
 */
private function scoreKnowledgeChunkForNumericValueFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;
    foreach ($focuses as $numericFocus) {
        $wantedValue = $numericFocus['value'];
        $wantedUnit = $numericFocus['unit'];

        if ($wantedValue === '' || !str_contains($haystack, $wantedValue)) {
            continue;
        }

        // An empty unit counts as "found".
        $unitFound = $wantedUnit === '' || str_contains($haystack, $wantedUnit);
        $total += $unitFound ? 100 : 20;
    }

    return $total;
}
/**
 * Collects the distinct numeric value/unit pairs that the configured patterns find in the prompt.
 *
 * For every match the value is read from the named group `value` (falling back to
 * group 1) and the unit from the named group `unit` (falling back to group 2).
 * A pair is dropped when either part normalizes to an empty string or when the
 * same normalized pair was already collected. The `label` keeps the raw captures
 * ("value unit") after block-text normalization. Collection stops as soon as the
 * configured maximum number of values (at least 1) is reached.
 * Patterns for which preg_match_all() reports failure are skipped.
 *
 * @return array<int, array{value: string, unit: string, label: string}>
 */
private function extractNumericValueFocuses(string $prompt): array
{
    // Keyed by "value|unit" so that the first occurrence of a pair wins.
    $pairsByKey = [];
    $limit = max(1, $this->config->getNumericValueFocusMaxValues());

    foreach ($this->config->getNumericValueFocusPatterns() as $valuePattern) {
        $hits = [];
        if (@preg_match_all($valuePattern, $prompt, $hits, PREG_SET_ORDER) === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $valueCapture = $hit['value'] ?? ($hit[1] ?? '');
            $unitCapture = $hit['unit'] ?? ($hit[2] ?? '');
            if (!is_scalar($valueCapture) || !is_scalar($unitCapture)) {
                continue;
            }

            $normalizedValue = $this->normalizeNumericFocusValue((string) $valueCapture);
            $normalizedUnit = $this->normalizeNumericFocusUnit((string) $unitCapture);
            if ($normalizedValue === '' || $normalizedUnit === '') {
                continue;
            }

            $pairKey = $normalizedValue . '|' . $normalizedUnit;
            if (isset($pairsByKey[$pairKey])) {
                continue;
            }

            $rawLabel = trim((string) $valueCapture . ' ' . (string) $unitCapture);
            $pairsByKey[$pairKey] = [
                'value' => $normalizedValue,
                'unit' => $normalizedUnit,
                'label' => $this->normalizeBlockText($rawLabel),
            ];

            if (count($pairsByKey) >= $limit) {
                break 2;
            }
        }
    }

    return array_values($pairsByKey);
}
/**
 * Normalizes a captured numeric value to ASCII digits and commas.
 *
 * The text is run through the shared search-text normalization, everything
 * except 0-9 and "," is removed, and leading/trailing commas are trimmed.
 */
private function normalizeNumericFocusValue(string $value): string
{
    $searchText = $this->normalizeNumericFocusSearchText($value);
    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $searchText);

    // preg_replace() yields null on failure; fall back to the normalized text then.
    return trim($digitsAndCommas ?? $searchText, ',');
}
/**
 * Normalizes a captured unit to letters only.
 *
 * The text is run through the shared search-text normalization and every
 * character that is not a Unicode letter is removed.
 */
private function normalizeNumericFocusUnit(string $unit): string
{
    $searchText = $this->normalizeNumericFocusSearchText($unit);

    // preg_replace() yields null on failure; fall back to the normalized text then.
    return preg_replace('/[^\p{L}]+/u', '', $searchText) ?? $searchText;
}
/**
 * Brings text into the compact form used for numeric focus substring lookups.
 *
 * Steps: block-text normalization and lowercasing, dash-equivalent normalization
 * via the language cleanup config, "." turned into ",", all whitespace removed,
 * then every character that is not a Unicode letter, a Unicode number or a comma
 * removed.
 */
private function normalizeNumericFocusSearchText(string $value): string
{
    $text = mb_strtolower($this->normalizeBlockText($value), 'UTF-8');
    $text = $this->languageCleanupConfig->normalizeDashEquivalents($text);
    $text = str_replace('.', ',', $text);

    // Each removal keeps the previous text when preg_replace() fails (null).
    foreach (['/\s+/u', '/[^\p{L}\p{N},]+/u'] as $removalPattern) {
        $text = preg_replace($removalPattern, '', $text) ?? $text;
    }

    return trim($text);
}
/**
* Build a small priority block that tells the model what to surface first.
*/
@@ -810,7 +982,7 @@ final readonly class PromptBuilder
$withoutParentheses = preg_replace('/\([^)]*\)/u', ' ', $rawParameter) ?? $rawParameter;
$this->appendGenericMeasurementParameterParts($terms, $withoutParentheses);
if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) === 1) {
if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) !== false) {
foreach ($matches[1] as $parenthetical) {
$this->appendGenericMeasurementParameterParts($terms, (string) $parenthetical);
}

View File

@@ -1074,6 +1074,78 @@ final class AgentRunnerConfig
{
return $this->getRequiredString('shop_prompt.context_anchor_enrichment.template');
}
/**
 * Feature switch for enriching the shop search query with a RAG product anchor.
 *
 * Reads the required boolean at `shop_prompt.rag_anchor_enrichment.enabled`.
 */
public function isShopQueryRagAnchorEnrichmentEnabled(): bool
{
return $this->getRequiredBool('shop_prompt.rag_anchor_enrichment.enabled');
}
/**
 * Minimum total score a knowledge chunk must reach before its anchor may be used.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.min_score`.
 */
public function getShopQueryRagAnchorEnrichmentMinScore(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.min_score');
}
/**
 * Upper bound for the number of terms kept in the enriched shop search query.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.max_query_terms`.
 */
public function getShopQueryRagAnchorEnrichmentMaxQueryTerms(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.max_query_terms');
}
/**
 * Largest position bonus an early knowledge chunk can receive during anchor scoring.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.early_chunk_bonus_max`.
 */
public function getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.early_chunk_bonus_max');
}
/**
 * Score added when a chunk contains both the value and the unit of a numeric focus.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.scores.exact_value_with_unit`.
 */
public function getShopQueryRagAnchorEnrichmentExactValueUnitScore(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.exact_value_with_unit');
}
/**
 * Score added when a chunk contains the value of a numeric focus but not its unit.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.scores.exact_value_only`.
 */
public function getShopQueryRagAnchorEnrichmentExactValueScore(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.exact_value_only');
}
/**
 * Score added when an extracted anchor matches one of the anchor bonus patterns.
 *
 * Reads the required integer at `shop_prompt.rag_anchor_enrichment.scores.anchor_bonus`.
 */
public function getShopQueryRagAnchorEnrichmentAnchorBonusScore(): int
{
return $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.anchor_bonus');
}
/**
 * Template used to render the enriched shop search query.
 *
 * The consumer substitutes the placeholders `{anchor}`, `{query}` and `{subject}`.
 * Reads the required string at `shop_prompt.rag_anchor_enrichment.template`.
 */
public function getShopQueryRagAnchorEnrichmentTemplate(): string
{
return $this->getRequiredString('shop_prompt.rag_anchor_enrichment.template');
}
/**
 * Regular expressions that locate numeric value/unit pairs in the user prompt.
 *
 * The consumer reads the value from the named group `value` (or group 1) and
 * the unit from the named group `unit` (or group 2).
 * Reads the required string list at `shop_prompt.rag_anchor_enrichment.numeric_focus_patterns`.
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentNumericFocusPatterns(): array
{
return $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.numeric_focus_patterns');
}
/**
 * Regular expressions that extract a product title from a knowledge chunk.
 *
 * The consumer reads the title from the named group `title` (or group 1).
 * Reads the required string list at `shop_prompt.rag_anchor_enrichment.product_title_patterns`.
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentProductTitlePatterns(): array
{
return $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.product_title_patterns');
}
/**
 * Regular expressions that earn an extracted anchor the anchor bonus score when matched.
 *
 * Reads the required string list at `shop_prompt.rag_anchor_enrichment.anchor_bonus_patterns`.
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentAnchorBonusPatterns(): array
{
return $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.anchor_bonus_patterns');
}
/**
 * Candidate subject terms for the `{subject}` placeholder of the enrichment template.
 *
 * Reads the required string list at `shop_prompt.rag_anchor_enrichment.subject_terms`.
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentSubjectTerms(): array
{
return $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.subject_terms');
}
public function getShopQueryTranslationReplacements(string $language): array
{
$value = $this->getOptionalStringMap('shop_prompt.language_preservation.translation_replacements.' . $language);

View File

@@ -98,6 +98,17 @@ final class PromptBuilderConfig
return (float) $value;
}
/**
 * Returns the required config value at $path, insisting that it is a real boolean.
 *
 * @throws \InvalidArgumentException When the value found at $path is not of type bool.
 */
private function getRequiredBool(string $path): bool
{
    $candidate = $this->getRequiredValue($path);

    if (is_bool($candidate)) {
        return $candidate;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be boolean.', $path));
}
private function getRequiredString(string $path): string
{
@@ -578,6 +589,37 @@ final class PromptBuilderConfig
return $this->getRequiredString('sections.measurement_evidence_label');
}
/**
 * Feature switch for the numeric value focus handling in the prompt builder.
 *
 * Reads the required boolean at `numeric_value_focus.enabled`.
 */
public function isNumericValueFocusEnabled(): bool
{
return $this->getRequiredBool('numeric_value_focus.enabled');
}
/**
 * Section label under which the numeric value focus rules are rendered.
 *
 * Reads the required string at `sections.numeric_value_focus_label`.
 */
public function getNumericValueFocusSectionLabel(): string
{
return $this->getRequiredString('sections.numeric_value_focus_label');
}
/**
 * Maximum number of numeric value/unit pairs collected from a prompt.
 *
 * Reads the required integer at `numeric_value_focus.max_values`.
 */
public function getNumericValueFocusMaxValues(): int
{
return $this->getRequiredInt('numeric_value_focus.max_values');
}
/**
 * Regular expressions that locate numeric value/unit pairs in the user prompt.
 *
 * The consumer reads the value from the named group `value` (or group 1) and
 * the unit from the named group `unit` (or group 2).
 * Reads the required string list at `numeric_value_focus.value_patterns`.
 *
 * @return string[]
 */
public function getNumericValueFocusPatterns(): array
{
return $this->getRequiredStringList('numeric_value_focus.value_patterns');
}
/**
 * Rule templates for the numeric value focus prompt block.
 *
 * The consumer renders each rule with the template variable `values`.
 * Reads the required string list at `numeric_value_focus.rules`.
 *
 * @return string[]
 */
public function getNumericValueFocusRules(): array
{
return $this->getRequiredStringList('numeric_value_focus.rules');
}
/**
* @return string[]
*/