fix history anchor
This commit is contained in:
@@ -291,6 +291,26 @@ final readonly class AgentRunner
|
||||
$optimizedShopQuery = '';
|
||||
}
|
||||
|
||||
$ragAnchoredShopSearchQuery = $this->enrichShopSearchQueryWithRagAnchor(
|
||||
prompt: $originalPrompt,
|
||||
shopSearchQuery: $shopSearchQuery,
|
||||
knowledgeChunks: $knowledgeChunks
|
||||
);
|
||||
|
||||
if ($ragAnchoredShopSearchQuery !== $shopSearchQuery) {
|
||||
$this->agentLogger->info('Enriched shop search query with RAG product anchor', [
|
||||
'userId' => $userId,
|
||||
'prompt' => $prompt,
|
||||
'routingPrompt' => $routingPrompt,
|
||||
'optimizedShopQuery' => $optimizedShopQuery,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'ragAnchoredShopSearchQuery' => $ragAnchoredShopSearchQuery,
|
||||
]);
|
||||
|
||||
$shopSearchQuery = $ragAnchoredShopSearchQuery;
|
||||
$optimizedShopQuery = '';
|
||||
}
|
||||
|
||||
if ($shopSearchQuery === '') {
|
||||
$this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [
|
||||
'userId' => $userId,
|
||||
@@ -1042,7 +1062,7 @@ final readonly class AgentRunner
|
||||
*/
|
||||
private function tokenizeInputNormalizationGuardText(string $value): array
|
||||
{
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1057,7 +1077,7 @@ final readonly class AgentRunner
|
||||
*/
|
||||
private function extractInputNormalizationNumbers(string $value): array
|
||||
{
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1193,7 +1213,7 @@ final readonly class AgentRunner
|
||||
return [];
|
||||
}
|
||||
|
||||
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
|
||||
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1685,6 +1705,302 @@ final readonly class AgentRunner
|
||||
return trim($shopSearchQuery . ' ' . implode(' ', array_values($appendTokens)));
|
||||
}
|
||||
|
||||
/**
 * Rebuilds the shop search query around a product-title anchor taken from the
 * knowledge chunks, using the configured enrichment template.
 *
 * The trimmed input query is returned as-is when any of the following holds:
 * the query or the chunk list is empty, the feature is disabled in the config,
 * the prompt yields no numeric focus, no anchor can be resolved, the query
 * already contains every anchor token, or the rendered template produces an
 * empty query after term limiting.
 *
 * @param string   $prompt          Prompt used to find numeric focuses and subject terms.
 * @param string   $shopSearchQuery Current shop search query; trimmed before use.
 * @param string[] $knowledgeChunks Knowledge chunks searched for an anchor.
 *
 * @return string The enriched query, or the trimmed original query.
 */
private function enrichShopSearchQueryWithRagAnchor(
    string $prompt,
    string $shopSearchQuery,
    array $knowledgeChunks
): string {
    $baseQuery = trim($shopSearchQuery);

    if ($baseQuery === '' || $knowledgeChunks === []) {
        return $baseQuery;
    }

    if (!$this->agentRunnerConfig->isShopQueryRagAnchorEnrichmentEnabled()) {
        return $baseQuery;
    }

    $numericFocuses = $this->extractShopQueryNumericFocuses($prompt);
    if ($numericFocuses === []) {
        return $baseQuery;
    }

    $anchor = $this->resolveBestRagShopQueryAnchor($knowledgeChunks, $numericFocuses);
    if ($anchor === '') {
        return $baseQuery;
    }

    $queryTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($baseQuery), true);
    $anchorTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($anchor), true);

    // An empty difference means the anchor produced no tokens at all, or the
    // query already covers every anchor token; either way nothing is added.
    if (array_diff_key($anchorTokenSet, $queryTokenSet) === []) {
        return $baseQuery;
    }

    $subjectTerms = $this->extractRagAnchorSubjectTerms($prompt, $baseQuery);
    $template = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentTemplate();
    $renderedQuery = strtr($template, [
        '{anchor}' => $anchor,
        '{query}' => $baseQuery,
        '{subject}' => $subjectTerms,
    ]);

    $limitedQuery = $this->limitShopQueryTerms(
        $renderedQuery,
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMaxQueryTerms()
    );

    if ($limitedQuery === '') {
        return $baseQuery;
    }

    return $limitedQuery;
}
|
||||
|
||||
/**
 * Collects the distinct value/unit pairs that the configured numeric-focus
 * patterns find in the prompt.
 *
 * For each match the value is read from the named group "value" (falling back
 * to group 1) and the unit from the named group "unit" (falling back to
 * group 2). Both are normalized; pairs with an empty value or unit are
 * dropped, and each value/unit combination is kept only once, in order of
 * first appearance. Patterns for which preg_match_all() reports failure are
 * skipped.
 *
 * @return array<int, array{value:string, unit:string}>
 */
private function extractShopQueryNumericFocuses(string $prompt): array
{
    $focusesByKey = [];
    $patterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentNumericFocusPatterns();

    foreach ($patterns as $pattern) {
        // Warnings are suppressed so that a failing pattern is skipped quietly.
        if (@preg_match_all($pattern, $prompt, $hits, PREG_SET_ORDER) === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $rawValue = $hit['value'] ?? $hit[1] ?? '';
            $rawUnit = $hit['unit'] ?? $hit[2] ?? '';

            if (!(is_scalar($rawValue) && is_scalar($rawUnit))) {
                continue;
            }

            $value = $this->normalizeShopQueryNumericFocusValue((string) $rawValue);
            $unit = $this->normalizeShopQueryNumericFocusUnit((string) $rawUnit);

            if ($value === '' || $unit === '') {
                continue;
            }

            // First occurrence wins; later duplicates leave the entry untouched.
            $focusesByKey[$value . '|' . $unit] ??= [
                'value' => $value,
                'unit' => $unit,
            ];
        }
    }

    return array_values($focusesByKey);
}
|
||||
|
||||
/**
 * Picks the product-title anchor of the highest-scoring knowledge chunk.
 *
 * A chunk is a candidate only when a title anchor can be extracted from it and
 * its numeric-focus score is positive. The score is then raised by the
 * configured anchor bonus (when the anchor matches a bonus pattern) and by an
 * early-chunk bonus that shrinks by one per list position until it reaches
 * zero. A candidate wins only if it reaches the configured minimum score and
 * strictly beats the best score so far, so the earliest chunk wins ties.
 *
 * @param string[] $knowledgeChunks
 * @param array<int, array{value:string, unit:string}> $focuses
 *
 * @return string The winning anchor, or an empty string when no chunk qualifies.
 */
private function resolveBestRagShopQueryAnchor(array $knowledgeChunks, array $focuses): string
{
    $minimumScore = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMinScore();
    $earlyChunkBonus = max(0, $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax());

    $winner = '';
    $winnerScore = 0;

    foreach (array_values($knowledgeChunks) as $position => $rawChunk) {
        $chunkText = (string) $rawChunk;

        $candidate = $this->extractRagProductTitleAnchor($chunkText);
        if ($candidate === '') {
            continue;
        }

        $candidateScore = $this->scoreRagChunkForShopQueryNumericFocus($chunkText, $focuses);
        if ($candidateScore <= 0) {
            continue;
        }

        if ($this->ragAnchorMatchesAnyBonusPattern($candidate)) {
            $candidateScore += $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusScore();
        }

        if ($earlyChunkBonus > 0) {
            // Position 0 receives the full bonus; it never drops below zero.
            $candidateScore += max(0, $earlyChunkBonus - $position);
        }

        if ($candidateScore >= $minimumScore && $candidateScore > $winnerScore) {
            $winnerScore = $candidateScore;
            $winner = $candidate;
        }
    }

    return $winner;
}
|
||||
|
||||
/**
 * Extracts a product title from a knowledge chunk using the configured
 * product-title patterns, tried in order.
 *
 * The title is taken from the named group "title" (falling back to group 1),
 * stripped of the "®" sign, has whitespace runs collapsed to single spaces and
 * is trimmed. The first pattern that yields a non-empty title wins.
 *
 * @return string The cleaned title, or an empty string when no pattern yields one.
 */
private function extractRagProductTitleAnchor(string $chunk): string
{
    $titlePatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentProductTitlePatterns();

    foreach ($titlePatterns as $titlePattern) {
        // Warnings are suppressed; anything other than exactly one match is skipped.
        if (@preg_match($titlePattern, $chunk, $groups) !== 1) {
            continue;
        }

        $captured = $groups['title'] ?? $groups[1] ?? '';
        if (!is_scalar($captured)) {
            continue;
        }

        $withoutTrademark = str_replace('®', '', (string) $captured);
        $collapsed = preg_replace('/\s+/u', ' ', $withoutTrademark);
        // preg_replace() returns null on failure; that is treated as an empty title.
        $cleaned = trim($collapsed ?? '');

        if ($cleaned === '') {
            continue;
        }

        return $cleaned;
    }

    return '';
}
|
||||
|
||||
/**
 * Scores how well a knowledge chunk matches the requested numeric focuses.
 *
 * The chunk is normalized with normalizeShopQueryNumericFocusSearchText() and
 * each focus is then checked by plain substring containment: a focus whose
 * value and unit both occur in the chunk adds the configured
 * "exact value + unit" score, a focus whose value alone occurs adds the
 * configured "exact value" score. Value and unit are looked up independently,
 * and because the check is a substring test a value also counts as present
 * when it appears inside a longer number.
 *
 * @param array<int, array{value:string, unit:string}> $focuses
 *
 * @return int Sum of the scores of all matching focuses; 0 when the normalized chunk is empty.
 */
private function scoreRagChunkForShopQueryNumericFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeShopQueryNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;

    foreach ($focuses as ['value' => $value, 'unit' => $unit]) {
        // Without the value there is nothing to score, whatever the unit says.
        if ($value === '' || !str_contains($haystack, $value)) {
            continue;
        }

        $unitFound = $unit === '' || str_contains($haystack, $unit);

        $total += $unitFound
            ? $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueUnitScore()
            : $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueScore();
    }

    return $total;
}
|
||||
|
||||
/**
 * Tells whether the anchor matches at least one configured anchor-bonus pattern.
 *
 * Patterns that fail or do not match are ignored (warnings are suppressed).
 */
private function ragAnchorMatchesAnyBonusPattern(string $anchor): bool
{
    $bonusPatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns();

    foreach ($bonusPatterns as $bonusPattern) {
        if (@preg_match($bonusPattern, $anchor) !== 1) {
            continue;
        }

        return true;
    }

    return false;
}
|
||||
|
||||
/**
 * Selects the configured subject terms that the prompt mentions but the shop
 * search query does not yet fully contain.
 *
 * A term is selected when every one of its tokens occurs among the prompt
 * tokens and at least one of its tokens is missing from the query tokens.
 * Terms that tokenize to nothing are ignored.
 *
 * @return string The selected terms, de-duplicated and joined with single spaces.
 */
private function extractRagAnchorSubjectTerms(string $prompt, string $shopSearchQuery): string
{
    $promptTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($prompt), true);
    $queryTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);
    $selectedTerms = [];

    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentSubjectTerms() as $term) {
        $termTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($term), true);
        if ($termTokenSet === []) {
            continue;
        }

        // An empty key difference means every term token is in the other set.
        $mentionedInPrompt = array_diff_key($termTokenSet, $promptTokenSet) === [];
        if (!$mentionedInPrompt) {
            continue;
        }

        $coveredByQuery = array_diff_key($termTokenSet, $queryTokenSet) === [];
        if ($coveredByQuery) {
            continue;
        }

        $selectedTerms[] = $term;
    }

    return implode(' ', array_unique($selectedTerms));
}
|
||||
|
||||
/**
 * Reduces a query to its first distinct tokens.
 *
 * The query is tokenized with tokenizeShopQueryCandidate(), repeated tokens
 * are dropped (the first occurrence is kept), and at most $maxTerms tokens are
 * retained. A limit below 1 is treated as 1.
 *
 * @return string The retained tokens joined with single spaces; empty when the query has no tokens.
 */
private function limitShopQueryTerms(string $query, int $maxTerms): string
{
    $limit = max(1, $maxTerms);

    $distinctTokens = array_unique($this->tokenizeShopQueryCandidate($query));
    $keptTokens = array_slice($distinctTokens, 0, $limit);

    return implode(' ', $keptTokens);
}
|
||||
|
||||
/**
 * Normalizes the numeric part of a focus.
 *
 * The input first goes through normalizeShopQueryNumericFocusSearchText();
 * afterwards everything except ASCII digits and commas is removed and leading
 * and trailing commas are trimmed.
 */
private function normalizeShopQueryNumericFocusValue(string $value): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($value);

    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $searchText);
    if ($digitsAndCommas === null) {
        // preg_replace() failed; continue with the unfiltered search text.
        $digitsAndCommas = $searchText;
    }

    return trim($digitsAndCommas, ',');
}
|
||||
|
||||
/**
 * Normalizes the unit part of a focus.
 *
 * The input first goes through normalizeShopQueryNumericFocusSearchText();
 * afterwards every character that is not a Unicode letter is removed.
 */
private function normalizeShopQueryNumericFocusUnit(string $unit): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($unit);
    $lettersOnly = preg_replace('/[^\p{L}]+/u', '', $searchText);

    // preg_replace() returns null on failure; fall back to the unfiltered search text.
    return $lettersOnly ?? $searchText;
}
|
||||
|
||||
/**
 * Produces the compact comparison form shared by focus values, focus units
 * and knowledge chunks.
 *
 * Steps, in order: trim and lower-case (UTF-8); pass the text through the
 * language cleanup config's normalizeDashEquivalents() (presumably unifying
 * dash variants - confirm against that helper); turn every "." into ",";
 * remove all whitespace; remove every character that is not a Unicode letter,
 * a Unicode number or a comma.
 */
private function normalizeShopQueryNumericFocusSearchText(string $value): string
{
    $text = mb_strtolower(trim($value), 'UTF-8');
    $text = $this->languageCleanupConfig->normalizeDashEquivalents($text);
    $text = str_replace('.', ',', $text);

    $removalPatterns = [
        '/\s+/u',
        '/[^\p{L}\p{N},]+/u',
    ];

    foreach ($removalPatterns as $removalPattern) {
        // A failing replacement (null) leaves the text from the previous step in place.
        $text = preg_replace($removalPattern, '', $text) ?? $text;
    }

    return trim($text);
}
|
||||
|
||||
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
|
||||
string $prompt,
|
||||
string $optimizedShopQuery
|
||||
@@ -2021,7 +2337,7 @@ final readonly class AgentRunner
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
$value = $this->languageCleanupConfig->replaceWordSeparatorsWithSpace($value);
|
||||
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user