This commit is contained in:
team 1
2026-05-10 08:55:05 +02:00
parent 7e96af5f1d
commit 96375668b2
9 changed files with 379 additions and 29 deletions

View File

@@ -1719,13 +1719,19 @@ final readonly class AgentRunner
return $shopSearchQuery;
}
$modelVariantSuffixTokens = $this->extractPositiveShopQueryModelVariantSuffixTokens($tokens, $blockedTokens, $codePatterns);
$kept = [];
foreach ($tokens as $token) {
if (isset($blockedTokens[$token]) || isset($kept[$token])) {
continue;
}
if (isset($allowedTokens[$token]) || $this->matchesAnyConfiguredShopQueryCodePattern($token, $codePatterns)) {
if (
isset($allowedTokens[$token])
|| isset($modelVariantSuffixTokens[$token])
|| $this->matchesAnyConfiguredShopQueryCodePattern($token, $codePatterns)
) {
$kept[$token] = $token;
}
}
@@ -1807,6 +1813,53 @@ final readonly class AgentRunner
return false;
}
/**
 * Collect the variant-suffix tokens that directly follow a model/code token.
 *
 * Every token matching one of the configured code patterns acts as an anchor.
 * Starting right after the anchor, consecutive tokens are collected for as
 * long as they are not blocked and have the shape of a variant suffix; the
 * first token failing either test ends the run for that anchor. The caller
 * can then keep these tokens so that a specific model variant is not reduced
 * to its generic base model by the positive-token filter.
 *
 * NOTE(review): the anchor itself is not checked against $blockedTokens here;
 * confirm whether a blocked anchor should still contribute suffixes.
 *
 * @param string[] $tokens
 * @param array<string, true> $blockedTokens
 * @param string[] $codePatterns
 * @return array<string, true> Suffix tokens as keys, each mapped to true.
 */
private function extractPositiveShopQueryModelVariantSuffixTokens(
    array $tokens,
    array $blockedTokens,
    array $codePatterns
): array {
    $collected = [];
    $total = count($tokens);

    $anchor = 0;
    while ($anchor < $total) {
        $anchorToken = $tokens[$anchor] ?? '';
        $anchor++;

        if (!$this->matchesAnyConfiguredShopQueryCodePattern($anchorToken, $codePatterns)) {
            continue;
        }

        // Walk the run of tokens that immediately follows the anchor.
        $cursor = $anchor;
        while ($cursor < $total) {
            $follower = $tokens[$cursor] ?? '';
            $cursor++;

            $isUsableSuffix = !isset($blockedTokens[$follower])
                && $this->isPositiveShopQueryModelVariantSuffixToken($follower);
            if (!$isUsableSuffix) {
                break;
            }

            $collected[$follower] = true;
        }
    }

    return $collected;
}
/**
 * Tell whether a token has the shape of a model variant suffix.
 *
 * After trimming surrounding whitespace the token must consist of two to
 * eight letters (any script), optionally followed by up to three digits.
 */
private function isPositiveShopQueryModelVariantSuffixToken(string $token): bool
{
    $candidate = trim($token);
    if ($candidate === '') {
        return false;
    }

    return preg_match('/^[\p{L}]{2,8}\d{0,3}$/u', $candidate) === 1;
}
private function cleanupDirectProductAttributeShopQuery(string $prompt, string $shopSearchQuery): string
{
$shopSearchQuery = trim($shopSearchQuery);
@@ -3321,10 +3374,6 @@ final readonly class AgentRunner
return $shopResults;
}
if ($this->isMixedDeviceAndAccessoryProductRequest($prompt, $shopSearchQuery)) {
return $shopResults;
}
$primaryMatches = [];
$corpusMatches = [];
@@ -3383,10 +3432,6 @@ final readonly class AgentRunner
return $emptyResult;
}
if ($this->isMixedDeviceAndAccessoryProductRequest($prompt, $shopSearchQuery)) {
return $emptyResult;
}
$repairQuery = $this->buildDirectProductPrimaryIdentityRepairQuery(
shopSearchQuery: $shopSearchQuery,
requestedTerms: $requestedTerms
@@ -4070,7 +4115,7 @@ final readonly class AgentRunner
}
$terms = [];
foreach ($this->agentRunnerConfig->getDirectShopResultProductIdentityTerms() as $term) {
foreach ($this->agentRunnerConfig->getShopQueryProductAttributeCleanupProductTypeTerms() as $term) {
if ($this->containsAllShopQueryTokens($combined, $term)) {
$terms[] = $term;
}
@@ -4079,17 +4124,6 @@ final readonly class AgentRunner
return array_values(array_unique($terms));
}
/**
 * Tell whether prompt and shop query together mention both a main-device
 * keyword and an accessory keyword.
 *
 * The prompt and query are joined with a space, passed through
 * normalizeOneLine() and lower-cased before being checked against the
 * configured "main device request role" and "accessory product role"
 * keyword lists. An empty combined text never counts as mixed.
 */
private function isMixedDeviceAndAccessoryProductRequest(string $prompt, string $shopSearchQuery): bool
{
    $haystack = mb_strtolower($this->normalizeOneLine($prompt . ' ' . $shopSearchQuery), 'UTF-8');
    if ($haystack === '') {
        return false;
    }

    $mentionsMainDevice = $this->containsAnyConfiguredTerm(
        $haystack,
        $this->agentRunnerConfig->getNoLlmMainDeviceRequestRoleKeywords()
    );

    // The accessory keywords are only consulted when a main device was found.
    return $mentionsMainDevice
        && $this->containsAnyConfiguredTerm(
            $haystack,
            $this->agentRunnerConfig->getNoLlmAccessoryProductRoleKeywords()
        );
}
private function containsAllShopQueryTokens(string $text, string $term): bool
{
$tokens = array_fill_keys($this->tokenizeShopQueryCandidate($text), true);
@@ -4235,7 +4269,6 @@ final readonly class AgentRunner
|| !$shopSearchAttempted
|| $shopSearchHadSystemFailure
|| $this->extractRequestedDirectProductTerms($prompt, $shopSearchQuery) === []
|| $this->isMixedDeviceAndAccessoryProductRequest($prompt, $shopSearchQuery)
) {
return '';
}
@@ -5457,7 +5490,7 @@ final readonly class AgentRunner
private function followUpActionAnswerAlreadyContainsPrice(string $answerText): bool
{
    // Purpose: report whether the answer text already states a price, i.e. an
    // amount with two decimals (comma or dot) next to a currency marker
    // ("€" or "eur", case-insensitive), with the marker before or after the
    // amount, optionally introduced by "preis"/"preise" within 80 characters.
    //
    // The word boundary is attached to "eur" only. "€" is not a word
    // character, so a pattern of the form `(?:€|eur)\b` cannot match when
    // "€" is followed by whitespace, punctuation or the end of the text,
    // which made answers such as "12,99 €" go undetected.
    //
    // An amount without any currency marker is deliberately not treated as
    // a price.
    return preg_match('/(?:\bpreise?\b.{0,80}\d+[,.]\d{2}\s*(?:€|eur\b)|\d+[,.]\d{2}\s*(?:€|eur\b)|(?:€|eur\b)\s*\d+[,.]\d{2})/iu', $answerText) === 1;
}
/**

View File

@@ -308,8 +308,11 @@ final readonly class CommerceQueryParser
$keep[$previousIndex] = true;
}
$nextIndex = $index + 1;
if (isset($tokens[$nextIndex]) && $this->isModelSuffixToken($tokens[$nextIndex])) {
for ($nextIndex = $index + 1; isset($tokens[$nextIndex]); $nextIndex++) {
if (!$this->isModelSuffixToken($tokens[$nextIndex])) {
break;
}
$keep[$nextIndex] = true;
}
}

View File

@@ -230,6 +230,21 @@ final readonly class SearchRepairService
}
}
if (
$requestedAccessoryCodes === []
&& $accessoryCandidates === []
) {
$modelVariantQueries = $this->buildSpecificModelVariantRepairQueries(
prompt: $prompt,
primaryQuery: $primaryQuery,
modelCandidates: $modelCandidates
);
if ($modelVariantQueries !== []) {
return $this->normalizeRepairQueries($modelVariantQueries, $primaryQuery);
}
}
$topPrimaryName = $primaryShopResults[0]->name ?? '';
$topPrimaryProductNumber = $primaryShopResults[0]->productNumber ?? null;
$topPrimaryPhrase = trim($topPrimaryName . ' ' . ($topPrimaryProductNumber ?? ''));
@@ -339,6 +354,121 @@ final readonly class SearchRepairService
return $query !== '' ? [$query] : [];
}
/**
 * Turn model candidates into repair search queries for specific variants.
 *
 * Each candidate is sanitized and kept only when it looks like a specific
 * model variant and is not already fully covered by the tokens of the prompt
 * plus primary query. The survivors are ordered by descending variant score;
 * candidates with equal scores keep their original input order. Duplicate
 * candidate strings are removed from the result.
 *
 * This keeps suffix variants such as family-number-code product names intact
 * instead of falling back to the generic base model.
 *
 * @param string[] $modelCandidates
 * @return string[] Ranked, de-duplicated candidate queries (possibly empty).
 */
private function buildSpecificModelVariantRepairQueries(
    string $prompt,
    string $primaryQuery,
    array $modelCandidates
): array {
    if ($modelCandidates === []) {
        return [];
    }

    $queryContext = trim($prompt . ' ' . $primaryQuery);

    $ranked = [];
    foreach ($modelCandidates as $position => $rawCandidate) {
        $cleaned = $this->sanitizeQuery($rawCandidate);

        $isEligible = $cleaned !== ''
            && $this->isSpecificModelVariantCandidate($cleaned)
            && !$this->queryAlreadyContainsCandidate($queryContext, $cleaned);
        if (!$isEligible) {
            continue;
        }

        $ranked[] = [
            'candidate' => $cleaned,
            'score' => $this->scoreSpecificModelVariantCandidate($cleaned, $queryContext),
            'index' => $position,
        ];
    }

    if ($ranked === []) {
        return [];
    }

    // Best score first; the original position breaks ties.
    usort(
        $ranked,
        static fn (array $left, array $right): int => ($right['score'] <=> $left['score'])
            ?: ($left['index'] <=> $right['index'])
    );

    return array_values(array_unique(array_column($ranked, 'candidate')));
}
/**
 * Tell whether a candidate contains a model number with a variant suffix.
 *
 * Two shapes are accepted: a 2-5 digit number (optionally continued by
 * letters, digits or hyphens) followed by whitespace and one or two suffix
 * words, or a 2-5 digit number with a letter suffix glued directly onto it.
 */
private function isSpecificModelVariantCandidate(string $candidate): bool
{
    $variantShapes = [
        // Number, whitespace, then a separate suffix word (optionally two).
        '/\b\d{2,5}[A-Za-z0-9\-]*\s+[A-Za-zÄÖÜäöüß]{2,8}\d{0,3}(?:\s+[A-Za-zÄÖÜäöüß]{2,8})?\b/u',
        // Number with the suffix attached without a separator.
        '/\b\d{2,5}[A-Za-z]{1,8}\d{0,3}\b/u',
    ];

    foreach ($variantShapes as $shape) {
        if (preg_match($shape, $candidate) === 1) {
            return true;
        }
    }

    return false;
}
/**
 * Score a model variant candidate against the query text.
 *
 * Starts from scoreCandidate() and, when a variant suffix can be extracted,
 * adjusts it:
 *  - plus the suffix length without whitespace, capped at 4;
 *  - plus 12 when the normalized suffix occurs as a whole word in the
 *    normalized query text;
 *  - minus 2 when the suffix contains a digit but the query text has none.
 */
private function scoreSpecificModelVariantCandidate(string $candidate, string $queryText): int
{
    $baseScore = $this->scoreCandidate($candidate);
    $variantSuffix = $this->extractModelVariantSuffix($candidate);
    if ($variantSuffix === '') {
        return $baseScore;
    }

    $compactSuffix = preg_replace('/\s+/u', '', $variantSuffix) ?? $variantSuffix;
    $adjustment = min(4, mb_strlen($compactSuffix, 'UTF-8'));

    $queryForMatching = $this->normalizeForRepairMatching($queryText);
    $suffixForMatching = $this->normalizeForRepairMatching($variantSuffix);
    if ($suffixForMatching !== '') {
        $wholeWordPattern = '/\b' . preg_quote($suffixForMatching, '/') . '\b/u';
        if (preg_match($wholeWordPattern, $queryForMatching) === 1) {
            $adjustment += 12;
        }
    }

    $suffixHasDigit = preg_match('/\d/u', $variantSuffix) === 1;
    if ($suffixHasDigit && preg_match('/\d/u', $queryText) !== 1) {
        $adjustment -= 2;
    }

    return $baseScore + $adjustment;
}
/**
 * Extract the variant suffix that follows a model number in a candidate.
 *
 * The space-separated form is tried first, then the form where the suffix is
 * glued onto the number. The captured suffix is passed through
 * sanitizeQuery(); an empty string is returned when neither form matches.
 */
private function extractModelVariantSuffix(string $candidate): string
{
    $suffixPatterns = [
        '/\b\d{2,5}[A-Za-z0-9\-]*\s+([A-Za-zÄÖÜäöüß]{2,8}\d{0,3}(?:\s+[A-Za-zÄÖÜäöüß]{2,8})?)\b/u',
        '/\b\d{2,5}([A-Za-z]{1,8}\d{0,3})\b/u',
    ];

    foreach ($suffixPatterns as $pattern) {
        if (preg_match($pattern, $candidate, $captured) === 1) {
            return $this->sanitizeQuery((string) ($captured[1] ?? ''));
        }
    }

    return '';
}
/**
 * Tell whether every token of the candidate already occurs in the query text.
 *
 * Both strings are split with tokenize(). If either side yields no tokens the
 * answer is false; otherwise it is true only when no candidate token is
 * missing from the query's token set.
 */
private function queryAlreadyContainsCandidate(string $queryText, string $candidate): bool
{
    $knownTokens = array_fill_keys($this->tokenize($queryText), true);
    $neededTokens = $this->tokenize($candidate);

    if ($knownTokens === [] || $neededTokens === []) {
        return false;
    }

    // Any key left over is a candidate token the query does not contain.
    $missingTokens = array_diff_key(array_fill_keys($neededTokens, true), $knownTokens);

    return $missingTokens === [];
}
/** @param string[] $terms */
private function buildTokenSet(array $terms): array
{