From 8827a5a13b9a0812392b08b2ac7ae450c50a11a7 Mon Sep 17 00:00:00 2001 From: team 1 Date: Sat, 9 May 2026 20:28:43 +0200 Subject: [PATCH] p72 --- config/retriex/genre.yaml | 1 + ...2_EXACT_ACCESSORY_CODE_PRECISION_README.md | 74 +++++ src/Agent/AgentRunner.php | 298 ++++++++++++++++++ src/Config/AgentRunnerConfig.php | 9 + 4 files changed, 382 insertions(+) create mode 100644 patch_history/RETRIEX_PATCH_72_EXACT_ACCESSORY_CODE_PRECISION_README.md diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index ec6d699..b88d2f8 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -1435,6 +1435,7 @@ parameters: - '- Never write shop-hit lines such as price, availability, URL, product number, or Shop-Treffer below a RAG device unless the same exact SHOP PRODUCT RECORD names that device as the exact shop product.' - '- Never rename a role-incompatible accessory shop record into a main device in headings, summaries, or shop-hit lines.' - '- If the user asks for the price or availability of a referenced accessory, indicator, reagent, kit, set, or consumable, use commercial fields only from a shop result that clearly matches that accessory identity and code.' + - '- If an accessory, indicator, reagent, kit, set, or consumable code is explicitly requested, do not merge shop variants whose code has an additional suffix, prefix, or variant token unless the user explicitly requested that full variant code.' - '- For such accessory price follow-ups, do not answer with the price, URL, product number, or availability of the main device or of unrelated reagents; if no matching accessory shop item is present, say that the price is not available in the provided shop data.' prompt_keyword_views: origin: genre_native diff --git a/patch_history/RETRIEX_PATCH_72_EXACT_ACCESSORY_CODE_PRECISION_README.md b/patch_history/RETRIEX_PATCH_72_EXACT_ACCESSORY_CODE_PRECISION_README.md new file mode 100644 index 0000000..8898b6a --- /dev/null +++ b/patch_history/RETRIEX_PATCH_72_EXACT_ACCESSORY_CODE_PRECISION_README.md @@ -0,0 +1,74 @@ +# RetrieX Patch p72 - Exact Accessory Code Precision + +## Ziel + +p72 verhindert, dass bei Preis-/Verfügbarkeits-Follow-ups zu einer konkret referenzierten Zubehör-, Indikator- oder Reagenz-Code-Identität nahe Varianten als gleichwertige Treffer in die Antwort gelangen. + +Konkreter Regressionsfall: + +1. `Was ist der niedrigste Grenzwert für die Wasserhärte, welcher mit einem Testomaten überwacht werden kann?` +2. `mit welchem indikator wird der wert gemessen` +3. `was kostet der indikator` + +Wenn der Verlauf auf `Indikatortyp 300` zeigt, dürfen Produkte wie `300 S` nicht als Preisantwort für `300` mit ausgegeben werden, sofern der Nutzer nicht explizit die vollständige Variante `300 S` angefragt hat. + +## Umsetzung + +- `AgentRunner` filtert Shop-Ergebnisse nach einer exakt angefragten Zubehör-/Code-Identität, bevor die Ergebnisse an den PromptBuilder gehen. +- Die Erkennung ist generisch: + - Code-Begriffe kommen aus `genre.yaml` / bestehender Konfiguration (`search_repair.requested_accessory_code_terms`). + - Code-Token werden anhand bestehender Code-Patterns bzw. generischer alphanumerischer Code-Formen erkannt. + - Ein reiner Code wie `300` matcht nicht mehr automatisch Varianten mit zusätzlichem Ein-Buchstaben-Suffix wie `300 S`. + - Wird die Variante explizit angefragt, z. B. `300 S`, bleibt sie matchfähig. +- `AgentRunnerConfig` stellt die bestehenden requested-accessory-code terms für den Runner bereit. +- `genre.yaml` ergänzt eine generische Prompt-Regel, damit das LLM zusätzlich keine Code-Varianten zusammenführt, die nicht explizit angefragt wurden. + +## Bewusst nicht geändert + +- Kein Testomat-/Indikator-300-Sonderfall im PHP-Core. +- Keine neue harte fachliche Tokenliste im PHP-Core. +- Keine Änderung an Retrieval, Scoring, Shopware-Suche, Ranking oder Follow-up-Action-UI. + +## Geänderte Dateien + +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `config/retriex/genre.yaml` + +## Lokale Checks + +Ausgeführt im Patch-Arbeitsverzeichnis: + +```bash +php -l src/Agent/AgentRunner.php +php -l src/Config/AgentRunnerConfig.php +python3 - <<'PY' +import yaml +for f in ['config/retriex/genre.yaml','config/retriex/chat-messages.yaml','config/retriex/agent.yaml']: + with open(f, 'r', encoding='utf-8') as fh: + yaml.safe_load(fh) +print('YAML parse OK') +PY +``` + +`vendor/` ist im ZIP nicht enthalten; Symfony-Console-Checks bitte in der Zielumgebung ausführen. + +## Empfohlene Regressionstests + +```text +Was ist der niedrigste Grenzwert für die Wasserhärte, welcher mit einem Testomaten überwacht werden kann? +``` + +```text +mit welchem indikator wird der wert gemessen +``` + +```text +was kostet der indikator +``` + +Erwartung: + +- Preisantwort für `Indikatortyp 300` bleibt bei exakt passenden `300`-Produkten. +- `300 S` wird nicht als Preisvariante für `300` ausgegeben, solange `300 S` nicht explizit angefragt wurde. +- Falls explizit nach `300 S` gefragt wird, darf `300 S` weiterhin gefunden werden. diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index b1858a2..9e1d74d 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -519,6 +519,7 @@ final readonly class AgentRunner } $shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults); + $shopResults = $this->guardShopResultsByExactRequestedAccessoryCode($prompt, $shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; @@ -3538,6 +3539,303 @@ final readonly class AgentRunner return $this->containsAllShopQueryTokens($productText, $anchor); } + /** + * @param ShopProductResult[] $shopResults + * @return ShopProductResult[] + */ + private function guardShopResultsByExactRequestedAccessoryCode(string $prompt, string $shopSearchQuery, array $shopResults): array + { + if ($shopResults === []) { + return $shopResults; + } + + $requestedCodes = $this->extractExactRequestedAccessoryCodes($prompt, $shopSearchQuery); + if ($requestedCodes === []) { + return $shopResults; + } + + $filtered = []; + foreach ($shopResults as $product) { + if (!$product instanceof ShopProductResult) { + continue; + } + + if ($this->shopProductMatchesExactRequestedAccessoryCode($product, $requestedCodes)) { + $filtered[] = $product; + } + } + + return $filtered !== [] ? $filtered : $shopResults; + } + + /** + * @return string[] + */ + private function extractExactRequestedAccessoryCodes(string $prompt, string $shopSearchQuery): array + { + $text = $this->normalizeOneLine(trim($prompt . ' ' . $shopSearchQuery)); + if ($text === '') { + return []; + } + + $codeTerms = $this->agentRunnerConfig->getRequestedAccessoryCodeTerms(); + if ($codeTerms === []) { + return []; + } + + $tokens = $this->tokenizeAccessoryCodeContext($text); + if ($tokens === []) { + return []; + } + + $termTokenSequences = []; + foreach ($codeTerms as $term) { + $termTokens = $this->tokenizeAccessoryCodeContext($term); + if ($termTokens !== []) { + $termTokenSequences[] = $termTokens; + } + } + + if ($termTokenSequences === []) { + return []; + } + + $codes = []; + foreach ($termTokenSequences as $termTokens) { + $termLength = count($termTokens); + + foreach ($tokens as $position => $_token) { + if (!$this->tokenSequenceMatchesAt($tokens, $termTokens, $position)) { + continue; + } + + $code = $this->findNearestRequestedAccessoryCodeAfter($tokens, $position + $termLength, 3, $termTokenSequences); + if ($code === '') { + $code = $this->findNearestRequestedAccessoryCodeBefore($tokens, $position - 1, 3, $termTokenSequences); + } + + if ($code !== '') { + $codes[$code] = $code; + } + } + } + + return array_values($codes); + } + + /** + * @param string[] $tokens + * @param array $termTokenSequences + */ + private function findNearestRequestedAccessoryCodeAfter(array $tokens, int $start, int $window, array $termTokenSequences): string + { + $end = min(count($tokens) - 1, $start + max(0, $window - 1)); + for ($index = max(0, $start); $index <= $end; $index++) { + $code = $this->buildRequestedAccessoryCodeFromTokenWindow($tokens, $index, $termTokenSequences); + if ($code !== '') { + return $code; + } + } + + return ''; + } + + /** + * @param string[] $tokens + * @param array $termTokenSequences + */ + private function findNearestRequestedAccessoryCodeBefore(array $tokens, int $start, int $window, array $termTokenSequences): string + { + $end = max(0, $start - max(0, $window - 1)); + for ($index = min(count($tokens) - 1, $start); $index >= $end; $index--) { + $code = $this->buildRequestedAccessoryCodeFromTokenWindow($tokens, $index, $termTokenSequences); + if ($code !== '') { + return $code; + } + } + + return ''; + } + + /** + * @param string[] $tokens + * @param string[] $needle + */ + private function tokenSequenceMatchesAt(array $tokens, array $needle, int $position): bool + { + if ($needle === [] || $position < 0 || $position + count($needle) > count($tokens)) { + return false; + } + + foreach ($needle as $offset => $needleToken) { + if (($tokens[$position + $offset] ?? null) !== $needleToken) { + return false; + } + } + + return true; + } + + /** + * @param string[] $tokens + * @param array $termTokenSequences + */ + private function buildRequestedAccessoryCodeFromTokenWindow(array $tokens, int $index, array $termTokenSequences): string + { + $token = $tokens[$index] ?? ''; + if (!$this->isStrictAccessoryCodeToken($token)) { + return ''; + } + + $next = $tokens[$index + 1] ?? ''; + if ($this->isSingleLetterVariantSuffix($next) && !$this->tokenStartsAnyConfiguredTerm($tokens, $termTokenSequences, $index + 1)) { + return $this->normalizeAccessoryCodePhrase($token . ' ' . $next); + } + + $previous = $tokens[$index - 1] ?? ''; + if ($this->isShortAlphaCodePrefix($previous) && !$this->tokenStartsAnyConfiguredTerm($tokens, $termTokenSequences, $index - 1)) { + return $this->normalizeAccessoryCodePhrase($previous . ' ' . $token); + } + + return $this->normalizeAccessoryCodePhrase($token); + } + + /** + * @param string[] $tokens + * @param array $termTokenSequences + */ + private function tokenStartsAnyConfiguredTerm(array $tokens, array $termTokenSequences, int $position): bool + { + foreach ($termTokenSequences as $termTokens) { + if ($this->tokenSequenceMatchesAt($tokens, $termTokens, $position)) { + return true; + } + } + + return false; + } + + /** + * @param string[] $requestedCodes + */ + private function shopProductMatchesExactRequestedAccessoryCode(ShopProductResult $product, array $requestedCodes): bool + { + $identityText = $this->normalizeOneLine(trim(implode(' ', array_filter([ + $product->name, + $product->url, + ])))); + + if ($identityText === '') { + return false; + } + + $tokens = $this->tokenizeAccessoryCodeContext($identityText); + if ($tokens === []) { + return false; + } + + foreach ($requestedCodes as $code) { + if ($this->accessoryCodeTokensContainExactCode($tokens, $code)) { + return true; + } + } + + return false; + } + + /** + * @param string[] $tokens + */ + private function accessoryCodeTokensContainExactCode(array $tokens, string $requestedCode): bool + { + $codeTokens = $this->tokenizeAccessoryCodeContext($requestedCode); + if ($codeTokens === []) { + return false; + } + + $compactCode = $this->normalizeAccessoryCodeForExactMatch($requestedCode); + $codeLength = count($codeTokens); + + foreach ($tokens as $index => $token) { + if ($codeLength === 1 && $this->normalizeAccessoryCodeForExactMatch($token) === $compactCode) { + $next = $tokens[$index + 1] ?? ''; + if (!$this->isSingleLetterVariantSuffix($next)) { + return true; + } + + continue; + } + + if ($this->tokenSequenceMatchesAt($tokens, $codeTokens, $index)) { + return true; + } + + if ($this->normalizeAccessoryCodeForExactMatch(implode(' ', array_slice($tokens, $index, $codeLength))) === $compactCode) { + return true; + } + } + + return false; + } + + /** + * @return string[] + */ + private function tokenizeAccessoryCodeContext(string $text): array + { + $normalized = mb_strtolower($this->normalizeOneLine($text), 'UTF-8'); + if ($normalized === '') { + return []; + } + + preg_match_all('/[\p{L}]+\d[\p{L}\p{N}\-]*|\d+(?:[,.]\d+)?[\p{L}\p{N}\-]*|[\p{L}]+/u', $normalized, $matches); + + return array_values(array_filter( + array_map(static fn(string $token): string => trim($token), $matches[0] ?? []), + static fn(string $token): bool => $token !== '' + )); + } + + private function isStrictAccessoryCodeToken(string $token): bool + { + $token = trim($token); + if ($token === '' || str_contains($token, ',') || str_contains($token, '.')) { + return false; + } + + if (preg_match('/^\d+$/u', $token) === 1) { + return mb_strlen($token, 'UTF-8') >= 2; + } + + foreach ($this->agentRunnerConfig->getShopQueryPositiveTokenFilterCodePatterns() as $pattern) { + if (@preg_match($pattern, $token) === 1) { + return true; + } + } + + return preg_match('/^(?:[a-z]{1,4}\d{1,5}[a-z0-9-]*|\d{2,5}[a-z0-9-]*)$/iu', $token) === 1; + } + + private function isSingleLetterVariantSuffix(string $token): bool + { + return preg_match('/^[a-z]$/iu', trim($token)) === 1; + } + + private function isShortAlphaCodePrefix(string $token): bool + { + return preg_match('/^[a-z]{1,4}$/iu', trim($token)) === 1; + } + + private function normalizeAccessoryCodePhrase(string $code): string + { + return $this->normalizeOneLine(mb_strtolower($code, 'UTF-8')); + } + + private function normalizeAccessoryCodeForExactMatch(string $code): string + { + return preg_replace('/[^a-z0-9]+/iu', '', mb_strtolower($code, 'UTF-8')) ?? ''; + } + /** * @param ShopProductResult[] $shopResults * @return ShopProductResult[] diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 49ff925..febfff6 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -1379,6 +1379,15 @@ final class AgentRunnerConfig ))); } + /** + * @return string[] + */ + public function getRequestedAccessoryCodeTerms(): array + { + return $this->genreStringList('search_repair.requested_accessory_code_terms.terms') + ?: $this->genreStringList('product_roles.requested_accessory_code_terms.terms'); + } + public function isDirectShopResultGuardEnabled(): bool { return $this->getRequiredBool('shop_runtime.result_identity.enabled');