From ff273ff9a01896e174313468143502d22ababd76 Mon Sep 17 00:00:00 2001 From: team 1 Date: Sun, 26 Apr 2026 18:44:59 +0200 Subject: [PATCH] harden history find tokens --- ...IEX_ACCESSORY_PRICE_FOLLOWUP_FIX_README.md | 38 +++++ ..._ACCESSORY_PRICE_FOLLOWUP_HOTFIX_README.md | 25 +++ config/retriex/prompt.yaml | 2 + config/retriex/query_enrichment.yaml | 1 + config/retriex/vocabulary.yaml | 31 +++- src/Commerce/SearchRepairService.php | 147 +++++++++++++----- src/Config/RetriexEffectiveConfigProvider.php | 9 ++ src/Config/SearchRepairConfig.php | 128 +++++++++++++++ 8 files changed, 343 insertions(+), 38 deletions(-) create mode 100644 RETRIEX_ACCESSORY_PRICE_FOLLOWUP_FIX_README.md create mode 100644 RETRIEX_ACCESSORY_PRICE_FOLLOWUP_HOTFIX_README.md diff --git a/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_FIX_README.md b/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_FIX_README.md new file mode 100644 index 0000000..d1f3b8f --- /dev/null +++ b/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_FIX_README.md @@ -0,0 +1,38 @@ +# RetrieX accessory price follow-up precision fix + +This patch hardens commerce follow-up handling for questions such as: + +- "was kostet der indikator" +- "was kostet indikatortyp 300" + +Problem fixed: + +- Technical RAG correctly resolves `0,02 °dH -> Testomat 808 -> Indikatortyp 300`. +- The subsequent commerce follow-up generated the correct primary query (`indikatortyp 300 testomat 808`), but shop repair broadened the search again. +- Repair queries could add model-only searches or nearby products, causing device prices or unrelated reagents to appear in the final answer. + +Changes: + +- Requested accessory codes are extracted with a configurable regex that also matches `indikatortyp 300`. +- If a requested accessory/reagent code is present, shop repair is restricted to focused accessory-code queries. +- Prompt-anchored models are preferred over proximity-only models for requested accessory-code repair. +- Fallback accessory-code repair query templates are configurable. 
+- Prompt grounding rules now explicitly forbid answering accessory price follow-ups with device prices or unrelated reagent prices. + +No changes: + +- No retrieval/scoring/vector logic changed. +- No hard keyword list added to the retriever core. +- No PromptBuilder text removed; only more precise shop grounding rules were added. + +After applying: + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` + +Expected behavior: + +If the shop does not contain a clear product for `Indikatortyp 300`, RetrieX should say that the price is not available in the provided shop data and must not add the price of `Testomat 808`, `Testomat 2000`, or unrelated reagents. diff --git a/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_HOTFIX_README.md b/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_HOTFIX_README.md new file mode 100644 index 0000000..9bb2df1 --- /dev/null +++ b/RETRIEX_ACCESSORY_PRICE_FOLLOWUP_HOTFIX_README.md @@ -0,0 +1,25 @@ +# RetrieX Accessory Price Follow-up Hotfix + +This patch fixes the Symfony parameter placeholder error introduced by the previous accessory price follow-up patch and keeps the repair logic generic. + +## Fixes + +- Replaces YAML templates using `%code%` with `{code}` / `{term}` placeholders so Symfony does not treat them as container parameters. +- Keeps backward compatibility in PHP for legacy `%code%` / `%term%` templates if they are ever provided outside Symfony parameters. +- Moves requested-accessory-code repair terms, proximity settings, model-reduction patterns, and model-exclusion terms into configuration. +- Removes hard-coded product-family model patterns from `SearchRepairService`; model extraction now uses configurable generic patterns. +- Keeps the focused accessory-code repair behavior: when the user asks for the price of a specific accessory/code, repair queries remain code-specific and do not broaden into device-only searches. 
+ +## Generic behavior + +This patch does not add any fixed Testomat 808 / Testomat 2000 / Tritromat-specific branch. The model handling remains pattern/config driven so other product families and model variants can be handled through configuration. + +## After applying + +Run: + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index d4e34ac..7b55aec 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -142,6 +142,8 @@ parameters: retrieved knowledge.' - '- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.' - '- If the shop match is ambiguous, keep the technical identification and commercial details separate.' + - '- If the user asks for the price or availability of a referenced accessory, indicator, reagent, kit, set, or consumable, use commercial fields only from a shop result that clearly matches that accessory identity and code.' + - '- For such accessory price follow-ups, do not answer with the price, URL, product number, or availability of the main device or of unrelated reagents; if no matching accessory shop item is present, say that the price is not available in the provided shop data.' without_shop_rules: - '- Use retrieved knowledge as authoritative for factual answers.' - '- If no shop results are present, do not compensate with external recommendations or external product suggestions.' 
diff --git a/config/retriex/query_enrichment.yaml b/config/retriex/query_enrichment.yaml index 46646ff..4641952 100644 --- a/config/retriex/query_enrichment.yaml +++ b/config/retriex/query_enrichment.yaml @@ -14,3 +14,4 @@ parameters: Resthärte-Grenzwert: Wasserhärte Grenzwert: Überwachungsbereich store: shop + Indikatortyp: Indikator diff --git a/config/retriex/vocabulary.yaml b/config/retriex/vocabulary.yaml index 2add447..ac43b4e 100644 --- a/config/retriex/vocabulary.yaml +++ b/config/retriex/vocabulary.yaml @@ -2,7 +2,36 @@ # Views preserve the previous 1.4.2-tuned ordering exactly; per-service configs may still override them. parameters: retriex.commerce_query.config: {} - retriex.search_repair.config: {} + retriex.search_repair.config: + strict_requested_accessory_code_repair: true + prefer_prompt_anchored_model_for_requested_accessory_code: true + requested_accessory_code_pattern: '/\b(?:indikator(?:typ)?|indicator(?:\s*type)?|reagenz|reagent)\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu' + requested_accessory_code_fallback_query_templates: + - '{term} {code}' + requested_accessory_code_fallback_terms: + - indikatortyp + - indikator + - indicator + - reagenz + - reagent + requested_accessory_code_context_prefix_terms: + - indikatortyp + - indikator + - indicator + - reagenz + - reagent + requested_accessory_code_proximity_window: 1600 + specific_model_candidate_patterns: + - '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß0-9][A-Za-zÄÖÜäöüß0-9®\-]*){0,3}\s+\d{2,5}(?:\s+[A-ZÄÖÜ]{1,8})?)\b/u' + model_candidate_exclude_terms: + - indikatortyp + - indikator + - indicator + - reagenz + - reagent + - verfügbarkeit + - verfuegbarkeit + - shop retriex.vocabulary.config: classes: device: diff --git a/src/Commerce/SearchRepairService.php b/src/Commerce/SearchRepairService.php index cd08fc7..16d52da 100644 --- a/src/Commerce/SearchRepairService.php +++ b/src/Commerce/SearchRepairService.php @@ -212,20 +212,30 @@ final readonly class SearchRepairService 
$topPrimaryProductNumber = $primaryShopResults[0]->productNumber ?? null; $topPrimaryPhrase = trim($topPrimaryName . ' ' . ($topPrimaryProductNumber ?? '')); - $queries = []; - - $queries = array_merge( - $queries, - $this->buildFocusedModelAccessoryQueries( - prompt: $prompt, - primaryQuery: $primaryQuery, - knowledgeText: $knowledgeText, - modelCandidates: $modelCandidates, - accessoryCandidates: $accessoryCandidates, - requestedAccessoryCodes: $requestedAccessoryCodes - ) + $queries = $this->buildFocusedModelAccessoryQueries( + prompt: $prompt, + primaryQuery: $primaryQuery, + knowledgeText: $knowledgeText, + modelCandidates: $modelCandidates, + accessoryCandidates: $accessoryCandidates, + requestedAccessoryCodes: $requestedAccessoryCodes ); + if ($requestedAccessoryCodes !== [] && $this->config->shouldRestrictRequestedAccessoryCodeRepair()) { + foreach ($accessoryCandidates as $accessoryCandidate) { + if ($this->candidateMatchesRequestedAccessoryCodes($accessoryCandidate, $requestedAccessoryCodes)) { + $queries[] = $accessoryCandidate; + } + } + + $queries = array_merge( + $queries, + $this->buildRequestedAccessoryFallbackQueries($requestedAccessoryCodes) + ); + + return $this->normalizeRepairQueries($queries, $primaryQuery); + } + if ($topPrimaryPhrase !== '' && $this->containsModelLikePhrase($topPrimaryPhrase)) { $queries[] = $topPrimaryPhrase; } elseif ($topPrimaryName !== '' && $this->containsModelLikePhrase($topPrimaryName)) { @@ -246,6 +256,15 @@ final readonly class SearchRepairService } } + return $this->normalizeRepairQueries($queries, $primaryQuery); + } + + /** + * @param string[] $queries + * @return string[] + */ + private function normalizeRepairQueries(array $queries, string $primaryQuery): array + { $queries = array_map( fn(string $query): string => $this->sanitizeQuery($query), $queries @@ -441,9 +460,7 @@ final readonly class SearchRepairService $accessories = $accessoryCandidates; if ($accessories === []) { - foreach ($requestedAccessoryCodes 
as $code) { - $accessories[] = 'Indikator ' . $code; - } + $accessories = $this->buildRequestedAccessoryFallbackQueries($requestedAccessoryCodes); } foreach ($models as $model) { @@ -469,7 +486,7 @@ final readonly class SearchRepairService { $codes = []; - if (preg_match_all('/\b(?:indikator|indicator|reagenz|reagent)\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu', $text, $matches) !== false) { + if (preg_match_all($this->config->getRequestedAccessoryCodePattern(), $text, $matches) !== false) { foreach ($matches[1] ?? [] as $code) { $normalized = $this->normalizeAccessoryCode((string) $code); if ($normalized !== '') { @@ -481,6 +498,51 @@ final readonly class SearchRepairService return array_values($codes); } + /** + * @param string[] $requestedCodes Accessory codes to build queries for; each is normalized and codes that normalize to '' are skipped. + * @return string[] Sanitized, de-duplicated, non-empty queries produced from the configured templates and fallback terms. + */ + private function buildRequestedAccessoryFallbackQueries(array $requestedCodes): array + { + $queries = []; + $templates = $this->config->getRequestedAccessoryCodeFallbackQueryTemplates(); + $terms = $this->config->getRequestedAccessoryCodeFallbackTerms(); + + foreach ($requestedCodes as $code) { + $normalizedCode = $this->normalizeAccessoryCode($code); + if ($normalizedCode === '') { + continue; + } + + foreach ($templates as $template) { + if (str_contains($template, '{term}') || str_contains($template, '%term%')) { // legacy %term% templates must expand once per term too, not collapse to an empty term + foreach ($terms as $term) { + $queries[] = $this->applyRequestedAccessoryTemplate($template, $normalizedCode, $term); + } + continue; + } + + $queries[] = $this->applyRequestedAccessoryTemplate($template, $normalizedCode, ''); + } + } + + return array_values(array_unique(array_filter( + array_map(fn(string $query): string => $this->sanitizeQuery($query), $queries), + static fn(string $query): bool => $query !== '' + ))); + } + + private function applyRequestedAccessoryTemplate(string $template, string $code, string $term): string + { + $query = str_replace( + ['{code}', '{term}', '%code%', '%term%'], + [$code, $term, $code, $term], + $template + ); + + return $this->sanitizeQuery($query); + } + /** * @param
string[] $accessoryCandidates * @param string[] $requestedCodes @@ -505,7 +567,8 @@ final readonly class SearchRepairService array $modelCandidates, array $requestedCodes ): array { - $models = []; + $promptAnchoredModels = []; + $proximityModels = []; $normalizedPrompt = $this->normalizeForRepairMatching($prompt); foreach ($modelCandidates as $candidate) { @@ -517,15 +580,24 @@ final readonly class SearchRepairService $normalizedCandidate = $this->normalizeForRepairMatching($candidate); $isPromptAnchored = $normalizedCandidate !== '' && str_contains($normalizedPrompt, $normalizedCandidate); + if ($isPromptAnchored) { + $promptAnchoredModels[$candidate] = $candidate; + continue; + } + foreach ($requestedCodes as $code) { - if ($isPromptAnchored || $this->modelAppearsNearAccessoryCode($knowledgeText, $candidate, $code)) { - $models[$candidate] = $candidate; + if ($this->modelAppearsNearAccessoryCode($knowledgeText, $candidate, $code)) { + $proximityModels[$candidate] = $candidate; break; } } } - return array_values($models); + if ($this->config->shouldPreferPromptAnchoredModelForRequestedAccessoryCode() && $promptAnchoredModels !== []) { + return array_values($promptAnchoredModels); + } + + return array_values($promptAnchoredModels + $proximityModels); } private function candidateMatchesRequestedAccessoryCodes(string $candidate, array $requestedCodes): bool @@ -564,17 +636,19 @@ final readonly class SearchRepairService return false; } - $codeNeedles = [ - 'indikator ' . $normalizedCode, - 'indicator ' . $normalizedCode, - 'indikatortyp ' . $normalizedCode, - $normalizedCode, - ]; + $codeNeedles = [$normalizedCode]; + foreach ($this->config->getRequestedAccessoryCodeContextPrefixTerms() as $term) { + $normalizedTerm = $this->normalizeForRepairMatching($term); + if ($normalizedTerm !== '') { + $codeNeedles[] = trim($normalizedTerm . ' ' . 
$normalizedCode); + } + } + $codeNeedles = array_values(array_unique($codeNeedles)); foreach ($codeNeedles as $needle) { foreach ($this->findNeedlePositions($normalizedText, $needle) as $codePos) { foreach ($modelPositions as $modelPos) { - if (abs($codePos - $modelPos) <= 1600) { + if (abs($codePos - $modelPos) <= $this->config->getRequestedAccessoryCodeProximityWindow()) { return true; } } @@ -612,19 +686,18 @@ final readonly class SearchRepairService return ''; } - $patterns = [ - '/\b(Testomat(?:®)?\s+(?:\d{3,4}|EVO(?:\s+[A-ZÄÖÜ]{1,8})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-ZÄÖÜ]{1,8})?))\b/iu', - '/\b(Horiba\s+LAQUA\s+[A-Z0-9\-]+)\b/iu', - ]; - - foreach ($patterns as $pattern) { - if (preg_match($pattern, $candidate, $matches) === 1) { - return $this->sanitizeQuery((string) ($matches[1] ?? '')); + $normalizedCandidate = $this->normalizeForRepairMatching($candidate); + foreach ($this->config->getModelCandidateExcludeTerms() as $term) { + $normalizedTerm = $this->normalizeForRepairMatching($term); + if ($normalizedTerm !== '' && preg_match('/\b' . preg_quote($normalizedTerm, '/') . '\b/u', $normalizedCandidate) === 1) { + return ''; } } - if (preg_match('/\b(?:indikator|indicator|reagenz|reagent|verfuegbarkeit|verfügbarkeit|shop)\b/iu', $candidate) === 1) { - return ''; + foreach ($this->config->getSpecificModelCandidatePatterns() as $pattern) { + if (preg_match($pattern, $candidate, $matches) === 1) { + return $this->sanitizeQuery((string) ($matches[1] ?? 
'')); + } } return $candidate; diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 5988634..4fb297a 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -527,6 +527,14 @@ final readonly class RetriexEffectiveConfigProvider 'enabled' => $this->searchRepairConfig->isEnabled(), 'max_repair_queries' => $this->searchRepairConfig->getMaxRepairQueries(), 'min_primary_results_without_repair' => $this->searchRepairConfig->getMinPrimaryResultsWithoutRepair(), + 'strict_requested_accessory_code_repair' => $this->searchRepairConfig->shouldRestrictRequestedAccessoryCodeRepair(), + 'prefer_prompt_anchored_model_for_requested_accessory_code' => $this->searchRepairConfig->shouldPreferPromptAnchoredModelForRequestedAccessoryCode(), + 'requested_accessory_code_fallback_query_templates' => $this->searchRepairConfig->getRequestedAccessoryCodeFallbackQueryTemplates(), + 'requested_accessory_code_fallback_terms' => $this->searchRepairConfig->getRequestedAccessoryCodeFallbackTerms(), + 'requested_accessory_code_context_prefix_terms' => $this->searchRepairConfig->getRequestedAccessoryCodeContextPrefixTerms(), + 'requested_accessory_code_proximity_window' => $this->searchRepairConfig->getRequestedAccessoryCodeProximityWindow(), + 'specific_model_candidate_patterns' => $this->searchRepairConfig->getSpecificModelCandidatePatterns(), + 'model_candidate_exclude_terms' => $this->searchRepairConfig->getModelCandidateExcludeTerms(), 'generic_candidate_tokens' => $this->searchRepairConfig->getGenericCandidateTokens(), 'accessory_candidate_terms' => $this->searchRepairConfig->getAccessoryCandidateTerms(), 'accessory_or_bundle_terms' => $this->searchRepairConfig->getAccessoryOrBundleTerms(), @@ -546,6 +554,7 @@ final readonly class RetriexEffectiveConfigProvider 'patterns' => [ 'model_candidate' => $this->searchRepairConfig->getModelCandidatePattern(), 'accessory_candidate' => 
$this->searchRepairConfig->getAccessoryCandidatePattern(), + 'requested_accessory_code' => $this->searchRepairConfig->getRequestedAccessoryCodePattern(), 'accessory_or_bundle' => $this->searchRepairConfig->getAccessoryOrBundlePattern(), 'model_like' => $this->searchRepairConfig->getModelLikePattern(), 'specificity_boost' => $this->searchRepairConfig->getSpecificityBoostPattern(), diff --git a/src/Config/SearchRepairConfig.php b/src/Config/SearchRepairConfig.php index 5c94993..5057bdb 100644 --- a/src/Config/SearchRepairConfig.php +++ b/src/Config/SearchRepairConfig.php @@ -85,6 +85,66 @@ final class SearchRepairConfig return $this->minPrimaryResultsWithoutRepair; } + public function shouldRestrictRequestedAccessoryCodeRepair(): bool + { + return $this->bool('strict_requested_accessory_code_repair', true); + } + + public function shouldPreferPromptAnchoredModelForRequestedAccessoryCode(): bool + { + return $this->bool('prefer_prompt_anchored_model_for_requested_accessory_code', true); + } + + /** @return string[] */ + public function getRequestedAccessoryCodeFallbackQueryTemplates(): array + { + return $this->stringList( + 'requested_accessory_code_fallback_query_templates', + ['{term} {code}'] + ); + } + + /** @return string[] */ + public function getRequestedAccessoryCodeFallbackTerms(): array + { + return $this->stringList( + 'requested_accessory_code_fallback_terms', + $this->getAccessoryCandidateTerms() + ); + } + + /** @return string[] */ + public function getRequestedAccessoryCodeContextPrefixTerms(): array + { + return $this->stringList( + 'requested_accessory_code_context_prefix_terms', + $this->getAccessoryCandidateTerms() + ); + } + + public function getRequestedAccessoryCodeProximityWindow(): int + { + return $this->int('requested_accessory_code_proximity_window', 1600); + } + + /** @return string[] */ + public function getSpecificModelCandidatePatterns(): array + { + return $this->stringList( + 'specific_model_candidate_patterns', + 
[$this->getModelLikePattern()] + ); + } + + /** @return string[] */ + public function getModelCandidateExcludeTerms(): array + { + return $this->stringList( + 'model_candidate_exclude_terms', + array_merge($this->getAccessoryCandidateTerms(), ['verfuegbarkeit', 'verfügbarkeit', 'shop']) + ); + } + public function getTopProductLogLimit(): int { return 3; @@ -100,6 +160,21 @@ final class SearchRepairConfig return '/\b((?:' . implode('|', $this->getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu'; } + public function getRequestedAccessoryCodePattern(): string + { + $fallbackTerms = array_map( + static fn(string $term): string => preg_quote($term, '/'), + $this->getRequestedAccessoryCodeContextPrefixTerms() + ); + $fallbackTerms = array_filter($fallbackTerms, static fn(string $term): bool => $term !== ''); + + $fallbackPattern = $fallbackTerms === [] + ? '/\b([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu' + : '/\b(?:' . implode('|', $fallbackTerms) . ')\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'; + + return $this->string('requested_accessory_code_pattern', $fallbackPattern); + } + public function getAccessoryOrBundlePattern(): string { return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu'; @@ -232,6 +307,59 @@ final class SearchRepairConfig return $this->vocabulary?->view($path, $fallback) ?? $fallback; } + private function string(string $key, string $default): string + { + $value = $this->config[$key] ?? $default; + + if (!is_scalar($value)) { + return $default; + } + + $value = trim((string) $value); + + return $value !== '' ? $value : $default; + } + + private function int(string $key, int $default): int + { + $value = $this->config[$key] ?? $default; + + if (is_int($value)) { + return $value; + } + + if (is_numeric($value)) { + return (int) $value; + } + + return $default; + } + + private function bool(string $key, bool $default): bool + { + $value = $this->config[$key] ?? 
$default; + + if (is_bool($value)) { + return $value; + } + + if (is_int($value)) { + return $value !== 0; + } + + if (is_string($value)) { + $normalized = strtolower(trim($value)); + if (in_array($normalized, ['1', 'true', 'yes', 'on'], true)) { + return true; + } + if (in_array($normalized, ['0', 'false', 'no', 'off'], true)) { + return false; + } + } + + return $default; + } + /** @return string[] */ private function stringList(string $key, array $default): array {