From 1897fdf0ebb6fa00f933c69a40feb6057dfe5562 Mon Sep 17 00:00:00 2001 From: team 1 Date: Fri, 1 May 2026 19:49:43 +0200 Subject: [PATCH] patch 17 --- RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md | 46 +++++++++++++ config/retriex/agent.yaml | 19 ++++++ config/retriex/commerce.yaml | 6 ++ config/retriex/intent.yaml | 11 ++++ config/retriex/prompt.yaml | 2 + src/Agent/AgentRunner.php | 38 +++++++++++ src/Config/AgentRunnerConfig.php | 16 +++++ src/Config/RetriexEffectiveConfigProvider.php | 13 ++++ src/Knowledge/Retrieval/NdjsonChunkLookup.php | 64 +++++++++++++++++++ 9 files changed, 215 insertions(+) create mode 100644 RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md diff --git a/RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md b/RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md new file mode 100644 index 0000000..a6061f5 --- /dev/null +++ b/RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md @@ -0,0 +1,46 @@ +# RetrieX Patch 17 - Accuracy Hotfix + +## Purpose + +Patch 17 addresses the first concrete accuracy bug set after the YAML-only and governance cleanup. +It keeps the YAML-only policy intact and does not introduce hard domain lists in PHP. + +## Covered issue classes + +1. Product title anchoring for prompts that mention a product family plus variant suffix but omit an intermediate numeric family token. + This prevents variant-specific questions from falling back to broader semantic hits. +2. RAG evidence confidence for aggregate/count-style portfolio questions. + Semantic product hits are no longer enough to mark such answers as `fachlich belegt` unless aggregate evidence terms are present. +3. Commerce intent detection for product-seeking measurement prompts such as wanting to measure a parameter in an application context. +4. Commerce intent/shop query support for cable/accessory searches such as pH/Redox connection cables. +5. Prompt guardrail reinforcement so the model does not transfer accessory roles between separate shop product records. + +## Changed files + +- `src/Knowledge/Retrieval/NdjsonChunkLookup.php` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` +- `config/retriex/agent.yaml` +- `config/retriex/intent.yaml` +- `config/retriex/commerce.yaml` +- `config/retriex/prompt.yaml` + +## Local checks + +Run after applying: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Manual accuracy retests + +- `qwelche grenzwerte kann der testomat testomat cal messen` +- `wieviele testomat geräte haben wir` +- `ich würde gern chlor im schwinnbad messen` +- `zeige mir Anschlusskabel für pH/Redox` followed by `suche im shop` +- `suche Messgeräte zur wasseranalyse mit dem parameter Carbonhärte (KH)` \ No newline at end of file diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index ab7acf7..c17e232 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -105,6 +105,24 @@ parameters: - produkte - artikel - shop + aggregate_query_patterns: + - '/\bwie\s+viele\b/u' + - '/\bwieviele\b/u' + - '/\banzahl\b/u' + - '/\bcount\b/u' + - '/\bgesamtzahl\b/u' + aggregate_evidence_terms: + - anzahl + - gesamtzahl + - stückzahl + - stueckzahl + - count + - portfolio + - sortiment + - bestand + - bestände + - bestaende + - lieferprogramm synonyms: salinität: - salinität @@ -304,6 +322,7 @@ parameters: - konnte - könnte - ich + - mir - wir - man - nutzen diff --git a/config/retriex/commerce.yaml b/config/retriex/commerce.yaml index 2cd6a83..478e0ca 100644 --- a/config/retriex/commerce.yaml +++ b/config/retriex/commerce.yaml @@ -140,6 +140,8 @@ parameters: sienem: seinem sienes: seines indicatoren: indikatoren + schwinnbad: schwimmbad + schwimbad: schwimmbad search_token_canonical_map: indikatoren: indikator @@ -158,6 +160,10 @@ parameters: - zubehör - zubehor - ersatzteil + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel - verbrauchsmaterial - chemie - indikatorchemie diff --git a/config/retriex/intent.yaml b/config/retriex/intent.yaml index 8dd210c..31e9e36 100644 --- a/config/retriex/intent.yaml +++ b/config/retriex/intent.yaml @@ -40,6 +40,11 @@ parameters: - zubehör - zubehoer - ersatzteil + - anschlusskabel + - kabel + - sensorkabel + - elektrode + - elektrodenkabel non_product_commerce_signals: - shop - alle @@ -69,6 +74,8 @@ parameters: - '/\bmit\s+welche(?:m|n|r|s)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u' - '/\bwelche(?:r|s|n|m)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:kann|können|koennen|misst|messen|überwacht|ueberwacht|eignet|geeignet|passt|gut|empfehl)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u' - '/\b(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:für|fuer)\b.*\b(?:messung|messen|überwachung|ueberwachung)\b/u' + - '/\b(?:ich\s+)?(?:würde|wuerde|möchte|moechte|will|brauche|benötige|benoetige)\b.{0,80}\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b/u' + - '/\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b.{0,80}\b(?:schwimmbad|pool|becken|wasseranalyse)\b/u' price_terms: - euro - € @@ -143,6 +150,10 @@ parameters: - '/\bzubehör\b/u' - '/\bzubehoer\b/u' - '/\bersatzteil(?:e)?\b/u' + - '/\banschlusskabel\b/u' + - '/\bkabel\b/u' + - '/\bsensorkabel\b/u' + - '/\belektrodenkabel\b/u' technical_factual_knowledge: signal_label: technical_factual_knowledge_query question_marker_patterns: diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index c17d024..50b2543 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -434,6 +434,8 @@ parameters: - '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.' - '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.' + - '- Use the Requested role, Inferred role, and Role compatibility fields independently for each SHOP PRODUCT RECORD; never transfer the role of an accessory, indicator, reagent, kit, or set to a different shop record.' + - '- If a SHOP PRODUCT RECORD has Inferred role: main_device, do not describe that same product as accessory_or_consumable merely because other shown records are accessories or consumables.' - '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.' - '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.' - '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.' diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 0724db3..8ee16a5 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -1653,6 +1653,13 @@ final readonly class AgentRunner $haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks))); + if ( + $this->isAggregateRagEvidenceQuery($prompt) + && !$this->containsAnyRagEvidenceTerm($haystack, $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms()) + ) { + return 'weak'; + } + foreach ($needles as $needleGroup) { foreach ($needleGroup as $needle) { if ($this->containsRagEvidenceTerm($haystack, $needle)) { @@ -1687,6 +1694,37 @@ final readonly class AgentRunner }; } + private function isAggregateRagEvidenceQuery(string $prompt): bool + { + $normalizedPrompt = $this->normalizeRagEvidenceText($prompt); + + if ($normalizedPrompt === '') { + return false; + } + + foreach ($this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns() as $pattern) { + if (@preg_match($pattern, $normalizedPrompt) === 1) { + return true; + } + } + + return false; + } + + /** + * @param string[] $terms + */ + private function containsAnyRagEvidenceTerm(string $haystack, array $terms): bool + { + foreach ($terms as $term) { + if ($this->containsRagEvidenceTerm($haystack, $term)) { + return true; + } + } + + return false; + } + /** * @return array */ diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 4cba91e..5d45c34 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -339,6 +339,22 @@ final class AgentRunnerConfig return $this->getRequiredStringListMap('rag_evidence_guard.synonyms'); } + /** + * @return string[] + */ + public function getRagEvidenceAggregateQueryPatterns(): array + { + return $this->getRequiredStringList('rag_evidence_guard.aggregate_query_patterns'); + } + + /** + * @return string[] + */ + public function getRagEvidenceAggregateEvidenceTerms(): array + { + return $this->getRequiredStringList('rag_evidence_guard.aggregate_evidence_terms'); + } + public function getNoLlmFallbackShopOnlyMessage(): string { return $this->getRequiredString('no_llm_fallback.messages.shop_only'); diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index d3007ca..aeecfde 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -455,6 +455,12 @@ final readonly class RetriexEffectiveConfigProvider 'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(), 'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(), ], + 'rag_evidence_guard' => [ + 'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(), + 'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(), + 'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(), + 'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(), + ], 'source_labels' => [ 'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(), 'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(), @@ -1009,6 +1015,13 @@ final readonly class RetriexEffectiveConfigProvider $this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings); $this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings); $this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings); + + $ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : []; + $this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings); + $this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings); + $this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors); + $this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings); + $this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings); $this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors); diff --git a/src/Knowledge/Retrieval/NdjsonChunkLookup.php b/src/Knowledge/Retrieval/NdjsonChunkLookup.php index b947158..98a8518 100644 --- a/src/Knowledge/Retrieval/NdjsonChunkLookup.php +++ b/src/Knowledge/Retrieval/NdjsonChunkLookup.php @@ -155,6 +155,27 @@ final readonly class NdjsonChunkLookup } } + if ($best === null) { + foreach ($documents as $document) { + $normalizedTitle = $document['normalized_title']; + + if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) { + continue; + } + + $score = 250 + mb_strlen($normalizedTitle, 'UTF-8'); + + if (preg_match('/\d/u', $normalizedTitle) === 1) { + $score += 500; + } + + if ($best === null || $score > $bestScore) { + $best = $document; + $bestScore = $score; + } + } + } + if ($best === null) { return null; } @@ -248,6 +269,49 @@ final readonly class NdjsonChunkLookup return true; } + /** + * Fallback for product titles where the prompt contains the significant + * alphabetic model tokens, but omits a numeric family token. + * + * This keeps prompts such as a product family plus variant suffix anchored + * to the correct document instead of falling back to broader semantic hits. + */ + private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool + { + if ($normalizedPrompt === '' || $normalizedTitle === '') { + return false; + } + + $titleTokens = $this->significantTitleTokens($normalizedTitle); + $alphaTokens = array_values(array_filter( + $titleTokens, + static fn (string $token): bool => preg_match('/\d/u', $token) !== 1 + )); + + if (count($alphaTokens) < 2 || count($alphaTokens) === count($titleTokens)) { + return false; + } + + $promptTokenVariants = $this->tokenVariantLookup($normalizedPrompt); + + foreach ($alphaTokens as $titleToken) { + $matched = false; + + foreach ($this->tokenVariants($titleToken) as $variant) { + if (isset($promptTokenVariants[$variant])) { + $matched = true; + break; + } + } + + if (!$matched) { + return false; + } + } + + return true; + } + /** * @return string[] */