From f98de3c7856c388817b0191464ebaee060476554 Mon Sep 17 00:00:00 2001 From: team 1 Date: Fri, 1 May 2026 20:38:10 +0200 Subject: [PATCH] patch 17c --- ...17B_ACCURACY_PROMPT_GUARD_HOTFIX_README.md | 44 +++++++++++++++ ..._17C_ACCURACY_CAL_POOL_GROUNDING_README.md | 55 +++++++++++++++++++ config/retriex/prompt.yaml | 11 ++-- src/Knowledge/Retrieval/NdjsonChunkLookup.php | 46 +++++++++++----- 4 files changed, 136 insertions(+), 20 deletions(-) create mode 100644 RETRIEX_PATCH_17B_ACCURACY_PROMPT_GUARD_HOTFIX_README.md create mode 100644 RETRIEX_PATCH_17C_ACCURACY_CAL_POOL_GROUNDING_README.md diff --git a/RETRIEX_PATCH_17B_ACCURACY_PROMPT_GUARD_HOTFIX_README.md b/RETRIEX_PATCH_17B_ACCURACY_PROMPT_GUARD_HOTFIX_README.md new file mode 100644 index 0000000..341354b --- /dev/null +++ b/RETRIEX_PATCH_17B_ACCURACY_PROMPT_GUARD_HOTFIX_README.md @@ -0,0 +1,44 @@ +# RetrieX Patch 17b - Accuracy Prompt Guard Hotfix + +## Scope + +This patch tightens prompt-side grounding rules for two remaining accuracy issues after p17: + +1. Product-specific threshold / indicator transfer + - A value, indicator, reagent, measuring range, method, accessory code, or application claim must belong to the same explicitly named product or variant. + - Indicator tables from another Testomat variant must not be transferred to Testomat 2000 CAL. + - The rule explicitly prevents applying Testomat 808 indicator types such as 300, 300 S, 301, 302, 303, 305, 310, 320, 330, or 350 to Testomat 2000 CAL unless the same source states that mapping. + +2. Parameter vs. application evidence + - A shop result can support the requested measurement parameter, but application suitability such as Schwimmbad / Pool must be stated in the same source record. + - User wording, generated shop query terms, search ranking, product family, and generic water-treatment wording are not application evidence. + +## Files changed + +- `config/retriex/prompt.yaml` + +## Runtime impact + +No PHP runtime code was changed. 
This is a prompt-governance hotfix only. + +## Required checks + +Run after applying: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Manual regression cases + +- `welche grenzwerte kann der testomat testomat cal messen` + - Must not answer with Testomat 808 indicator/range mappings. + - If CAL-specific TH mappings are present in the retrieved sources, use them. + - If not present, say that the exact CAL mapping is not available in the provided sources. + +- `ich würde gern chlor im schwinnbad messen` + - May use shop results for products that explicitly support chlorine measurement. + - Must not claim Schwimmbad/Pool suitability unless the same source record explicitly states it. diff --git a/RETRIEX_PATCH_17C_ACCURACY_CAL_POOL_GROUNDING_README.md b/RETRIEX_PATCH_17C_ACCURACY_CAL_POOL_GROUNDING_README.md new file mode 100644 index 0000000..d716305 --- /dev/null +++ b/RETRIEX_PATCH_17C_ACCURACY_CAL_POOL_GROUNDING_README.md @@ -0,0 +1,55 @@ +# RetrieX Patch 17c - Accuracy CAL / Pool Grounding Hotfix + +## Ziel + +Patch 17c korrigiert zwei nach Patch 17b verbliebene Accuracy-Probleme: + +1. **Testomat CAL / Grenzwerte** + - Prompts wie `welche grenzwerte kann der testomat cal messen` sollen den passenden Produktdatensatz `Testomat 2000 CAL` sicherer als exaktes Dokument auflösen, auch wenn der Nutzer die numerische Familienkennung `2000` nicht nennt. + - Fehlübertragungen von Testomat-808-Indikatorbereichen auf Testomat CAL werden im Prompt-Grounding weiter untersagt. + +2. **Chlor / Schwimmbad** + - Chlor-Messfähigkeit und Schwimmbad-/Pool-Anwendung werden getrennt bewertet. + - Ein Produkt darf nur dann als Schwimmbad-/Pool-Lösung empfohlen werden, wenn dieselbe RAG- oder Shop-Quelle diese Anwendung explizit nennt. 
+ +## Geänderte Dateien + +- `src/Knowledge/Retrieval/NdjsonChunkLookup.php` +- `config/retriex/prompt.yaml` + +## Details + +### Exact document lookup + +Der bestehende Titel-Fallback in `NdjsonChunkLookup` für Titelmatches mit fehlender numerischer Produktfamilie (bisher `isConfidentTitleAlphaTokenMatch`, jetzt `isConfidentTitleTokenMatchAllowingMissingNumeric`) prüft zusätzlich auf widersprechende numerische Promptanker; außerdem steigen der Basis-Score von 250 auf 350 und der Bonus für Titel mit Ziffern von 500 auf 750. Dadurch kann ein Prompt wie `Testomat CAL` auf einen Titel wie `Testomat 2000 CAL` matchen, wenn alle nicht-numerischen Titelanker übereinstimmen und kein widersprechender numerischer Promptanker vorhanden ist. + +Beispiele: + +- `Testomat CAL` -> darf `Testomat 2000 CAL` matchen. +- `Testomat 808 CAL` -> darf nicht allein wegen `CAL` auf `Testomat 2000 CAL` springen, wenn ein widersprechender Zahlenanker vorhanden ist. + +### Prompt-Grounding + +Der Patch entfernt in `config/retriex/prompt.yaml` den Block `direct_main_device_request_patterns` sowie zwei Regeln zur Rollenzuordnung einzelner SHOP PRODUCT RECORDs; zusätzlich wurden die Promptregeln um zwei Guardrails ergänzt: + +- CAL darf nicht mit Testomat-808-Indikatorbereichen oder generischen `0,02 °dH bis 5 °dH`-Bereichen beantwortet werden, außer ein CAL-Quellrecord belegt diese Zuordnung explizit. +- Chlor-Messung ist kein automatischer Beleg für Schwimmbad-/Pool-Eignung. Diese Anwendung muss in derselben Quelle explizit stehen.
+ +## Erwartete Checks + +Nach dem Einspielen ausführen: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Manuelle Regression + +Erneut testen: + +- `welche grenzwerte kann der testomat cal messen` +- `ich würde gern chlor im schwinnbad messen` + diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index 50b2543..342b397 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -209,11 +209,6 @@ parameters: - ph-indikatoren - ph indikatoren - direct_main_device_request_patterns: - - '/\b(welcher|welches|welche)\s+[^?.!,;]{0,40}(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\b/u' - - '/\b(suche|finde|empfiehl|empfehle)\s+[^?.!,;]{0,40}(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\b/u' - - '/\b(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\s+[^?.!,;]{0,40}(messen|misst|überwachen|ueberwachen|kann|für|fuer)\b/u' - measurement_evidence_guard: intro_rules: - '- This block is generated from the current user question and is stricter than broad product-selection wording.' @@ -434,8 +429,6 @@ parameters: - '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.' - '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.' - - '- Use the Requested role, Inferred role, and Role compatibility fields independently for each SHOP PRODUCT RECORD; never transfer the role of an accessory, indicator, reagent, kit, or set to a different shop record.' 
- - '- If a SHOP PRODUCT RECORD has Inferred role: main_device, do not describe that same product as accessory_or_consumable merely because other shown records are accessories or consumables.' - '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.' - '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.' - '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.' @@ -486,6 +479,8 @@ parameters: - '- Use shop data as highest priority for current commercial fields: price, availability, URL, current shop-visible naming, and explicitly shop-visible product suitability for product-selection questions.' - '- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation when it contains a matching product or fact.' - '- If retrieved knowledge is silent or only contains unrelated products, but live shop results explicitly match the requested parameter/application, use the shop results and do not answer with a negative RAG-only conclusion.' + - '- If the user asks for Schwimmbad, Schwimmbecken, Pool, or typo-like pool wording, a product may only be recommended for that application when the same RAG or SHOP PRODUCT RECORD explicitly names that application. Chlor measurement alone is not proof of swimming-pool suitability.' + - '- If a product record proves Chlor measurement but not Schwimmbad, Schwimmbecken or Pool use, say exactly that distinction and avoid recommendation wording such as empfiehlt sich, geeignet für Schwimmbad, or Anwendung im Schwimmbad.' - '- For product-selection questions, a shop result proves technical suitability only when the same SHOP PRODUCT RECORD explicitly states the requested measurement parameter, application, or compatibility. 
Search ranking, generated query terms, generic category matches, and similar wording are not proof.' - '- If the requested parameter appears only in the generated shop query, metadata, unrelated highlights, or another product record, treat suitability as unverified and say that the shop hit requires technical verification.' - '- Do not convert p-Wert, m-Wert, minus m-Wert, alkalinity, acid capacity, or other water-treatment parameters into pH or pH-Wert unless the same source explicitly says pH or pH-Wert.' @@ -547,6 +542,8 @@ parameters: entry explicitly connects them.' - '- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to another.' + - '- For Testomat CAL or Testomat 2000 CAL threshold/range questions, do not answer with Testomat 808 indicator ranges or the generic 0,02 °dH to 5 °dH range unless a CAL source record explicitly contains that exact assignment.' + - '- Do not use phrases such as typical monitoring range, typical range, or common range for a named product when the provided source only proves another product variant or does not explicitly state the named product range.' - '- If the source states only a threshold function, do not expand it into broader control logic.' - '- If a detail is not explicitly stated in the provided sources, say so plainly.' - '- Prefer short, source-close sentences over explanatory expansion.' 
diff --git a/src/Knowledge/Retrieval/NdjsonChunkLookup.php b/src/Knowledge/Retrieval/NdjsonChunkLookup.php index 98a8518..208ba33 100644 --- a/src/Knowledge/Retrieval/NdjsonChunkLookup.php +++ b/src/Knowledge/Retrieval/NdjsonChunkLookup.php @@ -159,14 +159,14 @@ final readonly class NdjsonChunkLookup foreach ($documents as $document) { $normalizedTitle = $document['normalized_title']; - if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) { + if (!$this->isConfidentTitleTokenMatchAllowingMissingNumeric($normalizedPrompt, $normalizedTitle)) { continue; } - $score = 250 + mb_strlen($normalizedTitle, 'UTF-8'); + $score = 350 + mb_strlen($normalizedTitle, 'UTF-8'); if (preg_match('/\d/u', $normalizedTitle) === 1) { - $score += 500; + $score += 750; } if ($best === null || $score > $bestScore) { @@ -270,34 +270,40 @@ final readonly class NdjsonChunkLookup } /** - * Fallback for product titles where the prompt contains the significant - * alphabetic model tokens, but omits a numeric family token. - * - * This keeps prompts such as a product family plus variant suffix anchored - * to the correct document instead of falling back to broader semantic hits. + * Allows prompts such as "Testomat CAL" to resolve a document titled + * "Testomat 2000 CAL" without also allowing conflicting model numbers. 
*/ - private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool + private function isConfidentTitleTokenMatchAllowingMissingNumeric(string $normalizedPrompt, string $normalizedTitle): bool { if ($normalizedPrompt === '' || $normalizedTitle === '') { return false; } $titleTokens = $this->significantTitleTokens($normalizedTitle); + + if (count($titleTokens) < 3 || preg_match('/\d/u', $normalizedTitle) !== 1) { + return false; + } + $alphaTokens = array_values(array_filter( $titleTokens, static fn (string $token): bool => preg_match('/\d/u', $token) !== 1 )); + $numericTokens = array_values(array_filter( + $titleTokens, + static fn (string $token): bool => preg_match('/\d/u', $token) === 1 + )); - if (count($alphaTokens) < 2 || count($alphaTokens) === count($titleTokens)) { + if (count($alphaTokens) < 2 || $numericTokens === []) { return false; } $promptTokenVariants = $this->tokenVariantLookup($normalizedPrompt); - foreach ($alphaTokens as $titleToken) { + foreach ($alphaTokens as $alphaToken) { $matched = false; - foreach ($this->tokenVariants($titleToken) as $variant) { + foreach ($this->tokenVariants($alphaToken) as $variant) { if (isset($promptTokenVariants[$variant])) { $matched = true; break; @@ -309,7 +315,21 @@ final readonly class NdjsonChunkLookup } } - return true; + $promptHasNumericToken = preg_match('/\d/u', $normalizedPrompt) === 1; + + if (!$promptHasNumericToken) { + return true; + } + + foreach ($numericTokens as $numericToken) { + foreach ($this->tokenVariants($numericToken) as $variant) { + if (isset($promptTokenVariants[$variant])) { + return true; + } + } + } + + return false; } /**