From 63210a14deb1714d4a24fd9d991f3d274b94b3e1 Mon Sep 17 00:00:00 2001 From: team 1 Date: Sun, 10 May 2026 12:07:27 +0200 Subject: [PATCH] p85b --- config/retriex/agent.yaml | 2 +- config/retriex/genre.yaml | 24 ++-- ...IC_DEVICE_EXACT_PARAMETER_ANCHOR_README.md | 107 ++++++++++++++++++ src/Agent/AgentRunner.php | 6 +- src/Config/AgentRunnerConfig.php | 14 ++- src/Config/RetriexEffectiveConfigProvider.php | 4 + 6 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 patch_history/RETRIEX_PATCH_85B_GENERIC_DEVICE_EXACT_PARAMETER_ANCHOR_README.md diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index 5a93e6a..1f928fe 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -129,7 +129,7 @@ parameters: previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}' current_follow_up_question_template: 'Aktuelle Folgefrage: {question}' reference_anchor: - product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' + product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+(?=[A-Z0-9]*[A-Z])[A-Z0-9]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' final_answer_guard: diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index 1630c64..5fbc590 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -1385,6 +1385,20 @@ parameters: - kit - set anchor_rules: + - anchor: testomat 808 sio2 + # SiO2/Silikat is a concrete measurement variant in the RAG + # device overview. Use the exact model-style anchor instead of the + # broad Testomat family so unrelated Testomat devices are not listed + # merely because they mention cooling or silicate deposits. 
+ template: '{anchor}' + match_terms: + - silikat + - silikatüberwachung + - silikatueberwachung + - sio2 + - si o2 + - kieselsäure + - kieselsaeure - anchor: testomat match_terms: - wasserhärte @@ -1396,16 +1410,6 @@ parameters: - chlor - freies chlor - gesamtchlor - - silikat - - silikatüberwachung - - silikatueberwachung - - sio2 - - kieselsäure - - kieselsaeure - - kühlsystem - - kuehlsystem - - kühlsysteme - - kuehlsysteme - anchor: pockettester match_terms: - ph diff --git a/patch_history/RETRIEX_PATCH_85B_GENERIC_DEVICE_EXACT_PARAMETER_ANCHOR_README.md b/patch_history/RETRIEX_PATCH_85B_GENERIC_DEVICE_EXACT_PARAMETER_ANCHOR_README.md new file mode 100644 index 0000000..caec72f --- /dev/null +++ b/patch_history/RETRIEX_PATCH_85B_GENERIC_DEVICE_EXACT_PARAMETER_ANCHOR_README.md @@ -0,0 +1,107 @@ +# RetrieX Patch p85b - Generic Device Exact Parameter Anchor + +## Goal + +Harden p85 for generic device shop queries where a requested measurement parameter points to a concrete device variant. + +Motivating flow: + +```text +suche gerät kühlsysteme Silikatüberwachung +``` + +p85 correctly recognized a generic device query and added a family anchor, but the resulting query was still too broad: + +```text +testomat kühlsysteme silikatüberwachung +``` + +That can surface unrelated Testomat devices which mention cooling towers or silicate deposits but do not explicitly represent SiO2/Silicate monitoring as the requested measurement parameter. + +## Problem + +`gerät` is only a weak product-role signal. For `Silikatüberwachung`, the RAG device overview contains a more specific model-style identity: `Testomat 808 SiO2`. + +A broad family query such as `testomat silikatüberwachung` can produce noisy live-shop hits, for example Testomat devices for hardness, chlorine or self-cleaning contexts. Those hits must not be presented as precise Silicate-monitoring devices unless the same shop product identity or record explicitly supports that parameter. 
+ +## Solution + +p85b keeps the p85 concept but makes the Silicate/SiO2 branch exact (abridged excerpt; the full rule in `config/retriex/genre.yaml` also lists `silikatueberwachung`, `si o2` and `kieselsaeure`): + +```yaml +shop_query_runtime: + generic_device_anchor: + anchor_rules: + - anchor: testomat 808 sio2 + template: '{anchor}' + match_terms: + - silikat + - silikatüberwachung + - sio2 + - kieselsäure +``` + +The generic `testomat` rule no longer contains the Silicate/Cooling-system terms. This means: + +- `gerät + Silikat/SiO2` becomes the precise shop query `testomat 808 sio2`. +- `gerät + kühlsysteme` alone no longer forces a Testomat anchor. +- Other p85 mappings stay intact, e.g. `gerät redox` -> `pockettester redox`. + +p85b also allows optional per-rule templates for generic device anchors. The default remains `{anchor} {query}`, but a specific rule may choose `{anchor}` when preserving the original context would make the Shopware search less precise. + +Finally, the configured Testomat model-anchor regex now accepts alphanumeric suffixes of 2 to 8 characters that contain at least one letter, e.g. `Testomat 808 SiO2`, without treating pure year-like suffixes such as `2019` as model variants. 
+ +## Expected examples + +```text +suche gerät kühlsysteme Silikatüberwachung +=> testomat 808 sio2 +``` + +```text +gerät kühlsysteme +=> no Testomat family anchor is injected solely from the application word +``` + +```text +gerät redox +=> pockettester redox +``` + +```text +testomat gerät freies chlor +=> unchanged by generic-device anchoring because `testomat` is already present +``` + +```text +Testomat 808 SiO2 +=> recognized as a concrete model anchor for result-identity filtering +``` + +## Files changed + +- `config/retriex/genre.yaml` +- `config/retriex/agent.yaml` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` + +## Local checks + +```text +php -l src/Agent/AgentRunner.php +php -l src/Config/AgentRunnerConfig.php +php -l src/Config/RetriexEffectiveConfigProvider.php +python3 YAML parse for config/retriex/genre.yaml and config/retriex/agent.yaml +standalone smoke simulation for generic-device exact-anchor behavior +PHP regex smoke for Testomat 808 SiO2 / Testomat 808 2019 / Testomat 2000 THCL / Testomat LAB CL +``` + +Symfony console checks require the deployment environment with `vendor/`: + +```text +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 55cf32d..94ab2a6 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -1783,7 +1783,11 @@ final readonly class AgentRunner ) : $shopSearchQuery; - $template = $this->agentRunnerConfig->getGenericDeviceQueryAnchorTemplate(); + $template = trim((string) ($rule['template'] ?? 
'')); + if ($template === '') { + $template = $this->agentRunnerConfig->getGenericDeviceQueryAnchorTemplate(); + } + if ($template === '') { return $shopSearchQuery; } diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 6542b3f..3155fac 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -1428,7 +1428,7 @@ final class AgentRunnerConfig } /** - * @return array + * @return array */ public function getGenericDeviceQueryAnchorRules(): array { @@ -1470,10 +1470,20 @@ final class AgentRunnerConfig continue; } - $rules[] = [ + $normalizedRule = [ 'anchor' => $anchor, 'match_terms' => $matchTerms, ]; + + $template = $rule['template'] ?? null; + if (is_scalar($template)) { + $template = trim((string) $template); + if ($template !== '') { + $normalizedRule['template'] = $template; + } + } + + $rules[] = $normalizedRule; } return $rules; diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 98bbf40..bbc43cf 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -1402,6 +1402,10 @@ final readonly class RetriexEffectiveConfigProvider $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index); } + if (array_key_exists('template', $rule) && (!is_string($rule['template']) || trim((string) $rule['template']) === '')) { + $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.template must be a non-empty string when configured.', (string) $index); + } + $this->validateStringList( $this->toList($rule['match_terms'] ?? []), sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index),