This commit is contained in:
team 1
2026-05-10 12:07:27 +02:00
parent 886b6fac84
commit 63210a14de
6 changed files with 143 additions and 14 deletions

View File

@@ -129,7 +129,7 @@ parameters:
previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}' previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
current_follow_up_question_template: 'Aktuelle Folgefrage: {question}' current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
reference_anchor: reference_anchor:
product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+(?=[A-Z0-9]*[A-Z])[A-Z0-9]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
final_answer_guard: final_answer_guard:

View File

@@ -1385,6 +1385,20 @@ parameters:
- kit - kit
- set - set
anchor_rules: anchor_rules:
- anchor: testomat 808 sio2
# SiO2/Silikat is a concrete measurement variant in the RAG
# device overview. Use the exact model-style anchor instead of the
# broad Testomat family so unrelated Testomat devices are not listed
# merely because they mention cooling or silicate deposits.
template: '{anchor}'
match_terms:
- silikat
- silikatüberwachung
- silikatueberwachung
- sio2
- si o2
- kieselsäure
- kieselsaeure
- anchor: testomat - anchor: testomat
match_terms: match_terms:
- wasserhärte - wasserhärte
@@ -1396,16 +1410,6 @@ parameters:
- chlor - chlor
- freies chlor - freies chlor
- gesamtchlor - gesamtchlor
- silikat
- silikatüberwachung
- silikatueberwachung
- sio2
- kieselsäure
- kieselsaeure
- kühlsystem
- kuehlsystem
- kühlsysteme
- kuehlsysteme
- anchor: pockettester - anchor: pockettester
match_terms: match_terms:
- ph - ph

View File

@@ -0,0 +1,107 @@
# RetrieX Patch p85b - Generic Device Exact Parameter Anchor
## Goal
Harden p85 for generic device shop queries where a requested measurement parameter points to a concrete device variant.
Motivating flow:
```text
suche gerät kühlsysteme Silikatüberwachung
```
p85 correctly recognized a generic device query and added a family anchor, but the resulting query was still too broad:
```text
testomat kühlsysteme silikatüberwachung
```
That can surface unrelated Testomat devices which mention cooling towers or silicate deposits but do not explicitly represent SiO2/Silicate monitoring as the requested measurement parameter.
## Problem
`gerät` is only a weak product-role signal. For `Silikatüberwachung`, the RAG device overview contains a more specific model-style identity: `Testomat 808 SiO2`.
A broad family query such as `testomat silikatüberwachung` can produce noisy live-shop hits, for example Testomat devices for hardness, chlorine or self-cleaning contexts. Those hits must not be presented as precise Silicate-monitoring devices unless the same shop product identity or record explicitly supports that parameter.
## Solution
p85b keeps the p85 concept but makes the Silicate/SiO2 branch exact:
```yaml
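shop_query_runtime:
  generic_device_anchor:
    anchor_rules:
      - anchor: testomat 808 sio2
        template: '{anchor}'
        match_terms:
          - silikat
          - silikatüberwachung
          - sio2
          - kieselsäure
          # abbreviated: the configured rule also lists `si o2` and the ASCII
          # transliterations `silikatueberwachung` and `kieselsaeure`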
```
The generic `testomat` rule no longer contains the Silicate/Cooling-system terms. This means:
- `gerät + Silikat/SiO2` becomes the precise shop query `testomat 808 sio2`.
- `gerät + kühlsysteme` alone no longer forces a Testomat anchor.
- Other p85 mappings stay intact, e.g. `gerät redox` -> `pockettester redox`.
p85b also allows optional per-rule templates for generic device anchors. The default remains `{anchor} {query}`, but a specific rule may choose `{anchor}` when preserving the original context would make the Shopware search less precise.
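Finally, the configured Testomat model-anchor regex (`reference_anchor.product_model_pattern`) now accepts, after a numeric model number, an alphanumeric suffix of 2–8 characters that contains at least one letter, e.g. `Testomat 808 SiO2`. A purely numeric, year-like suffix such as `2019` in `Testomat 808 2019` is still not treated as part of the model variant; only `Testomat 808` is matched there.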
## Expected examples
```text
suche gerät kühlsysteme Silikatüberwachung
=> testomat 808 sio2
```
```text
gerät kühlsysteme
=> no Testomat family anchor is injected solely from the application word
```
```text
gerät redox
=> pockettester redox
```
```text
testomat gerät freies chlor
=> unchanged by generic-device anchoring because `testomat` is already present
```
```text
Testomat 808 SiO2
=> recognized as a concrete model anchor for result-identity filtering
```
## Files changed
- `config/retriex/genre.yaml`
- `config/retriex/agent.yaml`
- `src/Agent/AgentRunner.php`
- `src/Config/AgentRunnerConfig.php`
- `src/Config/RetriexEffectiveConfigProvider.php`
## Local checks
```text
php -l src/Agent/AgentRunner.php
php -l src/Config/AgentRunnerConfig.php
php -l src/Config/RetriexEffectiveConfigProvider.php
python3 YAML parse for config/retriex/genre.yaml and config/retriex/agent.yaml
standalone smoke simulation for generic-device exact-anchor behavior
PHP regex smoke for Testomat 808 SiO2 / Testomat 808 2019 / Testomat 2000 THCL / Testomat LAB CL
```
Symfony console checks require the deployment environment with `vendor/`:
```text
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```

View File

@@ -1783,7 +1783,11 @@ final readonly class AgentRunner
) )
: $shopSearchQuery; : $shopSearchQuery;
$template = trim((string) ($rule['template'] ?? ''));
if ($template === '') {
$template = $this->agentRunnerConfig->getGenericDeviceQueryAnchorTemplate(); $template = $this->agentRunnerConfig->getGenericDeviceQueryAnchorTemplate();
}
if ($template === '') { if ($template === '') {
return $shopSearchQuery; return $shopSearchQuery;
} }

View File

@@ -1428,7 +1428,7 @@ final class AgentRunnerConfig
} }
/** /**
* @return array<int, array{anchor: string, match_terms: string[]}> * @return array<int, array{anchor: string, match_terms: string[], template?: string}>
*/ */
public function getGenericDeviceQueryAnchorRules(): array public function getGenericDeviceQueryAnchorRules(): array
{ {
@@ -1470,10 +1470,20 @@ final class AgentRunnerConfig
continue; continue;
} }
$rules[] = [ $normalizedRule = [
'anchor' => $anchor, 'anchor' => $anchor,
'match_terms' => $matchTerms, 'match_terms' => $matchTerms,
]; ];
$template = $rule['template'] ?? null;
if (is_scalar($template)) {
$template = trim((string) $template);
if ($template !== '') {
$normalizedRule['template'] = $template;
}
}
$rules[] = $normalizedRule;
} }
return $rules; return $rules;

View File

@@ -1402,6 +1402,10 @@ final readonly class RetriexEffectiveConfigProvider
$errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index); $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index);
} }
if (array_key_exists('template', $rule) && (!is_string($rule['template']) || trim((string) $rule['template']) === '')) {
$errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.template must be a non-empty string when configured.', (string) $index);
}
$this->validateStringList( $this->validateStringList(
$this->toList($rule['match_terms'] ?? []), $this->toList($rule['match_terms'] ?? []),
sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index), sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index),