From 886b6fac8442e96d7b84068b2c3fc22b5a67cb11 Mon Sep 17 00:00:00 2001 From: team 1 Date: Sun, 10 May 2026 12:04:46 +0200 Subject: [PATCH] p85 --- config/retriex/genre.yaml | 84 ++++++++++++++ ...EL_ACRONYM_POSITIVE_FILTER_GUARD_README.md | 95 ++++++++++++++++ ...85_GENERIC_DEVICE_CONTEXT_ANCHOR_README.md | 104 +++++++++++++++++ src/Config/AgentRunnerConfig.php | 107 ++++++++++++++++++ src/Config/RetriexEffectiveConfigProvider.php | 41 +++++++ 5 files changed, 431 insertions(+) create mode 100644 patch_history/RETRIEX_PATCH_84_MODEL_ACRONYM_POSITIVE_FILTER_GUARD_README.md create mode 100644 patch_history/RETRIEX_PATCH_85_GENERIC_DEVICE_CONTEXT_ANCHOR_README.md diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index 5ea827c..1630c64 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -131,6 +131,7 @@ parameters: - configuration_values.shop_query_runtime.current_input_preservation_terms - configuration_values.shop_query_runtime.stopword_cleanup - configuration_values.shop_query_runtime.positive_token_filter + - configuration_values.shop_query_runtime.generic_device_anchor - configuration_values.shop_query_runtime.compound_prefix_match - configuration_values.shop_query_runtime.primary_identity_repair - configuration_values.shop_query_runtime.semantic_shop_search_tokens @@ -1327,8 +1328,91 @@ parameters: - clt - cl - cal + - calc + - lab + - th + - mono + - r + - evo + - eco + - plus + - c + - duo adjacent_variant_patterns: - '/^[a-z]{2,8}\d{0,4}$/iu' + generic_device_anchor: + origin: genre_native + enabled: true + # Generic device words are intent signals, not strong Shopware search + # terms. Only a configured measurement/application rule may add a + # product-family anchor, so "gerät" does not always become Testomat. 
+ remove_generic_device_terms: true + template: '{anchor} {query}' + trigger_terms: + - gerät + - geraet + - geräte + - geraete + - messgerät + - messgeraet + - messgeräte + - messgeraete + - analysegerät + - analysegeraet + - analysator + - monitor + suppress_if_terms: + - testomat + - testomaten + - pockettester + - pocket tester + - titromat + - neodewa + - jumo + - sensor + - sonde + - elektrode + - kabel + - anschlusskabel + - indikator + - indikatortyp + - reagenz + - reagent + - zubehör + - zubehor + - ersatzteil + - verbrauchsmaterial + - kit + - set + anchor_rules: + - anchor: testomat + match_terms: + - wasserhärte + - wasserhaerte + - resthärte + - resthaerte + - gesamthärte + - gesamthaerte + - chlor + - freies chlor + - gesamtchlor + - silikat + - silikatüberwachung + - silikatueberwachung + - sio2 + - kieselsäure + - kieselsaeure + - kühlsystem + - kuehlsystem + - kühlsysteme + - kuehlsysteme + - anchor: pockettester + match_terms: + - ph + - redox + - orp + - leitfähigkeit + - leitfaehigkeit compound_prefix_match: origin: genre_native terms: diff --git a/patch_history/RETRIEX_PATCH_84_MODEL_ACRONYM_POSITIVE_FILTER_GUARD_README.md b/patch_history/RETRIEX_PATCH_84_MODEL_ACRONYM_POSITIVE_FILTER_GUARD_README.md new file mode 100644 index 0000000..0fee0c0 --- /dev/null +++ b/patch_history/RETRIEX_PATCH_84_MODEL_ACRONYM_POSITIVE_FILTER_GUARD_README.md @@ -0,0 +1,95 @@ +# RetrieX Patch p84 - Model Acronym Positive Filter Guard + +## Ziel + +Bei Follow-up-Actions mit konkreten Gerätenamen konnte der positive Shopquery-Filter reine Kürzelketten ohne numerische Modellnummer zu stark reduzieren. + +Konkreter Fehlerfall: + +```text +Zeige mir die Preise zu Testomat LAB CL. +=> testomat +``` + +Erwartet ist eine fokussierte Shopquery: + +```text +testomat lab cl +``` + +Dadurch bleiben Preis- und Geräte-Follow-ups auf dem konkret empfohlenen Gerät, statt wieder breit auf `testomat` bzw. `testomat geräte` zurückzufallen. 
+ +## Änderungen + +- `src/Agent/AgentRunner.php` + - erweitert `shouldKeepAdjacentVariantShopQueryToken()` um einen zweiten, bewusst engen Pfad für kompakte rein alphabetische Modell-/Kürzelketten. + - Der bestehende numerische Pfad für Varianten wie `Testomat 2000 THCL 100276` bleibt unverändert. + - Der neue nicht-numerische Pfad greift nur, wenn: + - das aktuelle Token explizit in `adjacent_variant_terms` konfiguriert ist, + - direkt daneben ein weiteres explizit konfiguriertes Variantentoken steht, + - und in der Nähe bereits mindestens ein positiver Kontexttoken erhalten bleibt, z. B. `testomat`. + - Damit werden willkürliche beschreibende Wörter nicht über das generische Pattern allein erhalten. + +- `config/retriex/genre.yaml` + - erweitert `shop_query_runtime.positive_token_filter.adjacent_variant_terms` um typische Gerätekürzel-/Familientokens für kompakte Testomat-Gerätenamen: + - `calc`, `lab`, `th`, `mono`, `r`, `evo`, `eco`, `plus`, `c`, `duo` + +## Erwartete Wirkung + +```text +Zeige mir die Preise zu Testomat LAB CL. +=> testomat lab cl +``` + +```text +Zeige mir die Preise zu Testomat LAB TH-R. +=> testomat lab th r +``` + +```text +Zeige mir die Preise zu Testomat EVO TH. +=> testomat evo th +``` + +```text +Zeige mir die Preise zu Testomat ECO PLUS. +=> testomat eco plus +``` + +Bestehende Guards bleiben erhalten: + +```text +Zeige mir die Preise zu Testomat 2000 THCL 100276. +=> testomat 2000 thcl 100276 +``` + +```text +mit welchem testomat kann ich freies chlor messen +=> testomat freies chlor +``` + +## Nicht geändert + +- Kein Retrieval-, Scoring-, Ranking- oder Shop-Matching-Fix. +- Keine Sonderlogik für `LAB CL` im PHP-Core. +- Keine neue harte Tokenliste im PHP-Core; die zusätzlichen Modell-/Kürzeltokens liegen in `genre.yaml`. +- Die positive Token-Filterung bleibt aktiv und filtert weiterhin Noise-/Relationswörter. 
+ +## Lokale Checks + +```text +php -l src/Agent/AgentRunner.php +php -l src/Config/AgentRunnerConfig.php +python3 YAML parse OK +model acronym positive-filter smoke OK +``` + +Die Symfony-Console-Checks konnten im ZIP-Arbeitsverzeichnis nicht lokal ausgeführt werden, weil `vendor/` nicht enthalten ist. Bitte wie gewohnt in der Zielumgebung ausführen: + +```bash +bin/console cache:clear +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/patch_history/RETRIEX_PATCH_85_GENERIC_DEVICE_CONTEXT_ANCHOR_README.md b/patch_history/RETRIEX_PATCH_85_GENERIC_DEVICE_CONTEXT_ANCHOR_README.md new file mode 100644 index 0000000..5b3faeb --- /dev/null +++ b/patch_history/RETRIEX_PATCH_85_GENERIC_DEVICE_CONTEXT_ANCHOR_README.md @@ -0,0 +1,104 @@ +# RetrieX Patch p85 - Generic Device Context Anchor + +## Goal + +Improve generic device shop queries without hardwiring every occurrence of `gerät` to `testomat`. + +The motivating example is: + +```text +gerät kühlsysteme Silikatüberwachung +``` + +The desired final Shopware query is product-family aware but still context-preserving: + +```text +testomat kühlsysteme silikatüberwachung +``` + +## Problem + +Generic device words such as `gerät`, `messgerät` or `analysegerät` are useful intent signals, but weak Shopware search terms. +A naive rule like `gerät => testomat` would be wrong because the shop can contain other device families such as PocketTester, Titromat, NeoDeWa, JUMO-related products or sensors. + +The current positive shop-query filter can also reduce unknown application words too aggressively. For example, a query containing `Silikatüberwachung` can lose the application context if that token is not part of the positive allow surface. 
+ +## Solution + +Patch p85 adds a small, configurable shop-query runtime guard: + +```yaml +shop_query_runtime: + generic_device_anchor: + enabled: true + remove_generic_device_terms: true + template: '{anchor} {query}' + trigger_terms: [...] + suppress_if_terms: [...] + anchor_rules: + - anchor: testomat + match_terms: [...] + - anchor: pockettester + match_terms: [...] +``` + +Behavior: + +1. The guard activates only when the query contains a generic device term. +2. It does not activate when the query already contains a concrete family/product/accessory/sensor term from `suppress_if_terms`. +3. It adds only the configured anchor whose `match_terms` are present in the query. +4. Generic device words are removed from the final query when an anchor was added. +5. Rule match terms and anchors are also made visible to the positive token filter, so contextual terms such as `kühlsysteme` and `silikatüberwachung` are not dropped after enrichment. + +## Expected examples + +```text +gerät kühlsysteme Silikatüberwachung +=> testomat kühlsysteme silikatüberwachung +``` + +```text +gerät redox +=> pockettester redox +``` + +```text +gerät sensor redox +=> no family anchor is injected because `sensor` is a suppress term +``` + +```text +testomat gerät freies chlor +=> no extra family anchor is injected because `testomat` is already present +``` + +```text +Zeige mir die Preise zu Testomat LAB CL. 
+=> unchanged; p84 acronym preservation remains responsible for LAB CL +``` + +## Files changed + +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` +- `config/retriex/genre.yaml` + +## Local checks + +```text +php -l src/Agent/AgentRunner.php +php -l src/Config/AgentRunnerConfig.php +php -l src/Config/RetriexEffectiveConfigProvider.php +python3 YAML parse for config/retriex/genre.yaml +standalone smoke simulation for generic-device anchor behavior +``` + +Symfony console checks require the deployment environment with `vendor/`: + +```text +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index c0d1dc4..6542b3f 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -317,6 +317,12 @@ final class AgentRunnerConfig return $this->genreConfig?->getValueInt($path); } + /** @return array<array-key, mixed> Raw genre config array at $path; [] when the lookup yields null. */ + private function genreArray(string $path): array + { + return $this->genreConfig?->getValueArray($path) ?? []; + } + private function getRequiredInt(string $key): int { $value = $this->requiredValue($key); @@ -1390,6 +1396,107 @@ final class AgentRunnerConfig return $this->genreStringList('shop_query_runtime.semantic_shop_search_tokens.terms'); } + public function isGenericDeviceQueryAnchorEnabled(): bool + { + return $this->genreBool('shop_query_runtime.generic_device_anchor.enabled') ?? false; + } + + public function shouldGenericDeviceQueryAnchorRemoveGenericDeviceTerms(): bool + { + return $this->genreBool('shop_query_runtime.generic_device_anchor.remove_generic_device_terms') ?? 
false; + } + + public function getGenericDeviceQueryAnchorTemplate(): string + { + return $this->genreString('shop_query_runtime.generic_device_anchor.template') ?? '{anchor} {query}'; /* fallback = genre.yaml default; NOTE(review): presumes genreString() may return null like genreBool() - confirm */ + } + + /** + * @return string[] Configured generic_device_anchor.trigger_terms. + */ + public function getGenericDeviceQueryAnchorTriggerTerms(): array + { + return $this->genreStringList('shop_query_runtime.generic_device_anchor.trigger_terms'); + } + + /** + * @return string[] Configured generic_device_anchor.suppress_if_terms. + */ + public function getGenericDeviceQueryAnchorSuppressTerms(): array + { + return $this->genreStringList('shop_query_runtime.generic_device_anchor.suppress_if_terms'); + } + + /** + * @return list<array{anchor: string, match_terms: list<string>}> Sanitized anchor rules; malformed or empty entries are skipped. + */ + public function getGenericDeviceQueryAnchorRules(): array + { + $rules = []; + + foreach ($this->genreArray('shop_query_runtime.generic_device_anchor.anchor_rules') as $rule) { + if (!is_array($rule)) { + continue; + } + + $anchor = $rule['anchor'] ?? ''; + if (!is_scalar($anchor)) { + continue; + } + + $anchor = trim((string) $anchor); + if ($anchor === '') { + continue; + } + + $rawMatchTerms = $rule['match_terms'] ?? 
[]; + if (!is_array($rawMatchTerms)) { + continue; + } + + $matchTerms = []; + foreach ($rawMatchTerms as $term) { + if (!is_scalar($term)) { + continue; + } + + $term = trim((string) $term); + if ($term !== '' && !in_array($term, $matchTerms, true)) { + $matchTerms[] = $term; + } + } + + if ($matchTerms === []) { + continue; + } + + $rules[] = [ + 'anchor' => $anchor, + 'match_terms' => $matchTerms, + ]; + } + + return $rules; + } + + /** + * @return string[] Unique, non-empty anchors and match terms collected from all sanitized anchor rules. + */ + public function getGenericDeviceQueryAnchorPositiveFilterTerms(): array + { + $terms = []; + + foreach ($this->getGenericDeviceQueryAnchorRules() as $rule) { + $terms[] = $rule['anchor']; + $terms = array_merge($terms, $rule['match_terms']); + } + + return array_values(array_unique(array_filter( + array_map(static fn(string $term): string => trim($term), $terms), + static fn(string $term): bool => $term !== '' + ))); + } + /** * @return string[] */ diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 103d5a7..98bbf40 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -1371,6 +1371,47 @@ final readonly class RetriexEffectiveConfigProvider $this->validateRegexPatternList($positiveTokenFilter['adjacent_variant_patterns'] ?? [], 'genre.configuration_values.shop_query_runtime.positive_token_filter.adjacent_variant_patterns', $errors); } + $genericDeviceAnchor = is_array($shopQueryRuntime['generic_device_anchor'] ?? null) + ? 
$shopQueryRuntime['generic_device_anchor'] + : []; + if ($genericDeviceAnchor !== []) { + foreach (['enabled', 'remove_generic_device_terms'] as $boolKey) { + if (array_key_exists($boolKey, $genericDeviceAnchor) && !is_bool($genericDeviceAnchor[$boolKey])) { + $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.%s must be boolean.', $boolKey); + } + } + + if (array_key_exists('template', $genericDeviceAnchor) && (!is_string($genericDeviceAnchor['template']) || trim($genericDeviceAnchor['template']) === '')) { + $errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.template must be a non-empty string.'; + } + + $this->validateStringList($this->toList($genericDeviceAnchor['trigger_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.trigger_terms', $errors, $warnings); + $this->validateStringList($this->toList($genericDeviceAnchor['suppress_if_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.suppress_if_terms', $errors, $warnings); + + $anchorRules = $genericDeviceAnchor['anchor_rules'] ?? []; + if ($anchorRules !== [] && !is_array($anchorRules)) { + $errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules must be a list.'; + } elseif (is_array($anchorRules)) { + foreach ($anchorRules as $index => $rule) { + if (!is_array($rule)) { + $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s must be a map.', (string) $index); + continue; + } + + if (!is_string($rule['anchor'] ?? null) || trim((string) ($rule['anchor'] ?? '')) === '') { + $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index); + } + + $this->validateStringList( + $this->toList($rule['match_terms'] ?? 
[]), + sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index), + $errors, + $warnings + ); + } + } + } + foreach ($this->collectGenreConfigurationValueSourcePaths($configurationValues) as $valuePath => $sourcePaths) { foreach ($sourcePaths as $sourcePath) { if (!isset($flattened[$sourcePath])) {