p85
This commit is contained in:
@@ -131,6 +131,7 @@ parameters:
|
||||
- configuration_values.shop_query_runtime.current_input_preservation_terms
|
||||
- configuration_values.shop_query_runtime.stopword_cleanup
|
||||
- configuration_values.shop_query_runtime.positive_token_filter
|
||||
- configuration_values.shop_query_runtime.generic_device_anchor
|
||||
- configuration_values.shop_query_runtime.compound_prefix_match
|
||||
- configuration_values.shop_query_runtime.primary_identity_repair
|
||||
- configuration_values.shop_query_runtime.semantic_shop_search_tokens
|
||||
@@ -1327,8 +1328,91 @@ parameters:
|
||||
- clt
|
||||
- cl
|
||||
- cal
|
||||
- calc
|
||||
- lab
|
||||
- th
|
||||
- mono
|
||||
- r
|
||||
- evo
|
||||
- eco
|
||||
- plus
|
||||
- c
|
||||
- duo
|
||||
adjacent_variant_patterns:
|
||||
- '/^[a-z]{2,8}\d{0,4}$/iu'
|
||||
generic_device_anchor:
|
||||
origin: genre_native
|
||||
enabled: true
|
||||
# Generic device words are intent signals, not strong Shopware search
|
||||
# terms. Only a configured measurement/application rule may add a
|
||||
# product-family anchor, so "gerät" does not always become Testomat.
|
||||
remove_generic_device_terms: true
|
||||
template: '{anchor} {query}'
|
||||
trigger_terms:
|
||||
- gerät
|
||||
- geraet
|
||||
- geräte
|
||||
- geraete
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- messgeräte
|
||||
- messgeraete
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
suppress_if_terms:
|
||||
- testomat
|
||||
- testomaten
|
||||
- pockettester
|
||||
- pocket tester
|
||||
- titromat
|
||||
- neodewa
|
||||
- jumo
|
||||
- sensor
|
||||
- sonde
|
||||
- elektrode
|
||||
- kabel
|
||||
- anschlusskabel
|
||||
- indikator
|
||||
- indikatortyp
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- verbrauchsmaterial
|
||||
- kit
|
||||
- set
|
||||
anchor_rules:
|
||||
- anchor: testomat
|
||||
match_terms:
|
||||
- wasserhärte
|
||||
- wasserhaerte
|
||||
- resthärte
|
||||
- resthaerte
|
||||
- gesamthärte
|
||||
- gesamthaerte
|
||||
- chlor
|
||||
- freies chlor
|
||||
- gesamtchlor
|
||||
- silikat
|
||||
- silikatüberwachung
|
||||
- silikatueberwachung
|
||||
- sio2
|
||||
- kieselsäure
|
||||
- kieselsaeure
|
||||
- kühlsystem
|
||||
- kuehlsystem
|
||||
- kühlsysteme
|
||||
- kuehlsysteme
|
||||
- anchor: pockettester
|
||||
match_terms:
|
||||
- ph
|
||||
- redox
|
||||
- orp
|
||||
- leitfähigkeit
|
||||
- leitfaehigkeit
|
||||
compound_prefix_match:
|
||||
origin: genre_native
|
||||
terms:
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
# RetrieX Patch p84 - Model Acronym Positive Filter Guard
|
||||
|
||||
## Ziel
|
||||
|
||||
Bei Follow-up-Actions mit konkreten Gerätenamen konnte der positive Shopquery-Filter reine Kürzelketten ohne numerische Modellnummer zu stark reduzieren.
|
||||
|
||||
Konkreter Fehlerfall:
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat LAB CL.
|
||||
=> testomat
|
||||
```
|
||||
|
||||
Erwartet ist eine fokussierte Shopquery:
|
||||
|
||||
```text
|
||||
testomat lab cl
|
||||
```
|
||||
|
||||
Dadurch bleiben Preis- und Geräte-Follow-ups auf dem konkret empfohlenen Gerät, statt wieder breit auf `testomat` bzw. `testomat geräte` zurückzufallen.
|
||||
|
||||
## Änderungen
|
||||
|
||||
- `src/Agent/AgentRunner.php`
|
||||
- erweitert `shouldKeepAdjacentVariantShopQueryToken()` um einen zweiten, bewusst engen Pfad für kompakte rein alphabetische Modell-/Kürzelketten.
|
||||
- Der bestehende numerische Pfad für Varianten wie `Testomat 2000 THCL 100276` bleibt unverändert.
|
||||
- Der neue nicht-numerische Pfad greift nur, wenn:
|
||||
- das aktuelle Token explizit in `adjacent_variant_terms` konfiguriert ist,
|
||||
- direkt daneben ein weiteres explizit konfiguriertes Variantentoken steht,
|
||||
- und in der Nähe bereits mindestens ein positiver Kontexttoken erhalten bleibt, z. B. `testomat`.
|
||||
- Damit werden willkürliche beschreibende Wörter nicht über das generische Pattern allein erhalten.
|
||||
|
||||
- `config/retriex/genre.yaml`
|
||||
- erweitert `shop_query_runtime.positive_token_filter.adjacent_variant_terms` um typische Gerätekürzel-/Familientokens für kompakte Testomat-Gerätenamen:
|
||||
- `calc`, `lab`, `th`, `mono`, `r`, `evo`, `eco`, `plus`, `c`, `duo`
|
||||
|
||||
## Erwartete Wirkung
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat LAB CL.
|
||||
=> testomat lab cl
|
||||
```
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat LAB TH-R.
|
||||
=> testomat lab th r
|
||||
```
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat EVO TH.
|
||||
=> testomat evo th
|
||||
```
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat ECO PLUS.
|
||||
=> testomat eco plus
|
||||
```
|
||||
|
||||
Bestehende Guards bleiben erhalten:
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat 2000 THCL 100276.
|
||||
=> testomat 2000 thcl 100276
|
||||
```
|
||||
|
||||
```text
|
||||
mit welchem testomat kann ich freies chlor messen
|
||||
=> testomat freies chlor
|
||||
```
|
||||
|
||||
## Nicht geändert
|
||||
|
||||
- Kein Retrieval-, Scoring-, Ranking- oder Shop-Matching-Fix.
|
||||
- Keine Sonderlogik für `LAB CL` im PHP-Core.
|
||||
- Keine neue harte Tokenliste im PHP-Core; die zusätzlichen Modell-/Kürzeltokens liegen in `genre.yaml`.
|
||||
- Die positive Token-Filterung bleibt aktiv und filtert weiterhin Noise-/Relationswörter.
|
||||
|
||||
## Lokale Checks
|
||||
|
||||
```bash
|
||||
php -l src/Agent/AgentRunner.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
python3 YAML parse OK
|
||||
model acronym positive-filter smoke OK
|
||||
```
|
||||
|
||||
Die Symfony-Console-Checks konnten im ZIP-Arbeitsverzeichnis nicht lokal ausgeführt werden, weil `vendor/` nicht enthalten ist. Bitte wie gewohnt in der Zielumgebung ausführen:
|
||||
|
||||
```bash
|
||||
bin/console cache:clear
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
@@ -0,0 +1,104 @@
|
||||
# RetrieX Patch p85 - Generic Device Context Anchor
|
||||
|
||||
## Goal
|
||||
|
||||
Improve generic device shop queries without hardwiring every occurrence of `gerät` to `testomat`.
|
||||
|
||||
The motivating example is:
|
||||
|
||||
```text
|
||||
gerät kühlsysteme Silikatüberwachung
|
||||
```
|
||||
|
||||
The desired final Shopware query is product-family aware but still context-preserving:
|
||||
|
||||
```text
|
||||
testomat kühlsysteme silikatüberwachung
|
||||
```
|
||||
|
||||
## Problem
|
||||
|
||||
Generic device words such as `gerät`, `messgerät` or `analysegerät` are useful intent signals, but weak Shopware search terms.
|
||||
A naive rule like `gerät => testomat` would be wrong because the shop can contain other device families such as PocketTester, Titromat, NeoDeWa, JUMO-related products or sensors.
|
||||
|
||||
The current positive shop-query filter can also reduce unknown application words too aggressively. For example, a query containing `Silikatüberwachung` can lose the application context if that token is not part of the positive allow surface.
|
||||
|
||||
## Solution
|
||||
|
||||
Patch p85 adds a small, configurable shop-query runtime guard:
|
||||
|
||||
```yaml
|
||||
shop_query_runtime:
|
||||
generic_device_anchor:
|
||||
enabled: true
|
||||
remove_generic_device_terms: true
|
||||
template: '{anchor} {query}'
|
||||
trigger_terms: [...]
|
||||
suppress_if_terms: [...]
|
||||
anchor_rules:
|
||||
- anchor: testomat
|
||||
match_terms: [...]
|
||||
- anchor: pockettester
|
||||
match_terms: [...]
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
1. The guard activates only when the query contains a generic device term.
|
||||
2. It does not activate when the query already contains a concrete family/product/accessory/sensor term from `suppress_if_terms`.
|
||||
3. It adds only the anchor of a configured rule that has at least one of its `match_terms` present in the query (e.g. `redox` alone is enough to select `pockettester`).
|
||||
4. If `remove_generic_device_terms` is enabled, generic device words are removed from the final query when an anchor was added.
|
||||
5. Rule match terms and anchors are also made visible to the positive token filter, so contextual terms such as `kühlsysteme` and `silikatüberwachung` are not dropped after enrichment.
|
||||
|
||||
## Expected examples
|
||||
|
||||
```text
|
||||
gerät kühlsysteme Silikatüberwachung
|
||||
=> testomat kühlsysteme silikatüberwachung
|
||||
```
|
||||
|
||||
```text
|
||||
gerät redox
|
||||
=> pockettester redox
|
||||
```
|
||||
|
||||
```text
|
||||
gerät sensor redox
|
||||
=> no family anchor is injected because `sensor` is a suppress term
|
||||
```
|
||||
|
||||
```text
|
||||
testomat gerät freies chlor
|
||||
=> no extra family anchor is injected because `testomat` is already present
|
||||
```
|
||||
|
||||
```text
|
||||
Zeige mir die Preise zu Testomat LAB CL.
|
||||
=> unchanged; p84 acronym preservation remains responsible for LAB CL
|
||||
```
|
||||
|
||||
## Files changed
|
||||
|
||||
- `src/Agent/AgentRunner.php`
|
||||
- `src/Config/AgentRunnerConfig.php`
|
||||
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||
- `config/retriex/genre.yaml`
|
||||
|
||||
## Local checks
|
||||
|
||||
```text
|
||||
php -l src/Agent/AgentRunner.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/RetriexEffectiveConfigProvider.php
|
||||
python3 YAML parse for config/retriex/genre.yaml
|
||||
standalone smoke simulation for generic-device anchor behavior
|
||||
```
|
||||
|
||||
Symfony console checks require the deployment environment with `vendor/`:
|
||||
|
||||
```text
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
@@ -317,6 +317,12 @@ final class AgentRunnerConfig
|
||||
return $this->genreConfig?->getValueInt($path);
|
||||
}
|
||||
|
||||
/**
 * Looks up an array-valued entry in the genre configuration.
 *
 * Yields an empty array when no genre configuration is available or the
 * lookup itself produces null.
 *
 * @return array<int|string, mixed>
 */
private function genreArray(string $path): array
{
    $configured = $this->genreConfig?->getValueArray($path);

    return $configured ?? [];
}
|
||||
|
||||
private function getRequiredInt(string $key): int
|
||||
{
|
||||
$value = $this->requiredValue($key);
|
||||
@@ -1390,6 +1396,107 @@ final class AgentRunnerConfig
|
||||
return $this->genreStringList('shop_query_runtime.semantic_shop_search_tokens.terms');
|
||||
}
|
||||
|
||||
/**
 * Tells whether the generic device anchor guard is switched on.
 *
 * Reads `shop_query_runtime.generic_device_anchor.enabled` from the genre
 * configuration; a null lookup result is treated as disabled.
 */
public function isGenericDeviceQueryAnchorEnabled(): bool
{
    $enabled = $this->genreBool('shop_query_runtime.generic_device_anchor.enabled');

    return $enabled ?? false;
}
|
||||
|
||||
/**
 * Tells whether generic device terms should be removed from the query.
 *
 * Reads `shop_query_runtime.generic_device_anchor.remove_generic_device_terms`
 * from the genre configuration; a null lookup result is treated as false.
 */
public function shouldGenericDeviceQueryAnchorRemoveGenericDeviceTerms(): bool
{
    $removeTerms = $this->genreBool('shop_query_runtime.generic_device_anchor.remove_generic_device_terms');

    return $removeTerms ?? false;
}
|
||||
|
||||
/**
 * Returns the query template configured for the generic device anchor.
 *
 * The value is read from `shop_query_runtime.generic_device_anchor.template`
 * and passed through unchanged.
 *
 * NOTE(review): genreString() is not visible from here - confirm it can never
 * return null, since this method is declared to return a plain string.
 */
public function getGenericDeviceQueryAnchorTemplate(): string
{
    $template = $this->genreString('shop_query_runtime.generic_device_anchor.template');

    return $template;
}
|
||||
|
||||
/**
 * Returns the configured trigger terms of the generic device anchor.
 *
 * The list is read from `shop_query_runtime.generic_device_anchor.trigger_terms`
 * and handed back exactly as genreStringList() delivers it.
 *
 * @return string[]
 */
public function getGenericDeviceQueryAnchorTriggerTerms(): array
{
    $triggerTerms = $this->genreStringList('shop_query_runtime.generic_device_anchor.trigger_terms');

    return $triggerTerms;
}
|
||||
|
||||
/**
 * Returns the configured suppress terms of the generic device anchor.
 *
 * The list is read from `shop_query_runtime.generic_device_anchor.suppress_if_terms`
 * and handed back exactly as genreStringList() delivers it.
 *
 * @return string[]
 */
public function getGenericDeviceQueryAnchorSuppressTerms(): array
{
    $suppressTerms = $this->genreStringList('shop_query_runtime.generic_device_anchor.suppress_if_terms');

    return $suppressTerms;
}
|
||||
|
||||
/**
 * Builds the normalised anchor rules of the generic device anchor.
 *
 * Reads `shop_query_runtime.generic_device_anchor.anchor_rules` and keeps a
 * rule only when all of the following hold:
 *  - the rule entry is an array,
 *  - its `anchor` is a scalar that is non-empty after trimming,
 *  - its `match_terms` is an array with at least one usable term.
 *
 * Match terms are cast to string and trimmed; non-scalar and empty entries
 * are skipped, and a repeated term is kept only at its first position.
 *
 * @return array<int, array{anchor: string, match_terms: string[]}>
 */
public function getGenericDeviceQueryAnchorRules(): array
{
    // Scalar config value -> trimmed string; anything non-scalar -> null.
    $toTrimmedString = static fn (mixed $value): ?string => is_scalar($value)
        ? trim((string) $value)
        : null;

    $configuredRules = $this->genreArray('shop_query_runtime.generic_device_anchor.anchor_rules');
    $normalizedRules = [];

    foreach ($configuredRules as $candidate) {
        if (!is_array($candidate)) {
            continue;
        }

        $anchorName = $toTrimmedString($candidate['anchor'] ?? '');
        $termCandidates = $candidate['match_terms'] ?? [];

        if ($anchorName === null || $anchorName === '' || !is_array($termCandidates)) {
            continue;
        }

        $uniqueTerms = [];
        foreach ($termCandidates as $termCandidate) {
            $normalizedTerm = $toTrimmedString($termCandidate);
            if ($normalizedTerm === null || $normalizedTerm === '') {
                continue;
            }

            // Keyed by the term itself: re-assigning an existing key keeps the
            // first position, and the stored value stays a string even when
            // PHP casts an integer-like key to int.
            $uniqueTerms[$normalizedTerm] = $normalizedTerm;
        }

        if ($uniqueTerms !== []) {
            $normalizedRules[] = [
                'anchor' => $anchorName,
                'match_terms' => array_values($uniqueTerms),
            ];
        }
    }

    return $normalizedRules;
}
|
||||
|
||||
/**
 * Collects every term the anchor rules contribute to the positive token filter.
 *
 * For each rule from getGenericDeviceQueryAnchorRules() the anchor comes first,
 * followed by that rule's match terms. Terms are trimmed, empty strings are
 * skipped, and a repeated term is kept only at its first position.
 *
 * @return string[]
 */
public function getGenericDeviceQueryAnchorPositiveFilterTerms(): array
{
    $uniqueTerms = [];

    foreach ($this->getGenericDeviceQueryAnchorRules() as $anchorRule) {
        foreach ([$anchorRule['anchor'], ...$anchorRule['match_terms']] as $rawTerm) {
            $cleanTerm = trim($rawTerm);
            if ($cleanTerm === '') {
                continue;
            }

            // Keyed by the term itself so a repeated term keeps its first position.
            $uniqueTerms[$cleanTerm] = $cleanTerm;
        }
    }

    return array_values($uniqueTerms);
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
|
||||
@@ -1371,6 +1371,47 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$this->validateRegexPatternList($positiveTokenFilter['adjacent_variant_patterns'] ?? [], 'genre.configuration_values.shop_query_runtime.positive_token_filter.adjacent_variant_patterns', $errors);
|
||||
}
|
||||
|
||||
$genericDeviceAnchor = is_array($shopQueryRuntime['generic_device_anchor'] ?? null)
|
||||
? $shopQueryRuntime['generic_device_anchor']
|
||||
: [];
|
||||
if ($genericDeviceAnchor !== []) {
|
||||
foreach (['enabled', 'remove_generic_device_terms'] as $boolKey) {
|
||||
if (array_key_exists($boolKey, $genericDeviceAnchor) && !is_bool($genericDeviceAnchor[$boolKey])) {
|
||||
$errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.%s must be boolean.', $boolKey);
|
||||
}
|
||||
}
|
||||
|
||||
if (array_key_exists('template', $genericDeviceAnchor) && (!is_string($genericDeviceAnchor['template']) || trim($genericDeviceAnchor['template']) === '')) {
|
||||
$errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.template must be a non-empty string.';
|
||||
}
|
||||
|
||||
$this->validateStringList($this->toList($genericDeviceAnchor['trigger_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.trigger_terms', $errors, $warnings);
|
||||
$this->validateStringList($this->toList($genericDeviceAnchor['suppress_if_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.suppress_if_terms', $errors, $warnings);
|
||||
|
||||
$anchorRules = $genericDeviceAnchor['anchor_rules'] ?? [];
|
||||
if ($anchorRules !== [] && !is_array($anchorRules)) {
|
||||
$errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules must be a list.';
|
||||
} elseif (is_array($anchorRules)) {
|
||||
foreach ($anchorRules as $index => $rule) {
|
||||
if (!is_array($rule)) {
|
||||
$errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s must be a map.', (string) $index);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_string($rule['anchor'] ?? null) || trim((string) ($rule['anchor'] ?? '')) === '') {
|
||||
$errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index);
|
||||
}
|
||||
|
||||
$this->validateStringList(
|
||||
$this->toList($rule['match_terms'] ?? []),
|
||||
sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index),
|
||||
$errors,
|
||||
$warnings
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($this->collectGenreConfigurationValueSourcePaths($configurationValues) as $valuePath => $sourcePaths) {
|
||||
foreach ($sourcePaths as $sourcePath) {
|
||||
if (!isset($flattened[$sourcePath])) {
|
||||
|
||||
Reference in New Issue
Block a user