This commit is contained in:
team 1
2026-05-07 18:14:30 +02:00
parent 0977cec651
commit 8ff06cee2c
8 changed files with 285 additions and 8 deletions

View File

@@ -213,6 +213,8 @@ parameters:
template: '' template: ''
vocabulary_views: vocabulary_views:
trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
query_terms: []
query_noise_terms: []
anchor_patterns: [] anchor_patterns: []
meta_query_guard: meta_query_guard:
enabled: true enabled: true

View File

@@ -1082,6 +1082,27 @@ parameters:
- zubehör - zubehör
- zubehor - zubehor
- accessory - accessory
# Terms that should be sent to Shopware for referential accessory or
# consumable follow-ups. Broader RAG markers can remain in
# trigger_terms without becoming dominant shop query tokens.
query_terms:
- indikator
- indicator
- reagenz
- reagent
- zubehör
- zubehor
- accessory
# Terms that are useful for interpreting RAG/history phrasing but are
# too meta or type-oriented for the plain Shopware text query.
# Keep every entry a single word: each entry is tokenized and every
# resulting token is treated as noise, and noise wins over query_terms.
# A multi-word entry such as "indicator type" would therefore presumably
# also suppress the query term "indicator" (verify against the tokenizer);
# the standalone "type" entry already covers that phrase.
query_noise_terms:
- indikatortyp
- typ
- type
- beim
- gemessen
- messen
anchor_patterns: anchor_patterns:
- /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu - /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu
template: '{anchor} {query}' template: '{anchor} {query}'

View File

@@ -78,6 +78,7 @@ parameters:
stopword_groups: stopword_groups:
de_core: de_core:
- der - der
- beim
- die - die
- das - das
- den - den

View File

@@ -264,6 +264,7 @@ parameters:
- '- For uncertain technical suitability from shop hits, use a short section like "Shop-Treffer (technische Eignung nicht sicher belegt)" and list only exact shop fields. Do not add a technical explanation or recommendation.' - '- For uncertain technical suitability from shop hits, use a short section like "Shop-Treffer (technische Eignung nicht sicher belegt)" and list only exact shop fields. Do not add a technical explanation or recommendation.'
without_shop_rules: without_shop_rules:
- '- If no shop results are present, do not compensate by inventing external products or external manufacturers.' - '- If no shop results are present, do not compensate by inventing external products or external manufacturers.'
- '- For price, cost, availability, or other commercial follow-up questions with no matching shop result, answer only that the requested commercial detail could not be determined from the provided shop data; do not list unrelated RAG products or unrelated shop examples.'
technical_rules: [] technical_rules: []
accessory_rules: [] accessory_rules: []
language: language:

View File

@@ -0,0 +1,83 @@
# RetrieX Patch p60 - Generic Referential Shop Anchor Guard
## Ziel
Stabilisiert referenzielle Shop-Preisfragen, bei denen der Verlauf bereits einen konkreten Geräteanker und danach ein Zubehör-/Reagenz-/Indikator-Detail enthält.
Der konkrete Regressionspfad war sinngemäß:
1. Grenzwert-Frage belegt `Testomat 808`.
2. Indikator-Folgefrage belegt `Indikatortyp 300`.
3. Preis-Folgefrage fragt `was kostet der indikator`.
Die Shop-Query durfte nicht bei einem typcode-lastigen Ausdruck wie `indikatortyp 300 indikator` hängen bleiben, weil Shopware dadurch auch unpassende Geräte wie `Testomat 2000 DUO` liefern kann.
## Änderungen
### Generische Query-Stabilisierung
- `genre.yaml` ergänzt für `context_resolution.history_anchor_enrichment`:
- `query_terms`
- `query_noise_terms`
- RAG-/Historienmarker wie `indikatortyp` bleiben Trigger-/Kontextbegriffe, werden aber nicht dominant als Shop-Suchtoken ausgegeben.
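- Typ-/Code-Tokens wie `300` bleiben erhalten.
- `beim` wird zusätzlich als Stoppwort in `stopword_groups.de_core` geführt und steht außerdem in `query_noise_terms`.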
- Wenn im selben Verlaufsturn ein konkreter Geräte-/Modellanker und ein Zubehör-/Typanker stehen, wird daraus generisch ein qualifizierter Shopanker.
Beispielhaft ergibt sich:
```text
testomat 808 300 indikator
```
statt:
```text
indikatortyp 300 indikator
```
### Generischer Shop-Treffer-Guard
Wenn die finale Shopquery einen konkreten Produkt-/Modellanker enthält, werden Shop-Treffer verworfen, die diesen Anker nicht tragen. Ein fremder Gerätetreffer darf dann nicht mehr als Preisbasis für ein referenziertes Zubehör/Verbrauchsmittel dienen.
### Antwortregel bei fehlendem passendem Shop-Treffer
`prompt.yaml` erhält eine generische Regel:
- Bei Preis-/Kosten-/Verfügbarkeitsfragen ohne passenden Shop-Treffer soll keine fremde RAG-/Shop-Produktliste als Preisersatz ausgegeben werden.
- Stattdessen soll klar gesagt werden, dass der angefragte kommerzielle Wert aus den bereitgestellten Shopdaten nicht ermittelt werden konnte.
## Kein Sonderfall
Der Patch enthält keine harte Sonderlogik für `Testomat 808` oder `Indikator 300`.
Die Logik ist allgemein:
- konkreter Verlauf-Geräteanker
- Zubehör-/Reagenz-/Indikator-/Accessory-Kontext
- technische/RAG-nahe Typwörter als Query-Noise
- Typ-/Code-Tokens bleiben erhalten
- Shop-Treffer müssen zum konkreten Modellanker passen, wenn ein solcher in der Query enthalten ist
## Dateien
- `config/retriex/agent.yaml`
- `config/retriex/genre.yaml`
- `config/retriex/prompt.yaml`
- `src/Agent/AgentRunner.php`
- `src/Config/AgentRunnerConfig.php`
- `src/Config/RetriexEffectiveConfigProvider.php`
## Lokale Prüfungen
- PHP-Lint für geänderte PHP-Dateien grün
- YAML parsebar für geänderte YAML-Dateien
- lokale Query-Simulation: `testomat 808 300 indikator`
## Projektchecks
```bash
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```

View File

@@ -502,6 +502,7 @@ final readonly class AgentRunner
$shopResults = $directIdentityRepairPayload['results']; $shopResults = $directIdentityRepairPayload['results'];
} }
$shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults);
$shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults);
$attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair']; $attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair'];
$usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair'];
@@ -2672,20 +2673,40 @@ final readonly class AgentRunner
return ''; return '';
} }
$triggerTokens = []; $triggerTokens = $this->buildShopQueryTokenSet(
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms()
foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { );
$triggerTokens[$termToken] = true;
}
}
if ($triggerTokens === []) { if ($triggerTokens === []) {
return ''; return '';
} }
$hasTrigger = false;
foreach ($tokens as $token) {
if (isset($triggerTokens[$token])) {
$hasTrigger = true;
break;
}
}
if (!$hasTrigger) {
return '';
}
$queryTokens = $this->buildShopQueryTokenSet(
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
);
if ($queryTokens === []) {
$queryTokens = $triggerTokens;
}
$noiseTokens = $this->buildShopQueryTokenSet(
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
);
$out = []; $out = [];
foreach ($tokens as $token) { foreach ($tokens as $token) {
if (!isset($triggerTokens[$token]) || isset($out[$token])) { if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) {
continue; continue;
} }
@@ -2695,6 +2716,23 @@ final readonly class AgentRunner
return implode(' ', array_values($out)); return implode(' ', array_values($out));
} }
/**
 * Builds a lookup set from configured terms.
 *
 * Every term is run through tokenizeShopQueryCandidate() and each resulting
 * token becomes a key of the returned map, so multi-token terms contribute
 * all of their tokens individually.
 *
 * @param string[] $terms configured terms to tokenize
 * @return array<string, true> token => true, in first-seen order
 */
private function buildShopQueryTokenSet(array $terms): array
{
    $tokenSet = [];

    foreach ($terms as $term) {
        // Array union keeps keys that are already present and appends new ones.
        $tokenSet += array_fill_keys($this->tokenizeShopQueryCandidate($term), true);
    }

    return $tokenSet;
}
private function enrichReferentialShopQueryFromHistory( private function enrichReferentialShopQueryFromHistory(
string $query, string $query,
string $sourcePrompt, string $sourcePrompt,
@@ -2763,11 +2801,33 @@ final readonly class AgentRunner
} }
/**
 * Resolves the shop query anchor from the commerce history.
 *
 * Turns are visited in the order delivered by extractHistoryTurnsNewestFirst().
 * Only turns that contain a configured anchor trigger are considered. Within
 * such a turn a product/model anchor takes precedence and is combined with the
 * pattern anchor of the same turn; otherwise the pattern anchor alone is used.
 * If no turn yields an anchor, the pattern search runs over the whole context.
 */
private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string
{
    foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $historyTurn) {
        if ($this->containsConfiguredShopQueryAnchorTrigger($historyTurn)) {
            $deviceAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($historyTurn);
            $patternAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($historyTurn);

            if ($deviceAnchor !== '') {
                return $this->buildModelQualifiedShopQueryAnchor($deviceAnchor, $patternAnchor);
            }

            if ($patternAnchor !== '') {
                return $patternAnchor;
            }
        }
    }

    // No single turn produced an anchor: fall back to the full history text.
    return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext);
}
private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string
{ {
$latest = ''; $latest = '';
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) {
if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) {
continue; continue;
} }
@@ -2782,6 +2842,51 @@ final readonly class AgentRunner
return $latest; return $latest;
} }
/**
 * Combines a product/model anchor with the remaining tokens of a detail anchor.
 *
 * Returns the trimmed detail anchor when the model anchor is blank, the trimmed
 * model anchor alone when no detail tokens remain, and otherwise
 * "<model> <detail tokens...>".
 */
private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string
{
    $model = trim($modelAnchor);
    if ($model === '') {
        return trim($detailAnchor);
    }

    $extraTokens = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $model);

    return $extraTokens === []
        ? $model
        : trim($model . ' ' . implode(' ', $extraTokens));
}
/**
 * Returns the tokens of the detail anchor that are worth appending to a model anchor.
 *
 * Tokens are dropped when they already occur in the model anchor, or when they
 * belong to the configured query terms or query noise terms. Duplicates are
 * removed; the first-seen order is kept.
 *
 * @return string[]
 */
private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array
{
    $detailTokens = $this->tokenizeShopQueryCandidate($detailAnchor);
    if ($detailTokens === []) {
        return [];
    }

    // One lookup map for everything that must not be repeated in the anchor.
    $excluded = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true)
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
        )
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
        );

    $kept = [];
    foreach ($detailTokens as $token) {
        if (isset($excluded[$token])) {
            continue;
        }

        // Keying by token de-duplicates while preserving first-seen order.
        $kept[$token] = $token;
    }

    return array_values($kept);
}
private function normalizeShopQueryAnchor(string $anchor): string private function normalizeShopQueryAnchor(string $anchor): string
{ {
$anchor = str_replace('®', '', $anchor); $anchor = str_replace('®', '', $anchor);
@@ -3249,6 +3354,48 @@ final readonly class AgentRunner
return trim(implode(' ', $this->tokenizeShopQueryCandidate($query))); return trim(implode(' ', $this->tokenizeShopQueryCandidate($query)));
} }
/**
 * Keeps only shop results that match the product/model anchor found in the query.
 *
 * The input is returned unchanged when it is empty or when the query contains
 * no product/model anchor. Otherwise every entry that is not a
 * ShopProductResult or does not match the anchor is removed, which may leave
 * an empty list.
 *
 * @param ShopProductResult[] $shopResults
 * @return ShopProductResult[]
 */
private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array
{
    if ($shopResults === []) {
        return $shopResults;
    }

    $anchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery);
    if ($anchor === '') {
        return $shopResults;
    }

    // array_values() re-indexes so the result is a plain list again.
    return array_values(array_filter(
        $shopResults,
        fn ($product): bool => $product instanceof ShopProductResult
            && $this->shopProductMatchesReferencedProductAnchor($product, $anchor),
    ));
}
/**
 * Checks whether a shop product carries all tokens of the referenced anchor.
 *
 * The product's name, description, highlights, custom fields and URL are
 * joined into one text (empty parts skipped) and handed to
 * containsAllShopQueryTokens() together with the anchor.
 *
 * NOTE(review): customFields is joined as-is, so it is assumed to be
 * string-like here — confirm against ShopProductResult.
 */
private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool
{
    $searchableParts = [
        $product->name,
        $product->description,
        implode(' ', $product->highlights),
        $product->customFields,
        $product->url,
    ];

    $haystack = trim(implode(' ', array_filter($searchableParts)));

    return $this->containsAllShopQueryTokens($haystack, $anchor);
}
/** /**
* @param ShopProductResult[] $shopResults * @param ShopProductResult[] $shopResults
* @return ShopProductResult[] * @return ShopProductResult[]

View File

@@ -1434,6 +1434,24 @@ final class AgentRunnerConfig
); );
} }
/**
 * Returns the terms that may be emitted as shop query tokens during
 * history anchor enrichment.
 *
 * The genre-level list is used when it is non-empty; otherwise the optional
 * list under the shop_runtime configuration path is returned.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryTerms(): array
{
    $genreTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms');
    if ($genreTerms) {
        return $genreTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms');
}
/**
 * Returns the terms that are filtered out of the shop query during
 * history anchor enrichment.
 *
 * The genre-level list is used when it is non-empty; otherwise the optional
 * list under the shop_runtime configuration path is returned.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array
{
    $genreNoiseTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms');
    if ($genreNoiseTerms) {
        return $genreNoiseTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms');
}
/** /**
* @return string[] * @return string[]
*/ */

View File

@@ -703,6 +703,8 @@ final readonly class RetriexEffectiveConfigProvider
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(),
'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(),
'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(),
'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(),
'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(),
], ],
@@ -1834,6 +1836,8 @@ final readonly class RetriexEffectiveConfigProvider
$anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? [];
if (is_array($anchorEnrichment)) { if (is_array($anchorEnrichment)) {
$this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings);
$this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings);
$this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings);
$this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors);
if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') {
$errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.'; $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';