diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index 9520371..b6c0797 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -213,6 +213,8 @@ parameters: template: '' vocabulary_views: trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms + query_terms: [] + query_noise_terms: [] anchor_patterns: [] meta_query_guard: enabled: true diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index 5b34c63..755f396 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -1082,6 +1082,27 @@ parameters: - zubehör - zubehor - accessory + # Terms that should be sent to Shopware for referential accessory or + # consumable follow-ups. Broader RAG markers can remain in + # trigger_terms without becoming dominant shop query tokens. + query_terms: + - indikator + - indicator + - reagenz + - reagent + - zubehör + - zubehor + - accessory + # Terms that are useful for interpreting RAG/history phrasing but are + # too meta or type-oriented for the plain Shopware text query. 
+ query_noise_terms: + - indikatortyp + - indicator type + - typ + - type + - beim + - gemessen + - messen anchor_patterns: - /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu template: '{anchor} {query}' diff --git a/config/retriex/language.yaml b/config/retriex/language.yaml index eaeb134..0f55eef 100644 --- a/config/retriex/language.yaml +++ b/config/retriex/language.yaml @@ -78,6 +78,7 @@ parameters: stopword_groups: de_core: - der + - beim - die - das - den diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index c9ea69d..7911823 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -264,6 +264,7 @@ parameters: - '- For uncertain technical suitability from shop hits, use a short section like "Shop-Treffer (technische Eignung nicht sicher belegt)" and list only exact shop fields. Do not add a technical explanation or recommendation.' without_shop_rules: - '- If no shop results are present, do not compensate by inventing external products or external manufacturers.' + - '- For price, cost, availability, or other commercial follow-up questions with no matching shop result, answer only that the requested commercial detail could not be determined from the provided shop data; do not list unrelated RAG products or unrelated shop examples.' 
technical_rules: [] accessory_rules: [] language: diff --git a/patch_history/RETRIEX_PATCH_60_GENERIC_REFERENTIAL_SHOP_ANCHOR_GUARD_README.md b/patch_history/RETRIEX_PATCH_60_GENERIC_REFERENTIAL_SHOP_ANCHOR_GUARD_README.md new file mode 100644 index 0000000..a2aa575 --- /dev/null +++ b/patch_history/RETRIEX_PATCH_60_GENERIC_REFERENTIAL_SHOP_ANCHOR_GUARD_README.md @@ -0,0 +1,84 @@ +# RetrieX Patch p60 - Generic Referential Shop Anchor Guard + +## Ziel + +Stabilisiert referenzielle Shop-Preisfragen, bei denen der Verlauf bereits einen konkreten Geräteanker und danach ein Zubehör-/Reagenz-/Indikator-Detail enthält. + +Der konkrete Regressionspfad war sinngemäß: + +1. Grenzwert-Frage belegt `Testomat 808`. +2. Indikator-Folgefrage belegt `Indikatortyp 300`. +3. Preis-Folgefrage fragt `was kostet der indikator`. + +Die Shop-Query durfte nicht bei einem typcode-lastigen Ausdruck wie `indikatortyp 300 indikator` hängen bleiben, weil Shopware dadurch auch unpassende Geräte wie `Testomat 2000 DUO` liefern kann. + +## Änderungen + +### Generische Query-Stabilisierung + +- `genre.yaml` ergänzt für `context_resolution.history_anchor_enrichment`: + - `query_terms` + - `query_noise_terms` +- RAG-/Historienmarker wie `indikatortyp` bleiben Trigger-/Kontextbegriffe, werden aber nicht dominant als Shop-Suchtoken ausgegeben. +- Typ-/Code-Tokens wie `300` bleiben erhalten. +- Wenn im selben Verlaufsturn ein konkreter Geräte-/Modellanker und ein Zubehör-/Typanker stehen, wird daraus generisch ein qualifizierter Shopanker. + +Beispielhaft ergibt sich: + +```text +testomat 808 300 indikator +``` + +statt: + +```text +indikatortyp 300 indikator +``` + +### Generischer Shop-Treffer-Guard + +Wenn die finale Shopquery einen konkreten Produkt-/Modellanker enthält, werden Shop-Treffer verworfen, die diesen Anker nicht tragen. Ein fremder Gerätetreffer darf dann nicht mehr als Preisbasis für ein referenziertes Zubehör/Verbrauchsmittel dienen. 
+ +### Antwortregel bei fehlendem passendem Shop-Treffer + +`prompt.yaml` erhält eine generische Regel: + +- Bei Preis-/Kosten-/Verfügbarkeitsfragen ohne passenden Shop-Treffer soll keine fremde RAG-/Shop-Produktliste als Preisersatz ausgegeben werden. +- Stattdessen soll klar gesagt werden, dass der angefragte kommerzielle Wert aus den bereitgestellten Shopdaten nicht ermittelt werden konnte. + +## Kein Sonderfall + +Der Patch enthält keine harte Sonderlogik für `Testomat 808` oder `Indikator 300`. + +Die Logik ist allgemein: + +- konkreter Verlauf-Geräteanker +- Zubehör-/Reagenz-/Indikator-/Accessory-Kontext +- technische/RAG-nahe Typwörter als Query-Noise +- Typ-/Code-Tokens bleiben erhalten +- Shop-Treffer müssen zum konkreten Modellanker passen, wenn ein solcher in der Query enthalten ist + +## Dateien + +- `config/retriex/agent.yaml` +- `config/retriex/genre.yaml` +- `config/retriex/language.yaml` +- `config/retriex/prompt.yaml` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` + +## Lokale Prüfungen + +- PHP-Lint für geänderte PHP-Dateien grün +- YAML parsebar für geänderte YAML-Dateien +- lokale Query-Simulation: `testomat 808 300 indikator` + +## Projektchecks + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 0ed4298..be6857a 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -502,6 +502,7 @@ final readonly class AgentRunner $shopResults = $directIdentityRepairPayload['results']; } + $shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $attemptedShopRepair = $repairPayload['attemptedRepair'] || 
$directIdentityRepairPayload['attemptedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; @@ -2672,20 +2673,40 @@ final readonly class AgentRunner return ''; } - $triggerTokens = []; - foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { - foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { - $triggerTokens[$termToken] = true; - } - } + $triggerTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() + ); if ($triggerTokens === []) { return ''; } + $hasTrigger = false; + foreach ($tokens as $token) { + if (isset($triggerTokens[$token])) { + $hasTrigger = true; + break; + } + } + + if (!$hasTrigger) { + return ''; + } + + $queryTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() + ); + if ($queryTokens === []) { + $queryTokens = $triggerTokens; + } + + $noiseTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() + ); + $out = []; foreach ($tokens as $token) { - if (!isset($triggerTokens[$token]) || isset($out[$token])) { + if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { continue; } @@ -2695,6 +2716,23 @@ final readonly class AgentRunner return implode(' ', array_values($out)); } + /** + * @param string[] $terms + * @return array + */ + private function buildShopQueryTokenSet(array $terms): array + { + $tokens = []; + + foreach ($terms as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { + $tokens[$termToken] = true; + } + } + + return $tokens; + } + private function enrichReferentialShopQueryFromHistory( string $query, string $sourcePrompt, @@ -2763,11 +2801,33 @@ final readonly class AgentRunner } private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string 
+ { + foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $turn) { + if (!$this->containsConfiguredShopQueryAnchorTrigger($turn)) { + continue; + } + + $modelAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($turn); + $turnAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($turn); + + if ($modelAnchor !== '') { + return $this->buildModelQualifiedShopQueryAnchor($modelAnchor, $turnAnchor); + } + + if ($turnAnchor !== '') { + return $turnAnchor; + } + } + + return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext); + } + + private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string { $latest = ''; foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { - if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { + if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) { continue; } @@ -2782,6 +2842,51 @@ final readonly class AgentRunner return $latest; } + private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string + { + $modelAnchor = trim($modelAnchor); + if ($modelAnchor === '') { + return trim($detailAnchor); + } + + $detailTokens = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $modelAnchor); + if ($detailTokens === []) { + return $modelAnchor; + } + + return trim($modelAnchor . ' ' . 
implode(' ', $detailTokens)); + } + + /** + * @return string[] + */ + private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array + { + $tokens = $this->tokenizeShopQueryCandidate($detailAnchor); + if ($tokens === []) { + return []; + } + + $modelTokens = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true); + $queryTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() + ); + $noiseTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() + ); + + $out = []; + foreach ($tokens as $token) { + if (isset($modelTokens[$token]) || isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { + continue; + } + + $out[$token] = $token; + } + + return array_values($out); + } + private function normalizeShopQueryAnchor(string $anchor): string { $anchor = str_replace('®', '', $anchor); @@ -3249,6 +3354,48 @@ final readonly class AgentRunner return trim(implode(' ', $this->tokenizeShopQueryCandidate($query))); } + /** + * @param ShopProductResult[] $shopResults + * @return ShopProductResult[] + */ + private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array + { + if ($shopResults === []) { + return $shopResults; + } + + $anchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery); + if ($anchor === '') { + return $shopResults; + } + + $filtered = []; + foreach ($shopResults as $product) { + if (!$product instanceof ShopProductResult) { + continue; + } + + if ($this->shopProductMatchesReferencedProductAnchor($product, $anchor)) { + $filtered[] = $product; + } + } + + return $filtered; + } + + private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool + { + $productText = trim(implode(' ', array_filter([ + $product->name, + 
$product->description, + implode(' ', $product->highlights), + $product->customFields, + $product->url, + ]))); + + return $this->containsAllShopQueryTokens($productText, $anchor); + } + /** * @param ShopProductResult[] $shopResults * @return ShopProductResult[] diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index c0eab5e..8bbfde8 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -1434,6 +1434,24 @@ final class AgentRunnerConfig ); } + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentQueryTerms(): array + { + return $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms') + ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms'); + } + + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array + { + return $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms') + ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms'); + } + /** * @return string[] */ diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 19dd5f8..864bf8d 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -703,6 +703,8 @@ final readonly class RetriexEffectiveConfigProvider 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), + 'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(), + 'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(), 'anchor_patterns' => 
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), ], @@ -1834,6 +1836,8 @@ final readonly class RetriexEffectiveConfigProvider $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; if (is_array($anchorEnrichment)) { $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); + $this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings); + $this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';