diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index 0f89746..93d1895 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -188,7 +188,7 @@ parameters: positive_token_filter: enabled: false - min_query_tokens_after_filter: 2 + min_query_tokens_after_filter: 1 include_current_input_preservation_terms: true include_semantic_shop_search_tokens: true include_product_role_terms: true @@ -223,6 +223,8 @@ parameters: template: '' vocabulary_views: trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms + query_terms: [] + query_noise_terms: [] anchor_patterns: [] meta_query_guard: enabled: true @@ -376,10 +378,6 @@ parameters: separator: ' | ' unavailable_reason_template: '{message} Ursache: {reason}' - product_roles: - vocabulary_views: - main_device_request_keywords: agent.no_llm_fallback.product_roles.main_device_request_keywords - accessory_product_keywords: agent.no_llm_fallback.product_roles.accessory_product_keywords production_ui: diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index c22a995..ec6d699 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -14,6 +14,8 @@ # the value surface stays native and maintenance-focused. # p59F adds machine-readable review-group metadata so the remaining # compatibility paths can be removed or kept deliberately by category. +# p59G rewires the no-LLM product-role fallback directly to genre values +# and removes its obsolete legacy compatibility view declarations. parameters: retriex.genre.config: id: water_analysis @@ -30,15 +32,6 @@ parameters: - configuration_values.product_roles.shop_views - configuration_values.product_roles.prompt_views - configuration_values.product_roles.no_llm_fallback_terms - review_path_groups: - frozen_compatibility_views: - description: Legacy no-LLM product-role vocabulary views. Keep frozen until the fallback reads only from genre.configuration_values.product_roles. - classification: legacy_compatibility_view - source_state: legacy_frozen_non_empty - cleanup_action: rewire_to_genre_value_and_remove_legacy_view - paths: - - agent.no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords - - agent.no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords product_attributes: description: Genre-specific attributes and constraints, for example measurement values now or size/color/material later. value_paths: @@ -1090,6 +1083,24 @@ parameters: - zubehör - zubehor - accessory + # Terms that should be sent to Shopware for referential accessory or + # consumable follow-ups. Broader RAG markers can remain in + # trigger_terms without becoming dominant shop query tokens. + query_terms: + - indikator + - indicator + - reagenz + - reagent + - zubehör + - zubehor + - accessory + # Terms that are useful for interpreting RAG/history phrasing but are + # too meta or type-oriented for the plain Shopware text query. + query_noise_terms: + - indikatortyp + - indicator type + - typ + - type anchor_patterns: - /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu template: '{anchor} {query}' @@ -1198,7 +1209,7 @@ parameters: positive_token_filter: origin: genre_native enabled: true - min_query_tokens_after_filter: 2 + min_query_tokens_after_filter: 1 include_current_input_preservation_terms: true include_semantic_shop_search_tokens: true include_product_role_terms: true diff --git a/patch_history/RETRIEX_PATCH_61C_POSITIVE_SHOP_QUERY_TOKEN_FILTER_ON_P60_README.md b/patch_history/RETRIEX_PATCH_61C_POSITIVE_SHOP_QUERY_TOKEN_FILTER_ON_P60_README.md new file mode 100644 index 0000000..45ee0a3 --- /dev/null +++ b/patch_history/RETRIEX_PATCH_61C_POSITIVE_SHOP_QUERY_TOKEN_FILTER_ON_P60_README.md @@ -0,0 +1,86 @@ +# RetrieX Patch p61C - Positive Shop Query Token Filter on p60 + +p61C reapplies the positive Shopware query token filter on the confirmed p60 baseline. + +## Why p61C exists + +p61B was built on a stale base and reintroduced legacy `agent.no_llm_fallback.product_roles.vocabulary_views.*` paths that had already been removed by p59G. It also did not reliably preserve the p60 referential device anchor in the generated query. + +p61C uses the confirmed p60 baseline and keeps the p59G/p60 cleanup intact. + +## Goal + +The final plain Shopware query should contain only product-relevant tokens: + +- product/device/accessory names from the active genre vocabulary +- explicitly allowed product family/application terms +- protected short technical terms such as pH/RX/TH/TC/TP/TM when configured +- model/type/code tokens such as `808`, `300`, `TH2100`, `2x100ml` when they match configured regex patterns + +Sentence, relation and RAG-only reference words such as `gemessen`, `beim` or `indikatortyp` must not dominate the shop query. + +## Expected example + +Input query after p60 referential/RAG anchoring: + +```text +testomat 808 gemessen 300 beim indikator +``` + +Final shop query after p61C: + +```text +testomat 808 300 indikator +``` + +## Configuration + +Primary configuration lives in: + +```yaml +config/retriex/genre.yaml +parameters: + retriex.genre.config: + configuration_values: + shop_query_runtime: + positive_token_filter: +``` + +Important fields: + +- `enabled`: activates the filter for the active genre. +- `min_query_tokens_after_filter`: set to `1` so a single valid product token can still replace a noisy query. +- `allowed_terms`: extra genre-specific product family/application terms. +- `blocked_terms`: terms that are useful for RAG/reference resolution but poor shop search tokens. +- `code_patterns`: regex patterns for model/type/article/size tokens. +- `include_current_input_preservation_terms`: includes configured protected short terms from the shop query preservation surface. +- `include_semantic_shop_search_tokens`: includes the genre's shop semantic product vocabulary. +- `include_product_role_terms`: includes the genre's device/accessory role vocabulary. + +`agent.yaml` contains only an inactive compatibility fallback for this feature. Runtime values should be maintained in `genre.yaml`. + +## Scope + +No hard-coded product names or stopword lists were added to PHP. The PHP code only applies the configured positive token filter. + +No changes to: + +- retrieval ranking +- prompt rules +- shop result scoring +- SearchRepair +- intent routing +- product identity matching + +## Validation + +Run: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +The p59G no-LLM legacy paths must remain absent from `agent.yaml`, `genre.yaml` source paths and `governance.yaml` frozen hashes. diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 6bf157d..82c5d09 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -517,6 +517,7 @@ final readonly class AgentRunner $shopResults = $directIdentityRepairPayload['results']; } + $shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; @@ -2799,20 +2800,40 @@ final readonly class AgentRunner return ''; } - $triggerTokens = []; - foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { - foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { - $triggerTokens[$termToken] = true; - } - } + $triggerTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() + ); if ($triggerTokens === []) { return ''; } + $hasTrigger = false; + foreach ($tokens as $token) { + if (isset($triggerTokens[$token])) { + $hasTrigger = true; + break; + } + } + + if (!$hasTrigger) { + return ''; + } + + $queryTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() + ); + if ($queryTokens === []) { + $queryTokens = $triggerTokens; + } + + $noiseTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() + ); + $out = []; foreach ($tokens as $token) { - if (!isset($triggerTokens[$token]) || isset($out[$token])) { + if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { continue; } @@ -2822,6 +2843,23 @@ final readonly class AgentRunner return implode(' ', array_values($out)); } + /** + * @param string[] $terms + * @return array + */ + private function buildShopQueryTokenSet(array $terms): array + { + $tokens = []; + + foreach ($terms as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { + $tokens[$termToken] = true; + } + } + + return $tokens; + } + private function enrichReferentialShopQueryFromHistory( string $query, string $sourcePrompt, @@ -2890,11 +2928,33 @@ final readonly class AgentRunner } private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string + { + foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $turn) { + if (!$this->containsConfiguredShopQueryAnchorTrigger($turn)) { + continue; + } + + $modelAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($turn); + $turnAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($turn); + + if ($modelAnchor !== '') { + return $this->buildModelQualifiedShopQueryAnchor($modelAnchor, $turnAnchor); + } + + if ($turnAnchor !== '') { + return $turnAnchor; + } + } + + return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext); + } + + private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string { $latest = ''; foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { - if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { + if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) { continue; } @@ -2909,6 +2969,51 @@ final readonly class AgentRunner return $latest; } + private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string + { + $modelAnchor = trim($modelAnchor); + if ($modelAnchor === '') { + return trim($detailAnchor); + } + + $detailTokens = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $modelAnchor); + if ($detailTokens === []) { + return $modelAnchor; + } + + return trim($modelAnchor . ' ' . implode(' ', $detailTokens)); + } + + /** + * @return string[] + */ + private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array + { + $tokens = $this->tokenizeShopQueryCandidate($detailAnchor); + if ($tokens === []) { + return []; + } + + $modelTokens = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true); + $queryTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() + ); + $noiseTokens = $this->buildShopQueryTokenSet( + $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() + ); + + $out = []; + foreach ($tokens as $token) { + if (isset($modelTokens[$token]) || isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { + continue; + } + + $out[$token] = $token; + } + + return array_values($out); + } + private function normalizeShopQueryAnchor(string $anchor): string { $anchor = str_replace('®', '', $anchor); @@ -3376,6 +3481,48 @@ final readonly class AgentRunner return trim(implode(' ', $this->tokenizeShopQueryCandidate($query))); } + /** + * @param ShopProductResult[] $shopResults + * @return ShopProductResult[] + */ + private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array + { + if ($shopResults === []) { + return $shopResults; + } + + $anchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery); + if ($anchor === '') { + return $shopResults; + } + + $filtered = []; + foreach ($shopResults as $product) { + if (!$product instanceof ShopProductResult) { + continue; + } + + if ($this->shopProductMatchesReferencedProductAnchor($product, $anchor)) { + $filtered[] = $product; + } + } + + return $filtered; + } + + private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool + { + $productText = trim(implode(' ', array_filter([ + $product->name, + $product->description, + implode(' ', $product->highlights), + $product->customFields, + $product->url, + ]))); + + return $this->containsAllShopQueryTokens($productText, $anchor); + } + /** * @param ShopProductResult[] $shopResults * @return ShopProductResult[] diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index f73b813..c19349d 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -962,6 +962,11 @@ final class AgentRunnerConfig */ public function getNoLlmMainDeviceRequestRoleKeywords(): array { + $terms = $this->genreStringList('product_roles.no_llm_fallback_terms.main_device_request_keywords'); + if ($terms !== []) { + return $terms; + } + return $this->getConfiguredStringListOrVocabularyView( 'no_llm_fallback.product_roles.main_device_request_keywords', 'no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords' @@ -973,6 +978,11 @@ final class AgentRunnerConfig */ public function getNoLlmAccessoryProductRoleKeywords(): array { + $terms = $this->genreStringList('product_roles.no_llm_fallback_terms.accessory_product_keywords'); + if ($terms !== []) { + return $terms; + } + return $this->getConfiguredStringListOrVocabularyView( 'no_llm_fallback.product_roles.accessory_product_keywords', 'no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords' @@ -1195,7 +1205,7 @@ final class AgentRunnerConfig public function getShopQueryPositiveTokenFilterMinTokens(): int { return $this->genreInt('shop_query_runtime.positive_token_filter.min_query_tokens_after_filter') - ?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 2); + ?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 1); } public function shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(): bool @@ -1508,6 +1518,24 @@ final class AgentRunnerConfig ); } + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentQueryTerms(): array + { + return $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms') + ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms'); + } + + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array + { + return $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms') + ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms'); + } + /** * @return string[] */ diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 1f69e3b..5e99eb9 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -713,6 +713,8 @@ final readonly class RetriexEffectiveConfigProvider 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), + 'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(), + 'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(), 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), ], @@ -1871,6 +1873,8 @@ final readonly class RetriexEffectiveConfigProvider $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; if (is_array($anchorEnrichment)) { $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); + $this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings); + $this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';