diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index b6c0797..0f89746 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -186,6 +186,16 @@ parameters: # removed after LLM query optimization. Keep this list simple and local. terms: [] + positive_token_filter: + enabled: false + min_query_tokens_after_filter: 2 + include_current_input_preservation_terms: true + include_semantic_shop_search_tokens: true + include_product_role_terms: true + allowed_terms: [] + blocked_terms: [] + code_patterns: [] + attribute_cleanup: enabled: true @@ -213,8 +223,6 @@ parameters: template: '' vocabulary_views: trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms - query_terms: [] - query_noise_terms: [] anchor_patterns: [] meta_query_guard: enabled: true @@ -368,6 +376,10 @@ parameters: separator: ' | ' unavailable_reason_template: '{message} Ursache: {reason}' + product_roles: + vocabulary_views: + main_device_request_keywords: agent.no_llm_fallback.product_roles.main_device_request_keywords + accessory_product_keywords: agent.no_llm_fallback.product_roles.accessory_product_keywords production_ui: diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index be14e59..c22a995 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -14,8 +14,6 @@ # the value surface stays native and maintenance-focused. # p59F adds machine-readable review-group metadata so the remaining # compatibility paths can be removed or kept deliberately by category. -# p59G rewires the no-LLM product-role fallback directly to genre values -# and removes its obsolete legacy compatibility view declarations. parameters: retriex.genre.config: id: water_analysis @@ -32,6 +30,15 @@ parameters: - configuration_values.product_roles.shop_views - configuration_values.product_roles.prompt_views - configuration_values.product_roles.no_llm_fallback_terms + review_path_groups: + frozen_compatibility_views: + description: Legacy no-LLM product-role vocabulary views. Keep frozen until the fallback reads only from genre.configuration_values.product_roles. + classification: legacy_compatibility_view + source_state: legacy_frozen_non_empty + cleanup_action: rewire_to_genre_value_and_remove_legacy_view + paths: + - agent.no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords + - agent.no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords product_attributes: description: Genre-specific attributes and constraints, for example measurement values now or size/color/material later. value_paths: @@ -130,6 +137,7 @@ parameters: value_paths: - configuration_values.shop_query_runtime.current_input_preservation_terms - configuration_values.shop_query_runtime.stopword_cleanup + - configuration_values.shop_query_runtime.positive_token_filter - configuration_values.shop_query_runtime.compound_prefix_match - configuration_values.shop_query_runtime.primary_identity_repair - configuration_values.shop_query_runtime.semantic_shop_search_tokens @@ -1082,28 +1090,6 @@ parameters: - zubehör - zubehor - accessory - # Terms that should be sent to Shopware for referential accessory or - # consumable follow-ups. Broader RAG markers can remain in - # trigger_terms without becoming dominant shop query tokens. - query_terms: - - indikator - - indicator - - reagenz - - reagent - - zubehör - - zubehor - - accessory - # Terms that are useful for interpreting RAG/history phrasing but are - # too meta or type-oriented for the plain Shopware text query. - query_noise_terms: - - indikatortyp - - indicator type - - typ - - type - - beim - - gemessen - - ablesbar - - messen anchor_patterns: - /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu template: '{anchor} {query}' @@ -1209,6 +1195,67 @@ parameters: - auflistung - meter - metern + positive_token_filter: + origin: genre_native + enabled: true + min_query_tokens_after_filter: 2 + include_current_input_preservation_terms: true + include_semantic_shop_search_tokens: true + include_product_role_terms: true + # Keep this as the small positive allow-list for genre-specific + # product families, applications and shop-searchable measurement terms + # that are not already covered by the role/semantic vocabularies above. + allowed_terms: + - testomat + - testomaten + - pockettester + - pocket tester + - wasserhärte + - wasserhaerte + - resthärte + - resthaerte + - gesamthärte + - gesamthaerte + - chlor + - freies chlor + - gesamtchlor + - redox + - orp + - ph + - rx + - th + - tc + - tp + - tm + # These terms may be useful for RAG/reference resolution, but they are + # poor plain Shopware search terms and must not dominate the final + # shop query. + blocked_terms: + - indikatortyp + - typ + - type + - für + - fuer + - mit + - bei + - beim + - gemessen + - messen + - messung + - wert + - wurde + - welcher + - welchem + - welche + - dazu + - passend + - passende + - passendes + code_patterns: + - '/^\d+(?:[,.]\d+)?$/u' + - '/^\d+(?:[,.]\d+)?(?:m|mm|cm|ml|l)$/iu' + - '/^[a-z]{1,4}\d{1,5}[a-z0-9-]*$/iu' + - '/^\d{1,5}[a-z0-9-]*$/iu' compound_prefix_match: origin: genre_native terms: diff --git a/patch_history/RETRIEX_PATCH_61B_POSITIVE_SHOP_QUERY_TOKEN_FILTER_README.md b/patch_history/RETRIEX_PATCH_61B_POSITIVE_SHOP_QUERY_TOKEN_FILTER_README.md new file mode 100644 index 0000000..24aad99 --- /dev/null +++ b/patch_history/RETRIEX_PATCH_61B_POSITIVE_SHOP_QUERY_TOKEN_FILTER_README.md @@ -0,0 +1,77 @@ +# RetrieX Patch p61B - Positive Shop Query Token Filter + +p61B adds a generic positive filter for the final Shopware search query. + +## Goal + +The final plain Shopware query should contain only product-relevant tokens: + +- product/device/accessory names from the active genre vocabulary +- explicitly allowed product family/application terms +- protected short technical terms such as pH/RX/TH/TC/TP/TM when configured +- model/type/code tokens such as `808`, `300`, `TH2100`, `2x100ml` when they match configured regex patterns + +Sentence, relation and RAG-only reference words such as `gemessen`, `beim` or `indikatortyp` must not dominate the shop query. + +## Example + +Input query after referential/RAG anchoring: + +```text +testomat 808 gemessen 300 beim indikator +``` + +Final shop query after p61B: + +```text +testomat 808 300 indikator +``` + +## Configuration + +Primary configuration lives in: + +```yaml +config/retriex/genre.yaml +parameters: + retriex.genre.config: + configuration_values: + shop_query_runtime: + positive_token_filter: +``` + +Important fields: + +- `enabled`: activates the filter for the active genre. +- `allowed_terms`: extra genre-specific product family/application terms. +- `blocked_terms`: terms that are useful for RAG/reference resolution but poor shop search tokens. +- `code_patterns`: regex patterns for model/type/article/size tokens. +- `include_current_input_preservation_terms`: includes configured protected short terms from the shop query preservation surface. +- `include_semantic_shop_search_tokens`: includes the genre's shop semantic product vocabulary. +- `include_product_role_terms`: includes the genre's device/accessory role vocabulary. + +`agent.yaml` contains only an inactive compatibility fallback. Runtime values should be maintained in `genre.yaml`. + +## Scope + +No hard-coded product names or stopword lists were added to PHP. The PHP code only applies the configured positive token filter. + +No changes to: + +- retrieval ranking +- prompt rules +- shop result scoring +- SearchRepair +- intent routing +- product identity matching + +## Validation + +Run: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/public/assets/styles/base.css b/public/assets/styles/base.css index 626986c..621147f 100644 --- a/public/assets/styles/base.css +++ b/public/assets/styles/base.css @@ -97,7 +97,7 @@ input, textarea, select { .message { - margin-bottom: .7rem; + margin-bottom: 0; } .message.user { diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index be6857a..6bf157d 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -332,6 +332,21 @@ final readonly class AgentRunner $optimizedShopQuery = ''; } + $positiveFilteredShopSearchQuery = $this->filterShopQueryToPositiveTokens($shopSearchQuery); + if ($positiveFilteredShopSearchQuery !== $shopSearchQuery) { + $this->agentLogger->info('Filtered final shop search query to positive product tokens', [ + 'userId' => $userId, + 'prompt' => $prompt, + 'routingPrompt' => $routingPrompt, + 'optimizedShopQuery' => $optimizedShopQuery, + 'shopSearchQuery' => $shopSearchQuery, + 'positiveFilteredShopSearchQuery' => $positiveFilteredShopSearchQuery, + ]); + + $shopSearchQuery = $positiveFilteredShopSearchQuery; + $optimizedShopQuery = ''; + } + if ($shopSearchQuery === '') { $this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [ 'userId' => $userId, @@ -502,7 +517,6 @@ final readonly class AgentRunner $shopResults = $directIdentityRepairPayload['results']; } - $shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; @@ -1664,6 +1678,118 @@ final readonly class AgentRunner return $cleaned !== '' ? $cleaned : $shopSearchQuery; } + private function filterShopQueryToPositiveTokens(string $shopSearchQuery): string + { + $shopSearchQuery = trim($shopSearchQuery); + + if ( + $shopSearchQuery === '' + || !$this->agentRunnerConfig->isShopQueryPositiveTokenFilterEnabled() + ) { + return $shopSearchQuery; + } + + $tokens = $this->tokenizeShopQueryCandidate($shopSearchQuery); + if ($tokens === []) { + return $shopSearchQuery; + } + + $allowedTokens = $this->buildPositiveShopQueryAllowedTokenSet(); + $blockedTokens = $this->buildPositiveShopQueryBlockedTokenSet(); + $codePatterns = $this->agentRunnerConfig->getShopQueryPositiveTokenFilterCodePatterns(); + + if ($allowedTokens === [] && $codePatterns === []) { + return $shopSearchQuery; + } + + $kept = []; + foreach ($tokens as $token) { + if (isset($blockedTokens[$token]) || isset($kept[$token])) { + continue; + } + + if (isset($allowedTokens[$token]) || $this->matchesAnyConfiguredShopQueryCodePattern($token, $codePatterns)) { + $kept[$token] = $token; + } + } + + if (count($kept) < max(1, $this->agentRunnerConfig->getShopQueryPositiveTokenFilterMinTokens())) { + return $shopSearchQuery; + } + + $filtered = implode(' ', array_values($kept)); + + return $filtered !== '' ? $filtered : $shopSearchQuery; + } + + /** + * @return array + */ + private function buildPositiveShopQueryAllowedTokenSet(): array + { + $terms = $this->agentRunnerConfig->getShopQueryPositiveTokenFilterAllowedTerms(); + + if ($this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms()) { + $terms = $this->mergeUniqueStrings( + $terms, + $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms() + ); + } + + if ($this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeSemanticShopSearchTokens()) { + $terms = $this->mergeUniqueStrings( + $terms, + $this->agentRunnerConfig->getShopQueryPositiveTokenFilterSemanticShopSearchTokens() + ); + } + + if ($this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeProductRoleTerms()) { + $terms = $this->mergeUniqueStrings( + $terms, + $this->agentRunnerConfig->getShopQueryPositiveTokenFilterProductRoleTerms() + ); + } + + $tokens = []; + foreach ($terms as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $token) { + $tokens[$token] = true; + } + } + + return $tokens; + } + + /** + * @return array + */ + private function buildPositiveShopQueryBlockedTokenSet(): array + { + $tokens = []; + + foreach ($this->agentRunnerConfig->getShopQueryPositiveTokenFilterBlockedTerms() as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $token) { + $tokens[$token] = true; + } + } + + return $tokens; + } + + /** + * @param string[] $patterns + */ + private function matchesAnyConfiguredShopQueryCodePattern(string $token, array $patterns): bool + { + foreach ($patterns as $pattern) { + if (@preg_match($pattern, $token) === 1) { + return true; + } + } + + return false; + } + private function cleanupDirectProductAttributeShopQuery(string $prompt, string $shopSearchQuery): string { $shopSearchQuery = trim($shopSearchQuery); @@ -2673,40 +2799,20 @@ final readonly class AgentRunner return ''; } - $triggerTokens = $this->buildShopQueryTokenSet( - $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() - ); + $triggerTokens = []; + foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { + $triggerTokens[$termToken] = true; + } + } if ($triggerTokens === []) { return ''; } - $hasTrigger = false; - foreach ($tokens as $token) { - if (isset($triggerTokens[$token])) { - $hasTrigger = true; - break; - } - } - - if (!$hasTrigger) { - return ''; - } - - $queryTokens = $this->buildShopQueryTokenSet( - $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() - ); - if ($queryTokens === []) { - $queryTokens = $triggerTokens; - } - - $noiseTokens = $this->buildShopQueryTokenSet( - $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() - ); - $out = []; foreach ($tokens as $token) { - if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { + if (!isset($triggerTokens[$token]) || isset($out[$token])) { continue; } @@ -2716,23 +2822,6 @@ final readonly class AgentRunner return implode(' ', array_values($out)); } - /** - * @param string[] $terms - * @return array - */ - private function buildShopQueryTokenSet(array $terms): array - { - $tokens = []; - - foreach ($terms as $term) { - foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { - $tokens[$termToken] = true; - } - } - - return $tokens; - } - private function enrichReferentialShopQueryFromHistory( string $query, string $sourcePrompt, @@ -2801,33 +2890,11 @@ final readonly class AgentRunner } private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string - { - foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $turn) { - if (!$this->containsConfiguredShopQueryAnchorTrigger($turn)) { - continue; - } - - $modelAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($turn); - $turnAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($turn); - - if ($modelAnchor !== '') { - return $this->buildModelQualifiedShopQueryAnchor($modelAnchor, $turnAnchor); - } - - if ($turnAnchor !== '') { - return $turnAnchor; - } - } - - return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext); - } - - private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string { $latest = ''; foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { - if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) { + if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { continue; } @@ -2842,51 +2909,6 @@ final readonly class AgentRunner return $latest; } - private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string - { - $modelAnchor = trim($modelAnchor); - if ($modelAnchor === '') { - return trim($detailAnchor); - } - - $detailTokens = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $modelAnchor); - if ($detailTokens === []) { - return $modelAnchor; - } - - return trim($modelAnchor . ' ' . implode(' ', $detailTokens)); - } - - /** - * @return string[] - */ - private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array - { - $tokens = $this->tokenizeShopQueryCandidate($detailAnchor); - if ($tokens === []) { - return []; - } - - $modelTokens = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true); - $queryTokens = $this->buildShopQueryTokenSet( - $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms() - ); - $noiseTokens = $this->buildShopQueryTokenSet( - $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms() - ); - - $out = []; - foreach ($tokens as $token) { - if (isset($modelTokens[$token]) || isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) { - continue; - } - - $out[$token] = $token; - } - - return array_values($out); - } - private function normalizeShopQueryAnchor(string $anchor): string { $anchor = str_replace('®', '', $anchor); @@ -3354,48 +3376,6 @@ final readonly class AgentRunner return trim(implode(' ', $this->tokenizeShopQueryCandidate($query))); } - /** - * @param ShopProductResult[] $shopResults - * @return ShopProductResult[] - */ - private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array - { - if ($shopResults === []) { - return $shopResults; - } - - $anchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery); - if ($anchor === '') { - return $shopResults; - } - - $filtered = []; - foreach ($shopResults as $product) { - if (!$product instanceof ShopProductResult) { - continue; - } - - if ($this->shopProductMatchesReferencedProductAnchor($product, $anchor)) { - $filtered[] = $product; - } - } - - return $filtered; - } - - private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool - { - $productText = trim(implode(' ', array_filter([ - $product->name, - $product->description, - implode(' ', $product->highlights), - $product->customFields, - $product->url, - ]))); - - return $this->containsAllShopQueryTokens($productText, $anchor); - } - /** * @param ShopProductResult[] $shopResults * @return ShopProductResult[] diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 8bbfde8..f73b813 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -962,11 +962,6 @@ final class AgentRunnerConfig */ public function getNoLlmMainDeviceRequestRoleKeywords(): array { - $terms = $this->genreStringList('product_roles.no_llm_fallback_terms.main_device_request_keywords'); - if ($terms !== []) { - return $terms; - } - return $this->getConfiguredStringListOrVocabularyView( 'no_llm_fallback.product_roles.main_device_request_keywords', 'no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords' @@ -978,11 +973,6 @@ final class AgentRunnerConfig */ public function getNoLlmAccessoryProductRoleKeywords(): array { - $terms = $this->genreStringList('product_roles.no_llm_fallback_terms.accessory_product_keywords'); - if ($terms !== []) { - return $terms; - } - return $this->getConfiguredStringListOrVocabularyView( 'no_llm_fallback.product_roles.accessory_product_keywords', 'no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords' @@ -1196,6 +1186,90 @@ final class AgentRunnerConfig ?: $this->getRequiredStringList('shop_runtime.query_cleanup.stopword_cleanup.terms'); } + public function isShopQueryPositiveTokenFilterEnabled(): bool + { + return $this->genreBool('shop_query_runtime.positive_token_filter.enabled') + ?? $this->getOptionalBool('shop_runtime.query_cleanup.positive_token_filter.enabled', false); + } + + public function getShopQueryPositiveTokenFilterMinTokens(): int + { + return $this->genreInt('shop_query_runtime.positive_token_filter.min_query_tokens_after_filter') + ?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 2); + } + + public function shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(): bool + { + return $this->genreBool('shop_query_runtime.positive_token_filter.include_current_input_preservation_terms') + ?? $this->getOptionalBool('shop_runtime.query_cleanup.positive_token_filter.include_current_input_preservation_terms', true); + } + + public function shouldShopQueryPositiveTokenFilterIncludeSemanticShopSearchTokens(): bool + { + return $this->genreBool('shop_query_runtime.positive_token_filter.include_semantic_shop_search_tokens') + ?? $this->getOptionalBool('shop_runtime.query_cleanup.positive_token_filter.include_semantic_shop_search_tokens', true); + } + + public function shouldShopQueryPositiveTokenFilterIncludeProductRoleTerms(): bool + { + return $this->genreBool('shop_query_runtime.positive_token_filter.include_product_role_terms') + ?? $this->getOptionalBool('shop_runtime.query_cleanup.positive_token_filter.include_product_role_terms', true); + } + + /** + * @return string[] + */ + public function getShopQueryPositiveTokenFilterAllowedTerms(): array + { + return $this->genreStringList('shop_query_runtime.positive_token_filter.allowed_terms') + ?: $this->getOptionalStringList('shop_runtime.query_cleanup.positive_token_filter.allowed_terms'); + } + + /** + * @return string[] + */ + public function getShopQueryPositiveTokenFilterBlockedTerms(): array + { + return $this->genreStringList('shop_query_runtime.positive_token_filter.blocked_terms') + ?: $this->getOptionalStringList('shop_runtime.query_cleanup.positive_token_filter.blocked_terms'); + } + + /** + * @return string[] + */ + public function getShopQueryPositiveTokenFilterCodePatterns(): array + { + return $this->genreStringList('shop_query_runtime.positive_token_filter.code_patterns') + ?: $this->getOptionalStringList('shop_runtime.query_cleanup.positive_token_filter.code_patterns'); + } + + /** + * @return string[] + */ + public function getShopQueryPositiveTokenFilterSemanticShopSearchTokens(): array + { + return $this->genreStringList('shop_query_runtime.semantic_shop_search_tokens.terms'); + } + + /** + * @return string[] + */ + public function getShopQueryPositiveTokenFilterProductRoleTerms(): array + { + return array_values(array_unique(array_merge( + $this->genreStringList('product_roles.primary_product_terms.terms'), + $this->genreStringList('product_roles.accessory_product_terms.terms'), + $this->genreStringList('product_roles.shop_views.device_query_terms'), + $this->genreStringList('product_roles.shop_views.accessory_query_terms'), + $this->genreStringList('product_roles.shop_views.device_product_terms'), + $this->genreStringList('product_roles.shop_views.accessory_product_terms'), + $this->genreStringList('product_roles.shop_views.device_focus_terms'), + $this->genreStringList('product_roles.shop_views.accessory_focus_terms'), + $this->genreStringList('product_roles.no_llm_fallback_terms.main_device_request_keywords'), + $this->genreStringList('product_roles.no_llm_fallback_terms.accessory_product_keywords') + ))); + } + public function isDirectShopResultGuardEnabled(): bool { return $this->getRequiredBool('shop_runtime.result_identity.enabled'); @@ -1434,24 +1508,6 @@ final class AgentRunnerConfig ); } - /** - * @return string[] - */ - public function getShopQueryContextAnchorEnrichmentQueryTerms(): array - { - return $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms') - ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms'); - } - - /** - * @return string[] - */ - public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array - { - return $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms') - ?: $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms'); - } - /** * @return string[] */ diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 864bf8d..1f69e3b 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -687,6 +687,16 @@ final readonly class RetriexEffectiveConfigProvider 'min_query_tokens_after_cleanup' => $this->agentRunnerConfig->getShopQueryStopwordCleanupMinTokens(), 'terms' => $this->agentRunnerConfig->getShopQueryStopwordCleanupTerms(), ], + 'positive_token_filter' => [ + 'enabled' => $this->agentRunnerConfig->isShopQueryPositiveTokenFilterEnabled(), + 'min_query_tokens_after_filter' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterMinTokens(), + 'include_current_input_preservation_terms' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(), + 'include_semantic_shop_search_tokens' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeSemanticShopSearchTokens(), + 'include_product_role_terms' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeProductRoleTerms(), + 'allowed_terms' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterAllowedTerms(), + 'blocked_terms' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterBlockedTerms(), + 'code_patterns' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterCodePatterns(), + ], ], 'attribute_cleanup' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryProductAttributeCleanupEnabled(), @@ -703,8 +713,6 @@ final readonly class RetriexEffectiveConfigProvider 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), - 'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(), - 'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(), 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), ], @@ -1314,6 +1322,33 @@ final readonly class RetriexEffectiveConfigProvider } } + $shopQueryRuntime = is_array($configurationValues['shop_query_runtime'] ?? null) + ? $configurationValues['shop_query_runtime'] + : []; + $positiveTokenFilter = is_array($shopQueryRuntime['positive_token_filter'] ?? null) + ? $shopQueryRuntime['positive_token_filter'] + : []; + if ($positiveTokenFilter !== []) { + foreach ([ + 'enabled', + 'include_current_input_preservation_terms', + 'include_semantic_shop_search_tokens', + 'include_product_role_terms', + ] as $boolKey) { + if (array_key_exists($boolKey, $positiveTokenFilter) && !is_bool($positiveTokenFilter[$boolKey])) { + $errors[] = sprintf('genre.configuration_values.shop_query_runtime.positive_token_filter.%s must be boolean.', $boolKey); + } + } + + if (array_key_exists('min_query_tokens_after_filter', $positiveTokenFilter) && !is_numeric($positiveTokenFilter['min_query_tokens_after_filter'])) { + $errors[] = 'genre.configuration_values.shop_query_runtime.positive_token_filter.min_query_tokens_after_filter must be numeric.'; + } + + $this->validateStringList($this->toList($positiveTokenFilter['allowed_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.positive_token_filter.allowed_terms', $errors, $warnings); + $this->validateStringList($this->toList($positiveTokenFilter['blocked_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.positive_token_filter.blocked_terms', $errors, $warnings); + $this->validateRegexPatternList($positiveTokenFilter['code_patterns'] ?? [], 'genre.configuration_values.shop_query_runtime.positive_token_filter.code_patterns', $errors); + } + foreach ($this->collectGenreConfigurationValueSourcePaths($configurationValues) as $valuePath => $sourcePaths) { foreach ($sourcePaths as $sourcePath) { if (!isset($flattened[$sourcePath])) { @@ -1836,8 +1871,6 @@ final readonly class RetriexEffectiveConfigProvider $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; if (is_array($anchorEnrichment)) { $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); - $this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings); - $this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';