From 427dfe9987a6b66e27a4ffc736b1ffac7d6f1717 Mon Sep 17 00:00:00 2001
From: team 1
Date: Sun, 3 May 2026 20:51:47 +0200
Subject: [PATCH] p23

---
 ..._COMMERCE_CLEANUP_PROFILE_WIRING_README.md | 26 +++++++++
 config/retriex/commerce.yaml                  |  2 +
 src/Commerce/CommerceQueryParser.php          | 62 ++++++++++++++++++-
 src/Config/CommerceQueryParserConfig.php      |  8 +++
 src/Config/RetriexEffectiveConfigProvider.php | 10 ++++
 5 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md

diff --git a/RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md b/RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md
new file mode 100644
index 0000000..fcb5bfb
--- /dev/null
+++ b/RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md
@@ -0,0 +1,26 @@
+# RetrieX Patch 23 - Commerce Cleanup Profile Wiring
+
+## Ziel
+
+Commerce nutzt erstmals das zentrale Language-Cleanup-Profil `commerce_query`.
+
+## Änderungen
+
+- `config/retriex/commerce.yaml` erhält `cleanup_profile: commerce_query`.
+- `CommerceQueryParserConfig` liest den Profilnamen aus YAML.
+- `CommerceQueryParser` kombiniert Profil-Phrasen/Stopwords mit den bestehenden Legacy-Listen.
+- `phrases_to_remove` und `filter_search_tokens` bleiben bewusst erhalten.
+- `RetriexEffectiveConfigProvider` validiert, dass das referenzierte Profil existiert.
+
+## Wichtig
+
+Dieser Patch entfernt noch keine alten Listen. Er verdrahtet nur die neue zentrale Struktur mit Commerce.
+
+## Pflichtchecks
+
+```bash
+bin/console mto:agent:config:validate
+bin/console mto:agent:regression:test
+bin/console mto:agent:config:audit-source --details
+bin/console mto:agent:config:audit-patterns --details
+```
diff --git a/config/retriex/commerce.yaml b/config/retriex/commerce.yaml
index 9357c97..f430498 100644
--- a/config/retriex/commerce.yaml
+++ b/config/retriex/commerce.yaml
@@ -14,6 +14,8 @@ parameters:
     # Commerce query parser configuration.
     # YAML is the only operative source of truth; PHP must not contain parser defaults.
     retriex.commerce_query.config:
+        cleanup_profile: commerce_query
+
         known_brands:
             - heyl
             - horiba
diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php
index 42da1a8..b25508f 100644
--- a/src/Commerce/CommerceQueryParser.php
+++ b/src/Commerce/CommerceQueryParser.php
@@ -7,6 +7,7 @@ namespace App\Commerce;
 use App\Commerce\Dto\CommerceSearchQuery;
 use App\Config\CommerceIntentConfig;
 use App\Config\CommerceQueryParserConfig;
+use App\Config\LanguageCleanupConfig;
 use App\Knowledge\Retrieval\QueryCleaner;
 use App\Knowledge\Text\TextNormalizer;

@@ -17,6 +18,7 @@ final readonly class CommerceQueryParser
         private QueryCleaner $queryCleaner,
         private CommerceQueryParserConfig $config,
         private CommerceIntentConfig $intentConfig,
+        private LanguageCleanupConfig $languageCleanupConfig,
     ) {
     }

@@ -177,7 +179,7 @@ final readonly class CommerceQueryParser

         $text = $this->wrapForPhraseReplacement($prompt);

-        foreach ($this->config->getPhrasesToRemove() as $phrase) {
+        foreach ($this->getCommercePhrasesToRemove() as $phrase) {
             $normalizedPhrase = $this->normalize((string) $phrase);

             if ($normalizedPhrase === '') {
@@ -500,6 +502,62 @@ final readonly class CommerceQueryParser
         return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
     }

+    /**
+     * Phrases to remove: entries of the configured cleanup profile first, then the legacy YAML list.
+     *
+     * @return string[]
+     */
+    private function getCommercePhrasesToRemove(): array
+    {
+        return $this->mergeUniqueTokens(
+            $this->languageCleanupConfig->getPhrasesForProfile($this->config->getCleanupProfile()),
+            $this->config->getPhrasesToRemove()
+        );
+    }
+
+    /**
+     * Tokens matched by isSearchControlToken(): profile stop words first, then the legacy YAML list.
+     *
+     * @return string[]
+     */
+    private function getCommerceFilterSearchTokens(): array
+    {
+        return $this->mergeUniqueTokens(
+            $this->languageCleanupConfig->getStopWordsForProfile($this->config->getCleanupProfile()),
+            $this->config->getFilterSearchTokens()
+        );
+    }
+
+    /**
+     * Lower-cases and trims every entry, then concatenates both lists without duplicates or empty
+     * strings. The first occurrence wins, so entries of $left stay ahead of entries of $right.
+     *
+     * @param string[] $left
+     * @param string[] $right
+     * @return string[]
+     */
+    private function mergeUniqueTokens(array $left, array $right): array
+    {
+        $out = [];
+        // Lookup set keyed by token: this merge runs on every isSearchControlToken() call,
+        // so avoid a linear in_array() scan per entry.
+        $seen = [];
+
+        foreach ([$left, $right] as $list) {
+            foreach ($list as $token) {
+                $token = trim(mb_strtolower((string) $token, 'UTF-8'));
+                if ($token === '' || isset($seen[$token])) {
+                    continue;
+                }
+
+                $seen[$token] = true;
+                $out[] = $token;
+            }
+        }
+
+        return $out;
+    }
+
     private function isSearchControlToken(string $token): bool
     {
         $token = trim(mb_strtolower($token));
@@ -508,7 +566,7 @@ final readonly class CommerceQueryParser
             return true;
         }

-        if (in_array($token, $this->config->getFilterSearchTokens(), true)) {
+        if (in_array($token, $this->getCommerceFilterSearchTokens(), true)) {
             return true;
         }

diff --git a/src/Config/CommerceQueryParserConfig.php b/src/Config/CommerceQueryParserConfig.php
index e3d03dc..731de59 100644
--- a/src/Config/CommerceQueryParserConfig.php
+++ b/src/Config/CommerceQueryParserConfig.php
@@ -16,6 +16,14 @@ final class CommerceQueryParserConfig
     ) {
     }

+    /**
+     * Name of the language cleanup profile configured under `cleanup_profile`.
+     */
+    public function getCleanupProfile(): string
+    {
+        return $this->string('cleanup_profile');
+    }
+
     /** @return string[] */
     public function getKnownBrands(): array
     {
diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php
index c2b2fdf..5adc1dc 100644
--- a/src/Config/RetriexEffectiveConfigProvider.php
+++ b/src/Config/RetriexEffectiveConfigProvider.php
@@ -25,6 +25,7 @@ final readonly class RetriexEffectiveConfigProvider
         private SalesIntentConfig $salesIntentConfig,
         private ShopServiceConfig $shopServiceConfig,
         private StopWordsConfig $stopWordsConfig,
+        private LanguageCleanupConfig $languageCleanupConfig,
         private QueryEnricherConfig $queryEnricherConfig,
         private GovernanceConfig $governanceConfig,
         private CatalogIntentConfig $catalogIntentConfig,
@@ -607,6 +608,7 @@ final readonly class RetriexEffectiveConfigProvider
     private function commerceQueryConfig(): array
     {
         return [
+            'cleanup_profile' => $this->commerceQueryParserConfig->getCleanupProfile(),
             'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(),
             'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(),
             'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokens(),
@@ -1212,6 +1214,14 @@ final readonly class RetriexEffectiveConfigProvider
             $this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
         }

+        // CommerceQueryParser looks this profile up by name, so a missing or unknown name is an error.
+        $cleanupProfile = $commerceQuery['cleanup_profile'] ?? null;
+        if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
+            $errors[] = 'commerce_query.cleanup_profile must be a non-empty string.';
+        } elseif (!in_array($cleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
+            $errors[] = 'commerce_query.cleanup_profile references unknown language cleanup profile: ' . $cleanupProfile . '.';
+        }
+
         $measurementPattern = $patterns['measurement_value_token'] ?? null;
         $filterTokens = $commerceQuery['filter_search_tokens'] ?? [];
         foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {