This commit is contained in:
team 1
2026-05-03 20:51:47 +02:00
parent 3d0b6b1cf8
commit 427dfe9987
5 changed files with 88 additions and 2 deletions

View File

@@ -0,0 +1,26 @@
# RetrieX Patch 23 - Commerce Cleanup Profile Wiring
## Ziel
Commerce nutzt erstmals das zentrale Language-Cleanup-Profil `commerce_query`.
## Änderungen
- `config/retriex/commerce.yaml` erhält `cleanup_profile: commerce_query`.
- `CommerceQueryParserConfig` liest den Profilnamen aus YAML.
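- `CommerceQueryParser` kombiniert Profil-Phrasen/Stopwords mit den bestehenden Legacy-Listen: Profil-Einträge zuerst, danach die Legacy-Einträge; alle Tokens werden kleingeschrieben, getrimmt und dedupliziert.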
- `phrases_to_remove` und `filter_search_tokens` bleiben bewusst erhalten.
- `RetriexEffectiveConfigProvider` validiert, dass `cleanup_profile` ein nicht-leerer String ist und dass das referenzierte Profil existiert.
## Wichtig
Dieser Patch entfernt noch keine alten Listen. Er verdrahtet nur die neue zentrale Struktur mit Commerce.
## Pflichtchecks
```bash
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```

View File

@@ -14,6 +14,8 @@ parameters:
# Commerce query parser configuration. # Commerce query parser configuration.
# YAML is the only operative source of truth; PHP must not contain parser defaults. # YAML is the only operative source of truth; PHP must not contain parser defaults.
retriex.commerce_query.config: retriex.commerce_query.config:
cleanup_profile: commerce_query
known_brands: known_brands:
- heyl - heyl
- horiba - horiba

View File

@@ -7,6 +7,7 @@ namespace App\Commerce;
use App\Commerce\Dto\CommerceSearchQuery; use App\Commerce\Dto\CommerceSearchQuery;
use App\Config\CommerceIntentConfig; use App\Config\CommerceIntentConfig;
use App\Config\CommerceQueryParserConfig; use App\Config\CommerceQueryParserConfig;
use App\Config\LanguageCleanupConfig;
use App\Knowledge\Retrieval\QueryCleaner; use App\Knowledge\Retrieval\QueryCleaner;
use App\Knowledge\Text\TextNormalizer; use App\Knowledge\Text\TextNormalizer;
@@ -17,6 +18,7 @@ final readonly class CommerceQueryParser
private QueryCleaner $queryCleaner, private QueryCleaner $queryCleaner,
private CommerceQueryParserConfig $config, private CommerceQueryParserConfig $config,
private CommerceIntentConfig $intentConfig, private CommerceIntentConfig $intentConfig,
private LanguageCleanupConfig $languageCleanupConfig,
) { ) {
} }
@@ -177,7 +179,7 @@ final readonly class CommerceQueryParser
$text = $this->wrapForPhraseReplacement($prompt); $text = $this->wrapForPhraseReplacement($prompt);
foreach ($this->config->getPhrasesToRemove() as $phrase) { foreach ($this->getCommercePhrasesToRemove() as $phrase) {
$normalizedPhrase = $this->normalize((string) $phrase); $normalizedPhrase = $this->normalize((string) $phrase);
if ($normalizedPhrase === '') { if ($normalizedPhrase === '') {
@@ -500,6 +502,47 @@ final readonly class CommerceQueryParser
return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text; return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
} }
/**
 * Builds the phrase list used for prompt cleanup.
 *
 * The phrases of the configured cleanup profile come first, followed by the
 * parser config's own phrases (getPhrasesToRemove()); both lists are passed
 * through mergeUniqueTokens().
 *
 * @return string[]
 */
private function getCommercePhrasesToRemove(): array
{
    $profileName = $this->config->getCleanupProfile();
    $profilePhrases = $this->languageCleanupConfig->getPhrasesForProfile($profileName);
    $legacyPhrases = $this->config->getPhrasesToRemove();

    return $this->mergeUniqueTokens($profilePhrases, $legacyPhrases);
}
/**
 * Builds the token list that is filtered out of search terms.
 *
 * The stop words of the configured cleanup profile come first, followed by the
 * parser config's own tokens (getFilterSearchTokens()); both lists are passed
 * through mergeUniqueTokens().
 *
 * @return string[]
 */
private function getCommerceFilterSearchTokens(): array
{
    $profileName = $this->config->getCleanupProfile();
    $profileStopWords = $this->languageCleanupConfig->getStopWordsForProfile($profileName);
    $legacyTokens = $this->config->getFilterSearchTokens();

    return $this->mergeUniqueTokens($profileStopWords, $legacyTokens);
}
/**
 * Merges two token lists into a single normalized, duplicate-free list.
 *
 * Every token is cast to string, lower-cased as UTF-8 and trimmed; tokens that
 * end up empty are dropped. The result keeps first-occurrence order, with all
 * entries of $left ahead of the entries of $right.
 *
 * @param string[] $left
 * @param string[] $right
 * @return string[]
 */
private function mergeUniqueTokens(array $left, array $right): array
{
    $out = [];
    // Normalized token => true. A keyed lookup replaces the linear in_array()
    // scan of $out, so merging stays linear in the number of tokens.
    $seen = [];

    foreach ([$left, $right] as $list) {
        foreach ($list as $token) {
            $token = trim(mb_strtolower((string) $token, 'UTF-8'));
            if ($token === '' || isset($seen[$token])) {
                continue;
            }

            $seen[$token] = true;
            $out[] = $token;
        }
    }

    return $out;
}
private function isSearchControlToken(string $token): bool private function isSearchControlToken(string $token): bool
{ {
$token = trim(mb_strtolower($token)); $token = trim(mb_strtolower($token));
@@ -508,7 +551,7 @@ final readonly class CommerceQueryParser
return true; return true;
} }
if (in_array($token, $this->config->getFilterSearchTokens(), true)) { if (in_array($token, $this->getCommerceFilterSearchTokens(), true)) {
return true; return true;
} }

View File

@@ -16,6 +16,12 @@ final class CommerceQueryParserConfig
) { ) {
} }
/**
 * Returns the value configured under the `cleanup_profile` key, i.e. the name
 * of the language cleanup profile this parser config refers to.
 */
public function getCleanupProfile(): string
{
    $profileName = $this->string('cleanup_profile');

    return $profileName;
}
/** @return string[] */ /** @return string[] */
public function getKnownBrands(): array public function getKnownBrands(): array
{ {

View File

@@ -25,6 +25,7 @@ final readonly class RetriexEffectiveConfigProvider
private SalesIntentConfig $salesIntentConfig, private SalesIntentConfig $salesIntentConfig,
private ShopServiceConfig $shopServiceConfig, private ShopServiceConfig $shopServiceConfig,
private StopWordsConfig $stopWordsConfig, private StopWordsConfig $stopWordsConfig,
private LanguageCleanupConfig $languageCleanupConfig,
private QueryEnricherConfig $queryEnricherConfig, private QueryEnricherConfig $queryEnricherConfig,
private GovernanceConfig $governanceConfig, private GovernanceConfig $governanceConfig,
private CatalogIntentConfig $catalogIntentConfig, private CatalogIntentConfig $catalogIntentConfig,
@@ -607,6 +608,7 @@ final readonly class RetriexEffectiveConfigProvider
private function commerceQueryConfig(): array private function commerceQueryConfig(): array
{ {
return [ return [
'cleanup_profile' => $this->commerceQueryParserConfig->getCleanupProfile(),
'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(), 'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(),
'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(), 'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(),
'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokens(), 'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokens(),
@@ -1212,6 +1214,13 @@ final readonly class RetriexEffectiveConfigProvider
$this->validateCommerceQueryPatterns($patterns, $errors, $warnings); $this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
} }
$cleanupProfile = $commerceQuery['cleanup_profile'] ?? null;
if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
$errors[] = 'commerce_query.cleanup_profile must be a non-empty string.';
} elseif (!in_array($cleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
$errors[] = 'commerce_query.cleanup_profile references unknown language cleanup profile: ' . $cleanupProfile . '.';
}
$measurementPattern = $patterns['measurement_value_token'] ?? null; $measurementPattern = $patterns['measurement_value_token'] ?? null;
$filterTokens = $commerceQuery['filter_search_tokens'] ?? []; $filterTokens = $commerceQuery['filter_search_tokens'] ?? [];
foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) { foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {