p23
This commit is contained in:
26
RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md
Normal file
26
RETRIEX_PATCH_23_COMMERCE_CLEANUP_PROFILE_WIRING_README.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# RetrieX Patch 23 - Commerce Cleanup Profile Wiring
|
||||||
|
|
||||||
|
## Ziel
|
||||||
|
|
||||||
|
Commerce nutzt erstmals das zentrale Language-Cleanup-Profil `commerce_query`.
|
||||||
|
|
||||||
|
## Änderungen
|
||||||
|
|
||||||
|
- `config/retriex/commerce.yaml` erhält `cleanup_profile: commerce_query`.
|
||||||
|
- `CommerceQueryParserConfig` liest den Profilnamen aus YAML.
|
||||||
|
- `CommerceQueryParser` kombiniert Profil-Phrasen/Stopwords mit den bestehenden Legacy-Listen.
|
||||||
|
- `phrases_to_remove` und `filter_search_tokens` bleiben bewusst erhalten.
|
||||||
|
- `RetriexEffectiveConfigProvider` validiert, dass das referenzierte Profil existiert.
|
||||||
|
|
||||||
|
## Wichtig
|
||||||
|
|
||||||
|
Dieser Patch entfernt noch keine alten Listen. Er verdrahtet nur die neue zentrale Struktur mit Commerce.
|
||||||
|
|
||||||
|
## Pflichtchecks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/console mto:agent:config:validate
|
||||||
|
bin/console mto:agent:regression:test
|
||||||
|
bin/console mto:agent:config:audit-source --details
|
||||||
|
bin/console mto:agent:config:audit-patterns --details
|
||||||
|
```
|
||||||
@@ -14,6 +14,8 @@ parameters:
|
|||||||
# Commerce query parser configuration.
|
# Commerce query parser configuration.
|
||||||
# YAML is the only operative source of truth; PHP must not contain parser defaults.
|
# YAML is the only operative source of truth; PHP must not contain parser defaults.
|
||||||
retriex.commerce_query.config:
|
retriex.commerce_query.config:
|
||||||
|
cleanup_profile: commerce_query
|
||||||
|
|
||||||
known_brands:
|
known_brands:
|
||||||
- heyl
|
- heyl
|
||||||
- horiba
|
- horiba
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ namespace App\Commerce;
|
|||||||
use App\Commerce\Dto\CommerceSearchQuery;
|
use App\Commerce\Dto\CommerceSearchQuery;
|
||||||
use App\Config\CommerceIntentConfig;
|
use App\Config\CommerceIntentConfig;
|
||||||
use App\Config\CommerceQueryParserConfig;
|
use App\Config\CommerceQueryParserConfig;
|
||||||
|
use App\Config\LanguageCleanupConfig;
|
||||||
use App\Knowledge\Retrieval\QueryCleaner;
|
use App\Knowledge\Retrieval\QueryCleaner;
|
||||||
use App\Knowledge\Text\TextNormalizer;
|
use App\Knowledge\Text\TextNormalizer;
|
||||||
|
|
||||||
@@ -17,6 +18,7 @@ final readonly class CommerceQueryParser
|
|||||||
private QueryCleaner $queryCleaner,
|
private QueryCleaner $queryCleaner,
|
||||||
private CommerceQueryParserConfig $config,
|
private CommerceQueryParserConfig $config,
|
||||||
private CommerceIntentConfig $intentConfig,
|
private CommerceIntentConfig $intentConfig,
|
||||||
|
private LanguageCleanupConfig $languageCleanupConfig,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,7 +179,7 @@ final readonly class CommerceQueryParser
|
|||||||
|
|
||||||
$text = $this->wrapForPhraseReplacement($prompt);
|
$text = $this->wrapForPhraseReplacement($prompt);
|
||||||
|
|
||||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
foreach ($this->getCommercePhrasesToRemove() as $phrase) {
|
||||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||||
|
|
||||||
if ($normalizedPhrase === '') {
|
if ($normalizedPhrase === '') {
|
||||||
@@ -500,6 +502,47 @@ final readonly class CommerceQueryParser
|
|||||||
return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the phrase list the parser iterates when removing phrases from a prompt.
 *
 * Phrases of the configured language cleanup profile come first, followed by
 * the `phrases_to_remove` list of the commerce parser config. Normalization
 * and de-duplication are delegated to mergeUniqueTokens().
 *
 * @return string[]
 */
private function getCommercePhrasesToRemove(): array
{
    $profileName = $this->config->getCleanupProfile();
    $profilePhrases = $this->languageCleanupConfig->getPhrasesForProfile($profileName);
    $configuredPhrases = $this->config->getPhrasesToRemove();

    return $this->mergeUniqueTokens($profilePhrases, $configuredPhrases);
}
|
||||||
|
|
||||||
|
/**
 * Builds the token list used to recognise search control tokens.
 *
 * Stop words of the configured language cleanup profile come first, followed
 * by the `filter_search_tokens` list of the commerce parser config.
 * Normalization and de-duplication are delegated to mergeUniqueTokens().
 *
 * @return string[]
 */
private function getCommerceFilterSearchTokens(): array
{
    $profileName = $this->config->getCleanupProfile();
    $profileStopWords = $this->languageCleanupConfig->getStopWordsForProfile($profileName);
    $configuredTokens = $this->config->getFilterSearchTokens();

    return $this->mergeUniqueTokens($profileStopWords, $configuredTokens);
}
|
||||||
|
|
||||||
|
/**
 * Merges two token lists into a single list without duplicates.
 *
 * Each entry is cast to string, lower-cased as UTF-8 and trimmed. Entries that
 * are empty after normalization are skipped. The first occurrence of a token
 * wins, and all entries of $left are processed before those of $right, so the
 * result keeps that order.
 *
 * @param string[] $left  Tokens that take precedence in the result order.
 * @param string[] $right Tokens appended after $left, minus duplicates.
 * @return string[] Normalized, unique tokens as a 0-based list.
 */
private function mergeUniqueTokens(array $left, array $right): array
{
    $out = [];
    // Set of tokens already emitted; an isset() lookup replaces a linear
    // in_array() scan of $out for every entry.
    $seen = [];

    foreach ([$left, $right] as $list) {
        foreach ($list as $token) {
            $token = trim(mb_strtolower((string) $token, 'UTF-8'));

            if ($token === '' || isset($seen[$token])) {
                continue;
            }

            $seen[$token] = true;
            // Collect the string itself rather than array_keys($seen): PHP
            // casts decimal-integer string keys to int, which would change
            // the element type of the returned list.
            $out[] = $token;
        }
    }

    return $out;
}
|
||||||
|
|
||||||
private function isSearchControlToken(string $token): bool
|
private function isSearchControlToken(string $token): bool
|
||||||
{
|
{
|
||||||
$token = trim(mb_strtolower($token));
|
$token = trim(mb_strtolower($token));
|
||||||
@@ -508,7 +551,7 @@ final readonly class CommerceQueryParser
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (in_array($token, $this->config->getFilterSearchTokens(), true)) {
|
if (in_array($token, $this->getCommerceFilterSearchTokens(), true)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,12 @@ final class CommerceQueryParserConfig
|
|||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns the name of the language cleanup profile referenced by the
 * commerce query parser configuration.
 *
 * The value is read from the `cleanup_profile` configuration key.
 */
public function getCleanupProfile(): string
{
    $profileName = $this->string('cleanup_profile');

    return $profileName;
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
public function getKnownBrands(): array
|
public function getKnownBrands(): array
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
private SalesIntentConfig $salesIntentConfig,
|
private SalesIntentConfig $salesIntentConfig,
|
||||||
private ShopServiceConfig $shopServiceConfig,
|
private ShopServiceConfig $shopServiceConfig,
|
||||||
private StopWordsConfig $stopWordsConfig,
|
private StopWordsConfig $stopWordsConfig,
|
||||||
|
private LanguageCleanupConfig $languageCleanupConfig,
|
||||||
private QueryEnricherConfig $queryEnricherConfig,
|
private QueryEnricherConfig $queryEnricherConfig,
|
||||||
private GovernanceConfig $governanceConfig,
|
private GovernanceConfig $governanceConfig,
|
||||||
private CatalogIntentConfig $catalogIntentConfig,
|
private CatalogIntentConfig $catalogIntentConfig,
|
||||||
@@ -607,6 +608,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
private function commerceQueryConfig(): array
|
private function commerceQueryConfig(): array
|
||||||
{
|
{
|
||||||
return [
|
return [
|
||||||
|
'cleanup_profile' => $this->commerceQueryParserConfig->getCleanupProfile(),
|
||||||
'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(),
|
'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(),
|
||||||
'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(),
|
'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(),
|
||||||
'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokens(),
|
'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokens(),
|
||||||
@@ -1212,6 +1214,13 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
|
$this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$cleanupProfile = $commerceQuery['cleanup_profile'] ?? null;
|
||||||
|
if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
|
||||||
|
$errors[] = 'commerce_query.cleanup_profile must be a non-empty string.';
|
||||||
|
} elseif (!in_array($cleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
|
||||||
|
$errors[] = 'commerce_query.cleanup_profile references unknown language cleanup profile: ' . $cleanupProfile . '.';
|
||||||
|
}
|
||||||
|
|
||||||
$measurementPattern = $patterns['measurement_value_token'] ?? null;
|
$measurementPattern = $patterns['measurement_value_token'] ?? null;
|
||||||
$filterTokens = $commerceQuery['filter_search_tokens'] ?? [];
|
$filterTokens = $commerceQuery['filter_search_tokens'] ?? [];
|
||||||
foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
|
foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
|
||||||
|
|||||||
Reference in New Issue
Block a user