This commit is contained in:
team 1
2026-04-30 12:29:02 +02:00
parent 45561abee0
commit 01891698f7
3 changed files with 293 additions and 166 deletions

View File

@@ -0,0 +1,32 @@
# RetrieX Patch 6 - ShopServiceConfig / Shop Matching YAML-only
Scope:
- Converts `ShopServiceConfig` to YAML-only access.
- Removes PHP keyword/scoring/presentation defaults from `ShopServiceConfig`.
- Adds required YAML links for shop vocabulary views/maps in `config/retriex/commerce.yaml`.
- Keeps all shop matching values identical to the previous effective defaults, so matching behavior is unchanged.
Changed files:
- `src/Config/ShopServiceConfig.php`
- `config/retriex/commerce.yaml`
Not changed:
- PromptBuilder
- Retrieval configuration
- CommerceQueryParser
- ShopSearchService logic
- SSE/frontend behavior
After applying, run:
```bash
php bin/console cache:clear
php bin/console mto:agent:config:validate
php bin/console mto:agent:config:audit-source --details
php bin/console mto:agent:regression:test
```
Manual regression focus:
- `was kostet der indikator`
- `welcher pockettester ist für Redox messung gut` -> `suche im shop`
- `mit welchem testomat kann ich freies chlor messen`
- Testomat 808 / Indikatortyp 300 baseline

View File

@@ -221,12 +221,26 @@ parameters:
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
# Shop matching and presentation configuration.
# Defaults are intentionally identical to the previous PHP values.
# YAML is the only operative source of truth; PHP must not contain shop matching defaults.
retriex.shop_matching.config:
top_product_log_limit: 3
# Shop role and focus lists are resolved from config/retriex/vocabulary.yaml
# through the vocabulary_views / vocabulary_maps links below.
# A direct list under the same key (e.g. device_focus_keywords) may still be added
# to this parameter if a project needs to override a specific view.
vocabulary_views:
device_focus_keywords: shop.device_focus
accessory_focus_keywords: shop.accessory_focus
device_query_keywords: shop.device_query
accessory_query_keywords: shop.accessory_query
accessory_product_keywords: shop.accessory_product
device_product_keywords: shop.device_product
vocabulary_maps:
accessory_focus_variant_map: shop.accessory_focus_variants
role_guard:
filter_accessory_products_for_device_queries: true
keep_ambiguous_products_for_device_queries: true
scores:
exact_product_number_phrase: 160

View File

@@ -6,356 +6,362 @@ namespace App\Config;
final class ShopServiceConfig
{
/**
 * In-code list of device-related query terms (German and English), each given in
 * both umlaut and ASCII-transliterated spelling.
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
public const DEVICE_QUERY_KEYWORDS = [
'analysegerät', 'analysegeraet', 'analysegeräte', 'analysegeraete',
'messgerät', 'messgeraet', 'messgeräte', 'messgeraete',
'analysator', 'analysatoren', 'analyzer', 'gerät', 'geraet', 'geräte',
'geraete', 'monitor', 'monitore', 'controller', 'gerät für',
'geraet fuer', 'geräte für', 'geraete fuer', 'system', 'systeme',
'anlage', 'anlagen',
];
/**
 * In-code list of accessory/consumable-related query terms (German and English),
 * including ASCII-transliterated and hyphenated spellings.
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
public const ACCESSORY_QUERY_KEYWORDS = [
'zubehör', 'zubehor', 'reagenz', 'reagenzien', 'reagent', 'indikator',
'indikatoren', 'indicator', 'kit', 'set', 'ersatz', 'ersatzteil',
'ersatzteile', 'verbrauchsmaterial', 'consumable', 'dazu', 'passend',
'passende', 'passendes', 'nachfüll', 'nachfuell', 'refill', 'filter',
'pumpenkopf', 'motorblock', 'service set', 'serviceset', 'service-set',
];
/**
 * In-code list of terms that mark a product as an accessory/consumable
 * (German and English, including transliterated spellings).
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
public const ACCESSORY_PRODUCT_KEYWORDS = [
'reagenz', 'reagenzien', 'reagent', 'indikator', 'indikatoren',
'indicator', 'kit', 'set', 'verbrauchsmaterial', 'consumable',
'zubehör', 'zubehor', 'ersatz', 'ersatzteil', 'ersatzteile',
'nachfüll', 'nachfuell', 'refill', 'lösung', 'loesung', 'solution',
'teststreifen', 'test strip', 'filter', 'pumpenkopf', 'motorblock',
'service set', 'serviceset', 'service-set',
];
/**
 * In-code list of terms that mark a product as a device
 * (German and English, including "online ..." compounds and transliterated spellings).
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
public const DEVICE_PRODUCT_KEYWORDS = [
'analysegerät', 'analysegeraet', 'analysegeräte', 'analysegeraete',
'messgerät', 'messgeraet', 'messgeräte', 'messgeraete',
'analysator', 'analysatoren', 'analyzer', 'monitor', 'monitore',
'controller', 'online-analysator', 'online analysator',
'online-analysegerät', 'online analysegeraet', 'online-analysegeräte',
'online analysegeraete', 'online analyzer', 'online monitor', 'system',
'systeme', 'anlage', 'anlagen', 'gerät', 'geraet', 'geräte', 'geraete',
];
/**
 * In-code list of device focus terms (German, with transliterated spellings).
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
private const DEVICE_FOCUS_KEYWORDS = [
'geräte', 'geraete', 'gerät', 'geraet', 'analysegerät', 'analysegeraet',
'messgerät', 'messgeraet', 'analysator', 'controller', 'monitor',
];
/**
 * In-code list of accessory focus terms (German, with transliterated spellings).
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary views — confirm that
 * nothing references this constant any more before deleting it.
 */
private const ACCESSORY_FOCUS_KEYWORDS = [
'indikator', 'indikatoren', 'reagenz', 'reagenzien', 'zubehör',
'zubehor', 'ersatzteil', 'ersatzteile', 'verbrauchsmaterial',
'service set', 'serviceset', 'filter', 'pumpenkopf', 'motorblock',
];
/**
 * In-code map from an accessory term to its variant group. Every member of a
 * group (singular/plural, spaced/joined/hyphenated spelling) is a key that maps
 * to the complete group.
 *
 * NOTE(review): the patch notes for this change state that PHP keyword defaults are
 * removed from ShopServiceConfig in favour of YAML/vocabulary maps — confirm that
 * nothing references this constant any more before deleting it.
 */
private const ACCESSORY_FOCUS_VARIANT_MAP = [
'indikator' => ['indikator', 'indikatoren'],
'indikatoren' => ['indikator', 'indikatoren'],
'reagenz' => ['reagenz', 'reagenzien'],
'reagenzien' => ['reagenz', 'reagenzien'],
'ersatzteil' => ['ersatzteil', 'ersatzteile'],
'ersatzteile' => ['ersatzteil', 'ersatzteile'],
'service set' => ['service set', 'serviceset', 'service-set'],
'serviceset' => ['service set', 'serviceset', 'service-set'],
'service-set' => ['service set', 'serviceset', 'service-set'],
];
/**
 * The vocabulary is mandatory: the vocabulary-backed getters of this class
 * dereference it without a null check.
 *
 * @param DomainVocabularyConfig $vocabulary source of shop vocabulary views and maps
 * @param array<string, mixed>   $config     shop matching configuration tree, addressed
 *                                           by dot-separated paths
 */
public function __construct(
    private readonly DomainVocabularyConfig $vocabulary,
    private array $config = [],
) {
}
/**
 * Returns the integer configured at "top_product_log_limit".
 *
 * @throws \InvalidArgumentException if the path is missing, not numeric, or below 0
 */
public function getTopProductLogLimit(): int
{
    return $this->requiredInt('top_product_log_limit', 0);
}
/**
 * Returns the direct "device_focus_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.device_focus_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getDeviceFocusKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'device_focus_keywords',
        'vocabulary_views.device_focus_keywords'
    );
}
/**
 * Returns the direct "accessory_focus_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.accessory_focus_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getAccessoryFocusKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'accessory_focus_keywords',
        'vocabulary_views.accessory_focus_keywords'
    );
}
/**
 * Returns the direct "accessory_focus_variant_map" when configured, otherwise the
 * vocabulary map named by "vocabulary_maps.accessory_focus_variant_map".
 *
 * @return array<string, string[]>
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty map
 */
public function getAccessoryFocusVariantMap(): array
{
    return $this->configuredStringListMapOrVocabularyMap(
        'accessory_focus_variant_map',
        'vocabulary_maps.accessory_focus_variant_map'
    );
}
/**
 * Returns the direct "device_query_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.device_query_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getDeviceQueryKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'device_query_keywords',
        'vocabulary_views.device_query_keywords'
    );
}
/**
 * Returns the direct "accessory_query_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.accessory_query_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getAccessoryQueryKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'accessory_query_keywords',
        'vocabulary_views.accessory_query_keywords'
    );
}
/**
 * Returns the direct "accessory_product_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.accessory_product_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getAccessoryProductKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'accessory_product_keywords',
        'vocabulary_views.accessory_product_keywords'
    );
}
/**
 * Returns the direct "device_product_keywords" list when configured, otherwise the
 * vocabulary view named by "vocabulary_views.device_product_keywords".
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if neither source yields a valid, non-empty list
 */
public function getDeviceProductKeywords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'device_product_keywords',
        'vocabulary_views.device_product_keywords'
    );
}
/**
 * Returns the integer configured at "scores.exact_product_number_phrase".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getExactProductNumberPhraseScore(): int
{
    return $this->requiredInt('scores.exact_product_number_phrase');
}
/**
 * Returns the integer configured at "scores.exact_product_name_phrase".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getExactProductNamePhraseScore(): int
{
    return $this->requiredInt('scores.exact_product_name_phrase');
}
/**
 * Returns the integer configured at "scores.exact_manufacturer_match".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getExactManufacturerMatchScore(): int
{
    return $this->requiredInt('scores.exact_manufacturer_match');
}
/**
 * Returns the integer configured at "scores.brand_contained_in_name".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getBrandContainedInNameScore(): int
{
    return $this->requiredInt('scores.brand_contained_in_name');
}
/**
 * Returns the integer configured at "scores.name_token_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getNameTokenOverlapWeight(): int
{
    return $this->requiredInt('scores.name_token_overlap_weight');
}
/**
 * Returns the integer configured at "scores.product_number_token_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getProductNumberTokenOverlapWeight(): int
{
    return $this->requiredInt('scores.product_number_token_overlap_weight');
}
/**
 * Returns the integer configured at "scores.corpus_token_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getCorpusTokenOverlapWeight(): int
{
    return $this->requiredInt('scores.corpus_token_overlap_weight');
}
/**
 * Returns the integer configured at "scores.name_number_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getNameNumberOverlapWeight(): int
{
    return $this->requiredInt('scores.name_number_overlap_weight');
}
/**
 * Returns the integer configured at "scores.product_number_number_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getProductNumberNumberOverlapWeight(): int
{
    return $this->requiredInt('scores.product_number_number_overlap_weight');
}
/**
 * Returns the integer configured at "scores.corpus_number_overlap_weight".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getCorpusNumberOverlapWeight(): int
{
    return $this->requiredInt('scores.corpus_number_overlap_weight');
}
/**
 * Returns the integer configured at "scores.size_match".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getSizeMatchScore(): int
{
    return $this->requiredInt('scores.size_match');
}
/**
 * Returns the integer configured at "scores.availability_bonus".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getAvailabilityBonusScore(): int
{
    return $this->requiredInt('scores.availability_bonus');
}
/**
 * Returns the integer configured at "scores.device_query_device_product_bonus".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getDeviceQueryDeviceProductBonus(): int
{
    return $this->requiredInt('scores.device_query_device_product_bonus');
}
/**
 * Returns the integer configured at "scores.device_query_accessory_penalty".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getDeviceQueryAccessoryPenalty(): int
{
    return $this->requiredInt('scores.device_query_accessory_penalty');
}
/**
 * Returns the integer configured at "scores.accessory_query_accessory_product_bonus".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getAccessoryQueryAccessoryProductBonus(): int
{
    return $this->requiredInt('scores.accessory_query_accessory_product_bonus');
}
/**
 * Returns the integer configured at "scores.accessory_query_device_product_bonus".
 *
 * @throws \InvalidArgumentException if the path is missing or not numeric
 */
public function getAccessoryQueryDeviceProductBonus(): int
{
    return $this->requiredInt('scores.accessory_query_device_product_bonus');
}
/**
 * Returns the flag configured at "role_guard.filter_accessory_products_for_device_queries".
 *
 * @throws \InvalidArgumentException if the path is missing or is not a boolean value
 */
public function shouldFilterAccessoryProductsForDeviceQueries(): bool
{
    return $this->requiredBool('role_guard.filter_accessory_products_for_device_queries');
}
/**
 * Returns the flag configured at "role_guard.keep_ambiguous_products_for_device_queries".
 *
 * @throws \InvalidArgumentException if the path is missing or is not a boolean value
 */
public function shouldKeepAmbiguousProductsForDeviceQueries(): bool
{
    return $this->requiredBool('role_guard.keep_ambiguous_products_for_device_queries');
}
/**
 * Returns the pattern string configured at "patterns.contains_digit".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getContainsDigitPattern(): string
{
    return $this->requiredString('patterns.contains_digit');
}
/**
 * Returns the pattern string configured at "patterns.matching_cleanup".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getMatchingCleanupPattern(): string
{
    return $this->requiredString('patterns.matching_cleanup');
}
/**
 * Returns the pattern string configured at "patterns.whitespace_collapse".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getWhitespaceCollapsePattern(): string
{
    return $this->requiredString('patterns.whitespace_collapse');
}
/**
 * Returns the pattern string configured at "patterns.token_split".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getTokenSplitPattern(): string
{
    return $this->requiredString('patterns.token_split');
}
/**
 * Trims $value and surrounds it with the configured "padding.prefix" and
 * "padding.suffix". Both are read with blank values allowed, because a
 * whitespace-only padding string is a legitimate setting here.
 *
 * @throws \InvalidArgumentException if either padding path is missing or not scalar
 */
public function wrapWithPaddingSpaces(string $value): string
{
    $prefix = $this->requiredString('padding.prefix', true);
    $suffix = $this->requiredString('padding.suffix', true);

    return $prefix . trim($value) . $suffix;
}
/**
 * Returns the string list configured at "price.normalization_search".
 *
 * NOTE(review): the previous in-code default for this list contained a
 * whitespace-only entry (' '). Confirm that requiredStringList() accepts such an
 * entry without $allowEmptyStrings; otherwise this getter throws for the
 * carried-over YAML value.
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if the path is missing or is not a valid string list
 */
public function getPriceNormalizationSearch(): array
{
    return $this->requiredStringList('price.normalization_search');
}
/**
 * Returns the string list configured at "price.normalization_replace".
 * Empty strings are allowed because replacements are typically '' (removal).
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if the path is missing or is not a valid string list
 */
public function getPriceNormalizationReplace(): array
{
    return $this->requiredStringList('price.normalization_replace', true);
}
/**
 * Returns the string configured at "custom_fields.primary".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getPrimaryCustomFieldKey(): string
{
    return $this->requiredString('custom_fields.primary');
}
/**
 * Returns the string configured at "custom_fields.secondary".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getSecondaryCustomFieldKey(): string
{
    return $this->requiredString('custom_fields.secondary');
}
/**
 * Returns the string configured at "custom_fields.use_cases".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getUseCasesCustomFieldKey(): string
{
    return $this->requiredString('custom_fields.use_cases');
}
/**
 * Returns the string configured at "custom_fields.languages".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getLanguagesCustomFieldKey(): string
{
    return $this->requiredString('custom_fields.languages');
}
/**
 * Returns the string configured at "text.primary_secondary_separator".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getPrimarySecondarySeparator(): string
{
    return $this->requiredString('text.primary_secondary_separator');
}
/**
 * Returns the string configured at "text.use_cases_label".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getUseCasesLabel(): string
{
    return $this->requiredString('text.use_cases_label');
}
/**
 * Returns the string configured at "text.languages_label".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getLanguagesLabel(): string
{
    return $this->requiredString('text.languages_label');
}
/**
 * Returns the string configured at "text.custom_field_join_separator".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getCustomFieldJoinSeparator(): string
{
    return $this->requiredString('text.custom_field_join_separator');
}
/**
 * Returns the pattern string configured at "description.empty_line_pattern".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getDescriptionEmptyLinePattern(): string
{
    return $this->requiredString('description.empty_line_pattern');
}
/**
 * Returns the pattern string configured at "description.whitespace_cleanup_pattern".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getDescriptionWhitespaceCleanupPattern(): string
{
    return $this->requiredString('description.whitespace_cleanup_pattern');
}
/**
 * Returns the integer configured at "description.max_length".
 *
 * @throws \InvalidArgumentException if the path is missing, not numeric, or below 0
 */
public function getDescriptionMaxLength(): int
{
    return $this->requiredInt('description.max_length', 0);
}
/**
 * Returns the integer configured at "price.decimals".
 *
 * @throws \InvalidArgumentException if the path is missing, not numeric, or below 0
 */
public function getPriceDecimals(): int
{
    return $this->requiredInt('price.decimals', 0);
}
/**
 * Returns the string configured at "price.decimal_separator".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getPriceDecimalSeparator(): string
{
    return $this->requiredString('price.decimal_separator');
}
/**
 * Returns the string configured at "price.thousands_separator".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getPriceThousandsSeparator(): string
{
    return $this->requiredString('price.thousands_separator');
}
/**
 * Returns the string configured at "price.suffix".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getPriceSuffix(): string
{
    return $this->requiredString('price.suffix');
}
/**
 * Prepends the configured "seo.relative_prefix" to $path after stripping any
 * leading slashes from $path, so the prefix is not doubled.
 *
 * @throws \InvalidArgumentException if the prefix path is missing, not scalar, or blank
 */
public function buildRelativeSeoUrl(string $path): string
{
    return $this->requiredString('seo.relative_prefix') . ltrim($path, '/');
}
/**
 * Returns the string configured at "highlight.available_label".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getAvailableHighlightLabel(): string
{
    return $this->requiredString('highlight.available_label');
}
/**
 * Returns the string configured at "highlight.unavailable_label".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getUnavailableHighlightLabel(): string
{
    return $this->requiredString('highlight.unavailable_label');
}
/**
 * Returns the string configured at "highlight.product_number_prefix".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getProductNumberHighlightPrefix(): string
{
    return $this->requiredString('highlight.product_number_prefix');
}
/**
 * Returns the string configured at "image.missing_placeholder".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getMissingProductImagePlaceholder(): string
{
    return $this->requiredString('image.missing_placeholder');
}
/**
 * Returns the string configured at "deduplication.separator".
 *
 * @throws \InvalidArgumentException if the path is missing, not scalar, or blank
 */
public function getDeduplicationSeparator(): string
{
    return $this->requiredString('deduplication.separator');
}
/**
 * Resolves a keyword list from one of two sources, in this order:
 *  1. a list configured directly at $configPath (explicit project override);
 *  2. the vocabulary view whose name is configured at $viewPathConfigPath.
 *
 * @param string $configPath         dot path of the optional direct list
 * @param string $viewPathConfigPath dot path holding the vocabulary view name
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException if the direct list is invalid, the view name is
 *                                   missing/blank, or the view resolves to an empty list
 */
private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    // A directly configured list always wins over the vocabulary link.
    if ($this->has($configPath)) {
        return $this->requiredStringList($configPath);
    }

    $viewPath = $this->requiredString($viewPathConfigPath);
    $terms = $this->vocabulary->view($viewPath, []);

    // An empty view is treated as a configuration error, not as "no keywords".
    if ($terms === []) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX shop matching vocabulary view "%s" resolved to an empty list.',
            $viewPath
        ));
    }

    return $terms;
}
/**
 * Resolves a term-to-variants map: a map configured directly at $configPath takes
 * precedence; otherwise the vocabulary map named at $mapPathConfigPath is used.
 *
 * @param string $configPath        dot path of the optional direct map
 * @param string $mapPathConfigPath dot path holding the vocabulary map name
 *
 * @return array<string, string[]>
 *
 * @throws \InvalidArgumentException if the vocabulary map resolves to an empty map
 *                                   (or the direct map / map name is invalid)
 */
private function configuredStringListMapOrVocabularyMap(string $configPath, string $mapPathConfigPath): array
{
    if (!$this->has($configPath)) {
        $vocabularyPath = $this->requiredString($mapPathConfigPath);
        $resolved = $this->vocabulary->map($vocabularyPath, []);

        if ($resolved !== []) {
            return $resolved;
        }

        $message = sprintf(
            'RetrieX shop matching vocabulary map "%s" resolved to an empty map.',
            $vocabularyPath
        );

        throw new \InvalidArgumentException($message);
    }

    return $this->requiredStringListMap($configPath);
}
private function requiredBool(string $path): bool
{
$value = $this->requiredValue($path);
if (is_bool($value)) {
return $value;
@@ -373,60 +379,79 @@ final class ShopServiceConfig
}
}
return $default;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must be a boolean value.',
$path
));
}
/**
 * Reads a mandatory integer from the configuration.
 *
 * @param string $path dot-separated configuration path
 * @param int    $min  smallest accepted value (inclusive)
 *
 * @throws \InvalidArgumentException if the path is missing, the value is not numeric,
 *                                   or the value is below $min
 */
private function requiredInt(string $path, int $min = PHP_INT_MIN): int
{
    $value = $this->requiredValue($path);

    // NOTE(review): is_numeric() also accepts floats and numeric strings; the cast
    // below truncates them. Tighten this if only true integers should be accepted.
    if (!is_numeric($value)) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX shop matching config path "%s" must be an integer value.',
            $path
        ));
    }

    $value = (int) $value;

    // Out-of-range values are rejected rather than silently clamped to $min.
    if ($value < $min) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX shop matching config path "%s" must be greater than or equal to %d.',
            $path,
            $min
        ));
    }

    return $value;
}
/**
 * Reads a mandatory scalar from the configuration and returns it as a string.
 *
 * @param string $path       dot-separated configuration path
 * @param bool   $allowBlank when true, empty or whitespace-only values are accepted
 *                           (needed for values such as padding strings)
 *
 * @throws \InvalidArgumentException if the path is missing, the value is not scalar,
 *                                   or the value is blank while $allowBlank is false
 */
private function requiredString(string $path, bool $allowBlank = false): string
{
    $value = $this->requiredValue($path);

    if (!is_scalar($value)) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX shop matching config path "%s" must be a scalar string value.',
            $path
        ));
    }

    $value = (string) $value;

    // The blank check uses trim() only for testing; the returned value is untrimmed.
    if (!$allowBlank && trim($value) === '') {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX shop matching config path "%s" must not be empty.',
            $path
        ));
    }

    return $value;
}
/**
* @param string[] $default
* @param string[]|null $emptySafeDefault
* @return string[]
*/
/** @return string[] */
private function vocabularyView(string $path, array $fallback): array
private function requiredStringList(string $path, bool $allowEmptyStrings = false): array
{
return $this->vocabulary?->view($path, $fallback) ?? $fallback;
}
/** @return array<string, string[]> */
private function vocabularyMap(string $path, array $fallback): array
{
return $this->vocabulary?->map($path, $fallback) ?? $fallback;
}
private function stringList(string $path, array $default, bool $allowEmptyStrings = false, ?array $emptySafeDefault = null): array
{
$value = $this->value($path, $default);
$value = $this->requiredValue($path);
if (!is_array($value)) {
return $emptySafeDefault ?? $default;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must be a string list.',
$path
));
}
$out = [];
foreach ($value as $item) {
foreach ($value as $index => $item) {
if (!is_scalar($item)) {
continue;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s.%s" must be a scalar string value.',
$path,
(string) $index
));
}
$item = (string) $item;
@@ -435,7 +460,11 @@ final class ShopServiceConfig
}
if (!$allowEmptyStrings && $item === '') {
continue;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s.%s" must not be empty.',
$path,
(string) $index
));
}
if ($allowEmptyStrings || !in_array($item, $out, true)) {
@@ -443,45 +472,66 @@ final class ShopServiceConfig
}
}
if ($out === [] && !$allowEmptyStrings) {
return $emptySafeDefault ?? $default;
if ($out === []) {
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must not be empty.',
$path
));
}
return $out;
}
/**
* @param array<string, string[]> $default
* @return array<string, string[]>
*/
private function stringListMap(string $path, array $default): array
private function requiredStringListMap(string $path): array
{
$value = $this->value($path, $default);
$value = $this->requiredValue($path);
if (!is_array($value)) {
return $default;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must be a map of string lists.',
$path
));
}
$out = [];
foreach ($value as $key => $items) {
if (!is_string($key) || !is_array($items)) {
continue;
if (!is_scalar($key) || !is_array($items)) {
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must be a map of string lists.',
$path
));
}
$cleanKey = trim($key);
$cleanKey = trim((string) $key);
if ($cleanKey === '') {
continue;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" contains an empty map key.',
$path
));
}
$cleanItems = [];
foreach ($items as $item) {
foreach ($items as $index => $item) {
if (!is_scalar($item)) {
continue;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s.%s.%s" must be a scalar string value.',
$path,
$cleanKey,
(string) $index
));
}
$item = trim((string) $item);
if ($item === '') {
continue;
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s.%s.%s" must not be empty.',
$path,
$cleanKey,
(string) $index
));
}
if (!in_array($item, $cleanItems, true)) {
@@ -489,21 +539,52 @@ final class ShopServiceConfig
}
}
if ($cleanItems !== []) {
if ($cleanItems === []) {
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s.%s" must not be empty.',
$path,
$cleanKey
));
}
$out[$cleanKey] = $cleanItems;
}
if ($out === []) {
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" must not be empty.',
$path
));
}
return $out !== [] ? $out : $default;
return $out;
}
/**
 * Tells whether a dot-separated path exists in the configuration tree.
 * A key whose value is null still counts as present (array_key_exists semantics).
 *
 * @param string $path dot-separated configuration path
 */
private function has(string $path): bool
{
    $current = $this->config;

    foreach (explode('.', $path) as $segment) {
        // Stop as soon as a segment cannot be descended into.
        if (!is_array($current) || !array_key_exists($segment, $current)) {
            return false;
        }

        $current = $current[$segment];
    }

    return true;
}
private function requiredValue(string $path): mixed
{
$current = $this->config;
foreach (explode('.', $path) as $segment) {
if (!is_array($current) || !array_key_exists($segment, $current)) {
throw new \InvalidArgumentException(sprintf(
'RetrieX shop matching config path "%s" is missing. YAML must be the source of truth.',
$path
));
}
$current = $current[$segment];