last step

This commit is contained in:
team2
2026-04-29 22:22:57 +02:00
parent 8ece67b461
commit d618265044
12 changed files with 918 additions and 656 deletions

View File

@@ -4,243 +4,60 @@ declare(strict_types=1);
namespace App\Config;
use InvalidArgumentException;
final class CommerceQueryParserConfig
{
private const KNOWN_BRANDS = [
'heyl',
'horiba',
'neomeris',
];
private const PHRASES_TO_REMOVE = [
'ich suche',
'suche',
'habt ihr',
'gibt es',
'gebe mir',
'gib mir',
'zeige mir',
'welches gerät',
'welche gerät',
'welches modell',
'welches ist besser',
'welches ist am besten',
'alternative',
'alternativen',
'unter anderem',
'u a',
'welche',
'welcher',
'welches',
'welchen',
'sind',
'ist',
'geeignet',
'geeigent',
'verfügbarkeit',
'verfuegbarkeit',
];
private const FILTER_SEARCH_TOKENS = [
'auch',
'noch',
'nochmal',
'zusätzlich',
'dazu',
'davon',
'stattdessen',
'bitte',
'gern',
'gerne',
'zeige',
'zeig',
'such',
'suche',
'finde',
'find',
'mir',
'mal',
'von',
'im',
'in',
'für',
'fuer',
'welche',
'welcher',
'welches',
'welchen',
'sind',
'ist',
'geeignet',
'geeigent',
'verfügbarkeit',
'verfuegbarkeit',
'prüfe',
'pruefe',
'den',
'die',
'das',
'der',
'dem',
'des',
'und',
'oder',
'sowie',
'seine',
'seinen',
'seiner',
'seinem',
'seines',
'siene',
'sienen',
'siener',
'sienem',
'sienes',
'gebe',
'gib',
'nenne',
'nenn',
'preis',
'preise',
'preisen',
'kostet',
'kosten',
'ua',
'also',
'gut',
'gute',
'guten',
'guter',
'gutes',
'passen',
'passend',
];
private const SEARCH_TOKEN_CORRECTIONS = [
'siene' => 'seine',
'sienen' => 'seinen',
'siener' => 'seiner',
'sienem' => 'seinem',
'sienes' => 'seines',
'indicatoren' => 'indikatoren',
];
private const SEARCH_TOKEN_CANONICAL_MAP = [
'indikatoren' => 'indikator',
'indicators' => 'indikator',
'indicator' => 'indikator',
'reagenzien' => 'reagenz',
'reagents' => 'reagenz',
'reagent' => 'reagenz',
'produkte' => 'produkt',
];
private const SEMANTIC_SHOP_SEARCH_TOKENS = [
'indikator',
'indicator',
'reagenz',
'reagent',
'zubehör',
'zubehor',
'ersatzteil',
'verbrauchsmaterial',
'chemie',
'indikatorchemie',
'reagenzchemie',
'kit',
'set',
'filter',
'pumpe',
'pumpenkopf',
'motorblock',
'lösung',
'loesung',
'solution',
'teststreifen',
'gerät',
'geraet',
'messgerät',
'messgeraet',
'analysegerät',
'analysegeraet',
'analysator',
'monitor',
'controller',
'system',
];
/**
* @param array<string, mixed> $config
*/
public function __construct(
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
/**
* @return string[]
*/
/** @return string[] */
public function getKnownBrands(): array
{
return $this->stringList(
'known_brands',
$this->vocabularyView('commerce_query.known_brands', self::KNOWN_BRANDS)
);
return $this->stringList('known_brands');
}
/**
* @return string[]
*/
/** @return string[] */
public function getPhrasesToRemove(): array
{
return $this->stringList(
'phrases_to_remove',
$this->vocabularyView('commerce_query.phrases_to_remove', self::PHRASES_TO_REMOVE)
);
return $this->stringList('phrases_to_remove');
}
public function getHistoryContextPattern(): string
{
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
return $this->string('patterns.history_context');
}
public function getHistoryContextValuePattern(): string
{
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
return $this->renderTemplate(
$this->string('patterns.history_context_value_template'),
['fragment' => $this->getHistoryContextPattern()],
'patterns.history_context_value_template'
);
}
/**
* @return string[]
*/
/** @return string[] */
public function getFilterSearchTokens(): array
{
return $this->stringList(
'filter_search_tokens',
$this->vocabularyView('commerce_query.filter_search_tokens', self::FILTER_SEARCH_TOKENS)
);
return $this->stringList('filter_search_tokens');
}
/**
* @return array<string, string>
*/
/** @return array<string, string> */
public function getSearchTokenCorrections(): array
{
return $this->stringMap(
'search_token_corrections',
$this->vocabularyStringMap('commerce_query.search_token_corrections', self::SEARCH_TOKEN_CORRECTIONS)
);
return $this->stringMap('search_token_corrections');
}
/**
* @return array<string, string>
*/
/** @return array<string, string> */
public function getSearchTokenCanonicalMap(): array
{
return $this->stringMap(
'search_token_canonical_map',
$this->vocabularyStringMap('commerce_query.search_token_canonical', self::SEARCH_TOKEN_CANONICAL_MAP)
);
return $this->stringMap('search_token_canonical_map');
}
/**
@@ -253,181 +70,190 @@ final class CommerceQueryParserConfig
return $this->getFilterSearchTokens();
}
/**
* @return string[]
*/
/** @return string[] */
public function getNormalizationSearch(): array
{
return ['€'];
return $this->stringList('normalization.search', true);
}
/**
* @return string[]
*/
/** @return string[] */
public function getNormalizationReplace(): array
{
return [' euro '];
return $this->stringList('normalization.replace', true);
}
public function getPromptSanitizePattern(): string
{
return '/[^\p{L}\p{N}\s.,\-]/u';
return $this->string('patterns.prompt_sanitize');
}
public function getWhitespaceCollapsePattern(): string
{
return '/\s+/u';
return $this->string('patterns.whitespace_collapse');
}
public function getWhitespaceSplitPattern(): string
{
return '/\s+/u';
return $this->string('patterns.whitespace_split');
}
public function getSearchTextTrimCharacters(): string
{
return " \t\n\r\0\x0B-.,";
$characters = '';
foreach ($this->stringList('text.trim_characters') as $item) {
$characters .= match ($item) {
'space' => ' ',
'tab' => "\t",
'lf' => "\n",
'cr' => "\r",
'nul' => "\0",
'vertical_tab' => "\x0B",
default => $item,
};
}
return $characters;
}
public function getMinSearchTokenLength(): int
{
return 1;
return $this->int('limits.min_search_token_length');
}
public function getMinDirectProductTokenLength(): int
{
return 1;
return $this->int('limits.min_direct_product_token_length');
}
public function getHistoryQuestionPattern(): string
{
return '/^Question:\s*(.+)$/m';
return $this->string('patterns.history_question');
}
public function getPriceBetweenPattern(): string
{
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
return $this->string('patterns.price_between');
}
public function getPriceMaxPattern(): string
{
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
return $this->string('patterns.price_max');
}
public function getPriceMinPattern(): string
{
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
return $this->string('patterns.price_min');
}
/**
* @return string[]
*/
/** @return string[] */
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
{
return [
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
$this->string('patterns.price_removal_between'),
$this->string('patterns.price_removal_minmax'),
$this->renderTemplate(
$this->string('patterns.price_removal_intent_template'),
['price_pattern' => $intentConfig->getPricePattern()],
'patterns.price_removal_intent_template'
),
];
}
public function getDirectProductDigitPattern(): string
{
return '/\d/u';
return $this->string('patterns.direct_product_digit');
}
public function getDirectProductMaxTokens(): int
{
return 4;
return $this->int('limits.direct_product_max_tokens');
}
public function getModelLikePattern(): string
{
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
return $this->string('patterns.model_like');
}
public function getAccessoryLikePattern(): string
{
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
return $this->string('patterns.accessory_like');
}
public function getContainsDigitPattern(): string
{
return '/\d/u';
return $this->string('patterns.contains_digit');
}
public function getModelNumberTokenPattern(): string
{
return '/^(?:\d{2,5}[a-z0-9\-]*|[a-z]{1,6}\d{1,5}[a-z0-9\-]*)$/u';
return $this->string('patterns.model_number_token');
}
public function getModelContextTokenPattern(): string
{
return '/^[\p{L}][\p{L}0-9®\-]{2,}$/u';
return $this->string('patterns.model_context_token');
}
public function getModelSuffixTokenPattern(): string
{
return '/^[a-z]{1,4}\d{0,3}$/u';
return $this->string('patterns.model_suffix_token');
}
public function getModelContextTokenWindow(): int
{
return 4;
return $this->int('limits.model_context_token_window');
}
public function getMinMeaningfulAlphaTokenLength(): int
{
return 2;
return $this->int('limits.min_meaningful_alpha_token_length');
}
public function getMaxShopSearchTokens(): int
{
return 6;
return $this->int('limits.max_shop_search_tokens');
}
public function getInstructionOrPresentationTokenPattern(): string
{
return '/^(?:zeig(?:e)?|such(?:e)?|find(?:e)?|gib|gebe|nenn(?:e)?|liefer(?:e)?|erstelle?|mach(?:e)?|brauch(?:e)?|will|möchte|moechte|hätte|haette|kannst|bitte|mal|alle|alles|komplett|vollständig|vollstaendig|gesamt|ganze|ganzen|liste|listung|auflistung|tabelle|tabellarisch|übersicht|uebersicht|anzeigen?|ausgeben?|darstellen?|antwort(?:e)?|erklär(?:e)?|erklaer(?:e)?|info|infos|informationen|dazu|hierzu|damit|davon|an|als|mit|ohne|inkl|inklusive|also|gut|gute|guten|guter|gutes|passend|passen)$/u';
}
public function getMeasurementValueTokenPattern(): string
{
return '/^\d+[.,]\d+$/u';
return $this->string('patterns.instruction_or_presentation_token');
}
/**
* Product/category tokens that are useful for Store API search even when they are not next to a model number.
* This is intentionally a semantic allowlist, not a spelling-error blocklist.
*
* @return string[]
*/
public function getMeasurementValueTokenPattern(): string
{
return $this->string('patterns.measurement_value_token');
}
/** @return string[] */
public function getSemanticShopSearchTokens(): array
{
return $this->stringList(
'semantic_shop_search_tokens',
$this->vocabularyView('commerce_query.semantic_shop_search_tokens', self::SEMANTIC_SHOP_SEARCH_TOKENS)
return $this->stringList('semantic_shop_search_tokens');
}
public function buildExactTokenRemovalPattern(string $token): string
{
return $this->renderTemplate(
$this->string('patterns.exact_token_removal_template'),
['token' => preg_quote($token, '/')],
'patterns.exact_token_removal_template'
);
}
/** @return string[] */
private function vocabularyView(string $path, array $fallback): array
public function buildBrandPartOfModelPattern(string $brand): string
{
return $this->vocabulary?->view($path, $fallback) ?? $fallback;
}
/** @return array<string, string> */
private function vocabularyStringMap(string $path, array $fallback): array
{
return $this->vocabulary?->stringMap($path, $fallback) ?? $fallback;
return $this->renderTemplate(
$this->string('patterns.brand_part_of_model_template'),
['brand' => preg_quote($brand, '/')],
'patterns.brand_part_of_model_template'
);
}
/** @return string[] */
private function stringList(string $path, array $default): array
private function stringList(string $path, bool $preserveWhitespace = false): array
{
$value = $this->value($path, $default);
$value = $this->value($path);
if (!is_array($value)) {
return $default;
throw $this->invalid($path, 'must be a list of non-empty strings');
}
$out = [];
@@ -436,23 +262,31 @@ final class CommerceQueryParserConfig
continue;
}
$item = trim((string) $item);
if ($item === '' || in_array($item, $out, true)) {
$item = (string) $item;
if (!$preserveWhitespace) {
$item = trim($item);
}
if (trim($item) === '' || in_array($item, $out, true)) {
continue;
}
$out[] = $item;
}
return $out !== [] ? $out : $default;
if ($out === []) {
throw $this->invalid($path, 'must contain at least one non-empty string');
}
return $out;
}
/** @return array<string, string> */
private function stringMap(string $path, array $default): array
private function stringMap(string $path): array
{
$value = $this->value($path, $default);
$value = $this->value($path);
if (!is_array($value)) {
return $default;
throw $this->invalid($path, 'must be a map of non-empty strings');
}
$out = [];
@@ -468,15 +302,44 @@ final class CommerceQueryParserConfig
}
}
return $out !== [] ? $out : $default;
if ($out === []) {
throw $this->invalid($path, 'must contain at least one non-empty mapping');
}
return $out;
}
private function value(string $path, mixed $default): mixed
/**
 * Reads a required scalar config entry and returns it as a non-empty string.
 *
 * Non-scalar values and values whose string form is empty are rejected
 * with the same error.
 *
 * @throws InvalidArgumentException when the entry is missing, non-scalar or empty
 */
private function string(string $path): string
{
    $raw = $this->value($path);

    // Non-scalars collapse to '' so a single guard covers both failure modes.
    $text = is_scalar($raw) ? (string) $raw : '';
    if ($text === '') {
        throw $this->invalid($path, 'must be a non-empty string');
    }

    return $text;
}
/**
 * Reads a required config entry that must already be a native integer.
 *
 * No coercion from numeric strings or floats is performed.
 *
 * @throws InvalidArgumentException when the entry is missing or not an int
 */
private function int(string $path): int
{
    $candidate = $this->value($path);

    return is_int($candidate)
        ? $candidate
        : throw $this->invalid($path, 'must be an integer');
}
private function value(string $path): mixed
{
$current = $this->config;
foreach (explode('.', $path) as $segment) {
if (!is_array($current) || !array_key_exists($segment, $current)) {
return $default;
throw $this->missing($path);
}
$current = $current[$segment];
@@ -485,13 +348,29 @@ final class CommerceQueryParserConfig
return $current;
}
public function buildExactTokenRemovalPattern(string $token): string
/**
 * Substitutes `{name}` placeholders in a configured regex template.
 *
 * Placeholders are validated against the template itself before anything
 * is substituted, and all substitutions happen in a single pass. This means:
 *  - regex escapes that use braces (`\p{L}`, `\P{N}`, `\x{20AC}`, `\N{U+..}`,
 *    `\o{..}`, `\g{..}`, `\k{..}`) are not mistaken for unresolved placeholders;
 *  - brace sequences inside an inserted value are neither re-substituted by a
 *    later replacement nor reported as unresolved.
 *
 * @param array<string, string> $replacements placeholder name => replacement text
 * @param string                $path         config path, used only in the error message
 *
 * @throws InvalidArgumentException when the template has a placeholder without a replacement
 */
private function renderTemplate(string $template, array $replacements, string $path): string
{
    $substitutions = [];
    foreach ($replacements as $placeholder => $value) {
        $substitutions['{' . $placeholder . '}'] = $value;
    }

    // The lookbehind skips brace groups that belong to a regex escape such as \p{L}.
    preg_match_all('/(?<!\\\\[pPxNogk])\{[A-Za-z_][A-Za-z0-9_]*\}/', $template, $found);
    foreach ($found[0] as $token) {
        if (!array_key_exists($token, $substitutions)) {
            throw $this->invalid($path, 'contains unresolved placeholders');
        }
    }

    // strtr() replaces every key in one pass and never rescans inserted text.
    return strtr($template, $substitutions);
}
public function buildBrandPartOfModelPattern(string $brand): string
private function missing(string $path): InvalidArgumentException
{
return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u';
return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" is missing.', $path));
}
}
/**
 * Builds (does not throw) the exception for a config entry that is present but invalid.
 *
 * @param string $path   dotted config path of the offending entry
 * @param string $reason predicate appended after the quoted path
 */
private function invalid(string $path, string $reason): InvalidArgumentException
{
    $message = sprintf('RetrieX commerce query config "%s" %s.', $path, $reason);

    return new InvalidArgumentException($message);
}
}

View File

@@ -4,31 +4,18 @@ declare(strict_types=1);
namespace App\Config;
/**
* YAML-backed query-enrichment configuration.
*
* This class intentionally has no PHP fallback values. Missing or invalid
* configuration must be fixed in config/retriex/query_enrichment.yaml.
*/
final readonly class QueryEnricherConfig
{
/**
* Backwards-compatible fallback vocabulary.
* Active values are loaded from retriex.query_enrichment.config when present.
*
* @var array<int|string, mixed>
*/
private const DEFAULT_ENRICH_QUERY_LIST = [
'Wasserhärte' => 'Resthärte',
'Gerät' => 'Modell',
'Indikator' => 'Chemie',
'Seminar' => 'Webinar',
'Schulung' => 'Seminar',
'Indikatoren' => 'Indikator',
'Wasserhärte-Grenzwert' => 'Resthärte',
'Resthärte-Grenzwert' => 'Wasserhärte',
'Grenzwert' => 'Überwachungsbereich',
'store' => 'shop',
];
/**
* @param array<string, mixed> $config
*/
public function __construct(private array $config = [])
public function __construct(private array $config)
{
}
@@ -52,11 +39,7 @@ final readonly class QueryEnricherConfig
public function getEnrichQueryList(): array
{
$normalized = [];
$rules = $this->config['rules'] ?? self::DEFAULT_ENRICH_QUERY_LIST;
if (!is_array($rules)) {
$rules = self::DEFAULT_ENRICH_QUERY_LIST;
}
$rules = $this->requiredArray('rules');
foreach ($rules as $key => $value) {
if (is_array($value)) {
@@ -76,18 +59,16 @@ final readonly class QueryEnricherConfig
}
}
if ($normalized === []) {
throw new \InvalidArgumentException('RetrieX query enrichment config key "rules" must contain at least one valid enrichment rule.');
}
return $normalized;
}
public function getMaxExpansions(): int
{
$value = $this->config['max_expansions'] ?? 4;
if (!is_numeric($value)) {
return 4;
}
return max(0, (int) $value);
return $this->requiredNonNegativeInt('max_expansions');
}
/**
@@ -160,6 +141,49 @@ final readonly class QueryEnricherConfig
return true;
}
/**
 * Returns a top-level config entry that must exist and be a non-empty array.
 *
 * @return array<int|string, mixed>
 *
 * @throws \InvalidArgumentException when the key is missing, not an array, or empty
 */
private function requiredArray(string $key): array
{
    $entry = array_key_exists($key, $this->config)
        ? $this->config[$key]
        : throw new \InvalidArgumentException(sprintf('Missing required RetrieX query enrichment config key "%s".', $key));

    // The first matching arm wins, so the type check runs before the emptiness check.
    $problem = match (true) {
        !is_array($entry) => 'RetrieX query enrichment config key "%s" must be an array.',
        $entry === [] => 'RetrieX query enrichment config key "%s" must not be empty.',
        default => null,
    };

    if ($problem !== null) {
        throw new \InvalidArgumentException(sprintf($problem, $key));
    }

    return $entry;
}
/**
 * Returns a top-level config entry as an integer greater than or equal to 0.
 *
 * Accepts native ints and strings made only of an optional minus sign and
 * digits (surrounding whitespace is ignored).
 *
 * @throws \InvalidArgumentException when the key is missing, not an integer, or negative
 */
private function requiredNonNegativeInt(string $key): int
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX query enrichment config key "%s".', $key));
    }

    $raw = $this->config[$key];
    $parsed = match (true) {
        is_int($raw) => $raw,
        is_string($raw) && preg_match('/^-?\d+$/', trim($raw)) === 1 => (int) trim($raw),
        default => throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must be an integer.', $key)),
    };

    if ($parsed >= 0) {
        return $parsed;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must be greater than or equal to 0.', $key));
}
private function normalizePhrase(string $value): string
{
$value = trim($value);

View File

@@ -522,6 +522,13 @@ final readonly class RetriexEffectiveConfigProvider
'search_token_corrections' => $this->commerceQueryParserConfig->getSearchTokenCorrections(),
'search_token_canonical_map' => $this->commerceQueryParserConfig->getSearchTokenCanonicalMap(),
'semantic_shop_search_tokens' => $this->commerceQueryParserConfig->getSemanticShopSearchTokens(),
'normalization' => [
'search' => $this->commerceQueryParserConfig->getNormalizationSearch(),
'replace' => $this->commerceQueryParserConfig->getNormalizationReplace(),
],
'text' => [
'trim_characters_length' => strlen($this->commerceQueryParserConfig->getSearchTextTrimCharacters()),
],
'limits' => [
'min_search_token_length' => $this->commerceQueryParserConfig->getMinSearchTokenLength(),
'min_direct_product_token_length' => $this->commerceQueryParserConfig->getMinDirectProductTokenLength(),

View File

@@ -4,69 +4,24 @@ declare(strict_types=1);
namespace App\Config;
/**
* YAML-backed shop search-repair configuration.
*
* This class intentionally has no PHP fallback values. Missing or invalid
* configuration must be fixed in config/retriex/search_repair.yaml and
* config/retriex/vocabulary.yaml.
*/
final class SearchRepairConfig
{
private const GENERIC_CANDIDATE_TOKENS = [
'wasser',
'messgerät',
'messgeraet',
'produkt',
'geräte',
'geraete',
'gerät',
'geraet',
'resthärte',
'resthaerte',
'preis',
'infos',
'wissen',
];
private const ACCESSORY_CANDIDATE_TERMS = [
'indikator',
'indicator',
'reagenz',
'reagent',
'kit',
'set',
];
private const ACCESSORY_OR_BUNDLE_TERMS = [
'passend',
'passende',
'zubehor',
'zubehör',
'dazu',
'zusatz',
'erganzung',
'ergänzung',
'indikator',
'reagenz',
'kit',
'set',
'auch\s+das',
'mit\s+preis\s+und\s+allen\s+infos',
];
private const SPECIFICITY_BOOST_TERMS = [
'indikator',
'indicator',
'testomat',
'tritromat',
'titromat',
'reagenz',
'reagent',
];
/**
* @param array<string, mixed> $config
*/
public function __construct(
private readonly bool $enabled = true,
private readonly int $maxRepairQueries = 3,
private readonly int $minPrimaryResultsWithoutRepair = 2,
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
private readonly bool $enabled,
private readonly int $maxRepairQueries,
private readonly int $minPrimaryResultsWithoutRepair,
private readonly array $config,
private readonly DomainVocabularyConfig $vocabulary,
) {
}
@@ -87,257 +42,259 @@ final class SearchRepairConfig
public function shouldRestrictRequestedAccessoryCodeRepair(): bool
{
return $this->bool('strict_requested_accessory_code_repair', true);
return $this->requiredBool('strict_requested_accessory_code_repair');
}
public function shouldPreferPromptAnchoredModelForRequestedAccessoryCode(): bool
{
return $this->bool('prefer_prompt_anchored_model_for_requested_accessory_code', true);
return $this->requiredBool('prefer_prompt_anchored_model_for_requested_accessory_code');
}
/** @return string[] */
public function getRequestedAccessoryCodeFallbackQueryTemplates(): array
{
return $this->stringList(
'requested_accessory_code_fallback_query_templates',
['{term} {code}']
);
return $this->requiredStringList('requested_accessory_code_fallback_query_templates');
}
/** @return string[] */
public function getRequestedAccessoryCodeFallbackTerms(): array
{
return $this->stringList(
'requested_accessory_code_fallback_terms',
$this->getAccessoryCandidateTerms()
);
return $this->requiredStringList('requested_accessory_code_fallback_terms');
}
/** @return string[] */
public function getRequestedAccessoryCodeContextPrefixTerms(): array
{
return $this->stringList(
'requested_accessory_code_context_prefix_terms',
$this->getAccessoryCandidateTerms()
);
return $this->requiredStringList('requested_accessory_code_context_prefix_terms');
}
public function getRequestedAccessoryCodeProximityWindow(): int
{
return $this->int('requested_accessory_code_proximity_window', 1600);
return $this->requiredPositiveInt('requested_accessory_code_proximity_window');
}
/** @return string[] */
public function getSpecificModelCandidatePatterns(): array
{
return $this->stringList(
'specific_model_candidate_patterns',
[$this->getModelLikePattern()]
);
return $this->requiredStringList('specific_model_candidate_patterns');
}
/** @return string[] */
public function getModelCandidateExcludeTerms(): array
{
return $this->stringList(
'model_candidate_exclude_terms',
array_merge($this->getAccessoryCandidateTerms(), ['verfuegbarkeit', 'verfügbarkeit', 'shop'])
);
return $this->requiredStringList('model_candidate_exclude_terms');
}
public function getTopProductLogLimit(): int
{
return 3;
return $this->requiredNonNegativeInt('limits.top_product_log_limit');
}
public function getModelCandidatePattern(): string
{
return '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u';
return $this->requiredString('patterns.model_candidate');
}
public function getAccessoryCandidatePattern(): string
{
return '/\b((?:' . implode('|', $this->getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
return $this->renderPatternTemplate(
'patterns.accessory_candidate_template',
['terms' => $this->patternAlternation($this->getAccessoryCandidateTerms())]
);
}
public function getRequestedAccessoryCodePattern(): string
{
$fallbackTerms = array_map(
static fn(string $term): string => preg_quote($term, '/'),
$this->getRequestedAccessoryCodeContextPrefixTerms()
);
$fallbackTerms = array_filter($fallbackTerms, static fn(string $term): bool => $term !== '');
$fallbackPattern = $fallbackTerms === []
? '/\b([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'
: '/\b(?:' . implode('|', $fallbackTerms) . ')\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu';
return $this->string('requested_accessory_code_pattern', $fallbackPattern);
return $this->requiredString('patterns.requested_accessory_code');
}
public function getAccessoryOrBundlePattern(): string
{
return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu';
return $this->renderPatternTemplate(
'patterns.accessory_or_bundle_template',
['terms' => $this->patternAlternation($this->getAccessoryOrBundleTerms())]
);
}
public function getModelLikePattern(): string
{
return '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u';
return $this->requiredString('patterns.model_like');
}
public function getSpecificityBoostPattern(): string
{
return '/\b(?:' . implode('|', $this->getSpecificityBoostTerms()) . ')\b/iu';
return $this->renderPatternTemplate(
'patterns.specificity_boost_template',
['terms' => $this->patternAlternation($this->getSpecificityBoostTerms())]
);
}
/** @return string[] */
public function getGenericCandidateTokens(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'generic_candidate_tokens',
$this->vocabularyView('search_repair.generic_candidate_tokens', self::GENERIC_CANDIDATE_TOKENS)
'search_repair.generic_candidate_tokens'
);
}
public function getSanitizeTrimCharacters(): string
{
return " \t\n\r\0\x0B\"'`.,;:-";
return implode('', array_map(
static fn (int $code): string => chr($code),
$this->requiredCharacterCodes('sanitize_trim_character_codes')
));
}
public function getContainsDigitPattern(): string
{
return '/\d/u';
return $this->requiredString('patterns.contains_digit');
}
public function getWhitespaceCollapsePattern(): string
{
return '/\s+/u';
return $this->requiredString('patterns.whitespace_collapse');
}
public function getTokenizeCleanupPattern(): string
{
return '/[^\p{L}\p{N}\s\-]+/u';
return $this->requiredString('patterns.tokenize_cleanup');
}
public function getProductKeySeparator(): string
{
return '|';
return $this->requiredString('product_key_separator');
}
public function getCandidateDigitScore(): int
{
return 4;
return $this->requiredInt('scores.candidate_digit');
}
public function getCandidateWordCountCap(): int
{
return 4;
return $this->requiredPositiveInt('scores.candidate_word_count_cap');
}
public function getSpecificityBoostScore(): int
{
return 3;
return $this->requiredInt('scores.specificity_boost');
}
public function getPrimaryQueryOverlapThreshold(): float
{
return 0.9;
return $this->requiredFloat('scores.primary_query_overlap_threshold');
}
public function getPromptMatchWeight(): int
{
return 3;
return $this->requiredInt('scores.prompt_match_weight');
}
public function getPrimaryQueryMatchWeight(): int
{
return 2;
return $this->requiredInt('scores.primary_query_match_weight');
}
public function getRepairSignalMatchWeight(): int
{
return 4;
return $this->requiredInt('scores.repair_signal_match_weight');
}
public function getPrimaryResultOrderBonus(): int
{
return 1;
return $this->requiredInt('scores.primary_result_order_bonus');
}
public function getTokenIntersectionScore(): int
{
return 2;
return $this->requiredInt('scores.token_intersection_score');
}
public function getNumericTokenMatchScore(): int
{
return 4;
return $this->requiredInt('scores.numeric_token_match_score');
}
/** @return string[] */
public function getAccessoryCandidateTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'accessory_candidate_terms',
$this->vocabularyView('search_repair.accessory_candidate_terms', self::ACCESSORY_CANDIDATE_TERMS)
'search_repair.accessory_candidate_terms'
);
}
/** @return string[] */
public function getAccessoryOrBundleTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'accessory_or_bundle_terms',
$this->vocabularyView('search_repair.accessory_or_bundle_terms', self::ACCESSORY_OR_BUNDLE_TERMS)
'search_repair.accessory_or_bundle_terms'
);
}
/** @return string[] */
public function getSpecificityBoostTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'specificity_boost_terms',
$this->vocabularyView('search_repair.specificity_boost_terms', self::SPECIFICITY_BOOST_TERMS)
'search_repair.specificity_boost_terms'
);
}
/** @return string[] */
private function vocabularyView(string $path, array $fallback): array
private function configOrVocabularyStringList(string $configKey, string $vocabularyPath): array
{
return $this->vocabulary?->view($path, $fallback) ?? $fallback;
}
private function string(string $key, string $default): string
{
$value = $this->config[$key] ?? $default;
if (!is_scalar($value)) {
return $default;
if (array_key_exists($configKey, $this->config)) {
return $this->requiredStringList($configKey);
}
$value = trim((string) $value);
return $value !== '' ? $value : $default;
}
private function int(string $key, int $default): int
{
$value = $this->config[$key] ?? $default;
if (is_int($value)) {
return $value;
$items = $this->vocabulary->view($vocabularyPath, []);
if ($items === []) {
throw new \InvalidArgumentException(sprintf(
'Missing required RetrieX search repair vocabulary view "%s".',
$vocabularyPath
));
}
if (is_numeric($value)) {
return (int) $value;
}
return $default;
return $items;
}
private function bool(string $key, bool $default): bool
/**
 * Loads the regex template stored at $path and fills in its `{name}` placeholders.
 *
 * Placeholders are validated against the template before substitution, and
 * all substitutions happen in a single pass. This means:
 *  - regex escapes that use braces (`\p{L}`, `\P{N}`, `\x{20AC}`, `\N{U+..}`,
 *    `\o{..}`, `\g{..}`, `\k{..}`) are not mistaken for unresolved placeholders;
 *  - brace sequences inside an inserted value are neither re-substituted by a
 *    later variable nor reported as unresolved.
 *
 * @param array<string, string> $variables placeholder name => replacement text
 *
 * @throws \InvalidArgumentException when the template is missing/empty or has a
 *                                   placeholder without a matching variable
 */
private function renderPatternTemplate(string $path, array $variables): string
{
    $template = $this->requiredString($path);

    $substitutions = [];
    foreach ($variables as $name => $replacement) {
        $substitutions['{' . $name . '}'] = $replacement;
    }

    // The lookbehind skips brace groups that belong to a regex escape such as \p{L}.
    preg_match_all('/(?<!\\\\[pPxNogk])\{[A-Za-z_][A-Za-z0-9_]*\}/', $template, $found);
    foreach ($found[0] as $token) {
        if (!array_key_exists($token, $substitutions)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair pattern template "%s" contains unresolved placeholders.', $path));
        }
    }

    // strtr() replaces every key in one pass and never rescans inserted text.
    return strtr($template, $substitutions);
}
/**
 * Joins terms into a regex alternation body (`a|b|c`).
 *
 * Terms are trimmed and blank ones dropped. They are NOT passed through
 * preg_quote(), so any regex syntax inside a term is inserted verbatim.
 *
 * @param string[] $terms
 *
 * @throws \InvalidArgumentException when no non-blank term remains
 */
private function patternAlternation(array $terms): string
{
    $usable = [];
    foreach ($terms as $term) {
        $candidate = trim($term);
        if ($candidate !== '') {
            $usable[] = $candidate;
        }
    }

    if ($usable === []) {
        throw new \InvalidArgumentException('RetrieX search repair pattern alternation requires at least one term.');
    }

    return implode('|', $usable);
}
private function requiredBool(string $path): bool
{
$value = $this->requiredValue($path);
if (is_bool($value)) {
return $value;
@@ -357,15 +314,80 @@ final class SearchRepairConfig
}
}
return $default;
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be boolean.', $path));
}
/**
 * Returns a required scalar config entry as a trimmed, non-empty string.
 *
 * @throws \InvalidArgumentException when the entry is missing, non-scalar,
 *                                   or blank after trimming
 */
private function requiredString(string $path): string
{
    $raw = $this->requiredValue($path);

    if (is_scalar($raw)) {
        $text = trim((string) $raw);
        if ($text !== '') {
            return $text;
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a string.', $path));
}
/**
 * Returns a required config entry as an integer.
 *
 * Accepts native ints and strings made only of an optional minus sign and
 * digits (surrounding whitespace is ignored).
 *
 * @throws \InvalidArgumentException when the entry is missing or not an integer
 */
private function requiredInt(string $path): int
{
    $raw = $this->requiredValue($path);

    return match (true) {
        is_int($raw) => $raw,
        is_string($raw) && preg_match('/^-?\d+$/', trim($raw)) === 1 => (int) trim($raw),
        default => throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be an integer.', $path)),
    };
}
/**
 * Returns a required integer config entry that must be greater than or equal to 0.
 *
 * @throws \InvalidArgumentException when the entry is missing, not an integer, or negative
 */
private function requiredNonNegativeInt(string $path): int
{
    $number = $this->requiredInt($path);

    if ($number >= 0) {
        return $number;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than or equal to 0.', $path));
}
/**
 * Returns a required integer config entry that must be strictly greater than 0.
 *
 * @throws \InvalidArgumentException when the entry is missing, not an integer, or <= 0
 */
private function requiredPositiveInt(string $path): int
{
    $number = $this->requiredInt($path);

    if ($number > 0) {
        return $number;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than 0.', $path));
}
/**
 * Returns a required config entry as a finite float.
 *
 * Accepts native ints/floats and numeric strings (surrounding whitespace is
 * ignored). NAN and +/-INF are rejected, whether given directly or produced
 * by an overflowing string such as "1e999", because such a value silently
 * breaks any later comparison against it.
 *
 * @throws \InvalidArgumentException when the entry is missing, non-numeric, or not finite
 */
private function requiredFloat(string $path): float
{
    $value = $this->requiredValue($path);

    // Validate and cast the same trimmed text.
    if (is_string($value)) {
        $value = trim($value);
    }

    if (is_int($value) || is_float($value) || (is_string($value) && is_numeric($value))) {
        $number = (float) $value;
        if (is_finite($number)) {
            return $number;
        }
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be numeric.', $path));
}
/** @return string[] */
private function stringList(string $key, array $default): array
private function requiredStringList(string $path): array
{
$value = $this->config[$key] ?? $default;
$value = $this->requiredValue($path);
if (!is_array($value)) {
return $default;
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list.', $path));
}
$out = [];
@@ -382,6 +404,57 @@ final class SearchRepairConfig
$out[] = $item;
}
return $out !== [] ? $out : $default;
if ($out === []) {
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
}
return $out;
}
/**
 * Returns a required, non-empty list of single-byte character codes (0-255).
 *
 * Each item may be a native int or a string of digits (surrounding whitespace
 * is ignored). Any other item aborts the whole read.
 *
 * @return int[]
 *
 * @throws \InvalidArgumentException when the entry is missing, not a list, empty,
 *                                   or contains a non-integer / out-of-range code
 */
private function requiredCharacterCodes(string $path): array
{
    $raw = $this->requiredValue($path);
    if (!is_array($raw)) {
        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list of character codes.', $path));
    }

    $collected = [];
    foreach ($raw as $entry) {
        $code = match (true) {
            is_int($entry) => $entry,
            is_string($entry) && preg_match('/^\d+$/', trim($entry)) === 1 => (int) trim($entry),
            default => throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains a non-integer character code.', $path)),
        };

        $withinByteRange = $code >= 0 && $code <= 255;
        if (!$withinByteRange) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains an invalid character code.', $path));
        }

        $collected[] = $code;
    }

    if ($collected !== []) {
        return $collected;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
}
/**
 * Resolves a dot-separated path inside the config array.
 *
 * Each segment must exist as a key of the array reached so far. A key whose
 * value is null still counts as present.
 *
 * @throws \InvalidArgumentException when any segment of the path cannot be resolved
 */
private function requiredValue(string $path): mixed
{
    $node = $this->config;

    foreach (explode('.', $path) as $part) {
        $descendable = is_array($node) && array_key_exists($part, $node);
        if (!$descendable) {
            throw new \InvalidArgumentException(sprintf('Missing required RetrieX search repair config key "%s".', $path));
        }

        $node = $node[$part];
    }

    return $node;
}
}