last step

This commit is contained in:
team2
2026-04-29 22:22:57 +02:00
parent 8ece67b461
commit d618265044
12 changed files with 918 additions and 656 deletions

View File

@@ -4,69 +4,24 @@ declare(strict_types=1);
namespace App\Config;
/**
* YAML-backed shop search-repair configuration.
*
* This class intentionally has no PHP fallback values. Missing or invalid
* configuration must be fixed in config/retriex/search_repair.yaml and
* config/retriex/vocabulary.yaml.
*/
final class SearchRepairConfig
{
private const GENERIC_CANDIDATE_TOKENS = [
'wasser',
'messgerät',
'messgeraet',
'produkt',
'geräte',
'geraete',
'gerät',
'geraet',
'resthärte',
'resthaerte',
'preis',
'infos',
'wissen',
];
private const ACCESSORY_CANDIDATE_TERMS = [
'indikator',
'indicator',
'reagenz',
'reagent',
'kit',
'set',
];
private const ACCESSORY_OR_BUNDLE_TERMS = [
'passend',
'passende',
'zubehor',
'zubehör',
'dazu',
'zusatz',
'erganzung',
'ergänzung',
'indikator',
'reagenz',
'kit',
'set',
'auch\s+das',
'mit\s+preis\s+und\s+allen\s+infos',
];
private const SPECIFICITY_BOOST_TERMS = [
'indikator',
'indicator',
'testomat',
'tritromat',
'titromat',
'reagenz',
'reagent',
];
/**
* @param array<string, mixed> $config
*/
public function __construct(
private readonly bool $enabled = true,
private readonly int $maxRepairQueries = 3,
private readonly int $minPrimaryResultsWithoutRepair = 2,
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
private readonly bool $enabled,
private readonly int $maxRepairQueries,
private readonly int $minPrimaryResultsWithoutRepair,
private readonly array $config,
private readonly DomainVocabularyConfig $vocabulary,
) {
}
@@ -87,257 +42,259 @@ final class SearchRepairConfig
public function shouldRestrictRequestedAccessoryCodeRepair(): bool
{
return $this->bool('strict_requested_accessory_code_repair', true);
return $this->requiredBool('strict_requested_accessory_code_repair');
}
public function shouldPreferPromptAnchoredModelForRequestedAccessoryCode(): bool
{
return $this->bool('prefer_prompt_anchored_model_for_requested_accessory_code', true);
return $this->requiredBool('prefer_prompt_anchored_model_for_requested_accessory_code');
}
/** @return string[] */
public function getRequestedAccessoryCodeFallbackQueryTemplates(): array
{
return $this->stringList(
'requested_accessory_code_fallback_query_templates',
['{term} {code}']
);
return $this->requiredStringList('requested_accessory_code_fallback_query_templates');
}
/** @return string[] */
public function getRequestedAccessoryCodeFallbackTerms(): array
{
return $this->stringList(
'requested_accessory_code_fallback_terms',
$this->getAccessoryCandidateTerms()
);
return $this->requiredStringList('requested_accessory_code_fallback_terms');
}
/** @return string[] */
public function getRequestedAccessoryCodeContextPrefixTerms(): array
{
return $this->stringList(
'requested_accessory_code_context_prefix_terms',
$this->getAccessoryCandidateTerms()
);
return $this->requiredStringList('requested_accessory_code_context_prefix_terms');
}
public function getRequestedAccessoryCodeProximityWindow(): int
{
return $this->int('requested_accessory_code_proximity_window', 1600);
return $this->requiredPositiveInt('requested_accessory_code_proximity_window');
}
/** @return string[] */
public function getSpecificModelCandidatePatterns(): array
{
return $this->stringList(
'specific_model_candidate_patterns',
[$this->getModelLikePattern()]
);
return $this->requiredStringList('specific_model_candidate_patterns');
}
/** @return string[] */
public function getModelCandidateExcludeTerms(): array
{
return $this->stringList(
'model_candidate_exclude_terms',
array_merge($this->getAccessoryCandidateTerms(), ['verfuegbarkeit', 'verfügbarkeit', 'shop'])
);
return $this->requiredStringList('model_candidate_exclude_terms');
}
public function getTopProductLogLimit(): int
{
return 3;
return $this->requiredNonNegativeInt('limits.top_product_log_limit');
}
public function getModelCandidatePattern(): string
{
return '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u';
return $this->requiredString('patterns.model_candidate');
}
public function getAccessoryCandidatePattern(): string
{
return '/\b((?:' . implode('|', $this->getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
return $this->renderPatternTemplate(
'patterns.accessory_candidate_template',
['terms' => $this->patternAlternation($this->getAccessoryCandidateTerms())]
);
}
public function getRequestedAccessoryCodePattern(): string
{
$fallbackTerms = array_map(
static fn(string $term): string => preg_quote($term, '/'),
$this->getRequestedAccessoryCodeContextPrefixTerms()
);
$fallbackTerms = array_filter($fallbackTerms, static fn(string $term): bool => $term !== '');
$fallbackPattern = $fallbackTerms === []
? '/\b([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'
: '/\b(?:' . implode('|', $fallbackTerms) . ')\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu';
return $this->string('requested_accessory_code_pattern', $fallbackPattern);
return $this->requiredString('patterns.requested_accessory_code');
}
public function getAccessoryOrBundlePattern(): string
{
return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu';
return $this->renderPatternTemplate(
'patterns.accessory_or_bundle_template',
['terms' => $this->patternAlternation($this->getAccessoryOrBundleTerms())]
);
}
public function getModelLikePattern(): string
{
return '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u';
return $this->requiredString('patterns.model_like');
}
public function getSpecificityBoostPattern(): string
{
return '/\b(?:' . implode('|', $this->getSpecificityBoostTerms()) . ')\b/iu';
return $this->renderPatternTemplate(
'patterns.specificity_boost_template',
['terms' => $this->patternAlternation($this->getSpecificityBoostTerms())]
);
}
/** @return string[] */
public function getGenericCandidateTokens(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'generic_candidate_tokens',
$this->vocabularyView('search_repair.generic_candidate_tokens', self::GENERIC_CANDIDATE_TOKENS)
'search_repair.generic_candidate_tokens'
);
}
public function getSanitizeTrimCharacters(): string
{
return " \t\n\r\0\x0B\"'`.,;:-";
return implode('', array_map(
static fn (int $code): string => chr($code),
$this->requiredCharacterCodes('sanitize_trim_character_codes')
));
}
public function getContainsDigitPattern(): string
{
return '/\d/u';
return $this->requiredString('patterns.contains_digit');
}
public function getWhitespaceCollapsePattern(): string
{
return '/\s+/u';
return $this->requiredString('patterns.whitespace_collapse');
}
public function getTokenizeCleanupPattern(): string
{
return '/[^\p{L}\p{N}\s\-]+/u';
return $this->requiredString('patterns.tokenize_cleanup');
}
public function getProductKeySeparator(): string
{
return '|';
return $this->requiredString('product_key_separator');
}
public function getCandidateDigitScore(): int
{
return 4;
return $this->requiredInt('scores.candidate_digit');
}
public function getCandidateWordCountCap(): int
{
return 4;
return $this->requiredPositiveInt('scores.candidate_word_count_cap');
}
public function getSpecificityBoostScore(): int
{
return 3;
return $this->requiredInt('scores.specificity_boost');
}
public function getPrimaryQueryOverlapThreshold(): float
{
return 0.9;
return $this->requiredFloat('scores.primary_query_overlap_threshold');
}
public function getPromptMatchWeight(): int
{
return 3;
return $this->requiredInt('scores.prompt_match_weight');
}
public function getPrimaryQueryMatchWeight(): int
{
return 2;
return $this->requiredInt('scores.primary_query_match_weight');
}
public function getRepairSignalMatchWeight(): int
{
return 4;
return $this->requiredInt('scores.repair_signal_match_weight');
}
public function getPrimaryResultOrderBonus(): int
{
return 1;
return $this->requiredInt('scores.primary_result_order_bonus');
}
public function getTokenIntersectionScore(): int
{
return 2;
return $this->requiredInt('scores.token_intersection_score');
}
public function getNumericTokenMatchScore(): int
{
return 4;
return $this->requiredInt('scores.numeric_token_match_score');
}
/** @return string[] */
public function getAccessoryCandidateTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'accessory_candidate_terms',
$this->vocabularyView('search_repair.accessory_candidate_terms', self::ACCESSORY_CANDIDATE_TERMS)
'search_repair.accessory_candidate_terms'
);
}
/** @return string[] */
public function getAccessoryOrBundleTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'accessory_or_bundle_terms',
$this->vocabularyView('search_repair.accessory_or_bundle_terms', self::ACCESSORY_OR_BUNDLE_TERMS)
'search_repair.accessory_or_bundle_terms'
);
}
/** @return string[] */
public function getSpecificityBoostTerms(): array
{
return $this->stringList(
return $this->configOrVocabularyStringList(
'specificity_boost_terms',
$this->vocabularyView('search_repair.specificity_boost_terms', self::SPECIFICITY_BOOST_TERMS)
'search_repair.specificity_boost_terms'
);
}
/** @return string[] */
private function vocabularyView(string $path, array $fallback): array
private function configOrVocabularyStringList(string $configKey, string $vocabularyPath): array
{
return $this->vocabulary?->view($path, $fallback) ?? $fallback;
}
private function string(string $key, string $default): string
{
$value = $this->config[$key] ?? $default;
if (!is_scalar($value)) {
return $default;
if (array_key_exists($configKey, $this->config)) {
return $this->requiredStringList($configKey);
}
$value = trim((string) $value);
return $value !== '' ? $value : $default;
}
private function int(string $key, int $default): int
{
$value = $this->config[$key] ?? $default;
if (is_int($value)) {
return $value;
$items = $this->vocabulary->view($vocabularyPath, []);
if ($items === []) {
throw new \InvalidArgumentException(sprintf(
'Missing required RetrieX search repair vocabulary view "%s".',
$vocabularyPath
));
}
if (is_numeric($value)) {
return (int) $value;
}
return $default;
return $items;
}
private function bool(string $key, bool $default): bool
/**
 * Renders a configured pattern template by substituting `{name}` placeholders.
 *
 * The template is read through requiredString(), so a missing or empty
 * template fails there. Every `{name}` placeholder found in the template must
 * have a matching key in $variables.
 *
 * Braces that belong to a PCRE escape sequence (`\p{L}`, `\P{N}`, `\x{FF}`,
 * `\N{...}`, `\g{name}`, `\k{name}`) are not treated as placeholders, so
 * templates may use Unicode property classes. Substituted values are inserted
 * verbatim and are never scanned for placeholders themselves.
 *
 * @param string                $path      Dot-separated config path of the template.
 * @param array<string, string> $variables Placeholder name => replacement text.
 *
 * @return string The template with all known placeholders replaced.
 *
 * @throws \InvalidArgumentException When the template references a placeholder
 *                                   that is not present in $variables.
 */
private function renderPatternTemplate(string $path, array $variables): string
{
    $template = $this->requiredString($path);

    // Validate against the template itself (not the rendered result) so that
    // brace sequences inside substituted values cannot trigger a false alarm.
    preg_match_all(
        '/(?<!\\\\[pPxNgk])\{([A-Za-z_][A-Za-z0-9_]*)\}/',
        $template,
        $placeholders
    );

    foreach ($placeholders[1] as $name) {
        if (!array_key_exists($name, $variables)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair pattern template "%s" contains unresolved placeholders.', $path));
        }
    }

    $replacements = [];
    foreach ($variables as $name => $replacement) {
        $replacements['{' . $name . '}'] = $replacement;
    }

    // strtr() replaces in a single pass: text inserted for one placeholder is
    // not re-examined when the next placeholder is applied.
    return strtr($template, $replacements);
}
/**
 * Joins terms into a `|`-separated alternation for use inside a pattern.
 *
 * Each term is trimmed and terms that are empty after trimming are dropped.
 * Terms are inserted as-is (no escaping is applied here).
 *
 * @param string[] $terms
 *
 * @return string The remaining terms joined with `|`.
 *
 * @throws \InvalidArgumentException When no non-empty term remains.
 */
private function patternAlternation(array $terms): string
{
    $alternatives = array_reduce(
        $terms,
        static function (array $kept, string $raw): array {
            $clean = trim($raw);
            if ($clean !== '') {
                $kept[] = $clean;
            }

            return $kept;
        },
        []
    );

    if ($alternatives === []) {
        throw new \InvalidArgumentException('RetrieX search repair pattern alternation requires at least one term.');
    }

    return implode('|', $alternatives);
}
private function requiredBool(string $path): bool
{
$value = $this->requiredValue($path);
if (is_bool($value)) {
return $value;
@@ -357,15 +314,80 @@ final class SearchRepairConfig
}
}
return $default;
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be boolean.', $path));
}
/**
 * Reads a required scalar config value and returns it as a trimmed string.
 *
 * Any scalar is accepted and cast to string before trimming.
 *
 * @param string $path Dot-separated config path.
 *
 * @throws \InvalidArgumentException When the value is not scalar or is empty
 *                                   after trimming.
 */
private function requiredString(string $path): string
{
    $raw = $this->requiredValue($path);

    if (is_scalar($raw)) {
        $text = trim((string) $raw);
        if ($text === '') {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $text;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a string.', $path));
}
/**
 * Reads a required integer config value.
 *
 * Accepts native integers and strings consisting solely of an optional minus
 * sign followed by decimal digits (surrounding whitespace is ignored).
 * Digit strings that do not fit into a native integer are rejected instead of
 * being silently clamped by an (int) cast.
 *
 * @param string $path Dot-separated config path.
 *
 * @throws \InvalidArgumentException When the value is not an integer or is
 *                                   outside the native integer range.
 */
private function requiredInt(string $path): int
{
    $value = $this->requiredValue($path);

    if (is_int($value)) {
        return $value;
    }

    if (is_string($value)) {
        $digits = trim($value);
        if (preg_match('/^-?\d+$/', $digits) === 1) {
            // Numeric-string arithmetic yields an int when the value is
            // representable and a float when it overflows, which lets us
            // detect out-of-range input without losing leading-zero support.
            $number = $digits + 0;
            if (is_int($number)) {
                return $number;
            }
        }
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be an integer.', $path));
}
/**
 * Reads a required integer config value that must be zero or greater.
 *
 * @param string $path Dot-separated config path.
 *
 * @throws \InvalidArgumentException When the value is negative (or is not an
 *                                   integer, as reported by requiredInt()).
 */
private function requiredNonNegativeInt(string $path): int
{
    $number = $this->requiredInt($path);

    if ($number >= 0) {
        return $number;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than or equal to 0.', $path));
}
/**
 * Reads a required integer config value that must be strictly positive.
 *
 * @param string $path Dot-separated config path.
 *
 * @throws \InvalidArgumentException When the value is zero or negative (or is
 *                                   not an integer, as reported by requiredInt()).
 */
private function requiredPositiveInt(string $path): int
{
    $number = $this->requiredInt($path);

    if ($number > 0) {
        return $number;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than 0.', $path));
}
/**
 * Reads a required numeric config value and returns it as a float.
 *
 * Accepts native ints and floats, and strings that are numeric once trimmed.
 *
 * @param string $path Dot-separated config path.
 *
 * @throws \InvalidArgumentException When the value is not numeric.
 */
private function requiredFloat(string $path): float
{
    $raw = $this->requiredValue($path);

    $isNativeNumber = is_int($raw) || is_float($raw);
    $isNumericText = is_string($raw) && is_numeric(trim($raw));

    if (!$isNativeNumber && !$isNumericText) {
        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be numeric.', $path));
    }

    return (float) $raw;
}
/** @return string[] */
private function stringList(string $key, array $default): array
private function requiredStringList(string $path): array
{
$value = $this->config[$key] ?? $default;
$value = $this->requiredValue($path);
if (!is_array($value)) {
return $default;
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list.', $path));
}
$out = [];
@@ -382,6 +404,57 @@ final class SearchRepairConfig
$out[] = $item;
}
return $out !== [] ? $out : $default;
if ($out === []) {
throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
}
return $out;
}
/**
 * Reads a required, non-empty list of single-byte character codes.
 *
 * Each entry must be an integer, or a string of decimal digits (surrounding
 * whitespace ignored), and must lie in the range 0..255.
 *
 * @param string $path Dot-separated config path.
 *
 * @return int[] The validated codes in their configured order.
 *
 * @throws \InvalidArgumentException When the value is not a list, is empty,
 *                                   or contains a non-integer or out-of-range entry.
 */
private function requiredCharacterCodes(string $path): array
{
    $raw = $this->requiredValue($path);

    if (!is_array($raw)) {
        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list of character codes.', $path));
    }

    $validated = [];

    foreach ($raw as $entry) {
        $code = match (true) {
            is_int($entry) => $entry,
            is_string($entry) && preg_match('/^\d+$/', trim($entry)) === 1 => (int) trim($entry),
            default => throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains a non-integer character code.', $path)),
        };

        $inByteRange = $code >= 0 && $code <= 255;
        if (!$inByteRange) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains an invalid character code.', $path));
        }

        $validated[] = $code;
    }

    if ($validated === []) {
        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
    }

    return $validated;
}
/**
 * Resolves a dot-separated path inside the config array.
 *
 * Each path segment must be an existing key of the array reached so far; the
 * value found at the final segment is returned unchanged (it may be null if
 * the key exists with a null value).
 *
 * @param string $path Dot-separated config path, e.g. "scores.candidate_digit".
 *
 * @throws \InvalidArgumentException When any segment of the path is missing.
 */
private function requiredValue(string $path): mixed
{
    $node = $this->config;

    foreach (explode('.', $path) as $key) {
        if (is_array($node) && array_key_exists($key, $node)) {
            $node = $node[$key];
            continue;
        }

        throw new \InvalidArgumentException(sprintf('Missing required RetrieX search repair config key "%s".', $path));
    }

    return $node;
}
}