Files
MtoRagSystem/src/Config/SearchRepairConfig.php
team 1 de12386a98 p42
2026-05-05 08:16:45 +02:00

492 lines
15 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
/**
 * YAML-backed shop search-repair configuration.
 *
 * This class intentionally has no PHP fallback values. Missing or invalid
 * configuration must be fixed in config/retriex/search_repair.yaml and
 * config/retriex/vocabulary.yaml.
 *
 * Keys are addressed with dot-separated paths (for example
 * "scores.candidate_digit") that are resolved segment by segment against the
 * nested $config array. Accessors that read from $config throw
 * \InvalidArgumentException when the key is missing or its value cannot be
 * used as the requested type.
 */
final class SearchRepairConfig
{
    /**
     * Pattern matching a "{name}" placeholder inside a pattern template.
     *
     * Brace groups that are the argument of a regex escape (\p{..}, \P{..},
     * \x{..}, \g{..}, \k{..}) are skipped so that they are not mistaken for
     * template placeholders.
     */
    private const PLACEHOLDER_PATTERN = '/(?<!\\\\[pPxgk])\{([A-Za-z_][A-Za-z0-9_]*)\}/';

    /**
     * @param array<string, mixed> $config nested configuration values, addressed by dotted paths
     */
    public function __construct(
        private readonly bool $enabled,
        private readonly int $maxRepairQueries,
        private readonly int $minPrimaryResultsWithoutRepair,
        private readonly array $config,
        private readonly DomainVocabularyConfig $vocabulary,
    ) {
    }

    /** Returns the $enabled flag passed to the constructor. */
    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    /** Returns the $maxRepairQueries value passed to the constructor. */
    public function getMaxRepairQueries(): int
    {
        return $this->maxRepairQueries;
    }

    /** Returns the $minPrimaryResultsWithoutRepair value passed to the constructor. */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return $this->minPrimaryResultsWithoutRepair;
    }

    /** Reads the boolean config key "strict_requested_accessory_code_repair". */
    public function shouldRestrictRequestedAccessoryCodeRepair(): bool
    {
        return $this->requiredBool('strict_requested_accessory_code_repair');
    }

    /** Reads the boolean config key "prefer_prompt_anchored_model_for_requested_accessory_code". */
    public function shouldPreferPromptAnchoredModelForRequestedAccessoryCode(): bool
    {
        return $this->requiredBool('prefer_prompt_anchored_model_for_requested_accessory_code');
    }

    /** Reads the boolean config key "direct_product_attribute_lookup.enabled". */
    public function isDirectProductAttributeLookupRepairEnabled(): bool
    {
        return $this->requiredBool('direct_product_attribute_lookup.enabled');
    }

    /** Reads "direct_product_attribute_lookup.min_query_tokens_after_cleanup" (must be > 0). */
    public function getDirectProductAttributeLookupMinTokens(): int
    {
        return $this->requiredPositiveInt('direct_product_attribute_lookup.min_query_tokens_after_cleanup');
    }

    /**
     * Reads "direct_product_attribute_lookup.product_type_terms" when configured,
     * otherwise the vocabulary view "search_repair.direct_product_type_terms".
     *
     * @return string[]
     */
    public function getDirectProductAttributeLookupProductTypeTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'direct_product_attribute_lookup.product_type_terms',
            'search_repair.direct_product_type_terms'
        );
    }

    /**
     * Reads the string list "direct_product_attribute_lookup.stop_terms".
     *
     * @return string[]
     */
    public function getDirectProductAttributeLookupStopTerms(): array
    {
        return $this->requiredStringList('direct_product_attribute_lookup.stop_terms');
    }

    /**
     * Reads the string list "direct_product_attribute_lookup.comparative_constraint_patterns".
     *
     * @return string[]
     */
    public function getDirectProductAttributeLookupComparativeConstraintPatterns(): array
    {
        return $this->requiredStringList('direct_product_attribute_lookup.comparative_constraint_patterns');
    }

    /**
     * Reads the string list "requested_accessory_code_fallback_query_templates".
     *
     * @return string[]
     */
    public function getRequestedAccessoryCodeFallbackQueryTemplates(): array
    {
        return $this->requiredStringList('requested_accessory_code_fallback_query_templates');
    }

    /**
     * Reads the string list "requested_accessory_code_fallback_terms".
     *
     * @return string[]
     */
    public function getRequestedAccessoryCodeFallbackTerms(): array
    {
        return $this->requiredStringList('requested_accessory_code_fallback_terms');
    }

    /**
     * Reads the string list "requested_accessory_code_context_prefix_terms".
     *
     * @return string[]
     */
    public function getRequestedAccessoryCodeContextPrefixTerms(): array
    {
        return $this->requiredStringList('requested_accessory_code_context_prefix_terms');
    }

    /** Reads "requested_accessory_code_proximity_window" (must be > 0). */
    public function getRequestedAccessoryCodeProximityWindow(): int
    {
        return $this->requiredPositiveInt('requested_accessory_code_proximity_window');
    }

    /**
     * Reads the string list "specific_model_candidate_patterns".
     *
     * @return string[]
     */
    public function getSpecificModelCandidatePatterns(): array
    {
        return $this->requiredStringList('specific_model_candidate_patterns');
    }

    /**
     * Reads the string list "model_candidate_exclude_terms".
     *
     * @return string[]
     */
    public function getModelCandidateExcludeTerms(): array
    {
        return $this->requiredStringList('model_candidate_exclude_terms');
    }

    /** Reads "limits.top_product_log_limit" (must be >= 0). */
    public function getTopProductLogLimit(): int
    {
        return $this->requiredNonNegativeInt('limits.top_product_log_limit');
    }

    /** Reads the non-empty string "patterns.model_candidate". */
    public function getModelCandidatePattern(): string
    {
        return $this->requiredString('patterns.model_candidate');
    }

    /**
     * Renders "patterns.accessory_candidate_template", replacing "{terms}"
     * with the "|"-joined accessory candidate terms.
     */
    public function getAccessoryCandidatePattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.accessory_candidate_template',
            ['terms' => $this->patternAlternation($this->getAccessoryCandidateTerms())]
        );
    }

    /** Reads the non-empty string "patterns.requested_accessory_code". */
    public function getRequestedAccessoryCodePattern(): string
    {
        return $this->requiredString('patterns.requested_accessory_code');
    }

    /**
     * Renders "patterns.accessory_or_bundle_template", replacing "{terms}"
     * with the "|"-joined accessory-or-bundle terms.
     */
    public function getAccessoryOrBundlePattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.accessory_or_bundle_template',
            ['terms' => $this->patternAlternation($this->getAccessoryOrBundleTerms())]
        );
    }

    /** Reads the non-empty string "patterns.model_like". */
    public function getModelLikePattern(): string
    {
        return $this->requiredString('patterns.model_like');
    }

    /**
     * Renders "patterns.specificity_boost_template", replacing "{terms}"
     * with the "|"-joined specificity boost terms.
     */
    public function getSpecificityBoostPattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.specificity_boost_template',
            ['terms' => $this->patternAlternation($this->getSpecificityBoostTerms())]
        );
    }

    /**
     * Reads "generic_candidate_tokens" when configured, otherwise the
     * vocabulary view "search_repair.generic_candidate_tokens".
     *
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return $this->configOrVocabularyStringList(
            'generic_candidate_tokens',
            'search_repair.generic_candidate_tokens'
        );
    }

    /**
     * Builds a string from the byte codes listed under
     * "sanitize_trim_character_codes" (each code is 0..255 and mapped with chr()).
     */
    public function getSanitizeTrimCharacters(): string
    {
        return implode('', array_map(
            static fn (int $code): string => chr($code),
            $this->requiredCharacterCodes('sanitize_trim_character_codes')
        ));
    }

    /** Reads the non-empty string "patterns.contains_digit". */
    public function getContainsDigitPattern(): string
    {
        return $this->requiredString('patterns.contains_digit');
    }

    /** Reads the non-empty string "patterns.whitespace_collapse". */
    public function getWhitespaceCollapsePattern(): string
    {
        return $this->requiredString('patterns.whitespace_collapse');
    }

    /** Reads the non-empty string "patterns.tokenize_cleanup". */
    public function getTokenizeCleanupPattern(): string
    {
        return $this->requiredString('patterns.tokenize_cleanup');
    }

    /**
     * Reads the non-empty string "product_key_separator".
     *
     * NOTE(review): the value is trimmed by requiredString(), so a separator
     * made only of whitespace is rejected and surrounding whitespace is lost —
     * confirm this is intended.
     */
    public function getProductKeySeparator(): string
    {
        return $this->requiredString('product_key_separator');
    }

    /** Reads the integer "scores.candidate_digit". */
    public function getCandidateDigitScore(): int
    {
        return $this->requiredInt('scores.candidate_digit');
    }

    /** Reads "scores.candidate_word_count_cap" (must be > 0). */
    public function getCandidateWordCountCap(): int
    {
        return $this->requiredPositiveInt('scores.candidate_word_count_cap');
    }

    /** Reads the integer "scores.specificity_boost". */
    public function getSpecificityBoostScore(): int
    {
        return $this->requiredInt('scores.specificity_boost');
    }

    /** Reads the numeric value "scores.primary_query_overlap_threshold" as a float. */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return $this->requiredFloat('scores.primary_query_overlap_threshold');
    }

    /** Reads the integer "scores.prompt_match_weight". */
    public function getPromptMatchWeight(): int
    {
        return $this->requiredInt('scores.prompt_match_weight');
    }

    /** Reads the integer "scores.primary_query_match_weight". */
    public function getPrimaryQueryMatchWeight(): int
    {
        return $this->requiredInt('scores.primary_query_match_weight');
    }

    /** Reads the integer "scores.repair_signal_match_weight". */
    public function getRepairSignalMatchWeight(): int
    {
        return $this->requiredInt('scores.repair_signal_match_weight');
    }

    /** Reads the integer "scores.primary_result_order_bonus". */
    public function getPrimaryResultOrderBonus(): int
    {
        return $this->requiredInt('scores.primary_result_order_bonus');
    }

    /** Reads the integer "scores.token_intersection_score". */
    public function getTokenIntersectionScore(): int
    {
        return $this->requiredInt('scores.token_intersection_score');
    }

    /** Reads the integer "scores.numeric_token_match_score". */
    public function getNumericTokenMatchScore(): int
    {
        return $this->requiredInt('scores.numeric_token_match_score');
    }

    /**
     * Reads "accessory_candidate_terms" when configured, otherwise the
     * vocabulary view "search_repair.accessory_candidate_terms".
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'accessory_candidate_terms',
            'search_repair.accessory_candidate_terms'
        );
    }

    /**
     * Reads "accessory_or_bundle_terms" when configured, otherwise the
     * vocabulary view "search_repair.accessory_or_bundle_terms".
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'accessory_or_bundle_terms',
            'search_repair.accessory_or_bundle_terms'
        );
    }

    /**
     * Reads "specificity_boost_terms" when configured, otherwise the
     * vocabulary view "search_repair.specificity_boost_terms".
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'specificity_boost_terms',
            'search_repair.specificity_boost_terms'
        );
    }

    /**
     * Returns the string list stored at $configKey when that (possibly dotted)
     * path exists in the config, otherwise the vocabulary view $vocabularyPath.
     *
     * The override check resolves dotted paths the same way the required*()
     * readers do, so nested overrides such as
     * "direct_product_attribute_lookup.product_type_terms" are honoured.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the config value is present but invalid,
     *                                   or when the vocabulary view yields an empty array
     */
    private function configOrVocabularyStringList(string $configKey, string $vocabularyPath): array
    {
        [$overridden] = $this->resolvePath($configKey);
        if ($overridden) {
            return $this->requiredStringList($configKey);
        }

        // DomainVocabularyConfig::view() is defined outside this file; the
        // second argument is presumably the default returned for a missing
        // view — TODO confirm. The result is returned without normalisation.
        $items = $this->vocabulary->view($vocabularyPath, []);
        if ($items === []) {
            throw new \InvalidArgumentException(sprintf(
                'Missing required RetrieX search repair vocabulary view "%s".',
                $vocabularyPath
            ));
        }

        return $items;
    }

    /**
     * Loads the template string at $path and replaces each "{name}"
     * placeholder with the matching entry of $variables.
     *
     * Placeholders are validated against the raw template, before any value
     * is injected, and substitution happens in a single pass. Injected values
     * are therefore never re-scanned: brace groups inside a value are neither
     * substituted again nor reported as unresolved placeholders.
     *
     * @param array<string, string> $variables placeholder name => replacement text
     *
     * @throws \InvalidArgumentException when the template is missing/empty or names
     *                                   a placeholder that $variables does not provide
     */
    private function renderPatternTemplate(string $path, array $variables): string
    {
        $template = $this->requiredString($path);

        if (preg_match_all(self::PLACEHOLDER_PATTERN, $template, $matches) > 0) {
            foreach ($matches[1] as $placeholderName) {
                if (!array_key_exists($placeholderName, $variables)) {
                    throw new \InvalidArgumentException(sprintf('RetrieX search repair pattern template "%s" contains unresolved placeholders.', $path));
                }
            }
        }

        $replacements = [];
        foreach ($variables as $key => $value) {
            $replacements['{' . $key . '}'] = $value;
        }

        // strtr() with an array replaces every key in one pass over the
        // template and never revisits text it has already inserted.
        return $replacements === [] ? $template : strtr($template, $replacements);
    }

    /**
     * Joins the trimmed, non-empty terms with "|".
     *
     * Terms are inserted verbatim (no preg_quote()), so any regex
     * metacharacters they contain keep their regex meaning.
     *
     * @param string[] $terms
     *
     * @throws \InvalidArgumentException when no non-empty term remains
     */
    private function patternAlternation(array $terms): string
    {
        $terms = array_values(array_filter(
            array_map(static fn (string $term): string => trim($term), $terms),
            static fn (string $term): bool => $term !== ''
        ));

        if ($terms === []) {
            throw new \InvalidArgumentException('RetrieX search repair pattern alternation requires at least one term.');
        }

        return implode('|', $terms);
    }

    /**
     * Reads a boolean at $path.
     *
     * Accepts real booleans, integers (non-zero is true) and the
     * case-insensitive strings 1/true/yes/on and 0/false/no/off.
     *
     * @throws \InvalidArgumentException when the key is missing or not boolean-like
     */
    private function requiredBool(string $path): bool
    {
        $value = $this->requiredValue($path);

        if (is_bool($value)) {
            return $value;
        }

        if (is_int($value)) {
            return $value !== 0;
        }

        if (is_string($value)) {
            $normalized = strtolower(trim($value));
            if (in_array($normalized, ['1', 'true', 'yes', 'on'], true)) {
                return true;
            }
            if (in_array($normalized, ['0', 'false', 'no', 'off'], true)) {
                return false;
            }
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be boolean.', $path));
    }

    /**
     * Reads a scalar at $path, casts it to string and trims it.
     *
     * @throws \InvalidArgumentException when the key is missing, not scalar,
     *                                   or empty after trimming
     */
    private function requiredString(string $path): string
    {
        $value = $this->requiredValue($path);
        if (!is_scalar($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a string.', $path));
        }

        $value = trim((string) $value);
        if ($value === '') {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $value;
    }

    /**
     * Reads an integer at $path; accepts ints and strings of optional "-" plus digits.
     *
     * @throws \InvalidArgumentException when the key is missing or not an integer
     */
    private function requiredInt(string $path): int
    {
        $value = $this->requiredValue($path);

        if (is_int($value)) {
            return $value;
        }

        if (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) {
            return (int) trim($value);
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be an integer.', $path));
    }

    /**
     * Reads an integer at $path and requires it to be >= 0.
     *
     * @throws \InvalidArgumentException when missing, not an integer, or negative
     */
    private function requiredNonNegativeInt(string $path): int
    {
        $value = $this->requiredInt($path);
        if ($value < 0) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than or equal to 0.', $path));
        }

        return $value;
    }

    /**
     * Reads an integer at $path and requires it to be > 0.
     *
     * @throws \InvalidArgumentException when missing, not an integer, or <= 0
     */
    private function requiredPositiveInt(string $path): int
    {
        $value = $this->requiredInt($path);
        if ($value <= 0) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than 0.', $path));
        }

        return $value;
    }

    /**
     * Reads a numeric value at $path as float; accepts ints, floats and numeric strings.
     *
     * @throws \InvalidArgumentException when the key is missing or not numeric
     */
    private function requiredFloat(string $path): float
    {
        $value = $this->requiredValue($path);

        if (is_int($value) || is_float($value)) {
            return (float) $value;
        }

        if (is_string($value)) {
            // Cast exactly the trimmed text that was validated.
            $trimmed = trim($value);
            if (is_numeric($trimmed)) {
                return (float) $trimmed;
            }
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be numeric.', $path));
    }

    /**
     * Reads a list at $path and returns its distinct, trimmed, non-empty
     * scalar entries as strings, in first-seen order.
     *
     * Non-scalar entries and entries that are empty after trimming are skipped.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the key is missing, not an array,
     *                                   or no usable entry remains
     */
    private function requiredStringList(string $path): array
    {
        $value = $this->requiredValue($path);
        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list.', $path));
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = trim((string) $item);
            if ($item === '' || in_array($item, $out, true)) {
                continue;
            }

            $out[] = $item;
        }

        if ($out === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $out;
    }

    /**
     * Reads a list of character codes at $path; each entry must be an int or
     * a digit-only string in the range 0..255.
     *
     * @return int[]
     *
     * @throws \InvalidArgumentException when the key is missing, not an array, empty,
     *                                   or contains a non-integer or out-of-range code
     */
    private function requiredCharacterCodes(string $path): array
    {
        $value = $this->requiredValue($path);
        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list of character codes.', $path));
        }

        $codes = [];
        foreach ($value as $item) {
            if (is_int($item)) {
                $code = $item;
            } elseif (is_string($item) && preg_match('/^\d+$/', trim($item)) === 1) {
                $code = (int) trim($item);
            } else {
                throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains a non-integer character code.', $path));
            }

            if ($code < 0 || $code > 255) {
                throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains an invalid character code.', $path));
            }

            $codes[] = $code;
        }

        if ($codes === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $codes;
    }

    /**
     * Returns the raw value stored at the dotted $path.
     *
     * @throws \InvalidArgumentException when any path segment is missing
     */
    private function requiredValue(string $path): mixed
    {
        [$found, $value] = $this->resolvePath($path);
        if (!$found) {
            throw new \InvalidArgumentException(sprintf('Missing required RetrieX search repair config key "%s".', $path));
        }

        return $value;
    }

    /**
     * Walks the dotted $path through the nested config array.
     *
     * A key whose value is null still counts as found (array_key_exists()).
     *
     * @return array{0: bool, 1: mixed} [found, value]; value is null when not found
     */
    private function resolvePath(string $path): array
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return [false, null];
            }
            $current = $current[$segment];
        }

        return [true, $current];
    }
}