544 lines
17 KiB
PHP
544 lines
17 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Config;
|
|
|
|
/**
 * YAML-backed shop search-repair configuration.
 *
 * This class intentionally has no PHP fallback values. Missing or invalid
 * configuration must be fixed in config/retriex/search_repair.yaml and
 * config/retriex/vocabulary.yaml.
 *
 * Where a getter supports it, values are resolved in this order: genre
 * override, then the search-repair config array, then the domain vocabulary.
 * Config keys are dot-separated paths ("scores.candidate_digit") that are
 * resolved against nested arrays. Every getter that reads the config array
 * throws \InvalidArgumentException when the value is missing or malformed.
 */
final class SearchRepairConfig
{
    /**
     * @param array<string, mixed> $config nested search-repair settings, addressed by dot-separated paths
     */
    public function __construct(
        private readonly bool $enabled,
        private readonly int $maxRepairQueries,
        private readonly int $minPrimaryResultsWithoutRepair,
        private readonly array $config,
        private readonly DomainVocabularyConfig $vocabulary,
        private readonly ?GenreConfig $genreConfig = null,
    ) {
    }

    /** Returns the `enabled` flag passed to the constructor. */
    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    /** Returns the `maxRepairQueries` value passed to the constructor. */
    public function getMaxRepairQueries(): int
    {
        return $this->maxRepairQueries;
    }

    /** Returns the `minPrimaryResultsWithoutRepair` value passed to the constructor. */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return $this->minPrimaryResultsWithoutRepair;
    }

    /**
     * Reads the required boolean `strict_requested_accessory_code_repair`.
     *
     * @throws \InvalidArgumentException when the key is missing or not boolean-like
     */
    public function shouldRestrictRequestedAccessoryCodeRepair(): bool
    {
        return $this->requiredBool('strict_requested_accessory_code_repair');
    }

    /**
     * Reads the required boolean `prefer_prompt_anchored_model_for_requested_accessory_code`.
     *
     * @throws \InvalidArgumentException when the key is missing or not boolean-like
     */
    public function shouldPreferPromptAnchoredModelForRequestedAccessoryCode(): bool
    {
        return $this->requiredBool('prefer_prompt_anchored_model_for_requested_accessory_code');
    }

    /**
     * Genre override if it yields a non-null boolean, otherwise the required
     * config flag `direct_product_attribute_lookup.enabled`.
     *
     * @throws \InvalidArgumentException when the fallback key is missing or not boolean-like
     */
    public function isDirectProductAttributeLookupRepairEnabled(): bool
    {
        return $this->genreBool('search_repair.direct_product_attribute_lookup.enabled')
            ?? $this->requiredBool('direct_product_attribute_lookup.enabled');
    }

    /**
     * Genre override when it is a positive integer; any other genre value
     * (null, zero, negative) falls through to the required positive config value.
     *
     * @throws \InvalidArgumentException when the fallback key is missing or not a positive integer
     */
    public function getDirectProductAttributeLookupMinTokens(): int
    {
        $genreValue = $this->genreInt('search_repair.direct_product_attribute_lookup.min_query_tokens_after_cleanup');

        return $genreValue !== null && $genreValue > 0
            ? $genreValue
            : $this->requiredPositiveInt('direct_product_attribute_lookup.min_query_tokens_after_cleanup');
    }

    /**
     * Genre list if non-empty, else config list, else vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when no source provides a non-empty list
     */
    public function getDirectProductAttributeLookupProductTypeTerms(): array
    {
        return $this->genreStringList('product_attributes.direct_attribute_cleanup.product_type_terms')
            ?: $this->configOrVocabularyStringList(
                'direct_product_attribute_lookup.product_type_terms',
                'search_repair.direct_product_type_terms',
            );
    }

    /**
     * Genre list if non-empty, else config list, else vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when no source provides a non-empty list
     */
    public function getDirectProductAttributeLookupStopTerms(): array
    {
        return $this->genreStringList('product_attributes.direct_attribute_cleanup.stop_terms')
            ?: $this->configOrVocabularyStringList(
                'direct_product_attribute_lookup.stop_terms',
                'search_repair.direct_product_attribute_stop_terms',
            );
    }

    /**
     * Genre list if non-empty, else the required config list.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the fallback list is missing or empty
     */
    public function getDirectProductAttributeLookupComparativeConstraintPatterns(): array
    {
        return $this->genreStringList('product_attributes.direct_attribute_cleanup.comparative_constraint_patterns')
            ?: $this->requiredStringList('direct_product_attribute_lookup.comparative_constraint_patterns');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException when the list is missing or empty
     */
    public function getRequestedAccessoryCodeFallbackQueryTemplates(): array
    {
        return $this->requiredStringList('requested_accessory_code_fallback_query_templates');
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getRequestedAccessoryCodeFallbackTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'requested_accessory_code_fallback_terms',
            'search_repair.requested_accessory_code_terms',
        );
    }

    /**
     * Config list if the key exists, else the vocabulary view. The vocabulary
     * view is the same one used by getRequestedAccessoryCodeFallbackTerms().
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getRequestedAccessoryCodeContextPrefixTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'requested_accessory_code_context_prefix_terms',
            'search_repair.requested_accessory_code_terms',
        );
    }

    /** @throws \InvalidArgumentException when the key is missing or not a positive integer */
    public function getRequestedAccessoryCodeProximityWindow(): int
    {
        return $this->requiredPositiveInt('requested_accessory_code_proximity_window');
    }

    /**
     * Genre list if non-empty, else the required config list.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the fallback list is missing or empty
     */
    public function getSpecificModelCandidatePatterns(): array
    {
        return $this->genreStringList('search_repair.candidate_patterns.specific_model_candidate_patterns')
            ?: $this->requiredStringList('specific_model_candidate_patterns');
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getModelCandidateExcludeTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'model_candidate_exclude_terms',
            'search_repair.model_candidate_exclude_terms',
        );
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer >= 0 */
    public function getTopProductLogLimit(): int
    {
        return $this->requiredNonNegativeInt('limits.top_product_log_limit');
    }

    /**
     * Genre pattern if non-empty, else the required config pattern.
     *
     * @throws \InvalidArgumentException when the fallback pattern is missing or empty
     */
    public function getModelCandidatePattern(): string
    {
        return $this->genreOrRequiredString(
            'search_repair.candidate_patterns.patterns.model_candidate',
            'patterns.model_candidate',
        );
    }

    /**
     * Renders the accessory-candidate template with `{terms}` replaced by the
     * `|`-joined accessory candidate terms.
     *
     * @throws \InvalidArgumentException when the template or the terms are missing,
     *                                   or placeholders remain after rendering
     */
    public function getAccessoryCandidatePattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.accessory_candidate_template',
            ['terms' => $this->patternAlternation($this->getAccessoryCandidateTerms())],
            'search_repair.candidate_patterns.patterns.accessory_candidate_template',
        );
    }

    /**
     * Genre pattern if non-empty, else the required config pattern.
     *
     * @throws \InvalidArgumentException when the fallback pattern is missing or empty
     */
    public function getRequestedAccessoryCodePattern(): string
    {
        return $this->genreOrRequiredString(
            'search_repair.candidate_patterns.patterns.requested_accessory_code',
            'patterns.requested_accessory_code',
        );
    }

    /**
     * Renders the accessory-or-bundle template with `{terms}` replaced by the
     * `|`-joined accessory-or-bundle terms.
     *
     * @throws \InvalidArgumentException when the template or the terms are missing,
     *                                   or placeholders remain after rendering
     */
    public function getAccessoryOrBundlePattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.accessory_or_bundle_template',
            ['terms' => $this->patternAlternation($this->getAccessoryOrBundleTerms())],
            'search_repair.candidate_patterns.patterns.accessory_or_bundle_template',
        );
    }

    /**
     * Genre pattern if non-empty, else the required config pattern.
     *
     * @throws \InvalidArgumentException when the fallback pattern is missing or empty
     */
    public function getModelLikePattern(): string
    {
        return $this->genreOrRequiredString(
            'search_repair.candidate_patterns.patterns.model_like',
            'patterns.model_like',
        );
    }

    /**
     * Renders the specificity-boost template with `{terms}` replaced by the
     * `|`-joined specificity boost terms.
     *
     * @throws \InvalidArgumentException when the template or the terms are missing,
     *                                   or placeholders remain after rendering
     */
    public function getSpecificityBoostPattern(): string
    {
        return $this->renderPatternTemplate(
            'patterns.specificity_boost_template',
            ['terms' => $this->patternAlternation($this->getSpecificityBoostTerms())],
            'search_repair.candidate_patterns.patterns.specificity_boost_template',
        );
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getGenericCandidateTokens(): array
    {
        return $this->configOrVocabularyStringList(
            'generic_candidate_tokens',
            'search_repair.generic_candidate_tokens',
        );
    }

    /**
     * Builds a string from the configured byte codes (each 0-255), one
     * single-byte character per code, in configured order.
     *
     * @throws \InvalidArgumentException when the code list is missing, empty or invalid
     */
    public function getSanitizeTrimCharacters(): string
    {
        return implode('', array_map(
            static fn (int $code): string => chr($code),
            $this->requiredCharacterCodes('sanitize_trim_character_codes'),
        ));
    }

    /** @throws \InvalidArgumentException when the pattern is missing or empty */
    public function getContainsDigitPattern(): string
    {
        return $this->requiredString('patterns.contains_digit');
    }

    /** @throws \InvalidArgumentException when the pattern is missing or empty */
    public function getWhitespaceCollapsePattern(): string
    {
        return $this->requiredString('patterns.whitespace_collapse');
    }

    /** @throws \InvalidArgumentException when the pattern is missing or empty */
    public function getTokenizeCleanupPattern(): string
    {
        return $this->requiredString('patterns.tokenize_cleanup');
    }

    /**
     * Note: the value is trimmed by requiredString(), so a whitespace-only
     * separator is rejected as empty.
     *
     * @throws \InvalidArgumentException when the key is missing or empty
     */
    public function getProductKeySeparator(): string
    {
        return $this->requiredString('product_key_separator');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getCandidateDigitScore(): int
    {
        return $this->requiredInt('scores.candidate_digit');
    }

    /** @throws \InvalidArgumentException when the key is missing or not a positive integer */
    public function getCandidateWordCountCap(): int
    {
        return $this->requiredPositiveInt('scores.candidate_word_count_cap');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getSpecificityBoostScore(): int
    {
        return $this->requiredInt('scores.specificity_boost');
    }

    /** @throws \InvalidArgumentException when the key is missing or not numeric */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return $this->requiredFloat('scores.primary_query_overlap_threshold');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getPromptMatchWeight(): int
    {
        return $this->requiredInt('scores.prompt_match_weight');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getPrimaryQueryMatchWeight(): int
    {
        return $this->requiredInt('scores.primary_query_match_weight');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getRepairSignalMatchWeight(): int
    {
        return $this->requiredInt('scores.repair_signal_match_weight');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getPrimaryResultOrderBonus(): int
    {
        return $this->requiredInt('scores.primary_result_order_bonus');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getTokenIntersectionScore(): int
    {
        return $this->requiredInt('scores.token_intersection_score');
    }

    /** @throws \InvalidArgumentException when the key is missing or not an integer */
    public function getNumericTokenMatchScore(): int
    {
        return $this->requiredInt('scores.numeric_token_match_score');
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getAccessoryCandidateTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'accessory_candidate_terms',
            'search_repair.accessory_candidate_terms',
        );
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'accessory_or_bundle_terms',
            'search_repair.accessory_or_bundle_terms',
        );
    }

    /**
     * Config list if the key exists, else the vocabulary view.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source provides a non-empty list
     */
    public function getSpecificityBoostTerms(): array
    {
        return $this->configOrVocabularyStringList(
            'specificity_boost_terms',
            'search_repair.specificity_boost_terms',
        );
    }

    /**
     * Genre-level string list, or [] when no genre config is set or it yields null.
     *
     * @return string[]
     */
    private function genreStringList(string $path): array
    {
        return $this->genreConfig?->getValueStringList($path) ?? [];
    }

    /** Genre-level string, or '' when no genre config is set or it yields null. */
    private function genreString(string $path): string
    {
        return $this->genreConfig?->getValueString($path) ?? '';
    }

    /** Genre-level boolean, or null when no genre config is set. */
    private function genreBool(string $path): ?bool
    {
        return $this->genreConfig?->getValueBool($path);
    }

    /** Genre-level integer, or null when no genre config is set. */
    private function genreInt(string $path): ?int
    {
        return $this->genreConfig?->getValueInt($path);
    }

    /**
     * Returns the genre string at $genrePath unless it is the empty string, in
     * which case the required config string at $configPath is used.
     *
     * Only '' counts as "absent" (same rule renderPatternTemplate() applies);
     * a truthiness test would also discard the literal string '0'.
     *
     * @throws \InvalidArgumentException when the fallback config value is missing or empty
     */
    private function genreOrRequiredString(string $genrePath, string $configPath): string
    {
        $genreValue = $this->genreString($genrePath);

        return $genreValue !== '' ? $genreValue : $this->requiredString($configPath);
    }

    /**
     * Returns the config list at $configKey when that (possibly dot-separated)
     * path exists, otherwise the vocabulary view at $vocabularyPath.
     *
     * The existence check walks nested arrays exactly like requiredValue(), so
     * nested overrides such as "direct_product_attribute_lookup.stop_terms"
     * are honoured. An existing but invalid config value is an error; it does
     * not silently fall through to the vocabulary.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the config value is invalid or the vocabulary view is empty
     */
    private function configOrVocabularyStringList(string $configKey, string $vocabularyPath): array
    {
        if ($this->hasConfigValue($configKey)) {
            return $this->requiredStringList($configKey);
        }

        $items = $this->vocabulary->view($vocabularyPath, []);
        if ($items === []) {
            throw new \InvalidArgumentException(sprintf(
                'Missing required RetrieX search repair vocabulary view "%s".',
                $vocabularyPath
            ));
        }

        return $items;
    }

    /**
     * Substitutes `{name}` placeholders in a pattern template.
     *
     * The template comes from the genre config ($genrePath) when that yields a
     * non-empty string, otherwise from the required config key $path.
     *
     * @param array<string, string> $variables placeholder name => replacement text
     *
     * @throws \InvalidArgumentException when the template is missing/empty or a
     *                                   `{identifier}` placeholder is left after substitution
     */
    private function renderPatternTemplate(string $path, array $variables, ?string $genrePath = null): string
    {
        $template = $genrePath !== null
            ? $this->genreOrRequiredString($genrePath, $path)
            : $this->requiredString($path);

        $replacements = [];
        foreach ($variables as $name => $replacement) {
            $replacements['{' . $name . '}'] = $replacement;
        }

        // Single-pass substitution: text inserted for one placeholder is never
        // re-scanned and expanded by another variable.
        $rendered = strtr($template, $replacements);

        // Regex quantifiers such as {2} or {1,3} start with a digit and are not
        // mistaken for placeholders by this check.
        if (preg_match('/\{[A-Za-z_][A-Za-z0-9_]*\}/', $rendered) === 1) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair pattern template "%s" contains unresolved placeholders.', $path));
        }

        return $rendered;
    }

    /**
     * Joins the trimmed, non-empty terms with `|` for use inside a regex group.
     *
     * NOTE(review): terms are inserted verbatim (no preg_quote). Presumably the
     * configured terms may be regex fragments on purpose; confirm before escaping.
     *
     * @param string[] $terms
     *
     * @throws \InvalidArgumentException when no non-empty term remains
     */
    private function patternAlternation(array $terms): string
    {
        $terms = array_values(array_filter(
            array_map(static fn (string $term): string => trim($term), $terms),
            static fn (string $term): bool => $term !== '',
        ));

        if ($terms === []) {
            throw new \InvalidArgumentException('RetrieX search repair pattern alternation requires at least one term.');
        }

        return implode('|', $terms);
    }

    /**
     * Reads a boolean. Accepts real booleans, integers (non-zero is true) and
     * the case-insensitive strings 1/true/yes/on and 0/false/no/off.
     *
     * @throws \InvalidArgumentException when the key is missing or the value is not boolean-like
     */
    private function requiredBool(string $path): bool
    {
        $value = $this->requiredValue($path);

        if (is_bool($value)) {
            return $value;
        }

        if (is_int($value)) {
            return $value !== 0;
        }

        if (is_string($value)) {
            $normalized = strtolower(trim($value));
            if (in_array($normalized, ['1', 'true', 'yes', 'on'], true)) {
                return true;
            }
            if (in_array($normalized, ['0', 'false', 'no', 'off'], true)) {
                return false;
            }
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be boolean.', $path));
    }

    /**
     * Reads a non-empty string. Any scalar is accepted and cast to string; the
     * result is trimmed before the emptiness check.
     *
     * @throws \InvalidArgumentException when the key is missing, non-scalar, or empty after trimming
     */
    private function requiredString(string $path): string
    {
        $value = $this->requiredValue($path);

        if (!is_scalar($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a string.', $path));
        }

        $value = trim((string) $value);
        if ($value === '') {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $value;
    }

    /**
     * Reads an integer. Accepts native ints and strings made of an optional
     * minus sign followed by digits (surrounding whitespace is ignored).
     *
     * @throws \InvalidArgumentException when the key is missing or the value is not an integer
     */
    private function requiredInt(string $path): int
    {
        $value = $this->requiredValue($path);

        if (is_int($value)) {
            return $value;
        }

        if (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) {
            return (int) trim($value);
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be an integer.', $path));
    }

    /** @throws \InvalidArgumentException when the key is missing, not an integer, or negative */
    private function requiredNonNegativeInt(string $path): int
    {
        $value = $this->requiredInt($path);

        if ($value < 0) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than or equal to 0.', $path));
        }

        return $value;
    }

    /** @throws \InvalidArgumentException when the key is missing, not an integer, or not greater than 0 */
    private function requiredPositiveInt(string $path): int
    {
        $value = $this->requiredInt($path);

        if ($value <= 0) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be greater than 0.', $path));
        }

        return $value;
    }

    /**
     * Reads a float. Accepts ints, floats and numeric strings.
     *
     * @throws \InvalidArgumentException when the key is missing or the value is not numeric
     */
    private function requiredFloat(string $path): float
    {
        $value = $this->requiredValue($path);

        if (is_int($value) || is_float($value)) {
            return (float) $value;
        }

        if (is_string($value)) {
            // Cast the same trimmed text that was validated: trim() removes
            // bytes (e.g. NUL) that the float cast itself would not skip.
            $trimmed = trim($value);
            if (is_numeric($trimmed)) {
                return (float) $trimmed;
            }
        }

        throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be numeric.', $path));
    }

    /**
     * Reads a list of strings. Scalar items are cast to string and trimmed;
     * non-scalar items, empty strings and duplicates are skipped. Order of
     * first occurrence is kept.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the key is missing, not an array, or no item survives
     */
    private function requiredStringList(string $path): array
    {
        $value = $this->requiredValue($path);

        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list.', $path));
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = trim((string) $item);
            if ($item === '' || in_array($item, $out, true)) {
                continue;
            }

            $out[] = $item;
        }

        if ($out === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $out;
    }

    /**
     * Reads a list of character codes. Each item must be an int or a string of
     * digits, within 0-255 (the range chr() maps to a single byte).
     *
     * @return int[]
     *
     * @throws \InvalidArgumentException when the key is missing, not an array, empty,
     *                                   or contains a non-integer or out-of-range code
     */
    private function requiredCharacterCodes(string $path): array
    {
        $value = $this->requiredValue($path);

        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must be a list of character codes.', $path));
        }

        $codes = [];
        foreach ($value as $item) {
            if (is_int($item)) {
                $code = $item;
            } elseif (is_string($item) && preg_match('/^\d+$/', trim($item)) === 1) {
                $code = (int) trim($item);
            } else {
                throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains a non-integer character code.', $path));
            }

            if ($code < 0 || $code > 255) {
                throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" contains an invalid character code.', $path));
            }

            $codes[] = $code;
        }

        if ($codes === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX search repair config key "%s" must not be empty.', $path));
        }

        return $codes;
    }

    /**
     * Tells whether the dot-separated $path resolves to an existing entry in
     * the config array. A key whose value is null still counts as existing.
     */
    private function hasConfigValue(string $path): bool
    {
        return $this->resolveConfigPath($path)[0];
    }

    /**
     * Returns the raw config value at the dot-separated $path.
     *
     * @throws \InvalidArgumentException when any path segment is missing
     */
    private function requiredValue(string $path): mixed
    {
        [$found, $value] = $this->resolveConfigPath($path);

        if (!$found) {
            throw new \InvalidArgumentException(sprintf('Missing required RetrieX search repair config key "%s".', $path));
        }

        return $value;
    }

    /**
     * Walks the config array along the dot-separated $path.
     *
     * @return array{0: bool, 1: mixed} [true, value] when every segment exists, otherwise [false, null]
     */
    private function resolveConfigPath(string $path): array
    {
        $current = $this->config;

        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return [false, null];
            }

            $current = $current[$segment];
        }

        return [true, $current];
    }
}
|