$config */
    public function __construct(
        private readonly array $config = [],
        private readonly ?DomainVocabularyConfig $vocabulary = null,
    ) {
    }

    /**
     * Returns the `cleanup_profile` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getCleanupProfile(): string
    {
        return $this->string('cleanup_profile');
    }

    /**
     * Returns the trimmed, de-duplicated `known_brands` list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getKnownBrands(): array
    {
        return $this->stringList('known_brands');
    }

    /**
     * Returns the trimmed, de-duplicated `phrases_to_remove` list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getPhrasesToRemove(): array
    {
        return $this->stringList('phrases_to_remove');
    }

    /**
     * Returns the `patterns.history_context` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getHistoryContextPattern(): string
    {
        return $this->string('patterns.history_context');
    }

    /**
     * Renders `patterns.history_context_value_template`, replacing `{fragment}` with the
     * `patterns.history_context` setting.
     *
     * @throws InvalidArgumentException when either setting is missing/empty or the template
     *                                  contains a placeholder other than `{fragment}`
     */
    public function getHistoryContextValuePattern(): string
    {
        return $this->renderTemplate(
            $this->string('patterns.history_context_value_template'),
            ['fragment' => $this->getHistoryContextPattern()],
            'patterns.history_context_value_template'
        );
    }

    /**
     * Returns the trimmed, de-duplicated `filter_search_tokens` list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getFilterSearchTokens(): array
    {
        return $this->stringList('filter_search_tokens');
    }

    /**
     * Returns the trimmed, de-duplicated `search_control_tokens` list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getSearchControlTokens(): array
    {
        return $this->stringList('search_control_tokens');
    }

    /**
     * Reads a list of strings at $path, keeping each entry's surrounding whitespace.
     *
     * Non-scalar entries, entries that are blank after trimming, and exact duplicates are skipped.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the path is missing, not an array, or yields no entry
     */
    private function whitespacePreservingStringList(string $path): array
    {
        return $this->collectStringList($path, false);
    }

    /**
     * Returns the `search_token_corrections` map with keys and values trimmed.
     *
     * @return array<array-key, string>
     *
     * @throws InvalidArgumentException when the map is missing, not an array, or has no usable pair
     */
    public function getSearchTokenCorrections(): array
    {
        return $this->stringMap('search_token_corrections');
    }

    /**
     * Returns the `search_token_canonical_map` map with keys and values trimmed.
     *
     * @return array<array-key, string>
     *
     * @throws InvalidArgumentException when the map is missing, not an array, or has no usable pair
     */
    public function getSearchTokenCanonicalMap(): array
    {
        return $this->stringMap('search_token_canonical_map');
    }

    /**
     * Backward-compatible alias for older callers; delegates to getFilterSearchTokens().
     *
     * @return string[]
     *
     * @throws InvalidArgumentException under the same conditions as getFilterSearchTokens()
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->getFilterSearchTokens();
    }

    /**
     * Returns the `normalization.search` list with surrounding whitespace of entries preserved.
     *
     * NOTE(review): blank entries are dropped and duplicates removed independently for this list
     * and for `normalization.replace`. If callers pair the two lists index-by-index (for example
     * via str_replace), that filtering can shift positions - confirm against the caller.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getNormalizationSearch(): array
    {
        return $this->whitespacePreservingStringList('normalization.search');
    }

    /**
     * Returns the `normalization.replace` list with surrounding whitespace of entries preserved.
     *
     * NOTE(review): blank entries are dropped and duplicates removed independently for this list
     * and for `normalization.search`. If callers pair the two lists index-by-index (for example
     * via str_replace), that filtering can shift positions - confirm against the caller.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getNormalizationReplace(): array
    {
        return $this->whitespacePreservingStringList('normalization.replace');
    }

    /**
     * Returns the `patterns.prompt_sanitize` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getPromptSanitizePattern(): string
    {
        return $this->string('patterns.prompt_sanitize');
    }

    /**
     * Returns the `patterns.whitespace_collapse` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return $this->string('patterns.whitespace_collapse');
    }

    /**
     * Returns the `patterns.whitespace_split` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getWhitespaceSplitPattern(): string
    {
        return $this->string('patterns.whitespace_split');
    }

    /**
     * Builds a character string from the `text.trim_characters` list.
     *
     * The names `space`, `tab`, `lf`, `cr`, `nul` and `vertical_tab` are translated to the
     * corresponding character; any other entry is appended verbatim. Named entries exist because
     * the list reader trims every entry, so a literal whitespace entry would be discarded.
     *
     * @throws InvalidArgumentException when the list is missing, not an array, or has no usable entry
     */
    public function getSearchTextTrimCharacters(): string
    {
        $characters = '';

        foreach ($this->stringList('text.trim_characters') as $item) {
            $characters .= match ($item) {
                'space' => ' ',
                'tab' => "\t",
                'lf' => "\n",
                'cr' => "\r",
                'nul' => "\0",
                'vertical_tab' => "\x0B",
                default => $item,
            };
        }

        return $characters;
    }

    /**
     * Returns the `limits.min_search_token_length` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getMinSearchTokenLength(): int
    {
        return $this->int('limits.min_search_token_length');
    }

    /**
     * Returns the `limits.min_direct_product_token_length` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getMinDirectProductTokenLength(): int
    {
        return $this->int('limits.min_direct_product_token_length');
    }

    /**
     * Returns the `patterns.history_question` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getHistoryQuestionPattern(): string
    {
        return $this->string('patterns.history_question');
    }

    /**
     * Returns the `patterns.price_between` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getPriceBetweenPattern(): string
    {
        return $this->string('patterns.price_between');
    }

    /**
     * Returns the `patterns.price_max` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getPriceMaxPattern(): string
    {
        return $this->string('patterns.price_max');
    }

    /**
     * Returns the `patterns.price_min` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getPriceMinPattern(): string
    {
        return $this->string('patterns.price_min');
    }

    /**
     * Returns three patterns, in order: `patterns.price_removal_between`,
     * `patterns.price_removal_minmax`, and `patterns.price_removal_intent_template` rendered with
     * `{price_pattern}` replaced by $intentConfig->getPricePattern().
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when a setting is missing/empty or the template contains a
     *                                  placeholder other than `{price_pattern}`
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        return [
            $this->string('patterns.price_removal_between'),
            $this->string('patterns.price_removal_minmax'),
            $this->renderTemplate(
                $this->string('patterns.price_removal_intent_template'),
                ['price_pattern' => $intentConfig->getPricePattern()],
                'patterns.price_removal_intent_template'
            ),
        ];
    }

    /**
     * Returns the `patterns.direct_product_digit` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getDirectProductDigitPattern(): string
    {
        return $this->string('patterns.direct_product_digit');
    }

    /**
     * Returns the `patterns.exact_product_number_search_text` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getExactProductNumberSearchTextPattern(): string
    {
        return $this->string('patterns.exact_product_number_search_text');
    }

    /**
     * Returns the `limits.direct_product_max_tokens` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getDirectProductMaxTokens(): int
    {
        return $this->int('limits.direct_product_max_tokens');
    }

    /**
     * Returns the `patterns.model_like` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getModelLikePattern(): string
    {
        return $this->string('patterns.model_like');
    }

    /**
     * Returns the `patterns.accessory_like` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getAccessoryLikePattern(): string
    {
        return $this->string('patterns.accessory_like');
    }

    /**
     * Returns the `patterns.contains_digit` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getContainsDigitPattern(): string
    {
        return $this->string('patterns.contains_digit');
    }

    /**
     * Returns the `patterns.model_number_token` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getModelNumberTokenPattern(): string
    {
        return $this->string('patterns.model_number_token');
    }

    /**
     * Returns the `patterns.model_context_token` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getModelContextTokenPattern(): string
    {
        return $this->string('patterns.model_context_token');
    }

    /**
     * Returns the `patterns.model_suffix_token` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getModelSuffixTokenPattern(): string
    {
        return $this->string('patterns.model_suffix_token');
    }

    /**
     * Returns the `limits.model_context_token_window` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getModelContextTokenWindow(): int
    {
        return $this->int('limits.model_context_token_window');
    }

    /**
     * Returns the `limits.min_meaningful_alpha_token_length` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getMinMeaningfulAlphaTokenLength(): int
    {
        return $this->int('limits.min_meaningful_alpha_token_length');
    }

    /**
     * Returns the `limits.max_shop_search_tokens` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not an integer
     */
    public function getMaxShopSearchTokens(): int
    {
        return $this->int('limits.max_shop_search_tokens');
    }

    /**
     * Returns the `patterns.instruction_or_presentation_token` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getInstructionOrPresentationTokenPattern(): string
    {
        return $this->string('patterns.instruction_or_presentation_token');
    }

    /**
     * Returns the `patterns.measurement_value_token` setting.
     *
     * @throws InvalidArgumentException when the setting is missing or not a non-empty scalar
     */
    public function getMeasurementValueTokenPattern(): string
    {
        return $this->string('patterns.measurement_value_token');
    }

    /**
     * Returns the `semantic_shop_search_tokens` list when configured; otherwise the vocabulary
     * view named by `vocabulary_views.semantic_shop_search_tokens`.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when neither source is available or the result is empty
     */
    public function getSemanticShopSearchTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'semantic_shop_search_tokens',
            'vocabulary_views.semantic_shop_search_tokens'
        );
    }

    /**
     * Renders `patterns.exact_token_removal_template` with `{token}` replaced by $token, escaped
     * with preg_quote() for the `/` delimiter.
     *
     * @throws InvalidArgumentException when the template is missing/empty or contains a
     *                                  placeholder other than `{token}`
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        return $this->renderTemplate(
            $this->string('patterns.exact_token_removal_template'),
            ['token' => preg_quote($token, '/')],
            'patterns.exact_token_removal_template'
        );
    }

    /**
     * Renders `patterns.brand_part_of_model_template` with `{brand}` replaced by $brand, escaped
     * with preg_quote() for the `/` delimiter.
     *
     * @throws InvalidArgumentException when the template is missing/empty or contains a
     *                                  placeholder other than `{brand}`
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        return $this->renderTemplate(
            $this->string('patterns.brand_part_of_model_template'),
            ['brand' => preg_quote($brand, '/')],
            'patterns.brand_part_of_model_template'
        );
    }

    /**
     * Reads a list of strings at $path, trimming every entry.
     *
     * Non-scalar entries, entries that are empty after trimming, and duplicates are skipped.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the path is missing, not an array, or yields no entry
     */
    private function stringList(string $path): array
    {
        return $this->collectStringList($path, true);
    }

    /**
     * Shared implementation behind stringList() and whitespacePreservingStringList().
     *
     * @param bool $trimItems true to store trimmed entries, false to store them as configured
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the path is missing, not an array, or yields no entry
     */
    private function collectStringList(string $path, bool $trimItems): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a list of non-empty strings');
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = (string) $item;
            if ($trimItems) {
                $item = trim($item);
            }

            // Blank-after-trim entries are rejected in both modes; only the stored form differs.
            if (trim($item) === '' || in_array($item, $out, true)) {
                continue;
            }

            $out[] = $item;
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty string');
        }

        return $out;
    }

    /**
     * Returns the list at $configPath when that path exists; otherwise resolves the vocabulary
     * view whose name is stored at $viewPathConfigPath.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when $configPath is absent and no vocabulary was injected,
     *                                  when the view name setting is missing/empty, or when the
     *                                  referenced view is empty
     */
    private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
    {
        if ($this->hasPath($configPath)) {
            return $this->stringList($configPath);
        }

        if ($this->vocabulary === null) {
            throw $this->missing($configPath);
        }

        $viewPath = $this->string($viewPathConfigPath);
        $terms = $this->vocabulary->view($viewPath, []);
        if ($terms === []) {
            throw $this->invalid(
                $viewPathConfigPath,
                sprintf('references empty vocabulary view "%s"', $viewPath)
            );
        }

        return $terms;
    }

    /**
     * Reads a map at $path, trimming keys and values.
     *
     * Pairs with a non-scalar value, or whose key or value is empty after trimming, are skipped.
     * When two keys trim to the same string the later pair wins. PHP stores integer-like string
     * keys as integers.
     *
     * @return array<array-key, string>
     *
     * @throws InvalidArgumentException when the path is missing, not an array, or yields no pair
     */
    private function stringMap(string $path): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a map of non-empty strings');
        }

        $out = [];
        foreach ($value as $key => $item) {
            // Array keys are always int|string, so only the value needs a scalar check.
            if (!is_scalar($item)) {
                continue;
            }

            $cleanKey = trim((string) $key);
            $cleanValue = trim((string) $item);
            if ($cleanKey !== '' && $cleanValue !== '') {
                $out[$cleanKey] = $cleanValue;
            }
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty mapping');
        }

        return $out;
    }

    /**
     * Reads a scalar at $path and returns it cast to string.
     *
     * Only the empty string is rejected; the value is not trimmed, so whitespace-only values are
     * returned unchanged.
     *
     * @throws InvalidArgumentException when the path is missing, not scalar, or casts to ''
     */
    private function string(string $path): string
    {
        $value = $this->value($path);
        if (!is_scalar($value)) {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        $value = (string) $value;
        if ($value === '') {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        return $value;
    }

    /**
     * Reads an integer at $path; numeric strings and floats are not accepted.
     *
     * @throws InvalidArgumentException when the path is missing or the value is not an int
     */
    private function int(string $path): int
    {
        $value = $this->value($path);
        if (!is_int($value)) {
            throw $this->invalid($path, 'must be an integer');
        }

        return $value;
    }

    /**
     * Reports whether the dot-separated $path resolves to a key in the configuration array.
     * A key whose value is null still counts as present.
     */
    private function hasPath(string $path): bool
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return false;
            }

            $current = $current[$segment];
        }

        return true;
    }

    /**
     * Returns the value stored at the dot-separated $path.
     *
     * @throws InvalidArgumentException when any segment of the path does not exist
     */
    private function value(string $path): mixed
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                throw $this->missing($path);
            }

            $current = $current[$segment];
        }

        return $current;
    }

    /**
     * Replaces every `{name}` placeholder of $template with its value from $replacements.
     *
     * Placeholders are validated on the template itself, before substitution, and substitution
     * happens in a single pass. Inserted values are therefore never re-scanned: a regex fragment
     * containing brace constructs such as `\p{L}` or `\x{A0}` is no longer mistaken for an
     * unresolved placeholder, and a value cannot be rewritten by a later replacement.
     *
     * @param array<string, string> $replacements placeholder name (without braces) => value
     * @param string                $path         configuration path reported in the error message
     *
     * @throws InvalidArgumentException when the template uses a placeholder missing from $replacements
     */
    private function renderTemplate(string $template, array $replacements, string $path): string
    {
        if (preg_match_all('/\{([A-Za-z_][A-Za-z0-9_]*)\}/', $template, $matches) > 0) {
            foreach ($matches[1] as $name) {
                if (!array_key_exists($name, $replacements)) {
                    throw $this->invalid($path, 'contains unresolved placeholders');
                }
            }
        }

        $substitutions = [];
        foreach ($replacements as $placeholder => $value) {
            $substitutions['{' . $placeholder . '}'] = $value;
        }

        // strtr() with an array performs all substitutions in one pass over the template.
        return strtr($template, $substitutions);
    }

    /**
     * Creates (does not throw) the exception used for an absent configuration path.
     */
    private function missing(string $path): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" is missing.', $path));
    }

    /**
     * Creates (does not throw) the exception used for a present but unusable configuration value.
     */
    private function invalid(string $path, string $reason): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" %s.', $path, $reason));
    }
}