<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Config;
|
|
|
|
use InvalidArgumentException;
|
|
|
|
/**
 * Typed, validating read access to the commerce query parser configuration.
 *
 * Values are looked up in a nested array using dot-separated paths
 * (e.g. "patterns.price_max"). Every accessor throws an
 * InvalidArgumentException when its path is missing or holds a value of the
 * wrong shape, so a broken configuration fails at the point of use instead of
 * silently producing an empty value.
 */
final class CommerceQueryParserConfig
{
    /**
     * @param array<string, mixed> $config Nested configuration tree.
     * @param DomainVocabularyConfig|null $vocabulary Optional vocabulary used as a fallback source
     *                                                 by getSemanticShopSearchTokens().
     */
    public function __construct(
        private readonly array $config = [],
        private readonly ?DomainVocabularyConfig $vocabulary = null,
    ) {
    }

    /** Configured `cleanup_profile` value. */
    public function getCleanupProfile(): string
    {
        return $this->string('cleanup_profile');
    }

    /** @return string[] Trimmed, de-duplicated `known_brands` entries. */
    public function getKnownBrands(): array
    {
        return $this->stringList('known_brands');
    }

    /** @return string[] Trimmed, de-duplicated `phrases_to_remove` entries. */
    public function getPhrasesToRemove(): array
    {
        return $this->stringList('phrases_to_remove');
    }

    /** Configured `patterns.history_context` value. */
    public function getHistoryContextPattern(): string
    {
        return $this->string('patterns.history_context');
    }

    /**
     * Renders `patterns.history_context_value_template`, replacing `{fragment}`
     * with the `patterns.history_context` value.
     */
    public function getHistoryContextValuePattern(): string
    {
        return $this->renderTemplate(
            $this->string('patterns.history_context_value_template'),
            ['fragment' => $this->getHistoryContextPattern()],
            'patterns.history_context_value_template',
        );
    }

    /** @return string[] Trimmed, de-duplicated `filter_search_tokens` entries. */
    public function getFilterSearchTokens(): array
    {
        return $this->stringList('filter_search_tokens');
    }

    /** @return string[] Trimmed, de-duplicated `search_control_tokens` entries. */
    public function getSearchControlTokens(): array
    {
        return $this->stringList('search_control_tokens');
    }

    /**
     * Reads a list of strings while keeping each entry's surrounding whitespace.
     *
     * Non-scalar entries, entries that are empty or whitespace-only, and exact
     * duplicates are dropped.
     *
     * NOTE(review): dropping entries shifts the positions of the remaining ones.
     * If `normalization.search` and `normalization.replace` are consumed as
     * index-aligned lists (e.g. by str_replace()), the two lists can fall out of
     * step - confirm against the callers.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the path is missing, is not an array,
     *                                  or yields no usable entry.
     */
    private function whitespacePreservingStringList(string $path): array
    {
        return $this->collectUniqueStrings($path, preserveWhitespace: true);
    }

    /** @return array<string, string> Trimmed `search_token_corrections` map. */
    public function getSearchTokenCorrections(): array
    {
        return $this->stringMap('search_token_corrections');
    }

    /** @return array<string, string> Trimmed `search_token_canonical_map` map. */
    public function getSearchTokenCanonicalMap(): array
    {
        return $this->stringMap('search_token_canonical_map');
    }

    /**
     * Backward-compatible alias for older callers.
     *
     * Despite the name this returns the token list, exactly like
     * getFilterSearchTokens().
     *
     * @return string[]
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->getFilterSearchTokens();
    }

    /** @return string[] `normalization.search` entries with their whitespace preserved. */
    public function getNormalizationSearch(): array
    {
        return $this->whitespacePreservingStringList('normalization.search');
    }

    /** @return string[] `normalization.replace` entries with their whitespace preserved. */
    public function getNormalizationReplace(): array
    {
        return $this->whitespacePreservingStringList('normalization.replace');
    }

    /** Configured `patterns.prompt_sanitize` value. */
    public function getPromptSanitizePattern(): string
    {
        return $this->string('patterns.prompt_sanitize');
    }

    /** Configured `patterns.whitespace_collapse` value. */
    public function getWhitespaceCollapsePattern(): string
    {
        return $this->string('patterns.whitespace_collapse');
    }

    /** Configured `patterns.whitespace_split` value. */
    public function getWhitespaceSplitPattern(): string
    {
        return $this->string('patterns.whitespace_split');
    }

    /**
     * Builds one string by concatenating the `text.trim_characters` entries.
     *
     * Because list entries are trimmed on read, whitespace and control
     * characters cannot be written literally in the configuration; the symbolic
     * names below stand in for them. Any other entry is appended unchanged.
     */
    public function getSearchTextTrimCharacters(): string
    {
        $characters = '';

        foreach ($this->stringList('text.trim_characters') as $entry) {
            $characters .= match ($entry) {
                'space' => ' ',
                'tab' => "\t",
                'lf' => "\n",
                'cr' => "\r",
                'nul' => "\0",
                'vertical_tab' => "\x0B",
                default => $entry,
            };
        }

        return $characters;
    }

    /** Configured `limits.min_search_token_length` value. */
    public function getMinSearchTokenLength(): int
    {
        return $this->int('limits.min_search_token_length');
    }

    /** Configured `limits.min_direct_product_token_length` value. */
    public function getMinDirectProductTokenLength(): int
    {
        return $this->int('limits.min_direct_product_token_length');
    }

    /** Configured `patterns.history_question` value. */
    public function getHistoryQuestionPattern(): string
    {
        return $this->string('patterns.history_question');
    }

    /** Configured `patterns.price_between` value. */
    public function getPriceBetweenPattern(): string
    {
        return $this->string('patterns.price_between');
    }

    /** Configured `patterns.price_max` value. */
    public function getPriceMaxPattern(): string
    {
        return $this->string('patterns.price_max');
    }

    /** Configured `patterns.price_min` value. */
    public function getPriceMinPattern(): string
    {
        return $this->string('patterns.price_min');
    }

    /**
     * Returns the three price-removal patterns in a fixed order: the "between"
     * pattern, the "min/max" pattern, and the intent template rendered with the
     * intent configuration's price pattern as `{price_pattern}`.
     *
     * @return string[]
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        return [
            $this->string('patterns.price_removal_between'),
            $this->string('patterns.price_removal_minmax'),
            $this->renderTemplate(
                $this->string('patterns.price_removal_intent_template'),
                ['price_pattern' => $intentConfig->getPricePattern()],
                'patterns.price_removal_intent_template',
            ),
        ];
    }

    /** Configured `patterns.direct_product_digit` value. */
    public function getDirectProductDigitPattern(): string
    {
        return $this->string('patterns.direct_product_digit');
    }

    /** Configured `patterns.exact_product_number_search_text` value. */
    public function getExactProductNumberSearchTextPattern(): string
    {
        return $this->string('patterns.exact_product_number_search_text');
    }

    /** Configured `limits.direct_product_max_tokens` value. */
    public function getDirectProductMaxTokens(): int
    {
        return $this->int('limits.direct_product_max_tokens');
    }

    /** Configured `patterns.model_like` value. */
    public function getModelLikePattern(): string
    {
        return $this->string('patterns.model_like');
    }

    /** Configured `patterns.accessory_like` value. */
    public function getAccessoryLikePattern(): string
    {
        return $this->string('patterns.accessory_like');
    }

    /** Configured `patterns.contains_digit` value. */
    public function getContainsDigitPattern(): string
    {
        return $this->string('patterns.contains_digit');
    }

    /** Configured `patterns.model_number_token` value. */
    public function getModelNumberTokenPattern(): string
    {
        return $this->string('patterns.model_number_token');
    }

    /** Configured `patterns.model_context_token` value. */
    public function getModelContextTokenPattern(): string
    {
        return $this->string('patterns.model_context_token');
    }

    /** Configured `patterns.model_suffix_token` value. */
    public function getModelSuffixTokenPattern(): string
    {
        return $this->string('patterns.model_suffix_token');
    }

    /** Configured `limits.model_context_token_window` value. */
    public function getModelContextTokenWindow(): int
    {
        return $this->int('limits.model_context_token_window');
    }

    /** Configured `limits.min_meaningful_alpha_token_length` value. */
    public function getMinMeaningfulAlphaTokenLength(): int
    {
        return $this->int('limits.min_meaningful_alpha_token_length');
    }

    /** Configured `limits.max_shop_search_tokens` value. */
    public function getMaxShopSearchTokens(): int
    {
        return $this->int('limits.max_shop_search_tokens');
    }

    /** Configured `patterns.instruction_or_presentation_token` value. */
    public function getInstructionOrPresentationTokenPattern(): string
    {
        return $this->string('patterns.instruction_or_presentation_token');
    }

    /** Configured `patterns.measurement_value_token` value. */
    public function getMeasurementValueTokenPattern(): string
    {
        return $this->string('patterns.measurement_value_token');
    }

    /**
     * Returns `semantic_shop_search_tokens` when configured, otherwise the
     * vocabulary view named by `vocabulary_views.semantic_shop_search_tokens`.
     *
     * @return string[]
     */
    public function getSemanticShopSearchTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'semantic_shop_search_tokens',
            'vocabulary_views.semantic_shop_search_tokens',
        );
    }

    /**
     * Renders `patterns.exact_token_removal_template` for one token.
     *
     * The token is escaped with preg_quote() using "/" as the delimiter, so the
     * template is expected to be a "/"-delimited regular expression.
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        return $this->renderTemplate(
            $this->string('patterns.exact_token_removal_template'),
            ['token' => preg_quote($token, '/')],
            'patterns.exact_token_removal_template',
        );
    }

    /**
     * Renders `patterns.brand_part_of_model_template` for one brand.
     *
     * The brand is escaped with preg_quote() using "/" as the delimiter, so the
     * template is expected to be a "/"-delimited regular expression.
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        return $this->renderTemplate(
            $this->string('patterns.brand_part_of_model_template'),
            ['brand' => preg_quote($brand, '/')],
            'patterns.brand_part_of_model_template',
        );
    }

    /**
     * Reads a list of strings, trimming every entry.
     *
     * Non-scalar entries, entries that are empty after trimming, and duplicates
     * are dropped.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the path is missing, is not an array,
     *                                  or yields no usable entry.
     */
    private function stringList(string $path): array
    {
        return $this->collectUniqueStrings($path, preserveWhitespace: false);
    }

    /**
     * Shared implementation of stringList() and whitespacePreservingStringList().
     *
     * @param bool $preserveWhitespace When true the entry is returned exactly as configured;
     *                                 when false it is returned trimmed. Blank entries are
     *                                 rejected in both modes.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the path is missing, is not an array,
     *                                  or yields no usable entry.
     */
    private function collectUniqueStrings(string $path, bool $preserveWhitespace): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a list of non-empty strings');
        }

        $seen = [];
        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $raw = (string) $item;
            $trimmed = trim($raw);
            if ($trimmed === '') {
                continue;
            }

            $entry = $preserveWhitespace ? $raw : $trimmed;

            // Keyed lookup instead of in_array(): same exact-string uniqueness,
            // without rescanning the output for every entry.
            if (isset($seen[$entry])) {
                continue;
            }

            $seen[$entry] = true;
            $out[] = $entry;
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty string');
        }

        return $out;
    }

    /**
     * Returns the explicitly configured list when `$configPath` exists;
     * otherwise resolves the vocabulary view whose name is stored at
     * `$viewPathConfigPath`.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When neither source is available, or the
     *                                  referenced vocabulary view is empty.
     */
    private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
    {
        if ($this->hasPath($configPath)) {
            return $this->stringList($configPath);
        }

        // Without a vocabulary there is no fallback, so the list itself is what is missing.
        if ($this->vocabulary === null) {
            throw $this->missing($configPath);
        }

        $viewPath = $this->string($viewPathConfigPath);
        $terms = $this->vocabulary->view($viewPath, []);

        if ($terms === []) {
            throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewPath));
        }

        return $terms;
    }

    /**
     * Reads a map of strings, trimming keys and values.
     *
     * Pairs with a non-scalar value, or with a key or value that is empty after
     * trimming, are dropped. When two keys are equal after trimming, the later
     * pair wins.
     *
     * @return array<string, string>
     *
     * @throws InvalidArgumentException When the path is missing, is not an array,
     *                                  or yields no usable pair.
     */
    private function stringMap(string $path): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a map of non-empty strings');
        }

        $out = [];
        foreach ($value as $key => $item) {
            // Array keys are always int|string, so only the value needs a scalar check.
            if (!is_scalar($item)) {
                continue;
            }

            $cleanKey = trim((string) $key);
            $cleanValue = trim((string) $item);
            if ($cleanKey === '' || $cleanValue === '') {
                continue;
            }

            $out[$cleanKey] = $cleanValue;
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty mapping');
        }

        return $out;
    }

    /**
     * Reads a scalar and returns it cast to string, without trimming.
     *
     * @throws InvalidArgumentException When the path is missing, the value is not
     *                                  scalar, or its string form is empty
     *                                  (which includes boolean false).
     */
    private function string(string $path): string
    {
        $value = $this->value($path);
        if (!is_scalar($value)) {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        $text = (string) $value;
        if ($text === '') {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        return $text;
    }

    /**
     * Reads a value that must already be a native integer; numeric strings and
     * floats are rejected.
     *
     * @throws InvalidArgumentException When the path is missing or the value is not an int.
     */
    private function int(string $path): int
    {
        $value = $this->value($path);
        if (!is_int($value)) {
            throw $this->invalid($path, 'must be an integer');
        }

        return $value;
    }

    /** Tells whether the dotted path exists; a key holding null still counts as present. */
    private function hasPath(string $path): bool
    {
        return $this->lookup($path)[0];
    }

    /**
     * Returns the value stored at the dotted path.
     *
     * @throws InvalidArgumentException When any segment of the path is absent.
     */
    private function value(string $path): mixed
    {
        [$found, $value] = $this->lookup($path);
        if (!$found) {
            throw $this->missing($path);
        }

        return $value;
    }

    /**
     * Walks the configuration tree along a dot-separated path.
     *
     * @return array{0: bool, 1: mixed} Whether the path exists, and the value found (null when absent).
     */
    private function lookup(string $path): array
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return [false, null];
            }

            $current = $current[$segment];
        }

        return [true, $current];
    }

    /**
     * Substitutes `{name}` placeholders in a template.
     *
     * The template is validated before substitution, so braces inside the
     * inserted values (for example `\p{L}` in a regular-expression fragment) are
     * never mistaken for unresolved placeholders. Substitution is a single
     * pass, so an inserted value is never expanded again.
     *
     * @param array<string, string> $replacements Placeholder name => replacement text.
     * @param string                $path         Configuration path of the template, used in the error message.
     *
     * @throws InvalidArgumentException When the template contains a placeholder
     *                                  that has no replacement.
     */
    private function renderTemplate(string $template, array $replacements, string $path): string
    {
        // Brace groups that belong to the regex escapes \p{..}, \P{..}, \x{..},
        // \g{..} and \k{..} are syntax, not placeholders, so the lookbehind skips them.
        preg_match_all('/(?<!\\\\[pPxgk])\{([A-Za-z_][A-Za-z0-9_]*)\}/', $template, $matches);

        foreach ($matches[1] as $name) {
            if (!array_key_exists($name, $replacements)) {
                throw $this->invalid($path, 'contains unresolved placeholders');
            }
        }

        $pairs = [];
        foreach ($replacements as $placeholder => $value) {
            $pairs['{' . $placeholder . '}'] = $value;
        }

        return strtr($template, $pairs);
    }

    /** Builds the exception reported for an absent configuration path. */
    private function missing(string $path): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" is missing.', $path));
    }

    /** Builds the exception reported for a present but unusable configuration value. */
    private function invalid(string $path, string $reason): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" %s.', $path, $reason));
    }
}
|