Files
MtoRagSystem/src/Config/CommerceQueryParserConfig.php
2026-05-01 12:10:32 +02:00

409 lines
11 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
use InvalidArgumentException;
/**
 * Typed, validating accessor for the commerce query parser configuration.
 *
 * The configuration is a nested array addressed with dot-separated paths
 * (for example "patterns.price_max"). Values are read and validated on demand:
 * every getter throws an InvalidArgumentException when its entry is missing or
 * has the wrong shape, while the constructor only stores the array.
 */
final class CommerceQueryParserConfig
{
    /**
     * Matches `{name}` placeholders in pattern templates.
     *
     * The lookbehind skips brace groups that belong to a PCRE escape sequence
     * (`\p{L}`, `\P{Lu}`, `\x{E9}`, `\g{name}`, `\k{name}`); those are regex
     * syntax, not placeholders.
     */
    private const PLACEHOLDER_PATTERN = '/(?<!\\\\[pPxgk])\{[A-Za-z_][A-Za-z0-9_]*\}/';

    /**
     * @param array<string, mixed> $config Nested configuration tree.
     */
    public function __construct(
        private readonly array $config = [],
    ) {
    }

    /**
     * Reads `known_brands` as a trimmed, de-duplicated list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getKnownBrands(): array
    {
        return $this->stringList('known_brands');
    }

    /**
     * Reads `phrases_to_remove` as a trimmed, de-duplicated list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getPhrasesToRemove(): array
    {
        return $this->stringList('phrases_to_remove');
    }

    /**
     * Reads `patterns.history_context`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getHistoryContextPattern(): string
    {
        return $this->string('patterns.history_context');
    }

    /**
     * Renders `patterns.history_context_value_template`, replacing `{fragment}`
     * with the value of getHistoryContextPattern().
     *
     * @throws InvalidArgumentException When an entry is missing or the template
     *                                  references an unknown placeholder.
     */
    public function getHistoryContextValuePattern(): string
    {
        return $this->renderTemplate(
            $this->string('patterns.history_context_value_template'),
            ['fragment' => $this->getHistoryContextPattern()],
            'patterns.history_context_value_template',
        );
    }

    /**
     * Reads `filter_search_tokens` as a trimmed, de-duplicated list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getFilterSearchTokens(): array
    {
        return $this->stringList('filter_search_tokens');
    }

    /**
     * Reads `search_control_tokens` as a trimmed, de-duplicated list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getSearchControlTokens(): array
    {
        return $this->stringList('search_control_tokens');
    }

    /**
     * Reads `search_token_corrections` as a map of trimmed strings.
     *
     * @return array<string, string>
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getSearchTokenCorrections(): array
    {
        return $this->stringMap('search_token_corrections');
    }

    /**
     * Reads `search_token_canonical_map` as a map of trimmed strings.
     *
     * @return array<string, string>
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getSearchTokenCanonicalMap(): array
    {
        return $this->stringMap('search_token_canonical_map');
    }

    /**
     * Backward-compatible alias for older callers.
     *
     * Despite the name this returns the token list, not a regex pattern;
     * it is identical to getFilterSearchTokens().
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->getFilterSearchTokens();
    }

    /**
     * Reads `normalization.search` entry-for-entry.
     *
     * Nothing is trimmed, de-duplicated or dropped: entry i is paired by index
     * with entry i of getNormalizationReplace() (presumably via str_replace();
     * confirm against the caller), so removing any entry would shift every
     * later pair.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing, empty or
     *                                  contains non-scalar values.
     */
    public function getNormalizationSearch(): array
    {
        return $this->positionalStringList('normalization.search');
    }

    /**
     * Reads `normalization.replace` entry-for-entry.
     *
     * Whitespace-only, empty and repeated replacements are all legitimate
     * (for example mapping both "-" and "_" to " ") and are kept in place so
     * the list stays aligned with getNormalizationSearch().
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing, empty or
     *                                  contains non-scalar values.
     */
    public function getNormalizationReplace(): array
    {
        return $this->positionalStringList('normalization.replace');
    }

    /**
     * Reads `patterns.prompt_sanitize`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getPromptSanitizePattern(): string
    {
        return $this->string('patterns.prompt_sanitize');
    }

    /**
     * Reads `patterns.whitespace_collapse`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return $this->string('patterns.whitespace_collapse');
    }

    /**
     * Reads `patterns.whitespace_split`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getWhitespaceSplitPattern(): string
    {
        return $this->string('patterns.whitespace_split');
    }

    /**
     * Builds a character string from `text.trim_characters`.
     *
     * stringList() trims every entry, so whitespace and control characters
     * cannot be written literally in the list. The aliases `space`, `tab`,
     * `lf`, `cr`, `nul` and `vertical_tab` stand in for them; any other entry
     * is appended verbatim.
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getSearchTextTrimCharacters(): string
    {
        $characters = '';
        foreach ($this->stringList('text.trim_characters') as $item) {
            $characters .= match ($item) {
                'space' => ' ',
                'tab' => "\t",
                'lf' => "\n",
                'cr' => "\r",
                'nul' => "\0",
                'vertical_tab' => "\x0B",
                default => $item,
            };
        }

        return $characters;
    }

    /**
     * Reads `limits.min_search_token_length`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getMinSearchTokenLength(): int
    {
        return $this->int('limits.min_search_token_length');
    }

    /**
     * Reads `limits.min_direct_product_token_length`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getMinDirectProductTokenLength(): int
    {
        return $this->int('limits.min_direct_product_token_length');
    }

    /**
     * Reads `patterns.history_question`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getHistoryQuestionPattern(): string
    {
        return $this->string('patterns.history_question');
    }

    /**
     * Reads `patterns.price_between`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getPriceBetweenPattern(): string
    {
        return $this->string('patterns.price_between');
    }

    /**
     * Reads `patterns.price_max`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getPriceMaxPattern(): string
    {
        return $this->string('patterns.price_max');
    }

    /**
     * Reads `patterns.price_min`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getPriceMinPattern(): string
    {
        return $this->string('patterns.price_min');
    }

    /**
     * Returns the three price removal patterns, in this order:
     * `patterns.price_removal_between`, `patterns.price_removal_minmax`, and
     * `patterns.price_removal_intent_template` rendered with `{price_pattern}`
     * replaced by the intent configuration's price pattern.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When an entry is missing or the template
     *                                  references an unknown placeholder.
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        return [
            $this->string('patterns.price_removal_between'),
            $this->string('patterns.price_removal_minmax'),
            $this->renderTemplate(
                $this->string('patterns.price_removal_intent_template'),
                ['price_pattern' => $intentConfig->getPricePattern()],
                'patterns.price_removal_intent_template',
            ),
        ];
    }

    /**
     * Reads `patterns.direct_product_digit`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getDirectProductDigitPattern(): string
    {
        return $this->string('patterns.direct_product_digit');
    }

    /**
     * Reads `limits.direct_product_max_tokens`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getDirectProductMaxTokens(): int
    {
        return $this->int('limits.direct_product_max_tokens');
    }

    /**
     * Reads `patterns.model_like`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getModelLikePattern(): string
    {
        return $this->string('patterns.model_like');
    }

    /**
     * Reads `patterns.accessory_like`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getAccessoryLikePattern(): string
    {
        return $this->string('patterns.accessory_like');
    }

    /**
     * Reads `patterns.contains_digit`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getContainsDigitPattern(): string
    {
        return $this->string('patterns.contains_digit');
    }

    /**
     * Reads `patterns.model_number_token`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getModelNumberTokenPattern(): string
    {
        return $this->string('patterns.model_number_token');
    }

    /**
     * Reads `patterns.model_context_token`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getModelContextTokenPattern(): string
    {
        return $this->string('patterns.model_context_token');
    }

    /**
     * Reads `patterns.model_suffix_token`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getModelSuffixTokenPattern(): string
    {
        return $this->string('patterns.model_suffix_token');
    }

    /**
     * Reads `limits.model_context_token_window`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getModelContextTokenWindow(): int
    {
        return $this->int('limits.model_context_token_window');
    }

    /**
     * Reads `limits.min_meaningful_alpha_token_length`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getMinMeaningfulAlphaTokenLength(): int
    {
        return $this->int('limits.min_meaningful_alpha_token_length');
    }

    /**
     * Reads `limits.max_shop_search_tokens`.
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    public function getMaxShopSearchTokens(): int
    {
        return $this->int('limits.max_shop_search_tokens');
    }

    /**
     * Reads `patterns.instruction_or_presentation_token`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getInstructionOrPresentationTokenPattern(): string
    {
        return $this->string('patterns.instruction_or_presentation_token');
    }

    /**
     * Reads `patterns.measurement_value_token`.
     *
     * @throws InvalidArgumentException When the entry is missing or empty.
     */
    public function getMeasurementValueTokenPattern(): string
    {
        return $this->string('patterns.measurement_value_token');
    }

    /**
     * Reads `semantic_shop_search_tokens` as a trimmed, de-duplicated list.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing or invalid.
     */
    public function getSemanticShopSearchTokens(): array
    {
        return $this->stringList('semantic_shop_search_tokens');
    }

    /**
     * Renders `patterns.exact_token_removal_template` for one token.
     *
     * The token is escaped with preg_quote() using "/" as the delimiter, so the
     * template is presumably a "/"-delimited pattern (confirm in the config).
     *
     * @throws InvalidArgumentException When the template is missing or
     *                                  references an unknown placeholder.
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        return $this->renderTemplate(
            $this->string('patterns.exact_token_removal_template'),
            ['token' => preg_quote($token, '/')],
            'patterns.exact_token_removal_template',
        );
    }

    /**
     * Renders `patterns.brand_part_of_model_template` for one brand.
     *
     * The brand is escaped with preg_quote() using "/" as the delimiter, so the
     * template is presumably a "/"-delimited pattern (confirm in the config).
     *
     * @throws InvalidArgumentException When the template is missing or
     *                                  references an unknown placeholder.
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        return $this->renderTemplate(
            $this->string('patterns.brand_part_of_model_template'),
            ['brand' => preg_quote($brand, '/')],
            'patterns.brand_part_of_model_template',
        );
    }

    /**
     * Reads a list of distinct, trimmed, non-empty strings.
     *
     * Non-scalar entries, blank entries and repeats are skipped, so the result
     * may be shorter than the configured list. Do not use this for lists whose
     * entries are matched to another list by position.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing, is not an
     *                                  array, or yields no usable string.
     */
    private function stringList(string $path): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a list of non-empty strings');
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = trim((string) $item);
            if ($item === '' || in_array($item, $out, true)) {
                continue;
            }

            $out[] = $item;
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty string');
        }

        return $out;
    }

    /**
     * Reads a list whose entries are matched to another list by position.
     *
     * Every entry is cast to string and kept verbatim, in order: no trimming,
     * no de-duplication, no skipping. A non-scalar entry is rejected rather
     * than skipped, because skipping would silently shift all later pairs.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException When the entry is missing, is not an
     *                                  array, is empty, or holds a non-scalar.
     */
    private function positionalStringList(string $path): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a list of strings');
        }

        if ($value === []) {
            throw $this->invalid($path, 'must contain at least one string');
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                throw $this->invalid($path, 'must contain only scalar string values');
            }

            $out[] = (string) $item;
        }

        return $out;
    }

    /**
     * Reads a map of trimmed, non-empty keys to trimmed, non-empty values.
     *
     * Pairs with a blank key, a blank value or a non-scalar value are skipped.
     * Keys that look like decimal integers come back as int keys, because PHP
     * arrays normalise such keys.
     *
     * @return array<string, string>
     *
     * @throws InvalidArgumentException When the entry is missing, is not an
     *                                  array, or yields no usable pair.
     */
    private function stringMap(string $path): array
    {
        $value = $this->value($path);
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a map of non-empty strings');
        }

        $out = [];
        foreach ($value as $key => $item) {
            // Array keys are always int|string, so only the value needs a type check.
            if (!is_scalar($item)) {
                continue;
            }

            $cleanKey = trim((string) $key);
            $cleanValue = trim((string) $item);
            if ($cleanKey !== '' && $cleanValue !== '') {
                $out[$cleanKey] = $cleanValue;
            }
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one non-empty mapping');
        }

        return $out;
    }

    /**
     * Reads a scalar entry and returns it as a non-empty string.
     *
     * The value is cast but not trimmed, so surrounding whitespace is kept.
     *
     * @throws InvalidArgumentException When the entry is missing, non-scalar,
     *                                  or casts to an empty string.
     */
    private function string(string $path): string
    {
        $value = $this->value($path);
        if (!is_scalar($value)) {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        $value = (string) $value;
        if ($value === '') {
            throw $this->invalid($path, 'must be a non-empty string');
        }

        return $value;
    }

    /**
     * Reads an entry that must already be a native int (numeric strings are
     * rejected).
     *
     * @throws InvalidArgumentException When the entry is missing or not an int.
     */
    private function int(string $path): int
    {
        $value = $this->value($path);
        if (!is_int($value)) {
            throw $this->invalid($path, 'must be an integer');
        }

        return $value;
    }

    /**
     * Walks the configuration tree along a dot-separated path.
     *
     * array_key_exists() is used so that an explicit null value counts as
     * present; the typed readers then reject it with their own message.
     *
     * @throws InvalidArgumentException When any path segment does not exist.
     */
    private function value(string $path): mixed
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                throw $this->missing($path);
            }

            $current = $current[$segment];
        }

        return $current;
    }

    /**
     * Replaces `{name}` placeholders in a pattern template.
     *
     * Validation runs on the raw template, not on the rendered result, so
     * braces inside a substituted value (for example a regex fragment that
     * contains `\p{L}`) are never mistaken for an unresolved placeholder.
     * Substitution is a single strtr() pass, so inserted values are not
     * scanned again for further placeholders.
     *
     * @param array<string, string> $replacements Placeholder name => value.
     * @param string                $path         Config path used in the error message.
     *
     * @throws InvalidArgumentException When the template references a
     *                                  placeholder that has no replacement.
     */
    private function renderTemplate(string $template, array $replacements, string $path): string
    {
        $pairs = [];
        foreach ($replacements as $placeholder => $value) {
            $pairs['{' . $placeholder . '}'] = $value;
        }

        if (preg_match_all(self::PLACEHOLDER_PATTERN, $template, $matches) > 0) {
            foreach ($matches[0] as $token) {
                if (!array_key_exists($token, $pairs)) {
                    throw $this->invalid($path, 'contains unresolved placeholders');
                }
            }
        }

        return strtr($template, $pairs);
    }

    /**
     * Builds the exception for a path that does not exist in the config.
     */
    private function missing(string $path): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" is missing.', $path));
    }

    /**
     * Builds the exception for a path whose value has the wrong shape.
     */
    private function invalid(string $path, string $reason): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX commerce query config "%s" %s.', $path, $reason));
    }
}