<?php

declare(strict_types=1);

namespace App\Config;

/**
 * YAML-backed query-enrichment configuration.
 *
 * This class intentionally has no PHP fallback values. Missing or invalid
 * configuration must be fixed in config/retriex/query_enrichment.yaml.
 *
 * Every accessor validates on each call and throws \InvalidArgumentException
 * when the requested key is missing or malformed; nothing is cached.
 */
final readonly class QueryEnricherConfig
{
    /**
     * @param array<string, mixed> $config Raw configuration; the accessors read the
     *                                     keys "rules" and "max_expansions".
     */
    public function __construct(private array $config)
    {
    }

    /**
     * Returns a normalized, deduplicated mapping for the QueryEnricher.
     *
     * Each entry of "rules" is either a scalar pair (string key => string value)
     * or a nested array, which is treated as a synonym group and converted into
     * a chain of pairs (see chainGroup()).
     *
     * Output format:
     * [
     *     'term a' => 'term b',
     *     'term c' => 'term d',
     * ]
     *
     * Rules:
     * - ignore empty / invalid values (non-string keys or values are skipped)
     * - trim, lowercase and normalize whitespace
     * - ignore self-mappings
     * - preserve first valid rule if duplicates normalize to the same key
     *
     * Note: PHP stores purely numeric string keys as integers, so a phrase such
     * as "2024" appears as an int key in the returned array.
     *
     * @return array<string, string>
     *
     * @throws \InvalidArgumentException When "rules" is missing, not an array, empty,
     *                                   or contains no valid rule after normalization.
     */
    public function getEnrichQueryList(): array
    {
        $normalized = [];

        foreach ($this->requiredArray('rules') as $key => $value) {
            if (is_array($value)) {
                // Array union never overwrites existing keys, so the first rule wins.
                $normalized += $this->chainGroup($value);

                continue;
            }

            $left = is_string($key) ? $this->normalizePhrase($key) : '';
            $right = is_string($value) ? $this->normalizePhrase($value) : '';

            if ($this->isValidPair($left, $right)) {
                // Keep the first mapping registered for this left-hand phrase.
                $normalized[$left] ??= $right;
            }
        }

        if ($normalized === []) {
            throw new \InvalidArgumentException('RetrieX query enrichment config key "rules" must contain at least one valid enrichment rule.');
        }

        return $normalized;
    }

    /**
     * Returns the configured "max_expansions" value.
     *
     * @throws \InvalidArgumentException When the key is missing, is not an integer
     *                                   (or integer string), or is negative.
     */
    public function getMaxExpansions(): int
    {
        return $this->requiredNonNegativeInt('max_expansions');
    }

    /**
     * Returns true when at least one valid enrichment rule exists.
     *
     * getEnrichQueryList() throws when no valid rule is available, so in
     * practice this method either returns true or propagates that exception;
     * it does not return false for a missing, empty or fully invalid rule set.
     *
     * @throws \InvalidArgumentException See getEnrichQueryList().
     */
    public function hasRules(): bool
    {
        return $this->getEnrichQueryList() !== [];
    }

    /**
     * Turns a synonym group into a conservative chain of pairs:
     *   ['a', 'b', 'c'] => ['a' => 'b', 'b' => 'c']
     *
     * QueryEnricher builds a bidirectional lookup later,
     * so the config output stays intentionally small.
     *
     * Non-string members and members that normalize to an empty string are
     * dropped; duplicates (after normalization) keep their first position.
     * Groups with fewer than two usable members yield an empty chain.
     *
     * @param array<int|string, mixed> $group
     *
     * @return array<string, string>
     */
    private function chainGroup(array $group): array
    {
        $unique = [];

        foreach ($group as $member) {
            if (!is_string($member)) {
                continue;
            }

            $phrase = $this->normalizePhrase($member);

            if ($phrase !== '') {
                // Keyed by phrase to deduplicate while preserving first-seen order.
                $unique[$phrase] = $phrase;
            }
        }

        $phrases = array_values($unique);

        if (count($phrases) < 2) {
            return [];
        }

        // Phrases are non-empty and unique here, so every neighbouring pair is
        // a valid, non-self mapping and no left-hand key can repeat.
        $chain = [];
        $previous = array_shift($phrases);

        foreach ($phrases as $current) {
            $chain[$previous] = $current;
            $previous = $current;
        }

        return $chain;
    }

    /**
     * A pair is valid when both sides are non-empty and differ from each other.
     */
    private function isValidPair(string $left, string $right): bool
    {
        return $left !== '' && $right !== '' && $left !== $right;
    }

    /**
     * Returns the raw value stored under $key.
     *
     * array_key_exists() is used so that an explicit null still counts as
     * "present" and is reported by the caller's type check instead.
     *
     * @throws \InvalidArgumentException When the key is absent.
     */
    private function requiredValue(string $key): mixed
    {
        if (!array_key_exists($key, $this->config)) {
            throw new \InvalidArgumentException(
                sprintf('Missing required RetrieX query enrichment config key "%s".', $key),
            );
        }

        return $this->config[$key];
    }

    /**
     * Returns the non-empty array stored under $key.
     *
     * @return array<int|string, mixed>
     *
     * @throws \InvalidArgumentException When the key is missing, not an array, or empty.
     */
    private function requiredArray(string $key): array
    {
        $value = $this->requiredValue($key);

        if (!is_array($value)) {
            throw new \InvalidArgumentException(
                sprintf('RetrieX query enrichment config key "%s" must be an array.', $key),
            );
        }

        if ($value === []) {
            throw new \InvalidArgumentException(
                sprintf('RetrieX query enrichment config key "%s" must not be empty.', $key),
            );
        }

        return $value;
    }

    /**
     * Returns the integer stored under $key, accepting native ints and plain
     * decimal integer strings (surrounding whitespace allowed).
     *
     * @throws \InvalidArgumentException When the key is missing, the value is not an
     *                                   integer representable as a native int, or it
     *                                   is negative.
     */
    private function requiredNonNegativeInt(string $key): int
    {
        $value = $this->requiredValue($key);

        if (is_string($value)) {
            $value = $this->parseIntegerString($value);
        }

        if (!is_int($value)) {
            throw new \InvalidArgumentException(
                sprintf('RetrieX query enrichment config key "%s" must be an integer.', $key),
            );
        }

        if ($value < 0) {
            throw new \InvalidArgumentException(
                sprintf('RetrieX query enrichment config key "%s" must be greater than or equal to 0.', $key),
            );
        }

        return $value;
    }

    /**
     * Parses an optionally signed decimal integer string.
     *
     * Returns null when the string is not a plain integer literal or when its
     * magnitude does not fit into a native int. The (int) cast would otherwise
     * silently clamp oversized values to the platform limit.
     */
    private function parseIntegerString(string $value): ?int
    {
        $value = trim($value);

        if (preg_match('/^-?\d+$/', $value) !== 1) {
            return null;
        }

        $parsed = (int) $value;

        // Compare magnitudes without sign or leading zeros to detect clamping.
        $digits = ltrim(ltrim($value, '-'), '0');
        $canonical = $digits === '' ? '0' : $digits;

        return ltrim((string) $parsed, '-') === $canonical ? $parsed : null;
    }

    /**
     * Normalizes a phrase: trims it, lowercases it (UTF-8 aware) and collapses
     * every whitespace run into a single space.
     */
    private function normalizePhrase(string $value): string
    {
        $value = trim($value);

        if ($value === '') {
            return '';
        }

        $value = mb_strtolower($value, 'UTF-8');

        // preg_replace() returns null on failure (e.g. malformed UTF-8 with the
        // "u" modifier); fall back to the lowercased input in that case.
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        return trim($value);
    }
}