$config */
    public function __construct(private array $config)
    {
    }

    /**
     * Returns a normalized, deduplicated mapping for the QueryEnricher.
     *
     * Output format:
     * [
     *     'term a' => 'term b',
     *     'term c' => 'term d',
     * ]
     *
     * Rules:
     * - ignore empty / invalid values
     * - trim, lowercase and normalize whitespace
     * - ignore self-mappings
     * - preserve first valid rule if duplicates normalize to the same key
     *
     * An entry whose value is an array is treated as a synonym group and is
     * expanded into a chain of pairs (see ingestGroup()).
     *
     * @return array<array-key, string> normalized source phrase => target phrase
     *                                  (PHP stores purely numeric phrase keys as integers)
     *
     * @throws \InvalidArgumentException when "rules" is missing, not an array,
     *                                   empty, or yields no valid rule
     */
    public function getEnrichQueryList(): array
    {
        $normalized = $this->collectRules($this->requiredArray('rules'));

        if ($normalized === []) {
            throw new \InvalidArgumentException('RetrieX query enrichment config key "rules" must contain at least one valid enrichment rule.');
        }

        return $normalized;
    }

    /**
     * Returns the configured "max_expansions" value.
     *
     * @throws \InvalidArgumentException when the key is missing, is not an
     *                                   integer (or integer-like string), or is negative
     */
    public function getMaxExpansions(): int
    {
        return $this->requiredNonNegativeInt('max_expansions');
    }

    /**
     * Returns true when at least one valid enrichment rule exists.
     *
     * Unlike getEnrichQueryList(), this predicate never throws: a missing,
     * non-array or empty "rules" entry, or one that contains no valid rule,
     * yields false.
     */
    public function hasRules(): bool
    {
        $rules = $this->config['rules'] ?? null;

        return is_array($rules) && $this->collectRules($rules) !== [];
    }

    /**
     * Builds the normalized mapping from a raw rules array without enforcing
     * that the result is non-empty.
     *
     * @param array<array-key, mixed> $rules raw "rules" config value
     *
     * @return array<array-key, string>
     */
    private function collectRules(array $rules): array
    {
        $normalized = [];

        foreach ($rules as $key => $value) {
            if (is_array($value)) {
                $this->ingestGroup($normalized, $value);

                continue;
            }

            // Non-string keys/values normalize to '' and are rejected by addRule().
            $this->addRule(
                $normalized,
                $this->normalizePhrase(is_string($key) ? $key : ''),
                $this->normalizePhrase(is_string($value) ? $value : ''),
            );
        }

        return $normalized;
    }

    /**
     * Adds the synonym group's members to the mapping as a chain of pairs.
     *
     * Non-string and empty members are dropped, members are normalized and
     * deduplicated (first occurrence keeps its position), and groups with fewer
     * than two remaining members are ignored.
     *
     * @param array<array-key, string> $normalized mapping being built (modified in place)
     * @param array<array-key, mixed>  $group      raw synonym group
     */
    private function ingestGroup(array &$normalized, array $group): void
    {
        $items = [];

        foreach ($group as $item) {
            if (!is_string($item)) {
                continue;
            }

            $item = $this->normalizePhrase($item);

            if ($item === '') {
                continue;
            }

            // Keyed by the phrase itself so duplicates collapse.
            $items[$item] = $item;
        }

        $items = array_values($items);

        if (count($items) < 2) {
            return;
        }

        /**
         * Turn a synonym group into a conservative chain:
         * ['a', 'b', 'c'] => a=>b, b=>c
         *
         * QueryEnricher builds a bidirectional lookup later,
         * so the config output stays intentionally small.
         */
        for ($i = 0, $max = count($items) - 1; $i < $max; $i++) {
            $this->addRule($normalized, $items[$i], $items[$i + 1]);
        }
    }

    /**
     * Stores $left => $right when the pair is valid and $left has no rule yet,
     * so the first valid rule for a key wins.
     *
     * @param array<array-key, string> $normalized mapping being built (modified in place)
     */
    private function addRule(array &$normalized, string $left, string $right): void
    {
        if (!$this->isValidPair($left, $right)) {
            return;
        }

        // Stored values are always non-empty strings, so ??= only fills unset keys.
        $normalized[$left] ??= $right;
    }

    /**
     * A pair is valid when both sides are non-empty and differ from each other.
     */
    private function isValidPair(string $left, string $right): bool
    {
        return $left !== '' && $right !== '' && $left !== $right;
    }

    /**
     * Returns the non-empty array stored under $key.
     *
     * @return array<array-key, mixed>
     *
     * @throws \InvalidArgumentException when the key is missing, not an array, or empty
     */
    private function requiredArray(string $key): array
    {
        $value = $this->requiredValue($key);

        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must be an array.', $key));
        }

        if ($value === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must not be empty.', $key));
        }

        return $value;
    }

    /**
     * Returns the value stored under $key as an integer >= 0.
     *
     * Accepts native integers and strings consisting of an optional minus sign
     * followed by digits (surrounding whitespace is trimmed).
     *
     * @throws \InvalidArgumentException when the key is missing, the value is
     *                                   not integer-like, or it is negative
     */
    private function requiredNonNegativeInt(string $key): int
    {
        $value = $this->requiredValue($key);

        if (is_int($value)) {
            $intValue = $value;
        } elseif (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) {
            $intValue = (int) trim($value);
        } else {
            throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must be an integer.', $key));
        }

        if ($intValue < 0) {
            throw new \InvalidArgumentException(sprintf('RetrieX query enrichment config key "%s" must be greater than or equal to 0.', $key));
        }

        return $intValue;
    }

    /**
     * Returns the raw config value for $key.
     *
     * array_key_exists() is used so that an explicit null counts as present and
     * is reported by the caller's type check instead of as a missing key.
     *
     * @throws \InvalidArgumentException when the key is not present
     */
    private function requiredValue(string $key): mixed
    {
        if (!array_key_exists($key, $this->config)) {
            throw new \InvalidArgumentException(sprintf('Missing required RetrieX query enrichment config key "%s".', $key));
        }

        return $this->config[$key];
    }

    /**
     * Trims, lowercases (UTF-8) and collapses whitespace runs to single spaces.
     *
     * Returns '' for blank input.
     */
    private function normalizePhrase(string $value): string
    {
        $value = trim($value);

        if ($value === '') {
            return '';
        }

        $value = mb_strtolower($value, 'UTF-8');
        // preg_replace() returns null on failure; keep the lowercased value then.
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        // Trim again: the replacement can leave a leading/trailing space.
        return trim($value);
    }
}