config->getEnrichQueryList();

        if ($mapping === []) {
            return $originalQuery;
        }

        $lookup = $this->buildBidirectionalLookup($mapping);

        if ($lookup === []) {
            return $originalQuery;
        }

        $lookup = $this->sortLookupBySpecificity($lookup);
        $normalizedQuery = $this->normalizeForMatching($originalQuery);

        if ($normalizedQuery === '') {
            return $originalQuery;
        }

        $matches = [];
        $seenNormalizedExpansions = [];

        foreach ($lookup as $normalizedNeedle => $mappedValue) {
            if ($normalizedNeedle === '') {
                continue;
            }

            if (!$this->containsWholePhrase($normalizedQuery, $normalizedNeedle)) {
                continue;
            }

            $mappedValue = trim($mappedValue);

            if ($mappedValue === '') {
                continue;
            }

            $normalizedMappedValue = $this->normalizeForMatching($mappedValue);

            if ($normalizedMappedValue === '') {
                continue;
            }

            // Do not re-add information that is already present in the query.
            if ($this->containsWholePhrase($normalizedQuery, $normalizedMappedValue)) {
                continue;
            }

            if (isset($seenNormalizedExpansions[$normalizedMappedValue])) {
                continue;
            }

            $matches[] = $mappedValue;
            $seenNormalizedExpansions[$normalizedMappedValue] = true;

            if (count($matches) >= $this->config->getMaxExpansions()) {
                break;
            }
        }

        if ($matches === []) {
            return $originalQuery;
        }

        return trim($originalQuery . ' ' . implode(' ', $matches));
    }

    /**
     * Produces the case-insensitive form of a string.
     *
     * Surrounding whitespace is trimmed first, then the remainder is
     * lowercased with the multibyte-aware mb_strtolower() using UTF-8.
     *
     * @param string $value Raw text.
     * @return string Trimmed, lowercased text.
     */
    private function normalize(string $value): string
    {
        $trimmed = trim($value);

        return mb_strtolower($trimmed, 'UTF-8');
    }

    /**
     * Produces the phrase-matching form of a string.
     *
     * Steps, in order:
     *  1. trim + lowercase via normalize();
     *  2. replace every run of characters that are neither Unicode letters
     *     nor Unicode digits (spaces, punctuation, hyphens, ...) with one space;
     *  3. collapse any remaining whitespace runs to one space;
     *  4. trim the result.
     *
     * If a preg_replace() call returns null, the text from the previous step
     * is carried forward unchanged.
     *
     * @param string $value Raw text.
     * @return string Lowercased words separated by single spaces; may be ''.
     */
    private function normalizeForMatching(string $value): string
    {
        $lowered = $this->normalize($value);

        $wordsOnly = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $lowered) ?? $lowered;
        $singleSpaced = preg_replace('/\s+/u', ' ', $wordsOnly) ?? $wordsOnly;

        return trim($singleSpaced);
    }

    /**
     * Checks whether a normalized phrase exists as a full phrase in a normalized query.
*/
    private function containsWholePhrase(string $normalizedQuery, string $normalizedPhrase): bool
    {
        if ($normalizedQuery === '' || $normalizedPhrase === '') {
            return false;
        }

        // Padding both strings with a space on each side means a plain substring
        // search only succeeds when the phrase starts and ends on a space boundary
        // (or at the very start / end of the query).
        return str_contains(' ' . $normalizedQuery . ' ', ' ' . $normalizedPhrase . ' ');
    }

    /**
     * Builds a lookup table that works in both directions.
     *
     * Example:
     * [
     *     'trousers' => 'jeans',
     *     'jacket'   => 'coat',
     * ]
     *
     * becomes:
     * [
     *     'trousers' => 'jeans',
     *     'jeans'    => 'trousers',
     *     'jacket'   => 'coat',
     *     'coat'     => 'jacket',
     * ]
     *
     * Returned format:
     * [
     *     '<phrase passed through normalizeForMatching()>' => '<trimmed counterpart, original spelling>',
     * ]
     *
     * Rules applied while building:
     *  - entries whose trimmed key or value is empty are skipped;
     *  - entries whose value is neither scalar nor \Stringable (e.g. nested
     *    arrays) are skipped instead of being cast to the literal "Array" or
     *    raising an Error;
     *  - the first rule registered for a normalized phrase wins; later
     *    duplicates never overwrite it.
     *
     * NOTE: PHP stores purely numeric string keys such as '501' as integers,
     * so consumers iterating over the keys may receive int instead of string.
     *
     * @param array<array-key, mixed> $mapping Raw phrase => counterpart pairs.
     * @return array<array-key, string> Normalized phrase => counterpart.
     */
    private function buildBidirectionalLookup(array $mapping): array
    {
        $lookup = [];

        foreach ($mapping as $key => $value) {
            // Only values with a meaningful string representation can act as a phrase.
            if (!is_scalar($value) && !($value instanceof \Stringable)) {
                continue;
            }

            $key = trim((string) $key);
            $value = trim((string) $value);

            if ($key === '' || $value === '') {
                continue;
            }

            $normalizedKey = $this->normalizeForMatching($key);
            $normalizedValue = $this->normalizeForMatching($value);

            // Forward direction: key phrase expands to the value.
            if ($normalizedKey !== '' && !isset($lookup[$normalizedKey])) {
                $lookup[$normalizedKey] = $value;
            }

            // Reverse direction: value phrase expands to the key.
            if ($normalizedValue !== '' && !isset($lookup[$normalizedValue])) {
                $lookup[$normalizedValue] = $key;
            }
        }

        return $lookup;
    }

    /**
     * Sorts phrase rules by specificity so longer / more precise phrases win first.
     *
     * Priority:
     * 1. more words (counted as number of spaces + 1, so keys are expected
     *    to be single-space separated)
     * 2. longer character length (multibyte-aware)
     * 3. lexical order for deterministic output
     *
     * @param array<array-key, string> $lookup Normalized phrase => counterpart.
     * @return array<array-key, string> Same entries, most specific phrase first.
     */
    private function sortLookupBySpecificity(array $lookup): array
    {
        uksort($lookup, static function (string $a, string $b): int {
            $aWordCount = substr_count($a, ' ') + 1;
            $bWordCount = substr_count($b, ' ') + 1;

            if ($aWordCount !== $bWordCount) {
                // Operands are swapped on purpose: descending order.
                return $bWordCount <=> $aWordCount;
            }

            $aLength = mb_strlen($a, 'UTF-8');
            $bLength = mb_strlen($b, 'UTF-8');

            if ($aLength !== $bLength) {
                return $bLength <=> $aLength;
            }

            return strcmp($a, $b);
        });

        return $lookup;
    }
}