From 59840912824aad143c77e07ae6eddcf3bfdbcc05 Mon Sep 17 00:00:00 2001 From: team2 Date: Sat, 18 Apr 2026 21:49:30 +0200 Subject: [PATCH] harden retrieval logic bugfixes --- src/Agent/AgentRunner.php | 26 +- src/Agent/PromptBuilder.php | 39 ++- src/Commerce/CommerceQueryParser.php | 37 +-- src/Commerce/ShopSearchService.php | 289 +++++++++++++++++++++- src/Config/CommerceIntentConfig.php | 47 ++-- src/Intent/SalesIntentLite.php | 6 +- src/Knowledge/Retrieval/QueryEnricher.php | 90 +++---- 7 files changed, 426 insertions(+), 108 deletions(-) diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 89d3019..fae5962 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -51,6 +51,9 @@ final readonly class AgentRunner $shopResults = []; $sources = []; $optimizedShopQuery = ''; + $shopSearchQuery = ''; + $commerceIntent = CommerceIntentLite::NONE; + $commerceHistoryContext = ''; $this->agentLogger->info('Agent run started', [ 'userId' => $userId, @@ -97,7 +100,7 @@ final readonly class AgentRunner $commerceHistoryContext = $this->buildCommerceHistoryContext($userId); - if($commerceHistoryContext){ + if ($commerceHistoryContext !== '') { $this->addSource($sources, 'Chatverlauf'); } @@ -109,6 +112,16 @@ final readonly class AgentRunner $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; + $this->agentLogger->info('Commerce search prepared', [ + 'userId' => $userId, + 'commerceIntent' => $commerceIntent, + 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', + 'optimizedShopQuery' => $optimizedShopQuery, + 'shopSearchQuery' => $shopSearchQuery, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + ]); + yield $this->systemMsg( 'Ich rufe Recherchedaten ab (type: ' . $commerceIntent . ')', 'think' @@ -126,7 +139,9 @@ final readonly class AgentRunner } } - $knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent); + if ($shopResults !== []) { + $knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent); + } yield $this->systemMsg('Ich analysiere alle Informationen...', 'think'); @@ -148,6 +163,7 @@ final readonly class AgentRunner 'userId' => $userId, 'finalPrompt' => $finalPrompt, 'optimizedShopQuery' => $optimizedShopQuery, + 'shopSearchQuery' => $shopSearchQuery, ]); } @@ -198,6 +214,10 @@ final readonly class AgentRunner 'knowledgeChunkCount' => count($knowledgeChunks), 'hasUrlContent' => $urlContent !== '', 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', + 'optimizedShopQuery' => $optimizedShopQuery, + 'shopSearchQuery' => $shopSearchQuery, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), ]); } catch (Throwable $e) { $this->agentLogger->error('Agent run failed', [ @@ -282,6 +302,8 @@ final readonly class AgentRunner 'userId' => $userId, 'commerceIntent' => $commerceIntent, 'query' => $query, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), 'exception' => $e, ]); diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php index 6998567..c6892f5 100644 --- a/src/Agent/PromptBuilder.php +++ b/src/Agent/PromptBuilder.php @@ -42,6 +42,13 @@ final readonly class PromptBuilder */ private const MIN_PROMPT_BUDGET_TOKENS = 1024; + /** + * Limit how many ranked shop results are passed into the final prompt. + * The shop search may return many candidates, but the LLM should only see + * the most relevant top subset after local reranking. + */ + private const MAX_SHOP_RESULTS_IN_PROMPT = 8; + /** * Technical product prompts should be answered like documentation, * not like sales copy. @@ -84,8 +91,7 @@ final readonly class PromptBuilder private ContextService $contextService, private SystemPromptRepository $systemPromptRepository, private ModelGenerationConfigProvider $modelGenerationConfigProvider, - ) - { + ) { } /** @@ -222,18 +228,21 @@ final readonly class PromptBuilder "Source: Shop Search"; } - if ($shopResults === []) { + $normalizedShopResults = array_values(array_filter( + $shopResults, + static fn(mixed $product): bool => $product instanceof ShopProductResult + )); + + if ($normalizedShopResults === []) { return $this->implodeBlocks($parts); } - $isDetailed = count($shopResults) <= 5; + $totalCount = count($normalizedShopResults); + $limitedShopResults = array_slice($normalizedShopResults, 0, self::MAX_SHOP_RESULTS_IN_PROMPT); + $isDetailed = count($limitedShopResults) <= 5; $lines = []; - foreach ($shopResults as $i => $product) { - if (!$product instanceof ShopProductResult) { - continue; - } - + foreach ($limitedShopResults as $i => $product) { $n = $i + 1; $entryParts = [ "[{$n}] " . $this->normalizeBlockText($product->name), @@ -283,13 +292,19 @@ final readonly class PromptBuilder } if ($lines !== []) { - $parts[] = + $header = "LIVE SHOP RESULTS (authoritative for current commercial details):\n" . "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" . "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" . "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" . - "Do not infer undocumented technical specifications from shop data.\n\n" . - implode("\n\n", $lines); + "Do not infer undocumented technical specifications from shop data."; + + if ($totalCount > count($limitedShopResults)) { + $header .= "\n" . + "Only the top " . count($limitedShopResults) . " ranked shop results are shown here out of {$totalCount} total results."; + } + + $parts[] = $header . "\n\n" . implode("\n\n", $lines); } return $this->implodeBlocks($parts); diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index 83505f6..c0802b6 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -13,20 +13,18 @@ use App\Knowledge\Text\TextNormalizer; final readonly class CommerceQueryParser { public function __construct( - private TextNormalizer $textNormalizer, - private QueryCleaner $queryCleaner, + private TextNormalizer $textNormalizer, + private QueryCleaner $queryCleaner, private CommerceQueryParserConfig $config, - private CommerceIntentConfig $intentConfig, - ) - { + private CommerceIntentConfig $intentConfig, + ) { } public function parse( string $originalPrompt, string $intent, string $historyContext = '' - ): CommerceSearchQuery - { + ): CommerceSearchQuery { $normalizedPrompt = $this->normalize($originalPrompt); [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt); @@ -152,8 +150,10 @@ final readonly class CommerceQueryParser private function extractBrand(string $prompt): ?string { foreach ($this->config->getKnownBrands() as $brand) { - if (str_contains($prompt, $brand)) { - return $brand; + $normalizedBrand = $this->normalize((string) $brand); + + if ($normalizedBrand !== '' && str_contains($prompt, $normalizedBrand)) { + return $normalizedBrand; } } @@ -161,13 +161,12 @@ final readonly class CommerceQueryParser } private function buildSearchText( - string $prompt, - array $sizes, + string $prompt, + array $sizes, ?string $brand, - ?float $priceMin, - ?float $priceMax - ): string - { + ?float $priceMin, + ?float $priceMax + ): string { $text = ' ' . $prompt . ' '; foreach ($this->config->getPhrasesToRemove() as $phrase) { @@ -179,7 +178,7 @@ final readonly class CommerceQueryParser } if ($brand !== null && $brand !== '') { - $text = str_replace($brand, ' ', $text); + $text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text; } if ($priceMin !== null || $priceMax !== null) { @@ -211,7 +210,9 @@ final readonly class CommerceQueryParser private function extractLatestQuestionFromHistory(string $historyContext): string { - if (preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) !== 1 && preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) === false) { + $result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches); + + if ($result === false) { return ''; } @@ -266,6 +267,6 @@ final readonly class CommerceQueryParser { $value = str_replace(',', '.', trim($value)); - return is_numeric($value) ? (float)$value : null; + return is_numeric($value) ? (float) $value : null; } } \ No newline at end of file diff --git a/src/Commerce/ShopSearchService.php b/src/Commerce/ShopSearchService.php index 8af88eb..94c0b3f 100644 --- a/src/Commerce/ShopSearchService.php +++ b/src/Commerce/ShopSearchService.php @@ -4,9 +4,11 @@ declare(strict_types=1); namespace App\Commerce; +use App\Commerce\Dto\CommerceSearchQuery; use App\Commerce\Dto\ShopProductResult; use App\Shopware\ShopwareCriteriaBuilder; use App\Shopware\StoreApiClient; +use Psr\Log\LoggerInterface; use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface; use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface; use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface; @@ -18,11 +20,11 @@ final readonly class ShopSearchService private CommerceQueryParser $queryParser, private ShopwareCriteriaBuilder $criteriaBuilder, private StoreApiClient $storeApiClient, + private LoggerInterface $logger, private bool $enabled = true, private int $maxResults = 25, private string $baseUrl - ) - { + ) { } /** @@ -34,19 +36,87 @@ final readonly class ShopSearchService string $commerceHistoryContext = '' ): array { if (!$this->enabled) { + $this->logger->info('Shop search skipped because commerce search is disabled', [ + 'commerceIntent' => $commerceIntent, + ]); + return []; } - $response = []; - - $query = $this->queryParser->parse( + $primaryQuery = $this->queryParser->parse( $originalPrompt, $commerceIntent, $commerceHistoryContext ); + $this->logger->info('Shop search started', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $primaryQuery->normalizedPrompt, + 'searchText' => $primaryQuery->searchText, + 'brand' => $primaryQuery->brand, + 'sizes' => $primaryQuery->sizes, + 'priceMin' => $primaryQuery->priceMin, + 'priceMax' => $primaryQuery->priceMax, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + 'criteriaLimit' => $this->maxResults, + ]); + + $rankedProducts = $this->executeSearch($primaryQuery, $commerceIntent, $originalPrompt, true); + + if ($rankedProducts === [] && $commerceHistoryContext !== '') { + $fallbackQuery = $this->queryParser->parse( + $originalPrompt, + $commerceIntent, + '' + ); + + $this->logger->info('Shop search retry without commerce history context', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $fallbackQuery->normalizedPrompt, + 'searchText' => $fallbackQuery->searchText, + 'brand' => $fallbackQuery->brand, + 'sizes' => $fallbackQuery->sizes, + 'priceMin' => $fallbackQuery->priceMin, + 'priceMax' => $fallbackQuery->priceMax, + ]); + + $rankedProducts = $this->executeSearch($fallbackQuery, $commerceIntent, $originalPrompt, false); + } + + $this->logger->info('Shop search finished', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'rankedProductsCount' => count($rankedProducts), + 'topProducts' => array_map( + static fn(ShopProductResult $product): array => [ + 'name' => $product->name, + 'productNumber' => $product->productNumber, + 'manufacturer' => $product->manufacturer, + 'available' => $product->available, + ], + array_slice($rankedProducts, 0, 3) + ), + ]); + + return $rankedProducts; + } + + /** + * @return ShopProductResult[] + */ + private function executeSearch( + CommerceSearchQuery $query, + string $commerceIntent, + string $originalPrompt, + bool $usesHistoryContext + ): array { $criteria = $this->criteriaBuilder->build($query, $this->maxResults); + $response = []; + try { $response = $this->storeApiClient->searchProducts($criteria); } catch ( @@ -55,9 +125,52 @@ final readonly class ShopSearchService | ServerExceptionInterface | TransportExceptionInterface $e ) { + $this->logger->warning('Shop search request failed', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $query->normalizedPrompt, + 'searchText' => $query->searchText, + 'brand' => $query->brand, + 'sizes' => $query->sizes, + 'priceMin' => $query->priceMin, + 'priceMax' => $query->priceMax, + 'usesHistoryContext' => $usesHistoryContext, + 'criteria' => $criteria, + 'exceptionClass' => $e::class, + 'exceptionMessage' => $e->getMessage(), + ]); + + return []; } - return $this->mapProducts($response); + $mappedProducts = $this->mapProducts($response); + $rankedProducts = $this->rerankProducts($mappedProducts, $query); + + $this->logger->info('Shop search request finished', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $query->normalizedPrompt, + 'searchText' => $query->searchText, + 'brand' => $query->brand, + 'sizes' => $query->sizes, + 'priceMin' => $query->priceMin, + 'priceMax' => $query->priceMax, + 'usesHistoryContext' => $usesHistoryContext, + 'rawElementsCount' => is_array($response['elements'] ?? null) ? count($response['elements']) : 0, + 'mappedProductsCount' => count($mappedProducts), + 'rankedProductsCount' => count($rankedProducts), + 'topProducts' => array_map( + static fn(ShopProductResult $product): array => [ + 'name' => $product->name, + 'productNumber' => $product->productNumber, + 'manufacturer' => $product->manufacturer, + 'available' => $product->available, + ], + array_slice($rankedProducts, 0, 3) + ), + ]); + + return $rankedProducts; } /** @@ -77,6 +190,8 @@ final readonly class ShopSearchService continue; } + $relativeUrl = $this->extractUrl($row); + $results[] = new ShopProductResult( id: (string) ($row['id'] ?? ''), name: trim((string) ($row['translated']['name'] ?? '')), @@ -84,7 +199,7 @@ final readonly class ShopSearchService manufacturer: $this->extractManufacturer($row), price: $this->extractPrice($row), available: isset($row['available']) ? (bool) $row['available'] : null, - url: $this->baseUrl . $this->extractUrl($row), + url: $this->buildAbsoluteUrl($relativeUrl), highlights: $this->extractHighlights($row), description: $this->cleanUpDescription($row), productImage: $row['cover']['media']['thumbnails'][0]['url'] ?? 'no-image', @@ -98,6 +213,157 @@ final readonly class ShopSearchService )); } + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ + private function rerankProducts(array $products, CommerceSearchQuery $query): array + { + if (count($products) <= 1) { + return $products; + } + + $decorated = []; + + foreach ($products as $index => $product) { + $decorated[] = [ + 'index' => $index, + 'score' => $this->scoreProduct($product, $query), + 'product' => $product, + ]; + } + + usort($decorated, static function (array $a, array $b): int { + if ($a['score'] === $b['score']) { + return $a['index'] <=> $b['index']; + } + + return $b['score'] <=> $a['score']; + }); + + return array_values(array_map( + static fn(array $entry): ShopProductResult => $entry['product'], + $decorated + )); + } + + private function scoreProduct(ShopProductResult $product, CommerceSearchQuery $query): int + { + $score = 0; + + $normalizedPrompt = $this->normalizeForMatching($query->normalizedPrompt ?: $query->originalPrompt); + $normalizedSearchText = $this->normalizeForMatching($query->searchText); + $normalizedQuery = trim($normalizedPrompt . ' ' . $normalizedSearchText); + + $queryTokens = $this->tokenize($normalizedQuery); + $queryNumberTokens = $this->extractNumberTokens($queryTokens); + + $normalizedProductName = $this->normalizeForMatching($product->name); + $productNameTokens = $this->tokenize($normalizedProductName); + $productNameNumberTokens = $this->extractNumberTokens($productNameTokens); + + $normalizedProductNumber = $this->normalizeForMatching((string) ($product->productNumber ?? '')); + $productNumberTokens = $this->tokenize($normalizedProductNumber); + $productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens); + + $normalizedManufacturer = $this->normalizeForMatching((string) ($product->manufacturer ?? '')); + $normalizedBrand = $this->normalizeForMatching((string) ($query->brand ?? '')); + + if ($normalizedProductNumber !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductNumber)) { + $score += 120; + } + + if ($normalizedBrand !== '') { + if ($normalizedManufacturer !== '' && $normalizedManufacturer === $normalizedBrand) { + $score += 40; + } elseif ($this->containsWholePhrase($normalizedProductName, $normalizedBrand)) { + $score += 20; + } + } + + $score += $this->countOverlap($queryTokens, $productNameTokens) * 4; + $score += $this->countOverlap($queryTokens, $productNumberTokens) * 8; + $score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 16; + $score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 24; + + foreach ($query->sizes as $size) { + $normalizedSize = $this->normalizeForMatching((string) $size); + + if ($normalizedSize === '') { + continue; + } + + if ($this->containsWholePhrase($normalizedProductName, $normalizedSize) + || $this->containsWholePhrase($normalizedProductNumber, $normalizedSize)) { + $score += 12; + } + } + + if ($product->available === true) { + $score += 1; + } + + return $score; + } + + /** + * @param string[] $left + * @param string[] $right + */ + private function countOverlap(array $left, array $right): int + { + if ($left === [] || $right === []) { + return 0; + } + + $leftSet = array_fill_keys($left, true); + $rightSet = array_fill_keys($right, true); + + return count(array_intersect_key($leftSet, $rightSet)); + } + + /** + * @param string[] $tokens + * @return string[] + */ + private function extractNumberTokens(array $tokens): array + { + return array_values(array_filter( + $tokens, + static fn(string $token): bool => preg_match('/\d/u', $token) === 1 + )); + } + + private function normalizeForMatching(string $value): string + { + $value = mb_strtolower(trim($value)); + $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value; + $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + + return trim($value); + } + + /** + * @return string[] + */ + private function tokenize(string $value): array + { + if ($value === '') { + return []; + } + + return preg_split('/[^\p{L}\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY) ?: []; + } + + private function containsWholePhrase(string $normalizedText, string $normalizedPhrase): bool + { + if ($normalizedText === '' || $normalizedPhrase === '') { + return false; + } + + return str_contains(' ' . $normalizedText . ' ', ' ' . $normalizedPhrase . ' '); + } + private function getRelevantCustomFields(array $customField): string { $result = ($customField['migration_Backup_product_attr1'] ?? '') . ': ' . ($customField['migration_Backup_product_attr2'] ?? ''); @@ -179,6 +445,15 @@ final readonly class ShopSearchService return null; } + private function buildAbsoluteUrl(?string $relativeUrl): ?string + { + if ($relativeUrl === null || trim($relativeUrl) === '') { + return null; + } + + return rtrim($this->baseUrl, '/') . '/' . ltrim($relativeUrl, '/'); + } + /** * @return string[] */ diff --git a/src/Config/CommerceIntentConfig.php b/src/Config/CommerceIntentConfig.php index d0ecb39..744b180 100644 --- a/src/Config/CommerceIntentConfig.php +++ b/src/Config/CommerceIntentConfig.php @@ -15,25 +15,25 @@ class CommerceIntentConfig 'produkt', 'sku', 'Artikel', - 'kaufen' + 'kaufen', - /* 'zeig', - 'welche', - 'vergleich', - 'alternativ', - 'find', - 'shop', - 'store', - 'sku', - 'Artikel', - 'Gerät', - 'testomat', - 'indikator', - 'Titromat', - 'Seminar', - 'Schulung', - 'Sensor', - 'liste'*/ + /* 'zeig', + 'welche', + 'vergleich', + 'alternativ', + 'find', + 'shop', + 'store', + 'sku', + 'Artikel', + 'Gerät', + 'testomat', + 'indikator', + 'Titromat', + 'Seminar', + 'Schulung', + 'Sensor', + 'liste'*/ ]; } @@ -58,8 +58,9 @@ class CommerceIntentConfig 'eur', 'teuer', 'preis', - 'kosten' + 'kosten', ]; + return implode('|', $pattern); } @@ -76,8 +77,9 @@ class CommerceIntentConfig 'pink', 'gruen', 'orange', - 'braun' + 'braun', ]; + return implode('|', $pattern); } @@ -89,10 +91,10 @@ class CommerceIntentConfig 'm', 'l', 'xl', - '', 'xxl', 'xxxxl', ]; + return implode('|', $pattern); } @@ -101,8 +103,9 @@ class CommerceIntentConfig $pattern = [ 'größe', 'groesse', - 'grösse' + 'grösse', ]; + return implode('|', $pattern); } } \ No newline at end of file diff --git a/src/Intent/SalesIntentLite.php b/src/Intent/SalesIntentLite.php index 06c74df..c9f582f 100644 --- a/src/Intent/SalesIntentLite.php +++ b/src/Intent/SalesIntentLite.php @@ -17,9 +17,7 @@ final class SalesIntentLite public function __construct( private readonly SalesIntentConfig $config - ) - { - + ) { } public function detect(string $originalPrompt): array @@ -62,7 +60,7 @@ final class SalesIntentLite // ------------------------------------------------------------ // OBJECTION // ------------------------------------------------------------ - foreach ($this->config->getComparisonSignals() as $word) { + foreach ($this->config->getObjectionSignals() as $word) { if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { $scores[self::OBJECTION] += 3; } diff --git a/src/Knowledge/Retrieval/QueryEnricher.php b/src/Knowledge/Retrieval/QueryEnricher.php index bf00664..87faf88 100644 --- a/src/Knowledge/Retrieval/QueryEnricher.php +++ b/src/Knowledge/Retrieval/QueryEnricher.php @@ -10,8 +10,7 @@ final readonly class QueryEnricher { public function __construct( private QueryEnricherConfig $config - ) - { + ) { } /** @@ -19,58 +18,46 @@ final readonly class QueryEnricher * * Example: * - input: "water hardness device" - * - output: "water hardness device | Synonyms: residual hardness, model" + * - output: "water hardness device residual hardness model" */ public function enrichPrompt(string $query): string { - if (trim($query) === '') { + $originalQuery = trim($query); + + if ($originalQuery === '') { return ''; } - // Keep the original query untouched for the final output. - $originalQuery = $query; - - // Normalize the query for case-insensitive matching. - $normalizedQuery = $this->normalize($query); - - // Expected format: - // [ - // 'trousers' => 'jeans', - // 'jacket' => 'coat', - // ] $mapping = $this->config->getEnrichQueryList(); - - // Build a bidirectional lookup table: - // key -> value - // value -> key $lookup = $this->buildBidirectionalLookup($mapping); - - // Split the query into searchable tokens. - $tokens = $this->tokenize($normalizedQuery); + $normalizedQuery = $this->normalizeForMatching($originalQuery); $matches = []; - foreach ($tokens as $token) { - // If the token exists in the lookup table, add the mapped counterpart. - if (isset($lookup[$token])) { - $matches[] = $lookup[$token]; + foreach ($lookup as $needle => $mappedValue) { + if ($needle === '') { + continue; + } + + if ($this->containsWholePhrase($normalizedQuery, $needle)) { + $matches[] = $mappedValue; } } - // Remove duplicates while preserving order. - $matches = array_values(array_unique($matches)); + $matches = array_values(array_unique(array_filter( + $matches, + static fn(string $value): bool => trim($value) !== '' + ))); - // If no matches were found, return the original query unchanged. if ($matches === []) { return $originalQuery; } - // Append the matched counterpart terms to the original query. - return $originalQuery . ' | Synonyms: ' . implode(', ', $matches); + return trim($originalQuery . ' ' . implode(' ', $matches)); } /** - * Normalizes a string for case-insensitive comparison. + * Normalizes a string for case-insensitive matching. */ private function normalize(string $value): string { @@ -78,13 +65,29 @@ final readonly class QueryEnricher } /** - * Tokenizes the query into words. + * Normalizes a string for phrase-aware matching. * - * Splits on every character that is not a letter or number. + * This keeps words searchable across spaces, punctuation and hyphens. */ - private function tokenize(string $value): array + private function normalizeForMatching(string $value): string { - return preg_split('/[^\p{L}\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY) ?: []; + $value = $this->normalize($value); + $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value; + $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + + return trim($value); + } + + /** + * Checks whether a normalized phrase exists as a full phrase in a normalized query. + */ + private function containsWholePhrase(string $normalizedQuery, string $normalizedPhrase): bool + { + if ($normalizedQuery === '' || $normalizedPhrase === '') { + return false; + } + + return str_contains(' ' . $normalizedQuery . ' ', ' ' . $normalizedPhrase . ' '); } /** @@ -112,19 +115,20 @@ final readonly class QueryEnricher $key = trim((string) $key); $value = trim((string) $value); - // Skip incomplete pairs. if ($key === '' || $value === '') { continue; } - $normalizedKey = $this->normalize($key); - $normalizedValue = $this->normalize($value); + $normalizedKey = $this->normalizeForMatching($key); + $normalizedValue = $this->normalizeForMatching($value); - // If the key is found in the query, return the value. - $lookup[$normalizedKey] = $value; + if ($normalizedKey !== '') { + $lookup[$normalizedKey] = $value; + } - // If the value is found in the query, return the key. - $lookup[$normalizedValue] = $key; + if ($normalizedValue !== '') { + $lookup[$normalizedValue] = $key; + } } return $lookup;