normalize($originalPrompt); $isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt); [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt); $sizes = $this->extractSizes($normalizedPrompt); $brand = $this->extractBrand($normalizedPrompt); $searchText = $this->buildSearchText( prompt: $normalizedPrompt, sizes: $sizes, brand: $brand, priceMin: $priceMin, priceMax: $priceMax, preserveDirectProductQuery: $isDirectProductQuery ); if ( !$isDirectProductQuery && $historyContext !== '' && $this->shouldUseHistoryContext($normalizedPrompt) ) { $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext); if ($latestHistoryQuestion !== '') { $normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion); $isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt); [$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt); $historySizes = $this->extractSizes($normalizedHistoryPrompt); $historyBrand = $this->extractBrand($normalizedHistoryPrompt); $historySearchText = $this->buildSearchText( prompt: $normalizedHistoryPrompt, sizes: $historySizes, brand: $historyBrand, priceMin: $historyPriceMin, priceMax: $historyPriceMax, preserveDirectProductQuery: $isDirectHistoryProductQuery ); $searchText = $this->mergeSearchTexts($historySearchText, $searchText); if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') { $brand = $historyBrand; } } } $finalSearchText = $searchText !== '' ? 
$searchText : $normalizedPrompt;

        return new CommerceSearchQuery(
            originalPrompt: $originalPrompt,
            normalizedPrompt: $normalizedPrompt,
            searchText: $finalSearchText,
            brand: $brand,
            sizes: $sizes,
            properties: [],
            priceMin: $priceMin,
            priceMax: $priceMax,
            intent: $intent,
            needsLlmFallback: false,
        );
    }

    /**
     * Brings a raw prompt into the canonical form all extractors in this class work on.
     *
     * The prompt is passed through the injected text normalizer and query cleaner
     * (their behavior is defined outside this file), then lower-cased, the euro sign
     * is spelled out as the word "euro", every character that is not a letter, digit,
     * whitespace, dot, comma or hyphen is replaced by a space, and whitespace runs are
     * collapsed to a single space.
     *
     * @param string $prompt Raw text (user prompt, brand name, phrase, size, ...).
     *
     * @return string Normalized, trimmed text.
     */
    private function normalize(string $prompt): string
    {
        $value = $this->textNormalizer->normalize($prompt);
        $value = $this->queryCleaner->clean($value);
        $value = mb_strtolower(trim($value));
        // Spell the currency sign out so the price regexes only need to know "euro".
        $value = str_replace(['€'], ' euro ', $value);
        // If PCRE fails (null result) keep the previous value instead of losing the prompt.
        $value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        return trim($value);
    }

    /**
     * Extracts a price range from a normalized (German) prompt.
     *
     * Recognized forms:
     *  - "zwischen A und B euro"            => [min(A, B), max(A, B)]
     *  - "unter|bis|max|maximal N euro"     => upper bound
     *  - "ab|mindestens|min N euro"         => lower bound
     *
     * The "zwischen" form wins when it matches; otherwise upper and lower bound are
     * looked up independently, so either, both or neither may be set.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return array{0:?float,1:?float} [priceMin, priceMax]; null where no bound was found.
     */
    private function extractPriceRange(string $prompt): array
    {
        $priceMin = null;
        $priceMax = null;

        if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
            $a = $this->toFloat($m[1]);
            $b = $this->toFloat($m[2]);

            if ($a !== null && $b !== null) {
                // Tolerate reversed bounds ("zwischen 100 und 50 euro").
                return [min($a, $b), max($a, $b)];
            }
        }

        if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
            $priceMax = $this->toFloat($m[1]);
        }

        if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
            $priceMin = $this->toFloat($m[1]);
        }

        return [$priceMin, $priceMax];
    }

    /**
     * Collects size values mentioned in a normalized prompt.
     *
     * Two passes are made: first the value following a size keyword (keyword pattern
     * from the intent config), then standalone size tokens (token pattern from the
     * intent config). Results are de-duplicated and empty strings are dropped.
     *
     * If the first regex fails outright (preg_match_all returns false) an empty list
     * is returned immediately; a failure of the second regex is simply ignored.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return string[] Unique sizes in order of first appearance.
     */
    private function extractSizes(string $prompt): array
    {
        $sizes = [];
        $sizePattern = $this->intentConfig->getSizePattern();

        if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) {
            return [];
        }

        foreach ($matches[1] as $size) {
            $sizes[] = trim($size);
        }

        $sizeTokenPattern = $this->intentConfig->getSizeTokenPattern();

        if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) {
            foreach ($tokenMatches[1] as $sizeToken) {
                $sizes[] = trim($sizeToken);
            }
        }

        return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== '')));
    }

    /**
     * Returns the first configured brand that occurs in the prompt.
     *
     * Each known brand is normalized the same way as the prompt and then looked up
     * as a plain substring, so the order of the configured brand list decides which
     * brand wins when several occur.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return string|null Normalized brand name, or null when no brand is contained.
     */
    private function extractBrand(string $prompt): ?string
    {
        foreach ($this->config->getKnownBrands() as $brand) {
            $normalizedBrand = $this->normalize((string) $brand);

            if ($normalizedBrand !== '' && str_contains($prompt, $normalizedBrand)) {
                return $normalizedBrand;
            }
        }

        return null;
    }

    /**
     * Reduces a normalized prompt to the free-text part used for searching.
     *
     * Unless the prompt is to be preserved as a direct product query, the following
     * is stripped: configured filler phrases, the extracted sizes, the brand (unless
     * it is directly followed by a model number), and - when a price bound was
     * extracted - the price expressions. Remaining tokens shorter than two characters
     * and configured stop words are dropped.
     *
     * @param string      $prompt                     Normalized prompt.
     * @param array       $sizes                      Sizes previously extracted from the prompt.
     * @param string|null $brand                      Normalized brand previously extracted, if any.
     * @param float|null  $priceMin                   Extracted lower price bound, if any.
     * @param float|null  $priceMax                   Extracted upper price bound, if any.
     * @param bool        $preserveDirectProductQuery When true, only whitespace/duplicate cleanup is applied.
     *
     * @return string Space-separated search tokens; may be empty.
     */
    private function buildSearchText(
        string $prompt,
        array $sizes,
        ?string $brand,
        ?float $priceMin,
        ?float $priceMax,
        bool $preserveDirectProductQuery = false
    ): string {
        if ($preserveDirectProductQuery) {
            return $this->buildDirectProductSearchText($prompt);
        }

        // Pad with spaces so phrases at the very start/end can be matched as " phrase ".
        $text = ' ' . $prompt . ' ';

        foreach ($this->config->getPhrasesToRemove() as $phrase) {
            $normalizedPhrase = $this->normalize((string) $phrase);

            if ($normalizedPhrase === '') {
                continue;
            }

            $needle = ' ' . $normalizedPhrase . ' ';

            // Two consecutive occurrences share the separating space, so one
            // str_replace() pass would leave the second one behind. Repeat until
            // none is left; every pass shortens the text, so the loop terminates.
            while (str_contains($text, $needle)) {
                $text = str_replace($needle, ' ', $text);
            }
        }

        foreach ($sizes as $size) {
            $normalizedSize = $this->normalize((string) $size);

            if ($normalizedSize === '') {
                continue;
            }

            $text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
        }

        // Keep the brand when it is part of a model designation such as "<brand> 1234".
        if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
            $text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
        }

        if ($priceMin !== null || $priceMax !== null) {
            $text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
            $text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
            // The configured pattern is grouped so that, if it is an alternation, the
            // word boundaries apply to every alternative and not just the outer two.
            $text = preg_replace('/\b(?:' . $this->intentConfig->getPricePattern() . ')\b/u', ' ', $text) ?? $text;
        }

        $text = preg_replace('/\s+/u', ' ', $text) ?? $text;
        $text = trim($text, " \t\n\r\0\x0B-.,");

        $tokens = array_filter(
            explode(' ', $text),
            static fn(string $token): bool => mb_strlen($token) > 1
        );

        $tokens = $this->filterSearchTokens($tokens);

        return trim(implode(' ', $tokens));
    }

    /**
     * Builds the search text for a direct product query.
     *
     * Nothing is stripped semantically: whitespace is collapsed, leading/trailing
     * whitespace, hyphens, dots and commas are trimmed, and duplicate tokens are
     * removed while keeping the first occurrence.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return string Space-separated unique tokens.
     */
    private function buildDirectProductSearchText(string $prompt): string
    {
        $text = $prompt;
        $text = preg_replace('/\s+/u', ' ', $text) ?? $text;
        $text = trim($text, " \t\n\r\0\x0B-.,");

        $tokens = array_filter(
            explode(' ', $text),
            static fn(string $token): bool => mb_strlen($token) > 0
        );

        $tokens = array_values(array_unique($tokens));

        return trim(implode(' ', $tokens));
    }

    /**
     * Tells whether the prompt contains a whole-word match of the configured
     * history-context pattern.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return bool True when the pattern matches.
     */
    private function shouldUseHistoryContext(string $prompt): bool
    {
        return preg_match(
            '/\b(' . $this->config->getHistoryContextPattern() . ')\b/u',
            $prompt
        ) === 1;
    }

    /**
     * Returns the text of the last line starting with "Question:" in the history.
     *
     * @param string $historyContext Multi-line conversation history.
     *
     * @return string Trimmed question text, or '' when there is none or matching failed.
     */
    private function extractLatestQuestionFromHistory(string $historyContext): string
    {
        $result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);

        if ($result === false) {
            return '';
        }

        $questions = $matches[1] ?? [];

        if ($questions === []) {
            return '';
        }

        $lastQuestion = end($questions);

        return is_string($lastQuestion) ? trim($lastQuestion) : '';
    }

    /**
     * Merges two search texts into one de-duplicated token list.
     *
     * History tokens come first, followed by tokens of the current search text that
     * were not already present. Tokens of one character or less are dropped.
     *
     * @param string $historySearchText Search text derived from the previous question.
     * @param string $currentSearchText Search text derived from the current prompt.
     *
     * @return string Space-separated merged tokens.
     */
    private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
    {
        $tokens = [];

        foreach ([$historySearchText, $currentSearchText] as $text) {
            if ($text === '') {
                continue;
            }

            foreach (explode(' ', $text) as $token) {
                $token = trim($token);

                if ($token === '' || mb_strlen($token) <= 1) {
                    continue;
                }

                // Keyed by the token itself: de-duplicates while preserving first-seen order.
                $tokens[$token] = $token;
            }
        }

        return implode(' ', array_values($tokens));
    }

    /**
     * Removes configured stop words from a token list.
     *
     * NOTE(review): despite its name, getFilterSearchTokensPattern() is used here as
     * a list of exact tokens (strict in_array lookup), not as a regex - confirm
     * against the config class.
     *
     * @param string[] $tokens
     * @return string[]
     */
    private function filterSearchTokens(array $tokens): array
    {
        $stopWords = $this->config->getFilterSearchTokensPattern();

        return array_values(array_filter(
            $tokens,
            static fn(string $token): bool => !in_array($token, $stopWords, true)
        ));
    }

    /**
     * Decides whether the prompt should be treated as a direct product query.
     *
     * That is the case when it contains a model-like phrase, an accessory-like
     * phrase, or consists of at most four whitespace-separated tokens and contains
     * a digit. An empty prompt is never a direct product query.
     *
     * @param string $prompt Normalized prompt.
     *
     * @return bool True for a direct product query.
     */
    private function isDirectProductQuery(string $prompt): bool
    {
        if ($prompt === '') {
            return false;
        }

        if ($this->containsModelLikePhrase($prompt)) {
            return true;
        }

        if ($this->containsAccessoryLikePhrase($prompt)) {
            return true;
        }

        $tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        if (count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1) {
            return true;
        }

        return false;
    }

    /**
     * Tells whether the text contains one to three lower-case words followed by a
     * number of two to five digits (optionally with an alphanumeric/hyphen suffix).
     *
     * @param string $text Normalized text.
     *
     * @return bool True when such a phrase is found.
     */
    private function containsModelLikePhrase(string $text): bool
    {
        return preg_match(
            '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u',
            $text
        ) === 1;
    }

    /**
     * Tells whether the text contains one of the accessory keywords (indikator,
     * indicator, reagenz, reagent, kit, set) followed by a number of one to five
     * digits (optionally with an alphanumeric/hyphen suffix).
     *
     * @param string $text Normalized text.
     *
     * @return bool True when such a phrase is found.
     */
    private function containsAccessoryLikePhrase(string $text): bool
    {
        return preg_match(
            '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u',
            $text
        ) === 1;
    }

    /**
     * Tells whether the brand is directly followed by a number of two to five digits
     * in the prompt, i.e. the brand name is part of a model designation.
     *
     * @param string $prompt Normalized prompt.
     * @param string $brand  Normalized brand name.
     *
     * @return bool True when "<brand> <number>" occurs; false for an empty brand.
     */
    private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
    {
        if ($brand === '') {
            return false;
        }

        return preg_match(
            '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u',
            $prompt
        ) === 1;
    }

    /**
     * Converts a numeric string with either a comma or a dot as decimal separator
     * to a float.
     *
     * @param string $value Numeric text such as "49,90" or "49.90".
     *
     * @return float|null The parsed number, or null when the text is not numeric.
     */
    private function toFloat(string $value): ?float
    {
        $value = str_replace(',', '.', trim($value));

        return is_numeric($value) ? (float) $value : null;
    }
}