normalize($originalPrompt);
        $isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
        [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
        $sizes = $this->extractSizes($normalizedPrompt);
        $brand = $this->extractBrand($normalizedPrompt);

        $searchText = $this->buildSearchText(
            prompt: $normalizedPrompt,
            sizes: $sizes,
            brand: $brand,
            priceMin: $priceMin,
            priceMax: $priceMax,
            preserveDirectProductQuery: $isDirectProductQuery
        );

        // History is only consulted for non-direct queries, when a history string
        // was supplied and shouldUseHistoryContext() accepts the current prompt.
        if (
            !$isDirectProductQuery
            && $historyContext !== ''
            && $this->shouldUseHistoryContext($normalizedPrompt)
        ) {
            $historyParse = $this->parseHistoryContext($historyContext);
            if ($historyParse !== null) {
                $searchText = $this->mergeSearchTexts(
                    $historyParse['searchText'],
                    $searchText
                );

                // A brand found in the current prompt always wins; the history
                // brand only fills the gap when the prompt did not yield one.
                if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') {
                    $brand = $historyParse['brand'];
                }
            }
        }

        // Fall back to the full normalized prompt when the search text is empty.
        $finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;

        return new CommerceSearchQuery(
            originalPrompt: $originalPrompt,
            normalizedPrompt: $normalizedPrompt,
            searchText: $finalSearchText,
            brand: $brand,
            sizes: $sizes,
            properties: [],
            priceMin: $priceMin,
            priceMax: $priceMax,
            intent: $intent,
            needsLlmFallback: false,
        );
    }

    /**
     * Normalizes a raw prompt into the lowercase, sanitized form used by all extractors.
     *
     * Steps, in order: delegate to the injected text normalizer, trim and lowercase,
     * apply the configured search/replace pairs, replace everything matched by the
     * sanitize pattern with a space, collapse whitespace, apply the configured
     * token corrections, and trim again. A failing preg_replace() leaves the value
     * from the previous step untouched.
     */
    private function normalize(string $prompt): string
    {
        $value = $this->textNormalizer->normalize($prompt);
        // Explicit encoding, consistent with the other mb_strtolower() calls in this class.
        $value = mb_strtolower(trim($value), 'UTF-8');
        $value = str_replace(
            $this->config->getNormalizationSearch(),
            $this->config->getNormalizationReplace(),
            $value
        );
        $value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
        $value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
        $value = $this->applySearchTokenCorrections($value);

        return trim($value);
    }

    /**
     * Extracts an optional price range from the prompt.
     *
     * The "between" pattern is tried first; when both of its captures parse as
     * numbers they are returned ordered as [lower, upper]. Otherwise the max and
     * min patterns are evaluated independently, so either bound may stay null.
     *
     * @return array{0:?float,1:?float}
     */
    private function extractPriceRange(string $prompt): array
    {
        $priceMin = null;
        $priceMax = null;

        if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) {
            $a = $this->toFloat($matches[1]);
            $b = $this->toFloat($matches[2]);

            if ($a !== null && $b !== null) {
                return [min($a, $b), max($a, $b)];
            }
        }

        if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) {
            $priceMax = $this->toFloat($matches[1]);
        }

        if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) {
            $priceMin = $this->toFloat($matches[1]);
        }

        return [$priceMin, $priceMax];
    }

    /**
     * Collects size values from the prompt.
     *
     * Uses capture group 1 of the size extraction pattern plus the full matches
     * of the size token value pattern. Values are trimmed, empty strings are
     * dropped and duplicates removed. If the first pattern fails with an error
     * (preg_match_all() returns false) an empty list is returned immediately;
     * an error in the second pattern is ignored.
     *
     * @return string[]
     */
    private function extractSizes(string $prompt): array
    {
        $sizes = [];

        if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) {
            return [];
        }

        foreach ($matches[1] ?? [] as $size) {
            $sizes[] = trim((string) $size);
        }

        if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) {
            foreach ($tokenMatches[0] ?? [] as $sizeToken) {
                $sizes[] = trim((string) $sizeToken);
            }
        }

        return array_values(array_unique(array_filter(
            $sizes,
            static fn(string $value): bool => $value !== ''
        )));
    }

    /**
     * Returns the first configured brand whose normalized form occurs in the prompt.
     *
     * Matching is a plain substring test against the (already normalized) prompt.
     * The returned value is the normalized brand, not the configured spelling.
     */
    private function extractBrand(string $prompt): ?string
    {
        foreach ($this->config->getKnownBrands() as $brand) {
            $normalizedBrand = $this->normalize((string) $brand);

            if ($normalizedBrand !== '' && str_contains($prompt, $normalizedBrand)) {
                return $normalizedBrand;
            }
        }

        return null;
    }

    /**
     * Reduces a normalized prompt to the text sent to the shop search.
     *
     * Direct product queries are delegated to buildDirectProductSearchText().
     * Otherwise configured filler phrases and the extracted sizes are removed,
     * price expressions are stripped when a price bound was extracted, and the
     * remaining tokens are filtered by minimum length and normalized.
     *
     * $brand is accepted for call-site symmetry but is not read here: brand
     * terms intentionally stay in the search text (see the comment below).
     *
     * @param string[] $sizes
     */
    private function buildSearchText(
        string $prompt,
        array $sizes,
        ?string $brand,
        ?float $priceMin,
        ?float $priceMax,
        bool $preserveDirectProductQuery = false
    ): string {
        if ($preserveDirectProductQuery) {
            return $this->buildDirectProductSearchText($prompt);
        }

        // Padding with spaces lets str_replace() match phrases only at token boundaries.
        $text = $this->wrapForPhraseReplacement($prompt);

        foreach ($this->config->getPhrasesToRemove() as $phrase) {
            $normalizedPhrase = $this->normalize((string) $phrase);
            if ($normalizedPhrase === '') {
                continue;
            }

            $text = str_replace(
                $this->wrapForPhraseReplacement($normalizedPhrase),
                ' ',
                $text
            );
        }

        foreach ($sizes as $size) {
            $normalizedSize = $this->normalize((string) $size);
            if ($normalizedSize === '') {
                continue;
            }

            $text = preg_replace(
                $this->config->buildExactTokenRemovalPattern($normalizedSize),
                ' ',
                $text
            ) ?? $text;
        }

        // Keep known brand terms in the shop search text because the Store API
        // request does not add a separate manufacturer filter.

        if ($priceMin !== null || $priceMax !== null) {
            foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) {
                $text = preg_replace($pattern, ' ', $text) ?? $text;
            }
        }

        $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
        $text = trim($text, $this->config->getSearchTextTrimCharacters());

        // Only tokens strictly longer than the configured minimum survive.
        $tokens = array_filter(
            explode(' ', $text),
            fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
        );

        $tokens = $this->normalizeSearchTokens($tokens);

        return trim(implode(' ', $tokens));
    }

    /**
     * Builds the search text for a direct product query.
     *
     * Collapses whitespace, trims the configured characters, keeps tokens whose
     * length is at least the configured direct-product minimum, normalizes them
     * and finally compacts them via compactShopSearchTokens().
     */
    private function buildDirectProductSearchText(string $prompt): string
    {
        $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt;
        $text = trim($text, $this->config->getSearchTextTrimCharacters());

        $tokens = array_filter(
            explode(' ', $text),
            fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
        );

        $tokens = $this->normalizeSearchTokens($tokens);
        $tokens = $this->compactShopSearchTokens($tokens);

        return trim(implode(' ', $tokens));
    }

    /**
     * Keep the Store API query narrow without relying on endless spelling-specific stop words.
     *
     * Direct product queries often contain user instructions such as "show all as a list".
     * Shopware search performs best when the query only contains product-defining tokens:
     * model numbers, the immediately related model name, brands, and semantic commerce terms.
     *
     * Noise tokens are dropped first. If no remaining token qualifies as
     * product-defining, the noise-filtered tokens are returned (subject to the
     * token limit) instead of an empty list.
     *
     * @param string[] $tokens
     * @return string[]
     */
    private function compactShopSearchTokens(array $tokens): array
    {
        $tokens = array_values(array_filter(
            $tokens,
            fn(string $token): bool => !$this->isQueryNoiseToken($token)
        ));

        if ($tokens === []) {
            return [];
        }

        // Loop-invariant: read the look-behind window once.
        $contextWindow = $this->config->getModelContextTokenWindow();

        // Set of token indexes to keep; keyed by index so order can be restored.
        $keep = [];

        foreach ($tokens as $index => $token) {
            if ($this->isModelNumberToken($token)) {
                $keep[$index] = true;

                // Walk backwards from the model number: semantic tokens are kept
                // and the walk continues; the first token that is not a likely
                // model-context token ends the walk.
                for ($offset = 1; $offset <= $contextWindow; $offset++) {
                    $previousIndex = $index - $offset;

                    if (!isset($tokens[$previousIndex])) {
                        break;
                    }

                    if ($this->isSemanticShopToken($tokens[$previousIndex])) {
                        $keep[$previousIndex] = true;
                        continue;
                    }

                    if (!$this->isLikelyModelContextToken($tokens[$previousIndex])) {
                        break;
                    }

                    $keep[$previousIndex] = true;
                }

                // At most one directly following suffix token is kept.
                $nextIndex = $index + 1;
                if (isset($tokens[$nextIndex]) && $this->isModelSuffixToken($tokens[$nextIndex])) {
                    $keep[$nextIndex] = true;
                }
            }

            if (
                $this->isSemanticShopToken($token)
                || $this->isKnownBrandToken($token)
                || $this->isMeasurementValueToken($token)
            ) {
                $keep[$index] = true;
            }
        }

        if ($keep === []) {
            return $this->limitShopSearchTokens($tokens);
        }

        // Restore the original token order before emitting.
        ksort($keep);

        $compacted = [];
        foreach (array_keys($keep) as $index) {
            $compacted[] = $tokens[$index];
        }

        return $this->limitShopSearchTokens(array_values(array_unique($compacted)));
    }

    /**
     * Decides whether a token carries no product information.
     *
     * Check order matters: measurement values and any token containing a digit
     * are never noise; after that, tokens not longer than the configured
     * alphabetic minimum, search control tokens and tokens matching the
     * instruction/presentation pattern are noise.
     */
    private function isQueryNoiseToken(string $token): bool
    {
        $token = trim(mb_strtolower($token, 'UTF-8'));

        if ($token === '') {
            return true;
        }

        if ($this->isMeasurementValueToken($token)) {
            return false;
        }

        if (preg_match($this->config->getContainsDigitPattern(), $token) === 1) {
            return false;
        }

        if (mb_strlen($token) <= $this->config->getMinMeaningfulAlphaTokenLength()) {
            return true;
        }

        if ($this->isSearchControlToken($token)) {
            return true;
        }

        return preg_match($this->config->getInstructionOrPresentationTokenPattern(), $token) === 1;
    }

    /**
     * True when the token matches the configured model number pattern.
     */
    private function isModelNumberToken(string $token): bool
    {
        return preg_match($this->config->getModelNumberTokenPattern(), $token) === 1;
    }

    /**
     * True when the token matches the configured measurement value pattern.
     */
    private function isMeasurementValueToken(string $token): bool
    {
        return preg_match($this->config->getMeasurementValueTokenPattern(), $token) === 1;
    }

    /**
     * True when the token may belong to the model name preceding a model number.
     *
     * Noise tokens and semantic shop tokens are excluded before the configured
     * model-context pattern is consulted.
     */
    private function isLikelyModelContextToken(string $token): bool
    {
        if ($this->isQueryNoiseToken($token)) {
            return false;
        }

        if ($this->isSemanticShopToken($token)) {
            return false;
        }

        return preg_match($this->config->getModelContextTokenPattern(), $token) === 1;
    }

    /**
     * True when the token is not noise and matches the configured model suffix pattern.
     */
    private function isModelSuffixToken(string $token): bool
    {
        if ($this->isQueryNoiseToken($token)) {
            return false;
        }

        return preg_match($this->config->getModelSuffixTokenPattern(), $token) === 1;
    }

    /**
     * True when the token is listed verbatim in the configured semantic shop search tokens.
     */
    private function isSemanticShopToken(string $token): bool
    {
        return in_array($token, $this->config->getSemanticShopSearchTokens(), true);
    }

    /**
     * True when the token equals one of the configured brands.
     *
     * Tokens reaching this method have been lowercased by normalizeSearchTokens(),
     * while extractBrand() treats the configured brands as not yet normalized.
     * An exact match is tried first; after that the comparison is case-insensitive
     * so a brand configured as "Bosch" is still recognised for the token "bosch".
     */
    private function isKnownBrandToken(string $token): bool
    {
        $brands = $this->config->getKnownBrands();

        if (in_array($token, $brands, true)) {
            return true;
        }

        $needle = mb_strtolower(trim($token), 'UTF-8');
        if ($needle === '') {
            return false;
        }

        foreach ($brands as $brand) {
            if (mb_strtolower(trim((string) $brand), 'UTF-8') === $needle) {
                return true;
            }
        }

        return false;
    }

    /**
     * Truncates the token list to the configured maximum.
     *
     * A limit of zero or less disables truncation.
     *
     * @param string[] $tokens
     * @return string[]
     */
    private function limitShopSearchTokens(array $tokens): array
    {
        $limit = $this->config->getMaxShopSearchTokens();

        if ($limit <= 0 || count($tokens) <= $limit) {
            return $tokens;
        }

        return array_slice($tokens, 0, $limit);
    }

    /**
     * True when the prompt matches the configured history-context pattern.
     */
    private function shouldUseHistoryContext(string $prompt): bool
    {
        return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
    }

    /**
     * Returns the last question found in the history context, trimmed.
     *
     * Questions are capture group 1 of the configured history question pattern.
     * An empty string is returned on a regex error or when nothing matched.
     */
    private function extractLatestQuestionFromHistory(string $historyContext): string
    {
        $result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches);

        if ($result === false) {
            return '';
        }

        $questions = $matches[1] ?? [];
        if ($questions === []) {
            return '';
        }

        $lastQuestion = end($questions);

        return is_string($lastQuestion) ? trim($lastQuestion) : '';
    }

    /**
     * Merges history and current search text into one de-duplicated token string.
     *
     * History tokens come first. Tokens not longer than the configured minimum
     * are skipped; the rest are normalized and de-duplicated while keeping the
     * position of their first occurrence.
     */
    private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
    {
        // Loop-invariant: read the minimum token length once.
        $minLength = $this->config->getMinSearchTokenLength();

        $tokens = [];

        foreach ([$historySearchText, $currentSearchText] as $text) {
            if ($text === '') {
                continue;
            }

            foreach (explode(' ', $text) as $token) {
                $token = trim($token);

                if ($token === '' || mb_strlen($token) <= $minLength) {
                    continue;
                }

                // Keyed by the normalized token so repeated tokens collapse.
                foreach ($this->normalizeSearchTokens([$token]) as $normalizedToken) {
                    $tokens[$normalizedToken] = $normalizedToken;
                }
            }
        }

        return implode(' ', array_values($tokens));
    }

    /**
     * Alias of normalizeSearchTokens().
     *
     * @param string[] $tokens
     * @return string[]
     */
    private function filterSearchTokens(array $tokens): array
    {
        return $this->normalizeSearchTokens($tokens);
    }

    /**
     * Lowercases, corrects and canonicalizes tokens and drops search control tokens.
     *
     * Each token is looked up first in the correction map and then in the
     * canonical map. The result is de-duplicated, keeping first occurrences.
     *
     * @param string[] $tokens
     * @return string[]
     */
    private function normalizeSearchTokens(array $tokens): array
    {
        // Loop-invariant: fetch both lookup maps once instead of once per token.
        $corrections = $this->config->getSearchTokenCorrections();
        $canonicalMap = $this->config->getSearchTokenCanonicalMap();

        $normalizedTokens = [];

        foreach ($tokens as $token) {
            $token = trim(mb_strtolower((string) $token, 'UTF-8'));
            if ($token === '') {
                continue;
            }

            $token = $corrections[$token] ?? $token;
            $token = $canonicalMap[$token] ?? $token;

            if ($this->isSearchControlToken($token)) {
                continue;
            }

            $normalizedTokens[$token] = $token;
        }

        return array_values($normalizedTokens);
    }

    /**
     * Applies the configured token corrections to whole words in a text.
     *
     * Every correction is applied as a word-boundary-delimited replacement, in
     * configuration order, each one operating on the output of the previous one.
     * Whitespace is collapsed afterwards.
     */
    private function applySearchTokenCorrections(string $text): string
    {
        if ($text === '') {
            return '';
        }

        foreach ($this->config->getSearchTokenCorrections() as $from => $to) {
            $text = preg_replace(
                '/\b' . preg_quote((string) $from, '/') . '\b/u',
                (string) $to,
                $text
            ) ?? $text;
        }

        return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
    }

    /**
     * True for tokens that steer the search rather than describe a product.
     *
     * Covers the empty token, the configured filter tokens and a fixed list of
     * generic shop/purchase words.
     */
    private function isSearchControlToken(string $token): bool
    {
        // Explicit encoding, consistent with the other mb_strtolower() calls in this class.
        $token = trim(mb_strtolower($token, 'UTF-8'));

        if ($token === '') {
            return true;
        }

        if (in_array($token, $this->config->getFilterSearchTokens(), true)) {
            return true;
        }

        return in_array($token, [
            'shop',
            'store',
            'produkt',
            'produkte',
            'artikel',
            'kaufen',
            'kaufe',
            'bestellen',
            'bestelle',
            'online',
        ], true);
    }

    /**
     * Decides whether the prompt names a concrete product.
     *
     * True when the prompt matches the model-like or accessory-like pattern, or
     * when its normalized tokens do not exceed the configured maximum count and
     * match the direct-product digit pattern.
     */
    private function isDirectProductQuery(string $prompt): bool
    {
        if ($prompt === '') {
            return false;
        }

        if ($this->containsModelLikePhrase($prompt)) {
            return true;
        }

        if ($this->containsAccessoryLikePhrase($prompt)) {
            return true;
        }

        // preg_split() returns false on error; treat that as "no tokens".
        $tokens = preg_split(
            $this->config->getWhitespaceSplitPattern(),
            $prompt,
            -1,
            PREG_SPLIT_NO_EMPTY
        ) ?: [];

        $tokens = $this->filterSearchTokens($tokens);

        return count($tokens) <= $this->config->getDirectProductMaxTokens()
            && preg_match($this->config->getDirectProductDigitPattern(), implode(' ', $tokens)) === 1;
    }

    /**
     * True when the text matches the configured model-like pattern.
     */
    private function containsModelLikePhrase(string $text): bool
    {
        return preg_match($this->config->getModelLikePattern(), $text) === 1;
    }

    /**
     * True when the text matches the configured accessory-like pattern.
     */
    private function containsAccessoryLikePhrase(string $text): bool
    {
        return preg_match($this->config->getAccessoryLikePattern(), $text) === 1;
    }

    /**
     * True when the prompt matches the brand-specific pattern built by the config.
     *
     * An empty brand never matches.
     */
    private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
    {
        if ($brand === '') {
            return false;
        }

        return preg_match(
            $this->config->buildBrandPartOfModelPattern($brand),
            $prompt
        ) === 1;
    }

    /**
     * Parses a price string into a float, or null when it is not numeric.
     *
     * A single comma is treated as the decimal separator ("12,5" => 12.5).
     * Values that unambiguously use a grouping separator are also accepted:
     * "1.299,99" and "1,299.99" => 1299.99, "1.299.000" => 1299000.0. Such values
     * previously produced a multi-dot string and were rejected, which dropped the
     * price bound. Inputs with at most one separator behave exactly as before,
     * so "1.000" is still read as 1.0.
     */
    private function toFloat(string $value): ?float
    {
        $value = trim($value);

        // Dot-grouped: dots every three digits plus a comma decimal part,
        // or at least two dot groups without a decimal part.
        $dotGrouped = '/^[+-]?\d{1,3}(?:\.\d{3})+,\d+$|^[+-]?\d{1,3}(?:\.\d{3}){2,}$/';
        // Comma-grouped: the mirror image with the separators swapped.
        $commaGrouped = '/^[+-]?\d{1,3}(?:,\d{3})+\.\d+$|^[+-]?\d{1,3}(?:,\d{3}){2,}$/';

        if (preg_match($dotGrouped, $value) === 1) {
            $value = str_replace('.', '', $value);
        } elseif (preg_match($commaGrouped, $value) === 1) {
            $value = str_replace(',', '', $value);
        }

        $value = str_replace(',', '.', $value);

        return is_numeric($value) ? (float) $value : null;
    }

    /**
     * Derives search text and brand from the latest question in the history context.
     *
     * The latest question runs through the same normalize / extract / build
     * pipeline as a current prompt. Returns null when no question is found.
     *
     * @return array{searchText:string, brand:?string}|null
     */
    private function parseHistoryContext(string $historyContext): ?array
    {
        $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
        if ($latestHistoryQuestion === '') {
            return null;
        }

        $normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
        $isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
        [$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
        $historySizes = $this->extractSizes($normalizedHistoryPrompt);
        $historyBrand = $this->extractBrand($normalizedHistoryPrompt);

        $historySearchText = $this->buildSearchText(
            prompt: $normalizedHistoryPrompt,
            sizes: $historySizes,
            brand: $historyBrand,
            priceMin: $historyPriceMin,
            priceMax: $historyPriceMax,
            preserveDirectProductQuery: $isDirectHistoryProductQuery
        );

        return [
            'searchText' => $historySearchText,
            'brand' => $historyBrand,
        ];
    }

    /**
     * Pads the text with one space on each side so phrases can be matched at token boundaries.
     */
    private function wrapForPhraseReplacement(string $text): string
    {
        return ' ' . $text . ' ';
    }
}