optimize cleanup search query shop api extends

This commit is contained in:
team2
2026-04-25 22:05:35 +02:00
parent 4823752b3e
commit 6cf8aac872
4 changed files with 327 additions and 5 deletions

View File

@@ -239,10 +239,148 @@ final readonly class CommerceQueryParser
);
$tokens = $this->normalizeSearchTokens($tokens);
$tokens = $this->compactShopSearchTokens($tokens);
return trim(implode(' ', $tokens));
}
/**
 * Keep the Store API query narrow without relying on endless spelling-specific stop words.
 *
 * Direct product queries often contain user instructions such as "show all as a list".
 * Shopware search performs best when the query only contains product-defining tokens:
 * model numbers, the immediately related model name, brands, and semantic commerce terms.
 *
 * Processing steps:
 *  1. Drop every token that isQueryNoiseToken() classifies as noise; return [] if nothing is left.
 *  2. For each model-number token keep the token itself, the unbroken run of model-context
 *     tokens directly before it (at most the configured window size), and a directly
 *     following model-suffix token.
 *  3. Keep every semantic shop token and every known brand token.
 *  4. If nothing was selected, fall back to the noise-filtered tokens (not de-duplicated).
 *
 * Selected tokens keep their original order and are de-duplicated. Every non-empty result
 * is passed through limitShopSearchTokens().
 *
 * @param string[] $tokens
 * @return string[]
 */
private function compactShopSearchTokens(array $tokens): array
{
    // Re-index after filtering so neighbouring positions ($index - 1, $index + 1) are contiguous.
    $tokens = array_values(array_filter(
        $tokens,
        fn(string $token): bool => !$this->isQueryNoiseToken($token)
    ));
    if ($tokens === []) {
        return [];
    }

    // Loop-invariant: read the window size once instead of on every inner-loop check.
    $contextWindow = $this->config->getModelContextTokenWindow();

    // Map of token index => true for every token that should survive.
    $keep = [];
    foreach ($tokens as $index => $token) {
        if ($this->isModelNumberToken($token)) {
            $keep[$index] = true;

            // Walk backwards from the model number; stop at the first token that is
            // missing or not model context so only an unbroken run is kept.
            for ($offset = 1; $offset <= $contextWindow; $offset++) {
                $previousIndex = $index - $offset;
                if (
                    !isset($tokens[$previousIndex])
                    || !$this->isLikelyModelContextToken($tokens[$previousIndex])
                ) {
                    break;
                }
                $keep[$previousIndex] = true;
            }

            // Only the single token right after the model number can be a suffix.
            $nextIndex = $index + 1;
            if (isset($tokens[$nextIndex]) && $this->isModelSuffixToken($tokens[$nextIndex])) {
                $keep[$nextIndex] = true;
            }
        }

        if ($this->isSemanticShopToken($token) || $this->isKnownBrandToken($token)) {
            $keep[$index] = true;
        }
    }

    if ($keep === []) {
        return $this->limitShopSearchTokens($tokens);
    }

    // $tokens is a list, so intersecting by key yields the kept tokens in their
    // original order without an explicit sort of the index map.
    $compacted = array_intersect_key($tokens, $keep);

    return $this->limitShopSearchTokens(array_values(array_unique($compacted)));
}
/**
 * Decide whether a token is noise that should not reach the shop search.
 *
 * The token is lowercased and trimmed, then the checks run in this order:
 *  - an empty token is noise;
 *  - a token matching the configured contains-digit pattern is never noise;
 *  - a token whose character length is at or below the configured minimum is noise;
 *  - a token accepted by isSearchControlToken() is noise;
 *  - otherwise the token is noise only if it matches the configured
 *    instruction/presentation pattern.
 *
 * A preg_match() failure (false) is treated the same as "no match".
 */
private function isQueryNoiseToken(string $token): bool
{
    $token = trim(mb_strtolower($token, 'UTF-8'));
    if ($token === '') {
        return true;
    }

    // The digit check runs before the length check, so short digit-bearing tokens are kept.
    if (preg_match($this->config->getContainsDigitPattern(), $token) === 1) {
        return false;
    }

    // Use the same explicit encoding as mb_strtolower() above so the length does not
    // depend on the runtime's mbstring internal encoding.
    if (mb_strlen($token, 'UTF-8') <= $this->config->getMinMeaningfulAlphaTokenLength()) {
        return true;
    }

    if ($this->isSearchControlToken($token)) {
        return true;
    }

    return preg_match($this->config->getInstructionOrPresentationTokenPattern(), $token) === 1;
}
/**
 * Tell whether the token matches the configured model-number pattern.
 *
 * A preg_match() failure (false) counts as "no match".
 */
private function isModelNumberToken(string $token): bool
{
    $pattern = $this->config->getModelNumberTokenPattern();
    $matchCount = preg_match($pattern, $token);

    return $matchCount === 1;
}
/**
 * Tell whether the token may belong to the model name in front of a model number.
 *
 * Noise tokens and semantic shop tokens are rejected first; any other token
 * qualifies only if it matches the configured model-context pattern.
 */
private function isLikelyModelContextToken(string $token): bool
{
    // Short-circuit order matters: noise check, then semantic check, then the pattern.
    $isExcluded = $this->isQueryNoiseToken($token) || $this->isSemanticShopToken($token);

    return !$isExcluded
        && preg_match($this->config->getModelContextTokenPattern(), $token) === 1;
}
/**
 * Tell whether the token can act as a suffix of a model number.
 *
 * Noise tokens never qualify; any other token must match the configured
 * model-suffix pattern.
 */
private function isModelSuffixToken(string $token): bool
{
    return !$this->isQueryNoiseToken($token)
        && preg_match($this->config->getModelSuffixTokenPattern(), $token) === 1;
}
/**
 * Tell whether the token is one of the configured semantic shop search tokens.
 *
 * The comparison is strict and exact; the token is not normalised here.
 */
private function isSemanticShopToken(string $token): bool
{
    $semanticTokens = $this->config->getSemanticShopSearchTokens();

    return in_array($token, $semanticTokens, true);
}
/**
 * Tell whether the token is one of the configured known brands.
 *
 * The comparison is strict and exact; the token is not normalised here.
 */
private function isKnownBrandToken(string $token): bool
{
    $knownBrands = $this->config->getKnownBrands();

    return in_array($token, $knownBrands, true);
}
/**
 * Cap the token list at the configured maximum number of shop search tokens.
 *
 * A maximum of zero or less disables the cap. When the list is already within
 * the maximum it is returned unchanged; otherwise only the leading tokens are kept.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function limitShopSearchTokens(array $tokens): array
{
    $maxTokens = $this->config->getMaxShopSearchTokens();
    $withinLimit = $maxTokens <= 0 || count($tokens) <= $maxTokens;

    return $withinLimit
        ? $tokens
        : array_slice($tokens, 0, $maxTokens);
}
private function shouldUseHistoryContext(string $prompt): bool
{
return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;