optimize cleanup search query shop api extends
This commit is contained in:
@@ -239,10 +239,148 @@ final readonly class CommerceQueryParser
|
||||
);
|
||||
|
||||
$tokens = $this->normalizeSearchTokens($tokens);
|
||||
$tokens = $this->compactShopSearchTokens($tokens);
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
/**
 * Keep the Store API query narrow without relying on endless spelling-specific stop words.
 *
 * Direct product queries often contain user instructions such as "show all as a list".
 * Shopware search performs best when the query only contains product-defining tokens:
 * model numbers, the immediately related model name, brands, and semantic commerce terms.
 *
 * Steps:
 *  1. Drop every token flagged by isQueryNoiseToken() and re-index the remainder.
 *  2. Mark each model-number token, the unbroken run of context tokens directly
 *     before it (at most the configured window), and a suffix token directly after it.
 *     Semantic shop tokens and known brands are marked as well.
 *  3. Return the marked tokens in query order, de-duplicated and capped by
 *     limitShopSearchTokens(). When nothing was marked, the noise-free tokens are
 *     capped and returned instead.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function compactShopSearchTokens(array $tokens): array
{
    $tokens = array_values(array_filter(
        $tokens,
        fn (string $token): bool => !$this->isQueryNoiseToken($token),
    ));

    if ($tokens === []) {
        return [];
    }

    // The window size does not change while scanning, so read it once instead of
    // on every iteration of the backward look-up loop.
    $contextWindow = $this->config->getModelContextTokenWindow();

    // Map of token index => true for every token that must survive compaction.
    $keep = [];

    foreach ($tokens as $index => $token) {
        if (!$this->isModelNumberToken($token)) {
            if ($this->isSemanticShopToken($token) || $this->isKnownBrandToken($token)) {
                $keep[$index] = true;
            }

            continue;
        }

        // Model numbers are always kept; no need to test them as semantic/brand tokens.
        $keep[$index] = true;

        // Walk backwards and keep the contiguous model context; stop at the first
        // token that is missing (start of query) or is not a context token.
        for ($offset = 1; $offset <= $contextWindow; $offset++) {
            $previousIndex = $index - $offset;

            if (!isset($tokens[$previousIndex]) || !$this->isLikelyModelContextToken($tokens[$previousIndex])) {
                break;
            }

            $keep[$previousIndex] = true;
        }

        // Only the token immediately after the model number may act as a suffix.
        $nextIndex = $index + 1;

        if (isset($tokens[$nextIndex]) && $this->isModelSuffixToken($tokens[$nextIndex])) {
            $keep[$nextIndex] = true;
        }
    }

    if ($keep === []) {
        return $this->limitShopSearchTokens($tokens);
    }

    // $tokens is a re-indexed list, so intersecting by key yields the kept tokens
    // in their original query order regardless of the order they were marked in.
    $compacted = array_intersect_key($tokens, $keep);

    return $this->limitShopSearchTokens(array_values(array_unique($compacted)));
}
|
||||
|
||||
/**
 * Decide whether a token should be dropped from the shop search query.
 *
 * The token is lower-cased (UTF-8) and trimmed, then checked in this order:
 *  - an empty token is noise;
 *  - a token matching the configured contains-digit pattern is never noise;
 *  - a token whose length is at or below the configured minimum meaningful
 *    alpha-token length is noise;
 *  - a token accepted by isSearchControlToken() is noise;
 *  - otherwise it is noise only when it matches the configured
 *    instruction/presentation pattern.
 *
 * @param string $token Raw token from the query.
 * @return bool True when the token should be removed.
 */
private function isQueryNoiseToken(string $token): bool
{
    $token = trim(mb_strtolower($token, 'UTF-8'));

    if ($token === '') {
        return true;
    }

    // Digit check runs before the length check so short digit-bearing tokens survive.
    if (preg_match($this->config->getContainsDigitPattern(), $token) === 1) {
        return false;
    }

    // Count characters with the same explicit encoding used for lower-casing above,
    // so the result does not depend on the runtime's mb_internal_encoding() setting.
    if (mb_strlen($token, 'UTF-8') <= $this->config->getMinMeaningfulAlphaTokenLength()) {
        return true;
    }

    if ($this->isSearchControlToken($token)) {
        return true;
    }

    return preg_match($this->config->getInstructionOrPresentationTokenPattern(), $token) === 1;
}
|
||||
|
||||
/**
 * Tell whether the token matches the configured model-number pattern.
 *
 * @param string $token Token to test, used as given.
 * @return bool True only when the pattern matches.
 */
private function isModelNumberToken(string $token): bool
{
    $modelNumberPattern = $this->config->getModelNumberTokenPattern();
    $matchCount = preg_match($modelNumberPattern, $token);

    return $matchCount === 1;
}
|
||||
|
||||
/**
 * Tell whether the token may be kept as context for a model number.
 *
 * A token qualifies when it is not query noise, is not a semantic shop token,
 * and matches the configured model-context pattern. The checks short-circuit
 * in that order.
 *
 * @param string $token Token to test.
 * @return bool True when all three conditions hold.
 */
private function isLikelyModelContextToken(string $token): bool
{
    return !$this->isQueryNoiseToken($token)
        && !$this->isSemanticShopToken($token)
        && preg_match($this->config->getModelContextTokenPattern(), $token) === 1;
}
|
||||
|
||||
/**
 * Tell whether the token may be kept as a suffix of a model number.
 *
 * A token qualifies when it is not query noise and matches the configured
 * model-suffix pattern; the pattern is only evaluated for non-noise tokens.
 *
 * @param string $token Token to test.
 * @return bool True when both conditions hold.
 */
private function isModelSuffixToken(string $token): bool
{
    return !$this->isQueryNoiseToken($token)
        && preg_match($this->config->getModelSuffixTokenPattern(), $token) === 1;
}
|
||||
|
||||
/**
 * Tell whether the token is one of the configured semantic shop search tokens.
 *
 * The comparison is strict, so the token must equal a configured entry exactly.
 *
 * @param string $token Token to look up, used as given.
 * @return bool True when the token is in the configured list.
 */
private function isSemanticShopToken(string $token): bool
{
    $semanticTokens = $this->config->getSemanticShopSearchTokens();

    return in_array($token, $semanticTokens, true);
}
|
||||
|
||||
/**
 * Tell whether the token is one of the configured known brands.
 *
 * The comparison is strict, so the token must equal a configured entry exactly.
 *
 * @param string $token Token to look up, used as given.
 * @return bool True when the token is in the configured brand list.
 */
private function isKnownBrandToken(string $token): bool
{
    $knownBrands = $this->config->getKnownBrands();

    return in_array($token, $knownBrands, true);
}
|
||||
|
||||
/**
 * Cap the token list at the configured maximum number of shop search tokens.
 *
 * A configured maximum of zero or less disables the cap. When the cap applies,
 * the leading tokens are kept and the rest are discarded.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function limitShopSearchTokens(array $tokens): array
{
    $maxTokens = $this->config->getMaxShopSearchTokens();

    if ($maxTokens > 0 && count($tokens) > $maxTokens) {
        return array_slice($tokens, 0, $maxTokens);
    }

    return $tokens;
}
|
||||
|
||||
private function shouldUseHistoryContext(string $prompt): bool
|
||||
{
|
||||
return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
|
||||
|
||||
Reference in New Issue
Block a user