optimize cleanup search query shop api

This commit is contained in:
team2
2026-04-25 21:41:39 +02:00
parent c90675d17d
commit 4823752b3e
2 changed files with 115 additions and 11 deletions

View File

@@ -88,6 +88,7 @@ final readonly class CommerceQueryParser
);
$value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
$value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
$value = $this->applySearchTokenCorrections($value);
return trim($value);
}
@@ -222,7 +223,7 @@ final readonly class CommerceQueryParser
fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
);
$tokens = $this->filterSearchTokens($tokens);
$tokens = $this->normalizeSearchTokens($tokens);
return trim(implode(' ', $tokens));
}
@@ -237,8 +238,7 @@ final readonly class CommerceQueryParser
fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
);
$tokens = $this->filterSearchTokens($tokens);
$tokens = array_values(array_unique($tokens));
$tokens = $this->normalizeSearchTokens($tokens);
return trim(implode(' ', $tokens));
}
@@ -282,11 +282,9 @@ final readonly class CommerceQueryParser
continue;
}
if ($this->isSearchControlToken($token)) {
continue;
foreach ($this->normalizeSearchTokens([$token]) as $normalizedToken) {
$tokens[$normalizedToken] = $normalizedToken;
}
$tokens[$token] = $token;
}
}
@@ -299,10 +297,52 @@ final readonly class CommerceQueryParser
*/
private function filterSearchTokens(array $tokens): array
{
// First return: keeps only the tokens for which isSearchControlToken() is false
// and reindexes the result from 0.
// NOTE(review): two return statements follow each other here. As written only the
// first one can execute, so the normalizeSearchTokens() call below is unreachable.
// This looks like a removed and an added diff line shown together - confirm which
// return is the intended body.
return array_values(array_filter(
$tokens,
fn(string $token): bool => !$this->isSearchControlToken($token)
));
return $this->normalizeSearchTokens($tokens);
}
/**
 * Normalizes raw search tokens into a de-duplicated list of strings.
 *
 * Each token is cast to string, lowercased (UTF-8) and trimmed. It is then looked up
 * in the configured correction map and, after that, in the configured canonical map.
 * Empty tokens and tokens for which isSearchControlToken() returns true are dropped.
 * The first occurrence of a normalized token determines its position in the result.
 *
 * @param string[] $tokens Raw tokens.
 * @return string[] Unique normalized tokens, indexed from 0.
 */
private function normalizeSearchTokens(array $tokens): array
{
    // Fetch both lookup tables once instead of rebuilding them on every iteration.
    $corrections = $this->config->getSearchTokenCorrections();
    $canonicalMap = $this->config->getSearchTokenCanonicalMap();

    // $seen is used only for membership checks. PHP casts decimal-integer string keys
    // such as "1000" to int, so the tokens themselves are collected as values in a
    // separate list and never read back from array keys.
    $seen = [];
    $normalizedTokens = [];

    foreach ($tokens as $token) {
        $token = trim(mb_strtolower((string) $token, 'UTF-8'));
        if ($token === '') {
            continue;
        }

        $token = $corrections[$token] ?? $token;
        $token = $canonicalMap[$token] ?? $token;

        if ($this->isSearchControlToken($token)) {
            continue;
        }

        if (isset($seen[$token])) {
            continue;
        }

        $seen[$token] = true;
        $normalizedTokens[] = $token;
    }

    return $normalizedTokens;
}
/**
 * Replaces whole-word occurrences of every configured correction key in free text.
 *
 * Corrections are applied one after another in the order the config returns them.
 * Afterwards the configured whitespace-collapse pattern is applied with a single
 * space as the replacement. If a regex call fails, the text from before that call
 * is kept.
 *
 * NOTE(review): matching is case-sensitive here, while normalizeSearchTokens()
 * lowercases a token before consulting the same map - confirm whether $text is
 * already lowercased by the caller.
 *
 * @param string $text Text to correct.
 * @return string Corrected text; '' when $text is ''.
 */
private function applySearchTokenCorrections(string $text): string
{
    if ($text === '') {
        return '';
    }

    foreach ($this->config->getSearchTokenCorrections() as $from => $to) {
        $needle = (string) $from;
        if ($needle === '') {
            // An empty key would build /\b\b/u, which matches at every word boundary.
            continue;
        }

        $replacement = (string) $to;

        // A callback inserts the replacement literally. As a preg_replace() replacement
        // string, "$1" or "\1" inside the value would be read as a backreference.
        $text = preg_replace_callback(
            '/\b' . preg_quote($needle, '/') . '\b/u',
            static fn (array $match): string => $replacement,
            $text
        ) ?? $text;
    }

    return preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
}
private function isSearchControlToken(string $token): bool

View File

@@ -28,6 +28,8 @@ final class CommerceQueryParserConfig
'suche',
'habt ihr',
'gibt es',
'gebe mir',
'gib mir',
'zeige mir',
'welches gerät',
'welche gerät',
@@ -36,6 +38,8 @@ final class CommerceQueryParserConfig
'welches ist am besten',
'alternative',
'alternativen',
'unter anderem',
'u a',
'welche',
'welcher',
'welches',
@@ -100,6 +104,66 @@ final class CommerceQueryParserConfig
'verfuegbarkeit',
'prüfe',
'pruefe',
'den',
'die',
'das',
'der',
'dem',
'des',
'und',
'oder',
'sowie',
'seine',
'seinen',
'seiner',
'seinem',
'seines',
'siene',
'sienen',
'siener',
'sienem',
'sienes',
'gebe',
'gib',
'nenne',
'nenn',
'preis',
'preise',
'preisen',
'kostet',
'kosten',
'ua',
];
}
/**
 * Returns the replacement table for search tokens.
 *
 * @return array<string, string> Keys are the spellings to replace, values are what
 *                               replaces them. Entry order is the declaration order below.
 */
public function getSearchTokenCorrections(): array
{
    $pairs = [
        ['siene', 'seine'],
        ['sienen', 'seinen'],
        ['siener', 'seiner'],
        ['sienem', 'seinem'],
        ['sienes', 'seines'],
        ['indicatoren', 'indikatoren'],
    ];

    $corrections = [];
    foreach ($pairs as [$from, $to]) {
        $corrections[$from] = $to;
    }

    return $corrections;
}
/**
 * Returns the table that maps token variants onto one canonical token.
 *
 * @return array<string, string> Keys are the variants, values are the canonical form
 *                               each variant is mapped to.
 */
public function getSearchTokenCanonicalMap(): array
{
    // Grouped by canonical form; flattened below into variant => canonical.
    $variantsByCanonical = [
        'indikator' => ['indikatoren', 'indicators', 'indicator'],
        'reagenz' => ['reagenzien', 'reagents', 'reagent'],
        'produkt' => ['produkte'],
    ];

    $canonicalMap = [];
    foreach ($variantsByCanonical as $canonical => $variants) {
        foreach ($variants as $variant) {
            $canonicalMap[$variant] = $canonical;
        }
    }

    return $canonicalMap;
}