diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index 1a7e950..ed7adaf 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -5,77 +5,37 @@ declare(strict_types=1); namespace App\Commerce; use App\Commerce\Dto\CommerceSearchQuery; +use App\Knowledge\Retrieval\QueryCleaner; +use App\Knowledge\Text\TextNormalizer; final class CommerceQueryParser { - /** - * @var string[] - */ - private array $knownColors = [ - 'schwarz', - 'weiß', - 'weiss', - 'rot', - 'blau', - 'grün', - 'gruen', - 'gelb', - 'grau', - 'beige', - 'rosa', - 'pink', - 'orange', - 'braun', - ]; + public function __construct( + private readonly TextNormalizer $textNormalizer, + private readonly QueryCleaner $queryCleaner, + ) + { - /** - * @var string[] - */ - private array $knownCategories = [ - 'sneaker', - 'schuhe', - 'hoodie', - 't-shirt', - 'shirt', - 'jacke', - 'regenjacke', - 'trinkflasche', - 'flasche', - 'rucksack', - 'tasche', - 'mütze', - 'muetze', - 'kappe', - 'hose', - 'pullover', - ]; + } /** * @var string[] */ private array $knownBrands = [ - 'nike', - 'adidas', - 'puma', - 'reebok', - 'under armour', - 'new balance', + 'heyl', + 'horiba' ]; public function parse(string $originalPrompt, string $intent): CommerceSearchQuery { $normalized = $this->normalize($originalPrompt); - [$priceMin, $priceMax] = $this->extractPriceRange($normalized); $sizes = $this->extractSizes($normalized); - $colors = $this->extractColors($normalized); $brand = $this->extractBrand($normalized); - $category = $this->extractCategory($normalized); $properties = []; $searchText = $this->buildSearchText( $normalized, - $colors, $sizes, $brand, $priceMin, @@ -86,9 +46,7 @@ final class CommerceQueryParser originalPrompt: $originalPrompt, normalizedPrompt: $normalized, searchText: $searchText !== '' ? $searchText : $normalized, - category: $category, brand: $brand, - colors: $colors, sizes: $sizes, properties: $properties, priceMin: $priceMin, @@ -100,7 +58,9 @@ final class CommerceQueryParser private function normalize(string $prompt): string { - $value = mb_strtolower(trim($prompt)); + $value = $this->textNormalizer->normalize($prompt); + $value = $this->queryCleaner->clean($value); + $value = mb_strtolower(trim($value)); $value = str_replace(['€'], ' euro ', $value); $value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value; $value = preg_replace('/\s+/u', ' ', $value) ?? $value; @@ -157,23 +117,7 @@ final class CommerceQueryParser } } - return array_values(array_unique(array_filter($sizes, static fn ($v) => $v !== ''))); - } - - /** - * @return string[] - */ - private function extractColors(string $prompt): array - { - $colors = []; - - foreach ($this->knownColors as $color) { - if (preg_match('/\b' . preg_quote($color, '/') . '\b/u', $prompt) === 1) { - $colors[] = $color; - } - } - - return array_values(array_unique($colors)); + return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== ''))); } private function extractBrand(string $prompt): ?string @@ -184,32 +128,21 @@ final class CommerceQueryParser } } - if (preg_match('/\bmarke\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) { + if (preg_match('/\bheyl\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) { return trim($m[1]); } return null; } - private function extractCategory(string $prompt): ?string - { - foreach ($this->knownCategories as $category) { - if (preg_match('/\b' . preg_quote($category, '/') . '\b/u', $prompt) === 1) { - return $category; - } - } - - return null; - } - private function buildSearchText( - string $prompt, - array $colors, - array $sizes, + string $prompt, + array $sizes, ?string $brand, - ?float $priceMin, - ?float $priceMax - ): string { + ?float $priceMin, + ?float $priceMax + ): string + { $text = ' ' . $prompt . ' '; $phrasesToRemove = [ @@ -231,10 +164,6 @@ final class CommerceQueryParser $text = str_replace($phrase, ' ', $text); } - foreach ($colors as $color) { - $text = preg_replace('/\b' . preg_quote($color, '/') . '\b/u', ' ', $text) ?? $text; - } - foreach ($sizes as $size) { $text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text; } @@ -253,7 +182,7 @@ final class CommerceQueryParser $text = preg_replace('/\s+/u', ' ', $text) ?? $text; $text = trim($text, " \t\n\r\0\x0B-.,"); - $tokens = array_filter(explode(' ', $text), static fn (string $token): bool => mb_strlen($token) > 1); + $tokens = array_filter(explode(' ', $text), static fn(string $token): bool => mb_strlen($token) > 1); return trim(implode(' ', $tokens)); } @@ -262,6 +191,6 @@ final class CommerceQueryParser { $value = str_replace(',', '.', trim($value)); - return is_numeric($value) ? (float) $value : null; + return is_numeric($value) ? (float)$value : null; } } \ No newline at end of file diff --git a/src/Commerce/Dto/CommerceSearchQuery.php b/src/Commerce/Dto/CommerceSearchQuery.php index 7cb12ea..0d5a811 100644 --- a/src/Commerce/Dto/CommerceSearchQuery.php +++ b/src/Commerce/Dto/CommerceSearchQuery.php @@ -15,9 +15,7 @@ final readonly class CommerceSearchQuery public string $originalPrompt, public string $normalizedPrompt, public string $searchText, - public ?string $category = null, public ?string $brand = null, - public array $colors = [], public array $sizes = [], public array $properties = [], public ?float $priceMin = null, diff --git a/src/Intent/CommerceIntentLite.php b/src/Intent/CommerceIntentLite.php index 700036f..567446b 100644 --- a/src/Intent/CommerceIntentLite.php +++ b/src/Intent/CommerceIntentLite.php @@ -40,12 +40,14 @@ final class CommerceIntentLite 'shop', 'sku', 'Artikel', - 'Gerät' + 'Gerät', + 'testomat', + 'indikator' ]; foreach ($strongSignals as $signal) { - if (str_contains($p, $signal)) { - $score += 2; + if (str_contains($p, strtolower($signal))) { + $score += 3; $signals[] = $signal; } } diff --git a/src/Knowledge/StopWords.php b/src/Knowledge/StopWords.php index 5f4185b..0017588 100644 --- a/src/Knowledge/StopWords.php +++ b/src/Knowledge/StopWords.php @@ -17,6 +17,7 @@ final class StopWords */ private const STOP_WORDS = [ + 'mit', // Artikel 'der', 'die', 'das', 'ein', 'eine', 'einer', 'eines', diff --git a/src/Shopware/ShopwareCriteriaBuilder.php b/src/Shopware/ShopwareCriteriaBuilder.php index d672ee4..cfd7eea 100644 --- a/src/Shopware/ShopwareCriteriaBuilder.php +++ b/src/Shopware/ShopwareCriteriaBuilder.php @@ -8,12 +8,15 @@ use App\Commerce\Dto\CommerceSearchQuery; final class ShopwareCriteriaBuilder { - public function build(CommerceSearchQuery $query, int $limit = 25): array + public function build( + CommerceSearchQuery $query, + ?int $limit = 25, + ?bool $grouping = true + ): array { $criteria = [ 'page' => 1, 'limit' => max(1, $limit), - "grouping" => ["parentId"], 'total-count-mode' => 0, 'includes' => [ 'product' => [ @@ -54,6 +57,10 @@ final class ShopwareCriteriaBuilder ], ]; + if ($grouping) { + $criteria["grouping"] = ["parentId"]; + } + if ($query->searchText !== '') { $criteria['term'] = $query->searchText; } @@ -68,13 +75,6 @@ final class ShopwareCriteriaBuilder 'type' => 'equals', 'field' => 'available', 'value' => true, - ], - [ - 'type' => 'range', - 'field' => 'price.gross', - 'parameters' => [ - 'gt' => 0, - ], ] ];