harden queries

This commit is contained in:
team 1
2026-04-09 20:10:41 +02:00
parent 1aee32f1d8
commit 521f8bd5a3
5 changed files with 38 additions and 108 deletions

View File

@@ -5,77 +5,37 @@ declare(strict_types=1);
namespace App\Commerce; namespace App\Commerce;
use App\Commerce\Dto\CommerceSearchQuery; use App\Commerce\Dto\CommerceSearchQuery;
use App\Knowledge\Retrieval\QueryCleaner;
use App\Knowledge\Text\TextNormalizer;
final class CommerceQueryParser final class CommerceQueryParser
{ {
/** public function __construct(
* @var string[] private readonly TextNormalizer $textNormalizer,
*/ private readonly QueryCleaner $queryCleaner,
private array $knownColors = [ )
'schwarz', {
'weiß',
'weiss',
'rot',
'blau',
'grün',
'gruen',
'gelb',
'grau',
'beige',
'rosa',
'pink',
'orange',
'braun',
];
/** }
* @var string[]
*/
private array $knownCategories = [
'sneaker',
'schuhe',
'hoodie',
't-shirt',
'shirt',
'jacke',
'regenjacke',
'trinkflasche',
'flasche',
'rucksack',
'tasche',
'mütze',
'muetze',
'kappe',
'hose',
'pullover',
];
/** /**
* @var string[] * @var string[]
*/ */
private array $knownBrands = [ private array $knownBrands = [
'nike', 'heyl',
'adidas', 'horiba'
'puma',
'reebok',
'under armour',
'new balance',
]; ];
public function parse(string $originalPrompt, string $intent): CommerceSearchQuery public function parse(string $originalPrompt, string $intent): CommerceSearchQuery
{ {
$normalized = $this->normalize($originalPrompt); $normalized = $this->normalize($originalPrompt);
[$priceMin, $priceMax] = $this->extractPriceRange($normalized); [$priceMin, $priceMax] = $this->extractPriceRange($normalized);
$sizes = $this->extractSizes($normalized); $sizes = $this->extractSizes($normalized);
$colors = $this->extractColors($normalized);
$brand = $this->extractBrand($normalized); $brand = $this->extractBrand($normalized);
$category = $this->extractCategory($normalized);
$properties = []; $properties = [];
$searchText = $this->buildSearchText( $searchText = $this->buildSearchText(
$normalized, $normalized,
$colors,
$sizes, $sizes,
$brand, $brand,
$priceMin, $priceMin,
@@ -86,9 +46,7 @@ final class CommerceQueryParser
originalPrompt: $originalPrompt, originalPrompt: $originalPrompt,
normalizedPrompt: $normalized, normalizedPrompt: $normalized,
searchText: $searchText !== '' ? $searchText : $normalized, searchText: $searchText !== '' ? $searchText : $normalized,
category: $category,
brand: $brand, brand: $brand,
colors: $colors,
sizes: $sizes, sizes: $sizes,
properties: $properties, properties: $properties,
priceMin: $priceMin, priceMin: $priceMin,
@@ -100,7 +58,9 @@ final class CommerceQueryParser
private function normalize(string $prompt): string private function normalize(string $prompt): string
{ {
$value = mb_strtolower(trim($prompt)); $value = $this->textNormalizer->normalize($prompt);
$value = $this->queryCleaner->clean($value);
$value = mb_strtolower(trim($value));
$value = str_replace(['€'], ' euro ', $value); $value = str_replace(['€'], ' euro ', $value);
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value; $value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
$value = preg_replace('/\s+/u', ' ', $value) ?? $value; $value = preg_replace('/\s+/u', ' ', $value) ?? $value;
@@ -157,23 +117,7 @@ final class CommerceQueryParser
} }
} }
return array_values(array_unique(array_filter($sizes, static fn ($v) => $v !== ''))); return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== '')));
}
/**
* @return string[]
*/
private function extractColors(string $prompt): array
{
$colors = [];
foreach ($this->knownColors as $color) {
if (preg_match('/\b' . preg_quote($color, '/') . '\b/u', $prompt) === 1) {
$colors[] = $color;
}
}
return array_values(array_unique($colors));
} }
private function extractBrand(string $prompt): ?string private function extractBrand(string $prompt): ?string
@@ -184,32 +128,21 @@ final class CommerceQueryParser
} }
} }
if (preg_match('/\bmarke\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) { if (preg_match('/\bheyl\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) {
return trim($m[1]); return trim($m[1]);
} }
return null; return null;
} }
private function extractCategory(string $prompt): ?string
{
foreach ($this->knownCategories as $category) {
if (preg_match('/\b' . preg_quote($category, '/') . '\b/u', $prompt) === 1) {
return $category;
}
}
return null;
}
private function buildSearchText( private function buildSearchText(
string $prompt, string $prompt,
array $colors,
array $sizes, array $sizes,
?string $brand, ?string $brand,
?float $priceMin, ?float $priceMin,
?float $priceMax ?float $priceMax
): string { ): string
{
$text = ' ' . $prompt . ' '; $text = ' ' . $prompt . ' ';
$phrasesToRemove = [ $phrasesToRemove = [
@@ -231,10 +164,6 @@ final class CommerceQueryParser
$text = str_replace($phrase, ' ', $text); $text = str_replace($phrase, ' ', $text);
} }
foreach ($colors as $color) {
$text = preg_replace('/\b' . preg_quote($color, '/') . '\b/u', ' ', $text) ?? $text;
}
foreach ($sizes as $size) { foreach ($sizes as $size) {
$text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text; $text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text;
} }
@@ -253,7 +182,7 @@ final class CommerceQueryParser
$text = preg_replace('/\s+/u', ' ', $text) ?? $text; $text = preg_replace('/\s+/u', ' ', $text) ?? $text;
$text = trim($text, " \t\n\r\0\x0B-.,"); $text = trim($text, " \t\n\r\0\x0B-.,");
$tokens = array_filter(explode(' ', $text), static fn (string $token): bool => mb_strlen($token) > 1); $tokens = array_filter(explode(' ', $text), static fn(string $token): bool => mb_strlen($token) > 1);
return trim(implode(' ', $tokens)); return trim(implode(' ', $tokens));
} }
@@ -262,6 +191,6 @@ final class CommerceQueryParser
{ {
$value = str_replace(',', '.', trim($value)); $value = str_replace(',', '.', trim($value));
return is_numeric($value) ? (float) $value : null; return is_numeric($value) ? (float)$value : null;
} }
} }

View File

@@ -15,9 +15,7 @@ final readonly class CommerceSearchQuery
public string $originalPrompt, public string $originalPrompt,
public string $normalizedPrompt, public string $normalizedPrompt,
public string $searchText, public string $searchText,
public ?string $category = null,
public ?string $brand = null, public ?string $brand = null,
public array $colors = [],
public array $sizes = [], public array $sizes = [],
public array $properties = [], public array $properties = [],
public ?float $priceMin = null, public ?float $priceMin = null,

View File

@@ -40,12 +40,14 @@ final class CommerceIntentLite
'shop', 'shop',
'sku', 'sku',
'Artikel', 'Artikel',
'Gerät' 'Gerät',
'testomat',
'indikator'
]; ];
foreach ($strongSignals as $signal) { foreach ($strongSignals as $signal) {
if (str_contains($p, $signal)) { if (str_contains($p, strtolower($signal))) {
$score += 2; $score += 3;
$signals[] = $signal; $signals[] = $signal;
} }
} }

View File

@@ -17,6 +17,7 @@ final class StopWords
*/ */
private const STOP_WORDS = [ private const STOP_WORDS = [
'mit',
// Artikel // Artikel
'der', 'die', 'das', 'der', 'die', 'das',
'ein', 'eine', 'einer', 'eines', 'ein', 'eine', 'einer', 'eines',

View File

@@ -8,12 +8,15 @@ use App\Commerce\Dto\CommerceSearchQuery;
final class ShopwareCriteriaBuilder final class ShopwareCriteriaBuilder
{ {
public function build(CommerceSearchQuery $query, int $limit = 25): array public function build(
CommerceSearchQuery $query,
?int $limit = 25,
?bool $grouping = true
): array
{ {
$criteria = [ $criteria = [
'page' => 1, 'page' => 1,
'limit' => max(1, $limit), 'limit' => max(1, $limit),
"grouping" => ["parentId"],
'total-count-mode' => 0, 'total-count-mode' => 0,
'includes' => [ 'includes' => [
'product' => [ 'product' => [
@@ -54,6 +57,10 @@ final class ShopwareCriteriaBuilder
], ],
]; ];
if ($grouping) {
$criteria["grouping"] = ["parentId"];
}
if ($query->searchText !== '') { if ($query->searchText !== '') {
$criteria['term'] = $query->searchText; $criteria['term'] = $query->searchText;
} }
@@ -68,13 +75,6 @@ final class ShopwareCriteriaBuilder
'type' => 'equals', 'type' => 'equals',
'field' => 'available', 'field' => 'available',
'value' => true, 'value' => true,
],
[
'type' => 'range',
'field' => 'price.gross',
'parameters' => [
'gt' => 0,
],
] ]
]; ];