harden queries
This commit is contained in:
@@ -5,77 +5,37 @@ declare(strict_types=1);
|
|||||||
namespace App\Commerce;
|
namespace App\Commerce;
|
||||||
|
|
||||||
use App\Commerce\Dto\CommerceSearchQuery;
|
use App\Commerce\Dto\CommerceSearchQuery;
|
||||||
|
use App\Knowledge\Retrieval\QueryCleaner;
|
||||||
|
use App\Knowledge\Text\TextNormalizer;
|
||||||
|
|
||||||
final class CommerceQueryParser
|
final class CommerceQueryParser
|
||||||
{
|
{
|
||||||
/**
|
public function __construct(
|
||||||
* @var string[]
|
private readonly TextNormalizer $textNormalizer,
|
||||||
*/
|
private readonly QueryCleaner $queryCleaner,
|
||||||
private array $knownColors = [
|
)
|
||||||
'schwarz',
|
{
|
||||||
'weiß',
|
|
||||||
'weiss',
|
|
||||||
'rot',
|
|
||||||
'blau',
|
|
||||||
'grün',
|
|
||||||
'gruen',
|
|
||||||
'gelb',
|
|
||||||
'grau',
|
|
||||||
'beige',
|
|
||||||
'rosa',
|
|
||||||
'pink',
|
|
||||||
'orange',
|
|
||||||
'braun',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
}
|
||||||
* @var string[]
|
|
||||||
*/
|
|
||||||
private array $knownCategories = [
|
|
||||||
'sneaker',
|
|
||||||
'schuhe',
|
|
||||||
'hoodie',
|
|
||||||
't-shirt',
|
|
||||||
'shirt',
|
|
||||||
'jacke',
|
|
||||||
'regenjacke',
|
|
||||||
'trinkflasche',
|
|
||||||
'flasche',
|
|
||||||
'rucksack',
|
|
||||||
'tasche',
|
|
||||||
'mütze',
|
|
||||||
'muetze',
|
|
||||||
'kappe',
|
|
||||||
'hose',
|
|
||||||
'pullover',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var string[]
|
* @var string[]
|
||||||
*/
|
*/
|
||||||
private array $knownBrands = [
|
private array $knownBrands = [
|
||||||
'nike',
|
'heyl',
|
||||||
'adidas',
|
'horiba'
|
||||||
'puma',
|
|
||||||
'reebok',
|
|
||||||
'under armour',
|
|
||||||
'new balance',
|
|
||||||
];
|
];
|
||||||
|
|
||||||
public function parse(string $originalPrompt, string $intent): CommerceSearchQuery
|
public function parse(string $originalPrompt, string $intent): CommerceSearchQuery
|
||||||
{
|
{
|
||||||
$normalized = $this->normalize($originalPrompt);
|
$normalized = $this->normalize($originalPrompt);
|
||||||
|
|
||||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalized);
|
[$priceMin, $priceMax] = $this->extractPriceRange($normalized);
|
||||||
$sizes = $this->extractSizes($normalized);
|
$sizes = $this->extractSizes($normalized);
|
||||||
$colors = $this->extractColors($normalized);
|
|
||||||
$brand = $this->extractBrand($normalized);
|
$brand = $this->extractBrand($normalized);
|
||||||
$category = $this->extractCategory($normalized);
|
|
||||||
$properties = [];
|
$properties = [];
|
||||||
|
|
||||||
$searchText = $this->buildSearchText(
|
$searchText = $this->buildSearchText(
|
||||||
$normalized,
|
$normalized,
|
||||||
$colors,
|
|
||||||
$sizes,
|
$sizes,
|
||||||
$brand,
|
$brand,
|
||||||
$priceMin,
|
$priceMin,
|
||||||
@@ -86,9 +46,7 @@ final class CommerceQueryParser
|
|||||||
originalPrompt: $originalPrompt,
|
originalPrompt: $originalPrompt,
|
||||||
normalizedPrompt: $normalized,
|
normalizedPrompt: $normalized,
|
||||||
searchText: $searchText !== '' ? $searchText : $normalized,
|
searchText: $searchText !== '' ? $searchText : $normalized,
|
||||||
category: $category,
|
|
||||||
brand: $brand,
|
brand: $brand,
|
||||||
colors: $colors,
|
|
||||||
sizes: $sizes,
|
sizes: $sizes,
|
||||||
properties: $properties,
|
properties: $properties,
|
||||||
priceMin: $priceMin,
|
priceMin: $priceMin,
|
||||||
@@ -100,7 +58,9 @@ final class CommerceQueryParser
|
|||||||
|
|
||||||
private function normalize(string $prompt): string
|
private function normalize(string $prompt): string
|
||||||
{
|
{
|
||||||
$value = mb_strtolower(trim($prompt));
|
$value = $this->textNormalizer->normalize($prompt);
|
||||||
|
$value = $this->queryCleaner->clean($value);
|
||||||
|
$value = mb_strtolower(trim($value));
|
||||||
$value = str_replace(['€'], ' euro ', $value);
|
$value = str_replace(['€'], ' euro ', $value);
|
||||||
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
||||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||||
@@ -157,23 +117,7 @@ final class CommerceQueryParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return array_values(array_unique(array_filter($sizes, static fn ($v) => $v !== '')));
|
return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== '')));
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return string[]
|
|
||||||
*/
|
|
||||||
private function extractColors(string $prompt): array
|
|
||||||
{
|
|
||||||
$colors = [];
|
|
||||||
|
|
||||||
foreach ($this->knownColors as $color) {
|
|
||||||
if (preg_match('/\b' . preg_quote($color, '/') . '\b/u', $prompt) === 1) {
|
|
||||||
$colors[] = $color;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return array_values(array_unique($colors));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function extractBrand(string $prompt): ?string
|
private function extractBrand(string $prompt): ?string
|
||||||
@@ -184,32 +128,21 @@ final class CommerceQueryParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match('/\bmarke\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) {
|
if (preg_match('/\bheyl\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) {
|
||||||
return trim($m[1]);
|
return trim($m[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function extractCategory(string $prompt): ?string
|
|
||||||
{
|
|
||||||
foreach ($this->knownCategories as $category) {
|
|
||||||
if (preg_match('/\b' . preg_quote($category, '/') . '\b/u', $prompt) === 1) {
|
|
||||||
return $category;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function buildSearchText(
|
private function buildSearchText(
|
||||||
string $prompt,
|
string $prompt,
|
||||||
array $colors,
|
array $sizes,
|
||||||
array $sizes,
|
|
||||||
?string $brand,
|
?string $brand,
|
||||||
?float $priceMin,
|
?float $priceMin,
|
||||||
?float $priceMax
|
?float $priceMax
|
||||||
): string {
|
): string
|
||||||
|
{
|
||||||
$text = ' ' . $prompt . ' ';
|
$text = ' ' . $prompt . ' ';
|
||||||
|
|
||||||
$phrasesToRemove = [
|
$phrasesToRemove = [
|
||||||
@@ -231,10 +164,6 @@ final class CommerceQueryParser
|
|||||||
$text = str_replace($phrase, ' ', $text);
|
$text = str_replace($phrase, ' ', $text);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($colors as $color) {
|
|
||||||
$text = preg_replace('/\b' . preg_quote($color, '/') . '\b/u', ' ', $text) ?? $text;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($sizes as $size) {
|
foreach ($sizes as $size) {
|
||||||
$text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text;
|
$text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text;
|
||||||
}
|
}
|
||||||
@@ -253,7 +182,7 @@ final class CommerceQueryParser
|
|||||||
|
|
||||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||||
$tokens = array_filter(explode(' ', $text), static fn (string $token): bool => mb_strlen($token) > 1);
|
$tokens = array_filter(explode(' ', $text), static fn(string $token): bool => mb_strlen($token) > 1);
|
||||||
|
|
||||||
return trim(implode(' ', $tokens));
|
return trim(implode(' ', $tokens));
|
||||||
}
|
}
|
||||||
@@ -262,6 +191,6 @@ final class CommerceQueryParser
|
|||||||
{
|
{
|
||||||
$value = str_replace(',', '.', trim($value));
|
$value = str_replace(',', '.', trim($value));
|
||||||
|
|
||||||
return is_numeric($value) ? (float) $value : null;
|
return is_numeric($value) ? (float)$value : null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -15,9 +15,7 @@ final readonly class CommerceSearchQuery
|
|||||||
public string $originalPrompt,
|
public string $originalPrompt,
|
||||||
public string $normalizedPrompt,
|
public string $normalizedPrompt,
|
||||||
public string $searchText,
|
public string $searchText,
|
||||||
public ?string $category = null,
|
|
||||||
public ?string $brand = null,
|
public ?string $brand = null,
|
||||||
public array $colors = [],
|
|
||||||
public array $sizes = [],
|
public array $sizes = [],
|
||||||
public array $properties = [],
|
public array $properties = [],
|
||||||
public ?float $priceMin = null,
|
public ?float $priceMin = null,
|
||||||
|
|||||||
@@ -40,12 +40,14 @@ final class CommerceIntentLite
|
|||||||
'shop',
|
'shop',
|
||||||
'sku',
|
'sku',
|
||||||
'Artikel',
|
'Artikel',
|
||||||
'Gerät'
|
'Gerät',
|
||||||
|
'testomat',
|
||||||
|
'indikator'
|
||||||
];
|
];
|
||||||
|
|
||||||
foreach ($strongSignals as $signal) {
|
foreach ($strongSignals as $signal) {
|
||||||
if (str_contains($p, $signal)) {
|
if (str_contains($p, strtolower($signal))) {
|
||||||
$score += 2;
|
$score += 3;
|
||||||
$signals[] = $signal;
|
$signals[] = $signal;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ final class StopWords
|
|||||||
*/
|
*/
|
||||||
private const STOP_WORDS = [
|
private const STOP_WORDS = [
|
||||||
|
|
||||||
|
'mit',
|
||||||
// Artikel
|
// Artikel
|
||||||
'der', 'die', 'das',
|
'der', 'die', 'das',
|
||||||
'ein', 'eine', 'einer', 'eines',
|
'ein', 'eine', 'einer', 'eines',
|
||||||
|
|||||||
@@ -8,12 +8,15 @@ use App\Commerce\Dto\CommerceSearchQuery;
|
|||||||
|
|
||||||
final class ShopwareCriteriaBuilder
|
final class ShopwareCriteriaBuilder
|
||||||
{
|
{
|
||||||
public function build(CommerceSearchQuery $query, int $limit = 25): array
|
public function build(
|
||||||
|
CommerceSearchQuery $query,
|
||||||
|
?int $limit = 25,
|
||||||
|
?bool $grouping = true
|
||||||
|
): array
|
||||||
{
|
{
|
||||||
$criteria = [
|
$criteria = [
|
||||||
'page' => 1,
|
'page' => 1,
|
||||||
'limit' => max(1, $limit),
|
'limit' => max(1, $limit),
|
||||||
"grouping" => ["parentId"],
|
|
||||||
'total-count-mode' => 0,
|
'total-count-mode' => 0,
|
||||||
'includes' => [
|
'includes' => [
|
||||||
'product' => [
|
'product' => [
|
||||||
@@ -54,6 +57,10 @@ final class ShopwareCriteriaBuilder
|
|||||||
],
|
],
|
||||||
];
|
];
|
||||||
|
|
||||||
|
if ($grouping) {
|
||||||
|
$criteria["grouping"] = ["parentId"];
|
||||||
|
}
|
||||||
|
|
||||||
if ($query->searchText !== '') {
|
if ($query->searchText !== '') {
|
||||||
$criteria['term'] = $query->searchText;
|
$criteria['term'] = $query->searchText;
|
||||||
}
|
}
|
||||||
@@ -68,13 +75,6 @@ final class ShopwareCriteriaBuilder
|
|||||||
'type' => 'equals',
|
'type' => 'equals',
|
||||||
'field' => 'available',
|
'field' => 'available',
|
||||||
'value' => true,
|
'value' => true,
|
||||||
],
|
|
||||||
[
|
|
||||||
'type' => 'range',
|
|
||||||
'field' => 'price.gross',
|
|
||||||
'parameters' => [
|
|
||||||
'gt' => 0,
|
|
||||||
],
|
|
||||||
]
|
]
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user