harden retrieval logic

bugfixes
This commit is contained in:
team2
2026-04-18 21:49:30 +02:00
parent a2425b68a6
commit 5984091282
7 changed files with 426 additions and 108 deletions

View File

@@ -13,20 +13,18 @@ use App\Knowledge\Text\TextNormalizer;
final readonly class CommerceQueryParser
{
public function __construct(
private TextNormalizer $textNormalizer,
private QueryCleaner $queryCleaner,
private TextNormalizer $textNormalizer,
private QueryCleaner $queryCleaner,
private CommerceQueryParserConfig $config,
private CommerceIntentConfig $intentConfig,
)
{
private CommerceIntentConfig $intentConfig,
) {
}
public function parse(
string $originalPrompt,
string $intent,
string $historyContext = ''
): CommerceSearchQuery
{
): CommerceSearchQuery {
$normalizedPrompt = $this->normalize($originalPrompt);
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
@@ -152,8 +150,10 @@ final readonly class CommerceQueryParser
private function extractBrand(string $prompt): ?string
{
foreach ($this->config->getKnownBrands() as $brand) {
if (str_contains($prompt, $brand)) {
return $brand;
$normalizedBrand = $this->normalize((string) $brand);
if ($normalizedBrand !== '' && str_contains($prompt, $normalizedBrand)) {
return $normalizedBrand;
}
}
@@ -161,13 +161,12 @@ final readonly class CommerceQueryParser
}
private function buildSearchText(
string $prompt,
array $sizes,
string $prompt,
array $sizes,
?string $brand,
?float $priceMin,
?float $priceMax
): string
{
?float $priceMin,
?float $priceMax
): string {
$text = ' ' . $prompt . ' ';
foreach ($this->config->getPhrasesToRemove() as $phrase) {
@@ -179,7 +178,7 @@ final readonly class CommerceQueryParser
}
if ($brand !== null && $brand !== '') {
$text = str_replace($brand, ' ', $text);
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
}
if ($priceMin !== null || $priceMax !== null) {
@@ -211,7 +210,9 @@ final readonly class CommerceQueryParser
private function extractLatestQuestionFromHistory(string $historyContext): string
{
if (preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) !== 1 && preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) === false) {
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
if ($result === false) {
return '';
}
@@ -266,6 +267,6 @@ final readonly class CommerceQueryParser
{
$value = str_replace(',', '.', trim($value));
return is_numeric($value) ? (float)$value : null;
return is_numeric($value) ? (float) $value : null;
}
}

View File

@@ -4,9 +4,11 @@ declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\CommerceSearchQuery;
use App\Commerce\Dto\ShopProductResult;
use App\Shopware\ShopwareCriteriaBuilder;
use App\Shopware\StoreApiClient;
use Psr\Log\LoggerInterface;
use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
@@ -18,11 +20,11 @@ final readonly class ShopSearchService
private CommerceQueryParser $queryParser,
private ShopwareCriteriaBuilder $criteriaBuilder,
private StoreApiClient $storeApiClient,
private LoggerInterface $logger,
private bool $enabled = true,
private int $maxResults = 25,
private string $baseUrl
)
{
) {
}
/**
@@ -34,19 +36,87 @@ final readonly class ShopSearchService
string $commerceHistoryContext = ''
): array {
if (!$this->enabled) {
$this->logger->info('Shop search skipped because commerce search is disabled', [
'commerceIntent' => $commerceIntent,
]);
return [];
}
$response = [];
$query = $this->queryParser->parse(
$primaryQuery = $this->queryParser->parse(
$originalPrompt,
$commerceIntent,
$commerceHistoryContext
);
$this->logger->info('Shop search started', [
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'normalizedPrompt' => $primaryQuery->normalizedPrompt,
'searchText' => $primaryQuery->searchText,
'brand' => $primaryQuery->brand,
'sizes' => $primaryQuery->sizes,
'priceMin' => $primaryQuery->priceMin,
'priceMax' => $primaryQuery->priceMax,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'criteriaLimit' => $this->maxResults,
]);
$rankedProducts = $this->executeSearch($primaryQuery, $commerceIntent, $originalPrompt, true);
if ($rankedProducts === [] && $commerceHistoryContext !== '') {
$fallbackQuery = $this->queryParser->parse(
$originalPrompt,
$commerceIntent,
''
);
$this->logger->info('Shop search retry without commerce history context', [
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'normalizedPrompt' => $fallbackQuery->normalizedPrompt,
'searchText' => $fallbackQuery->searchText,
'brand' => $fallbackQuery->brand,
'sizes' => $fallbackQuery->sizes,
'priceMin' => $fallbackQuery->priceMin,
'priceMax' => $fallbackQuery->priceMax,
]);
$rankedProducts = $this->executeSearch($fallbackQuery, $commerceIntent, $originalPrompt, false);
}
$this->logger->info('Shop search finished', [
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'rankedProductsCount' => count($rankedProducts),
'topProducts' => array_map(
static fn(ShopProductResult $product): array => [
'name' => $product->name,
'productNumber' => $product->productNumber,
'manufacturer' => $product->manufacturer,
'available' => $product->available,
],
array_slice($rankedProducts, 0, 3)
),
]);
return $rankedProducts;
}
/**
* @return ShopProductResult[]
*/
private function executeSearch(
CommerceSearchQuery $query,
string $commerceIntent,
string $originalPrompt,
bool $usesHistoryContext
): array {
$criteria = $this->criteriaBuilder->build($query, $this->maxResults);
$response = [];
try {
$response = $this->storeApiClient->searchProducts($criteria);
} catch (
@@ -55,9 +125,52 @@ final readonly class ShopSearchService
| ServerExceptionInterface
| TransportExceptionInterface $e
) {
$this->logger->warning('Shop search request failed', [
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'normalizedPrompt' => $query->normalizedPrompt,
'searchText' => $query->searchText,
'brand' => $query->brand,
'sizes' => $query->sizes,
'priceMin' => $query->priceMin,
'priceMax' => $query->priceMax,
'usesHistoryContext' => $usesHistoryContext,
'criteria' => $criteria,
'exceptionClass' => $e::class,
'exceptionMessage' => $e->getMessage(),
]);
return [];
}
return $this->mapProducts($response);
$mappedProducts = $this->mapProducts($response);
$rankedProducts = $this->rerankProducts($mappedProducts, $query);
$this->logger->info('Shop search request finished', [
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'normalizedPrompt' => $query->normalizedPrompt,
'searchText' => $query->searchText,
'brand' => $query->brand,
'sizes' => $query->sizes,
'priceMin' => $query->priceMin,
'priceMax' => $query->priceMax,
'usesHistoryContext' => $usesHistoryContext,
'rawElementsCount' => is_array($response['elements'] ?? null) ? count($response['elements']) : 0,
'mappedProductsCount' => count($mappedProducts),
'rankedProductsCount' => count($rankedProducts),
'topProducts' => array_map(
static fn(ShopProductResult $product): array => [
'name' => $product->name,
'productNumber' => $product->productNumber,
'manufacturer' => $product->manufacturer,
'available' => $product->available,
],
array_slice($rankedProducts, 0, 3)
),
]);
return $rankedProducts;
}
/**
@@ -77,6 +190,8 @@ final readonly class ShopSearchService
continue;
}
$relativeUrl = $this->extractUrl($row);
$results[] = new ShopProductResult(
id: (string) ($row['id'] ?? ''),
name: trim((string) ($row['translated']['name'] ?? '')),
@@ -84,7 +199,7 @@ final readonly class ShopSearchService
manufacturer: $this->extractManufacturer($row),
price: $this->extractPrice($row),
available: isset($row['available']) ? (bool) $row['available'] : null,
url: $this->baseUrl . $this->extractUrl($row),
url: $this->buildAbsoluteUrl($relativeUrl),
highlights: $this->extractHighlights($row),
description: $this->cleanUpDescription($row),
productImage: $row['cover']['media']['thumbnails'][0]['url'] ?? 'no-image',
@@ -98,6 +213,157 @@ final readonly class ShopSearchService
));
}
/**
 * Orders products by descending relevance score for the given query.
 *
 * Products with equal scores are ordered by their key in the input array,
 * so ties keep the order delivered by the caller. Lists with fewer than two
 * entries are returned untouched.
 *
 * @param ShopProductResult[] $products
 * @return ShopProductResult[]
 */
private function rerankProducts(array $products, CommerceSearchQuery $query): array
{
    if (count($products) < 2) {
        return $products;
    }

    // Pair every product with its score and original key before sorting.
    $candidates = [];
    foreach ($products as $position => $item) {
        $candidates[] = [
            'index' => $position,
            'score' => $this->scoreProduct($item, $query),
            'product' => $item,
        ];
    }

    usort(
        $candidates,
        static fn (array $first, array $second): int => $first['score'] !== $second['score']
            ? $second['score'] <=> $first['score']
            : $first['index'] <=> $second['index']
    );

    // Strip the sort metadata again; the result is a re-indexed list.
    return array_column($candidates, 'product');
}
/**
 * Computes an integer relevance score for one product against the parsed query.
 *
 * Signals and weights:
 *  - +120 when the normalized product number occurs as a whole phrase in the query text
 *  - +40 when the manufacturer equals the query brand, otherwise +20 when the
 *    brand occurs as a whole phrase in the product name
 *  - +4 / +8 per distinct query token shared with the product name / product number
 *  - +16 / +24 per distinct digit-bearing query token shared with the name / number
 *  - +12 per requested size found as a whole phrase in the name or product number
 *  - +1 when the product is flagged as available
 */
private function scoreProduct(ShopProductResult $product, CommerceSearchQuery $query): int
{
    // Query side: prompt and search text are merged into one matching string.
    // `?:` falls back on any falsy normalized prompt (e.g. '' or '0'), not only null.
    $promptText = $this->normalizeForMatching($query->normalizedPrompt ?: $query->originalPrompt);
    $queryText = trim($promptText . ' ' . $this->normalizeForMatching($query->searchText));
    $queryWords = $this->tokenize($queryText);
    $queryNumbers = $this->extractNumberTokens($queryWords);

    // Product side.
    $nameText = $this->normalizeForMatching($product->name);
    $nameWords = $this->tokenize($nameText);
    $skuText = $this->normalizeForMatching((string) ($product->productNumber ?? ''));
    $skuWords = $this->tokenize($skuText);
    $makerText = $this->normalizeForMatching((string) ($product->manufacturer ?? ''));
    $brandText = $this->normalizeForMatching((string) ($query->brand ?? ''));

    $points = 0;

    if ($skuText !== '' && $this->containsWholePhrase($queryText, $skuText)) {
        $points += 120;
    }

    // Brand bonus: an exact manufacturer match wins over a brand mention in the name.
    $points += match (true) {
        $brandText === '' => 0,
        $makerText === $brandText => 40,
        $this->containsWholePhrase($nameText, $brandText) => 20,
        default => 0,
    };

    // Each row: tokens from the query, tokens from the product, weight per shared token.
    $weightedOverlaps = [
        [$queryWords, $nameWords, 4],
        [$queryWords, $skuWords, 8],
        [$queryNumbers, $this->extractNumberTokens($nameWords), 16],
        [$queryNumbers, $this->extractNumberTokens($skuWords), 24],
    ];
    foreach ($weightedOverlaps as [$needles, $haystack, $weight]) {
        $points += $this->countOverlap($needles, $haystack) * $weight;
    }

    foreach ($query->sizes as $size) {
        $sizeText = $this->normalizeForMatching((string) $size);
        $sizeMatches = $sizeText !== ''
            && ($this->containsWholePhrase($nameText, $sizeText)
                || $this->containsWholePhrase($skuText, $sizeText));

        if ($sizeMatches) {
            $points += 12;
        }
    }

    return $product->available === true ? $points + 1 : $points;
}
/**
 * Counts the distinct tokens that occur in both lists.
 *
 * Duplicates inside either list are counted only once.
 *
 * @param string[] $left
 * @param string[] $right
 */
private function countOverlap(array $left, array $right): int
{
    // Index the right-hand tokens for constant-time membership checks.
    $known = [];
    foreach ($right as $token) {
        $known[$token] = true;
    }

    // Collect shared tokens as keys so repeated tokens collapse into one entry.
    $shared = [];
    foreach ($left as $token) {
        if (isset($known[$token])) {
            $shared[$token] = true;
        }
    }

    return count($shared);
}
/**
 * Keeps only the tokens that contain at least one digit, as a re-indexed list.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function extractNumberTokens(array $tokens): array
{
    $withDigits = [];

    foreach ($tokens as $token) {
        if (preg_match('/\d/u', $token) === 1) {
            $withDigits[] = $token;
        }
    }

    return $withDigits;
}
/**
 * Normalizes free text for token and phrase comparison.
 *
 * The value is trimmed and lower-cased, every run of characters that are
 * neither letters nor numbers becomes a single space, whitespace runs are
 * collapsed, and the result is trimmed again. If a regex step fails, the
 * input of that step is carried forward unchanged.
 */
private function normalizeForMatching(string $value): string
{
    $lowered = mb_strtolower(trim($value));

    $alphanumeric = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $lowered);
    if ($alphanumeric === null) {
        $alphanumeric = $lowered;
    }

    $collapsed = preg_replace('/\s+/u', ' ', $alphanumeric);

    return trim($collapsed ?? $alphanumeric);
}
/**
 * Splits a string into tokens at every run of characters that are neither
 * letters nor numbers. Empty pieces are dropped; an empty input or a failed
 * split yields an empty list.
 *
 * @return string[]
 */
private function tokenize(string $value): array
{
    $pieces = $value === ''
        ? false
        : preg_split('/[^\p{L}\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY);

    if (!is_array($pieces) || $pieces === []) {
        return [];
    }

    return $pieces;
}
/**
 * Tells whether the phrase occurs in the text bounded by spaces or the text edges.
 *
 * Both arguments are compared as given; padding each side with a space
 * prevents a match in the middle of a longer word. An empty text or phrase
 * never matches.
 */
private function containsWholePhrase(string $normalizedText, string $normalizedPhrase): bool
{
    return $normalizedText !== ''
        && $normalizedPhrase !== ''
        && str_contains(" {$normalizedText} ", " {$normalizedPhrase} ");
}
private function getRelevantCustomFields(array $customField): string
{
$result = ($customField['migration_Backup_product_attr1'] ?? '') . ': ' . ($customField['migration_Backup_product_attr2'] ?? '');
@@ -179,6 +445,15 @@ final readonly class ShopSearchService
return null;
}
/**
 * Joins the configured base URL and a relative product URL with exactly one slash.
 *
 * Surrounding whitespace of the relative URL is removed before joining, so the
 * value that passes the emptiness check is the same value that ends up in the
 * result.
 *
 * @return string|null The absolute URL, or null when the relative URL is null or blank.
 */
private function buildAbsoluteUrl(?string $relativeUrl): ?string
{
    if ($relativeUrl === null) {
        return null;
    }

    // Trim once and reuse the result; concatenating the untrimmed value would
    // embed leading or trailing whitespace in the generated URL.
    $path = trim($relativeUrl);
    if ($path === '') {
        return null;
    }

    return rtrim($this->baseUrl, '/') . '/' . ltrim($path, '/');
}
/**
* @return string[]
*/