optimize retrieval
This commit is contained in:
@@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\CommerceReferenceContext;
|
||||
use App\Commerce\Dto\CommerceSearchQuery;
|
||||
use App\Config\CommerceIntentConfig;
|
||||
use App\Config\CommerceQueryParserConfig;
|
||||
@@ -23,10 +24,12 @@ final readonly class CommerceQueryParser
|
||||
public function parse(
|
||||
string $originalPrompt,
|
||||
string $intent,
|
||||
string $historyContext = ''
|
||||
string $historyContext = '',
|
||||
?CommerceReferenceContext $referenceContext = null
|
||||
): CommerceSearchQuery {
|
||||
$normalizedPrompt = $this->normalize($originalPrompt);
|
||||
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
|
||||
|
||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
||||
$sizes = $this->extractSizes($normalizedPrompt);
|
||||
@@ -44,7 +47,7 @@ final readonly class CommerceQueryParser
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $historyContext !== ''
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt)
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
|
||||
) {
|
||||
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
||||
|
||||
@@ -73,7 +76,29 @@ final readonly class CommerceQueryParser
|
||||
}
|
||||
}
|
||||
|
||||
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $referenceContext !== null
|
||||
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
|
||||
) {
|
||||
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
|
||||
|
||||
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
|
||||
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
|
||||
} else {
|
||||
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
|
||||
}
|
||||
|
||||
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
|
||||
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
|
||||
|
||||
if ($normalizedManufacturer !== '') {
|
||||
$brand = $normalizedManufacturer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
|
||||
|
||||
return new CommerceSearchQuery(
|
||||
originalPrompt: $originalPrompt,
|
||||
@@ -93,7 +118,7 @@ final readonly class CommerceQueryParser
|
||||
{
|
||||
$value = $this->textNormalizer->normalize($prompt);
|
||||
$value = $this->queryCleaner->clean($value);
|
||||
$value = mb_strtolower(trim($value));
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
$value = str_replace(['€'], ' euro ', $value);
|
||||
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||
@@ -126,6 +151,17 @@ final readonly class CommerceQueryParser
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
}
|
||||
|
||||
// NEW:
|
||||
// Recognize comparative lower-bound phrasing such as:
|
||||
// - mehr als 3000 euro
|
||||
// - über 3000 euro
|
||||
// - ueber 3000 euro
|
||||
// - größer als 3000 euro
|
||||
// - groesser als 3000 euro
|
||||
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
}
|
||||
|
||||
return [$priceMin, $priceMax];
|
||||
}
|
||||
|
||||
@@ -152,7 +188,10 @@ final readonly class CommerceQueryParser
|
||||
}
|
||||
}
|
||||
|
||||
return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== '')));
|
||||
return array_values(array_unique(array_filter(
|
||||
$sizes,
|
||||
static fn(string $value): bool => $value !== ''
|
||||
)));
|
||||
}
|
||||
|
||||
private function extractBrand(string $prompt): ?string
|
||||
@@ -184,6 +223,7 @@ final readonly class CommerceQueryParser
|
||||
|
||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||
|
||||
if ($normalizedPhrase === '') {
|
||||
continue;
|
||||
}
|
||||
@@ -193,6 +233,7 @@ final readonly class CommerceQueryParser
|
||||
|
||||
foreach ($sizes as $size) {
|
||||
$normalizedSize = $this->normalize((string) $size);
|
||||
|
||||
if ($normalizedSize === '') {
|
||||
continue;
|
||||
}
|
||||
@@ -207,6 +248,7 @@ final readonly class CommerceQueryParser
|
||||
if ($priceMin !== null || $priceMax !== null) {
|
||||
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
|
||||
}
|
||||
|
||||
@@ -219,14 +261,14 @@ final readonly class CommerceQueryParser
|
||||
);
|
||||
|
||||
$tokens = $this->filterSearchTokens($tokens);
|
||||
$tokens = $this->stripReferenceOnlyTokens($tokens);
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
private function buildDirectProductSearchText(string $prompt): string
|
||||
{
|
||||
$text = $prompt;
|
||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
|
||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||
|
||||
$tokens = array_filter(
|
||||
@@ -234,17 +276,61 @@ final readonly class CommerceQueryParser
|
||||
static fn(string $token): bool => mb_strlen($token) > 0
|
||||
);
|
||||
|
||||
$tokens = array_values(array_unique($tokens));
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
return trim(implode(' ', array_values(array_unique($tokens))));
|
||||
}
|
||||
|
||||
/**
 * Decides whether the conversation history should be consulted for this prompt.
 *
 * History is used when any of the following holds:
 *  - the prompt matches the configured reference follow-up pattern,
 *  - the search text built from the prompt is empty or made up solely of
 *    reference-only tokens,
 *  - the prompt contains a whole-word match of the configured history-context pattern.
 *
 * @param string $prompt     Normalized user prompt.
 * @param string $searchText Search text already derived from the prompt.
 * @return bool True when the history context should be used.
 */
private function shouldUseHistoryContext(string $prompt, string $searchText): bool
{
    if ($this->isReferenceOnlyFollowUp($prompt)) {
        return true;
    }

    if ($this->isTooGenericSearchText($searchText)) {
        return true;
    }

    // The configured pattern is embedded unescaped between word boundaries;
    // presumably it is a regex alternation - confirm against the config class.
    return preg_match(
        '/\b(' . $this->config->getHistoryContextPattern() . ')\b/u',
        $prompt
    ) === 1;
}
|
||||
|
||||
/**
 * Decides whether the reference context should contribute to the search.
 *
 * @param string $prompt     Normalized user prompt.
 * @param string $searchText Search text already derived from the prompt.
 * @return bool True when the prompt matches the reference follow-up pattern,
 *              or when the search text carries no product-specific token.
 */
private function shouldUseReferenceContext(string $prompt, string $searchText): bool
{
    // Short-circuit order matters: the follow-up check runs first and the
    // generic-text check is only evaluated when it fails.
    return $this->isReferenceOnlyFollowUp($prompt)
        || $this->isTooGenericSearchText($searchText);
}
|
||||
|
||||
/**
 * Tests whether the prompt contains a whole-word match of the configured
 * reference follow-up pattern.
 *
 * @param string $prompt Normalized user prompt.
 * @return bool True only when preg_match reports exactly one match; a regex
 *              error (false) or no match (0) both yield false.
 */
private function isReferenceOnlyFollowUp(string $prompt): bool
{
    $followUpRegex = '/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u';
    $matchResult = preg_match($followUpRegex, $prompt);

    return $matchResult === 1;
}
|
||||
|
||||
/**
 * Decides whether a search text lacks any product-specific term.
 *
 * A text counts as generic when it yields no tokens at all, or when every
 * whitespace-separated token is one of the configured reference-only tokens.
 *
 * @param string $searchText Search text to inspect.
 * @return bool True when no token outside the reference-only list is present.
 */
private function isTooGenericSearchText(string $searchText): bool
{
    $words = preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY);

    // A failed split and an empty or whitespace-only text both count as generic.
    if ($words === false || $words === []) {
        return true;
    }

    // Keyed lookup so each membership test is a single isset().
    $referenceOnlyLookup = array_fill_keys($this->config->getReferenceOnlyTokens(), true);

    $specificWords = array_filter(
        $words,
        static fn(string $word): bool => !isset($referenceOnlyLookup[$word])
    );

    return $specificWords === [];
}
|
||||
|
||||
/**
 * Produces the normalized search text for a reference context.
 *
 * @param CommerceReferenceContext $referenceContext Context whose own
 *        buildReferenceSearchText() result is used as the raw text.
 * @return string The raw reference text after passing through normalize().
 */
private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string
{
    $rawReferenceText = $referenceContext->buildReferenceSearchText();

    return $this->normalize($rawReferenceText);
}
|
||||
|
||||
private function extractLatestQuestionFromHistory(string $historyContext): string
|
||||
@@ -256,6 +342,7 @@ final readonly class CommerceQueryParser
|
||||
}
|
||||
|
||||
$questions = $matches[1] ?? [];
|
||||
|
||||
if ($questions === []) {
|
||||
return '';
|
||||
}
|
||||
@@ -265,11 +352,11 @@ final readonly class CommerceQueryParser
|
||||
return is_string($lastQuestion) ? trim($lastQuestion) : '';
|
||||
}
|
||||
|
||||
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
|
||||
private function mergeSearchTexts(string $left, string $right): string
|
||||
{
|
||||
$tokens = [];
|
||||
|
||||
foreach ([$historySearchText, $currentSearchText] as $text) {
|
||||
foreach ([$left, $right] as $text) {
|
||||
if ($text === '') {
|
||||
continue;
|
||||
}
|
||||
@@ -294,11 +381,25 @@ final readonly class CommerceQueryParser
|
||||
*/
|
||||
private function filterSearchTokens(array $tokens): array
{
    // Turn the configured stop-word list into a keyed lookup once, so each
    // token is tested with isset() instead of a linear in_array() scan.
    // NOTE(review): array keys coerce numeric strings to integers, so an
    // integer stop word would now also match its string form - confirm the
    // configured list contains strings only.
    $stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true);

    // array_filter preserves keys; array_values restores a zero-based list.
    return array_values(array_filter(
        $tokens,
        static fn(string $token): bool => !isset($stopWords[$token])
    ));
}
|
||||
|
||||
/**
 * Removes every token that appears in the configured reference-only token list.
 *
 * The order of the remaining tokens is kept and the result is re-indexed from zero.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function stripReferenceOnlyTokens(array $tokens): array
{
    $referenceOnlyLookup = array_fill_keys($this->config->getReferenceOnlyTokens(), true);

    $isSearchable = static function (string $candidate) use ($referenceOnlyLookup): bool {
        return !isset($referenceOnlyLookup[$candidate]);
    };

    $remaining = array_filter($tokens, $isSearchable);

    return array_values($remaining);
}
|
||||
|
||||
@@ -318,11 +419,7 @@ final readonly class CommerceQueryParser
|
||||
|
||||
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
|
||||
|
||||
if (count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
private function containsModelLikePhrase(string $text): bool
|
||||
|
||||
Reference in New Issue
Block a user