Harden retrieval logic

Bug fixes
This commit is contained in:
team2
2026-04-18 21:49:30 +02:00
parent a2425b68a6
commit 5984091282
7 changed files with 426 additions and 108 deletions

View File

@@ -13,20 +13,18 @@ use App\Knowledge\Text\TextNormalizer;
final readonly class CommerceQueryParser
{
public function __construct(
private TextNormalizer $textNormalizer,
private QueryCleaner $queryCleaner,
private TextNormalizer $textNormalizer,
private QueryCleaner $queryCleaner,
private CommerceQueryParserConfig $config,
private CommerceIntentConfig $intentConfig,
)
{
private CommerceIntentConfig $intentConfig,
) {
}
public function parse(
string $originalPrompt,
string $intent,
string $historyContext = ''
): CommerceSearchQuery
{
): CommerceSearchQuery {
$normalizedPrompt = $this->normalize($originalPrompt);
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
@@ -152,8 +150,10 @@ final readonly class CommerceQueryParser
private function extractBrand(string $prompt): ?string
{
foreach ($this->config->getKnownBrands() as $brand) {
if (str_contains($prompt, $brand)) {
return $brand;
$normalizedBrand = $this->normalize((string) $brand);
if ($normalizedBrand !== '' && str_contains($prompt, $normalizedBrand)) {
return $normalizedBrand;
}
}
@@ -161,13 +161,12 @@ final readonly class CommerceQueryParser
}
private function buildSearchText(
string $prompt,
array $sizes,
string $prompt,
array $sizes,
?string $brand,
?float $priceMin,
?float $priceMax
): string
{
?float $priceMin,
?float $priceMax
): string {
$text = ' ' . $prompt . ' ';
foreach ($this->config->getPhrasesToRemove() as $phrase) {
@@ -179,7 +178,7 @@ final readonly class CommerceQueryParser
}
if ($brand !== null && $brand !== '') {
$text = str_replace($brand, ' ', $text);
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
}
if ($priceMin !== null || $priceMax !== null) {
@@ -211,7 +210,9 @@ final readonly class CommerceQueryParser
private function extractLatestQuestionFromHistory(string $historyContext): string
{
if (preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) !== 1 && preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches) === false) {
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
if ($result === false) {
return '';
}
@@ -266,6 +267,6 @@ final readonly class CommerceQueryParser
{
$value = str_replace(',', '.', trim($value));
return is_numeric($value) ? (float)$value : null;
return is_numeric($value) ? (float) $value : null;
}
}