Move intent and config values into config files
This commit is contained in:
@@ -4,7 +4,6 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\CommerceReferenceContext;
|
||||
use App\Commerce\Dto\CommerceSearchQuery;
|
||||
use App\Config\CommerceIntentConfig;
|
||||
use App\Config\CommerceQueryParserConfig;
|
||||
@@ -24,12 +23,10 @@ final readonly class CommerceQueryParser
|
||||
public function parse(
|
||||
string $originalPrompt,
|
||||
string $intent,
|
||||
string $historyContext = '',
|
||||
?CommerceReferenceContext $referenceContext = null
|
||||
string $historyContext = ''
|
||||
): CommerceSearchQuery {
|
||||
$normalizedPrompt = $this->normalize($originalPrompt);
|
||||
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
|
||||
|
||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
||||
$sizes = $this->extractSizes($normalizedPrompt);
|
||||
@@ -47,58 +44,23 @@ final readonly class CommerceQueryParser
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $historyContext !== ''
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt)
|
||||
) {
|
||||
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
||||
$historyParse = $this->parseHistoryContext($historyContext);
|
||||
|
||||
if ($latestHistoryQuestion !== '') {
|
||||
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
|
||||
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
|
||||
|
||||
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
|
||||
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
|
||||
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
|
||||
|
||||
$historySearchText = $this->buildSearchText(
|
||||
prompt: $normalizedHistoryPrompt,
|
||||
sizes: $historySizes,
|
||||
brand: $historyBrand,
|
||||
priceMin: $historyPriceMin,
|
||||
priceMax: $historyPriceMax,
|
||||
preserveDirectProductQuery: $isDirectHistoryProductQuery
|
||||
if ($historyParse !== null) {
|
||||
$searchText = $this->mergeSearchTexts(
|
||||
$historyParse['searchText'],
|
||||
$searchText
|
||||
);
|
||||
|
||||
$searchText = $this->mergeSearchTexts($historySearchText, $searchText);
|
||||
|
||||
if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') {
|
||||
$brand = $historyBrand;
|
||||
if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') {
|
||||
$brand = $historyParse['brand'];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $referenceContext !== null
|
||||
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
|
||||
) {
|
||||
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
|
||||
|
||||
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
|
||||
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
|
||||
} else {
|
||||
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
|
||||
}
|
||||
|
||||
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
|
||||
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
|
||||
|
||||
if ($normalizedManufacturer !== '') {
|
||||
$brand = $normalizedManufacturer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
|
||||
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
|
||||
|
||||
return new CommerceSearchQuery(
|
||||
originalPrompt: $originalPrompt,
|
||||
@@ -118,10 +80,14 @@ final readonly class CommerceQueryParser
|
||||
{
|
||||
$value = $this->textNormalizer->normalize($prompt);
|
||||
$value = $this->queryCleaner->clean($value);
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
$value = str_replace(['€'], ' euro ', $value);
|
||||
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||
$value = mb_strtolower(trim($value));
|
||||
$value = str_replace(
|
||||
$this->config->getNormalizationSearch(),
|
||||
$this->config->getNormalizationReplace(),
|
||||
$value
|
||||
);
|
||||
$value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
|
||||
$value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
|
||||
|
||||
return trim($value);
|
||||
}
|
||||
@@ -134,32 +100,21 @@ final readonly class CommerceQueryParser
|
||||
$priceMin = null;
|
||||
$priceMax = null;
|
||||
|
||||
if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$a = $this->toFloat($m[1]);
|
||||
$b = $this->toFloat($m[2]);
|
||||
if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) {
|
||||
$a = $this->toFloat($matches[1]);
|
||||
$b = $this->toFloat($matches[2]);
|
||||
|
||||
if ($a !== null && $b !== null) {
|
||||
return [min($a, $b), max($a, $b)];
|
||||
}
|
||||
}
|
||||
|
||||
if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMax = $this->toFloat($m[1]);
|
||||
if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) {
|
||||
$priceMax = $this->toFloat($matches[1]);
|
||||
}
|
||||
|
||||
if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
}
|
||||
|
||||
// NEW:
|
||||
// Recognize comparative lower-bound phrasing such as:
|
||||
// - mehr als 3000 euro
|
||||
// - über 3000 euro
|
||||
// - ueber 3000 euro
|
||||
// - größer als 3000 euro
|
||||
// - groesser als 3000 euro
|
||||
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) {
|
||||
$priceMin = $this->toFloat($matches[1]);
|
||||
}
|
||||
|
||||
return [$priceMin, $priceMax];
|
||||
@@ -172,8 +127,7 @@ final readonly class CommerceQueryParser
|
||||
{
|
||||
$sizes = [];
|
||||
|
||||
$sizePattern = $this->intentConfig->getSizePattern();
|
||||
if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) {
|
||||
if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -181,8 +135,7 @@ final readonly class CommerceQueryParser
|
||||
$sizes[] = trim($size);
|
||||
}
|
||||
|
||||
$sizeTokenPattern = $this->intentConfig->getSizeTokenPattern();
|
||||
if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) {
|
||||
if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) {
|
||||
foreach ($tokenMatches[1] as $sizeToken) {
|
||||
$sizes[] = trim($sizeToken);
|
||||
}
|
||||
@@ -207,6 +160,9 @@ final readonly class CommerceQueryParser
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $sizes
|
||||
*/
|
||||
private function buildSearchText(
|
||||
string $prompt,
|
||||
array $sizes,
|
||||
@@ -219,7 +175,7 @@ final readonly class CommerceQueryParser
|
||||
return $this->buildDirectProductSearchText($prompt);
|
||||
}
|
||||
|
||||
$text = ' ' . $prompt . ' ';
|
||||
$text = $this->wrapForPhraseReplacement($prompt);
|
||||
|
||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||
@@ -228,7 +184,11 @@ final readonly class CommerceQueryParser
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text);
|
||||
$text = str_replace(
|
||||
$this->wrapForPhraseReplacement($normalizedPhrase),
|
||||
' ',
|
||||
$text
|
||||
);
|
||||
}
|
||||
|
||||
foreach ($sizes as $size) {
|
||||
@@ -238,111 +198,69 @@ final readonly class CommerceQueryParser
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace(
|
||||
$this->config->buildExactTokenRemovalPattern($normalizedSize),
|
||||
' ',
|
||||
$text
|
||||
) ?? $text;
|
||||
}
|
||||
|
||||
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
|
||||
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace(
|
||||
$this->config->buildExactTokenRemovalPattern($brand),
|
||||
' ',
|
||||
$text
|
||||
) ?? $text;
|
||||
}
|
||||
|
||||
if ($priceMin !== null || $priceMax !== null) {
|
||||
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
|
||||
foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) {
|
||||
$text = preg_replace($pattern, ' ', $text) ?? $text;
|
||||
}
|
||||
}
|
||||
|
||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||
|
||||
$tokens = array_filter(
|
||||
explode(' ', $text),
|
||||
static fn(string $token): bool => mb_strlen($token) > 1
|
||||
fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
|
||||
);
|
||||
|
||||
$tokens = $this->filterSearchTokens($tokens);
|
||||
$tokens = $this->stripReferenceOnlyTokens($tokens);
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
private function buildDirectProductSearchText(string $prompt): string
|
||||
{
|
||||
$text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
|
||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt;
|
||||
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||
|
||||
$tokens = array_filter(
|
||||
explode(' ', $text),
|
||||
static fn(string $token): bool => mb_strlen($token) > 0
|
||||
fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
|
||||
);
|
||||
|
||||
return trim(implode(' ', array_values(array_unique($tokens))));
|
||||
$tokens = array_values(array_unique($tokens));
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
private function shouldUseHistoryContext(string $prompt, string $searchText): bool
|
||||
private function shouldUseHistoryContext(string $prompt): bool
|
||||
{
|
||||
if ($this->isReferenceOnlyFollowUp($prompt)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($this->isTooGenericSearchText($searchText)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
private function shouldUseReferenceContext(string $prompt, string $searchText): bool
|
||||
{
|
||||
if ($this->isReferenceOnlyFollowUp($prompt)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return $this->isTooGenericSearchText($searchText);
|
||||
}
|
||||
|
||||
private function isReferenceOnlyFollowUp(string $prompt): bool
|
||||
{
|
||||
return preg_match('/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
private function isTooGenericSearchText(string $searchText): bool
|
||||
{
|
||||
$tokens = array_values(array_filter(
|
||||
preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [],
|
||||
static fn(string $token): bool => $token !== ''
|
||||
));
|
||||
|
||||
if ($tokens === []) {
|
||||
return true;
|
||||
}
|
||||
|
||||
$genericTokens = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
|
||||
|
||||
foreach ($tokens as $token) {
|
||||
if (!isset($genericTokens[$token])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string
|
||||
{
|
||||
return $this->normalize($referenceContext->buildReferenceSearchText());
|
||||
return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
|
||||
}
|
||||
|
||||
private function extractLatestQuestionFromHistory(string $historyContext): string
|
||||
{
|
||||
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
|
||||
$result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches);
|
||||
|
||||
if ($result === false) {
|
||||
return '';
|
||||
}
|
||||
|
||||
$questions = $matches[1] ?? [];
|
||||
|
||||
if ($questions === []) {
|
||||
return '';
|
||||
}
|
||||
@@ -352,11 +270,11 @@ final readonly class CommerceQueryParser
|
||||
return is_string($lastQuestion) ? trim($lastQuestion) : '';
|
||||
}
|
||||
|
||||
private function mergeSearchTexts(string $left, string $right): string
|
||||
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
|
||||
{
|
||||
$tokens = [];
|
||||
|
||||
foreach ([$left, $right] as $text) {
|
||||
foreach ([$historySearchText, $currentSearchText] as $text) {
|
||||
if ($text === '') {
|
||||
continue;
|
||||
}
|
||||
@@ -364,7 +282,7 @@ final readonly class CommerceQueryParser
|
||||
foreach (explode(' ', $text) as $token) {
|
||||
$token = trim($token);
|
||||
|
||||
if ($token === '' || mb_strlen($token) <= 1) {
|
||||
if ($token === '' || mb_strlen($token) <= $this->config->getMinSearchTokenLength()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -381,25 +299,11 @@ final readonly class CommerceQueryParser
|
||||
*/
|
||||
private function filterSearchTokens(array $tokens): array
|
||||
{
|
||||
$stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true);
|
||||
$stopWords = $this->config->getFilterSearchTokens();
|
||||
|
||||
return array_values(array_filter(
|
||||
$tokens,
|
||||
static fn(string $token): bool => !isset($stopWords[$token])
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $tokens
|
||||
* @return string[]
|
||||
*/
|
||||
private function stripReferenceOnlyTokens(array $tokens): array
|
||||
{
|
||||
$referenceOnly = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
|
||||
|
||||
return array_values(array_filter(
|
||||
$tokens,
|
||||
static fn(string $token): bool => !isset($referenceOnly[$token])
|
||||
static fn(string $token): bool => !in_array($token, $stopWords, true)
|
||||
));
|
||||
}
|
||||
|
||||
@@ -417,25 +321,25 @@ final readonly class CommerceQueryParser
|
||||
return true;
|
||||
}
|
||||
|
||||
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
|
||||
$tokens = preg_split(
|
||||
$this->config->getWhitespaceSplitPattern(),
|
||||
$prompt,
|
||||
-1,
|
||||
PREG_SPLIT_NO_EMPTY
|
||||
) ?: [];
|
||||
|
||||
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
|
||||
return count($tokens) <= $this->config->getDirectProductMaxTokens()
|
||||
&& preg_match($this->config->getDirectProductDigitPattern(), $prompt) === 1;
|
||||
}
|
||||
|
||||
private function containsModelLikePhrase(string $text): bool
|
||||
{
|
||||
return preg_match(
|
||||
'/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u',
|
||||
$text
|
||||
) === 1;
|
||||
return preg_match($this->config->getModelLikePattern(), $text) === 1;
|
||||
}
|
||||
|
||||
private function containsAccessoryLikePhrase(string $text): bool
|
||||
{
|
||||
return preg_match(
|
||||
'/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u',
|
||||
$text
|
||||
) === 1;
|
||||
return preg_match($this->config->getAccessoryLikePattern(), $text) === 1;
|
||||
}
|
||||
|
||||
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
|
||||
@@ -445,7 +349,7 @@ final readonly class CommerceQueryParser
|
||||
}
|
||||
|
||||
return preg_match(
|
||||
'/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u',
|
||||
$this->config->buildBrandPartOfModelPattern($brand),
|
||||
$prompt
|
||||
) === 1;
|
||||
}
|
||||
@@ -456,4 +360,42 @@ final readonly class CommerceQueryParser
|
||||
|
||||
return is_numeric($value) ? (float) $value : null;
|
||||
}
|
||||
|
||||
/**
 * Derives search data from the most recent question in the history context.
 *
 * The latest question is extracted from the history, normalized, and run
 * through the same extraction steps as a regular prompt (price range, sizes,
 * brand) before its search text is built.
 *
 * @return array{searchText:string, brand:?string}|null Null when no question
 *                                                      could be extracted.
 */
private function parseHistoryContext(string $historyContext): ?array
{
    $question = $this->extractLatestQuestionFromHistory($historyContext);

    // Nothing usable in the history: signal that to the caller.
    if ($question === '') {
        return null;
    }

    $normalized = $this->normalize($question);
    $preserveDirect = $this->isDirectProductQuery($normalized);

    [$minPrice, $maxPrice] = $this->extractPriceRange($normalized);
    $sizes = $this->extractSizes($normalized);
    $brand = $this->extractBrand($normalized);

    return [
        'searchText' => $this->buildSearchText(
            prompt: $normalized,
            sizes: $sizes,
            brand: $brand,
            priceMin: $minPrice,
            priceMax: $maxPrice,
            preserveDirectProductQuery: $preserveDirect
        ),
        'brand' => $brand,
    ];
}
|
||||
|
||||
/**
 * Pads the text with one leading and one trailing space.
 *
 * Used on both the haystack and the phrase so that phrase removal via
 * str_replace() only matches whole, space-delimited occurrences.
 */
private function wrapForPhraseReplacement(string $text): string
{
    return sprintf(' %s ', $text);
}
|
||||
}
|
||||
@@ -5,16 +5,15 @@ declare(strict_types=1);
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\ShopProductResult;
|
||||
use App\Config\SearchRepairConfig;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
final readonly class SearchRepairService
|
||||
{
|
||||
public function __construct(
|
||||
private ShopSearchService $shopSearchService,
|
||||
private SearchRepairConfig $config,
|
||||
private LoggerInterface $logger,
|
||||
private bool $enabled = true,
|
||||
private int $maxRepairQueries = 3,
|
||||
private int $minPrimaryResultsWithoutRepair = 2,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -37,22 +36,22 @@ final readonly class SearchRepairService
|
||||
array $primaryShopResults,
|
||||
array $knowledgeChunks
|
||||
): array {
|
||||
if (!$this->enabled) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
if (!$this->config->isEnabled()) {
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
$repairQueries = $this->buildRepairQueries(
|
||||
@@ -63,12 +62,12 @@ final readonly class SearchRepairService
|
||||
);
|
||||
|
||||
if ($repairQueries === []) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
$this->logger->info('Shop repair started', [
|
||||
@@ -99,12 +98,12 @@ final readonly class SearchRepairService
|
||||
'repairQueries' => $repairQueries,
|
||||
]);
|
||||
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => true,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => $repairQueries,
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: true,
|
||||
usedRepair: false,
|
||||
repairQueries: $repairQueries
|
||||
);
|
||||
}
|
||||
|
||||
$mergedResults = $this->rankMergedResults(
|
||||
@@ -129,16 +128,16 @@ final readonly class SearchRepairService
|
||||
'manufacturer' => $product->manufacturer,
|
||||
'available' => $product->available,
|
||||
],
|
||||
array_slice($mergedResults, 0, 3)
|
||||
array_slice($mergedResults, 0, $this->config->getTopProductLogLimit())
|
||||
),
|
||||
]);
|
||||
|
||||
return [
|
||||
'results' => $mergedResults,
|
||||
'attemptedRepair' => true,
|
||||
'usedRepair' => true,
|
||||
'repairQueries' => $repairQueries,
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $mergedResults,
|
||||
attemptedRepair: true,
|
||||
usedRepair: true,
|
||||
repairQueries: $repairQueries
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -157,15 +156,11 @@ final readonly class SearchRepairService
|
||||
return true;
|
||||
}
|
||||
|
||||
// Always try repair for bundle/accessory prompts.
|
||||
// These prompts often need a second pass even when the first search
|
||||
// already returned some results, because the user is asking for a
|
||||
// combination of main device + matching accessory.
|
||||
if ($asksForBundle) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) {
|
||||
if ($primaryResultsCount >= $this->config->getMinPrimaryResultsWithoutRepair()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -173,7 +168,7 @@ final readonly class SearchRepairService
|
||||
return false;
|
||||
}
|
||||
|
||||
return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair;
|
||||
return $primaryResultsCount < $this->config->getMinPrimaryResultsWithoutRepair();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -230,7 +225,7 @@ final readonly class SearchRepairService
|
||||
fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery)
|
||||
));
|
||||
|
||||
return array_slice($queries, 0, max(1, $this->maxRepairQueries));
|
||||
return array_slice($queries, 0, max(1, $this->config->getMaxRepairQueries()));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -291,7 +286,7 @@ final readonly class SearchRepairService
|
||||
$candidates = [];
|
||||
|
||||
preg_match_all(
|
||||
'/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u',
|
||||
$this->config->getModelCandidatePattern(),
|
||||
$text,
|
||||
$matches
|
||||
);
|
||||
@@ -321,7 +316,7 @@ final readonly class SearchRepairService
|
||||
$candidates = [];
|
||||
|
||||
preg_match_all(
|
||||
'/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu',
|
||||
$this->config->getAccessoryCandidatePattern(),
|
||||
$text,
|
||||
$matches
|
||||
);
|
||||
@@ -368,15 +363,15 @@ final readonly class SearchRepairService
|
||||
{
|
||||
$score = 0;
|
||||
|
||||
if (preg_match('/\d/u', $candidate) === 1) {
|
||||
$score += 4;
|
||||
if (preg_match($this->config->getContainsDigitPattern(), $candidate) === 1) {
|
||||
$score += $this->config->getCandidateDigitScore();
|
||||
}
|
||||
|
||||
$wordCount = count($this->tokenize($candidate));
|
||||
$score += min($wordCount, 4);
|
||||
$score += min($wordCount, $this->config->getCandidateWordCountCap());
|
||||
|
||||
if (preg_match('/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu', $candidate) === 1) {
|
||||
$score += 3;
|
||||
if (preg_match($this->config->getSpecificityBoostPattern(), $candidate) === 1) {
|
||||
$score += $this->config->getSpecificityBoostScore();
|
||||
}
|
||||
|
||||
return $score;
|
||||
@@ -384,39 +379,19 @@ final readonly class SearchRepairService
|
||||
|
||||
private function asksForBundleOrAccessory(string $prompt): bool
|
||||
{
|
||||
return preg_match(
|
||||
'/\b(passend|passende|zubehor|zubehör|dazu|zusatz|erganzung|ergänzung|indikator|reagenz|kit|set|auch\s+das|mit\s+preis\s+und\s+allen\s+infos)\b/iu',
|
||||
$prompt
|
||||
) === 1;
|
||||
return preg_match($this->config->getAccessoryOrBundlePattern(), $prompt) === 1;
|
||||
}
|
||||
|
||||
private function containsModelLikePhrase(string $text): bool
|
||||
{
|
||||
return preg_match(
|
||||
'/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u',
|
||||
$text
|
||||
) === 1;
|
||||
return preg_match($this->config->getModelLikePattern(), $text) === 1;
|
||||
}
|
||||
|
||||
private function looksTooGeneric(string $candidate): bool
|
||||
{
|
||||
$normalized = mb_strtolower($candidate);
|
||||
|
||||
foreach ([
|
||||
'wasser',
|
||||
'messgerät',
|
||||
'messgeraet',
|
||||
'produkt',
|
||||
'geräte',
|
||||
'geraete',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'resthärte',
|
||||
'resthaerte',
|
||||
'preis',
|
||||
'infos',
|
||||
'wissen',
|
||||
] as $genericToken) {
|
||||
foreach ($this->config->getGenericCandidateTokens() as $genericToken) {
|
||||
if ($normalized === $genericToken) {
|
||||
return true;
|
||||
}
|
||||
@@ -428,8 +403,8 @@ final readonly class SearchRepairService
|
||||
private function sanitizeQuery(string $query): string
|
||||
{
|
||||
$query = trim($query);
|
||||
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
|
||||
$query = trim($query, " \t\n\r\0\x0B\"'`.,;:-");
|
||||
$query = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $query) ?? $query;
|
||||
$query = trim($query, $this->config->getSanitizeTrimCharacters());
|
||||
|
||||
return trim($query);
|
||||
}
|
||||
@@ -446,7 +421,7 @@ final readonly class SearchRepairService
|
||||
$intersection = array_intersect($candidateTokens, $primaryTokens);
|
||||
$overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens));
|
||||
|
||||
return $overlapRatio >= 0.9;
|
||||
return $overlapRatio >= $this->config->getPrimaryQueryOverlapThreshold();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -497,12 +472,12 @@ final readonly class SearchRepairService
|
||||
|
||||
foreach ($allResults as $index => $product) {
|
||||
$score = 0;
|
||||
$score += $this->scoreProductAgainstText($product, $prompt) * 3;
|
||||
$score += $this->scoreProductAgainstText($product, $primaryQuery) * 2;
|
||||
$score += $this->scoreProductAgainstText($product, $repairSignal) * 4;
|
||||
$score += $this->scoreProductAgainstText($product, $prompt) * $this->config->getPromptMatchWeight();
|
||||
$score += $this->scoreProductAgainstText($product, $primaryQuery) * $this->config->getPrimaryQueryMatchWeight();
|
||||
$score += $this->scoreProductAgainstText($product, $repairSignal) * $this->config->getRepairSignalMatchWeight();
|
||||
|
||||
if ($index < count($primaryResults)) {
|
||||
$score += 1;
|
||||
$score += $this->config->getPrimaryResultOrderBonus();
|
||||
}
|
||||
|
||||
$decorated[] = [
|
||||
@@ -549,11 +524,11 @@ final readonly class SearchRepairService
|
||||
|
||||
$score = 0;
|
||||
$intersection = array_intersect($queryTokens, $productTokens);
|
||||
$score += count($intersection) * 2;
|
||||
$score += count($intersection) * $this->config->getTokenIntersectionScore();
|
||||
|
||||
foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
|
||||
if (in_array($numberToken, $productTokens, true)) {
|
||||
$score += 4;
|
||||
$score += $this->config->getNumericTokenMatchScore();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -562,7 +537,7 @@ final readonly class SearchRepairService
|
||||
|
||||
private function buildProductKey(ShopProductResult $product): string
|
||||
{
|
||||
return mb_strtolower(trim(implode('|', [
|
||||
return mb_strtolower(trim(implode($this->config->getProductKeySeparator(), [
|
||||
$product->id,
|
||||
$product->productNumber ?? '',
|
||||
$product->name,
|
||||
@@ -576,8 +551,8 @@ final readonly class SearchRepairService
|
||||
private function tokenize(string $text): array
|
||||
{
|
||||
$text = mb_strtolower($text);
|
||||
$text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace($this->config->getTokenizeCleanupPattern(), ' ', $text) ?? $text;
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||
$text = trim($text);
|
||||
|
||||
if ($text === '') {
|
||||
@@ -595,7 +570,31 @@ final readonly class SearchRepairService
|
||||
{
|
||||
return array_values(array_filter(
|
||||
$tokens,
|
||||
static fn(string $token): bool => preg_match('/\d/u', $token) === 1
|
||||
fn(string $token): bool => preg_match($this->config->getContainsDigitPattern(), $token) === 1
|
||||
));
|
||||
}
|
||||
|
||||
/**
 * Assembles the result structure returned by the repair flow.
 *
 * Keys are emitted in the order results, attemptedRepair, usedRepair,
 * repairQueries, each holding the value of the parameter of the same name.
 *
 * @param ShopProductResult[] $results
 * @param string[] $repairQueries
 * @return array{
 *   results: ShopProductResult[],
 *   attemptedRepair: bool,
 *   usedRepair: bool,
 *   repairQueries: string[]
 * }
 */
private function buildRepairResult(
    array $results,
    bool $attemptedRepair,
    bool $usedRepair,
    array $repairQueries
): array {
    // compact() builds the array from the parameters, keyed by their names.
    return compact('results', 'attemptedRepair', 'usedRepair', 'repairQueries');
}
|
||||
}
|
||||
Reference in New Issue
Block a user