diff --git a/config/services.yaml b/config/services.yaml index f5ba347..6195a7b 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -47,6 +47,10 @@ parameters: mto.commerce.store_api_base_url: '%env(SHOPWARE_STORE_API_BASE_URL)%' mto.commerce.sales_channel_access_key: '%env(SHOPWARE_SALES_CHANNEL_ACCESS_KEY)%' + mto.commerce.search_repair.enabled: true + mto.commerce.search_repair.max_queries: 3 + mto.commerce.search_repair.min_primary_results_without_repair: 2 + # ------------------------------------------------------------ # Services @@ -116,6 +120,13 @@ services: App\Commerce\CommerceQueryParser: ~ + App\Commerce\SearchRepairService: + arguments: + $logger: '@monolog.logger.agent' + $enabled: '%mto.commerce.search_repair.enabled%' + $maxRepairQueries: '%mto.commerce.search_repair.max_queries%' + $minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%' + App\Shopware\ShopwareCriteriaBuilder: ~ App\Shopware\StoreApiClient: diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index fae5962..083e056 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace App\Agent; +use App\Commerce\SearchRepairService; use App\Commerce\ShopSearchService; use App\Config\AgentRunnerConfig; use App\Context\ContextService; @@ -28,6 +29,7 @@ final readonly class AgentRunner private UrlAnalyzer $urlAnalyzer, private RetrieverInterface $retriever, private ShopSearchService $shopSearchService, + private SearchRepairService $searchRepairService, private CommerceIntentLite $commerceIntentLite, private OllamaClient $ollamaClient, private LoggerInterface $agentLogger, @@ -49,20 +51,21 @@ final readonly class AgentRunner } $shopResults = []; + $primaryShopResults = []; $sources = []; $optimizedShopQuery = ''; $shopSearchQuery = ''; $commerceIntent = CommerceIntentLite::NONE; $commerceHistoryContext = ''; + $attemptedShopRepair = false; + $usedShopRepair = false; 
+ $shopRepairQueries = []; $this->agentLogger->info('Agent run started', [ 'userId' => $userId, ]); try { - // --------------------------------------------------------- - // 1) Context strategy - // --------------------------------------------------------- if ($includeFullContext) { // Full context mode is already passed to PromptBuilder. // Additional context strategies can be added here later. @@ -70,9 +73,6 @@ final readonly class AgentRunner yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think'); - // --------------------------------------------------------- - // 2) Extract URL content - // --------------------------------------------------------- yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think'); $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt); @@ -80,9 +80,6 @@ final readonly class AgentRunner $this->addSource($sources, 'Externe URL'); } - // --------------------------------------------------------- - // 3) Retrieve RAG knowledge - // --------------------------------------------------------- yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think'); $knowledgeChunks = $this->retriever->retrieve($prompt); @@ -90,9 +87,6 @@ final readonly class AgentRunner $this->addSource($sources, 'RAG Wissen'); } - // --------------------------------------------------------- - // 4) Optional commerce/shop search - // --------------------------------------------------------- $commerceIntent = $this->detectCommerceIntent($prompt); if ($this->isCommerceIntent($commerceIntent)) { @@ -127,16 +121,35 @@ final readonly class AgentRunner 'think' ); - $shopResults = $this->searchShop( + $primaryShopResults = $this->searchShop( $shopSearchQuery, $commerceIntent, $userId, $commerceHistoryContext ); + $repairPayload = $this->repairShopResults( + prompt: $prompt, + userId: $userId, + commerceIntent: $commerceIntent, + commerceHistoryContext: $commerceHistoryContext, + primaryQuery: $shopSearchQuery, + 
primaryShopResults: $primaryShopResults, + knowledgeChunks: $knowledgeChunks + ); + + $shopResults = $repairPayload['results']; + $attemptedShopRepair = $repairPayload['attemptedRepair']; + $usedShopRepair = $repairPayload['usedRepair']; + $shopRepairQueries = $repairPayload['repairQueries']; + if ($shopResults !== []) { $this->addSource($sources, 'Shopsystem'); } + + if ($attemptedShopRepair) { + $this->addSource($sources, 'Erweiterte Shopsuche'); + } } if ($shopResults !== []) { @@ -145,9 +158,6 @@ final readonly class AgentRunner yield $this->systemMsg('Ich analysiere alle Informationen...', 'think'); - // --------------------------------------------------------- - // 5) Build final prompt - // --------------------------------------------------------- $finalPrompt = $this->promptBuilder->build( prompt: $prompt, userId: $userId, @@ -164,6 +174,11 @@ final readonly class AgentRunner 'finalPrompt' => $finalPrompt, 'optimizedShopQuery' => $optimizedShopQuery, 'shopSearchQuery' => $shopSearchQuery, + 'primaryShopResultsCount' => count($primaryShopResults), + 'shopResultsCount' => count($shopResults), + 'attemptedShopRepair' => $attemptedShopRepair, + 'usedShopRepair' => $usedShopRepair, + 'shopRepairQueries' => $shopRepairQueries, ]); } @@ -181,9 +196,6 @@ final readonly class AgentRunner yield $this->emitSources($sources, 'Genutzte Quellen: '); } - // --------------------------------------------------------- - // 6) Stream final LLM answer - // --------------------------------------------------------- $fullOutput = yield from $this->streamFinalAnswer($finalPrompt); if ($sources !== []) { @@ -194,9 +206,6 @@ final readonly class AgentRunner yield $this->systemMsg($finalPrompt, 'debug'); } - // --------------------------------------------------------- - // 7) Persist conversation history - // --------------------------------------------------------- if ($fullOutput !== '') { $this->contextService->appendHistory( $userId, @@ -210,7 +219,11 @@ final readonly class 
AgentRunner 'outputLength' => mb_strlen($fullOutput), 'contextMode' => $includeFullContext ? 'full' : 'recent', 'commerceIntent' => $commerceIntent, + 'primaryShopResultsCount' => count($primaryShopResults), 'shopResultsCount' => count($shopResults), + 'attemptedShopRepair' => $attemptedShopRepair, + 'usedShopRepair' => $usedShopRepair, + 'shopRepairQueries' => $shopRepairQueries, 'knowledgeChunkCount' => count($knowledgeChunks), 'hasUrlContent' => $urlContent !== '', 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', @@ -282,7 +295,51 @@ final readonly class AgentRunner return ''; } - return trim($optimizedQuery); + return $this->sanitizeOptimizedShopQuery($optimizedQuery); + } + + /** + * @return array{ + * results: array, + * attemptedRepair: bool, + * usedRepair: bool, + * repairQueries: string[] + * } + */ + private function repairShopResults( + string $prompt, + string $userId, + string $commerceIntent, + string $commerceHistoryContext, + string $primaryQuery, + array $primaryShopResults, + array $knowledgeChunks + ): array { + try { + return $this->searchRepairService->repair( + prompt: $prompt, + commerceIntent: $commerceIntent, + commerceHistoryContext: $commerceHistoryContext, + primaryQuery: $primaryQuery, + primaryShopResults: $primaryShopResults, + knowledgeChunks: $knowledgeChunks + ); + } catch (Throwable $e) { + $this->agentLogger->warning('Shop repair failed, continuing with primary shop results', [ + 'userId' => $userId, + 'commerceIntent' => $commerceIntent, + 'primaryQuery' => $primaryQuery, + 'primaryShopResultsCount' => count($primaryShopResults), + 'exception' => $e, + ]); + + return [ + 'results' => $primaryShopResults, + 'attemptedRepair' => false, + 'usedRepair' => false, + 'repairQueries' => [], + ]; + } } private function searchShop( @@ -328,6 +385,22 @@ final readonly class AgentRunner }; } + private function sanitizeOptimizedShopQuery(string $query): string + { + $query = trim($query); + + if ($query === '') { + return ''; + } + + 
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query; + $query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query; + $query = trim($query, " \t\n\r\0\x0B\"'`"); + $query = preg_replace('/\s+/u', ' ', $query) ?? $query; + + return trim($query); + } + /** * @return Generator */ diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php index c6892f5..975b955 100644 --- a/src/Agent/PromptBuilder.php +++ b/src/Agent/PromptBuilder.php @@ -87,6 +87,22 @@ final readonly class PromptBuilder 'testomat', ]; + private const ACCESSORY_REQUEST_KEYWORDS = [ + 'passend', + 'passende', + 'passendes', + 'zubehör', + 'zubehor', + 'dazu', + 'indikator', + 'reagenz', + 'kit', + 'set', + 'zusatz', + 'ergänzung', + 'ergaenzung', + ]; + public function __construct( private ContextService $contextService, private SystemPromptRepository $systemPromptRepository, @@ -119,18 +135,20 @@ final readonly class PromptBuilder $swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut); $hasShopResults = $shopResults !== []; + $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); $systemBlock = $this->buildSystemBlock(); $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut); $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults); + $responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion); $knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults); $userBlock = $this->buildUserBlock($prompt); - // Build fixed blocks first so history only receives the remaining budget. 
$fixedPrompt = $this->implodeBlocks([ $systemBlock, $shopBlock, $outputPriorityBlock, + $responseFormatBlock, $knowledgeBlock, $userBlock, ]); @@ -145,6 +163,7 @@ final readonly class PromptBuilder $systemBlock, $shopBlock, $outputPriorityBlock, + $responseFormatBlock, $knowledgeBlock, $contextBlock, $userBlock, @@ -326,6 +345,39 @@ final readonly class PromptBuilder "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n"; } + private function buildResponseFormatBlock( + string $prompt, + bool $hasShopResults, + bool $isTechnicalProductQuestion + ): string { + $rules = [ + "RESPONSE FORMAT RULES:", + "- Keep normal spacing between all words. Never fuse words together.", + "- Use short, clean paragraphs or short labeled sections.", + "- Do not use persuasive or promotional wording.", + "- Do not repeat the same fact in slightly different wording.", + ]; + + if ($hasShopResults) { + $rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts."; + $rules[] = "- Keep price, availability, and URL on separate lines when they are present."; + } + + if ($isTechnicalProductQuestion) { + $rules[] = "- Write like technical documentation: precise, neutral, and source-close."; + $rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation."; + } + + if ($this->asksForAccessoryOrBundle($prompt)) { + $rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory."; + $rules[] = "- The main device must come first. 
The accessory must not replace the main device."; + $rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources."; + $rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so."; + } + + return implode("\n", $rules); + } + /** * Build the knowledge block. * @@ -451,6 +503,8 @@ final readonly class PromptBuilder "- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.", "- When shop results are present and relevant, include current price and the actual URL if available.", "- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.", + "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.", + "- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.", ]); } else { $rules[] = "- Use retrieved knowledge as authoritative for factual answers."; @@ -484,10 +538,10 @@ final readonly class PromptBuilder { $filtered = array_values(array_filter( array_map( - fn ($block): string => is_string($block) ? $this->normalizeBlockText($block) : '', + fn($block): string => is_string($block) ? 
$this->normalizeBlockText($block) : '', $blocks ), - static fn (string $block): bool => $block !== '' + static fn(string $block): bool => $block !== '' )); return implode("\n\n", $filtered); @@ -536,6 +590,19 @@ final readonly class PromptBuilder return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1; } + private function asksForAccessoryOrBundle(string $prompt): bool + { + $normalized = mb_strtolower($prompt, 'UTF-8'); + + foreach (self::ACCESSORY_REQUEST_KEYWORDS as $keyword) { + if (str_contains($normalized, $keyword)) { + return true; + } + } + + return false; + } + private function clamp(int $value, int $min, int $max): int { return max($min, min($max, $value)); diff --git a/src/Command/TestShopSearchCommand.php b/src/Command/TestShopSearchCommand.php index 1481197..b4004be 100644 --- a/src/Command/TestShopSearchCommand.php +++ b/src/Command/TestShopSearchCommand.php @@ -4,11 +4,15 @@ declare(strict_types=1); namespace App\Command; +use App\Commerce\SearchRepairService; use App\Commerce\ShopSearchService; +use App\Intent\CommerceIntentLite; +use App\Knowledge\Retrieval\RetrieverInterface; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; #[AsCommand(name: 'mto:agent:test:shop-search')] @@ -16,33 +20,105 @@ final class TestShopSearchCommand extends Command { public function __construct( private readonly ShopSearchService $shopSearchService, + private readonly SearchRepairService $searchRepairService, + private readonly RetrieverInterface $retriever, ) { parent::__construct(); } protected function configure(): void { - $this->addArgument( - 'query', - InputArgument::OPTIONAL, - 'Die zu testende Suchanfrage', - 'zeige mir testomat modelle wasserhärte unter 5000 euro' - ); + $this + ->addArgument( + 
'query', + InputArgument::OPTIONAL, + 'Die zu testende Suchanfrage', + 'zeige mir testomat modelle wasserhärte unter 5000 euro' + ) + ->addOption( + 'intent', + null, + InputOption::VALUE_OPTIONAL, + 'Commerce intent', + CommerceIntentLite::ADVISORY_PRODUCT_SEARCH + ) + ->addOption( + 'history', + null, + InputOption::VALUE_OPTIONAL, + 'Optionaler Commerce-History-Kontext', + '' + ) + ->addOption( + 'repair', + null, + InputOption::VALUE_NONE, + 'Aktiviert zusätzlich den Search-Repair-Test' + ); } protected function execute(InputInterface $input, OutputInterface $output): int { - $query = (string) $input->getArgument('query'); + $query = trim((string) $input->getArgument('query')); + $intent = trim((string) $input->getOption('intent')); + $history = trim((string) $input->getOption('history')); + $useRepair = (bool) $input->getOption('repair'); + + $output->writeln('Test query: ' . $query); + $output->writeln('Intent: ' . $intent); + $output->writeln('Repair: ' . ($useRepair ? 'ja' : 'nein')); + + if ($history !== '') { + $output->writeln('History: ' . $history); + } - $output->writeln('Test query: ' . $query); $output->writeln(''); - $results = $this->shopSearchService->search($query); + $primaryResults = $this->shopSearchService->search($query, $intent, $history); + $output->writeln('Primärsuche'); + $this->renderResults($output, $primaryResults); + + if (!$useRepair) { + return Command::SUCCESS; + } + + $knowledgeChunks = $this->retriever->retrieve($query); + + $repairPayload = $this->searchRepairService->repair( + prompt: $query, + commerceIntent: $intent, + commerceHistoryContext: $history, + primaryQuery: $query, + primaryShopResults: $primaryResults, + knowledgeChunks: $knowledgeChunks + ); + + $output->writeln(''); + $output->writeln('Repair-Auswertung'); + $output->writeln(' Used repair: ' . ($repairPayload['usedRepair'] ? 'ja' : 'nein')); + $output->writeln(' Repair queries: ' . ( + $repairPayload['repairQueries'] !== [] + ? 
implode(' | ', $repairPayload['repairQueries']) + : '-' + )); + $output->writeln(''); + + $output->writeln('Finale Ergebnisse nach Repair/Merge'); + $this->renderResults($output, $repairPayload['results']); + + return Command::SUCCESS; + } + + /** + * @param array $results + */ + private function renderResults(OutputInterface $output, array $results): void + { if ($results === []) { $output->writeln('Keine Shop-Ergebnisse gefunden.'); - return Command::SUCCESS; + return; } foreach ($results as $index => $result) { @@ -57,7 +133,19 @@ final class TestShopSearchCommand extends Command $output->writeln(' URL: ' . ($result->url ?? '-')); $output->writeln(' Description: ' . ($result->description ?? '-')); - if ($result->highlights !== []) { + if (property_exists($result, 'matchScore')) { + $output->writeln(' MatchScore: ' . (($result->matchScore ?? null) !== null ? (string) $result->matchScore : '-')); + } + + if (property_exists($result, 'matchSource')) { + $output->writeln(' MatchSource: ' . ($result->matchSource ?? '-')); + } + + if (property_exists($result, 'matchedQueries') && is_array($result->matchedQueries) && $result->matchedQueries !== []) { + $output->writeln(' MatchedQueries: ' . implode(' | ', $result->matchedQueries)); + } + + if (is_array($result->highlights) && $result->highlights !== []) { $output->writeln(' Highlights:'); foreach ($result->highlights as $highlight) { $output->writeln(' - ' . 
$highlight); @@ -66,7 +154,5 @@ final class TestShopSearchCommand extends Command $output->writeln(''); } - - return Command::SUCCESS; } } \ No newline at end of file diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index c0802b6..f9e8b20 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -26,35 +26,43 @@ final readonly class CommerceQueryParser string $historyContext = '' ): CommerceSearchQuery { $normalizedPrompt = $this->normalize($originalPrompt); + $isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt); [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt); $sizes = $this->extractSizes($normalizedPrompt); $brand = $this->extractBrand($normalizedPrompt); $searchText = $this->buildSearchText( - $normalizedPrompt, - $sizes, - $brand, - $priceMin, - $priceMax + prompt: $normalizedPrompt, + sizes: $sizes, + brand: $brand, + priceMin: $priceMin, + priceMax: $priceMax, + preserveDirectProductQuery: $isDirectProductQuery ); - if ($historyContext !== '' && $this->shouldUseHistoryContext($normalizedPrompt)) { + if ( + !$isDirectProductQuery + && $historyContext !== '' + && $this->shouldUseHistoryContext($normalizedPrompt) + ) { $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext); if ($latestHistoryQuestion !== '') { $normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion); + $isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt); [$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt); $historySizes = $this->extractSizes($normalizedHistoryPrompt); $historyBrand = $this->extractBrand($normalizedHistoryPrompt); $historySearchText = $this->buildSearchText( - $normalizedHistoryPrompt, - $historySizes, - $historyBrand, - $historyPriceMin, - $historyPriceMax + prompt: $normalizedHistoryPrompt, + sizes: $historySizes, + brand: $historyBrand, + priceMin: 
$historyPriceMin, + priceMax: $historyPriceMax, + preserveDirectProductQuery: $isDirectHistoryProductQuery ); $searchText = $this->mergeSearchTexts($historySearchText, $searchText); @@ -98,7 +106,7 @@ final readonly class CommerceQueryParser */ private function extractPriceRange(string $prompt): array { - $priceMin = 0; + $priceMin = null; $priceMax = null; if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { @@ -165,19 +173,34 @@ final readonly class CommerceQueryParser array $sizes, ?string $brand, ?float $priceMin, - ?float $priceMax + ?float $priceMax, + bool $preserveDirectProductQuery = false ): string { + if ($preserveDirectProductQuery) { + return $this->buildDirectProductSearchText($prompt); + } + $text = ' ' . $prompt . ' '; foreach ($this->config->getPhrasesToRemove() as $phrase) { - $text = str_replace($phrase, ' ', $text); + $normalizedPhrase = $this->normalize((string) $phrase); + if ($normalizedPhrase === '') { + continue; + } + + $text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text); } foreach ($sizes as $size) { - $text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text; + $normalizedSize = $this->normalize((string) $size); + if ($normalizedSize === '') { + continue; + } + + $text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text; } - if ($brand !== null && $brand !== '') { + if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) { $text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text; } @@ -200,6 +223,22 @@ final readonly class CommerceQueryParser return trim(implode(' ', $tokens)); } + private function buildDirectProductSearchText(string $prompt): string + { + $text = $prompt; + $text = preg_replace('/\s+/u', ' ', $text) ?? 
$text; + $text = trim($text, " \t\n\r\0\x0B-.,"); + + $tokens = array_filter( + explode(' ', $text), + static fn(string $token): bool => mb_strlen($token) > 0 + ); + + $tokens = array_values(array_unique($tokens)); + + return trim(implode(' ', $tokens)); + } + private function shouldUseHistoryContext(string $prompt): bool { return preg_match( @@ -263,6 +302,57 @@ final readonly class CommerceQueryParser )); } + private function isDirectProductQuery(string $prompt): bool + { + if ($prompt === '') { + return false; + } + + if ($this->containsModelLikePhrase($prompt)) { + return true; + } + + if ($this->containsAccessoryLikePhrase($prompt)) { + return true; + } + + $tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: []; + + if (count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1) { + return true; + } + + return false; + } + + private function containsModelLikePhrase(string $text): bool + { + return preg_match( + '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u', + $text + ) === 1; + } + + private function containsAccessoryLikePhrase(string $text): bool + { + return preg_match( + '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u', + $text + ) === 1; + } + + private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool + { + if ($brand === '') { + return false; + } + + return preg_match( + '/\b' . preg_quote($brand, '/') . 
'\s+\d{2,5}[a-z0-9\-]*\b/u', + $prompt + ) === 1; + } + private function toFloat(string $value): ?float { $value = str_replace(',', '.', trim($value)); diff --git a/src/Commerce/Dto/ShopProductResult.php b/src/Commerce/Dto/ShopProductResult.php index b5e8310..d311a98 100644 --- a/src/Commerce/Dto/ShopProductResult.php +++ b/src/Commerce/Dto/ShopProductResult.php @@ -10,18 +10,22 @@ final readonly class ShopProductResult * @param string[] $highlights */ public function __construct( - public string $id, - public string $name, + public string $id, + public string $name, public ?string $productNumber = null, public ?string $manufacturer = null, public ?string $price = null, - public ?bool $available = null, + public ?bool $available = null, public ?string $url = null, - public array $highlights = [], + public array $highlights = [], public ?string $description = null, public ?string $productImage = null, public ?string $customFields = null, - ) - { + + // Internal ranking/merge metadata + public ?int $matchScore = null, + public ?string $matchSource = null, + public array $matchedQueries = [], + ) { } } \ No newline at end of file diff --git a/src/Commerce/SearchRepairService.php b/src/Commerce/SearchRepairService.php new file mode 100644 index 0000000..485c1f9 --- /dev/null +++ b/src/Commerce/SearchRepairService.php @@ -0,0 +1,601 @@ + $knowledgeChunks + * + * @return array{ + * results: ShopProductResult[], + * attemptedRepair: bool, + * usedRepair: bool, + * repairQueries: string[] + * } + */ + public function repair( + string $prompt, + string $commerceIntent, + string $commerceHistoryContext, + string $primaryQuery, + array $primaryShopResults, + array $knowledgeChunks + ): array { + if (!$this->enabled) { + return [ + 'results' => $primaryShopResults, + 'attemptedRepair' => false, + 'usedRepair' => false, + 'repairQueries' => [], + ]; + } + + if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) { + return [ + 'results' => 
$primaryShopResults, + 'attemptedRepair' => false, + 'usedRepair' => false, + 'repairQueries' => [], + ]; + } + + $repairQueries = $this->buildRepairQueries( + prompt: $prompt, + primaryQuery: $primaryQuery, + primaryShopResults: $primaryShopResults, + knowledgeChunks: $knowledgeChunks + ); + + if ($repairQueries === []) { + return [ + 'results' => $primaryShopResults, + 'attemptedRepair' => false, + 'usedRepair' => false, + 'repairQueries' => [], + ]; + } + + $this->logger->info('Shop repair started', [ + 'commerceIntent' => $commerceIntent, + 'primaryQuery' => $primaryQuery, + 'primaryResultsCount' => count($primaryShopResults), + 'repairQueries' => $repairQueries, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + ]); + + $repairResults = []; + + foreach ($repairQueries as $repairQuery) { + $results = $this->shopSearchService->search($repairQuery, $commerceIntent, ''); + + if ($results === []) { + continue; + } + + $repairResults = $this->mergeUniqueProducts($repairResults, $results); + } + + if ($repairResults === []) { + $this->logger->info('Shop repair finished without additional products', [ + 'commerceIntent' => $commerceIntent, + 'primaryQuery' => $primaryQuery, + 'repairQueries' => $repairQueries, + ]); + + return [ + 'results' => $primaryShopResults, + 'attemptedRepair' => true, + 'usedRepair' => false, + 'repairQueries' => $repairQueries, + ]; + } + + $mergedResults = $this->rankMergedResults( + primaryResults: $primaryShopResults, + repairResults: $repairResults, + prompt: $prompt, + primaryQuery: $primaryQuery, + repairQueries: $repairQueries + ); + + $this->logger->info('Shop repair finished', [ + 'commerceIntent' => $commerceIntent, + 'primaryQuery' => $primaryQuery, + 'primaryResultsCount' => count($primaryShopResults), + 'repairResultsCount' => count($repairResults), + 'mergedResultsCount' => count($mergedResults), + 'repairQueries' => $repairQueries, + 
'topProducts' => array_map( + static fn(ShopProductResult $product): array => [ + 'name' => $product->name, + 'productNumber' => $product->productNumber, + 'manufacturer' => $product->manufacturer, + 'available' => $product->available, + ], + array_slice($mergedResults, 0, 3) + ), + ]); + + return [ + 'results' => $mergedResults, + 'attemptedRepair' => true, + 'usedRepair' => true, + 'repairQueries' => $repairQueries, + ]; + } + + /** + * @param ShopProductResult[] $primaryShopResults + */ + private function shouldAttemptRepair( + string $prompt, + string $primaryQuery, + array $primaryShopResults + ): bool { + $asksForBundle = $this->asksForBundleOrAccessory($prompt); + $hasModelLikePrimaryQuery = $this->containsModelLikePhrase($primaryQuery); + $primaryResultsCount = count($primaryShopResults); + + if ($primaryResultsCount === 0) { + return true; + } + + // Always try repair for bundle/accessory prompts. + // These prompts often need a second pass even when the first search + // already returned some results, because the user is asking for a + // combination of main device + matching accessory. + if ($asksForBundle) { + return true; + } + + if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) { + return false; + } + + if ($hasModelLikePrimaryQuery && $primaryResultsCount > 0) { + return false; + } + + return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair; + } + + /** + * @param ShopProductResult[] $primaryShopResults + * @param array $knowledgeChunks + * @return string[] + */ + private function buildRepairQueries( + string $prompt, + string $primaryQuery, + array $primaryShopResults, + array $knowledgeChunks + ): array { + $knowledgeText = $this->flattenToText($knowledgeChunks); + $primaryResultText = $this->flattenPrimaryResults($primaryShopResults); + $combinedText = trim($prompt . "\n" . $knowledgeText . "\n" . 
$primaryResultText); + + $modelCandidates = $this->extractModelCandidates($combinedText); + $accessoryCandidates = $this->extractAccessoryCandidates($combinedText); + + $topPrimaryName = $primaryShopResults[0]->name ?? ''; + $topPrimaryProductNumber = $primaryShopResults[0]->productNumber ?? null; + $topPrimaryPhrase = trim($topPrimaryName . ' ' . ($topPrimaryProductNumber ?? '')); + + $queries = []; + + if ($topPrimaryPhrase !== '' && $this->containsModelLikePhrase($topPrimaryPhrase)) { + $queries[] = $topPrimaryPhrase; + } elseif ($topPrimaryName !== '' && $this->containsModelLikePhrase($topPrimaryName)) { + $queries[] = $topPrimaryName; + } + + foreach ($modelCandidates as $candidate) { + $queries[] = $candidate; + } + + if ($this->asksForBundleOrAccessory($prompt)) { + foreach ($accessoryCandidates as $accessoryCandidate) { + if ($topPrimaryName !== '') { + $queries[] = trim($topPrimaryName . ' ' . $accessoryCandidate); + } + + $queries[] = $accessoryCandidate; + } + } + + $queries = array_map( + fn(string $query): string => $this->sanitizeQuery($query), + $queries + ); + + $queries = array_values(array_filter( + array_unique($queries), + fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery) + )); + + return array_slice($queries, 0, max(1, $this->maxRepairQueries)); + } + + /** + * @param array $value + */ + private function flattenToText(array $value): string + { + $parts = []; + + $walker = function (mixed $item) use (&$parts, &$walker): void { + if (is_array($item)) { + foreach ($item as $subItem) { + $walker($subItem); + } + + return; + } + + if (is_scalar($item)) { + $text = trim((string) $item); + if ($text !== '') { + $parts[] = $text; + } + } + }; + + $walker($value); + + return trim(implode("\n", $parts)); + } + + /** + * @param ShopProductResult[] $products + */ + private function flattenPrimaryResults(array $products): string + { + $parts = []; + + foreach ($products as $product) { + $parts[] = 
trim(implode(' ', array_filter([ + $product->name, + $product->productNumber, + $product->manufacturer, + $product->description, + implode(' ', $product->highlights), + $product->customFields, + ]))); + } + + return trim(implode("\n", array_filter($parts))); + } + + /** + * @return string[] + */ + private function extractModelCandidates(string $text): array + { + $candidates = []; + + preg_match_all( + '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u', + $text, + $matches + ); + + foreach ($matches[1] ?? [] as $candidate) { + $candidate = $this->sanitizeQuery($candidate); + + if ($candidate === '') { + continue; + } + + if ($this->looksTooGeneric($candidate)) { + continue; + } + + $candidates[] = $candidate; + } + + return $this->sortCandidatesBySpecificity($candidates); + } + + /** + * @return string[] + */ + private function extractAccessoryCandidates(string $text): array + { + $candidates = []; + + preg_match_all( + '/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu', + $text, + $matches + ); + + foreach ($matches[1] ?? 
/**
 * De-duplicates the candidates and orders them by descending specificity score.
 * Candidates with equal scores keep their original relative order.
 *
 * @param string[] $candidates
 * @return string[]
 */
private function sortCandidatesBySpecificity(array $candidates): array
{
    $unique = array_values(array_unique($candidates));

    $scores = [];
    foreach ($unique as $position => $candidate) {
        $scores[$position] = $this->scoreCandidate($candidate);
    }

    // Sort the positions instead of the strings so every score is computed only once.
    $order = array_keys($unique);
    usort($order, static fn (int $left, int $right): int => $scores[$right] <=> $scores[$left]);

    return array_map(
        static fn (int $position): string => $unique[$position],
        $order
    );
}

/**
 * Scores how specific a candidate query is: +4 if it contains a digit, +1 per token
 * (capped at 4) and +3 if it contains one of the known product/accessory keywords.
 */
private function scoreCandidate(string $candidate): int
{
    $containsDigit = preg_match('/\d/u', $candidate) === 1;
    $containsKeyword = preg_match(
        '/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu',
        $candidate
    ) === 1;

    $tokenPoints = min(count($this->tokenize($candidate)), 4);

    return ($containsDigit ? 4 : 0) + $tokenPoints + ($containsKeyword ? 3 : 0);
}

/**
 * Tells whether the prompt contains one of the keywords or phrases that signal
 * a request for matching accessories or a bundle.
 */
private function asksForBundleOrAccessory(string $prompt): bool
{
    $pattern = '/\b(passend|passende|zubehor|zubehör|dazu|zusatz|erganzung|ergänzung|indikator|reagenz|kit|set|auch\s+das|mit\s+preis\s+und\s+allen\s+infos)\b/iu';

    return preg_match($pattern, $prompt) === 1;
}

/**
 * Tells whether the text contains a model-like phrase: one to three words
 * followed by a 2-5 digit number.
 */
private function containsModelLikePhrase(string $text): bool
{
    $pattern = '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u';

    return preg_match($pattern, $text) === 1;
}

/**
 * Tells whether the whole candidate, compared case-insensitively, is exactly one of
 * the generic words that are useless as a search query on their own.
 */
private function looksTooGeneric(string $candidate): bool
{
    $genericTokens = [
        'wasser',
        'messgerät',
        'messgeraet',
        'produkt',
        'geräte',
        'geraete',
        'gerät',
        'geraet',
        'resthärte',
        'resthaerte',
        'preis',
        'infos',
        'wissen',
    ];

    return in_array(mb_strtolower($candidate), $genericTokens, true);
}

/**
 * Normalises a query: collapses whitespace runs to single spaces and strips
 * surrounding whitespace, quotes and punctuation.
 */
private function sanitizeQuery(string $query): string
{
    $trimmed = trim($query);
    $collapsed = preg_replace('/\s+/u', ' ', $trimmed) ?? $trimmed;
    $stripped = trim($collapsed, " \t\n\r\0\x0B\"'`.,;:-");

    return trim($stripped);
}

/**
 * Tells whether a candidate query shares at least 90% of its tokens with the primary query,
 * measured against the longer of the two token lists. Returns false if either has no tokens.
 */
private function isTooCloseToPrimaryQuery(string $candidateQuery, string $primaryQuery): bool
{
    $candidateTokens = $this->tokenize($candidateQuery);
    $primaryTokens = $this->tokenize($primaryQuery);

    if ($candidateTokens === [] || $primaryTokens === []) {
        return false;
    }

    $sharedCount = count(array_intersect($candidateTokens, $primaryTokens));
    $longestCount = max(count($candidateTokens), count($primaryTokens));

    return $sharedCount / $longestCount >= 0.9;
}

/**
 * Appends every incoming product whose key is not already present, keeping $existing
 * untouched at the front of the result.
 *
 * @param ShopProductResult[] $existing
 * @param ShopProductResult[] $incoming
 * @return ShopProductResult[]
 */
private function mergeUniqueProducts(array $existing, array $incoming): array
{
    $knownKeys = array_fill_keys(
        array_map($this->buildProductKey(...), $existing),
        true
    );

    $result = $existing;

    foreach ($incoming as $candidate) {
        $candidateKey = $this->buildProductKey($candidate);

        if (!isset($knownKeys[$candidateKey])) {
            $knownKeys[$candidateKey] = true;
            $result[] = $candidate;
        }
    }

    return $result;
}

/**
 * Merges primary and repair results and orders them by relevance.
 *
 * Each product is scored against the prompt (weight 3), the primary query (weight 2)
 * and the concatenated repair queries (weight 4). Products from the primary result
 * set get one bonus point. Ties keep the merged order.
 *
 * @param ShopProductResult[] $primaryResults
 * @param ShopProductResult[] $repairResults
 * @param string[] $repairQueries
 * @return ShopProductResult[]
 */
private function rankMergedResults(
    array $primaryResults,
    array $repairResults,
    string $prompt,
    string $primaryQuery,
    array $repairQueries
): array {
    $combined = $this->mergeUniqueProducts($primaryResults, $repairResults);
    $repairText = trim(implode(' ', $repairQueries));
    $primaryCount = count($primaryResults);

    $ranked = [];

    foreach ($combined as $position => $product) {
        $relevance = $this->scoreProductAgainstText($product, $prompt) * 3
            + $this->scoreProductAgainstText($product, $primaryQuery) * 2
            + $this->scoreProductAgainstText($product, $repairText) * 4;

        // The merge keeps the primary results in front, so low positions are primary hits.
        if ($position < $primaryCount) {
            $relevance += 1;
        }

        $ranked[] = [
            'index' => $position,
            'score' => $relevance,
            'product' => $product,
        ];
    }

    usort(
        $ranked,
        static fn (array $a, array $b): int => ($b['score'] <=> $a['score']) ?: ($a['index'] <=> $b['index'])
    );

    return array_values(array_map(
        static fn (array $entry): ShopProductResult => $entry['product'],
        $ranked
    ));
}
/**
 * Scores how well a product matches a piece of text.
 *
 * Every query token that also occurs in the product text adds 2 points; every query
 * token containing a digit that occurs in the product text adds another 4 points.
 * Returns 0 if either side has no tokens.
 */
private function scoreProductAgainstText(ShopProductResult $product, string $text): int
{
    $queryTokens = $this->tokenize($text);
    if ($queryTokens === []) {
        return 0;
    }

    // NOTE(review): customFields is imploded like a string here. If ShopProductResult
    // declares it as an array, it has to be flattened first — confirm against the DTO.
    $productText = trim(implode(' ', array_filter([
        $product->name,
        $product->productNumber,
        $product->manufacturer,
        implode(' ', $product->highlights),
        $product->description,
        $product->customFields,
    ])));

    $productTokens = $this->tokenize($productText);
    if ($productTokens === []) {
        return 0;
    }

    $score = count(array_intersect($queryTokens, $productTokens)) * 2;

    // Matching numbers (model numbers, sizes, article numbers) weigh more than plain words.
    foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
        if (in_array($numberToken, $productTokens, true)) {
            $score += 4;
        }
    }

    return $score;
}

/**
 * Builds the lower-cased identity key used for de-duplicating products:
 * id, product number, name and URL joined by "|".
 */
private function buildProductKey(ShopProductResult $product): string
{
    return mb_strtolower(trim(implode('|', [
        $product->id,
        $product->productNumber ?? '',
        $product->name,
        $product->url ?? '',
    ])));
}

/**
 * Splits text into lower-cased tokens made of letters, digits and hyphens.
 *
 * Everything that is not a letter, digit, whitespace or hyphen is treated as a
 * separator. The token "0" is kept; tokens consisting solely of hyphens (a
 * free-standing dash such as in "A - B") are dropped because they carry no meaning
 * and would otherwise count as a match between any two texts that contain a dash.
 *
 * @return string[]
 */
private function tokenize(string $text): array
{
    $text = mb_strtolower($text);
    $text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;

    // PREG_SPLIT_NO_EMPTY removes only empty strings. A bare array_filter() would also
    // discard the legitimate token "0" because it is falsy.
    $tokens = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        // The /u split fails on invalid UTF-8; fall back to a byte-wise split.
        $tokens = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    }

    return array_values(array_filter(
        $tokens,
        static fn (string $token): bool => trim($token, '-') !== ''
    ));
}

/**
 * Returns the tokens that contain at least one digit, re-indexed as a list.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function extractNumberTokens(array $tokens): array
{
    return array_values(array_filter(
        $tokens,
        static fn (string $token): bool => preg_match('/\d/u', $token) === 1
    ));
}
'')); + $normalizedSizes = array_values(array_filter(array_map( + fn(mixed $size): string => $this->normalizeForMatching((string) $size), + $query->sizes + ))); + + $normalizedQuery = trim(implode(' ', array_filter([ + $normalizedPrompt, + $normalizedSearchText, + $normalizedBrand, + implode(' ', $normalizedSizes), + ]))); $queryTokens = $this->tokenize($normalizedQuery); $queryNumberTokens = $this->extractNumberTokens($queryTokens); $normalizedProductName = $this->normalizeForMatching($product->name); - $productNameTokens = $this->tokenize($normalizedProductName); - $productNameNumberTokens = $this->extractNumberTokens($productNameTokens); - $normalizedProductNumber = $this->normalizeForMatching((string) ($product->productNumber ?? '')); - $productNumberTokens = $this->tokenize($normalizedProductNumber); - $productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens); - $normalizedManufacturer = $this->normalizeForMatching((string) ($product->manufacturer ?? '')); - $normalizedBrand = $this->normalizeForMatching((string) ($query->brand ?? 
'')); + $normalizedProductCorpus = $this->buildNormalizedProductCorpus($product); + + $productNameTokens = $this->tokenize($normalizedProductName); + $productNumberTokens = $this->tokenize($normalizedProductNumber); + $productCorpusTokens = $this->tokenize($normalizedProductCorpus); + + $productNameNumberTokens = $this->extractNumberTokens($productNameTokens); + $productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens); + $productCorpusNumberTokens = $this->extractNumberTokens($productCorpusTokens); if ($normalizedProductNumber !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductNumber)) { - $score += 120; + $score += 140; + } + + if ($normalizedProductName !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductName)) { + $score += 80; } if ($normalizedBrand !== '') { @@ -281,20 +303,22 @@ final readonly class ShopSearchService } } - $score += $this->countOverlap($queryTokens, $productNameTokens) * 4; - $score += $this->countOverlap($queryTokens, $productNumberTokens) * 8; - $score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 16; - $score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 24; + $score += $this->countOverlap($queryTokens, $productNameTokens) * 6; + $score += $this->countOverlap($queryTokens, $productNumberTokens) * 10; + $score += $this->countOverlap($queryTokens, $productCorpusTokens) * 2; - foreach ($query->sizes as $size) { - $normalizedSize = $this->normalizeForMatching((string) $size); + $score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 18; + $score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 28; + $score += $this->countOverlap($queryNumberTokens, $productCorpusNumberTokens) * 8; + foreach ($normalizedSizes as $normalizedSize) { if ($normalizedSize === '') { continue; } if ($this->containsWholePhrase($normalizedProductName, $normalizedSize) - || 
/**
 * Concatenates the product's descriptive fields (falsy ones left out) and
 * normalises the result for matching.
 */
private function buildNormalizedProductCorpus(ShopProductResult $product): string
{
    // NOTE(review): customFields is imploded like a string here. If ShopProductResult
    // declares it as an array, it has to be flattened first — confirm against the DTO.
    $fields = array_filter([
        $product->name,
        $product->productNumber,
        $product->manufacturer,
        implode(' ', $product->highlights),
        $product->description,
        $product->customFields,
    ]);

    return $this->normalizeForMatching(implode(' ', $fields));
}

/**
 * Removes repeated products, keeping the first occurrence and the original order.
 * Two products count as the same when id, product number, name and URL all match
 * case-insensitively.
 *
 * @param ShopProductResult[] $products
 * @return ShopProductResult[]
 */
private function deduplicateProducts(array $products): array
{
    $firstByKey = [];

    foreach ($products as $product) {
        $identity = mb_strtolower(trim(implode('|', [
            $product->id,
            $product->productNumber ?? '',
            $product->name,
            $product->url ?? '',
        ])));

        // Only the first product seen for an identity is stored.
        $firstByKey[$identity] ??= $product;
    }

    return array_values($firstByKey);
}
$cached : []; - } - - $result = $this->inner->retrieve($prompt); - - $item->set($result); - $item->expiresAfter($this->ttlSeconds); - $this->cache->save($item); - - return $result; - } - - private function buildCacheKey(string $prompt): string - { - $normalized = mb_strtolower(trim($prompt)); - $normalized = preg_replace('/\s+/u', ' ', $normalized) ?? $normalized; - - return 'rag_retrieval_' . sha1($normalized); - } -} \ No newline at end of file