From 87417febf47146a193d311afacba7eea6c240c2b Mon Sep 17 00:00:00 2001 From: team 1 Date: Thu, 23 Apr 2026 15:47:53 +0200 Subject: [PATCH] optimize retrieval --- public/assets/js/base.js | 157 +-- src/Agent/AgentRunner.php | 233 +++- src/Agent/PromptBuilder.php | 94 +- src/Commerce/CommerceQueryParser.php | 145 ++- src/Commerce/CommerceReferenceResolver.php | 239 +++++ src/Commerce/CommerceReferenceStore.php | 99 ++ src/Commerce/Dto/CommerceReferenceContext.php | 149 +++ src/Commerce/ShopSearchService.php | 999 ++++++++++++++++-- src/Config/CommerceIntentConfig.php | 26 +- src/Config/CommerceQueryParserConfig.php | 109 +- src/Config/ShopServiceConfig.php | 58 +- src/Controller/HistoryController.php | 14 +- src/Shopware/StoreApiClient.php | 58 +- 13 files changed, 2093 insertions(+), 287 deletions(-) create mode 100644 src/Commerce/CommerceReferenceResolver.php create mode 100644 src/Commerce/CommerceReferenceStore.php create mode 100644 src/Commerce/Dto/CommerceReferenceContext.php diff --git a/public/assets/js/base.js b/public/assets/js/base.js index f374d77..90e5bfe 100644 --- a/public/assets/js/base.js +++ b/public/assets/js/base.js @@ -62,7 +62,7 @@ document.addEventListener('DOMContentLoaded', () => { function hasMeaningfulChildContent(element) { return element.querySelector( - 'img, table, pre, code, ul, ol, h1, h2, h3, h4, h5, h6, a, hr' + 'img, table, pre, code, ul, ol, h1, h2, h3, h4, h5, h6, a, hr, .badge' ) !== null; } @@ -86,68 +86,23 @@ document.addEventListener('DOMContentLoaded', () => { }); } - function stripAllThinkContent(container) { - const blockSelector = 'p, div, li, blockquote'; - const thinkSpans = Array.from(container.querySelectorAll('.think')); - - if (thinkSpans.length === 0) { - return; - } - - const handledBlocks = new Set(); - - thinkSpans.forEach((span) => { - const block = span.closest(blockSelector) || span.parentElement; - - if (!block || handledBlocks.has(block)) { - return; - } - - handledBlocks.add(block); - - const thinksInBlock = Array.from(block.querySelectorAll('.think')); - const lastThinkInBlock = thinksInBlock[thinksInBlock.length - 1]; - - if (!lastThinkInBlock) { - return; - } - - let node = block.firstChild; - - while (node) { - const next = node.nextSibling; - const isLastThink = node === lastThinkInBlock; - - node.remove(); - - if (isLastThink) { - break; - } - - node = next; - } - - while ( - block.firstChild && - ( - (block.firstChild.nodeType === Node.TEXT_NODE && - block.firstChild.textContent.trim() === '') || - (block.firstChild.nodeType === Node.ELEMENT_NODE && - block.firstChild.tagName === 'BR') - ) - ) { - block.firstChild.remove(); - } + function removeThinkSpansOnly(container) { + container.querySelectorAll('.think').forEach((span) => { + span.remove(); }); cleanupEmptyBlocks(container); } - function hasNonThinkContent(container) { + function cloneWithoutThinkContent(container) { const clone = container.cloneNode(true); - - stripAllThinkContent(clone); + clone.querySelectorAll('.think').forEach((span) => span.remove()); cleanupEmptyBlocks(clone); + return clone; + } + + function hasNonThinkContent(container) { + const clone = cloneWithoutThinkContent(container); if ((clone.textContent || '').trim() !== '') { return true; @@ -156,6 +111,49 @@ document.addEventListener('DOMContentLoaded', () => { return hasMeaningfulChildContent(clone); } + function keepOnlyLastThink(container) { + const thinkSpans = Array.from(container.querySelectorAll('.think')); + + if (thinkSpans.length <= 1) { + cleanupEmptyBlocks(container); + return; + } + + const lastThink = thinkSpans[thinkSpans.length - 1]; + + thinkSpans.slice(0, -1).forEach((span) => { + span.remove(); + }); + + const blockSelector = 'p, div, li, blockquote'; + const lastBlock = lastThink.closest(blockSelector) || lastThink.parentElement; + + if (lastBlock && lastThink.parentElement === lastBlock) { + Array.from(lastBlock.childNodes).forEach((node) => { + if (node === lastThink) { + return; + } + + if ( + node.nodeType === Node.TEXT_NODE && + node.textContent.trim() === '' + ) { + node.remove(); + return; + } + + if ( + node.nodeType === Node.ELEMENT_NODE && + node.tagName === 'BR' + ) { + node.remove(); + } + }); + } + + cleanupEmptyBlocks(container); + } + function cleanupThinkSpans(container) { if (!container) { return; @@ -168,54 +166,11 @@ document.addEventListener('DOMContentLoaded', () => { } if (hasNonThinkContent(container)) { - stripAllThinkContent(container); + removeThinkSpansOnly(container); return; } - if (thinkSpans.length <= 1) { - return; - } - - const blockSelector = 'p, div, li, blockquote'; - const lastThink = thinkSpans[thinkSpans.length - 1]; - const lastBlock = lastThink.closest(blockSelector) || lastThink.parentElement; - - thinkSpans.slice(0, -1).forEach((span) => { - const block = span.closest(blockSelector) || span.parentElement; - - if (block && block !== lastBlock) { - block.remove(); - return; - } - - if (block === lastBlock) { - span.remove(); - } - }); - - if (lastBlock && lastBlock.contains(lastThink)) { - let node = lastBlock.firstChild; - - while (node && node !== lastThink) { - const next = node.nextSibling; - node.remove(); - node = next; - } - - while ( - lastThink.nextSibling && - ( - (lastThink.nextSibling.nodeType === Node.TEXT_NODE && - lastThink.nextSibling.textContent.trim() === '') || - (lastThink.nextSibling.nodeType === Node.ELEMENT_NODE && - lastThink.nextSibling.tagName === 'BR') - ) - ) { - lastThink.nextSibling.remove(); - } - } - - cleanupEmptyBlocks(container); + keepOnlyLastThink(container); } function renderBubbleContent(bubble, raw) { diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index dc496a1..8e157fb 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -4,6 +4,9 @@ declare(strict_types=1); namespace App\Agent; +use App\Commerce\CommerceReferenceResolver; +use App\Commerce\CommerceReferenceStore; +use App\Commerce\Dto\CommerceReferenceContext; use App\Commerce\SearchRepairService; use App\Commerce\ShopSearchService; use App\Config\AgentRunnerConfig; @@ -30,6 +33,8 @@ final readonly class AgentRunner private RetrieverInterface $retriever, private ShopSearchService $shopSearchService, private SearchRepairService $searchRepairService, + private CommerceReferenceStore $commerceReferenceStore, + private CommerceReferenceResolver $commerceReferenceResolver, private CommerceIntentLite $commerceIntentLite, private OllamaClient $ollamaClient, private LoggerInterface $agentLogger, @@ -52,7 +57,8 @@ final readonly class AgentRunner $shopResults = []; $primaryShopResults = []; - $sources = []; + $factSources = []; + $contextSignals = []; $optimizedShopQuery = ''; $shopSearchQuery = ''; $commerceIntent = CommerceIntentLite::NONE; @@ -60,6 +66,8 @@ final readonly class AgentRunner $attemptedShopRepair = false; $usedShopRepair = false; $shopRepairQueries = []; + $activeCommerceReference = null; + $shopChecked = false; $this->agentLogger->info('Agent run started', [ 'userId' => $userId, @@ -72,19 +80,18 @@ final readonly class AgentRunner } yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think'); - yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think'); $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt); if ($urlContent !== '') { - $this->addSource($sources, 'Externe URL'); + $this->addBadge($factSources, 'Externe URL'); } yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think'); $knowledgeChunks = $this->retriever->retrieve($prompt); if ($knowledgeChunks !== []) { - $this->addSource($sources, 'RAG Wissen'); + $this->addBadge($factSources, 'RAG Wissen'); } $commerceIntent = $this->detectCommerceIntent($prompt); @@ -93,18 +100,53 @@ final readonly class AgentRunner yield $this->systemMsg('Ich optimiere die Recherche...', 'think'); $commerceHistoryContext = $this->buildCommerceHistoryContext($userId); + $activeCommerceReference = $this->loadCommerceReference($userId); if ($commerceHistoryContext !== '') { - $this->addSource($sources, 'Chatverlauf'); + $this->addBadge($contextSignals, 'Gesprächskontext'); } - $optimizedShopQuery = $this->buildOptimizedShopQuery( + if ($activeCommerceReference !== null) { + $this->addBadge($contextSignals, 'Commerce-Referenz'); + } + + $isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp( $prompt, - $userId, - $commerceHistoryContext + $activeCommerceReference ); - $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; + if ($isReferenceOnlyFollowUp) { + $shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference); + + if ($shopSearchQuery !== '') { + $this->addBadge($contextSignals, 'Deterministische Referenzsuche'); + } + + $this->agentLogger->info('Using deterministic reference shop query', [ + 'userId' => $userId, + 'commerceIntent' => $commerceIntent, + 'prompt' => $prompt, + 'shopSearchQuery' => $shopSearchQuery, + 'referenceProductName' => $activeCommerceReference?->productName, + 'referenceFocusTerms' => $activeCommerceReference?->focusTerms, + ]); + } else { + $optimizedShopQuery = $this->buildOptimizedShopQuery( + $prompt, + $userId, + $commerceHistoryContext + ); + + if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) { + $this->addBadge($contextSignals, 'Query-Optimierung'); + } + + $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; + } + + if ($shopSearchQuery === '') { + $shopSearchQuery = $prompt; + } $this->agentLogger->info('Commerce search prepared', [ 'userId' => $userId, @@ -112,8 +154,11 @@ final readonly class AgentRunner 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', 'optimizedShopQuery' => $optimizedShopQuery, 'shopSearchQuery' => $shopSearchQuery, + 'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + 'hasActiveCommerceReference' => $activeCommerceReference !== null, + 'activeCommerceReferenceProduct' => $activeCommerceReference?->productName, ]); yield $this->systemMsg( @@ -121,11 +166,14 @@ final readonly class AgentRunner 'think' ); + $shopChecked = true; + $primaryShopResults = $this->searchShop( $shopSearchQuery, $commerceIntent, $userId, - $commerceHistoryContext + $commerceHistoryContext, + $activeCommerceReference ); $repairPayload = $this->repairShopResults( @@ -144,11 +192,13 @@ final readonly class AgentRunner $shopRepairQueries = $repairPayload['repairQueries']; if ($shopResults !== []) { - $this->addSource($sources, 'Shopsystem'); + $this->addBadge($factSources, 'Shopsystem'); + } elseif ($shopChecked) { + $this->addBadge($factSources, 'Shopsystem geprüft'); } if ($attemptedShopRepair) { - $this->addSource($sources, 'Erweiterte Shopsuche'); + $this->addBadge($contextSignals, 'Erweiterte Shopsuche'); } } @@ -176,6 +226,7 @@ final readonly class AgentRunner 'shopSearchQuery' => $shopSearchQuery, 'primaryShopResultsCount' => count($primaryShopResults), 'shopResultsCount' => count($shopResults), + 'shopChecked' => $shopChecked, 'attemptedShopRepair' => $attemptedShopRepair, 'usedShopRepair' => $usedShopRepair, 'shopRepairQueries' => $shopRepairQueries, @@ -192,14 +243,22 @@ final readonly class AgentRunner ]); } - if ($sources !== []) { - yield $this->emitSources($sources, 'Genutzte Quellen: '); + if ($factSources !== [] || $contextSignals !== []) { + yield $this->emitSourceSummary( + $factSources, + $contextSignals, + 'Genutzte Datenpfade' + ); } $fullOutput = yield from $this->streamFinalAnswer($finalPrompt); - if ($sources !== []) { - yield $this->emitSources($sources, 'Quellen: '); + if ($factSources !== [] || $contextSignals !== []) { + yield $this->emitSourceSummary( + $factSources, + $contextSignals, + 'Quellen und Signale' + ); } if ($this->debug) { @@ -207,10 +266,11 @@ final readonly class AgentRunner } if ($fullOutput !== '') { - $this->contextService->appendHistory( - $userId, - $prompt, - $fullOutput + $this->persistConversationState( + userId: $userId, + prompt: $prompt, + fullOutput: $fullOutput, + shopResults: $shopResults ); } @@ -221,6 +281,7 @@ final readonly class AgentRunner 'commerceIntent' => $commerceIntent, 'primaryShopResultsCount' => count($primaryShopResults), 'shopResultsCount' => count($shopResults), + 'shopChecked' => $shopChecked, 'attemptedShopRepair' => $attemptedShopRepair, 'usedShopRepair' => $usedShopRepair, 'shopRepairQueries' => $shopRepairQueries, @@ -231,6 +292,8 @@ final readonly class AgentRunner 'shopSearchQuery' => $shopSearchQuery, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + 'hasActiveCommerceReference' => $activeCommerceReference !== null, + 'activeCommerceReferenceProduct' => $activeCommerceReference?->productName, ]); } catch (Throwable $e) { $this->agentLogger->error('Agent run failed', [ @@ -298,6 +361,42 @@ final readonly class AgentRunner return $this->sanitizeOptimizedShopQuery($optimizedQuery); } + private function isReferenceOnlyCommerceFollowUp( + string $prompt, + ?CommerceReferenceContext $referenceContext + ): bool { + if ($referenceContext === null) { + return false; + } + + $normalizedPrompt = mb_strtolower(trim($prompt), 'UTF-8'); + $normalizedPrompt = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt; + $normalizedPrompt = preg_replace('/\s+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt; + $normalizedPrompt = trim($normalizedPrompt); + + if ($normalizedPrompt === '') { + return false; + } + + if (preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $normalizedPrompt) === 1) { + return false; + } + + return preg_match( + '/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u', + $normalizedPrompt + ) === 1; + } + + private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string + { + if ($referenceContext === null) { + return ''; + } + + return trim($referenceContext->buildReferenceSearchText()); + } + /** * @return array{ * results: array, @@ -346,13 +445,15 @@ final readonly class AgentRunner string $query, string $commerceIntent, string $userId, - string $commerceHistoryContext = '' + string $commerceHistoryContext = '', + ?CommerceReferenceContext $referenceContext = null ): array { try { return $this->shopSearchService->search( $query, $commerceIntent, - $commerceHistoryContext + $commerceHistoryContext, + $referenceContext ); } catch (Throwable $e) { $this->agentLogger->warning('Shop search failed, continuing without shop results', [ @@ -361,6 +462,8 @@ final readonly class AgentRunner 'query' => $query, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + 'hasReferenceContext' => $referenceContext !== null, + 'referenceProductName' => $referenceContext?->productName, 'exception' => $e, ]); @@ -376,6 +479,64 @@ final readonly class AgentRunner ); } + private function loadCommerceReference(string $userId): ?CommerceReferenceContext + { + try { + return $this->commerceReferenceStore->load($userId); + } catch (Throwable $e) { + $this->agentLogger->warning('Failed to load commerce reference context', [ + 'userId' => $userId, + 'exception' => $e, + ]); + + return null; + } + } + + /** + * @param array $shopResults + */ + private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void + { + try { + $referenceContext = $this->commerceReferenceResolver->resolveFromCommerceTurn( + $prompt, + $answer, + $shopResults + ); + + if ($referenceContext === null) { + return; + } + + $this->commerceReferenceStore->save($userId, $referenceContext); + } catch (Throwable $e) { + $this->agentLogger->warning('Failed to persist commerce reference context', [ + 'userId' => $userId, + 'exception' => $e, + ]); + } + } + + /** + * @param array $shopResults + */ + private function persistConversationState( + string $userId, + string $prompt, + string $fullOutput, + array $shopResults + ): void { + $this->contextService->appendHistory($userId, $prompt, $fullOutput); + + $this->storeCommerceReference( + userId: $userId, + prompt: $prompt, + answer: $fullOutput, + shopResults: $shopResults + ); + } + private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array { return match ($commerceIntent) { @@ -447,22 +608,36 @@ final readonly class AgentRunner } /** - * @param string[] $sources + * @param string[] $factSources + * @param string[] $contextSignals */ - private function emitSources(array $sources, string $prefix): string + private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string { - return $this->systemMsg($prefix . implode(' ', $sources), 'info'); + $parts = []; + + if ($factSources !== []) { + $parts[] = 'Fakten: ' . implode(' ', $factSources); + } + + if ($contextSignals !== []) { + $parts[] = 'Kontext: ' . implode(' ', $contextSignals); + } + + return $this->systemMsg( + $label . ': ' . implode('    ', $parts), + 'info' + ); } /** - * @param string[] $sources + * @param string[] $target */ - private function addSource(array &$sources, string $label): void + private function addBadge(array &$target, string $label): void { $badge = $this->badge($label); - if (!in_array($badge, $sources, true)) { - $sources[] = $badge; + if (!in_array($badge, $target, true)) { + $target[] = $badge; } } diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php index 29af2c5..a680a53 100644 --- a/src/Agent/PromptBuilder.php +++ b/src/Agent/PromptBuilder.php @@ -14,7 +14,6 @@ use RuntimeException; final readonly class PromptBuilder { - public function __construct( private ContextService $contextService, private SystemPromptRepository $systemPromptRepository, @@ -49,12 +48,24 @@ final readonly class PromptBuilder $hasShopResults = $shopResults !== []; $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); + $isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt); $systemBlock = $this->buildSystemBlock(); $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut); - $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults); - $responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion); - $knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults); + $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion); + $responseFormatBlock = $this->buildResponseFormatBlock( + $prompt, + $hasShopResults, + $isTechnicalProductQuestion, + $isPriceDrivenQuestion + ); + $knowledgeBlock = $this->buildKnowledgeBlock( + $knowledgeChunks, + $urlContent, + $prompt, + $hasShopResults, + $isPriceDrivenQuestion + ); $userBlock = $this->buildUserBlock($prompt); $fixedPrompt = $this->implodeBlocks([ @@ -231,7 +242,9 @@ final readonly class PromptBuilder "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" . "Do not infer undocumented technical specifications from shop data.\n" . "Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" . - "Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item."; + "Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" . + "If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" . + "If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering."; if ($totalCount > count($limitedShopResults)) { $header .= "\n" . @@ -247,12 +260,20 @@ final readonly class PromptBuilder /** * Build a small priority block that tells the model what to surface first. */ - private function buildOutputPriorityBlock(bool $hasShopResults): string + private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string { if (!$hasShopResults) { return ''; } + if ($isPriceDrivenQuestion) { + return + "OUTPUT PRIORITY:\n" . + "For price-driven questions, evaluate shop results first for numeric price filtering.\n" . + "Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" . + "Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n"; + } + return "OUTPUT PRIORITY:\n" . "Use retrieved knowledge first to determine the technically matching product or answer.\n" . @@ -263,7 +284,8 @@ final readonly class PromptBuilder private function buildResponseFormatBlock( string $prompt, bool $hasShopResults, - bool $isTechnicalProductQuestion + bool $isTechnicalProductQuestion, + bool $isPriceDrivenQuestion ): string { $rules = [ "RESPONSE FORMAT RULES:", @@ -284,6 +306,8 @@ final readonly class PromptBuilder $rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product."; $rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device."; $rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results."; + $rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices."; + $rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results."; } else { $rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers."; } @@ -294,6 +318,12 @@ final readonly class PromptBuilder $rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives."; } + if ($isPriceDrivenQuestion) { + $rules[] = "- For price-driven questions, answer the threshold result first."; + $rules[] = "- If no grounded shop product fulfills the threshold, say that clearly."; + $rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results."; + } + if ($this->asksForAccessoryOrBundle($prompt)) { $rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory."; $rules[] = "- The main device must come first. The accessory must not replace the main device."; @@ -304,14 +334,13 @@ final readonly class PromptBuilder return implode("\n", $rules); } - /** - * Build the knowledge block. - * - * Retrieved knowledge remains the main source for technical matching and explanation. - * Shop data is preferred for current commercial fields. - */ - private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string - { + private function buildKnowledgeBlock( + array $knowledgeChunks, + string $urlContent, + string $prompt, + bool $hasShopResults, + bool $isPriceDrivenQuestion + ): string { $knowledgeParts = []; $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); @@ -334,7 +363,7 @@ final readonly class PromptBuilder "LANGUAGE RULES:\n" . implode("\n", $this->buildLanguageRules()), "FACT GROUNDING RULES:\n" . - implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)), + implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)), "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" . "Source: Documents\n" . implode("\n\n", $lines), @@ -354,13 +383,6 @@ final readonly class PromptBuilder return $this->implodeBlocks($knowledgeParts); } - /** - * Resolve how many characters may still be used by history. - * - * The active model num_ctx is converted into a conservative prompt budget. - * Shop, knowledge and user question are fixed priority blocks. - * History only receives the remaining space. - */ private function resolveHistoryBudgetChars(string $fixedPrompt): int { $numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx(); @@ -407,8 +429,11 @@ final readonly class PromptBuilder /** * @return string[] */ - private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array - { + private function buildFactGroundingRules( + bool $isTechnicalProductQuestion, + bool $hasShopResults, + bool $isPriceDrivenQuestion + ): array { $rules = [ "- State only facts that are explicitly present in the provided sources.", "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.", @@ -437,6 +462,11 @@ final readonly class PromptBuilder "- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.", "- If the shop match is ambiguous, keep the technical identification and commercial details separate.", ]); + + if ($isPriceDrivenQuestion) { + $rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check."; + $rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists."; + } } else { $rules[] = "- Use retrieved knowledge as authoritative for factual answers."; $rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions."; @@ -523,6 +553,20 @@ final readonly class PromptBuilder return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1; } + private function isLikelyPriceDrivenQuestion(string $prompt): bool + { + $normalized = mb_strtolower($prompt, 'UTF-8'); + + if (preg_match('/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*(?:euro|eur|€)\b/u', $normalized) === 1) { + return true; + } + + return str_contains($normalized, 'preis') + || str_contains($normalized, 'preise') + || str_contains($normalized, 'kosten') + || str_contains($normalized, 'kostet'); + } + private function asksForAccessoryOrBundle(string $prompt): bool { $normalized = mb_strtolower($prompt, 'UTF-8'); diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index f9e8b20..defc35f 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace App\Commerce; +use App\Commerce\Dto\CommerceReferenceContext; use App\Commerce\Dto\CommerceSearchQuery; use App\Config\CommerceIntentConfig; use App\Config\CommerceQueryParserConfig; @@ -23,10 +24,12 @@ final readonly class CommerceQueryParser public function parse( string $originalPrompt, string $intent, - string $historyContext = '' + string $historyContext = '', + ?CommerceReferenceContext $referenceContext = null ): CommerceSearchQuery { $normalizedPrompt = $this->normalize($originalPrompt); $isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt); + $isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt); [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt); $sizes = $this->extractSizes($normalizedPrompt); @@ -44,7 +47,7 @@ final readonly class CommerceQueryParser if ( !$isDirectProductQuery && $historyContext !== '' - && $this->shouldUseHistoryContext($normalizedPrompt) + && $this->shouldUseHistoryContext($normalizedPrompt, $searchText) ) { $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext); @@ -73,7 +76,29 @@ final readonly class CommerceQueryParser } } - $finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt; + if ( + !$isDirectProductQuery + && $referenceContext !== null + && $this->shouldUseReferenceContext($normalizedPrompt, $searchText) + ) { + $referenceSearchText = $this->buildReferenceSearchText($referenceContext); + + if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) { + $searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText; + } else { + $searchText = $this->mergeSearchTexts($referenceSearchText, $searchText); + } + + if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) { + $normalizedManufacturer = $this->normalize($referenceContext->manufacturer); + + if ($normalizedManufacturer !== '') { + $brand = $normalizedManufacturer; + } + } + } + + $finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt); return new CommerceSearchQuery( originalPrompt: $originalPrompt, @@ -93,7 +118,7 @@ final readonly class CommerceQueryParser { $value = $this->textNormalizer->normalize($prompt); $value = $this->queryCleaner->clean($value); - $value = mb_strtolower(trim($value)); + $value = mb_strtolower(trim($value), 'UTF-8'); $value = str_replace(['€'], ' euro ', $value); $value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value; $value = preg_replace('/\s+/u', ' ', $value) ?? $value; @@ -126,6 +151,17 @@ final readonly class CommerceQueryParser $priceMin = $this->toFloat($m[1]); } + // NEW: + // Recognize comparative lower-bound phrasing such as: + // - mehr als 3000 euro + // - über 3000 euro + // - ueber 3000 euro + // - größer als 3000 euro + // - groesser als 3000 euro + if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { + $priceMin = $this->toFloat($m[1]); + } + return [$priceMin, $priceMax]; } @@ -152,7 +188,10 @@ final readonly class CommerceQueryParser } } - return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== ''))); + return array_values(array_unique(array_filter( + $sizes, + static fn(string $value): bool => $value !== '' + ))); } private function extractBrand(string $prompt): ?string @@ -184,6 +223,7 @@ final readonly class CommerceQueryParser foreach ($this->config->getPhrasesToRemove() as $phrase) { $normalizedPhrase = $this->normalize((string) $phrase); + if ($normalizedPhrase === '') { continue; } @@ -193,6 +233,7 @@ final readonly class CommerceQueryParser foreach ($sizes as $size) { $normalizedSize = $this->normalize((string) $size); + if ($normalizedSize === '') { continue; } @@ -207,6 +248,7 @@ final readonly class CommerceQueryParser if ($priceMin !== null || $priceMax !== null) { $text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; $text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; + $text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; $text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text; } @@ -219,14 +261,14 @@ final readonly class CommerceQueryParser ); $tokens = $this->filterSearchTokens($tokens); + $tokens = $this->stripReferenceOnlyTokens($tokens); return trim(implode(' ', $tokens)); } private function buildDirectProductSearchText(string $prompt): string { - $text = $prompt; - $text = preg_replace('/\s+/u', ' ', $text) ?? $text; + $text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt; $text = trim($text, " \t\n\r\0\x0B-.,"); $tokens = array_filter( @@ -234,17 +276,61 @@ final readonly class CommerceQueryParser static fn(string $token): bool => mb_strlen($token) > 0 ); - $tokens = array_values(array_unique($tokens)); - - return trim(implode(' ', $tokens)); + return trim(implode(' ', array_values(array_unique($tokens)))); } - private function shouldUseHistoryContext(string $prompt): bool + private function shouldUseHistoryContext(string $prompt, string $searchText): bool { - return preg_match( - '/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', - $prompt - ) === 1; + if ($this->isReferenceOnlyFollowUp($prompt)) { + return true; + } + + if ($this->isTooGenericSearchText($searchText)) { + return true; + } + + return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1; + } + + private function shouldUseReferenceContext(string $prompt, string $searchText): bool + { + if ($this->isReferenceOnlyFollowUp($prompt)) { + return true; + } + + return $this->isTooGenericSearchText($searchText); + } + + private function isReferenceOnlyFollowUp(string $prompt): bool + { + return preg_match('/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u', $prompt) === 1; + } + + private function isTooGenericSearchText(string $searchText): bool + { + $tokens = array_values(array_filter( + preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [], + static fn(string $token): bool => $token !== '' + )); + + if ($tokens === []) { + return true; + } + + $genericTokens = array_fill_keys($this->config->getReferenceOnlyTokens(), true); + + foreach ($tokens as $token) { + if (!isset($genericTokens[$token])) { + return false; + } + } + + return true; + } + + private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string + { + return $this->normalize($referenceContext->buildReferenceSearchText()); } private function extractLatestQuestionFromHistory(string $historyContext): string @@ -256,6 +342,7 @@ final readonly class CommerceQueryParser } $questions = $matches[1] ?? []; + if ($questions === []) { return ''; } @@ -265,11 +352,11 @@ final readonly class CommerceQueryParser return is_string($lastQuestion) ? trim($lastQuestion) : ''; } - private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string + private function mergeSearchTexts(string $left, string $right): string { $tokens = []; - foreach ([$historySearchText, $currentSearchText] as $text) { + foreach ([$left, $right] as $text) { if ($text === '') { continue; } @@ -294,11 +381,25 @@ final readonly class CommerceQueryParser */ private function filterSearchTokens(array $tokens): array { - $stopWords = $this->config->getFilterSearchTokensPattern(); + $stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true); return array_values(array_filter( $tokens, - static fn(string $token): bool => !in_array($token, $stopWords, true) + static fn(string $token): bool => !isset($stopWords[$token]) + )); + } + + /** + * @param string[] $tokens + * @return string[] + */ + private function stripReferenceOnlyTokens(array $tokens): array + { + $referenceOnly = array_fill_keys($this->config->getReferenceOnlyTokens(), true); + + return array_values(array_filter( + $tokens, + static fn(string $token): bool => !isset($referenceOnly[$token]) )); } @@ -318,11 +419,7 @@ final readonly class CommerceQueryParser $tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: []; - if (count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1) { - return true; - } - - return false; + return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1; } private function containsModelLikePhrase(string $text): bool diff --git a/src/Commerce/CommerceReferenceResolver.php b/src/Commerce/CommerceReferenceResolver.php new file mode 100644 index 0000000..db163c9 --- /dev/null +++ b/src/Commerce/CommerceReferenceResolver.php @@ -0,0 +1,239 @@ +> $shopResults + */ + public function resolveFromCommerceTurn( + string $prompt, + string $answerText, + array $shopResults + ): ?CommerceReferenceContext { + $fromText = $this->resolveFromText($prompt, $answerText); + $fromShop = $this->resolveFromShopResults($prompt, $shopResults); + + if ($fromText !== null && $fromShop !== null && $this->areCompatibleProductNames($fromText->productName, $fromShop->productName)) { + return new CommerceReferenceContext( + productName: $fromShop->productName, + productNumber: $fromShop->productNumber, + manufacturer: $fromShop->manufacturer ?? $fromText->manufacturer, + url: $fromShop->url, + sourceType: 'shop', + confidence: 1.0, + resolvedFromPrompt: $fromText->resolvedFromPrompt ?? $fromShop->resolvedFromPrompt, + resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM), + focusTerms: $this->mergeFocusTerms( + $fromText->focusTerms, + $fromShop->focusTerms + ), + ); + } + + if ($fromText !== null) { + return $fromText; + } + + return $fromShop; + } + + /** + * @param array> $shopResults + */ + private function resolveFromShopResults(string $prompt, array $shopResults): ?CommerceReferenceContext + { + $top = $shopResults[0] ?? null; + + if (!is_array($top)) { + return null; + } + + $name = $this->extractString($top, 'name'); + $productNumber = $this->extractString($top, 'productNumber'); + $manufacturer = $this->extractString($top, 'manufacturer'); + $url = $this->extractString($top, 'url'); + + if ($name === '') { + return null; + } + + return new CommerceReferenceContext( + productName: $name, + productNumber: $productNumber !== '' ? $productNumber : null, + manufacturer: $manufacturer !== '' ? $manufacturer : null, + url: $url !== '' ? $url : null, + sourceType: 'shop', + confidence: 1.0, + resolvedFromPrompt: trim($prompt) !== '' ? trim($prompt) : null, + resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM), + focusTerms: $this->extractFocusTerms($prompt . "\n" . $name), + ); + } + + private function resolveFromText(string $prompt, string $answerText): ?CommerceReferenceContext + { + $text = trim($prompt . "\n" . $answerText); + + if ($text === '') { + return null; + } + + $patterns = [ + '/\b(Testomat\s+2000\s+THCL)\b/ui', + '/\b(Testomat\s+808)\b/ui', + '/\b(Testomat\s+EVO\s+TH)\b/ui', + '/\b(Testomat\s+EVO\s+CALC)\b/ui', + '/\b(Testomat\s+ECO\s+PLUS)\b/ui', + '/\b(Testomat\s+ECO\s+C)\b/ui', + '/\b(Testomat\s+ECO)\b/ui', + '/\b(Testomat\s+LAB\s+CL)\b/ui', + '/\b(Testomat\s+LAB\s+MONO)\b/ui', + '/\b(Testomat\s+2000)\b/ui', + ]; + + foreach ($patterns as $pattern) { + if (!preg_match($pattern, $text, $matches)) { + continue; + } + + $productName = trim((string) ($matches[1] ?? '')); + + if ($productName === '') { + continue; + } + + return new CommerceReferenceContext( + productName: $productName, + productNumber: null, + manufacturer: null, + url: null, + sourceType: 'conversation', + confidence: 0.8, + resolvedFromPrompt: trim($prompt) !== '' ? trim($prompt) : null, + resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM), + focusTerms: $this->extractFocusTerms($text), + ); + } + + return null; + } + + /** + * @return string[] + */ + private function extractFocusTerms(string $text): array + { + $normalized = $this->normalizeText($text); + + if ($normalized === '') { + return []; + } + + $patterns = [ + 'indikator' => '/\bindikator(?:en)?\b/u', + 'indikatoren' => '/\bindikator(?:en)?\b/u', + 'reagenz' => '/\breagenz(?:ien)?\b/u', + 'reagenzien' => '/\breagenz(?:ien)?\b/u', + 'zubehör' => '/\bzubeh[oö]r\b/u', + 'ersatzteil' => '/\bersatzteile?\b/u', + 'ersatzteile' => '/\bersatzteile?\b/u', + 'service-set' => '/\bservice(?:\s|-)?set\b/u', + 'filter' => '/\bfilter\b/u', + 'pumpenkopf' => '/\bpumpenkopf\b/u', + 'motorblock' => '/\bmotorblock\b/u', + 'mehrwertpaket' => '/\bmehrwertpaket\b/u', + 'neotecmaster' => '/\bneotecmaster\b/u', + ]; + + $terms = []; + + foreach ($patterns as $canonical => $pattern) { + if (preg_match($pattern, $normalized) === 1) { + $terms[] = $canonical; + } + } + + return array_values(array_unique($terms)); + } + + /** + * @param string[] $left + * @param string[] $right + * @return string[] + */ + private function mergeFocusTerms(array $left, array $right): array + { + $merged = []; + + foreach ([$left, $right] as $list) { + foreach ($list as $item) { + if (!is_string($item)) { + continue; + } + + $item = trim($item); + + if ($item === '') { + continue; + } + + $merged[$item] = $item; + } + } + + return array_values($merged); + } + + private function areCompatibleProductNames(string $left, string $right): bool + { + $left = $this->normalizeName($left); + $right = $this->normalizeName($right); + + if ($left === '' || $right === '') { + return false; + } + + if ($left === $right) { + return true; + } + + return str_contains($left, $right) || str_contains($right, $left); + } + + private function normalizeName(string $value): string + { + $value = mb_strtolower(trim($value), 'UTF-8'); + $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value; + $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + + return trim($value); + } + + private function normalizeText(string $value): string + { + $value = mb_strtolower(trim($value), 'UTF-8'); + $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + + return trim($value); + } + + /** + * @param array $row + */ + private function extractString(array $row, string $key): string + { + $value = $row[$key] ?? null; + + if (!is_string($value)) { + return ''; + } + + return trim($value); + } +} \ No newline at end of file diff --git a/src/Commerce/CommerceReferenceStore.php b/src/Commerce/CommerceReferenceStore.php new file mode 100644 index 0000000..345a88a --- /dev/null +++ b/src/Commerce/CommerceReferenceStore.php @@ -0,0 +1,99 @@ +directory = rtrim($projectDir, '/') . '/var/agent-commerce-context'; + + if (!is_dir($this->directory) && !mkdir($this->directory, 0775, true) && !is_dir($this->directory)) { + throw new \RuntimeException(sprintf( + 'Failed to create commerce reference directory: %s', + $this->directory + )); + } + } + + public function load(string $userId): ?CommerceReferenceContext + { + $path = $this->getPath($userId); + + if (!is_file($path)) { + return null; + } + + $content = file_get_contents($path); + + if ($content === false || trim($content) === '') { + return null; + } + + $decoded = json_decode($content, true); + + if (!is_array($decoded)) { + return null; + } + + return CommerceReferenceContext::fromArray($decoded); + } + + public function save(string $userId, CommerceReferenceContext $context): void + { + $path = $this->getPath($userId); + $tmpPath = $path . '.tmp'; + + $json = json_encode( + $context->toArray(), + JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE + ); + + if (!is_string($json)) { + throw new \RuntimeException('Failed to encode commerce reference context.'); + } + + if (file_put_contents($tmpPath, $json, LOCK_EX) === false) { + throw new \RuntimeException(sprintf( + 'Failed to write commerce reference context: %s', + $tmpPath + )); + } + + if (!rename($tmpPath, $path)) { + @unlink($tmpPath); + + throw new \RuntimeException(sprintf( + 'Failed to move commerce reference context into place: %s', + $path + )); + } + } + + public function clear(string $userId): void + { + $path = $this->getPath($userId); + + if (is_file($path)) { + @unlink($path); + } + } + + private function getPath(string $userId): string + { + $safeUserId = preg_replace('/[^a-zA-Z0-9_-]/', '_', trim($userId)); + $safeUserId = is_string($safeUserId) ? trim($safeUserId, '_') : ''; + + if ($safeUserId === '') { + throw new \InvalidArgumentException('User id must not be empty.'); + } + + return $this->directory . '/' . $safeUserId . '.json'; + } +} \ No newline at end of file diff --git a/src/Commerce/Dto/CommerceReferenceContext.php b/src/Commerce/Dto/CommerceReferenceContext.php new file mode 100644 index 0000000..45fd920 --- /dev/null +++ b/src/Commerce/Dto/CommerceReferenceContext.php @@ -0,0 +1,149 @@ + $payload + */ + public static function fromArray(array $payload): ?self + { + $productName = self::normalizeNullableString($payload['productName'] ?? null); + + if ($productName === null) { + return null; + } + + return new self( + productName: $productName, + productNumber: self::normalizeNullableString($payload['productNumber'] ?? null), + manufacturer: self::normalizeNullableString($payload['manufacturer'] ?? null), + url: self::normalizeNullableString($payload['url'] ?? null), + sourceType: self::normalizeNullableString($payload['sourceType'] ?? null) ?? 'conversation', + confidence: isset($payload['confidence']) && is_numeric($payload['confidence']) ? (float) $payload['confidence'] : 0.0, + resolvedFromPrompt: self::normalizeNullableString($payload['resolvedFromPrompt'] ?? null), + resolvedAt: self::normalizeNullableString($payload['resolvedAt'] ?? null), + focusTerms: self::normalizeStringList($payload['focusTerms'] ?? []), + ); + } + + /** + * @return array + */ + public function toArray(): array + { + return [ + 'productName' => $this->productName, + 'productNumber' => $this->productNumber, + 'manufacturer' => $this->manufacturer, + 'url' => $this->url, + 'sourceType' => $this->sourceType, + 'confidence' => $this->confidence, + 'resolvedFromPrompt' => $this->resolvedFromPrompt, + 'resolvedAt' => $this->resolvedAt, + 'focusTerms' => $this->focusTerms, + ]; + } + + public function hasStrongIdentity(): bool + { + return $this->productNumber !== null || $this->confidence >= 0.8; + } + + public function buildReferenceSearchText(): string + { + $parts = []; + + if ($this->productName !== '') { + $parts[] = $this->productName; + } + + if ( + $this->productNumber !== null + && $this->productNumber !== '' + && stripos($this->productName, $this->productNumber) === false + ) { + $parts[] = $this->productNumber; + } + + foreach ($this->focusTerms as $focusTerm) { + if ($focusTerm === '') { + continue; + } + + $alreadyIncluded = false; + + foreach ($parts as $part) { + if (stripos($part, $focusTerm) !== false) { + $alreadyIncluded = true; + break; + } + } + + if (!$alreadyIncluded) { + $parts[] = $focusTerm; + } + } + + return trim(implode(' ', $parts)); + } + + private static function normalizeNullableString(mixed $value): ?string + { + if (!is_string($value)) { + return null; + } + + $value = trim($value); + + return $value !== '' ? $value : null; + } + + /** + * @param mixed $value + * @return string[] + */ + private static function normalizeStringList(mixed $value): array + { + if (!is_array($value)) { + return []; + } + + $out = []; + + foreach ($value as $item) { + if (!is_string($item)) { + continue; + } + + $item = trim($item); + + if ($item === '') { + continue; + } + + $out[] = $item; + } + + return array_values(array_unique($out)); + } +} \ No newline at end of file diff --git a/src/Commerce/ShopSearchService.php b/src/Commerce/ShopSearchService.php index 0cd0bb9..671b836 100644 --- a/src/Commerce/ShopSearchService.php +++ b/src/Commerce/ShopSearchService.php @@ -1,7 +1,10 @@ -enabled) { - $this->logger->info('Shop search skipped because commerce search is disabled', ['commerceIntent' => $commerceIntent,]); + $this->logger->info('Shop search skipped because commerce search is disabled', [ + 'commerceIntent' => $commerceIntent, + ]); + return []; } - $primaryQuery = $this->queryParser->parse($originalPrompt, $commerceIntent, $commerceHistoryContext); - $this->logger->info('Shop search started', ['commerceIntent' => $commerceIntent, 'originalPrompt' => $originalPrompt, 'normalizedPrompt' => $primaryQuery->normalizedPrompt, 'searchText' => $primaryQuery->searchText, 'brand' => $primaryQuery->brand, 'sizes' => $primaryQuery->sizes, 'priceMin' => $primaryQuery->priceMin, 'priceMax' => $primaryQuery->priceMax, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), 'criteriaLimit' => $this->maxResults,]); - $rankedProducts = $this->executeSearch($primaryQuery, $commerceIntent, $originalPrompt, true); + + $primaryQuery = $this->queryParser->parse( + $originalPrompt, + $commerceIntent, + $commerceHistoryContext, + $referenceContext + ); + + $focusMode = $this->determineFocusMode( + originalPrompt: $originalPrompt, + referenceContext: $referenceContext + ); + + $this->logger->info('Shop search started', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $primaryQuery->normalizedPrompt, + 'searchText' => $primaryQuery->searchText, + 'brand' => $primaryQuery->brand, + 'sizes' => $primaryQuery->sizes, + 'priceMin' => $primaryQuery->priceMin, + 'priceMax' => $primaryQuery->priceMax, + 'focusMode' => $focusMode, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + 'hasReferenceContext' => $referenceContext !== null, + 'referenceProductName' => $referenceContext?->productName, + 'referenceFocusTerms' => $referenceContext?->focusTerms, + 'criteriaLimit' => $this->maxResults, + ]); + + $referenceProbeResults = $this->probeReferenceContext( + originalPrompt: $originalPrompt, + commerceIntent: $commerceIntent, + referenceContext: $referenceContext + ); + + $rankedProducts = $this->executeSearch( + $primaryQuery, + $commerceIntent, + $originalPrompt, + true + ); + if ($rankedProducts === [] && $commerceHistoryContext !== '') { - $fallbackQuery = $this->queryParser->parse($originalPrompt, $commerceIntent, ''); - $this->logger->info('Shop search retry without commerce history context', ['commerceIntent' => $commerceIntent, 'originalPrompt' => $originalPrompt, 'normalizedPrompt' => $fallbackQuery->normalizedPrompt, 'searchText' => $fallbackQuery->searchText, 'brand' => $fallbackQuery->brand, 'sizes' => $fallbackQuery->sizes, 'priceMin' => $fallbackQuery->priceMin, 'priceMax' => $fallbackQuery->priceMax,]); - $rankedProducts = $this->executeSearch($fallbackQuery, $commerceIntent, $originalPrompt, false); + $fallbackQuery = $this->queryParser->parse( + $originalPrompt, + $commerceIntent, + '', + $referenceContext + ); + + $this->logger->info('Shop search retry without commerce history context', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $fallbackQuery->normalizedPrompt, + 'searchText' => $fallbackQuery->searchText, + 'brand' => $fallbackQuery->brand, + 'sizes' => $fallbackQuery->sizes, + 'priceMin' => $fallbackQuery->priceMin, + 'priceMax' => $fallbackQuery->priceMax, + 'focusMode' => $focusMode, + 'hasReferenceContext' => $referenceContext !== null, + 'referenceProductName' => $referenceContext?->productName, + 'referenceFocusTerms' => $referenceContext?->focusTerms, + ]); + + $rankedProducts = $this->executeSearch( + $fallbackQuery, + $commerceIntent, + $originalPrompt, + false + ); } - $this->logger->info('Shop search finished', ['commerceIntent' => $commerceIntent, 'originalPrompt' => $originalPrompt, 'rankedProductsCount' => count($rankedProducts), 'topProducts' => array_map(static fn(ShopProductResult $product): array => ['name' => $product->name, 'productNumber' => $product->productNumber, 'manufacturer' => $product->manufacturer, 'available' => $product->available,], array_slice($rankedProducts, 0, 3)),]); - return $rankedProducts; + + $finalProducts = $this->mergeRankedProductLists( + $referenceProbeResults, + $rankedProducts, + $primaryQuery + ); + + $finalProducts = $this->applyFocusGuardrails( + products: $finalProducts, + focusMode: $focusMode, + originalPrompt: $originalPrompt, + referenceContext: $referenceContext + ); + + $finalProducts = $this->applyPriceFilters( + products: $finalProducts, + query: $primaryQuery + ); + + $this->logger->info('Shop search finished', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'focusMode' => $focusMode, + 'referenceProbeResultsCount' => count($referenceProbeResults), + 'rankedProductsCount' => count($finalProducts), + 'topProducts' => array_map( + static fn(ShopProductResult $product): array => [ + 'name' => $product->name, + 'productNumber' => $product->productNumber, + 'manufacturer' => $product->manufacturer, + 'available' => $product->available, + 'price' => $product->price, + ], + array_slice($finalProducts, 0, 3) + ), + ]); + + return $finalProducts; } - /** * @return ShopProductResult[] */ - private function executeSearch(CommerceSearchQuery $query, string $commerceIntent, string $originalPrompt, bool $usesHistoryContext): array + /** + * @return ShopProductResult[] + */ + private function probeReferenceContext( + string $originalPrompt, + string $commerceIntent, + ?CommerceReferenceContext $referenceContext + ): array { + if ($referenceContext === null) { + return []; + } + + $probeQueries = $this->buildReferenceProbeQueries($referenceContext); + + if ($probeQueries === []) { + return []; + } + + $allResults = []; + + foreach ($probeQueries as $referenceSearchText) { + $probeQuery = new CommerceSearchQuery( + originalPrompt: $originalPrompt, + normalizedPrompt: mb_strtolower($referenceSearchText, 'UTF-8'), + searchText: $referenceSearchText, + brand: $referenceContext->manufacturer, + sizes: [], + properties: [], + priceMin: null, + priceMax: null, + intent: $commerceIntent, + needsLlmFallback: false, + ); + + $this->logger->info('Shop search reference probe', [ + 'originalPrompt' => $originalPrompt, + 'commerceIntent' => $commerceIntent, + 'referenceSearchText' => $referenceSearchText, + 'referenceProductName' => $referenceContext->productName, + 'referenceProductNumber' => $referenceContext->productNumber, + 'referenceFocusTerms' => $referenceContext->focusTerms, + ]); + + $results = $this->executeSearch( + $probeQuery, + $commerceIntent, + $originalPrompt, + false + ); + + if ($results !== []) { + $allResults = array_merge($allResults, $results); + } + } + + if ($allResults === []) { + return []; + } + + $baseQuery = new CommerceSearchQuery( + originalPrompt: $originalPrompt, + normalizedPrompt: mb_strtolower($referenceContext->buildReferenceSearchText(), 'UTF-8'), + searchText: $referenceContext->buildReferenceSearchText(), + brand: $referenceContext->manufacturer, + sizes: [], + properties: [], + priceMin: null, + priceMax: null, + intent: $commerceIntent, + needsLlmFallback: false, + ); + + return $this->rerankProducts( + $this->deduplicateProducts($allResults), + $baseQuery + ); + } + + /** + * @return string[] + */ + private function buildReferenceProbeQueries(CommerceReferenceContext $referenceContext): array { + $queries = []; + + $baseProduct = trim($referenceContext->productName); + $baseSearch = trim($referenceContext->buildReferenceSearchText()); + + if ($baseSearch !== '') { + $queries[] = $baseSearch; + } + + if ($baseProduct !== '') { + $queries[] = $baseProduct; + } + + foreach ($referenceContext->focusTerms as $focusTerm) { + if (!is_string($focusTerm)) { + continue; + } + + $focusTerm = trim($focusTerm); + + if ($focusTerm === '') { + continue; + } + + if ($baseProduct !== '') { + $queries[] = trim($baseProduct . ' ' . $focusTerm); + } + + foreach ($this->expandFocusTermVariants($focusTerm) as $variant) { + if ($variant === '') { + continue; + } + + if ($baseProduct !== '') { + $queries[] = trim($baseProduct . ' ' . $variant); + } + + if ($referenceContext->productNumber !== null && $referenceContext->productNumber !== '') { + $queries[] = trim($baseProduct . ' ' . $referenceContext->productNumber . ' ' . $variant); + } + } + } + + $queries = array_map( + fn(string $value): string => $this->normalizeForMatching($value), + $queries + ); + + $queries = array_values(array_unique(array_filter( + $queries, + static fn(string $value): bool => $value !== '' + ))); + + return $queries; + } + + /** + * @return string[] + */ + private function expandFocusTermVariants(string $focusTerm): array + { + $normalized = $this->normalizeForMatching($focusTerm); + + $variants = [$normalized]; + + $map = [ + 'indikator' => ['indikator', 'indikatoren'], + 'indikatoren' => ['indikator', 'indikatoren'], + 'reagenz' => ['reagenz', 'reagenzien'], + 'reagenzien' => ['reagenz', 'reagenzien'], + 'ersatzteil' => ['ersatzteil', 'ersatzteile'], + 'ersatzteile' => ['ersatzteil', 'ersatzteile'], + 'service set' => ['service set', 'serviceset', 'service-set'], + 'serviceset' => ['service set', 'serviceset', 'service-set'], + ]; + + if (isset($map[$normalized])) { + $variants = array_merge($variants, $map[$normalized]); + } + + return array_values(array_unique(array_filter( + array_map(fn(string $value): string => $this->normalizeForMatching($value), $variants), + static fn(string $value): bool => $value !== '' + ))); + } + + /** + * @return ShopProductResult[] + */ + private function executeSearch( + CommerceSearchQuery $query, + string $commerceIntent, + string $originalPrompt, + bool $usesHistoryContext + ): array { $criteria = $this->criteriaBuilder->build($query, $this->maxResults); + try { $response = $this->storeApiClient->searchProducts($criteria); - } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) { - $this->logger->warning('Shop search request failed', ['commerceIntent' => $commerceIntent, 'originalPrompt' => $originalPrompt, 'normalizedPrompt' => $query->normalizedPrompt, 'searchText' => $query->searchText, 'brand' => $query->brand, 'sizes' => $query->sizes, 'priceMin' => $query->priceMin, 'priceMax' => $query->priceMax, 'usesHistoryContext' => $usesHistoryContext, 'criteria' => $criteria, 'exceptionClass' => $e::class, 'exceptionMessage' => $e->getMessage(),]); + } catch ( + ClientExceptionInterface | + RedirectionExceptionInterface | + ServerExceptionInterface | + TransportExceptionInterface | + \RuntimeException $e + ) { + $this->logger->warning('Shop search request failed', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $query->normalizedPrompt, + 'searchText' => $query->searchText, + 'brand' => $query->brand, + 'sizes' => $query->sizes, + 'priceMin' => $query->priceMin, + 'priceMax' => $query->priceMax, + 'usesHistoryContext' => $usesHistoryContext, + 'exceptionClass' => $e::class, + 'exceptionMessage' => $e->getMessage(), + ]); + return []; } + $mappedProducts = $this->mapProducts($response); $rankedProducts = $this->rerankProducts($mappedProducts, $query); - $this->logger->info('Shop search request finished', ['commerceIntent' => $commerceIntent, 'originalPrompt' => $originalPrompt, 'normalizedPrompt' => $query->normalizedPrompt, 'searchText' => $query->searchText, 'brand' => $query->brand, 'sizes' => $query->sizes, 'priceMin' => $query->priceMin, 'priceMax' => $query->priceMax, 'usesHistoryContext' => $usesHistoryContext, 'rawElementsCount' => is_array($response['elements'] ?? null) ? count($response['elements']) : 0, 'mappedProductsCount' => count($mappedProducts), 'rankedProductsCount' => count($rankedProducts), 'topProducts' => array_map(static fn(ShopProductResult $product): array => ['name' => $product->name, 'productNumber' => $product->productNumber, 'manufacturer' => $product->manufacturer, 'available' => $product->available,], array_slice($rankedProducts, 0, 3)),]); + + $this->logger->info('Shop search request finished', [ + 'commerceIntent' => $commerceIntent, + 'originalPrompt' => $originalPrompt, + 'normalizedPrompt' => $query->normalizedPrompt, + 'searchText' => $query->searchText, + 'brand' => $query->brand, + 'sizes' => $query->sizes, + 'priceMin' => $query->priceMin, + 'priceMax' => $query->priceMax, + 'usesHistoryContext' => $usesHistoryContext, + 'rawElementsCount' => is_array($response['elements'] ?? null) ? count($response['elements']) : 0, + 'mappedProductsCount' => count($mappedProducts), + 'rankedProductsCount' => count($rankedProducts), + ]); + return $rankedProducts; } - /** * @return ShopProductResult[] */ + /** + * @param ShopProductResult[] $referenceProbeResults + * @param ShopProductResult[] $rankedProducts + * @return ShopProductResult[] + */ + private function mergeRankedProductLists( + array $referenceProbeResults, + array $rankedProducts, + CommerceSearchQuery $primaryQuery + ): array { + if ($referenceProbeResults === [] && $rankedProducts === []) { + return []; + } + + $merged = $this->deduplicateProducts(array_merge($referenceProbeResults, $rankedProducts)); + + return $this->rerankProducts($merged, $primaryQuery); + } + + private function determineFocusMode( + string $originalPrompt, + ?CommerceReferenceContext $referenceContext + ): string { + $normalizedPrompt = $this->normalizeForMatching($originalPrompt); + + if ($this->containsAnyKeyword($normalizedPrompt, [ + 'geräte', + 'geraete', + 'gerät', + 'geraet', + 'analysegerät', + 'analysegeraet', + 'messgerät', + 'messgeraet', + 'analysator', + 'controller', + 'monitor', + ])) { + return self::FOCUS_DEVICE; + } + + if ($this->containsAnyKeyword($normalizedPrompt, [ + 'indikator', + 'indikatoren', + 'reagenz', + 'reagenzien', + 'zubehör', + 'zubehor', + 'ersatzteil', + 'ersatzteile', + 'verbrauchsmaterial', + 'service set', + 'serviceset', + 'filter', + 'pumpenkopf', + 'motorblock', + ])) { + return self::FOCUS_ACCESSORY; + } + + foreach ($referenceContext?->focusTerms ?? [] as $focusTerm) { + if (!is_string($focusTerm)) { + continue; + } + + $normalizedFocusTerm = $this->normalizeForMatching($focusTerm); + + if ($normalizedFocusTerm === '') { + continue; + } + + if ($this->isAccessoryFocusToken($normalizedFocusTerm)) { + return self::FOCUS_ACCESSORY; + } + } + + return self::FOCUS_NEUTRAL; + } + + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ + private function applyFocusGuardrails( + array $products, + string $focusMode, + string $originalPrompt, + ?CommerceReferenceContext $referenceContext + ): array { + if ($products === []) { + return []; + } + + return match ($focusMode) { + self::FOCUS_ACCESSORY => $this->filterForAccessoryFocus( + products: $products, + originalPrompt: $originalPrompt, + referenceContext: $referenceContext + ), + self::FOCUS_DEVICE => $this->filterForDeviceFocus( + products: $products, + originalPrompt: $originalPrompt + ), + default => $products, + }; + } + + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ + private function filterForAccessoryFocus( + array $products, + string $originalPrompt, + ?CommerceReferenceContext $referenceContext + ): array { + $normalizedPrompt = $this->normalizeForMatching($originalPrompt); + $focusTerms = $this->extractAccessoryFocusTerms($normalizedPrompt, $referenceContext); + + if ($focusTerms === []) { + return $products; + } + + $accessoryMatches = []; + $deviceMatches = []; + $neutralMatches = []; + + foreach ($products as $product) { + $isAccessoryLike = $this->isAccessoryLikeProduct($product); + $isDeviceLike = $this->isDeviceLikeProduct($product); + $matchesFocus = $this->productMatchesAnyFocusTerm($product, $focusTerms); + + if ($matchesFocus && $isAccessoryLike) { + $accessoryMatches[] = $product; + continue; + } + + if ($matchesFocus) { + $neutralMatches[] = $product; + continue; + } + + if ($isDeviceLike && !$isAccessoryLike) { + $deviceMatches[] = $product; + continue; + } + + $neutralMatches[] = $product; + } + + if ($accessoryMatches !== []) { + $filtered = array_merge($accessoryMatches, $neutralMatches); + + $this->logger->info('Accessory focus guardrail kept focused accessory-like results', [ + 'originalPrompt' => $originalPrompt, + 'focusTerms' => $focusTerms, + 'beforeCount' => count($products), + 'afterCount' => count($filtered), + ]); + + return $filtered; + } + + if ($deviceMatches !== [] && $neutralMatches === []) { + $this->logger->info('Accessory focus guardrail suppressed device-only results', [ + 'originalPrompt' => $originalPrompt, + 'focusTerms' => $focusTerms, + 'suppressedDeviceCount' => count($deviceMatches), + ]); + + return []; + } + + return $neutralMatches !== [] ? $neutralMatches : $products; + } + + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ + private function filterForDeviceFocus(array $products, string $originalPrompt): array + { + $deviceMatches = []; + $neutralMatches = []; + $accessoryMatches = []; + + foreach ($products as $product) { + $isAccessoryLike = $this->isAccessoryLikeProduct($product); + $isDeviceLike = $this->isDeviceLikeProduct($product); + + if ($isDeviceLike && !$isAccessoryLike) { + $deviceMatches[] = $product; + continue; + } + + if ($isAccessoryLike && !$isDeviceLike) { + $accessoryMatches[] = $product; + continue; + } + + $neutralMatches[] = $product; + } + + if ($deviceMatches !== []) { + $filtered = array_merge($deviceMatches, $neutralMatches); + + $this->logger->info('Device focus guardrail kept device-like results', [ + 'originalPrompt' => $originalPrompt, + 'beforeCount' => count($products), + 'afterCount' => count($filtered), + ]); + + return $filtered; + } + + if ($accessoryMatches !== [] && $neutralMatches === []) { + $this->logger->info('Device focus guardrail suppressed accessory-only results', [ + 'originalPrompt' => $originalPrompt, + 'suppressedAccessoryCount' => count($accessoryMatches), + ]); + + return []; + } + + return $neutralMatches !== [] ? $neutralMatches : $products; + } + + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ + private function applyPriceFilters(array $products, CommerceSearchQuery $query): array + { + if ($products === []) { + return []; + } + + if ($query->priceMin === null && $query->priceMax === null) { + return $products; + } + + $filtered = []; + + foreach ($products as $product) { + $price = $this->extractNumericPrice($product->price); + + if ($price === null) { + continue; + } + + if ($query->priceMin !== null && $price < $query->priceMin) { + continue; + } + + if ($query->priceMax !== null && $price > $query->priceMax) { + continue; + } + + $filtered[] = $product; + } + + return $filtered; + } + + /** + * @return string[] + */ + private function extractAccessoryFocusTerms(string $normalizedPrompt, ?CommerceReferenceContext $referenceContext): array + { + $terms = []; + + foreach ($referenceContext?->focusTerms ?? [] as $focusTerm) { + if (!is_string($focusTerm)) { + continue; + } + + $normalized = $this->normalizeForMatching($focusTerm); + + if ($normalized !== '' && $this->isAccessoryFocusToken($normalized)) { + $terms[$normalized] = $normalized; + } + } + + foreach ([ + 'indikator', + 'indikatoren', + 'reagenz', + 'reagenzien', + 'zubehor', + 'zubehör', + 'ersatzteil', + 'ersatzteile', + 'verbrauchsmaterial', + 'service set', + 'serviceset', + 'filter', + 'pumpenkopf', + 'motorblock', + ] as $candidate) { + $normalizedCandidate = $this->normalizeForMatching($candidate); + + if ($normalizedCandidate !== '' && str_contains($normalizedPrompt, $normalizedCandidate)) { + $terms[$normalizedCandidate] = $normalizedCandidate; + } + } + + return array_values($terms); + } + + private function isAccessoryFocusToken(string $token): bool + { + foreach ([ + 'indikator', + 'indikatoren', + 'reagenz', + 'reagenzien', + 'zubehor', + 'zubehör', + 'ersatzteil', + 'ersatzteile', + 'verbrauchsmaterial', + 'service set', + 'serviceset', + 'filter', + 'pumpenkopf', + 'motorblock', + ] as $candidate) { + if ($token === $this->normalizeForMatching($candidate)) { + return true; + } + } + + return false; + } + + private function productMatchesAnyFocusTerm(ShopProductResult $product, array $focusTerms): bool + { + if ($focusTerms === []) { + return false; + } + + $corpus = $this->buildNormalizedProductCorpus($product); + + foreach ($focusTerms as $focusTerm) { + if ($focusTerm === '') { + continue; + } + + if (str_contains($corpus, $focusTerm)) { + return true; + } + } + + return false; + } + + /** + * @param string[] $keywords + */ + private function containsAnyKeyword(string $text, array $keywords): bool + { + foreach ($keywords as $keyword) { + $normalizedKeyword = $this->normalizeForMatching($keyword); + + if ($normalizedKeyword !== '' && str_contains($text, $normalizedKeyword)) { + return true; + } + } + + return false; + } + + private function extractNumericPrice(?string $price): ?float + { + if ($price === null) { + return null; + } + + $normalized = str_replace(['€', ' '], '', $price); + $normalized = str_replace('.', '', $normalized); + $normalized = str_replace(',', '.', $normalized); + + return is_numeric($normalized) ? (float) $normalized : null; + } + + /** + * @param array $response + * @return ShopProductResult[] + */ private function mapProducts(array $response): array { $elements = $response['elements'] ?? []; + if (!is_array($elements)) { return []; } + $results = []; + foreach ($elements as $row) { if (!is_array($row)) { continue; } + $relativeUrl = $this->extractUrl($row); - $results[] = new ShopProductResult(id: (string)($row['id'] ?? ''), name: trim((string)($row['translated']['name'] ?? '')), productNumber: isset($row['productNumber']) ? (string)$row['productNumber'] : null, manufacturer: $this->extractManufacturer($row), price: $this->extractPrice($row), available: isset($row['available']) ? (bool)$row['available'] : null, url: $this->buildAbsoluteUrl($relativeUrl), highlights: $this->extractHighlights($row), description: $this->cleanUpDescription($row), productImage: $row['cover']['media']['thumbnails'][0]['url'] ?? 'no-image', customFields: $this->getRelevantCustomFields($row['customFields'] ?? [])); + + $results[] = new ShopProductResult( + id: (string) ($row['id'] ?? ''), + name: trim((string) ($row['translated']['name'] ?? '')), + productNumber: isset($row['productNumber']) ? (string) $row['productNumber'] : null, + manufacturer: $this->extractManufacturer($row), + price: $this->extractPrice($row), + available: isset($row['available']) ? (bool) $row['available'] : null, + url: $this->buildAbsoluteUrl($relativeUrl), + highlights: $this->extractHighlights($row), + description: $this->cleanUpDescription($row), + productImage: $row['cover']['media']['thumbnails'][0]['url'] ?? 'no-image', + customFields: $this->getRelevantCustomFields($row['customFields'] ?? []) + ); } - $results = array_values(array_filter($results, static fn(ShopProductResult $product): bool => $product->name !== '')); + + $results = array_values(array_filter( + $results, + static fn(ShopProductResult $product): bool => $product->name !== '' + )); + return $this->deduplicateProducts($results); } - /** * @param ShopProductResult[] $products * @return ShopProductResult[] */ + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ private function rerankProducts(array $products, CommerceSearchQuery $query): array { if (count($products) <= 1) { return $products; } + $decorated = []; + foreach ($products as $index => $product) { - $decorated[] = ['index' => $index, 'score' => $this->scoreProduct($product, $query), 'product' => $product,]; + $decorated[] = [ + 'index' => $index, + 'score' => $this->scoreProduct($product, $query), + 'product' => $product, + ]; } + usort($decorated, static function (array $a, array $b): int { if ($a['score'] === $b['score']) { return $a['index'] <=> $b['index']; } + return $b['score'] <=> $a['score']; }); - return array_values(array_map(static fn(array $entry): ShopProductResult => $entry['product'], $decorated)); + + return array_values(array_map( + static fn(array $entry): ShopProductResult => $entry['product'], + $decorated + )); } private function scoreProduct(ShopProductResult $product, CommerceSearchQuery $query): int { $score = 0; - $normalizedPrompt = $this->normalizeForMatching($query->normalizedPrompt !== '' ? $query->normalizedPrompt : $query->originalPrompt); + $normalizedPrompt = $this->normalizeForMatching( + $query->normalizedPrompt !== '' ? $query->normalizedPrompt : $query->originalPrompt + ); $normalizedSearchText = $this->normalizeForMatching($query->searchText); - $normalizedBrand = $this->normalizeForMatching((string)($query->brand ?? '')); - $normalizedSizes = array_values(array_filter(array_map(fn(mixed $size): string => $this->normalizeForMatching((string)$size), $query->sizes))); - $normalizedQuery = trim(implode(' ', array_filter([$normalizedPrompt, $normalizedSearchText, $normalizedBrand, implode(' ', $normalizedSizes),]))); + $normalizedBrand = $this->normalizeForMatching((string) ($query->brand ?? '')); + $normalizedSizes = array_values(array_filter(array_map( + fn(mixed $size): string => $this->normalizeForMatching((string) $size), + $query->sizes + ))); + + $normalizedQuery = trim(implode(' ', array_filter([ + $normalizedPrompt, + $normalizedSearchText, + $normalizedBrand, + implode(' ', $normalizedSizes), + ]))); + $queryTokens = $this->tokenize($normalizedQuery); $queryNumberTokens = $this->extractNumberTokens($queryTokens); + $normalizedProductName = $this->normalizeForMatching($product->name); - $normalizedProductNumber = $this->normalizeForMatching((string)($product->productNumber ?? '')); - $normalizedManufacturer = $this->normalizeForMatching((string)($product->manufacturer ?? '')); + $normalizedProductNumber = $this->normalizeForMatching((string) ($product->productNumber ?? '')); + $normalizedManufacturer = $this->normalizeForMatching((string) ($product->manufacturer ?? '')); $normalizedProductCorpus = $this->buildNormalizedProductCorpus($product); + $productNameTokens = $this->tokenize($normalizedProductName); $productNumberTokens = $this->tokenize($normalizedProductNumber); $productCorpusTokens = $this->tokenize($normalizedProductCorpus); + $productNameNumberTokens = $this->extractNumberTokens($productNameTokens); $productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens); $productCorpusNumberTokens = $this->extractNumberTokens($productCorpusTokens); + if ($normalizedProductNumber !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductNumber)) { - $score += 140; + $score += 160; } + if ($normalizedProductName !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductName)) { - $score += 80; + $score += 90; } + if ($normalizedBrand !== '') { if ($normalizedManufacturer !== '' && $normalizedManufacturer === $normalizedBrand) { $score += 40; @@ -133,53 +904,72 @@ final readonly class ShopSearchService $score += 20; } } + $score += $this->countOverlap($queryTokens, $productNameTokens) * 6; $score += $this->countOverlap($queryTokens, $productNumberTokens) * 10; $score += $this->countOverlap($queryTokens, $productCorpusTokens) * 2; + $score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 18; $score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 28; $score += $this->countOverlap($queryNumberTokens, $productCorpusNumberTokens) * 8; + foreach ($normalizedSizes as $normalizedSize) { if ($normalizedSize === '') { continue; } - if ($this->containsWholePhrase($normalizedProductName, $normalizedSize) || $this->containsWholePhrase($normalizedProductNumber, $normalizedSize) || $this->containsWholePhrase($normalizedProductCorpus, $normalizedSize)) { + + if ( + $this->containsWholePhrase($normalizedProductName, $normalizedSize) + || $this->containsWholePhrase($normalizedProductNumber, $normalizedSize) + || $this->containsWholePhrase($normalizedProductCorpus, $normalizedSize) + ) { $score += 12; } } + $score += $this->scoreProductTypeMatch($product, $normalizedQuery); + if ($product->available === true) { $score += 1; } + return $score; } private function scoreProductTypeMatch(ShopProductResult $product, string $normalizedQuery): int { $score = 0; + $isDeviceQuery = $this->isDeviceQuery($normalizedQuery); $isAccessoryQuery = $this->isAccessoryQuery($normalizedQuery); + if (!$isDeviceQuery && !$isAccessoryQuery) { return 0; } + $isAccessoryLikeProduct = $this->isAccessoryLikeProduct($product); $isDeviceLikeProduct = $this->isDeviceLikeProduct($product); + if ($isDeviceQuery && !$isAccessoryQuery) { if ($isDeviceLikeProduct) { $score += 60; } + if ($isAccessoryLikeProduct) { $score -= 120; } } + if ($isAccessoryQuery) { if ($isAccessoryLikeProduct) { $score += 30; } + if ($isDeviceLikeProduct) { $score += 10; } } + return $score; } @@ -190,6 +980,7 @@ final readonly class ShopSearchService return true; } } + return false; } @@ -200,67 +991,95 @@ final readonly class ShopSearchService return true; } } + return false; } private function isAccessoryLikeProduct(ShopProductResult $product): bool { $corpus = $this->buildNormalizedProductCorpus($product); + foreach (ShopServiceConfig::ACCESSORY_PRODUCT_KEYWORDS as $keyword) { if (str_contains($corpus, $this->normalizeForMatching($keyword))) { return true; } } + return false; } private function isDeviceLikeProduct(ShopProductResult $product): bool { $corpus = $this->buildNormalizedProductCorpus($product); + foreach (ShopServiceConfig::DEVICE_PRODUCT_KEYWORDS as $keyword) { if (str_contains($corpus, $this->normalizeForMatching($keyword))) { return true; } } + return false; } private function buildNormalizedProductCorpus(ShopProductResult $product): string { - return $this->normalizeForMatching(implode(' ', array_filter([$product->name, $product->productNumber, $product->manufacturer, implode(' ', $product->highlights), $product->description, $product->customFields, $product->url,]))); + return $this->normalizeForMatching(implode(' ', array_filter([ + $product->name, + $product->productNumber, + $product->manufacturer, + implode(' ', $product->highlights), + $product->description, + $product->customFields, + $product->url, + ]))); } - /** * @param string[] $left * @param string[] $right */ + /** + * @param string[] $left + * @param string[] $right + */ private function countOverlap(array $left, array $right): int { if ($left === [] || $right === []) { return 0; } + $leftSet = array_fill_keys($left, true); $rightSet = array_fill_keys($right, true); + return count(array_intersect_key($leftSet, $rightSet)); } - /** * @param string[] $tokens * @return string[] */ + /** + * @param string[] $tokens + * @return string[] + */ private function extractNumberTokens(array $tokens): array { - return array_values(array_filter($tokens, static fn(string $token): bool => preg_match('/\d/u', $token) === 1)); + return array_values(array_filter( + $tokens, + static fn(string $token): bool => preg_match('/\d/u', $token) === 1 + )); } private function normalizeForMatching(string $value): string { - $value = mb_strtolower(trim($value)); + $value = mb_strtolower(trim($value), 'UTF-8'); $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value; $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + return trim($value); } - /** * @return string[] */ + /** + * @return string[] + */ private function tokenize(string $value): array { if ($value === '') { return []; } + return preg_split('/[^\p{L}\p{N}]+/u', $value, -1, PREG_SPLIT_NO_EMPTY) ?: []; } @@ -269,74 +1088,113 @@ final readonly class ShopSearchService if ($normalizedText === '' || $normalizedPhrase === '') { return false; } + return str_contains(' ' . $normalizedText . ' ', ' ' . $normalizedPhrase . ' '); } + /** + * @param array $customField + */ private function getRelevantCustomFields(array $customField): string { $result = ($customField['migration_Backup_product_attr1'] ?? '') . ': ' . ($customField['migration_Backup_product_attr2'] ?? ''); $result .= ' | Einsatzgebiete: ' . ($customField['migration_Backup_product_attr4'] ?? ''); $result .= ' | Sprachen: ' . ($customField['migration_Backup_product_attr5'] ?? ''); + return trim($result); } + /** + * @param array $description + */ private function cleanUpDescription(array $description): string { - if (isset($description['translated']['description'])) { - $newDesc = strip_tags((string)($description['translated']['description'])); - $newDesc = html_entity_decode($newDesc); - $newDesc = preg_replace('/^[ \t]*\R/m', '', $newDesc); - $newDesc = preg_replace('/[ \t]{2,}/', ' ', $newDesc); - $result = trim((string)$newDesc); - return mb_substr($result, 0, 1500); + if (!isset($description['translated']['description'])) { + return ''; } - return ''; + + $newDesc = strip_tags((string) ($description['translated']['description'])); + $newDesc = html_entity_decode($newDesc); + $newDesc = preg_replace('/^[ \t]*\R/m', '', $newDesc) ?? $newDesc; + $newDesc = preg_replace('/[ \t]{2,}/', ' ', $newDesc) ?? $newDesc; + $result = trim((string) $newDesc); + + return mb_substr($result, 0, 1500); } + /** + * @param array $row + */ private function extractManufacturer(array $row): ?string { $manufacturer = $row['manufacturer'] ?? null; + if (is_array($manufacturer) && isset($manufacturer['name']) && is_string($manufacturer['name'])) { $name = trim($manufacturer['name']); + return $name !== '' ? $name : null; } + return null; } + /** + * @param array $row + */ private function extractPrice(array $row): ?string { $calculatedPrice = $row['calculatedPrice'] ?? null; + if (!is_array($calculatedPrice)) { return null; } - $candidates = [$calculatedPrice['unitPrice'] ?? null, $calculatedPrice['totalPrice'] ?? null, $calculatedPrice['referencePrice'] ?? null, $calculatedPrice['listPrice'] ?? null, $calculatedPrice['regulationPrice'] ?? null,]; + + $candidates = [ + $calculatedPrice['unitPrice'] ?? null, + $calculatedPrice['totalPrice'] ?? null, + $calculatedPrice['referencePrice'] ?? null, + $calculatedPrice['listPrice'] ?? null, + $calculatedPrice['regulationPrice'] ?? null, + ]; + foreach ($candidates as $candidate) { if (!is_numeric($candidate)) { continue; } - $value = (float)$candidate; + + $value = (float) $candidate; + if ($value > 0.0) { return number_format($value, 2, ',', '.') . ' €'; } } + return null; } + /** + * @param array $row + */ private function extractUrl(array $row): ?string { $seoUrls = $row['seoUrls'] ?? null; + if (!is_array($seoUrls) || $seoUrls === []) { return null; } + foreach ($seoUrls as $seoUrl) { if (!is_array($seoUrl)) { continue; } + $path = $seoUrl['seoPathInfo'] ?? null; + if (is_string($path) && trim($path) !== '') { return '/' . ltrim($path, '/'); } } + return null; } @@ -345,35 +1203,54 @@ final readonly class ShopSearchService if ($relativeUrl === null || trim($relativeUrl) === '') { return null; } + return rtrim($this->baseUrl, '/') . '/' . ltrim($relativeUrl, '/'); } - /** * @return string[] */ + /** + * @param array $row + * @return string[] + */ private function extractHighlights(array $row): array { $highlights = []; + if (isset($row['available'])) { - $highlights[] = (bool)$row['available'] ? 'Verfügbar' : 'Nicht verfügbar'; + $highlights[] = (bool) $row['available'] ? 'Verfügbar' : 'Nicht verfügbar'; } + if (isset($row['productNumber']) && is_string($row['productNumber']) && trim($row['productNumber']) !== '') { $highlights[] = 'Produktnummer: ' . trim($row['productNumber']); } + return array_values(array_unique($highlights)); } - /** * @param ShopProductResult[] $products * @return ShopProductResult[] */ + /** + * @param ShopProductResult[] $products + * @return ShopProductResult[] + */ private function deduplicateProducts(array $products): array { $unique = []; $seen = []; + foreach ($products as $product) { - $key = mb_strtolower(trim(implode('|', [$product->id, $product->productNumber ?? '', $product->name, $product->url ?? '',]))); + $key = mb_strtolower(trim(implode('|', [ + $product->id, + $product->productNumber ?? '', + $product->name, + $product->url ?? '', + ])), 'UTF-8'); + if (isset($seen[$key])) { continue; } + $seen[$key] = true; $unique[] = $product; } + return $unique; } } \ No newline at end of file diff --git a/src/Config/CommerceIntentConfig.php b/src/Config/CommerceIntentConfig.php index 6351557..4bc7b1d 100644 --- a/src/Config/CommerceIntentConfig.php +++ b/src/Config/CommerceIntentConfig.php @@ -12,27 +12,48 @@ final class CommerceIntentConfig 'shop', 'alle', 'preis', + 'preise', 'kunde', 'online', 'produkt', + 'produkte', 'artikel', 'sku', 'kaufen', 'kostet', + 'kosten', + 'verfügbarkeit', + 'verfuegbarkeit', // Search / product discovery signals 'suche', 'such', 'finde', 'finden', + 'welche', + 'welcher', + 'welches', - // Product-type signals for technical/commercial requests + // Device / system signals 'analysegerät', 'analysegeraet', + 'analysegeräte', + 'analysegeraete', 'messgerät', 'messgeraet', + 'messgeräte', + 'messgeraete', + 'gerät', + 'geraet', + 'geräte', + 'geraete', 'analysator', + 'analysatoren', 'analyzer', + 'system', + 'systeme', + 'anlage', + 'anlagen', ]; } @@ -46,6 +67,8 @@ final class CommerceIntentConfig 'geeignet', 'empfiehl', 'empfehl', + 'vergleich', + 'vergleichen', ]; } @@ -57,6 +80,7 @@ final class CommerceIntentConfig 'eur', 'teuer', 'preis', + 'preise', 'kosten', 'kostet', ]; diff --git a/src/Config/CommerceQueryParserConfig.php b/src/Config/CommerceQueryParserConfig.php index 889e59e..ecd7f62 100644 --- a/src/Config/CommerceQueryParserConfig.php +++ b/src/Config/CommerceQueryParserConfig.php @@ -1,63 +1,94 @@ knownBrands; + } + + /** + * @return string[] + */ public function getPhrasesToRemove(): array { - return [ - 'ich suche', - 'suche', - 'habt ihr', - 'gibt es', - 'zeige mir', - 'welches gerät', - 'welche gerät', - 'welches modell', - 'welches ist besser', - 'welches ist am besten', - 'alternative', - 'alternativen', - ]; + return $this->phrasesToRemove; } public function getHistoryContextPattern(): string { - return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt'; + return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte'; } + public function getReferenceFollowUpPattern(): string + { + return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte'; + } + + /** + * @return string[] + */ public function getFilterSearchTokensPattern(): array { + return $this->filterSearchTokensPattern; + } + + /** + * @return string[] + */ + public function getReferenceOnlyTokens(): array + { + if ($this->referenceOnlyTokens !== []) { + return $this->referenceOnlyTokens; + } + return [ - 'auch', - 'noch', - 'nochmal', - 'zusätzlich', + 'preis', + 'preise', + 'kosten', + 'kostet', + 'gerät', + 'geraet', + 'modell', + 'produkt', + 'artikel', + 'dafür', + 'dafuer', 'dazu', 'davon', - 'stattdessen', + 'verfügbarkeit', + 'verfuegbarkeit', + 'shop', + 'link', + 'zum', + 'zur', + 'das', + 'dieses', + 'den', + 'dem', 'bitte', - 'gern', - 'gerne', - 'zeige', - 'zeig', - 'such', - 'suche', - 'finde', - 'find', - 'mir', - 'mal', - 'von', + 'und', ]; } } \ No newline at end of file diff --git a/src/Config/ShopServiceConfig.php b/src/Config/ShopServiceConfig.php index 65eb3ae..e0cb6b9 100644 --- a/src/Config/ShopServiceConfig.php +++ b/src/Config/ShopServiceConfig.php @@ -1,46 +1,78 @@ clientIdResolver->resolve($request, $response); $this->contextService->deleteHistory($clientId); + $this->commerceReferenceStore->clear($clientId); return $this->jsonWithCookies( [ - 'status' => 'ok', + 'status' => 'ok', 'message' => 'History deleted', ], $response @@ -113,6 +117,8 @@ final readonly class HistoryController /** * Helper to return JSON responses while forwarding cookies. + * + * @param array|array> $data */ private function jsonWithCookies(array $data, Response $cookieResponse): JsonResponse { @@ -124,4 +130,4 @@ final readonly class HistoryController return $json; } -} +} \ No newline at end of file diff --git a/src/Shopware/StoreApiClient.php b/src/Shopware/StoreApiClient.php index e7a2da1..b087b29 100644 --- a/src/Shopware/StoreApiClient.php +++ b/src/Shopware/StoreApiClient.php @@ -6,7 +6,6 @@ namespace App\Shopware; use RuntimeException; use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface; -use Symfony\Contracts\HttpClient\Exception\DecodingExceptionInterface; use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface; use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface; use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface; @@ -31,6 +30,16 @@ final readonly class StoreApiClient public function searchProducts(array $criteria): array { $url = rtrim($this->baseUrl, '/') . '/store-api/search'; + $sanitizedCriteria = $this->sanitizeValue($criteria); + + $body = json_encode( + $sanitizedCriteria, + JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE + ); + + if (!is_string($body)) { + throw new RuntimeException('Failed to encode Store API criteria.'); + } $response = $this->httpClient->request('POST', $url, [ 'headers' => [ @@ -38,12 +47,13 @@ final readonly class StoreApiClient 'Accept' => 'application/json', 'sw-access-key' => $this->salesChannelAccessKey, ], - 'json' => $criteria, + 'body' => $body, 'timeout' => $this->timeoutSeconds, ]); $statusCode = $response->getStatusCode(); $content = $response->getContent(false); + $content = $this->sanitizeString($content); if ($statusCode < 200 || $statusCode >= 300) { throw new RuntimeException(sprintf( @@ -61,4 +71,48 @@ final readonly class StoreApiClient return $data; } + + private function sanitizeValue(mixed $value): mixed + { + if (is_array($value)) { + $out = []; + + foreach ($value as $key => $item) { + $out[$key] = $this->sanitizeValue($item); + } + + return $out; + } + + if (!is_string($value)) { + return $value; + } + + return $this->sanitizeString($value); + } + + private function sanitizeString(string $value): string + { + if (preg_match('//u', $value) === 1) { + return $value; + } + + if (function_exists('mb_convert_encoding')) { + $value = mb_convert_encoding($value, 'UTF-8', 'UTF-8'); + } + + if (preg_match('//u', $value) === 1) { + return $value; + } + + if (function_exists('iconv')) { + $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $value); + + if (is_string($converted) && $converted !== '') { + return $converted; + } + } + + return ''; + } } \ No newline at end of file