diff --git a/config/services.yaml b/config/services.yaml index 6195a7b..cdf0a98 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -120,12 +120,7 @@ services: App\Commerce\CommerceQueryParser: ~ - App\Commerce\SearchRepairService: - arguments: - $logger: '@monolog.logger.agent' - $enabled: '%mto.commerce.search_repair.enabled%' - $maxRepairQueries: '%mto.commerce.search_repair.max_queries%' - $minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%' + App\Commerce\SearchRepairService: ~ App\Shopware\ShopwareCriteriaBuilder: ~ diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 8e157fb..7207239 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -4,9 +4,6 @@ declare(strict_types=1); namespace App\Agent; -use App\Commerce\CommerceReferenceResolver; -use App\Commerce\CommerceReferenceStore; -use App\Commerce\Dto\CommerceReferenceContext; use App\Commerce\SearchRepairService; use App\Commerce\ShopSearchService; use App\Config\AgentRunnerConfig; @@ -21,8 +18,6 @@ use Throwable; final readonly class AgentRunner { - private const COMMERCE_HISTORY_BUDGET_CHARS = 1000; - private bool $systemMsgOn; public function __construct( @@ -33,8 +28,6 @@ final readonly class AgentRunner private RetrieverInterface $retriever, private ShopSearchService $shopSearchService, private SearchRepairService $searchRepairService, - private CommerceReferenceStore $commerceReferenceStore, - private CommerceReferenceResolver $commerceReferenceResolver, private CommerceIntentLite $commerceIntentLite, private OllamaClient $ollamaClient, private LoggerInterface $agentLogger, @@ -51,14 +44,13 @@ final readonly class AgentRunner $prompt = trim($prompt); if ($prompt === '') { - yield $this->systemMsg('❌ Empty prompt.', 'err'); + yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err'); return; } $shopResults = []; $primaryShopResults = []; - $factSources = []; - $contextSignals = []; + $sources = []; $optimizedShopQuery = ''; $shopSearchQuery = ''; $commerceIntent = CommerceIntentLite::NONE; @@ -66,8 +58,6 @@ final readonly class AgentRunner $attemptedShopRepair = false; $usedShopRepair = false; $shopRepairQueries = []; - $activeCommerceReference = null; - $shopChecked = false; $this->agentLogger->info('Agent run started', [ 'userId' => $userId, @@ -79,74 +69,39 @@ final readonly class AgentRunner // Additional context strategies can be added here later. } - yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think'); - yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think'); $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt); if ($urlContent !== '') { - $this->addBadge($factSources, 'Externe URL'); + $this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel()); } - yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think'); $knowledgeChunks = $this->retriever->retrieve($prompt); if ($knowledgeChunks !== []) { - $this->addBadge($factSources, 'RAG Wissen'); + $this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel()); } $commerceIntent = $this->detectCommerceIntent($prompt); if ($this->isCommerceIntent($commerceIntent)) { - yield $this->systemMsg('Ich optimiere die Recherche...', 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think'); $commerceHistoryContext = $this->buildCommerceHistoryContext($userId); - $activeCommerceReference = $this->loadCommerceReference($userId); if ($commerceHistoryContext !== '') { - $this->addBadge($contextSignals, 'Gesprächskontext'); + $this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel()); } - if ($activeCommerceReference !== null) { - $this->addBadge($contextSignals, 'Commerce-Referenz'); - } - - $isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp( + $optimizedShopQuery = $this->buildOptimizedShopQuery( $prompt, - $activeCommerceReference + $userId, + $commerceHistoryContext ); - if ($isReferenceOnlyFollowUp) { - $shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference); - - if ($shopSearchQuery !== '') { - $this->addBadge($contextSignals, 'Deterministische Referenzsuche'); - } - - $this->agentLogger->info('Using deterministic reference shop query', [ - 'userId' => $userId, - 'commerceIntent' => $commerceIntent, - 'prompt' => $prompt, - 'shopSearchQuery' => $shopSearchQuery, - 'referenceProductName' => $activeCommerceReference?->productName, - 'referenceFocusTerms' => $activeCommerceReference?->focusTerms, - ]); - } else { - $optimizedShopQuery = $this->buildOptimizedShopQuery( - $prompt, - $userId, - $commerceHistoryContext - ); - - if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) { - $this->addBadge($contextSignals, 'Query-Optimierung'); - } - - $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; - } - - if ($shopSearchQuery === '') { - $shopSearchQuery = $prompt; - } + $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; $this->agentLogger->info('Commerce search prepared', [ 'userId' => $userId, @@ -154,26 +109,20 @@ final readonly class AgentRunner 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', 'optimizedShopQuery' => $optimizedShopQuery, 'shopSearchQuery' => $shopSearchQuery, - 'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), - 'hasActiveCommerceReference' => $activeCommerceReference !== null, - 'activeCommerceReferenceProduct' => $activeCommerceReference?->productName, ]); yield $this->systemMsg( - 'Ich rufe Recherchedaten ab (type: ' . $commerceIntent . ')', + sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent), 'think' ); - $shopChecked = true; - $primaryShopResults = $this->searchShop( $shopSearchQuery, $commerceIntent, $userId, - $commerceHistoryContext, - $activeCommerceReference + $commerceHistoryContext ); $repairPayload = $this->repairShopResults( @@ -192,13 +141,11 @@ final readonly class AgentRunner $shopRepairQueries = $repairPayload['repairQueries']; if ($shopResults !== []) { - $this->addBadge($factSources, 'Shopsystem'); - } elseif ($shopChecked) { - $this->addBadge($factSources, 'Shopsystem geprüft'); + $this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel()); } if ($attemptedShopRepair) { - $this->addBadge($contextSignals, 'Erweiterte Shopsuche'); + $this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel()); } } @@ -206,7 +153,7 @@ final readonly class AgentRunner $knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent); } - yield $this->systemMsg('Ich analysiere alle Informationen...', 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think'); $finalPrompt = $this->promptBuilder->build( prompt: $prompt, @@ -226,7 +173,6 @@ final readonly class AgentRunner 'shopSearchQuery' => $shopSearchQuery, 'primaryShopResultsCount' => count($primaryShopResults), 'shopResultsCount' => count($shopResults), - 'shopChecked' => $shopChecked, 'attemptedShopRepair' => $attemptedShopRepair, 'usedShopRepair' => $usedShopRepair, 'shopRepairQueries' => $shopRepairQueries, @@ -243,21 +189,19 @@ final readonly class AgentRunner ]); } - if ($factSources !== [] || $contextSignals !== []) { - yield $this->emitSourceSummary( - $factSources, - $contextSignals, - 'Genutzte Datenpfade' + if ($sources !== []) { + yield $this->emitSources( + $sources, + $this->agentRunnerConfig->getUsedSourcesPrefix() ); } $fullOutput = yield from $this->streamFinalAnswer($finalPrompt); - if ($factSources !== [] || $contextSignals !== []) { - yield $this->emitSourceSummary( - $factSources, - $contextSignals, - 'Quellen und Signale' + if ($sources !== []) { + yield $this->emitSources( + $sources, + $this->agentRunnerConfig->getSourcesPrefix() ); } @@ -266,11 +210,10 @@ final readonly class AgentRunner } if ($fullOutput !== '') { - $this->persistConversationState( - userId: $userId, - prompt: $prompt, - fullOutput: $fullOutput, - shopResults: $shopResults + $this->contextService->appendHistory( + $userId, + $prompt, + $fullOutput ); } @@ -281,7 +224,6 @@ final readonly class AgentRunner 'commerceIntent' => $commerceIntent, 'primaryShopResultsCount' => count($primaryShopResults), 'shopResultsCount' => count($shopResults), - 'shopChecked' => $shopChecked, 'attemptedShopRepair' => $attemptedShopRepair, 'usedShopRepair' => $usedShopRepair, 'shopRepairQueries' => $shopRepairQueries, @@ -292,8 +234,6 @@ final readonly class AgentRunner 'shopSearchQuery' => $shopSearchQuery, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), - 'hasActiveCommerceReference' => $activeCommerceReference !== null, - 'activeCommerceReferenceProduct' => $activeCommerceReference?->productName, ]); } catch (Throwable $e) { $this->agentLogger->error('Agent run failed', [ @@ -361,42 +301,6 @@ final readonly class AgentRunner return $this->sanitizeOptimizedShopQuery($optimizedQuery); } - private function isReferenceOnlyCommerceFollowUp( - string $prompt, - ?CommerceReferenceContext $referenceContext - ): bool { - if ($referenceContext === null) { - return false; - } - - $normalizedPrompt = mb_strtolower(trim($prompt), 'UTF-8'); - $normalizedPrompt = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt; - $normalizedPrompt = preg_replace('/\s+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt; - $normalizedPrompt = trim($normalizedPrompt); - - if ($normalizedPrompt === '') { - return false; - } - - if (preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $normalizedPrompt) === 1) { - return false; - } - - return preg_match( - '/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u', - $normalizedPrompt - ) === 1; - } - - private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string - { - if ($referenceContext === null) { - return ''; - } - - return trim($referenceContext->buildReferenceSearchText()); - } - /** * @return array{ * results: array, @@ -445,15 +349,13 @@ final readonly class AgentRunner string $query, string $commerceIntent, string $userId, - string $commerceHistoryContext = '', - ?CommerceReferenceContext $referenceContext = null + string $commerceHistoryContext = '' ): array { try { return $this->shopSearchService->search( $query, $commerceIntent, - $commerceHistoryContext, - $referenceContext + $commerceHistoryContext ); } catch (Throwable $e) { $this->agentLogger->warning('Shop search failed, continuing without shop results', [ @@ -462,8 +364,6 @@ final readonly class AgentRunner 'query' => $query, 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), - 'hasReferenceContext' => $referenceContext !== null, - 'referenceProductName' => $referenceContext?->productName, 'exception' => $e, ]); @@ -475,73 +375,23 @@ final readonly class AgentRunner { return $this->contextService->buildUserContextWithinBudget( $userId, - self::COMMERCE_HISTORY_BUDGET_CHARS - ); - } - - private function loadCommerceReference(string $userId): ?CommerceReferenceContext - { - try { - return $this->commerceReferenceStore->load($userId); - } catch (Throwable $e) { - $this->agentLogger->warning('Failed to load commerce reference context', [ - 'userId' => $userId, - 'exception' => $e, - ]); - - return null; - } - } - - /** - * @param array $shopResults - */ - private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void - { - try { - $referenceContext = $this->commerceReferenceResolver->resolveFromCommerceTurn( - $prompt, - $answer, - $shopResults - ); - - if ($referenceContext === null) { - return; - } - - $this->commerceReferenceStore->save($userId, $referenceContext); - } catch (Throwable $e) { - $this->agentLogger->warning('Failed to persist commerce reference context', [ - 'userId' => $userId, - 'exception' => $e, - ]); - } - } - - /** - * @param array $shopResults - */ - private function persistConversationState( - string $userId, - string $prompt, - string $fullOutput, - array $shopResults - ): void { - $this->contextService->appendHistory($userId, $prompt, $fullOutput); - - $this->storeCommerceReference( - userId: $userId, - prompt: $prompt, - answer: $fullOutput, - shopResults: $shopResults + $this->agentRunnerConfig->getCommerceHistoryBudgetChars() ); } private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array { return match ($commerceIntent) { - CommerceIntentLite::PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 2), - CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 3), + CommerceIntentLite::PRODUCT_SEARCH => array_slice( + $knowledgeChunks, + 0, + $this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit() + ), + CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice( + $knowledgeChunks, + 0, + $this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit() + ), default => $knowledgeChunks, }; } @@ -555,8 +405,8 @@ final readonly class AgentRunner } $query = preg_split('/\R+/u', $query, 2)[0] ?? $query; - $query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query; - $query = trim($query, " \t\n\r\0\x0B\"'`"); + $query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query; + $query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters()); $query = preg_replace('/\s+/u', ' ', $query) ?? $query; return trim($query); @@ -582,7 +432,7 @@ final readonly class AgentRunner if ($cleanToken === '') { if ($firstThinkLoop) { - yield $this->systemMsg('Denke nach...', 'think'); + yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think'); $firstThinkLoop = false; } @@ -601,60 +451,46 @@ final readonly class AgentRunner if ($finalChunk !== null) { yield $this->systemMsg($finalChunk, 'answer'); } elseif ($fullOutput === '') { - yield $this->systemMsg('❌ Es wurden keine Daten vom LLM empfangen.', 'err'); + yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err'); } return $fullOutput; } /** - * @param string[] $factSources - * @param string[] $contextSignals + * @param string[] $sources */ - private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string + private function emitSources(array $sources, string $prefix): string { - $parts = []; - - if ($factSources !== []) { - $parts[] = 'Fakten: ' . implode(' ', $factSources); - } - - if ($contextSignals !== []) { - $parts[] = 'Kontext: ' . implode(' ', $contextSignals); - } - - return $this->systemMsg( - $label . ': ' . implode('    ', $parts), - 'info' - ); + return $this->systemMsg($prefix . implode(' ', $sources), 'info'); } /** - * @param string[] $target + * @param string[] $sources */ - private function addBadge(array &$target, string $label): void + private function addSource(array &$sources, string $label): void { $badge = $this->badge($label); - if (!in_array($badge, $target, true)) { - $target[] = $badge; + if (!in_array($badge, $sources, true)) { + $sources[] = $badge; } } private function buildUserErrorMessage(Throwable $e): string { if (!$this->debug) { - return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.'; + return $this->agentRunnerConfig->getGenericInternalErrorMessage(); } - return '❌ Interner Fehler: ' + return $this->agentRunnerConfig->getDebugInternalErrorPrefix() . htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); } private function badge(string $label): string { return sprintf( - '%s', + $this->agentRunnerConfig->getSourceBadgeHtmlTemplate(), htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ); } @@ -667,10 +503,13 @@ final readonly class AgentRunner return match ($type) { 'answer' => $msg, - 'err' => '' . $msg . "\n
\n", - 'think' => '' . $msg . "\n", - 'info' => "\n\n" . $msg . "\n", - 'debug' => "\n\nDEBUG: " . htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\n", + 'err' => sprintf($this->agentRunnerConfig->getErrorHtmlTemplate(), $msg), + 'think' => sprintf($this->agentRunnerConfig->getThinkHtmlTemplate(), $msg), + 'info' => sprintf($this->agentRunnerConfig->getInfoHtmlTemplate(), $msg), + 'debug' => sprintf( + $this->agentRunnerConfig->getDebugHtmlTemplate(), + htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') + ), default => $msg, }; } diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php index a680a53..45dc173 100644 --- a/src/Agent/PromptBuilder.php +++ b/src/Agent/PromptBuilder.php @@ -18,6 +18,7 @@ final readonly class PromptBuilder private ContextService $contextService, private SystemPromptRepository $systemPromptRepository, private ModelGenerationConfigProvider $modelGenerationConfigProvider, + private PromptBuilderConfig $config, ) { } @@ -31,7 +32,6 @@ final readonly class PromptBuilder * @param ShopProductResult[] $shopResults * @param bool|null $fullContext * @param string|null $swagFullOutPut - * @return string */ public function build( string $prompt, @@ -48,23 +48,21 @@ final readonly class PromptBuilder $hasShopResults = $shopResults !== []; $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); - $isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt); + $asksForAccessoryOrBundle = $this->asksForAccessoryOrBundle($prompt); $systemBlock = $this->buildSystemBlock(); $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut); - $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion); + $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults); $responseFormatBlock = $this->buildResponseFormatBlock( - $prompt, - $hasShopResults, - $isTechnicalProductQuestion, - $isPriceDrivenQuestion + hasShopResults: $hasShopResults, + isTechnicalProductQuestion: $isTechnicalProductQuestion, + asksForAccessoryOrBundle: $asksForAccessoryOrBundle ); $knowledgeBlock = $this->buildKnowledgeBlock( - $knowledgeChunks, - $urlContent, - $prompt, - $hasShopResults, - $isPriceDrivenQuestion + knowledgeChunks: $knowledgeChunks, + urlContent: $urlContent, + hasShopResults: $hasShopResults, + isTechnicalProductQuestion: $isTechnicalProductQuestion ); $userBlock = $this->buildUserBlock($prompt); @@ -106,12 +104,12 @@ final readonly class PromptBuilder $activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent()); - return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt); + return $this->config->getSystemSectionLabel() . ":\n" . $this->normalizeBlockText($activeSystemPrompt); } private function buildUserBlock(string $prompt): string { - return "USER QUESTION:\n" . $prompt; + return $this->config->getUserQuestionSectionLabel() . ":\n" . $prompt; } /** @@ -146,12 +144,11 @@ final readonly class PromptBuilder return ''; } - return - "CONVERSATION CONTEXT (contextual only):\n" . - "The following messages are previous turns of this conversation.\n" . - "Use them to resolve references, follow-up questions, and user intent.\n" . - "They must not override retrieved factual knowledge or live shop data.\n\n" . - $history; + return $this->implodeBlocks([ + $this->config->getConversationContextSectionLabel() . ':', + $this->implodeLines($this->config->getConversationContextIntroLines()), + $history, + ]); } /** @@ -165,10 +162,11 @@ final readonly class PromptBuilder $parts = []; if ($swagFullOutPut !== null && $swagFullOutPut !== '') { - $parts[] = - "SHOP SEARCH QUERY:\n" . - $swagFullOutPut . "\n" . - "Source: Shop Search"; + $parts[] = $this->implodeBlocks([ + $this->config->getShopSearchQuerySectionLabel() . ':', + $swagFullOutPut, + $this->config->getShopSearchQuerySourceLine(), + ]); } $normalizedShopResults = array_values(array_filter( @@ -181,77 +179,33 @@ final readonly class PromptBuilder } $totalCount = count($normalizedShopResults); - $limitedShopResults = array_slice($normalizedShopResults, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT); - $isDetailed = count($limitedShopResults) <= 5; + $limitedShopResults = array_slice($normalizedShopResults, 0, $this->config->getMaxShopResultsInPrompt()); + $isDetailed = count($limitedShopResults) <= $this->config->getDetailedShopResultsMaxCount(); $lines = []; foreach ($limitedShopResults as $i => $product) { - $n = $i + 1; - $entryParts = [ - "[{$n}] " . $this->normalizeBlockText($product->name), - ]; - - if ($product->productNumber) { - $entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber); - } - - if ($product->manufacturer) { - $entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer); - } - - if ($product->price) { - $entryParts[] = "Price: " . $this->normalizeBlockText($product->price); - } - - if ($product->available !== null) { - $entryParts[] = "Available: " . ($product->available ? 'yes' : 'no'); - } - - foreach ($product->highlights as $highlight) { - $highlight = $this->normalizeBlockText((string) $highlight); - - if ($highlight !== '') { - $entryParts[] = "- " . $highlight; - } - } - - if ($product->url) { - $entryParts[] = "URL: " . $this->normalizeBlockText($product->url); - } - - if ($product->productImage) { - $entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage); - } - - if ($isDetailed && $product->description) { - $entryParts[] = "Description: " . $this->normalizeBlockText($product->description); - } - - if ($product->customFields) { - $entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields); - } - - $lines[] = implode("\n", $entryParts); + $lines[] = $this->buildShopProductEntry( + product: $product, + index: $i + 1, + isDetailed: $isDetailed + ); } if ($lines !== []) { - $header = - "LIVE SHOP RESULTS (authoritative for current commercial details):\n" . - "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" . - "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" . - "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" . - "Do not infer undocumented technical specifications from shop data.\n" . - "Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" . - "Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" . - "If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" . - "If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering."; + $headerLines = $this->config->getLiveShopResultsHeaderLines(); if ($totalCount > count($limitedShopResults)) { - $header .= "\n" . - "Only the top " . count($limitedShopResults) . " ranked shop results are shown here out of {$totalCount} total results."; + $headerLines[] = sprintf( + $this->config->getLiveShopResultsOverflowNoticeTemplate(), + count($limitedShopResults), + $totalCount + ); } - $parts[] = $header . "\n\n" . implode("\n\n", $lines); + $parts[] = $this->implodeBlocks([ + $this->implodeLines($headerLines), + implode("\n\n", $lines), + ]); } return $this->implodeBlocks($parts); @@ -260,89 +214,60 @@ final readonly class PromptBuilder /** * Build a small priority block that tells the model what to surface first. */ - private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string + private function buildOutputPriorityBlock(bool $hasShopResults): string { if (!$hasShopResults) { return ''; } - if ($isPriceDrivenQuestion) { - return - "OUTPUT PRIORITY:\n" . - "For price-driven questions, evaluate shop results first for numeric price filtering.\n" . - "Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" . - "Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n"; - } - - return - "OUTPUT PRIORITY:\n" . - "Use retrieved knowledge first to determine the technically matching product or answer.\n" . - "If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" . - "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n"; + return $this->buildRuleBlock( + $this->config->getOutputPrioritySectionLabel(), + $this->config->getOutputPriorityRules() + ); } private function buildResponseFormatBlock( - string $prompt, bool $hasShopResults, bool $isTechnicalProductQuestion, - bool $isPriceDrivenQuestion + bool $asksForAccessoryOrBundle ): string { - $rules = [ - "RESPONSE FORMAT RULES:", - "- Keep normal spacing between all words. Never fuse words together.", - "- Use short, clean paragraphs or short labeled sections.", - "- Do not use persuasive or promotional wording.", - "- Do not repeat the same fact in slightly different wording.", - "- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.", - "- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.", - "- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.", - "- Do not combine technical identity from one source with commercial fields from a different product.", - "- Product number, price, availability, and URL must belong to the same explicitly grounded product.", - ]; + $rules = $this->config->getResponseFormatBaseRules(); if ($hasShopResults) { - $rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts."; - $rules[] = "- Keep price, availability, and URL on separate lines when they are present."; - $rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product."; - $rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device."; - $rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results."; - $rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices."; - $rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results."; + $rules = array_merge($rules, $this->config->getResponseFormatWithShopRules()); } else { - $rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers."; + $rules = array_merge($rules, $this->config->getResponseFormatWithoutShopRules()); } if ($isTechnicalProductQuestion) { - $rules[] = "- Write like technical documentation: precise, neutral, and source-close."; - $rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation."; - $rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives."; + $rules = array_merge($rules, $this->config->getResponseFormatTechnicalRules()); } - if ($isPriceDrivenQuestion) { - $rules[] = "- For price-driven questions, answer the threshold result first."; - $rules[] = "- If no grounded shop product fulfills the threshold, say that clearly."; - $rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results."; + if ($asksForAccessoryOrBundle) { + $rules = array_merge($rules, $this->config->getResponseFormatAccessoryRules()); } - if ($this->asksForAccessoryOrBundle($prompt)) { - $rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory."; - $rules[] = "- The main device must come first. The accessory must not replace the main device."; - $rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources."; - $rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so."; - } - - return implode("\n", $rules); + return $this->buildRuleBlock( + $this->config->getResponseFormatSectionLabel(), + $rules + ); } + /** + * Build the knowledge block. + * + * Retrieved knowledge remains the main source for technical matching and explanation. + * Shop data is preferred for current commercial fields. + * + * @param string[] $knowledgeChunks + */ private function buildKnowledgeBlock( array $knowledgeChunks, string $urlContent, - string $prompt, bool $hasShopResults, - bool $isPriceDrivenQuestion + bool $isTechnicalProductQuestion ): string { $knowledgeParts = []; - $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); if ($knowledgeChunks !== []) { $lines = []; @@ -359,56 +284,71 @@ final readonly class PromptBuilder } if ($lines !== []) { - $parts = [ - "LANGUAGE RULES:\n" . - implode("\n", $this->buildLanguageRules()), - "FACT GROUNDING RULES:\n" . - implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)), - "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" . - "Source: Documents\n" . - implode("\n\n", $lines), - ]; - - $knowledgeParts[] = implode("\n\n", $parts); + $knowledgeParts[] = $this->implodeBlocks([ + $this->buildRuleBlock( + $this->config->getLanguageRulesSectionLabel(), + $this->config->getLanguageRules() + ), + $this->buildRuleBlock( + $this->config->getFactGroundingRulesSectionLabel(), + $this->buildFactGroundingRules( + hasShopResults: $hasShopResults, + isTechnicalProductQuestion: $isTechnicalProductQuestion + ) + ), + $this->implodeBlocks([ + $this->config->getRetrievedKnowledgeSectionLabel() . ':', + $this->config->getRetrievedKnowledgeSourceLine(), + implode("\n\n", $lines), + ]), + ]); } } if ($urlContent !== '') { - $knowledgeParts[] = - "CONTENT FROM URL (authoritative if user-provided):\n" . - "Source: URL\n" . - $urlContent; + $knowledgeParts[] = $this->implodeBlocks([ + $this->config->getUrlContentSectionLabel() . ':', + $this->config->getUrlContentSourceLine(), + $urlContent, + ]); } return $this->implodeBlocks($knowledgeParts); } + /** + * Resolve how many characters may still be used by history. + * + * The active model num_ctx is converted into a conservative prompt budget. + * Shop, knowledge and user question are fixed priority blocks. + * History only receives the remaining space. + */ private function resolveHistoryBudgetChars(string $fixedPrompt): int { $numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx(); $outputReserveTokens = $this->clamp( - (int) floor($numCtx * PromptBuilderConfig::OUTPUT_RESERVE_RATIO), - PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS, - PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS + (int) floor($numCtx * $this->config->getOutputReserveRatio()), + $this->config->getOutputReserveMinTokens(), + $this->config->getOutputReserveMaxTokens() ); $safetyReserveTokens = $this->clamp( - (int) floor($numCtx * PromptBuilderConfig::SAFETY_RESERVE_RATIO), - PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS, - PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS + (int) floor($numCtx * $this->config->getSafetyReserveRatio()), + $this->config->getSafetyReserveMinTokens(), + $this->config->getSafetyReserveMaxTokens() ); $promptBudgetTokens = max( - PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS, + $this->config->getMinPromptBudgetTokens(), $numCtx - $outputReserveTokens - $safetyReserveTokens ); - $promptBudgetChars = $promptBudgetTokens * PromptBuilderConfig::CHARS_PER_TOKEN; + $promptBudgetChars = $promptBudgetTokens * $this->config->getCharsPerToken(); $remaining = $promptBudgetChars - mb_strlen($fixedPrompt) - - PromptBuilderConfig::HISTORY_PADDING_CHARS; + - $this->config->getHistoryPaddingChars(); return max(0, $remaining); } @@ -416,87 +356,118 @@ final readonly class PromptBuilder /** * @return string[] */ - private function buildLanguageRules(): array + private function buildFactGroundingRules(bool $hasShopResults, bool $isTechnicalProductQuestion): array { - return [ - "- Answer only in the same language as the user question.", - "- All headings, labels, notes, and structural elements must be in the same language as the user question.", - "- Do not switch languages unless the user does.", - "- If headings are used, write them in the user's language.", - ]; - } - - /** - * @return string[] - */ - private function buildFactGroundingRules( - bool $isTechnicalProductQuestion, - bool $hasShopResults, - bool $isPriceDrivenQuestion - ): array { - $rules = [ - "- State only facts that are explicitly present in the provided sources.", - "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.", - "- Do not invent missing values.", - "- Do not replace missing values with estimates, defaults, or typical industry assumptions.", - "- Do not claim that information is missing if it appears in the provided sources.", - "- Do not compare with other products unless those products are also present in the provided sources.", - "- Prefer source-faithful wording over persuasive wording.", - "- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.", - "- Clearly separate explicit facts from inferences.", - "- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.", - "- If a sentence cannot be traced to the provided sources, do not write it.", - "- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.", - "- If the sources do not identify a suitable product, do not invent one.", - ]; + $rules = $this->config->getFactGroundingBaseRules(); if ($hasShopResults) { - $rules = array_merge($rules, [ - "- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.", - "- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.", - "- When shop results are present and relevant, include current price and the actual URL if available.", - "- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.", - "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.", - "- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.", - "- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.", - "- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.", - "- If the shop match is ambiguous, keep the technical identification and commercial details separate.", - ]); - - if ($isPriceDrivenQuestion) { - $rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check."; - $rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists."; - } + $rules = array_merge($rules, $this->config->getFactGroundingWithShopRules()); } else { - $rules[] = "- Use retrieved knowledge as authoritative for factual answers."; - $rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions."; + $rules = array_merge($rules, $this->config->getFactGroundingWithoutShopRules()); } if ($isTechnicalProductQuestion) { - $rules = array_merge($rules, [ - "- For technical product questions, answer primarily with explicitly stated facts.", - "- Behave like a technical documentation assistant, not like a sales advisor.", - "- Keep interpretations minimal and do not generalize application areas beyond the provided sources.", - "- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.", - "- Do not translate technical facts into business value unless the source explicitly does so.", - "- Do not recommend process changes unless explicitly present in the source.", - "- Do not use persuasive summaries or advisory conclusions.", - "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.", - "- Use neutral engineering language.", - "- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.", - "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.", - "- If the source lists application areas, repeat only those areas and do not broaden them.", - "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.", - "- If the source states only a threshold function, do not expand it into broader control logic.", - "- If a detail is not explicitly stated in the provided sources, say so plainly.", - "- Prefer short, source-close sentences over explanatory expansion.", - "- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.", - ]); + $rules = array_merge($rules, $this->config->getFactGroundingTechnicalRules()); } return $rules; } + private function buildShopProductEntry(ShopProductResult $product, int $index, bool $isDetailed): string + { + $entryParts = [ + "[{$index}] " . $this->normalizeBlockText($product->name), + ]; + + if ($product->productNumber) { + $entryParts[] = $this->config->getShopProductNumberLabel() . ': ' + . $this->normalizeBlockText($product->productNumber); + } + + if ($product->manufacturer) { + $entryParts[] = $this->config->getShopManufacturerLabel() . ': ' + . $this->normalizeBlockText($product->manufacturer); + } + + if ($product->price) { + $entryParts[] = $this->config->getShopPriceLabel() . ': ' + . $this->normalizeBlockText($product->price); + } + + if ($product->available !== null) { + $entryParts[] = $this->config->getShopAvailabilityLabel() . ': ' + . ($product->available + ? $this->config->getShopAvailabilityYesLabel() + : $this->config->getShopAvailabilityNoLabel()); + } + + foreach ($product->highlights as $highlight) { + $highlight = $this->normalizeBlockText((string) $highlight); + + if ($highlight !== '') { + $entryParts[] = $this->config->getShopHighlightPrefix() . $highlight; + } + } + + if ($product->url) { + $entryParts[] = $this->config->getShopUrlLabel() . ': ' + . $this->normalizeBlockText($product->url); + } + + if ($product->productImage) { + $entryParts[] = $this->config->getShopProductImageLabel() . ': ' + . $this->normalizeBlockText($product->productImage); + } + + if ($isDetailed && $product->description) { + $entryParts[] = $this->config->getShopDescriptionLabel() . ': ' + . $this->normalizeBlockText($product->description); + } + + if ($product->customFields) { + $entryParts[] = $this->config->getShopMetaInformationLabel() . ': ' + . $this->normalizeBlockText($product->customFields); + } + + return implode("\n", $entryParts); + } + + /** + * @param string[] $rules + */ + private function buildRuleBlock(string $sectionLabel, array $rules): string + { + $normalizedRules = array_values(array_filter( + array_map( + fn(string $rule): string => $this->normalizeBlockText($rule), + $rules + ), + static fn(string $rule): bool => $rule !== '' + )); + + if ($normalizedRules === []) { + return ''; + } + + return $sectionLabel . ":\n" . implode("\n", $normalizedRules); + } + + /** + * @param string[] $lines + */ + private function implodeLines(array $lines): string + { + $normalizedLines = array_values(array_filter( + array_map( + fn(string $line): string => $this->normalizeBlockText($line), + $lines + ), + static fn(string $line): bool => $line !== '' + )); + + return implode("\n", $normalizedLines); + } + private function implodeBlocks(array $blocks): string { $filtered = array_values(array_filter( @@ -537,41 +508,26 @@ final readonly class PromptBuilder private function isLikelyTechnicalProductQuestion(string $prompt): bool { $normalized = mb_strtolower($prompt, 'UTF-8'); - $matches = 0; - foreach (PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS as $keyword) { + foreach ($this->config->getTechnicalProductKeywords() as $keyword) { if (str_contains($normalized, $keyword)) { $matches++; } } - if ($matches >= 2) { + if ($matches >= $this->config->getTechnicalProductKeywordMatchThreshold()) { return true; } - return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1; - } - - private function isLikelyPriceDrivenQuestion(string $prompt): bool - { - $normalized = mb_strtolower($prompt, 'UTF-8'); - - if (preg_match('/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*(?:euro|eur|€)\b/u', $normalized) === 1) { - return true; - } - - return str_contains($normalized, 'preis') - || str_contains($normalized, 'preise') - || str_contains($normalized, 'kosten') - || str_contains($normalized, 'kostet'); + return preg_match($this->config->getTechnicalProductModelPattern(), $prompt) === 1; } private function asksForAccessoryOrBundle(string $prompt): bool { $normalized = mb_strtolower($prompt, 'UTF-8'); - foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $keyword) { + foreach ($this->config->getAccessoryRequestKeywords() as $keyword) { if (str_contains($normalized, $keyword)) { return true; } diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index defc35f..6a113e4 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -4,7 +4,6 @@ declare(strict_types=1); namespace App\Commerce; -use App\Commerce\Dto\CommerceReferenceContext; use App\Commerce\Dto\CommerceSearchQuery; use App\Config\CommerceIntentConfig; use App\Config\CommerceQueryParserConfig; @@ -24,12 +23,10 @@ final readonly class CommerceQueryParser public function parse( string $originalPrompt, string $intent, - string $historyContext = '', - ?CommerceReferenceContext $referenceContext = null + string $historyContext = '' ): CommerceSearchQuery { $normalizedPrompt = $this->normalize($originalPrompt); $isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt); - $isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt); [$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt); $sizes = $this->extractSizes($normalizedPrompt); @@ -47,58 +44,23 @@ final readonly class CommerceQueryParser if ( !$isDirectProductQuery && $historyContext !== '' - && $this->shouldUseHistoryContext($normalizedPrompt, $searchText) + && $this->shouldUseHistoryContext($normalizedPrompt) ) { - $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext); + $historyParse = $this->parseHistoryContext($historyContext); - if ($latestHistoryQuestion !== '') { - $normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion); - $isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt); - - [$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt); - $historySizes = $this->extractSizes($normalizedHistoryPrompt); - $historyBrand = $this->extractBrand($normalizedHistoryPrompt); - - $historySearchText = $this->buildSearchText( - prompt: $normalizedHistoryPrompt, - sizes: $historySizes, - brand: $historyBrand, - priceMin: $historyPriceMin, - priceMax: $historyPriceMax, - preserveDirectProductQuery: $isDirectHistoryProductQuery + if ($historyParse !== null) { + $searchText = $this->mergeSearchTexts( + $historyParse['searchText'], + $searchText ); - $searchText = $this->mergeSearchTexts($historySearchText, $searchText); - - if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') { - $brand = $historyBrand; + if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') { + $brand = $historyParse['brand']; } } } - if ( - !$isDirectProductQuery - && $referenceContext !== null - && $this->shouldUseReferenceContext($normalizedPrompt, $searchText) - ) { - $referenceSearchText = $this->buildReferenceSearchText($referenceContext); - - if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) { - $searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText; - } else { - $searchText = $this->mergeSearchTexts($referenceSearchText, $searchText); - } - - if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) { - $normalizedManufacturer = $this->normalize($referenceContext->manufacturer); - - if ($normalizedManufacturer !== '') { - $brand = $normalizedManufacturer; - } - } - } - - $finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt); + $finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt; return new CommerceSearchQuery( originalPrompt: $originalPrompt, @@ -118,10 +80,14 @@ final readonly class CommerceQueryParser { $value = $this->textNormalizer->normalize($prompt); $value = $this->queryCleaner->clean($value); - $value = mb_strtolower(trim($value), 'UTF-8'); - $value = str_replace(['€'], ' euro ', $value); - $value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value; - $value = preg_replace('/\s+/u', ' ', $value) ?? $value; + $value = mb_strtolower(trim($value)); + $value = str_replace( + $this->config->getNormalizationSearch(), + $this->config->getNormalizationReplace(), + $value + ); + $value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value; + $value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value; return trim($value); } @@ -134,32 +100,21 @@ final readonly class CommerceQueryParser $priceMin = null; $priceMax = null; - if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { - $a = $this->toFloat($m[1]); - $b = $this->toFloat($m[2]); + if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) { + $a = $this->toFloat($matches[1]); + $b = $this->toFloat($matches[2]); if ($a !== null && $b !== null) { return [min($a, $b), max($a, $b)]; } } - if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { - $priceMax = $this->toFloat($m[1]); + if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) { + $priceMax = $this->toFloat($matches[1]); } - if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { - $priceMin = $this->toFloat($m[1]); - } - - // NEW: - // Recognize comparative lower-bound phrasing such as: - // - mehr als 3000 euro - // - über 3000 euro - // - ueber 3000 euro - // - größer als 3000 euro - // - groesser als 3000 euro - if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) { - $priceMin = $this->toFloat($m[1]); + if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) { + $priceMin = $this->toFloat($matches[1]); } return [$priceMin, $priceMax]; @@ -172,8 +127,7 @@ final readonly class CommerceQueryParser { $sizes = []; - $sizePattern = $this->intentConfig->getSizePattern(); - if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) { + if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) { return []; } @@ -181,8 +135,7 @@ final readonly class CommerceQueryParser $sizes[] = trim($size); } - $sizeTokenPattern = $this->intentConfig->getSizeTokenPattern(); - if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) { + if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) { foreach ($tokenMatches[1] as $sizeToken) { $sizes[] = trim($sizeToken); } @@ -207,6 +160,9 @@ final readonly class CommerceQueryParser return null; } + /** + * @param string[] $sizes + */ private function buildSearchText( string $prompt, array $sizes, @@ -219,7 +175,7 @@ final readonly class CommerceQueryParser return $this->buildDirectProductSearchText($prompt); } - $text = ' ' . $prompt . ' '; + $text = $this->wrapForPhraseReplacement($prompt); foreach ($this->config->getPhrasesToRemove() as $phrase) { $normalizedPhrase = $this->normalize((string) $phrase); @@ -228,7 +184,11 @@ final readonly class CommerceQueryParser continue; } - $text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text); + $text = str_replace( + $this->wrapForPhraseReplacement($normalizedPhrase), + ' ', + $text + ); } foreach ($sizes as $size) { @@ -238,111 +198,69 @@ final readonly class CommerceQueryParser continue; } - $text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text; + $text = preg_replace( + $this->config->buildExactTokenRemovalPattern($normalizedSize), + ' ', + $text + ) ?? $text; } if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) { - $text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text; + $text = preg_replace( + $this->config->buildExactTokenRemovalPattern($brand), + ' ', + $text + ) ?? $text; } if ($priceMin !== null || $priceMax !== null) { - $text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; - $text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; - $text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; - $text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text; + foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) { + $text = preg_replace($pattern, ' ', $text) ?? $text; + } } - $text = preg_replace('/\s+/u', ' ', $text) ?? $text; - $text = trim($text, " \t\n\r\0\x0B-.,"); + $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text; + $text = trim($text, $this->config->getSearchTextTrimCharacters()); $tokens = array_filter( explode(' ', $text), - static fn(string $token): bool => mb_strlen($token) > 1 + fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength() ); $tokens = $this->filterSearchTokens($tokens); - $tokens = $this->stripReferenceOnlyTokens($tokens); return trim(implode(' ', $tokens)); } private function buildDirectProductSearchText(string $prompt): string { - $text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt; - $text = trim($text, " \t\n\r\0\x0B-.,"); + $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt; + $text = trim($text, $this->config->getSearchTextTrimCharacters()); $tokens = array_filter( explode(' ', $text), - static fn(string $token): bool => mb_strlen($token) > 0 + fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength() ); - return trim(implode(' ', array_values(array_unique($tokens)))); + $tokens = array_values(array_unique($tokens)); + + return trim(implode(' ', $tokens)); } - private function shouldUseHistoryContext(string $prompt, string $searchText): bool + private function shouldUseHistoryContext(string $prompt): bool { - if ($this->isReferenceOnlyFollowUp($prompt)) { - return true; - } - - if ($this->isTooGenericSearchText($searchText)) { - return true; - } - - return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1; - } - - private function shouldUseReferenceContext(string $prompt, string $searchText): bool - { - if ($this->isReferenceOnlyFollowUp($prompt)) { - return true; - } - - return $this->isTooGenericSearchText($searchText); - } - - private function isReferenceOnlyFollowUp(string $prompt): bool - { - return preg_match('/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u', $prompt) === 1; - } - - private function isTooGenericSearchText(string $searchText): bool - { - $tokens = array_values(array_filter( - preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [], - static fn(string $token): bool => $token !== '' - )); - - if ($tokens === []) { - return true; - } - - $genericTokens = array_fill_keys($this->config->getReferenceOnlyTokens(), true); - - foreach ($tokens as $token) { - if (!isset($genericTokens[$token])) { - return false; - } - } - - return true; - } - - private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string - { - return $this->normalize($referenceContext->buildReferenceSearchText()); + return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1; } private function extractLatestQuestionFromHistory(string $historyContext): string { - $result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches); + $result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches); if ($result === false) { return ''; } $questions = $matches[1] ?? []; - if ($questions === []) { return ''; } @@ -352,11 +270,11 @@ final readonly class CommerceQueryParser return is_string($lastQuestion) ? trim($lastQuestion) : ''; } - private function mergeSearchTexts(string $left, string $right): string + private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string { $tokens = []; - foreach ([$left, $right] as $text) { + foreach ([$historySearchText, $currentSearchText] as $text) { if ($text === '') { continue; } @@ -364,7 +282,7 @@ final readonly class CommerceQueryParser foreach (explode(' ', $text) as $token) { $token = trim($token); - if ($token === '' || mb_strlen($token) <= 1) { + if ($token === '' || mb_strlen($token) <= $this->config->getMinSearchTokenLength()) { continue; } @@ -381,25 +299,11 @@ final readonly class CommerceQueryParser */ private function filterSearchTokens(array $tokens): array { - $stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true); + $stopWords = $this->config->getFilterSearchTokens(); return array_values(array_filter( $tokens, - static fn(string $token): bool => !isset($stopWords[$token]) - )); - } - - /** - * @param string[] $tokens - * @return string[] - */ - private function stripReferenceOnlyTokens(array $tokens): array - { - $referenceOnly = array_fill_keys($this->config->getReferenceOnlyTokens(), true); - - return array_values(array_filter( - $tokens, - static fn(string $token): bool => !isset($referenceOnly[$token]) + static fn(string $token): bool => !in_array($token, $stopWords, true) )); } @@ -417,25 +321,25 @@ final readonly class CommerceQueryParser return true; } - $tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: []; + $tokens = preg_split( + $this->config->getWhitespaceSplitPattern(), + $prompt, + -1, + PREG_SPLIT_NO_EMPTY + ) ?: []; - return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1; + return count($tokens) <= $this->config->getDirectProductMaxTokens() + && preg_match($this->config->getDirectProductDigitPattern(), $prompt) === 1; } private function containsModelLikePhrase(string $text): bool { - return preg_match( - '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u', - $text - ) === 1; + return preg_match($this->config->getModelLikePattern(), $text) === 1; } private function containsAccessoryLikePhrase(string $text): bool { - return preg_match( - '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u', - $text - ) === 1; + return preg_match($this->config->getAccessoryLikePattern(), $text) === 1; } private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool @@ -445,7 +349,7 @@ final readonly class CommerceQueryParser } return preg_match( - '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u', + $this->config->buildBrandPartOfModelPattern($brand), $prompt ) === 1; } @@ -456,4 +360,42 @@ final readonly class CommerceQueryParser return is_numeric($value) ? (float) $value : null; } + + /** + * @return array{searchText:string, brand:?string}|null + */ + private function parseHistoryContext(string $historyContext): ?array + { + $latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext); + + if ($latestHistoryQuestion === '') { + return null; + } + + $normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion); + $isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt); + + [$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt); + $historySizes = $this->extractSizes($normalizedHistoryPrompt); + $historyBrand = $this->extractBrand($normalizedHistoryPrompt); + + $historySearchText = $this->buildSearchText( + prompt: $normalizedHistoryPrompt, + sizes: $historySizes, + brand: $historyBrand, + priceMin: $historyPriceMin, + priceMax: $historyPriceMax, + preserveDirectProductQuery: $isDirectHistoryProductQuery + ); + + return [ + 'searchText' => $historySearchText, + 'brand' => $historyBrand, + ]; + } + + private function wrapForPhraseReplacement(string $text): string + { + return ' ' . $text . ' '; + } } \ No newline at end of file diff --git a/src/Commerce/SearchRepairService.php b/src/Commerce/SearchRepairService.php index 485c1f9..5dec354 100644 --- a/src/Commerce/SearchRepairService.php +++ b/src/Commerce/SearchRepairService.php @@ -5,16 +5,15 @@ declare(strict_types=1); namespace App\Commerce; use App\Commerce\Dto\ShopProductResult; +use App\Config\SearchRepairConfig; use Psr\Log\LoggerInterface; final readonly class SearchRepairService { public function __construct( private ShopSearchService $shopSearchService, + private SearchRepairConfig $config, private LoggerInterface $logger, - private bool $enabled = true, - private int $maxRepairQueries = 3, - private int $minPrimaryResultsWithoutRepair = 2, ) { } @@ -37,22 +36,22 @@ final readonly class SearchRepairService array $primaryShopResults, array $knowledgeChunks ): array { - if (!$this->enabled) { - return [ - 'results' => $primaryShopResults, - 'attemptedRepair' => false, - 'usedRepair' => false, - 'repairQueries' => [], - ]; + if (!$this->config->isEnabled()) { + return $this->buildRepairResult( + results: $primaryShopResults, + attemptedRepair: false, + usedRepair: false, + repairQueries: [] + ); } if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) { - return [ - 'results' => $primaryShopResults, - 'attemptedRepair' => false, - 'usedRepair' => false, - 'repairQueries' => [], - ]; + return $this->buildRepairResult( + results: $primaryShopResults, + attemptedRepair: false, + usedRepair: false, + repairQueries: [] + ); } $repairQueries = $this->buildRepairQueries( @@ -63,12 +62,12 @@ final readonly class SearchRepairService ); if ($repairQueries === []) { - return [ - 'results' => $primaryShopResults, - 'attemptedRepair' => false, - 'usedRepair' => false, - 'repairQueries' => [], - ]; + return $this->buildRepairResult( + results: $primaryShopResults, + attemptedRepair: false, + usedRepair: false, + repairQueries: [] + ); } $this->logger->info('Shop repair started', [ @@ -99,12 +98,12 @@ final readonly class SearchRepairService 'repairQueries' => $repairQueries, ]); - return [ - 'results' => $primaryShopResults, - 'attemptedRepair' => true, - 'usedRepair' => false, - 'repairQueries' => $repairQueries, - ]; + return $this->buildRepairResult( + results: $primaryShopResults, + attemptedRepair: true, + usedRepair: false, + repairQueries: $repairQueries + ); } $mergedResults = $this->rankMergedResults( @@ -129,16 +128,16 @@ final readonly class SearchRepairService 'manufacturer' => $product->manufacturer, 'available' => $product->available, ], - array_slice($mergedResults, 0, 3) + array_slice($mergedResults, 0, $this->config->getTopProductLogLimit()) ), ]); - return [ - 'results' => $mergedResults, - 'attemptedRepair' => true, - 'usedRepair' => true, - 'repairQueries' => $repairQueries, - ]; + return $this->buildRepairResult( + results: $mergedResults, + attemptedRepair: true, + usedRepair: true, + repairQueries: $repairQueries + ); } /** @@ -157,15 +156,11 @@ final readonly class SearchRepairService return true; } - // Always try repair for bundle/accessory prompts. - // These prompts often need a second pass even when the first search - // already returned some results, because the user is asking for a - // combination of main device + matching accessory. if ($asksForBundle) { return true; } - if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) { + if ($primaryResultsCount >= $this->config->getMinPrimaryResultsWithoutRepair()) { return false; } @@ -173,7 +168,7 @@ final readonly class SearchRepairService return false; } - return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair; + return $primaryResultsCount < $this->config->getMinPrimaryResultsWithoutRepair(); } /** @@ -230,7 +225,7 @@ final readonly class SearchRepairService fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery) )); - return array_slice($queries, 0, max(1, $this->maxRepairQueries)); + return array_slice($queries, 0, max(1, $this->config->getMaxRepairQueries())); } /** @@ -291,7 +286,7 @@ final readonly class SearchRepairService $candidates = []; preg_match_all( - '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u', + $this->config->getModelCandidatePattern(), $text, $matches ); @@ -321,7 +316,7 @@ final readonly class SearchRepairService $candidates = []; preg_match_all( - '/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu', + $this->config->getAccessoryCandidatePattern(), $text, $matches ); @@ -368,15 +363,15 @@ final readonly class SearchRepairService { $score = 0; - if (preg_match('/\d/u', $candidate) === 1) { - $score += 4; + if (preg_match($this->config->getContainsDigitPattern(), $candidate) === 1) { + $score += $this->config->getCandidateDigitScore(); } $wordCount = count($this->tokenize($candidate)); - $score += min($wordCount, 4); + $score += min($wordCount, $this->config->getCandidateWordCountCap()); - if (preg_match('/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu', $candidate) === 1) { - $score += 3; + if (preg_match($this->config->getSpecificityBoostPattern(), $candidate) === 1) { + $score += $this->config->getSpecificityBoostScore(); } return $score; @@ -384,39 +379,19 @@ final readonly class SearchRepairService private function asksForBundleOrAccessory(string $prompt): bool { - return preg_match( - '/\b(passend|passende|zubehor|zubehör|dazu|zusatz|erganzung|ergänzung|indikator|reagenz|kit|set|auch\s+das|mit\s+preis\s+und\s+allen\s+infos)\b/iu', - $prompt - ) === 1; + return preg_match($this->config->getAccessoryOrBundlePattern(), $prompt) === 1; } private function containsModelLikePhrase(string $text): bool { - return preg_match( - '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u', - $text - ) === 1; + return preg_match($this->config->getModelLikePattern(), $text) === 1; } private function looksTooGeneric(string $candidate): bool { $normalized = mb_strtolower($candidate); - foreach ([ - 'wasser', - 'messgerät', - 'messgeraet', - 'produkt', - 'geräte', - 'geraete', - 'gerät', - 'geraet', - 'resthärte', - 'resthaerte', - 'preis', - 'infos', - 'wissen', - ] as $genericToken) { + foreach ($this->config->getGenericCandidateTokens() as $genericToken) { if ($normalized === $genericToken) { return true; } @@ -428,8 +403,8 @@ final readonly class SearchRepairService private function sanitizeQuery(string $query): string { $query = trim($query); - $query = preg_replace('/\s+/u', ' ', $query) ?? $query; - $query = trim($query, " \t\n\r\0\x0B\"'`.,;:-"); + $query = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $query) ?? $query; + $query = trim($query, $this->config->getSanitizeTrimCharacters()); return trim($query); } @@ -446,7 +421,7 @@ final readonly class SearchRepairService $intersection = array_intersect($candidateTokens, $primaryTokens); $overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens)); - return $overlapRatio >= 0.9; + return $overlapRatio >= $this->config->getPrimaryQueryOverlapThreshold(); } /** @@ -497,12 +472,12 @@ final readonly class SearchRepairService foreach ($allResults as $index => $product) { $score = 0; - $score += $this->scoreProductAgainstText($product, $prompt) * 3; - $score += $this->scoreProductAgainstText($product, $primaryQuery) * 2; - $score += $this->scoreProductAgainstText($product, $repairSignal) * 4; + $score += $this->scoreProductAgainstText($product, $prompt) * $this->config->getPromptMatchWeight(); + $score += $this->scoreProductAgainstText($product, $primaryQuery) * $this->config->getPrimaryQueryMatchWeight(); + $score += $this->scoreProductAgainstText($product, $repairSignal) * $this->config->getRepairSignalMatchWeight(); if ($index < count($primaryResults)) { - $score += 1; + $score += $this->config->getPrimaryResultOrderBonus(); } $decorated[] = [ @@ -549,11 +524,11 @@ final readonly class SearchRepairService $score = 0; $intersection = array_intersect($queryTokens, $productTokens); - $score += count($intersection) * 2; + $score += count($intersection) * $this->config->getTokenIntersectionScore(); foreach ($this->extractNumberTokens($queryTokens) as $numberToken) { if (in_array($numberToken, $productTokens, true)) { - $score += 4; + $score += $this->config->getNumericTokenMatchScore(); } } @@ -562,7 +537,7 @@ final readonly class SearchRepairService private function buildProductKey(ShopProductResult $product): string { - return mb_strtolower(trim(implode('|', [ + return mb_strtolower(trim(implode($this->config->getProductKeySeparator(), [ $product->id, $product->productNumber ?? '', $product->name, @@ -576,8 +551,8 @@ final readonly class SearchRepairService private function tokenize(string $text): array { $text = mb_strtolower($text); - $text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text; - $text = preg_replace('/\s+/u', ' ', $text) ?? $text; + $text = preg_replace($this->config->getTokenizeCleanupPattern(), ' ', $text) ?? $text; + $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text; $text = trim($text); if ($text === '') { @@ -595,7 +570,31 @@ final readonly class SearchRepairService { return array_values(array_filter( $tokens, - static fn(string $token): bool => preg_match('/\d/u', $token) === 1 + fn(string $token): bool => preg_match($this->config->getContainsDigitPattern(), $token) === 1 )); } + + /** + * @param ShopProductResult[] $results + * @param string[] $repairQueries + * @return array{ + * results: ShopProductResult[], + * attemptedRepair: bool, + * usedRepair: bool, + * repairQueries: string[] + * } + */ + private function buildRepairResult( + array $results, + bool $attemptedRepair, + bool $usedRepair, + array $repairQueries + ): array { + return [ + 'results' => $results, + 'attemptedRepair' => $attemptedRepair, + 'usedRepair' => $usedRepair, + 'repairQueries' => $repairQueries, + ]; + } } \ No newline at end of file diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 847d91e..ea9ec0e 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -6,50 +6,249 @@ namespace App\Config; final class AgentRunnerConfig { + public function getCommerceHistoryBudgetChars(): int + { + return 1000; + } + + public function getProductSearchKnowledgeChunkLimit(): int + { + return 2; + } + + public function getAdvisoryProductSearchKnowledgeChunkLimit(): int + { + return 3; + } + + public function getOptimizedShopQueryPrefixPattern(): string + { + return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'; + } + + public function getOptimizedShopQueryTrimCharacters(): string + { + return " \t\n\r\0\x0B\"'`"; + } + + public function getEmptyPromptMessage(): string + { + return '❌ Empty prompt.'; + } + + public function getAnalyzeRequestMessage(): string + { + return 'Ich analysiere deine Anfrage...'; + } + + public function getCheckInternetSourcesMessage(): string + { + return 'Ich prüfe auf Internetquellen...'; + } + + public function getRetrieveKnowledgeMessage(): string + { + return 'Ich hole relevante Daten aus meinem RAG-Wissen...'; + } + + public function getOptimizeSearchMessage(): string + { + return 'Ich optimiere die Recherche...'; + } + + public function getFetchSearchDataMessageTemplate(): string + { + return 'Ich rufe Recherchedaten ab (type: %s)'; + } + + public function getAnalyzeAllInformationMessage(): string + { + return 'Ich analysiere alle Informationen...'; + } + + public function getThinkingWhileStreamingMessage(): string + { + return 'Denke nach...'; + } + + public function getNoLlmDataReceivedMessage(): string + { + return '❌ Es wurden keine Daten vom LLM empfangen.'; + } + + public function getGenericInternalErrorMessage(): string + { + return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.'; + } + + public function getDebugInternalErrorPrefix(): string + { + return '❌ Interner Fehler: '; + } + + public function getExternalUrlSourceLabel(): string + { + return 'Externe URL'; + } + + public function getRagKnowledgeSourceLabel(): string + { + return 'RAG Wissen'; + } + + public function getConversationHistorySourceLabel(): string + { + return 'Chatverlauf'; + } + + public function getShopSystemSourceLabel(): string + { + return 'Shopsystem'; + } + + public function getExtendedShopSearchSourceLabel(): string + { + return 'Erweiterte Shopsuche'; + } + + public function getUsedSourcesPrefix(): string + { + return 'Genutzte Quellen: '; + } + + public function getSourcesPrefix(): string + { + return 'Quellen: '; + } + + public function getSourceBadgeHtmlTemplate(): string + { + return '%s'; + } + + public function getErrorHtmlTemplate(): string + { + return '%s' . "\n
\n"; + } + + public function getThinkHtmlTemplate(): string + { + return '%s' . "\n"; + } + + public function getInfoHtmlTemplate(): string + { + return "\n\n" . '%s' . "\n"; + } + + public function getDebugHtmlTemplate(): string + { + return "\n\nDEBUG: %s\n"; + } + public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string { $historyBlock = ''; if (trim($commerceHistoryContext) !== '') { - $historyBlock = ' - RECENT CONVERSATION CONTEXT: - ' . $commerceHistoryContext . ' - - Additional rules for conversation context: - - The current user input has highest priority. - - Use the recent conversation context only to resolve omitted references. - - Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups. - - Do not revive older products unless the current user input clearly refers to them. - - If the current input starts a new topic, ignore older product context. - - Prefer the most recent product reference over older ones. - '; + $historyBlock = $this->buildHistoryBlock($commerceHistoryContext); } - return ' - Generate a short search query for Shopware 6 from the following user input text. - - Rules: - - Output only the final search query. - - Always convert relevant search terms to their singular form. - - No introduction, no explanation, no quotation marks. - - Use only shop-relevant search terms from the user input for a shop search. - - Maximum 6 search terms, preferably fewer. - - Remove filler words, polite phrases, and irrelevant words. - - Preserve product names, brands, model numbers, and compound terms exactly if they are relevant. - - Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000). - - Separate terms using spaces only. - - If a relevant product name is present, it must be placed at the beginning of the final search query. - - Try to always identify all products mentioned in the user input text, even in long prompts. - - Look for terms such as Testomat, Horiba, Tritromat, or words like indicator. - - If the current user input is vague or referential, use the recent conversation context only as support. - - Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above". - - Output format: - Keyword1 Keyword2 Keyword3 - ' . $historyBlock . ' - - CURRENT USER INPUT: - ' . $prompt . ' - '; + return $this->implodePromptBlocks([ + $this->getShopPromptIntro(), + $this->buildRulesBlock($this->getShopPromptRules()), + $this->getShopPromptOutputFormatBlock(), + $historyBlock, + $this->getCurrentUserInputLabel() . ':', + trim($prompt), + ]); + } + + private function buildHistoryBlock(string $commerceHistoryContext): string + { + return $this->implodePromptBlocks([ + $this->getRecentConversationContextLabel() . ':', + trim($commerceHistoryContext), + $this->buildRulesBlock($this->getConversationContextRules(), 'Additional rules for conversation context:'), + ]); + } + + /** + * @return string[] + */ + public function getShopPromptRules(): array + { + return [ + '- Output only the final search query.', + '- Always convert relevant search terms to their singular form.', + '- No introduction, no explanation, no quotation marks.', + '- Use only shop-relevant search terms from the user input for a shop search.', + '- Maximum 6 search terms, preferably fewer.', + '- Remove filler words, polite phrases, and irrelevant words.', + '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.', + '- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).', + '- Separate terms using spaces only.', + '- If a relevant product name is present, it must be placed at the beginning of the final search query.', + '- Try to always identify all products mentioned in the user input text, even in long prompts.', + '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.', + '- If the current user input is vague or referential, use the recent conversation context only as support.', + '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".', + ]; + } + + /** + * @return string[] + */ + public function getConversationContextRules(): array + { + return [ + '- The current user input has highest priority.', + '- Use the recent conversation context only to resolve omitted references.', + '- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.', + '- Do not revive older products unless the current user input clearly refers to them.', + '- If the current input starts a new topic, ignore older product context.', + '- Prefer the most recent product reference over older ones.', + ]; + } + + public function getShopPromptIntro(): string + { + return 'Generate a short search query for Shopware 6 from the following user input text.'; + } + + public function getShopPromptOutputFormatBlock(): string + { + return "Output format:\nKeyword1 Keyword2 Keyword3"; + } + + public function getRecentConversationContextLabel(): string + { + return 'RECENT CONVERSATION CONTEXT'; + } + + public function getCurrentUserInputLabel(): string + { + return 'CURRENT USER INPUT'; + } + + private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string + { + return $headline . "\n" . implode("\n", $rules); + } + + /** + * @param string[] $blocks + */ + private function implodePromptBlocks(array $blocks): string + { + $normalized = array_values(array_filter( + array_map( + static fn(string $block): string => trim($block), + $blocks + ), + static fn(string $block): bool => $block !== '' + )); + + return implode("\n\n", $normalized); } } \ No newline at end of file diff --git a/src/Config/CommerceIntentConfig.php b/src/Config/CommerceIntentConfig.php index 4bc7b1d..04d8e40 100644 --- a/src/Config/CommerceIntentConfig.php +++ b/src/Config/CommerceIntentConfig.php @@ -6,57 +6,38 @@ namespace App\Config; final class CommerceIntentConfig { + /** + * @return string[] + */ public function getStrongSignalsList(): array { return [ 'shop', 'alle', 'preis', - 'preise', 'kunde', 'online', 'produkt', - 'produkte', 'artikel', 'sku', 'kaufen', 'kostet', - 'kosten', - 'verfügbarkeit', - 'verfuegbarkeit', - - // Search / product discovery signals 'suche', 'such', 'finde', 'finden', - 'welche', - 'welcher', - 'welches', - - // Device / system signals 'analysegerät', 'analysegeraet', - 'analysegeräte', - 'analysegeraete', 'messgerät', 'messgeraet', - 'messgeräte', - 'messgeraete', - 'gerät', - 'geraet', - 'geräte', - 'geraete', 'analysator', - 'analysatoren', 'analyzer', - 'system', - 'systeme', - 'anlage', - 'anlagen', ]; } + /** + * @return string[] + */ public function getAdvisorySignals(): array { return [ @@ -67,30 +48,36 @@ final class CommerceIntentConfig 'geeignet', 'empfiehl', 'empfehl', - 'vergleich', - 'vergleichen', ]; } - public function getPricePattern(): string + /** + * @return string[] + */ + public function getPriceTerms(): array { - $pattern = [ + return [ 'euro', '€', 'eur', 'teuer', 'preis', - 'preise', 'kosten', 'kostet', ]; - - return implode('|', $pattern); } - public function getColorPattern(): string + public function getPricePattern(): string { - $pattern = [ + return implode('|', $this->getPriceTerms()); + } + + /** + * @return string[] + */ + public function getColorTerms(): array + { + return [ 'schwarz', 'weiß', 'weis', @@ -103,13 +90,19 @@ final class CommerceIntentConfig 'orange', 'braun', ]; - - return implode('|', $pattern); } - public function getSizeTokenPattern(): string + public function getColorPattern(): string { - $pattern = [ + return implode('|', $this->getColorTerms()); + } + + /** + * @return string[] + */ + public function getSizeTokenTerms(): array + { + return [ 'xs', 's', 'm', @@ -118,18 +111,189 @@ final class CommerceIntentConfig 'xxl', 'xxxxl', ]; - - return implode('|', $pattern); } - public function getSizePattern(): string + public function getSizeTokenPattern(): string { - $pattern = [ + return implode('|', $this->getSizeTokenTerms()); + } + + /** + * @return string[] + */ + public function getSizeTerms(): array + { + return [ 'größe', 'groesse', 'grösse', ]; + } - return implode('|', $pattern); + public function getSizePattern(): string + { + return implode('|', $this->getSizeTerms()); + } + + public function getSizeExtractionPattern(): string + { + return '/\b(?:' . $this->getSizePattern() . ')\s*([a-z0-9.-]+)\b/u'; + } + + /** + * @return string[] + */ + public function getSupportDiagnosticPatterns(): array + { + return [ + '/\bfehler\b/u', + '/\bfehlercode\b/u', + '/\berror\b/u', + '/\bstörung\b/u', + '/\bstoerung\b/u', + '/\balarm\b/u', + '/\bstörungsmeldung\b/u', + '/\bstoerungsmeldung\b/u', + '/\bmeldung\b/u', + '/\bwarnung\b/u', + '/\bwarncode\b/u', + '/\bcode\b/u', + '/\bwas bedeutet\b/u', + '/\bwarum\b/u', + '/\bblinkt\b/u', + '/\bzeigt\b/u', + '/\bzeigt an\b/u', + '/\bursache\b/u', + '/\bdiagnose\b/u', + '/\bservicefall\b/u', + '/\bproblem\b/u', + '/\bstörung beheben\b/u', + '/\bstoerung beheben\b/u', + '/\be\d{1,3}\b/u', + ]; + } + + /** + * @return string[] + */ + public function getExplicitCommerceIntentPatterns(): array + { + return [ + '/\bshop\b/u', + '/\bpreis\b/u', + '/\bkosten\b/u', + '/\bkostet\b/u', + '/\bkaufen\b/u', + '/\bbestellen\b/u', + '/\bprodukt\b/u', + '/\bartikel\b/u', + '/\bsku\b/u', + '/\bonline\b/u', + ]; + } + + public function getSkuLikePattern(): string + { + return '/\b\d{4,10}\b/u'; + } + + public function getPriceValuePattern(): string + { + return '/\b\d+(?:[.,]\d+)?\s*(?:' . $this->getPricePattern() . ')\b/u'; + } + + public function getSizeValuePattern(): string + { + return '/\b(?:' . $this->getSizePattern() . ')\s*[a-z0-9.-]+\b/u'; + } + + public function getSizeTokenValuePattern(): string + { + return '/\b(?:' . $this->getSizeTokenPattern() . ')\b/u'; + } + + public function getColorValuePattern(): string + { + return '/\b(?:' . $this->getColorPattern() . ')\b/u'; + } + + public function getSupportOrDiagnosticSignalLabel(): string + { + return 'support_or_diagnostic'; + } + + public function getSkuSignalLabel(): string + { + return 'sku'; + } + + public function getPriceSignalLabel(): string + { + return 'price'; + } + + public function getSizeSignalLabel(): string + { + return 'size'; + } + + public function getSizeTokenSignalLabel(): string + { + return 'size_token'; + } + + public function getColorSignalLabel(): string + { + return 'color'; + } + + public function getAdvisorySignalPrefix(): string + { + return 'advisory:'; + } + + public function getProductSearchMinScore(): int + { + return 3; + } + + public function getAdvisoryProductSearchMinScore(): int + { + return 2; + } + + public function getStrongSignalScore(): int + { + return 3; + } + + public function getSkuSignalScore(): int + { + return 2; + } + + public function getPriceSignalScore(): int + { + return 2; + } + + public function getSizeSignalScore(): int + { + return 2; + } + + public function getSizeTokenSignalScore(): int + { + return 1; + } + + public function getColorSignalScore(): int + { + return 1; + } + + public function getAdvisorySignalScore(): int + { + return 1; } } \ No newline at end of file diff --git a/src/Config/CommerceQueryParserConfig.php b/src/Config/CommerceQueryParserConfig.php index ecd7f62..5cd76aa 100644 --- a/src/Config/CommerceQueryParserConfig.php +++ b/src/Config/CommerceQueryParserConfig.php @@ -4,28 +4,18 @@ declare(strict_types=1); namespace App\Config; -final readonly class CommerceQueryParserConfig +final class CommerceQueryParserConfig { - /** - * @param string[] $knownBrands - * @param string[] $phrasesToRemove - * @param string[] $filterSearchTokensPattern - * @param string[] $referenceOnlyTokens - */ - public function __construct( - private array $knownBrands = [], - private array $phrasesToRemove = [], - private array $filterSearchTokensPattern = [], - private array $referenceOnlyTokens = [], - ) { - } - /** * @return string[] */ public function getKnownBrands(): array { - return $this->knownBrands; + return [ + 'heyl', + 'horiba', + 'neomeris', + ]; } /** @@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig */ public function getPhrasesToRemove(): array { - return $this->phrasesToRemove; + return [ + 'ich suche', + 'suche', + 'habt ihr', + 'gibt es', + 'zeige mir', + 'welches gerät', + 'welche gerät', + 'welches modell', + 'welches ist besser', + 'welches ist am besten', + 'alternative', + 'alternativen', + ]; } public function getHistoryContextPattern(): string { - return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte'; + return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt'; } - public function getReferenceFollowUpPattern(): string + public function getHistoryContextValuePattern(): string { - return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte'; + return '/\b(' . $this->getHistoryContextPattern() . ')\b/u'; } /** * @return string[] */ + public function getFilterSearchTokens(): array + { + return [ + 'auch', + 'noch', + 'nochmal', + 'zusätzlich', + 'dazu', + 'davon', + 'stattdessen', + 'bitte', + 'gern', + 'gerne', + 'zeige', + 'zeig', + 'such', + 'suche', + 'finde', + 'find', + 'mir', + 'mal', + 'von', + ]; + } + + /** + * Backward-compatible alias for older callers. + * + * @return string[] + */ public function getFilterSearchTokensPattern(): array { - return $this->filterSearchTokensPattern; + return $this->getFilterSearchTokens(); } /** * @return string[] */ - public function getReferenceOnlyTokens(): array + public function getNormalizationSearch(): array { - if ($this->referenceOnlyTokens !== []) { - return $this->referenceOnlyTokens; - } + return ['€']; + } + /** + * @return string[] + */ + public function getNormalizationReplace(): array + { + return [' euro ']; + } + + public function getPromptSanitizePattern(): string + { + return '/[^\p{L}\p{N}\s.,\-]/u'; + } + + public function getWhitespaceCollapsePattern(): string + { + return '/\s+/u'; + } + + public function getWhitespaceSplitPattern(): string + { + return '/\s+/u'; + } + + public function getSearchTextTrimCharacters(): string + { + return " \t\n\r\0\x0B-.,"; + } + + public function getMinSearchTokenLength(): int + { + return 1; + } + + public function getMinDirectProductTokenLength(): int + { + return 1; + } + + public function getHistoryQuestionPattern(): string + { + return '/^Question:\s*(.+)$/m'; + } + + public function getPriceBetweenPattern(): string + { + return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'; + } + + public function getPriceMaxPattern(): string + { + return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'; + } + + public function getPriceMinPattern(): string + { + return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'; + } + + /** + * @return string[] + */ + public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array + { return [ - 'preis', - 'preise', - 'kosten', - 'kostet', - 'gerät', - 'geraet', - 'modell', - 'produkt', - 'artikel', - 'dafür', - 'dafuer', - 'dazu', - 'davon', - 'verfügbarkeit', - 'verfuegbarkeit', - 'shop', - 'link', - 'zum', - 'zur', - 'das', - 'dieses', - 'den', - 'dem', - 'bitte', - 'und', + '/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', + '/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', + '/\b(?:' . $intentConfig->getPricePattern() . ')\b/u', ]; } + + public function getDirectProductDigitPattern(): string + { + return '/\d/u'; + } + + public function getDirectProductMaxTokens(): int + { + return 4; + } + + public function getModelLikePattern(): string + { + return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u'; + } + + public function getAccessoryLikePattern(): string + { + return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u'; + } + + public function buildExactTokenRemovalPattern(string $token): string + { + return '/\b' . preg_quote($token, '/') . '\b/u'; + } + + public function buildBrandPartOfModelPattern(string $brand): string + { + return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u'; + } } \ No newline at end of file diff --git a/src/Config/NdjsonHybridRetrieverConfig.php b/src/Config/NdjsonHybridRetrieverConfig.php index 80129c5..dbab01b 100644 --- a/src/Config/NdjsonHybridRetrieverConfig.php +++ b/src/Config/NdjsonHybridRetrieverConfig.php @@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig * - the system now has more safeguards: * lexical cross-signals, scoped retrieval, title/meta boost, selection rules */ - public const VECTOR_SCORE_THRESHOLD = 0.82; + public const VECTOR_SCORE_THRESHOLD = 0.83; /** * Lower safety boundary for dynamic threshold adjustments. diff --git a/src/Config/PromptBuilderConfig.php b/src/Config/PromptBuilderConfig.php index 6c4bd88..72b93bd 100644 --- a/src/Config/PromptBuilderConfig.php +++ b/src/Config/PromptBuilderConfig.php @@ -1,97 +1,459 @@ getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu'; + } + + public function getAccessoryOrBundlePattern(): string + { + return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu'; + } + + public function getModelLikePattern(): string + { + return '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u'; + } + + public function getSpecificityBoostPattern(): string + { + return '/\b(?:' . implode('|', $this->getSpecificityBoostTerms()) . ')\b/iu'; + } + + /** + * @return string[] + */ + public function getGenericCandidateTokens(): array + { + return [ + 'wasser', + 'messgerät', + 'messgeraet', + 'produkt', + 'geräte', + 'geraete', + 'gerät', + 'geraet', + 'resthärte', + 'resthaerte', + 'preis', + 'infos', + 'wissen', + ]; + } + + public function getSanitizeTrimCharacters(): string + { + return " \t\n\r\0\x0B\"'`.,;:-"; + } + + public function getContainsDigitPattern(): string + { + return '/\d/u'; + } + + public function getWhitespaceCollapsePattern(): string + { + return '/\s+/u'; + } + + public function getTokenizeCleanupPattern(): string + { + return '/[^\p{L}\p{N}\s\-]+/u'; + } + + public function getProductKeySeparator(): string + { + return '|'; + } + + public function getCandidateDigitScore(): int + { + return 4; + } + + public function getCandidateWordCountCap(): int + { + return 4; + } + + public function getSpecificityBoostScore(): int + { + return 3; + } + + public function getPrimaryQueryOverlapThreshold(): float + { + return 0.9; + } + + public function getPromptMatchWeight(): int + { + return 3; + } + + public function getPrimaryQueryMatchWeight(): int + { + return 2; + } + + public function getRepairSignalMatchWeight(): int + { + return 4; + } + + public function getPrimaryResultOrderBonus(): int + { + return 1; + } + + public function getTokenIntersectionScore(): int + { + return 2; + } + + public function getNumericTokenMatchScore(): int + { + return 4; + } + + /** + * @return string[] + */ + public function getAccessoryCandidateTerms(): array + { + return [ + 'indikator', + 'indicator', + 'reagenz', + 'reagent', + 'kit', + 'set', + ]; + } + + /** + * @return string[] + */ + public function getAccessoryOrBundleTerms(): array + { + return [ + 'passend', + 'passende', + 'zubehor', + 'zubehör', + 'dazu', + 'zusatz', + 'erganzung', + 'ergänzung', + 'indikator', + 'reagenz', + 'kit', + 'set', + 'auch\s+das', + 'mit\s+preis\s+und\s+allen\s+infos', + ]; + } + + /** + * @return string[] + */ + public function getSpecificityBoostTerms(): array + { + return [ + 'indikator', + 'indicator', + 'testomat', + 'tritromat', + 'titromat', + 'reagenz', + 'reagent', + ]; + } +} \ No newline at end of file diff --git a/src/Config/StopWordsConfig.php b/src/Config/StopWordsConfig.php new file mode 100644 index 0000000..c0797c4 --- /dev/null +++ b/src/Config/StopWordsConfig.php @@ -0,0 +1,40 @@ + self::NONE, - 'score' => 0, - 'signals' => [], - ]; + if ($prompt === '') { + return $this->buildDetectionResult( + intent: self::NONE, + score: 0, + signals: [] + ); } - // Block support / diagnostic questions from entering the commerce flow - // unless the prompt also contains very explicit purchase / shop intent. - if ($this->isSupportOrDiagnosticQuery($p) && !$this->hasExplicitCommerceIntent($p)) { - return [ - 'intent' => self::NONE, - 'score' => 0, - 'signals' => ['support_or_diagnostic'], - ]; + if ($this->isSupportOrDiagnosticQuery($prompt) && !$this->hasExplicitCommerceIntent($prompt)) { + return $this->buildDetectionResult( + intent: self::NONE, + score: 0, + signals: [$this->config->getSupportOrDiagnosticSignalLabel()] + ); } $score = 0; $signals = []; - $strongSignals = $this->config->getStrongSignalsList(); - - foreach ($strongSignals as $signal) { - if (str_contains($p, mb_strtolower($signal))) { - $score += 3; - $signals[] = $signal; - } - } - - // Treat long numeric identifiers as stronger product-number-like signals. - // This avoids over-triggering commerce purely because a model name contains - // a short number such as "808" in support questions. - if (preg_match('/\b\d{4,10}\b/u', $p) === 1) { - $score += 2; - $signals[] = 'sku'; - } - - $pricePattern = $this->config->getPricePattern(); - if (preg_match('/\b\d+(?:[.,]\d+)?\s*(' . $pricePattern . ')\b/u', $p) === 1) { - $score += 2; - $signals[] = 'price'; - } - - $sizePattern = $this->config->getSizePattern(); - if (preg_match('/\b(' . $sizePattern . ')\s*[a-z0-9.-]+\b/u', $p) === 1) { - $score += 2; - $signals[] = 'size'; - } - - $sizeTokenPattern = $this->config->getSizeTokenPattern(); - if (preg_match('/\b(' . $sizeTokenPattern . ')\b/u', $p) === 1) { - $score += 1; - $signals[] = 'size_token'; - } - - $colorPattern = $this->config->getColorPattern(); - if (preg_match('/\b(' . $colorPattern . ')\b/u', $p) === 1) { - $score += 1; - $signals[] = 'color'; - } - - $advisorySignals = $this->config->getAdvisorySignals(); - - foreach ($advisorySignals as $signal) { - if (str_contains($p, mb_strtolower($signal))) { - $score += 1; - $signals[] = 'advisory:' . $signal; - } - } + [$score, $signals] = $this->applyStrongSignals($prompt, $score, $signals); + [$score, $signals] = $this->applySkuSignal($prompt, $score, $signals); + [$score, $signals] = $this->applyPriceSignal($prompt, $score, $signals); + [$score, $signals] = $this->applySizeSignal($prompt, $score, $signals); + [$score, $signals] = $this->applySizeTokenSignal($prompt, $score, $signals); + [$score, $signals] = $this->applyColorSignal($prompt, $score, $signals); + [$score, $signals] = $this->applyAdvisorySignals($prompt, $score, $signals); $signals = array_values(array_unique($signals)); - if ($score >= 3) { - return [ - 'intent' => self::PRODUCT_SEARCH, - 'score' => $score, - 'signals' => $signals, - ]; + if ($score >= $this->config->getProductSearchMinScore()) { + return $this->buildDetectionResult( + intent: self::PRODUCT_SEARCH, + score: $score, + signals: $signals + ); } - if ($score >= 2) { - return [ - 'intent' => self::ADVISORY_PRODUCT_SEARCH, - 'score' => $score, - 'signals' => $signals, - ]; + if ($score >= $this->config->getAdvisoryProductSearchMinScore()) { + return $this->buildDetectionResult( + intent: self::ADVISORY_PRODUCT_SEARCH, + score: $score, + signals: $signals + ); } - return [ - 'intent' => self::NONE, - 'score' => $score, - 'signals' => $signals, - ]; + return $this->buildDetectionResult( + intent: self::NONE, + score: $score, + signals: $signals + ); } private function isSupportOrDiagnosticQuery(string $prompt): bool { - $patterns = [ - '/\bfehler\b/u', - '/\bfehlercode\b/u', - '/\berror\b/u', - '/\bstörung\b/u', - '/\bstoerung\b/u', - '/\balarm\b/u', - '/\bstörungsmeldung\b/u', - '/\bstoerungsmeldung\b/u', - '/\bmeldung\b/u', - '/\bwarnung\b/u', - '/\bwarncode\b/u', - '/\bcode\b/u', - '/\bwas bedeutet\b/u', - '/\bwarum\b/u', - '/\bblinkt\b/u', - '/\bzeigt\b/u', - '/\bzeigt an\b/u', - '/\bursache\b/u', - '/\bdiagnose\b/u', - '/\bservicefall\b/u', - '/\bproblem\b/u', - '/\bstörung beheben\b/u', - '/\bstoerung beheben\b/u', - '/\be\d{1,3}\b/u', - ]; - - foreach ($patterns as $pattern) { - if (preg_match($pattern, $prompt) === 1) { - return true; - } - } - - return false; + return $this->matchesAnyPattern($prompt, $this->config->getSupportDiagnosticPatterns()); } private function hasExplicitCommerceIntent(string $prompt): bool { - $patterns = [ - '/\bshop\b/u', - '/\bpreis\b/u', - '/\bkosten\b/u', - '/\bkostet\b/u', - '/\bkaufen\b/u', - '/\bbestellen\b/u', - '/\bprodukt\b/u', - '/\bartikel\b/u', - '/\bsku\b/u', - '/\bonline\b/u', - ]; + return $this->matchesAnyPattern($prompt, $this->config->getExplicitCommerceIntentPatterns()); + } + /** + * @param string[] $patterns + */ + private function matchesAnyPattern(string $prompt, array $patterns): bool + { foreach ($patterns as $pattern) { if (preg_match($pattern, $prompt) === 1) { return true; @@ -181,4 +99,119 @@ final class CommerceIntentLite return false; } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applyStrongSignals(string $prompt, int $score, array $signals): array + { + foreach ($this->config->getStrongSignalsList() as $signal) { + if (str_contains($prompt, mb_strtolower($signal))) { + $score += $this->config->getStrongSignalScore(); + $signals[] = $signal; + } + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applySkuSignal(string $prompt, int $score, array $signals): array + { + if (preg_match($this->config->getSkuLikePattern(), $prompt) === 1) { + $score += $this->config->getSkuSignalScore(); + $signals[] = $this->config->getSkuSignalLabel(); + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applyPriceSignal(string $prompt, int $score, array $signals): array + { + if (preg_match($this->config->getPriceValuePattern(), $prompt) === 1) { + $score += $this->config->getPriceSignalScore(); + $signals[] = $this->config->getPriceSignalLabel(); + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applySizeSignal(string $prompt, int $score, array $signals): array + { + if (preg_match($this->config->getSizeValuePattern(), $prompt) === 1) { + $score += $this->config->getSizeSignalScore(); + $signals[] = $this->config->getSizeSignalLabel(); + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applySizeTokenSignal(string $prompt, int $score, array $signals): array + { + if (preg_match($this->config->getSizeTokenValuePattern(), $prompt) === 1) { + $score += $this->config->getSizeTokenSignalScore(); + $signals[] = $this->config->getSizeTokenSignalLabel(); + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applyColorSignal(string $prompt, int $score, array $signals): array + { + if (preg_match($this->config->getColorValuePattern(), $prompt) === 1) { + $score += $this->config->getColorSignalScore(); + $signals[] = $this->config->getColorSignalLabel(); + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{0:int,1:string[]} + */ + private function applyAdvisorySignals(string $prompt, int $score, array $signals): array + { + foreach ($this->config->getAdvisorySignals() as $signal) { + if (str_contains($prompt, mb_strtolower($signal))) { + $score += $this->config->getAdvisorySignalScore(); + $signals[] = $this->config->getAdvisorySignalPrefix() . $signal; + } + } + + return [$score, $signals]; + } + + /** + * @param string[] $signals + * @return array{intent:string, score:int, signals:string[]} + */ + private function buildDetectionResult(string $intent, int $score, array $signals): array + { + return [ + 'intent' => $intent, + 'score' => $score, + 'signals' => $signals, + ]; + } } \ No newline at end of file diff --git a/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php b/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php index 1d4c16a..5d77a97 100644 --- a/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonKeywordRetriever.php @@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever public function __construct( private string $projectDir, private LoggerInterface $agentLogger, + private StopWords $stopWords, ) { } @@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever return true; } - return StopWords::isStopWord($token); + return $this->stopWords->isStopWord($token); } private function normalizeText(string $value): string @@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever * token:string, * chunk_id:string, * document_id:string, - * chunk_index:?int, + * chunk_index $rows :?int, * tf:int, * title_tf:int, * df:int diff --git a/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php b/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php index c83f1f7..910c348 100644 --- a/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php +++ b/src/Knowledge/Retrieval/NdjsonLexicalIndexBuilder.php @@ -1,6 +1,5 @@ stopWords->isStopWord($token); } private function normalizeText(string $value): string diff --git a/src/Knowledge/Retrieval/QueryCleaner.php b/src/Knowledge/Retrieval/QueryCleaner.php index dbb465c..584cd1b 100644 --- a/src/Knowledge/Retrieval/QueryCleaner.php +++ b/src/Knowledge/Retrieval/QueryCleaner.php @@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval; use App\Knowledge\StopWords; -final class QueryCleaner +final readonly class QueryCleaner { + public function __construct( + private StopWords $stopWords + ) { + } + /** * Cleans a query strictly for retrieval purposes. * @@ -66,7 +71,7 @@ final class QueryCleaner } // Remove stop words - if (StopWords::isStopWord($token)) { + if ($this->stopWords->isStopWord($token)) { continue; } diff --git a/src/Knowledge/StopWords.php b/src/Knowledge/StopWords.php index 0017588..e1d9ae1 100644 --- a/src/Knowledge/StopWords.php +++ b/src/Knowledge/StopWords.php @@ -4,62 +4,25 @@ declare(strict_types=1); namespace App\Knowledge; -final class StopWords +use App\Config\StopWordsConfig; + +final readonly class StopWords { - /** - * Retrieval-optimierte Stopwortliste (Deutsch). - * - * WICHTIG: - * - Keine Negationen entfernen - * - Keine Fragewörter entfernen - * - Keine fachlichen Begriffe entfernen - * - Nur echte Füll- und Strukturwörter - */ - private const STOP_WORDS = [ - - 'mit', - // Artikel - 'der', 'die', 'das', - 'ein', 'eine', 'einer', 'eines', - 'den', 'dem', 'des', - - // Konjunktionen - 'und', 'oder', 'aber', 'sowie', - - // Schwache Pronomen - 'ich', 'du', 'er', 'sie', 'es', - 'wir', 'ihr', - - // Füllwörter - 'halt', 'eben', 'auch', 'schon', - 'noch', 'mal', 'bitte', 'danke', - - // Strukturwörter - 'also', 'nun', 'tja', - 'dann', 'danach', 'davor', - 'hier', 'dort', - - // Zeit-Füller (kontextarm) - 'heute', 'gestern', 'morgen', - - // Höflichkeits-/Modalformen - 'könnte', 'kannst', 'kann', - 'würde', 'würdest', 'würden', - ]; - - /** - * Gibt die vollständige Stopwortliste zurück. - */ - public static function getStopWords(): array - { - return self::STOP_WORDS; + public function __construct( + private StopWordsConfig $config + ) { } /** - * Prüft, ob ein Wort ein Stopwort ist. + * @return string[] */ - public static function isStopWord(string $word): bool + public function getStopWords(): array { - return in_array($word, self::STOP_WORDS, true); + return $this->config->getStopWords(); + } + + public function isStopWord(string $word): bool + { + return in_array($word, $this->config->getStopWords(), true); } } \ No newline at end of file