move intent and config values into config files

This commit is contained in:
team2
2026-04-23 21:49:54 +02:00
parent 87417febf4
commit fce44e971d
17 changed files with 1937 additions and 1133 deletions

View File

@@ -120,12 +120,7 @@ services:
App\Commerce\CommerceQueryParser: ~
App\Commerce\SearchRepairService:
arguments:
$logger: '@monolog.logger.agent'
$enabled: '%mto.commerce.search_repair.enabled%'
$maxRepairQueries: '%mto.commerce.search_repair.max_queries%'
$minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%'
App\Commerce\SearchRepairService: ~
App\Shopware\ShopwareCriteriaBuilder: ~

View File

@@ -4,9 +4,6 @@ declare(strict_types=1);
namespace App\Agent;
use App\Commerce\CommerceReferenceResolver;
use App\Commerce\CommerceReferenceStore;
use App\Commerce\Dto\CommerceReferenceContext;
use App\Commerce\SearchRepairService;
use App\Commerce\ShopSearchService;
use App\Config\AgentRunnerConfig;
@@ -21,8 +18,6 @@ use Throwable;
final readonly class AgentRunner
{
private const COMMERCE_HISTORY_BUDGET_CHARS = 1000;
private bool $systemMsgOn;
public function __construct(
@@ -33,8 +28,6 @@ final readonly class AgentRunner
private RetrieverInterface $retriever,
private ShopSearchService $shopSearchService,
private SearchRepairService $searchRepairService,
private CommerceReferenceStore $commerceReferenceStore,
private CommerceReferenceResolver $commerceReferenceResolver,
private CommerceIntentLite $commerceIntentLite,
private OllamaClient $ollamaClient,
private LoggerInterface $agentLogger,
@@ -51,14 +44,13 @@ final readonly class AgentRunner
$prompt = trim($prompt);
if ($prompt === '') {
yield $this->systemMsg('❌ Empty prompt.', 'err');
yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
return;
}
$shopResults = [];
$primaryShopResults = [];
$factSources = [];
$contextSignals = [];
$sources = [];
$optimizedShopQuery = '';
$shopSearchQuery = '';
$commerceIntent = CommerceIntentLite::NONE;
@@ -66,8 +58,6 @@ final readonly class AgentRunner
$attemptedShopRepair = false;
$usedShopRepair = false;
$shopRepairQueries = [];
$activeCommerceReference = null;
$shopChecked = false;
$this->agentLogger->info('Agent run started', [
'userId' => $userId,
@@ -79,74 +69,39 @@ final readonly class AgentRunner
// Additional context strategies can be added here later.
}
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
if ($urlContent !== '') {
$this->addBadge($factSources, 'Externe URL');
$this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
}
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
$knowledgeChunks = $this->retriever->retrieve($prompt);
if ($knowledgeChunks !== []) {
$this->addBadge($factSources, 'RAG Wissen');
$this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
}
$commerceIntent = $this->detectCommerceIntent($prompt);
if ($this->isCommerceIntent($commerceIntent)) {
yield $this->systemMsg('Ich optimiere die Recherche...', 'think');
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
$activeCommerceReference = $this->loadCommerceReference($userId);
if ($commerceHistoryContext !== '') {
$this->addBadge($contextSignals, 'Gesprächskontext');
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
}
if ($activeCommerceReference !== null) {
$this->addBadge($contextSignals, 'Commerce-Referenz');
}
$isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp(
$prompt,
$activeCommerceReference
);
if ($isReferenceOnlyFollowUp) {
$shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference);
if ($shopSearchQuery !== '') {
$this->addBadge($contextSignals, 'Deterministische Referenzsuche');
}
$this->agentLogger->info('Using deterministic reference shop query', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'prompt' => $prompt,
'shopSearchQuery' => $shopSearchQuery,
'referenceProductName' => $activeCommerceReference?->productName,
'referenceFocusTerms' => $activeCommerceReference?->focusTerms,
]);
} else {
$optimizedShopQuery = $this->buildOptimizedShopQuery(
$prompt,
$userId,
$commerceHistoryContext
);
if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) {
$this->addBadge($contextSignals, 'Query-Optimierung');
}
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
}
if ($shopSearchQuery === '') {
$shopSearchQuery = $prompt;
}
$this->agentLogger->info('Commerce search prepared', [
'userId' => $userId,
@@ -154,26 +109,20 @@ final readonly class AgentRunner
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
yield $this->systemMsg(
'Ich rufe Recherchedaten ab (type: ' . $commerceIntent . ')',
sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent),
'think'
);
$shopChecked = true;
$primaryShopResults = $this->searchShop(
$shopSearchQuery,
$commerceIntent,
$userId,
$commerceHistoryContext,
$activeCommerceReference
$commerceHistoryContext
);
$repairPayload = $this->repairShopResults(
@@ -192,13 +141,11 @@ final readonly class AgentRunner
$shopRepairQueries = $repairPayload['repairQueries'];
if ($shopResults !== []) {
$this->addBadge($factSources, 'Shopsystem');
} elseif ($shopChecked) {
$this->addBadge($factSources, 'Shopsystem geprüft');
$this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel());
}
if ($attemptedShopRepair) {
$this->addBadge($contextSignals, 'Erweiterte Shopsuche');
$this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel());
}
}
@@ -206,7 +153,7 @@ final readonly class AgentRunner
$knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
}
yield $this->systemMsg('Ich analysiere alle Informationen...', 'think');
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think');
$finalPrompt = $this->promptBuilder->build(
prompt: $prompt,
@@ -226,7 +173,6 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -243,21 +189,19 @@ final readonly class AgentRunner
]);
}
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Genutzte Datenpfade'
if ($sources !== []) {
yield $this->emitSources(
$sources,
$this->agentRunnerConfig->getUsedSourcesPrefix()
);
}
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Quellen und Signale'
if ($sources !== []) {
yield $this->emitSources(
$sources,
$this->agentRunnerConfig->getSourcesPrefix()
);
}
@@ -266,11 +210,10 @@ final readonly class AgentRunner
}
if ($fullOutput !== '') {
$this->persistConversationState(
userId: $userId,
prompt: $prompt,
fullOutput: $fullOutput,
shopResults: $shopResults
$this->contextService->appendHistory(
$userId,
$prompt,
$fullOutput
);
}
@@ -281,7 +224,6 @@ final readonly class AgentRunner
'commerceIntent' => $commerceIntent,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -292,8 +234,6 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
} catch (Throwable $e) {
$this->agentLogger->error('Agent run failed', [
@@ -361,42 +301,6 @@ final readonly class AgentRunner
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
}
/**
 * Decide whether a prompt only refers back to an already stored commerce reference.
 *
 * The prompt is lowercased, reduced to letters, digits and whitespace, and
 * whitespace-collapsed before any keyword matching takes place.
 *
 * @param string $prompt Raw user prompt.
 * @param CommerceReferenceContext|null $referenceContext Stored reference, or null when none exists.
 *
 * @return bool True only when a reference exists, the normalized prompt contains none of the
 *              listed product tokens, and it contains at least one follow-up keyword.
 */
private function isReferenceOnlyCommerceFollowUp(
string $prompt,
?CommerceReferenceContext $referenceContext
): bool {
// Without a stored reference there is nothing a follow-up could point to.
if ($referenceContext === null) {
return false;
}
$normalizedPrompt = mb_strtolower(trim($prompt), 'UTF-8');
// preg_replace() returns null on error; in that case keep the previous string.
// Replace every run of characters that are not letters, digits or whitespace with a space.
$normalizedPrompt = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
// Collapse whitespace runs to a single space.
$normalizedPrompt = preg_replace('/\s+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
$normalizedPrompt = trim($normalizedPrompt);
if ($normalizedPrompt === '') {
return false;
}
// The prompt names one of these tokens itself, so it is not treated as reference-only.
// NOTE(review): the tokens look like product/model names — confirm against the catalogue.
if (preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $normalizedPrompt) === 1) {
return false;
}
// German follow-up keywords: price/cost, availability, shop, link, and referring words
// such as "dazu" / "davon".
return preg_match(
'/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u',
$normalizedPrompt
) === 1;
}
/**
 * Build a shop search query directly from the stored commerce reference.
 *
 * @param CommerceReferenceContext|null $referenceContext Stored reference, or null when none exists.
 *
 * @return string The trimmed result of buildReferenceSearchText(), or an empty string when
 *                no reference is available.
 */
private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string
{
if ($referenceContext === null) {
return '';
}
return trim($referenceContext->buildReferenceSearchText());
}
/**
* @return array{
* results: array,
@@ -445,15 +349,13 @@ final readonly class AgentRunner
string $query,
string $commerceIntent,
string $userId,
string $commerceHistoryContext = '',
?CommerceReferenceContext $referenceContext = null
string $commerceHistoryContext = ''
): array {
try {
return $this->shopSearchService->search(
$query,
$commerceIntent,
$commerceHistoryContext,
$referenceContext
$commerceHistoryContext
);
} catch (Throwable $e) {
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
@@ -462,8 +364,6 @@ final readonly class AgentRunner
'query' => $query,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasReferenceContext' => $referenceContext !== null,
'referenceProductName' => $referenceContext?->productName,
'exception' => $e,
]);
@@ -475,73 +375,23 @@ final readonly class AgentRunner
{
return $this->contextService->buildUserContextWithinBudget(
$userId,
self::COMMERCE_HISTORY_BUDGET_CHARS
);
}
/**
 * Load the stored commerce reference for a user on a best-effort basis.
 *
 * @param string $userId Identifier passed to the commerce reference store.
 *
 * @return CommerceReferenceContext|null Whatever the store returns, or null when loading threw.
 */
private function loadCommerceReference(string $userId): ?CommerceReferenceContext
{
try {
return $this->commerceReferenceStore->load($userId);
} catch (Throwable $e) {
// A failing store is logged as a warning and reported as "no reference"
// instead of propagating the exception to the caller.
$this->agentLogger->warning('Failed to load commerce reference context', [
'userId' => $userId,
'exception' => $e,
]);
return null;
}
}
/**
 * Resolve a commerce reference from a finished turn and save it for the user.
 *
 * Nothing is saved when the resolver returns null. Any Throwable raised while
 * resolving or saving is logged as a warning and not rethrown.
 *
 * @param string $userId Identifier passed to the commerce reference store.
 * @param string $prompt User prompt of the turn.
 * @param string $answer Answer text of the turn.
 * @param array<int, mixed> $shopResults Shop results handed to the resolver.
 */
private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void
{
try {
$referenceContext = $this->commerceReferenceResolver->resolveFromCommerceTurn(
$prompt,
$answer,
$shopResults
);
// The resolver found no reference for this turn; nothing is saved.
if ($referenceContext === null) {
return;
}
$this->commerceReferenceStore->save($userId, $referenceContext);
} catch (Throwable $e) {
// Persisting the reference is best-effort: log and continue.
$this->agentLogger->warning('Failed to persist commerce reference context', [
'userId' => $userId,
'exception' => $e,
]);
}
}
/**
* @param array<int, mixed> $shopResults
*/
private function persistConversationState(
string $userId,
string $prompt,
string $fullOutput,
array $shopResults
): void {
$this->contextService->appendHistory($userId, $prompt, $fullOutput);
$this->storeCommerceReference(
userId: $userId,
prompt: $prompt,
answer: $fullOutput,
shopResults: $shopResults
$this->agentRunnerConfig->getCommerceHistoryBudgetChars()
);
}
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
{
return match ($commerceIntent) {
CommerceIntentLite::PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 2),
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 3),
CommerceIntentLite::PRODUCT_SEARCH => array_slice(
$knowledgeChunks,
0,
$this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit()
),
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice(
$knowledgeChunks,
0,
$this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit()
),
default => $knowledgeChunks,
};
}
@@ -555,8 +405,8 @@ final readonly class AgentRunner
}
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query;
$query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query;
$query = trim($query, " \t\n\r\0\x0B\"'`");
$query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query;
$query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters());
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
return trim($query);
@@ -582,7 +432,7 @@ final readonly class AgentRunner
if ($cleanToken === '') {
if ($firstThinkLoop) {
yield $this->systemMsg('Denke nach...', 'think');
yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think');
$firstThinkLoop = false;
}
@@ -601,60 +451,46 @@ final readonly class AgentRunner
if ($finalChunk !== null) {
yield $this->systemMsg($finalChunk, 'answer');
} elseif ($fullOutput === '') {
yield $this->systemMsg('❌ Es wurden keine Daten vom LLM empfangen.', 'err');
yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err');
}
return $fullOutput;
}
/**
* @param string[] $factSources
* @param string[] $contextSignals
* @param string[] $sources
*/
private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string
private function emitSources(array $sources, string $prefix): string
{
$parts = [];
if ($factSources !== []) {
$parts[] = 'Fakten: ' . implode(' ', $factSources);
}
if ($contextSignals !== []) {
$parts[] = 'Kontext: ' . implode(' ', $contextSignals);
}
return $this->systemMsg(
$label . ': ' . implode(' &nbsp;&nbsp; ', $parts),
'info'
);
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
}
/**
* @param string[] $target
* @param string[] $sources
*/
private function addBadge(array &$target, string $label): void
private function addSource(array &$sources, string $label): void
{
$badge = $this->badge($label);
if (!in_array($badge, $target, true)) {
$target[] = $badge;
if (!in_array($badge, $sources, true)) {
$sources[] = $badge;
}
}
private function buildUserErrorMessage(Throwable $e): string
{
if (!$this->debug) {
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
return $this->agentRunnerConfig->getGenericInternalErrorMessage();
}
return '❌ Interner Fehler: '
return $this->agentRunnerConfig->getDebugInternalErrorPrefix()
. htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
private function badge(string $label): string
{
return sprintf(
'<span class="badge bg-info text-black">%s</span>',
$this->agentRunnerConfig->getSourceBadgeHtmlTemplate(),
htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
);
}
@@ -667,10 +503,13 @@ final readonly class AgentRunner
return match ($type) {
'answer' => $msg,
'err' => '<span class="text-danger">' . $msg . "</span>\n<hr>\n",
'think' => '<span class="text-info think">' . $msg . "</span>\n",
'info' => "\n\n<span class=\"text-info fw-bolder\">" . $msg . "</span>\n",
'debug' => "\n\nDEBUG: <code>" . htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</code>\n",
'err' => sprintf($this->agentRunnerConfig->getErrorHtmlTemplate(), $msg),
'think' => sprintf($this->agentRunnerConfig->getThinkHtmlTemplate(), $msg),
'info' => sprintf($this->agentRunnerConfig->getInfoHtmlTemplate(), $msg),
'debug' => sprintf(
$this->agentRunnerConfig->getDebugHtmlTemplate(),
htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
),
default => $msg,
};
}

View File

@@ -18,6 +18,7 @@ final readonly class PromptBuilder
private ContextService $contextService,
private SystemPromptRepository $systemPromptRepository,
private ModelGenerationConfigProvider $modelGenerationConfigProvider,
private PromptBuilderConfig $config,
) {
}
@@ -31,7 +32,6 @@ final readonly class PromptBuilder
* @param ShopProductResult[] $shopResults
* @param bool|null $fullContext
* @param string|null $swagFullOutPut
* @return string
*/
public function build(
string $prompt,
@@ -48,23 +48,21 @@ final readonly class PromptBuilder
$hasShopResults = $shopResults !== [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
$asksForAccessoryOrBundle = $this->asksForAccessoryOrBundle($prompt);
$systemBlock = $this->buildSystemBlock();
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
$responseFormatBlock = $this->buildResponseFormatBlock(
$prompt,
$hasShopResults,
$isTechnicalProductQuestion,
$isPriceDrivenQuestion
hasShopResults: $hasShopResults,
isTechnicalProductQuestion: $isTechnicalProductQuestion,
asksForAccessoryOrBundle: $asksForAccessoryOrBundle
);
$knowledgeBlock = $this->buildKnowledgeBlock(
$knowledgeChunks,
$urlContent,
$prompt,
$hasShopResults,
$isPriceDrivenQuestion
knowledgeChunks: $knowledgeChunks,
urlContent: $urlContent,
hasShopResults: $hasShopResults,
isTechnicalProductQuestion: $isTechnicalProductQuestion
);
$userBlock = $this->buildUserBlock($prompt);
@@ -106,12 +104,12 @@ final readonly class PromptBuilder
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
return $this->config->getSystemSectionLabel() . ":\n" . $this->normalizeBlockText($activeSystemPrompt);
}
private function buildUserBlock(string $prompt): string
{
return "USER QUESTION:\n" . $prompt;
return $this->config->getUserQuestionSectionLabel() . ":\n" . $prompt;
}
/**
@@ -146,12 +144,11 @@ final readonly class PromptBuilder
return '';
}
return
"CONVERSATION CONTEXT (contextual only):\n" .
"The following messages are previous turns of this conversation.\n" .
"Use them to resolve references, follow-up questions, and user intent.\n" .
"They must not override retrieved factual knowledge or live shop data.\n\n" .
$history;
return $this->implodeBlocks([
$this->config->getConversationContextSectionLabel() . ':',
$this->implodeLines($this->config->getConversationContextIntroLines()),
$history,
]);
}
/**
@@ -165,10 +162,11 @@ final readonly class PromptBuilder
$parts = [];
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
$parts[] =
"SHOP SEARCH QUERY:\n" .
$swagFullOutPut . "\n" .
"Source: Shop Search";
$parts[] = $this->implodeBlocks([
$this->config->getShopSearchQuerySectionLabel() . ':',
$swagFullOutPut,
$this->config->getShopSearchQuerySourceLine(),
]);
}
$normalizedShopResults = array_values(array_filter(
@@ -181,77 +179,33 @@ final readonly class PromptBuilder
}
$totalCount = count($normalizedShopResults);
$limitedShopResults = array_slice($normalizedShopResults, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT);
$isDetailed = count($limitedShopResults) <= 5;
$limitedShopResults = array_slice($normalizedShopResults, 0, $this->config->getMaxShopResultsInPrompt());
$isDetailed = count($limitedShopResults) <= $this->config->getDetailedShopResultsMaxCount();
$lines = [];
foreach ($limitedShopResults as $i => $product) {
$n = $i + 1;
$entryParts = [
"[{$n}] " . $this->normalizeBlockText($product->name),
];
if ($product->productNumber) {
$entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
}
if ($product->manufacturer) {
$entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
}
if ($product->price) {
$entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
}
if ($product->available !== null) {
$entryParts[] = "Available: " . ($product->available ? 'yes' : 'no');
}
foreach ($product->highlights as $highlight) {
$highlight = $this->normalizeBlockText((string) $highlight);
if ($highlight !== '') {
$entryParts[] = "- " . $highlight;
}
}
if ($product->url) {
$entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
}
if ($product->productImage) {
$entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
}
if ($isDetailed && $product->description) {
$entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
}
if ($product->customFields) {
$entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
}
$lines[] = implode("\n", $entryParts);
$lines[] = $this->buildShopProductEntry(
product: $product,
index: $i + 1,
isDetailed: $isDetailed
);
}
if ($lines !== []) {
$header =
"LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
"Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
"If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
"Do not infer undocumented technical specifications from shop data.\n" .
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
$headerLines = $this->config->getLiveShopResultsHeaderLines();
if ($totalCount > count($limitedShopResults)) {
$header .= "\n" .
"Only the top " . count($limitedShopResults) . " ranked shop results are shown here out of {$totalCount} total results.";
$headerLines[] = sprintf(
$this->config->getLiveShopResultsOverflowNoticeTemplate(),
count($limitedShopResults),
$totalCount
);
}
$parts[] = $header . "\n\n" . implode("\n\n", $lines);
$parts[] = $this->implodeBlocks([
$this->implodeLines($headerLines),
implode("\n\n", $lines),
]);
}
return $this->implodeBlocks($parts);
@@ -260,89 +214,60 @@ final readonly class PromptBuilder
/**
* Build a small priority block that tells the model what to surface first.
*/
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
private function buildOutputPriorityBlock(bool $hasShopResults): string
{
if (!$hasShopResults) {
return '';
}
if ($isPriceDrivenQuestion) {
return
"OUTPUT PRIORITY:\n" .
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
}
return
"OUTPUT PRIORITY:\n" .
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
"If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" .
"Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
return $this->buildRuleBlock(
$this->config->getOutputPrioritySectionLabel(),
$this->config->getOutputPriorityRules()
);
}
private function buildResponseFormatBlock(
string $prompt,
bool $hasShopResults,
bool $isTechnicalProductQuestion,
bool $isPriceDrivenQuestion
bool $asksForAccessoryOrBundle
): string {
$rules = [
"RESPONSE FORMAT RULES:",
"- Keep normal spacing between all words. Never fuse words together.",
"- Use short, clean paragraphs or short labeled sections.",
"- Do not use persuasive or promotional wording.",
"- Do not repeat the same fact in slightly different wording.",
"- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.",
"- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.",
"- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.",
"- Do not combine technical identity from one source with commercial fields from a different product.",
"- Product number, price, availability, and URL must belong to the same explicitly grounded product.",
];
$rules = $this->config->getResponseFormatBaseRules();
if ($hasShopResults) {
$rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.";
$rules[] = "- Keep price, availability, and URL on separate lines when they are present.";
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
$rules = array_merge($rules, $this->config->getResponseFormatWithShopRules());
} else {
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
$rules = array_merge($rules, $this->config->getResponseFormatWithoutShopRules());
}
if ($isTechnicalProductQuestion) {
$rules[] = "- Write like technical documentation: precise, neutral, and source-close.";
$rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.";
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
$rules = array_merge($rules, $this->config->getResponseFormatTechnicalRules());
}
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-driven questions, answer the threshold result first.";
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
if ($asksForAccessoryOrBundle) {
$rules = array_merge($rules, $this->config->getResponseFormatAccessoryRules());
}
if ($this->asksForAccessoryOrBundle($prompt)) {
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
$rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.";
$rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.";
}
return implode("\n", $rules);
return $this->buildRuleBlock(
$this->config->getResponseFormatSectionLabel(),
$rules
);
}
/**
* Build the knowledge block.
*
* Retrieved knowledge remains the main source for technical matching and explanation.
* Shop data is preferred for current commercial fields.
*
* @param string[] $knowledgeChunks
*/
private function buildKnowledgeBlock(
array $knowledgeChunks,
string $urlContent,
string $prompt,
bool $hasShopResults,
bool $isPriceDrivenQuestion
bool $isTechnicalProductQuestion
): string {
$knowledgeParts = [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
if ($knowledgeChunks !== []) {
$lines = [];
@@ -359,56 +284,71 @@ final readonly class PromptBuilder
}
if ($lines !== []) {
$parts = [
"LANGUAGE RULES:\n" .
implode("\n", $this->buildLanguageRules()),
"FACT GROUNDING RULES:\n" .
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
"Source: Documents\n" .
$knowledgeParts[] = $this->implodeBlocks([
$this->buildRuleBlock(
$this->config->getLanguageRulesSectionLabel(),
$this->config->getLanguageRules()
),
$this->buildRuleBlock(
$this->config->getFactGroundingRulesSectionLabel(),
$this->buildFactGroundingRules(
hasShopResults: $hasShopResults,
isTechnicalProductQuestion: $isTechnicalProductQuestion
)
),
$this->implodeBlocks([
$this->config->getRetrievedKnowledgeSectionLabel() . ':',
$this->config->getRetrievedKnowledgeSourceLine(),
implode("\n\n", $lines),
];
$knowledgeParts[] = implode("\n\n", $parts);
]),
]);
}
}
if ($urlContent !== '') {
$knowledgeParts[] =
"CONTENT FROM URL (authoritative if user-provided):\n" .
"Source: URL\n" .
$urlContent;
$knowledgeParts[] = $this->implodeBlocks([
$this->config->getUrlContentSectionLabel() . ':',
$this->config->getUrlContentSourceLine(),
$urlContent,
]);
}
return $this->implodeBlocks($knowledgeParts);
}
/**
* Resolve how many characters may still be used by history.
*
* The active model num_ctx is converted into a conservative prompt budget.
* Shop, knowledge and user question are fixed priority blocks.
* History only receives the remaining space.
*/
private function resolveHistoryBudgetChars(string $fixedPrompt): int
{
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
$outputReserveTokens = $this->clamp(
(int) floor($numCtx * PromptBuilderConfig::OUTPUT_RESERVE_RATIO),
PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS,
PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS
(int) floor($numCtx * $this->config->getOutputReserveRatio()),
$this->config->getOutputReserveMinTokens(),
$this->config->getOutputReserveMaxTokens()
);
$safetyReserveTokens = $this->clamp(
(int) floor($numCtx * PromptBuilderConfig::SAFETY_RESERVE_RATIO),
PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS,
PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS
(int) floor($numCtx * $this->config->getSafetyReserveRatio()),
$this->config->getSafetyReserveMinTokens(),
$this->config->getSafetyReserveMaxTokens()
);
$promptBudgetTokens = max(
PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS,
$this->config->getMinPromptBudgetTokens(),
$numCtx - $outputReserveTokens - $safetyReserveTokens
);
$promptBudgetChars = $promptBudgetTokens * PromptBuilderConfig::CHARS_PER_TOKEN;
$promptBudgetChars = $promptBudgetTokens * $this->config->getCharsPerToken();
$remaining = $promptBudgetChars
- mb_strlen($fixedPrompt)
- PromptBuilderConfig::HISTORY_PADDING_CHARS;
- $this->config->getHistoryPaddingChars();
return max(0, $remaining);
}
@@ -416,87 +356,118 @@ final readonly class PromptBuilder
/**
* @return string[]
*/
private function buildLanguageRules(): array
private function buildFactGroundingRules(bool $hasShopResults, bool $isTechnicalProductQuestion): array
{
return [
"- Answer only in the same language as the user question.",
"- All headings, labels, notes, and structural elements must be in the same language as the user question.",
"- Do not switch languages unless the user does.",
"- If headings are used, write them in the user's language.",
];
}
/**
* @return string[]
*/
private function buildFactGroundingRules(
bool $isTechnicalProductQuestion,
bool $hasShopResults,
bool $isPriceDrivenQuestion
): array {
$rules = [
"- State only facts that are explicitly present in the provided sources.",
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
"- Do not invent missing values.",
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
"- Do not claim that information is missing if it appears in the provided sources.",
"- Do not compare with other products unless those products are also present in the provided sources.",
"- Prefer source-faithful wording over persuasive wording.",
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
"- Clearly separate explicit facts from inferences.",
"- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
"- If a sentence cannot be traced to the provided sources, do not write it.",
"- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.",
"- If the sources do not identify a suitable product, do not invent one.",
];
$rules = $this->config->getFactGroundingBaseRules();
if ($hasShopResults) {
$rules = array_merge($rules, [
"- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
"- When shop results are present and relevant, include current price and the actual URL if available.",
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
"- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
"- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
"- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.",
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
]);
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
}
$rules = array_merge($rules, $this->config->getFactGroundingWithShopRules());
} else {
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
$rules = array_merge($rules, $this->config->getFactGroundingWithoutShopRules());
}
if ($isTechnicalProductQuestion) {
$rules = array_merge($rules, [
"- For technical product questions, answer primarily with explicitly stated facts.",
"- Behave like a technical documentation assistant, not like a sales advisor.",
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
"- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
"- Do not translate technical facts into business value unless the source explicitly does so.",
"- Do not recommend process changes unless explicitly present in the source.",
"- Do not use persuasive summaries or advisory conclusions.",
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
"- Use neutral engineering language.",
"- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
"- If the source lists application areas, repeat only those areas and do not broaden them.",
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
"- If the source states only a threshold function, do not expand it into broader control logic.",
"- If a detail is not explicitly stated in the provided sources, say so plainly.",
"- Prefer short, source-close sentences over explanatory expansion.",
"- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.",
]);
$rules = array_merge($rules, $this->config->getFactGroundingTechnicalRules());
}
return $rules;
}
/**
 * Render a single shop product as a newline-separated prompt entry.
 *
 * The first line is "[index] name". Every further line is appended only when
 * the matching product field is set; the description is additionally gated
 * by $isDetailed. Labels and the highlight prefix come from the config.
 */
private function buildShopProductEntry(ShopProductResult $product, int $index, bool $isDetailed): string
{
    $settings = $this->config;

    // "<label>: <normalized value>" – the shape shared by most lines below.
    $labelled = fn($label, $value): string => $label . ': ' . $this->normalizeBlockText($value);

    $lines = ["[{$index}] " . $this->normalizeBlockText($product->name)];

    if ($product->productNumber) {
        $lines[] = $labelled($settings->getShopProductNumberLabel(), $product->productNumber);
    }

    if ($product->manufacturer) {
        $lines[] = $labelled($settings->getShopManufacturerLabel(), $product->manufacturer);
    }

    if ($product->price) {
        $lines[] = $labelled($settings->getShopPriceLabel(), $product->price);
    }

    // Availability is tri-state: null means "unknown" and produces no line.
    if ($product->available !== null) {
        $availabilityLabel = $settings->getShopAvailabilityLabel();

        if ($product->available) {
            $availabilityText = $settings->getShopAvailabilityYesLabel();
        } else {
            $availabilityText = $settings->getShopAvailabilityNoLabel();
        }

        $lines[] = $availabilityLabel . ': ' . $availabilityText;
    }

    foreach ($product->highlights as $rawHighlight) {
        $highlightText = $this->normalizeBlockText((string) $rawHighlight);

        if ($highlightText === '') {
            continue;
        }

        $lines[] = $settings->getShopHighlightPrefix() . $highlightText;
    }

    if ($product->url) {
        $lines[] = $labelled($settings->getShopUrlLabel(), $product->url);
    }

    if ($product->productImage) {
        $lines[] = $labelled($settings->getShopProductImageLabel(), $product->productImage);
    }

    if ($isDetailed && $product->description) {
        $lines[] = $labelled($settings->getShopDescriptionLabel(), $product->description);
    }

    if ($product->customFields) {
        $lines[] = $labelled($settings->getShopMetaInformationLabel(), $product->customFields);
    }

    return implode("\n", $lines);
}
/**
 * Build a labelled rule section: "<label>:" followed by one rule per line.
 *
 * Each rule is passed through normalizeBlockText(); rules that normalize to
 * an empty string are dropped. When nothing is left, an empty string is
 * returned so that no dangling section header is produced.
 *
 * @param string[] $rules
 */
private function buildRuleBlock(string $sectionLabel, array $rules): string
{
    $usableRules = array_reduce(
        $rules,
        function (array $kept, string $rule): array {
            $text = $this->normalizeBlockText($rule);

            if ($text !== '') {
                $kept[] = $text;
            }

            return $kept;
        },
        []
    );

    return $usableRules === []
        ? ''
        : $sectionLabel . ":\n" . implode("\n", $usableRules);
}
/**
 * Join lines with a single newline after normalizing each one.
 *
 * Lines that are empty after normalizeBlockText() are left out.
 *
 * @param string[] $lines
 */
private function implodeLines(array $lines): string
{
    $normalized = array_map(
        fn(string $line): string => $this->normalizeBlockText($line),
        $lines
    );

    $nonEmpty = array_filter(
        $normalized,
        static fn(string $line): bool => $line !== ''
    );

    // implode() ignores keys, so the gaps left by array_filter() do not matter.
    return implode("\n", $nonEmpty);
}
private function implodeBlocks(array $blocks): string
{
$filtered = array_values(array_filter(
@@ -537,41 +508,26 @@ final readonly class PromptBuilder
/**
 * Heuristic: does the prompt look like a technical question about a product?
 *
 * Counts how many configured technical keywords occur in the lower-cased
 * prompt (substring match). Reaching the configured threshold is sufficient;
 * otherwise the prompt must match the configured model-name pattern.
 */
private function isLikelyTechnicalProductQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');
    $matches = 0;

    foreach ($this->config->getTechnicalProductKeywords() as $keyword) {
        if (str_contains($normalized, $keyword)) {
            $matches++;
        }
    }

    if ($matches >= $this->config->getTechnicalProductKeywordMatchThreshold()) {
        return true;
    }

    // The pattern is matched against the original prompt, not the lower-cased copy.
    return preg_match($this->config->getTechnicalProductModelPattern(), $prompt) === 1;
}

/**
 * Heuristic: does the prompt ask about prices or a price threshold?
 *
 * True for a German threshold phrase followed by an amount and a currency
 * marker ("unter 100 euro", "mehr als 3000 €"), or when the prompt contains
 * one of the price words "preis", "preise", "kosten" or "kostet".
 */
private function isLikelyPriceDrivenQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // The word boundary is only required after "euro"/"eur": "€" is not a word
    // character, so a trailing \b after it would reject "... 100 €" at the end
    // of the prompt or before a space.
    if (preg_match('/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*(?:(?:euro|eur)\b|€)/u', $normalized) === 1) {
        return true;
    }

    return str_contains($normalized, 'preis')
        || str_contains($normalized, 'preise')
        || str_contains($normalized, 'kosten')
        || str_contains($normalized, 'kostet');
}
private function asksForAccessoryOrBundle(string $prompt): bool
{
$normalized = mb_strtolower($prompt, 'UTF-8');
foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $keyword) {
foreach ($this->config->getAccessoryRequestKeywords() as $keyword) {
if (str_contains($normalized, $keyword)) {
return true;
}

View File

@@ -4,7 +4,6 @@ declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\CommerceReferenceContext;
use App\Commerce\Dto\CommerceSearchQuery;
use App\Config\CommerceIntentConfig;
use App\Config\CommerceQueryParserConfig;
@@ -24,12 +23,10 @@ final readonly class CommerceQueryParser
public function parse(
string $originalPrompt,
string $intent,
string $historyContext = '',
?CommerceReferenceContext $referenceContext = null
string $historyContext = ''
): CommerceSearchQuery {
$normalizedPrompt = $this->normalize($originalPrompt);
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
$sizes = $this->extractSizes($normalizedPrompt);
@@ -47,58 +44,23 @@ final readonly class CommerceQueryParser
if (
!$isDirectProductQuery
&& $historyContext !== ''
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
&& $this->shouldUseHistoryContext($normalizedPrompt)
) {
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
$historyParse = $this->parseHistoryContext($historyContext);
if ($latestHistoryQuestion !== '') {
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
$historySearchText = $this->buildSearchText(
prompt: $normalizedHistoryPrompt,
sizes: $historySizes,
brand: $historyBrand,
priceMin: $historyPriceMin,
priceMax: $historyPriceMax,
preserveDirectProductQuery: $isDirectHistoryProductQuery
if ($historyParse !== null) {
$searchText = $this->mergeSearchTexts(
$historyParse['searchText'],
$searchText
);
$searchText = $this->mergeSearchTexts($historySearchText, $searchText);
if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') {
$brand = $historyBrand;
if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') {
$brand = $historyParse['brand'];
}
}
}
if (
!$isDirectProductQuery
&& $referenceContext !== null
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
) {
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
} else {
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
}
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
if ($normalizedManufacturer !== '') {
$brand = $normalizedManufacturer;
}
}
}
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
return new CommerceSearchQuery(
originalPrompt: $originalPrompt,
@@ -118,10 +80,14 @@ final readonly class CommerceQueryParser
{
$value = $this->textNormalizer->normalize($prompt);
$value = $this->queryCleaner->clean($value);
$value = mb_strtolower(trim($value), 'UTF-8');
$value = str_replace(['€'], ' euro ', $value);
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
$value = mb_strtolower(trim($value));
$value = str_replace(
$this->config->getNormalizationSearch(),
$this->config->getNormalizationReplace(),
$value
);
$value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
$value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
return trim($value);
}
@@ -134,32 +100,21 @@ final readonly class CommerceQueryParser
$priceMin = null;
$priceMax = null;
if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
$a = $this->toFloat($m[1]);
$b = $this->toFloat($m[2]);
if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) {
$a = $this->toFloat($matches[1]);
$b = $this->toFloat($matches[2]);
if ($a !== null && $b !== null) {
return [min($a, $b), max($a, $b)];
}
}
if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
$priceMax = $this->toFloat($m[1]);
if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) {
$priceMax = $this->toFloat($matches[1]);
}
if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
$priceMin = $this->toFloat($m[1]);
}
// NEW:
// Recognize comparative lower-bound phrasing such as:
// - mehr als 3000 euro
// - über 3000 euro
// - ueber 3000 euro
// - größer als 3000 euro
// - groesser als 3000 euro
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
$priceMin = $this->toFloat($m[1]);
if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) {
$priceMin = $this->toFloat($matches[1]);
}
return [$priceMin, $priceMax];
@@ -172,8 +127,7 @@ final readonly class CommerceQueryParser
{
$sizes = [];
$sizePattern = $this->intentConfig->getSizePattern();
if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) {
if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) {
return [];
}
@@ -181,8 +135,7 @@ final readonly class CommerceQueryParser
$sizes[] = trim($size);
}
$sizeTokenPattern = $this->intentConfig->getSizeTokenPattern();
if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) {
if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) {
foreach ($tokenMatches[1] as $sizeToken) {
$sizes[] = trim($sizeToken);
}
@@ -207,6 +160,9 @@ final readonly class CommerceQueryParser
return null;
}
/**
* @param string[] $sizes
*/
private function buildSearchText(
string $prompt,
array $sizes,
@@ -219,7 +175,7 @@ final readonly class CommerceQueryParser
return $this->buildDirectProductSearchText($prompt);
}
$text = ' ' . $prompt . ' ';
$text = $this->wrapForPhraseReplacement($prompt);
foreach ($this->config->getPhrasesToRemove() as $phrase) {
$normalizedPhrase = $this->normalize((string) $phrase);
@@ -228,7 +184,11 @@ final readonly class CommerceQueryParser
continue;
}
$text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text);
$text = str_replace(
$this->wrapForPhraseReplacement($normalizedPhrase),
' ',
$text
);
}
foreach ($sizes as $size) {
@@ -238,111 +198,69 @@ final readonly class CommerceQueryParser
continue;
}
$text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
$text = preg_replace(
$this->config->buildExactTokenRemovalPattern($normalizedSize),
' ',
$text
) ?? $text;
}
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
$text = preg_replace(
$this->config->buildExactTokenRemovalPattern($brand),
' ',
$text
) ?? $text;
}
if ($priceMin !== null || $priceMax !== null) {
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) {
$text = preg_replace($pattern, ' ', $text) ?? $text;
}
}
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
$text = trim($text, " \t\n\r\0\x0B-.,");
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
$text = trim($text, $this->config->getSearchTextTrimCharacters());
$tokens = array_filter(
explode(' ', $text),
static fn(string $token): bool => mb_strlen($token) > 1
fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
);
$tokens = $this->filterSearchTokens($tokens);
$tokens = $this->stripReferenceOnlyTokens($tokens);
return trim(implode(' ', $tokens));
}
/**
 * Build the search text for a prompt that already names a product directly.
 *
 * Unlike the generic search-text builder, nothing is stripped from the
 * prompt: whitespace is collapsed, the configured trim characters are
 * removed from both ends, tokens shorter than the configured minimum length
 * are dropped and duplicate tokens are removed (first occurrence wins).
 */
private function buildDirectProductSearchText(string $prompt): string
{
    $text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt;
    $text = trim($text, $this->config->getSearchTextTrimCharacters());

    $tokens = array_filter(
        explode(' ', $text),
        fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
    );

    // array_unique() keeps the original keys; reindex before joining.
    $tokens = array_values(array_unique($tokens));

    return trim(implode(' ', $tokens));
}
private function shouldUseHistoryContext(string $prompt, string $searchText): bool
private function shouldUseHistoryContext(string $prompt): bool
{
if ($this->isReferenceOnlyFollowUp($prompt)) {
return true;
}
if ($this->isTooGenericSearchText($searchText)) {
return true;
}
return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1;
}
private function shouldUseReferenceContext(string $prompt, string $searchText): bool
{
if ($this->isReferenceOnlyFollowUp($prompt)) {
return true;
}
return $this->isTooGenericSearchText($searchText);
}
private function isReferenceOnlyFollowUp(string $prompt): bool
{
return preg_match('/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u', $prompt) === 1;
}
private function isTooGenericSearchText(string $searchText): bool
{
$tokens = array_values(array_filter(
preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [],
static fn(string $token): bool => $token !== ''
));
if ($tokens === []) {
return true;
}
$genericTokens = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
foreach ($tokens as $token) {
if (!isset($genericTokens[$token])) {
return false;
}
}
return true;
}
private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string
{
return $this->normalize($referenceContext->buildReferenceSearchText());
return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
}
private function extractLatestQuestionFromHistory(string $historyContext): string
{
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
$result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches);
if ($result === false) {
return '';
}
$questions = $matches[1] ?? [];
if ($questions === []) {
return '';
}
@@ -352,11 +270,11 @@ final readonly class CommerceQueryParser
return is_string($lastQuestion) ? trim($lastQuestion) : '';
}
private function mergeSearchTexts(string $left, string $right): string
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
{
$tokens = [];
foreach ([$left, $right] as $text) {
foreach ([$historySearchText, $currentSearchText] as $text) {
if ($text === '') {
continue;
}
@@ -364,7 +282,7 @@ final readonly class CommerceQueryParser
foreach (explode(' ', $text) as $token) {
$token = trim($token);
if ($token === '' || mb_strlen($token) <= 1) {
if ($token === '' || mb_strlen($token) <= $this->config->getMinSearchTokenLength()) {
continue;
}
@@ -381,25 +299,11 @@ final readonly class CommerceQueryParser
*/
private function filterSearchTokens(array $tokens): array
{
$stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true);
$stopWords = $this->config->getFilterSearchTokens();
return array_values(array_filter(
$tokens,
static fn(string $token): bool => !isset($stopWords[$token])
));
}
/**
* @param string[] $tokens
* @return string[]
*/
private function stripReferenceOnlyTokens(array $tokens): array
{
$referenceOnly = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
return array_values(array_filter(
$tokens,
static fn(string $token): bool => !isset($referenceOnly[$token])
static fn(string $token): bool => !in_array($token, $stopWords, true)
));
}
@@ -417,25 +321,25 @@ final readonly class CommerceQueryParser
return true;
}
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
$tokens = preg_split(
$this->config->getWhitespaceSplitPattern(),
$prompt,
-1,
PREG_SPLIT_NO_EMPTY
) ?: [];
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
return count($tokens) <= $this->config->getDirectProductMaxTokens()
&& preg_match($this->config->getDirectProductDigitPattern(), $prompt) === 1;
}
/**
 * Check whether the text contains a model-like phrase.
 *
 * The pattern comes from the parser config; the previously inlined regex
 * (one to three words followed by a 2-5 digit number) made the configured
 * pattern unreachable and has been removed.
 */
private function containsModelLikePhrase(string $text): bool
{
    return preg_match($this->config->getModelLikePattern(), $text) === 1;
}
/**
 * Check whether the text contains an accessory-like phrase.
 *
 * The pattern comes from the parser config; the previously inlined regex
 * (an accessory word such as "indikator" or "kit" followed by a number) made
 * the configured pattern unreachable and has been removed.
 */
private function containsAccessoryLikePhrase(string $text): bool
{
    return preg_match($this->config->getAccessoryLikePattern(), $text) === 1;
}
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
@@ -445,7 +349,7 @@ final readonly class CommerceQueryParser
}
return preg_match(
'/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u',
$this->config->buildBrandPartOfModelPattern($brand),
$prompt
) === 1;
}
@@ -456,4 +360,42 @@ final readonly class CommerceQueryParser
return is_numeric($value) ? (float) $value : null;
}
/**
 * Derive search text and brand from the most recent question in the history.
 *
 * The latest history question is run through the same normalize / extract /
 * build steps as a fresh prompt. Returns null when the history contains no
 * question.
 *
 * @return array{searchText:string, brand:?string}|null
 */
private function parseHistoryContext(string $historyContext): ?array
{
    $question = $this->extractLatestQuestionFromHistory($historyContext);

    if ($question === '') {
        return null;
    }

    $normalizedQuestion = $this->normalize($question);
    $treatAsDirectQuery = $this->isDirectProductQuery($normalizedQuestion);
    [$minPrice, $maxPrice] = $this->extractPriceRange($normalizedQuestion);
    $sizeTokens = $this->extractSizes($normalizedQuestion);
    $brandName = $this->extractBrand($normalizedQuestion);

    return [
        'searchText' => $this->buildSearchText(
            prompt: $normalizedQuestion,
            sizes: $sizeTokens,
            brand: $brandName,
            priceMin: $minPrice,
            priceMax: $maxPrice,
            preserveDirectProductQuery: $treatAsDirectQuery
        ),
        'brand' => $brandName,
    ];
}
/**
 * Pad the text with one leading and one trailing space.
 */
private function wrapForPhraseReplacement(string $text): string
{
    return " {$text} ";
}
}

View File

@@ -5,16 +5,15 @@ declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\ShopProductResult;
use App\Config\SearchRepairConfig;
use Psr\Log\LoggerInterface;
final readonly class SearchRepairService
{
public function __construct(
private ShopSearchService $shopSearchService,
private SearchRepairConfig $config,
private LoggerInterface $logger,
private bool $enabled = true,
private int $maxRepairQueries = 3,
private int $minPrimaryResultsWithoutRepair = 2,
) {
}
@@ -37,22 +36,22 @@ final readonly class SearchRepairService
array $primaryShopResults,
array $knowledgeChunks
): array {
if (!$this->enabled) {
return [
'results' => $primaryShopResults,
'attemptedRepair' => false,
'usedRepair' => false,
'repairQueries' => [],
];
if (!$this->config->isEnabled()) {
return $this->buildRepairResult(
results: $primaryShopResults,
attemptedRepair: false,
usedRepair: false,
repairQueries: []
);
}
if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
return [
'results' => $primaryShopResults,
'attemptedRepair' => false,
'usedRepair' => false,
'repairQueries' => [],
];
return $this->buildRepairResult(
results: $primaryShopResults,
attemptedRepair: false,
usedRepair: false,
repairQueries: []
);
}
$repairQueries = $this->buildRepairQueries(
@@ -63,12 +62,12 @@ final readonly class SearchRepairService
);
if ($repairQueries === []) {
return [
'results' => $primaryShopResults,
'attemptedRepair' => false,
'usedRepair' => false,
'repairQueries' => [],
];
return $this->buildRepairResult(
results: $primaryShopResults,
attemptedRepair: false,
usedRepair: false,
repairQueries: []
);
}
$this->logger->info('Shop repair started', [
@@ -99,12 +98,12 @@ final readonly class SearchRepairService
'repairQueries' => $repairQueries,
]);
return [
'results' => $primaryShopResults,
'attemptedRepair' => true,
'usedRepair' => false,
'repairQueries' => $repairQueries,
];
return $this->buildRepairResult(
results: $primaryShopResults,
attemptedRepair: true,
usedRepair: false,
repairQueries: $repairQueries
);
}
$mergedResults = $this->rankMergedResults(
@@ -129,16 +128,16 @@ final readonly class SearchRepairService
'manufacturer' => $product->manufacturer,
'available' => $product->available,
],
array_slice($mergedResults, 0, 3)
array_slice($mergedResults, 0, $this->config->getTopProductLogLimit())
),
]);
return [
'results' => $mergedResults,
'attemptedRepair' => true,
'usedRepair' => true,
'repairQueries' => $repairQueries,
];
return $this->buildRepairResult(
results: $mergedResults,
attemptedRepair: true,
usedRepair: true,
repairQueries: $repairQueries
);
}
/**
@@ -157,15 +156,11 @@ final readonly class SearchRepairService
return true;
}
// Always try repair for bundle/accessory prompts.
// These prompts often need a second pass even when the first search
// already returned some results, because the user is asking for a
// combination of main device + matching accessory.
if ($asksForBundle) {
return true;
}
if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) {
if ($primaryResultsCount >= $this->config->getMinPrimaryResultsWithoutRepair()) {
return false;
}
@@ -173,7 +168,7 @@ final readonly class SearchRepairService
return false;
}
return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair;
return $primaryResultsCount < $this->config->getMinPrimaryResultsWithoutRepair();
}
/**
@@ -230,7 +225,7 @@ final readonly class SearchRepairService
fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery)
));
return array_slice($queries, 0, max(1, $this->maxRepairQueries));
return array_slice($queries, 0, max(1, $this->config->getMaxRepairQueries()));
}
/**
@@ -291,7 +286,7 @@ final readonly class SearchRepairService
$candidates = [];
preg_match_all(
'/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u',
$this->config->getModelCandidatePattern(),
$text,
$matches
);
@@ -321,7 +316,7 @@ final readonly class SearchRepairService
$candidates = [];
preg_match_all(
'/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu',
$this->config->getAccessoryCandidatePattern(),
$text,
$matches
);
@@ -368,15 +363,15 @@ final readonly class SearchRepairService
{
$score = 0;
if (preg_match('/\d/u', $candidate) === 1) {
$score += 4;
if (preg_match($this->config->getContainsDigitPattern(), $candidate) === 1) {
$score += $this->config->getCandidateDigitScore();
}
$wordCount = count($this->tokenize($candidate));
$score += min($wordCount, 4);
$score += min($wordCount, $this->config->getCandidateWordCountCap());
if (preg_match('/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu', $candidate) === 1) {
$score += 3;
if (preg_match($this->config->getSpecificityBoostPattern(), $candidate) === 1) {
$score += $this->config->getSpecificityBoostScore();
}
return $score;
@@ -384,39 +379,19 @@ final readonly class SearchRepairService
/**
 * Check whether the prompt asks for an accessory or a bundle.
 *
 * The pattern comes from the repair config; the previously inlined keyword
 * regex made the configured pattern unreachable and has been removed.
 */
private function asksForBundleOrAccessory(string $prompt): bool
{
    return preg_match($this->config->getAccessoryOrBundlePattern(), $prompt) === 1;
}
/**
 * Check whether the text contains a model-like phrase.
 *
 * The pattern comes from the repair config; the previously inlined regex
 * made the configured pattern unreachable and has been removed.
 */
private function containsModelLikePhrase(string $text): bool
{
    return preg_match($this->config->getModelLikePattern(), $text) === 1;
}
private function looksTooGeneric(string $candidate): bool
{
$normalized = mb_strtolower($candidate);
foreach ([
'wasser',
'messgerät',
'messgeraet',
'produkt',
'geräte',
'geraete',
'gerät',
'geraet',
'resthärte',
'resthaerte',
'preis',
'infos',
'wissen',
] as $genericToken) {
foreach ($this->config->getGenericCandidateTokens() as $genericToken) {
if ($normalized === $genericToken) {
return true;
}
@@ -428,8 +403,8 @@ final readonly class SearchRepairService
/**
 * Clean up a repair query candidate.
 *
 * Surrounding whitespace is trimmed, inner whitespace runs are collapsed via
 * the configured pattern and the configured trim characters are stripped
 * from both ends. The earlier hard-coded collapse/trim pass is gone, so the
 * configured values are the only ones applied.
 */
private function sanitizeQuery(string $query): string
{
    $query = trim($query);
    $query = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $query) ?? $query;
    $query = trim($query, $this->config->getSanitizeTrimCharacters());

    // Stripping the trim characters can expose whitespace again (e.g. "foo ;").
    return trim($query);
}
@@ -446,7 +421,7 @@ final readonly class SearchRepairService
$intersection = array_intersect($candidateTokens, $primaryTokens);
$overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens));
return $overlapRatio >= 0.9;
return $overlapRatio >= $this->config->getPrimaryQueryOverlapThreshold();
}
/**
@@ -497,12 +472,12 @@ final readonly class SearchRepairService
foreach ($allResults as $index => $product) {
$score = 0;
$score += $this->scoreProductAgainstText($product, $prompt) * 3;
$score += $this->scoreProductAgainstText($product, $primaryQuery) * 2;
$score += $this->scoreProductAgainstText($product, $repairSignal) * 4;
$score += $this->scoreProductAgainstText($product, $prompt) * $this->config->getPromptMatchWeight();
$score += $this->scoreProductAgainstText($product, $primaryQuery) * $this->config->getPrimaryQueryMatchWeight();
$score += $this->scoreProductAgainstText($product, $repairSignal) * $this->config->getRepairSignalMatchWeight();
if ($index < count($primaryResults)) {
$score += 1;
$score += $this->config->getPrimaryResultOrderBonus();
}
$decorated[] = [
@@ -549,11 +524,11 @@ final readonly class SearchRepairService
$score = 0;
$intersection = array_intersect($queryTokens, $productTokens);
$score += count($intersection) * 2;
$score += count($intersection) * $this->config->getTokenIntersectionScore();
foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
if (in_array($numberToken, $productTokens, true)) {
$score += 4;
$score += $this->config->getNumericTokenMatchScore();
}
}
@@ -562,7 +537,7 @@ final readonly class SearchRepairService
private function buildProductKey(ShopProductResult $product): string
{
return mb_strtolower(trim(implode('|', [
return mb_strtolower(trim(implode($this->config->getProductKeySeparator(), [
$product->id,
$product->productNumber ?? '',
$product->name,
@@ -576,8 +551,8 @@ final readonly class SearchRepairService
private function tokenize(string $text): array
{
$text = mb_strtolower($text);
$text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
$text = preg_replace($this->config->getTokenizeCleanupPattern(), ' ', $text) ?? $text;
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
$text = trim($text);
if ($text === '') {
@@ -595,7 +570,31 @@ final readonly class SearchRepairService
{
return array_values(array_filter(
$tokens,
static fn(string $token): bool => preg_match('/\d/u', $token) === 1
fn(string $token): bool => preg_match($this->config->getContainsDigitPattern(), $token) === 1
));
}
/**
 * Packs the outcome of a repair run into the array shape returned to callers.
 *
 * The keys are always written in the order results, attemptedRepair,
 * usedRepair, repairQueries; the values are passed through unchanged.
 *
 * @param ShopProductResult[] $results         Products to hand back.
 * @param bool                $attemptedRepair Flag stored under "attemptedRepair".
 * @param bool                $usedRepair      Flag stored under "usedRepair".
 * @param string[]            $repairQueries   Queries stored under "repairQueries".
 *
 * @return array{
 *     results: ShopProductResult[],
 *     attemptedRepair: bool,
 *     usedRepair: bool,
 *     repairQueries: string[]
 * }
 */
private function buildRepairResult(
    array $results,
    bool $attemptedRepair,
    bool $usedRepair,
    array $repairQueries
): array {
    $outcome = [];
    $outcome['results'] = $results;
    $outcome['attemptedRepair'] = $attemptedRepair;
    $outcome['usedRepair'] = $usedRepair;
    $outcome['repairQueries'] = $repairQueries;

    return $outcome;
}
}

View File

@@ -6,50 +6,249 @@ namespace App\Config;
final class AgentRunnerConfig
{
/**
* Character budget (1000) for the commerce history context.
*
* NOTE(review): presumably caps how much chat history is fed into the shop
* query prompt — confirm against the caller in AgentRunner.
*/
public function getCommerceHistoryBudgetChars(): int
{
return 1000;
}
/**
* Knowledge chunk limit (2) for the product-search case.
*
* NOTE(review): presumably the number of retrieved knowledge chunks kept when
* the intent is a plain product search — confirm against the caller.
*/
public function getProductSearchKnowledgeChunkLimit(): int
{
return 2;
}
/**
* Knowledge chunk limit (3) for the advisory product-search case.
*
* One higher than getProductSearchKnowledgeChunkLimit().
*/
public function getAdvisoryProductSearchKnowledgeChunkLimit(): int
{
return 3;
}
/**
* Regex matching a leading label on an optimized shop query.
*
* Anchored at the start of the string, it matches one of "keyword",
* "keywords", "suchquery", "search query" (whitespace optional) or "query",
* followed by optional whitespace, a colon and optional whitespace.
* Flags: case-insensitive (i) and UTF-8 (u).
*/
public function getOptimizedShopQueryPrefixPattern(): string
{
return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu';
}
/**
* Character list for trimming an optimized shop query.
*
* Contains space, tab, LF, CR, NUL, vertical tab, double quote, single quote
* and backtick, i.e. the default trim() whitespace set plus the three quote
* characters.
*/
public function getOptimizedShopQueryTrimCharacters(): string
{
return " \t\n\r\0\x0B\"'`";
}
public function getEmptyPromptMessage(): string
{
return '❌ Empty prompt.';
}
public function getAnalyzeRequestMessage(): string
{
return 'Ich analysiere deine Anfrage...';
}
public function getCheckInternetSourcesMessage(): string
{
return 'Ich prüfe auf Internetquellen...';
}
public function getRetrieveKnowledgeMessage(): string
{
return 'Ich hole relevante Daten aus meinem RAG-Wissen...';
}
public function getOptimizeSearchMessage(): string
{
return 'Ich optimiere die Recherche...';
}
public function getFetchSearchDataMessageTemplate(): string
{
return 'Ich rufe Recherchedaten ab (type: %s)';
}
public function getAnalyzeAllInformationMessage(): string
{
return 'Ich analysiere alle Informationen...';
}
public function getThinkingWhileStreamingMessage(): string
{
return 'Denke nach...';
}
public function getNoLlmDataReceivedMessage(): string
{
return '❌ Es wurden keine Daten vom LLM empfangen.';
}
public function getGenericInternalErrorMessage(): string
{
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
}
public function getDebugInternalErrorPrefix(): string
{
return '❌ Interner Fehler: ';
}
public function getExternalUrlSourceLabel(): string
{
return 'Externe URL';
}
public function getRagKnowledgeSourceLabel(): string
{
return 'RAG Wissen';
}
public function getConversationHistorySourceLabel(): string
{
return 'Chatverlauf';
}
public function getShopSystemSourceLabel(): string
{
return 'Shopsystem';
}
public function getExtendedShopSearchSourceLabel(): string
{
return 'Erweiterte Shopsuche';
}
public function getUsedSourcesPrefix(): string
{
return 'Genutzte Quellen: ';
}
public function getSourcesPrefix(): string
{
return 'Quellen: ';
}
/**
* HTML template for a single source badge.
*
* Contains exactly one "%s" placeholder for the badge text.
* NOTE(review): the template itself does not escape the inserted value —
* confirm that callers only insert trusted or already-escaped text.
*/
public function getSourceBadgeHtmlTemplate(): string
{
return '<span class="badge bg-info text-black">%s</span>';
}
/**
* HTML template for an error message.
*
* One "%s" placeholder inside a "text-danger" span, followed by a newline,
* an <hr> and another newline.
*/
public function getErrorHtmlTemplate(): string
{
return '<span class="text-danger">%s</span>' . "\n<hr>\n";
}
/**
* HTML template for a "think" status line.
*
* One "%s" placeholder inside a "text-info think" span, followed by a newline.
*/
public function getThinkHtmlTemplate(): string
{
return '<span class="text-info think">%s</span>' . "\n";
}
/**
* HTML template for an info line.
*
* Two leading newlines, one "%s" placeholder inside a bold "text-info" span,
* and a trailing newline.
*/
public function getInfoHtmlTemplate(): string
{
return "\n\n" . '<span class="text-info fw-bolder">%s</span>' . "\n";
}
/**
* Template for a debug line.
*
* Two leading newlines, the literal prefix "DEBUG: ", one "%s" placeholder
* wrapped in <code>, and a trailing newline.
*/
public function getDebugHtmlTemplate(): string
{
return "\n\nDEBUG: <code>%s</code>\n";
}
public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
{
$historyBlock = '';
if (trim($commerceHistoryContext) !== '') {
$historyBlock = '
RECENT CONVERSATION CONTEXT:
' . $commerceHistoryContext . '
Additional rules for conversation context:
- The current user input has highest priority.
- Use the recent conversation context only to resolve omitted references.
- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.
- Do not revive older products unless the current user input clearly refers to them.
- If the current input starts a new topic, ignore older product context.
- Prefer the most recent product reference over older ones.
';
$historyBlock = $this->buildHistoryBlock($commerceHistoryContext);
}
return '
Generate a short search query for Shopware 6 from the following user input text.
return $this->implodePromptBlocks([
$this->getShopPromptIntro(),
$this->buildRulesBlock($this->getShopPromptRules()),
$this->getShopPromptOutputFormatBlock(),
$historyBlock,
$this->getCurrentUserInputLabel() . ':',
trim($prompt),
]);
}
Rules:
- Output only the final search query.
- Always convert relevant search terms to their singular form.
- No introduction, no explanation, no quotation marks.
- Use only shop-relevant search terms from the user input for a shop search.
- Maximum 6 search terms, preferably fewer.
- Remove filler words, polite phrases, and irrelevant words.
- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.
- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).
- Separate terms using spaces only.
- If a relevant product name is present, it must be placed at the beginning of the final search query.
- Try to always identify all products mentioned in the user input text, even in long prompts.
- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.
- If the current user input is vague or referential, use the recent conversation context only as support.
- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".
/**
 * Renders the "recent conversation context" section of the shop query prompt.
 *
 * The section consists of three parts, in this order: the context label
 * followed by a colon, the trimmed history text, and the conversation-context
 * rules under their own headline. The parts are joined by
 * implodePromptBlocks().
 */
private function buildHistoryBlock(string $commerceHistoryContext): string
{
    $labelLine = $this->getRecentConversationContextLabel() . ':';
    $historyText = trim($commerceHistoryContext);
    $rulesText = $this->buildRulesBlock(
        $this->getConversationContextRules(),
        'Additional rules for conversation context:'
    );

    return $this->implodePromptBlocks([$labelLine, $historyText, $rulesText]);
}
Output format:
Keyword1 Keyword2 Keyword3
' . $historyBlock . '
/**
* @return string[]
*/
public function getShopPromptRules(): array
{
return [
'- Output only the final search query.',
'- Always convert relevant search terms to their singular form.',
'- No introduction, no explanation, no quotation marks.',
'- Use only shop-relevant search terms from the user input for a shop search.',
'- Maximum 6 search terms, preferably fewer.',
'- Remove filler words, polite phrases, and irrelevant words.',
'- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.',
'- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).',
'- Separate terms using spaces only.',
'- If a relevant product name is present, it must be placed at the beginning of the final search query.',
'- Try to always identify all products mentioned in the user input text, even in long prompts.',
'- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.',
'- If the current user input is vague or referential, use the recent conversation context only as support.',
'- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".',
];
}
CURRENT USER INPUT:
' . $prompt . '
';
/**
* @return string[]
*/
public function getConversationContextRules(): array
{
return [
'- The current user input has highest priority.',
'- Use the recent conversation context only to resolve omitted references.',
'- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.',
'- Do not revive older products unless the current user input clearly refers to them.',
'- If the current input starts a new topic, ignore older product context.',
'- Prefer the most recent product reference over older ones.',
];
}
public function getShopPromptIntro(): string
{
return 'Generate a short search query for Shopware 6 from the following user input text.';
}
public function getShopPromptOutputFormatBlock(): string
{
return "Output format:\nKeyword1 Keyword2 Keyword3";
}
public function getRecentConversationContextLabel(): string
{
return 'RECENT CONVERSATION CONTEXT';
}
public function getCurrentUserInputLabel(): string
{
return 'CURRENT USER INPUT';
}
/**
 * Renders a headline line followed by the given rules, one rule per line.
 *
 * The headline is always followed by a newline, even when $rules is empty.
 *
 * @param string[] $rules    Rule lines, emitted in the given order.
 * @param string   $headline Text placed on the first line.
 */
private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string
{
    $ruleLines = implode("\n", $rules);

    return "{$headline}\n{$ruleLines}";
}
/**
 * Joins prompt fragments into one text with a blank line between fragments.
 *
 * Every fragment is trimmed first. Fragments that are empty after trimming
 * are dropped, so they never produce stray blank lines in the prompt.
 *
 * @param string[] $blocks
 */
private function implodePromptBlocks(array $blocks): string
{
    $kept = [];

    foreach ($blocks as $block) {
        $trimmed = trim($block);

        if ($trimmed === '') {
            continue;
        }

        $kept[] = $trimmed;
    }

    return implode("\n\n", $kept);
}
}

View File

@@ -6,57 +6,38 @@ namespace App\Config;
final class CommerceIntentConfig
{
/**
* @return string[]
*/
public function getStrongSignalsList(): array
{
return [
'shop',
'alle',
'preis',
'preise',
'kunde',
'online',
'produkt',
'produkte',
'artikel',
'sku',
'kaufen',
'kostet',
'kosten',
'verfügbarkeit',
'verfuegbarkeit',
// Search / product discovery signals
'suche',
'such',
'finde',
'finden',
'welche',
'welcher',
'welches',
// Device / system signals
'analysegerät',
'analysegeraet',
'analysegeräte',
'analysegeraete',
'messgerät',
'messgeraet',
'messgeräte',
'messgeraete',
'gerät',
'geraet',
'geräte',
'geraete',
'analysator',
'analysatoren',
'analyzer',
'system',
'systeme',
'anlage',
'anlagen',
];
}
/**
* @return string[]
*/
public function getAdvisorySignals(): array
{
return [
@@ -67,30 +48,36 @@ final class CommerceIntentConfig
'geeignet',
'empfiehl',
'empfehl',
'vergleich',
'vergleichen',
];
}
public function getPricePattern(): string
/**
* @return string[]
*/
public function getPriceTerms(): array
{
$pattern = [
return [
'euro',
'€',
'eur',
'teuer',
'preis',
'preise',
'kosten',
'kostet',
];
return implode('|', $pattern);
}
public function getColorPattern(): string
public function getPricePattern(): string
{
$pattern = [
return implode('|', $this->getPriceTerms());
}
/**
* @return string[]
*/
public function getColorTerms(): array
{
return [
'schwarz',
'weiß',
'weis',
@@ -103,13 +90,19 @@ final class CommerceIntentConfig
'orange',
'braun',
];
return implode('|', $pattern);
}
public function getSizeTokenPattern(): string
public function getColorPattern(): string
{
$pattern = [
return implode('|', $this->getColorTerms());
}
/**
* @return string[]
*/
public function getSizeTokenTerms(): array
{
return [
'xs',
's',
'm',
@@ -118,18 +111,189 @@ final class CommerceIntentConfig
'xxl',
'xxxxl',
];
return implode('|', $pattern);
}
public function getSizePattern(): string
public function getSizeTokenPattern(): string
{
$pattern = [
return implode('|', $this->getSizeTokenTerms());
}
/**
* @return string[]
*/
public function getSizeTerms(): array
{
return [
'größe',
'groesse',
'grösse',
];
}
return implode('|', $pattern);
public function getSizePattern(): string
{
return implode('|', $this->getSizeTerms());
}
/**
 * Regex that captures the value following a size keyword.
 *
 * The keyword alternation comes from getSizePattern(). Capture group 1 holds
 * the value: one or more of a-z, 0-9, "." or "-", after optional whitespace.
 */
public function getSizeExtractionPattern(): string
{
    return sprintf('/\b(?:%s)\s*([a-z0-9.-]+)\b/u', $this->getSizePattern());
}
/**
* Regexes for support and diagnostic wording (errors, alarms, warnings, codes,
* "what does ... mean", "why", symptoms such as "blinkt"/"zeigt").
*
* All patterns are word-bounded, use the UTF-8 flag and have no
* case-insensitive flag. NOTE(review): presumably matched against an already
* lowercased prompt — confirm against the caller.
*
* The last pattern matches "e" followed by one to three digits (e.g. "e12").
* The two-word patterns ("zeigt an", "störung beheben", "stoerung beheben")
* can only match where the corresponding single-word pattern already matches.
*
* @return string[]
*/
public function getSupportDiagnosticPatterns(): array
{
return [
'/\bfehler\b/u',
'/\bfehlercode\b/u',
'/\berror\b/u',
'/\bstörung\b/u',
'/\bstoerung\b/u',
'/\balarm\b/u',
'/\bstörungsmeldung\b/u',
'/\bstoerungsmeldung\b/u',
'/\bmeldung\b/u',
'/\bwarnung\b/u',
'/\bwarncode\b/u',
'/\bcode\b/u',
'/\bwas bedeutet\b/u',
'/\bwarum\b/u',
'/\bblinkt\b/u',
'/\bzeigt\b/u',
'/\bzeigt an\b/u',
'/\bursache\b/u',
'/\bdiagnose\b/u',
'/\bservicefall\b/u',
'/\bproblem\b/u',
'/\bstörung beheben\b/u',
'/\bstoerung beheben\b/u',
'/\be\d{1,3}\b/u',
];
}
/**
* Regexes for explicit commerce wording (shop, price, cost, buy, order,
* product, article, sku, online).
*
* Each pattern matches one whole lowercase word; there is no
* case-insensitive flag.
*
* @return string[]
*/
public function getExplicitCommerceIntentPatterns(): array
{
return [
'/\bshop\b/u',
'/\bpreis\b/u',
'/\bkosten\b/u',
'/\bkostet\b/u',
'/\bkaufen\b/u',
'/\bbestellen\b/u',
'/\bprodukt\b/u',
'/\bartikel\b/u',
'/\bsku\b/u',
'/\bonline\b/u',
];
}
/**
* Regex matching a standalone run of 4 to 10 digits.
*
* Any word-bounded number of that length matches, so the pattern alone does
* not distinguish product numbers from other numbers of the same length.
*/
public function getSkuLikePattern(): string
{
return '/\b\d{4,10}\b/u';
}
/**
 * Regex that detects a numeric amount followed by a price term.
 *
 * The amount is an integer or a decimal with "." or "," as separator. The
 * price terms come from getPricePattern(), which contains word terms such as
 * "euro" and also the "€" symbol. Examples that match: "10 euro", "9,99€",
 * "10 €".
 */
public function getPriceValuePattern(): string
{
    // A trailing \b cannot be used here: "€" is not a word character, so \b
    // after it only succeeds when a word character follows directly, and
    // "10 €" would never match. (?!\w) is equivalent to \b after the word
    // terms and also accepts "€" at the end of the text or before a space.
    return '/\b\d+(?:[.,]\d+)?\s*(?:' . $this->getPricePattern() . ')(?!\w)/u';
}
/**
* Regex matching a size keyword followed by a value.
*
* Same shape as getSizeExtractionPattern() but without a capture group:
* keyword from getSizePattern(), optional whitespace, then one or more of
* a-z, 0-9, "." or "-".
* NOTE(review): no case-insensitive flag — presumably applied to lowercased
* input; confirm against the caller.
*/
public function getSizeValuePattern(): string
{
return '/\b(?:' . $this->getSizePattern() . ')\s*[a-z0-9.-]+\b/u';
}
/**
* Regex matching one of the size tokens from getSizeTokenPattern() as a
* whole word.
*/
public function getSizeTokenValuePattern(): string
{
return '/\b(?:' . $this->getSizeTokenPattern() . ')\b/u';
}
/**
* Regex matching one of the color terms from getColorPattern() as a whole
* word.
*/
public function getColorValuePattern(): string
{
return '/\b(?:' . $this->getColorPattern() . ')\b/u';
}
// Signal labels: short machine-readable names, one per signal kind.
// NOTE(review): presumably reported in the list of matched signals — confirm
// against the intent classifier that consumes this config.

/** Label for a support/diagnostic match. */
public function getSupportOrDiagnosticSignalLabel(): string
{
return 'support_or_diagnostic';
}
/** Label for a SKU-like number match. */
public function getSkuSignalLabel(): string
{
return 'sku';
}
/** Label for a price value match. */
public function getPriceSignalLabel(): string
{
return 'price';
}
/** Label for a size keyword/value match. */
public function getSizeSignalLabel(): string
{
return 'size';
}
/** Label for a bare size token match. */
public function getSizeTokenSignalLabel(): string
{
return 'size_token';
}
/** Label for a color term match. */
public function getColorSignalLabel(): string
{
return 'color';
}
/**
* Prefix for advisory signal labels; ends with a colon.
*
* NOTE(review): presumably concatenated with the matched advisory term —
* confirm against the caller.
*/
public function getAdvisorySignalPrefix(): string
{
return 'advisory:';
}

// Thresholds and per-signal scores.
// NOTE(review): presumably the per-signal scores are summed and compared
// against the minimum scores below — confirm against the intent classifier.

/** Minimum score (3) for the product-search case. */
public function getProductSearchMinScore(): int
{
return 3;
}
/** Minimum score (2) for the advisory product-search case. */
public function getAdvisoryProductSearchMinScore(): int
{
return 2;
}
/** Score (3) for a strong signal; equal to getProductSearchMinScore(). */
public function getStrongSignalScore(): int
{
return 3;
}
/** Score (2) for a SKU-like number. */
public function getSkuSignalScore(): int
{
return 2;
}
/** Score (2) for a price value. */
public function getPriceSignalScore(): int
{
return 2;
}
/** Score (2) for a size keyword with value. */
public function getSizeSignalScore(): int
{
return 2;
}
/** Score (1) for a bare size token. */
public function getSizeTokenSignalScore(): int
{
return 1;
}
/** Score (1) for a color term. */
public function getColorSignalScore(): int
{
return 1;
}
/** Score (1) for an advisory signal. */
public function getAdvisorySignalScore(): int
{
return 1;
}
}

View File

@@ -4,28 +4,18 @@ declare(strict_types=1);
namespace App\Config;
final readonly class CommerceQueryParserConfig
final class CommerceQueryParserConfig
{
/**
* @param string[] $knownBrands
* @param string[] $phrasesToRemove
* @param string[] $filterSearchTokensPattern
* @param string[] $referenceOnlyTokens
*/
public function __construct(
private array $knownBrands = [],
private array $phrasesToRemove = [],
private array $filterSearchTokensPattern = [],
private array $referenceOnlyTokens = [],
) {
}
/**
* @return string[]
*/
public function getKnownBrands(): array
{
return $this->knownBrands;
return [
'heyl',
'horiba',
'neomeris',
];
}
/**
@@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig
*/
public function getPhrasesToRemove(): array
{
return $this->phrasesToRemove;
return [
'ich suche',
'suche',
'habt ihr',
'gibt es',
'zeige mir',
'welches gerät',
'welche gerät',
'welches modell',
'welches ist besser',
'welches ist am besten',
'alternative',
'alternativen',
];
}
public function getHistoryContextPattern(): string
{
return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
}
public function getReferenceFollowUpPattern(): string
public function getHistoryContextValuePattern(): string
{
return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
}
/**
* @return string[]
*/
public function getFilterSearchTokens(): array
{
return [
'auch',
'noch',
'nochmal',
'zusätzlich',
'dazu',
'davon',
'stattdessen',
'bitte',
'gern',
'gerne',
'zeige',
'zeig',
'such',
'suche',
'finde',
'find',
'mir',
'mal',
'von',
];
}
/**
* Backward-compatible alias for older callers.
*
* @return string[]
*/
public function getFilterSearchTokensPattern(): array
{
return $this->filterSearchTokensPattern;
return $this->getFilterSearchTokens();
}
/**
* @return string[]
*/
public function getReferenceOnlyTokens(): array
public function getNormalizationSearch(): array
{
if ($this->referenceOnlyTokens !== []) {
return $this->referenceOnlyTokens;
return ['€'];
}
/**
* Replacement strings for prompt normalisation.
*
* The single entry " euro " (padded with spaces) lines up with the single "€"
* entry returned by getNormalizationSearch().
* NOTE(review): presumably both lists are passed to one search/replace call —
* confirm against the parser.
*
* @return string[]
*/
public function getNormalizationReplace(): array
{
return [' euro '];
}
/**
* Regex matching every character that is not a Unicode letter, a Unicode
* number, whitespace, ".", "," or "-".
*/
public function getPromptSanitizePattern(): string
{
return '/[^\p{L}\p{N}\s.,\-]/u';
}
/**
* Regex matching a run of one or more whitespace characters.
*/
public function getWhitespaceCollapsePattern(): string
{
return '/\s+/u';
}
/**
* Regex matching a run of one or more whitespace characters; currently
* identical to getWhitespaceCollapsePattern().
*/
public function getWhitespaceSplitPattern(): string
{
return '/\s+/u';
}
/**
* Character list for trimming search text: space, tab, LF, CR, NUL, vertical
* tab, "-", "." and ",".
*/
public function getSearchTextTrimCharacters(): string
{
return " \t\n\r\0\x0B-.,";
}
/**
* Minimum search token length (1).
*
* NOTE(review): whether the comparison is in characters or bytes depends on
* the caller — confirm.
*/
public function getMinSearchTokenLength(): int
{
return 1;
}
/**
* Minimum direct-product token length (1).
*/
public function getMinDirectProductTokenLength(): int
{
return 1;
}
/**
* Multiline regex capturing the text after a line-leading "Question:" label.
*
* Capture group 1 is the rest of that line. The label is matched
* case-sensitively and the pattern has no UTF-8 flag.
*/
public function getHistoryQuestionPattern(): string
{
return '/^Question:\s*(.+)$/m';
}
/**
* Regex for a price range: "zwischen <min> und <max> euro".
*
* Capture groups 1 and 2 hold the two amounts (integer or decimal with "." or
* ","). At least one whitespace character is required before "euro".
* NOTE(review): the removal patterns in getPriceRemovalPatterns() use
* "\s*euro", so "100euro" would be removed from the search text but not
* captured here — confirm whether that difference is intended.
*/
public function getPriceBetweenPattern(): string
{
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
* Regex for an upper price bound: "unter", "bis", "max" or "maximal",
* followed by an amount (capture group 1) and "euro".
*/
public function getPriceMaxPattern(): string
{
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
* Regex for a lower price bound: "ab", "mindestens" or "min", followed by an
* amount (capture group 1) and "euro".
*/
public function getPriceMinPattern(): string
{
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
* @return string[]
*/
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
{
return [
'preis',
'preise',
'kosten',
'kostet',
'gerät',
'geraet',
'modell',
'produkt',
'artikel',
'dafür',
'dafuer',
'dazu',
'davon',
'verfügbarkeit',
'verfuegbarkeit',
'shop',
'link',
'zum',
'zur',
'das',
'dieses',
'den',
'dem',
'bitte',
'und',
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
];
}
/**
* Regex matching any single digit.
*/
public function getDirectProductDigitPattern(): string
{
return '/\d/u';
}
/**
* Maximum token count (4) for the direct-product case.
*
* NOTE(review): presumably the upper bound on query tokens for treating a
* query as a direct product reference — confirm against the parser.
*/
public function getDirectProductMaxTokens(): int
{
return 4;
}
/**
* Regex for text that looks like a model designation.
*
* Matches one to three lowercase words (a-z, ä, ö, ü, ß; later characters may
* also be "®" or "-"), then whitespace, then a number of 2 to 5 digits with
* an optional suffix of a-z, 0-9 or "-". Example: "testomat 808".
* There is no case-insensitive flag, so uppercase letters do not match.
*/
public function getModelLikePattern(): string
{
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
}
/**
* Regex for text that looks like an accessory designation.
*
* Matches one of "indikator", "indicator", "reagenz", "reagent", "kit" or
* "set", then whitespace, then a number of 1 to 5 digits with an optional
* suffix of a-z, 0-9 or "-". Example: "indikator 300".
*/
public function getAccessoryLikePattern(): string
{
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
}
/**
 * Builds a regex that matches $token only where it stands alone.
 *
 * The token is quoted with preg_quote(), so it is always matched literally.
 * "Stands alone" means no word character directly before or after it.
 *
 * @param string $token Literal token to match; may contain regex metacharacters.
 *
 * @return string A delimited pattern with the UTF-8 flag.
 */
public function buildExactTokenRemovalPattern(string $token): string
{
    if ($token === '') {
        // An empty token would otherwise yield a pattern that matches the
        // empty string at boundaries; return a pattern that never matches.
        return '/(?!)/u';
    }

    // Lookarounds instead of \b: \b depends on the token's own first and last
    // character, so a token with a non-word edge (e.g. "€") could never match
    // between spaces. For plain word tokens both forms behave the same.
    return '/(?<!\w)' . preg_quote($token, '/') . '(?!\w)/u';
}
/**
 * Builds a regex matching $brand directly followed by a model number.
 *
 * The brand is quoted with preg_quote() and must be followed by whitespace
 * and a number of 2 to 5 digits with an optional suffix of a-z, 0-9 or "-".
 *
 * @param string $brand Literal brand name.
 */
public function buildBrandPartOfModelPattern(string $brand): string
{
    $quotedBrand = preg_quote($brand, '/');

    return sprintf('/\b%s\s+\d{2,5}[a-z0-9\-]*\b/u', $quotedBrand);
}
}

View File

@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
* - the system now has more safeguards:
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
*/
public const VECTOR_SCORE_THRESHOLD = 0.82;
public const VECTOR_SCORE_THRESHOLD = 0.83;
/**
* Lower safety boundary for dynamic threshold adjustments.

View File

@@ -1,49 +1,399 @@
<?php
declare(strict_types=1);
namespace App\Config;
class PromptBuilderConfig{
/**
* Approximate character-to-token ratio for conservative prompt budgeting.
*/
public const CHARS_PER_TOKEN = 4;
final class PromptBuilderConfig
{
/**
* Approximate character-to-token ratio for conservative prompt budgeting.
*/
public function getCharsPerToken(): int
{
return 4;
}
/**
* Keep a small gap so history does not consume the last available prompt space.
*/
public function getHistoryPaddingChars(): int
{
return 400;
}
/**
* Reserve some space for the model output: share of the context (0.25).
*
* NOTE(review): presumably clamped between getOutputReserveMinTokens() and
* getOutputReserveMaxTokens() — confirm in the prompt builder.
*/
public function getOutputReserveRatio(): float
{
return 0.25;
}
/**
* Lower bound in tokens (768) for the output reserve.
*/
public function getOutputReserveMinTokens(): int
{
return 768;
}
/**
* Upper bound in tokens (6000) for the output reserve.
*/
public function getOutputReserveMaxTokens(): int
{
return 6000;
}
/**
* Reserve a small safety buffer to avoid hitting the context limit too
* tightly: share of the context (0.05).
*/
public function getSafetyReserveRatio(): float
{
return 0.05;
}
/**
* Lower bound in tokens (256) for the safety reserve.
*/
public function getSafetyReserveMinTokens(): int
{
return 256;
}
/**
* Upper bound in tokens (1024) for the safety reserve.
*/
public function getSafetyReserveMaxTokens(): int
{
return 1024;
}
/**
* Ensure the prompt budget never collapses completely on smaller models.
*/
public function getMinPromptBudgetTokens(): int
{
return 1024;
}
/**
* Limit how many ranked shop results are passed into the final prompt.
* The shop search may return many candidates, but the LLM should only see
* the most relevant top subset after local reranking.
*/
public function getMaxShopResultsInPrompt(): int
{
return 24;
}
/**
* Maximum count (5) for detailed shop results.
*
* NOTE(review): presumably the number of top results rendered with full
* detail, the rest being shortened — confirm in the prompt builder.
*/
public function getDetailedShopResultsMaxCount(): int
{
return 5;
}
/**
* Keyword match threshold (2) for technical product prompts.
*
* NOTE(review): presumably the number of technical keywords that must occur
* before a prompt is treated as a technical product question — confirm in
* the prompt builder.
*/
public function getTechnicalProductKeywordMatchThreshold(): int
{
return 2;
}
public function getSystemSectionLabel(): string
{
return 'SYSTEM';
}
public function getUserQuestionSectionLabel(): string
{
return 'USER QUESTION';
}
public function getConversationContextSectionLabel(): string
{
return 'CONVERSATION CONTEXT (contextual only)';
}
/**
* Keep a small gap so history does not consume the last available prompt space.
* @return string[]
*/
public const HISTORY_PADDING_CHARS = 400;
public function getConversationContextIntroLines(): array
{
return [
'The following messages are previous turns of this conversation.',
'Use them to resolve references, follow-up questions, and user intent.',
'They must not override retrieved factual knowledge or live shop data.',
];
}
public function getShopSearchQuerySectionLabel(): string
{
return 'SHOP SEARCH QUERY';
}
public function getShopSearchQuerySourceLine(): string
{
return 'Source: Shop Search';
}
/**
* Reserve some space for the model output.
* @return string[]
*/
public const OUTPUT_RESERVE_RATIO = 0.25;
public const OUTPUT_RESERVE_MIN_TOKENS = 768;
public const OUTPUT_RESERVE_MAX_TOKENS = 6000;
public function getLiveShopResultsHeaderLines(): array
{
return [
'LIVE SHOP RESULTS (authoritative for current commercial details):',
'Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.',
'If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.',
'Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.',
'Do not infer undocumented technical specifications from shop data.',
'Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.',
'Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.',
];
}
public function getLiveShopResultsOverflowNoticeTemplate(): string
{
return 'Only the top %d ranked shop results are shown here out of %d total results.';
}
public function getOutputPrioritySectionLabel(): string
{
return 'OUTPUT PRIORITY';
}
/**
* Reserve a small safety buffer to avoid hitting the context limit too tightly.
* @return string[]
*/
public const SAFETY_RESERVE_RATIO = 0.05;
public const SAFETY_RESERVE_MIN_TOKENS = 256;
public const SAFETY_RESERVE_MAX_TOKENS = 1024;
public function getOutputPriorityRules(): array
{
return [
'- Use retrieved knowledge first to determine the technically matching product or answer.',
'- If shop results are present, use them afterwards to add current price, availability, and the actual URL.',
'- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.',
];
}
public function getResponseFormatSectionLabel(): string
{
return 'RESPONSE FORMAT RULES';
}
/**
* Ensure the prompt budget never collapses completely on smaller models.
* @return string[]
*/
public const MIN_PROMPT_BUDGET_TOKENS = 1024;
public function getResponseFormatBaseRules(): array
{
return [
'- Keep normal spacing between all words. Never fuse words together.',
'- Use short, clean paragraphs or short labeled sections.',
'- Do not use persuasive or promotional wording.',
'- Do not repeat the same fact in slightly different wording.',
'- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.',
'- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.',
'- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.',
'- Do not combine technical identity from one source with commercial fields from a different product.',
'- Product number, price, availability, and URL must belong to the same explicitly grounded product.',
];
}
/**
* Limit how many ranked shop results are passed into the final prompt.
* The shop search may return many candidates, but the LLM should only see
* the most relevant top subset after local reranking.
* @return string[]
*/
public const MAX_SHOP_RESULTS_IN_PROMPT = 24;
public function getResponseFormatWithShopRules(): array
{
return [
'- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.',
'- Keep price, availability, and URL on separate lines when they are present.',
'- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.',
'- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.',
'- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.',
];
}
/**
* Technical product prompts should be answered like documentation,
* not like sales copy.
* @return string[]
*/
public const TECHNICAL_PRODUCT_KEYWORDS = [
public function getResponseFormatWithoutShopRules(): array
{
return [
'- If no shop results are present, do not compensate by inventing external products or external manufacturers.',
];
}
/**
* @return string[]
*/
public function getResponseFormatTechnicalRules(): array
{
return [
'- Write like technical documentation: precise, neutral, and source-close.',
'- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.',
'- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.',
];
}
/**
* @return string[]
*/
public function getResponseFormatAccessoryRules(): array
{
return [
'- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.',
'- The main device must come first. The accessory must not replace the main device.',
'- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.',
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
];
}
public function getLanguageRulesSectionLabel(): string
{
return 'LANGUAGE RULES';
}
/**
* @return string[]
*/
public function getLanguageRules(): array
{
return [
'- Answer only in the same language as the user question.',
'- All headings, labels, notes, and structural elements must be in the same language as the user question.',
'- Do not switch languages unless the user does.',
'- If headings are used, write them in the user\'s language.',
];
}
public function getFactGroundingRulesSectionLabel(): string
{
return 'FACT GROUNDING RULES';
}
/**
* @return string[]
*/
public function getFactGroundingBaseRules(): array
{
return [
'- State only facts that are explicitly present in the provided sources.',
'- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.',
'- Do not invent missing values.',
'- Do not replace missing values with estimates, defaults, or typical industry assumptions.',
'- Do not claim that information is missing if it appears in the provided sources.',
'- Do not compare with other products unless those products are also present in the provided sources.',
'- Prefer source-faithful wording over persuasive wording.',
'- Avoid marketing language such as \'ideal\', \'perfect\', \'unverzichtbar\', \'entscheidend\', \'optimal\', \'kosteneffizient\', \'prozesssicher\', or \'state-of-the-art\'.',
'- Clearly separate explicit facts from inferences.',
'- If a conclusion goes beyond the source wording, label it exactly as \'Inference:\'.',
'- If a sentence cannot be traced to the provided sources, do not write it.',
'- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.',
'- If the sources do not identify a suitable product, do not invent one.',
];
}
/**
* @return string[]
*/
public function getFactGroundingWithShopRules(): array
{
return [
'- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.',
'- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.',
'- When shop results are present and relevant, include current price and the actual URL if available.',
'- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.',
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
'- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.',
'- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.',
'- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.',
'- If the shop match is ambiguous, keep the technical identification and commercial details separate.',
];
}
/**
* @return string[]
*/
public function getFactGroundingWithoutShopRules(): array
{
return [
'- Use retrieved knowledge as authoritative for factual answers.',
'- If no shop results are present, do not compensate with external recommendations or external product suggestions.',
];
}
/**
 * Grounding rules for technical product questions.
 *
 * The bullet lines are returned in a fixed order: tone and stance first,
 * then rules that keep the answer inside what the sources literally state,
 * then rules for handling missing or negative information.
 *
 * @return string[]
 */
public function getFactGroundingTechnicalRules(): array
{
    // Tone: documentation assistant, not sales advisor.
    $toneRules = [
        '- For technical product questions, answer primarily with explicitly stated facts.',
        '- Behave like a technical documentation assistant, not like a sales advisor.',
        '- Keep interpretations minimal and do not generalize application areas beyond the provided sources.',
        '- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.',
        '- Do not translate technical facts into business value unless the source explicitly does so.',
        '- Do not recommend process changes unless explicitly present in the source.',
        '- Do not use persuasive summaries or advisory conclusions.',
    ];

    // Scope: reproduce what the source says without widening it.
    $sourceScopeRules = [
        '- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.',
        '- Use neutral engineering language.',
        '- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.',
        '- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.',
        '- If the source lists application areas, repeat only those areas and do not broaden them.',
        '- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
        '- If the source states only a threshold function, do not expand it into broader control logic.',
    ];

    // Gaps: say plainly what is not covered and stop.
    $missingInformationRules = [
        '- If a detail is not explicitly stated in the provided sources, say so plainly.',
        '- Prefer short, source-close sentences over explanatory expansion.',
        '- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.',
    ];

    return [...$toneRules, ...$sourceScopeRules, ...$missingInformationRules];
}
/**
 * Returns the heading text for the retrieved-knowledge section.
 */
public function getRetrievedKnowledgeSectionLabel(): string
{
    $sectionLabel = 'RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)';

    return $sectionLabel;
}
/**
 * Returns the source attribution line for retrieved knowledge.
 */
public function getRetrievedKnowledgeSourceLine(): string
{
    $sourceLine = 'Source: Documents';

    return $sourceLine;
}
/**
 * Returns the heading text for the URL-content section.
 */
public function getUrlContentSectionLabel(): string
{
    $sectionLabel = 'CONTENT FROM URL (authoritative if user-provided)';

    return $sectionLabel;
}
/**
 * Returns the source attribution line for URL content.
 */
public function getUrlContentSourceLine(): string
{
    $sourceLine = 'Source: URL';

    return $sourceLine;
}
/**
 * Returns the label text for a shop item's product number.
 */
public function getShopProductNumberLabel(): string
{
    $fieldLabel = 'Product number';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's manufacturer.
 */
public function getShopManufacturerLabel(): string
{
    $fieldLabel = 'Manufacturer';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's price.
 */
public function getShopPriceLabel(): string
{
    $fieldLabel = 'Price';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's availability.
 */
public function getShopAvailabilityLabel(): string
{
    $fieldLabel = 'Available';

    return $fieldLabel;
}
/**
 * Returns the affirmative availability value text.
 */
public function getShopAvailabilityYesLabel(): string
{
    $affirmative = 'yes';

    return $affirmative;
}
/**
 * Returns the negative availability value text.
 */
public function getShopAvailabilityNoLabel(): string
{
    $negative = 'no';

    return $negative;
}
/**
 * Returns the prefix for a shop highlight line: a dash followed by one space.
 */
public function getShopHighlightPrefix(): string
{
    $bulletPrefix = '- ';

    return $bulletPrefix;
}
/**
 * Returns the label text for a shop item's URL.
 */
public function getShopUrlLabel(): string
{
    $fieldLabel = 'URL';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's product image.
 */
public function getShopProductImageLabel(): string
{
    $fieldLabel = 'Product image';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's description.
 */
public function getShopDescriptionLabel(): string
{
    $fieldLabel = 'Description';

    return $fieldLabel;
}
/**
 * Returns the label text for a shop item's meta information.
 */
public function getShopMetaInformationLabel(): string
{
    $fieldLabel = 'Meta information';

    return $fieldLabel;
}
/**
* @return string[]
*/
public function getTechnicalProductKeywords(): array
{
return [
'technisch',
'technical',
'produkt',
@@ -78,8 +428,14 @@ class PromptBuilderConfig{
'chlor',
'chlormessung',
];
}
public const ACCESSORY_REQUEST_KEYWORDS = [
/**
* @return string[]
*/
public function getAccessoryRequestKeywords(): array
{
return [
'passend',
'passende',
'passendes',
@@ -94,4 +450,10 @@ class PromptBuilderConfig{
'ergänzung',
'ergaenzung',
];
}
/**
 * Returns the PCRE pattern for model-like tokens.
 *
 * The pattern matches a run of at least two letters, an optional single
 * whitespace character, then two to five digits, delimited by word
 * boundaries (for example "abc 808" or "xy12").
 */
public function getTechnicalProductModelPattern(): string
{
    $modelPattern = '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';

    return $modelPattern;
}
}

View File

@@ -0,0 +1,204 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Hard-coded settings, scoring weights, regular expressions and term lists
 * for search repair.
 *
 * NOTE(review): presumably consumed by SearchRepairService, whose explicit
 * service arguments were dropped from the container config in the same
 * change; the consumer is not visible from this file, so confirm before
 * relying on that.
 */
final class SearchRepairConfig
{
    /**
     * Shared core of the model-name expressions: one to three words that start
     * with a letter, followed by whitespace and a 2-5 digit number with an
     * optional alphanumeric/hyphen suffix. No delimiters or anchors included.
     */
    private const MODEL_NAME_EXPRESSION = '[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*';

    /**
     * Whether search repair is switched on.
     */
    public function isEnabled(): bool
    {
        return true;
    }

    /**
     * Upper bound for the number of repair queries.
     */
    public function getMaxRepairQueries(): int
    {
        return 3;
    }

    /**
     * Primary result count threshold named "without repair".
     *
     * NOTE(review): whether the comparison is inclusive is decided by the
     * caller, which is not visible here.
     */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return 2;
    }

    /**
     * Number of top products to include in log output.
     */
    public function getTopProductLogLimit(): int
    {
        return 3;
    }

    /**
     * Pattern that captures a model-like candidate in group 1.
     */
    public function getModelCandidatePattern(): string
    {
        return '/\b(' . self::MODEL_NAME_EXPRESSION . ')\b/u';
    }

    /**
     * Pattern that captures "<accessory term> <number>" in group 1,
     * case-insensitively.
     */
    public function getAccessoryCandidatePattern(): string
    {
        return '/\b((?:' . $this->joinLiteralTerms($this->getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
    }

    /**
     * Pattern that matches any accessory/bundle term, case-insensitively.
     *
     * The terms are joined without quoting because some entries are regex
     * fragments (they contain `\s+`).
     */
    public function getAccessoryOrBundlePattern(): string
    {
        return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu';
    }

    /**
     * Same expression as the model candidate pattern, without a capture group.
     */
    public function getModelLikePattern(): string
    {
        return '/\b' . self::MODEL_NAME_EXPRESSION . '\b/u';
    }

    /**
     * Pattern that matches any specificity boost term, case-insensitively.
     */
    public function getSpecificityBoostPattern(): string
    {
        return '/\b(?:' . $this->joinLiteralTerms($this->getSpecificityBoostTerms()) . ')\b/iu';
    }

    /**
     * Lower-case tokens classified as generic for candidates.
     *
     * Umlaut words are listed both with the umlaut and with its ae/oe/ue
     * transliteration.
     *
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return [
            'wasser',
            'messgerät',
            'messgeraet',
            'produkt',
            'geräte',
            'geraete',
            'gerät',
            'geraet',
            'resthärte',
            'resthaerte',
            'preis',
            'infos',
            'wissen',
        ];
    }

    /**
     * Character list for trimming: whitespace, NUL, vertical tab, quotes,
     * backtick and common punctuation.
     */
    public function getSanitizeTrimCharacters(): string
    {
        return " \t\n\r\0\x0B\"'`.,;:-";
    }

    /**
     * Pattern that matches a single digit.
     */
    public function getContainsDigitPattern(): string
    {
        return '/\d/u';
    }

    /**
     * Pattern that matches a run of whitespace.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Pattern that matches runs of characters that are neither letters,
     * numbers, whitespace nor hyphens.
     */
    public function getTokenizeCleanupPattern(): string
    {
        return '/[^\p{L}\p{N}\s\-]+/u';
    }

    /**
     * Separator string for product keys.
     */
    public function getProductKeySeparator(): string
    {
        return '|';
    }

    /**
     * Score value associated with digits in a candidate.
     */
    public function getCandidateDigitScore(): int
    {
        return 4;
    }

    /**
     * Cap applied to a candidate's word count.
     */
    public function getCandidateWordCountCap(): int
    {
        return 4;
    }

    /**
     * Score value associated with the specificity boost.
     */
    public function getSpecificityBoostScore(): int
    {
        return 3;
    }

    /**
     * Overlap ratio threshold relative to the primary query.
     *
     * NOTE(review): how the caller compares against this value is not visible
     * here.
     */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return 0.9;
    }

    /**
     * Weight for matches against the prompt.
     */
    public function getPromptMatchWeight(): int
    {
        return 3;
    }

    /**
     * Weight for matches against the primary query.
     */
    public function getPrimaryQueryMatchWeight(): int
    {
        return 2;
    }

    /**
     * Weight for matches against a repair signal.
     */
    public function getRepairSignalMatchWeight(): int
    {
        return 4;
    }

    /**
     * Bonus tied to primary result order.
     */
    public function getPrimaryResultOrderBonus(): int
    {
        return 1;
    }

    /**
     * Score value for token intersection.
     */
    public function getTokenIntersectionScore(): int
    {
        return 2;
    }

    /**
     * Score value for a numeric token match.
     */
    public function getNumericTokenMatchScore(): int
    {
        return 4;
    }

    /**
     * Literal terms that can precede a number in an accessory candidate.
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return [
            'indikator',
            'indicator',
            'reagenz',
            'reagent',
            'kit',
            'set',
        ];
    }

    /**
     * Alternatives for the accessory/bundle pattern.
     *
     * Entries are regex fragments, not plain strings: some contain `\s+`.
     * Umlaut words are listed with the umlaut, with the umlaut stripped, and
     * with the oe/ae transliteration, so that "zubehoer" and "ergaenzung" are
     * recognised as well.
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return [
            'passend',
            'passende',
            'zubehor',
            'zubehör',
            'zubehoer',
            'dazu',
            'zusatz',
            'erganzung',
            'ergänzung',
            'ergaenzung',
            'indikator',
            'reagenz',
            'kit',
            'set',
            'auch\s+das',
            'mit\s+preis\s+und\s+allen\s+infos',
        ];
    }

    /**
     * Literal terms used to build the specificity boost pattern.
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return [
            'indikator',
            'indicator',
            'testomat',
            'tritromat',
            'titromat',
            'reagenz',
            'reagent',
        ];
    }

    /**
     * Joins plain-text terms into a regex alternation, escaping each term so a
     * future entry containing a regex metacharacter cannot break the pattern.
     *
     * @param string[] $terms
     */
    private function joinLiteralTerms(array $terms): string
    {
        return implode(
            '|',
            array_map(static fn (string $term): string => preg_quote($term, '/'), $terms),
        );
    }
}

View File

@@ -0,0 +1,40 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Holds the German stop-word list used for retrieval.
 */
final class StopWordsConfig
{
    /**
     * Returns the retrieval-oriented stop-word list as one flat list.
     *
     * Curation policy for this list:
     * - negations stay searchable (they are not listed)
     * - question words stay searchable
     * - domain terms stay searchable
     * - only structural filler words are listed
     *
     * @return string[]
     */
    public function getStopWords(): array
    {
        $prepositions = ['mit'];

        $articles = [
            'der', 'die', 'das',
            'ein', 'eine', 'einer', 'eines',
            'den', 'dem', 'des',
        ];

        $conjunctions = ['und', 'oder', 'aber', 'sowie'];

        $weakPronouns = [
            'ich', 'du', 'er', 'sie', 'es',
            'wir', 'ihr',
        ];

        $fillers = [
            'halt', 'eben', 'auch', 'schon',
            'noch', 'mal', 'bitte', 'danke',
        ];

        $structuralWords = [
            'also', 'nun', 'tja',
            'dann', 'danach', 'davor',
            'hier', 'dort',
        ];

        // Time fillers that carry little context on their own.
        $timeFillers = ['heute', 'gestern', 'morgen'];

        $politenessAndModalForms = [
            'könnte', 'kannst', 'kann',
            'würde', 'würdest', 'würden',
        ];

        // Group order is part of the returned list's order; keep it stable.
        return [
            ...$prepositions,
            ...$articles,
            ...$conjunctions,
            ...$weakPronouns,
            ...$fillers,
            ...$structuralWords,
            ...$timeFillers,
            ...$politenessAndModalForms,
        ];
    }
}

View File

@@ -22,157 +22,75 @@ final class CommerceIntentLite
*/
public function detect(string $originalPrompt): array
{
$p = mb_strtolower(trim($originalPrompt));
$prompt = mb_strtolower(trim($originalPrompt));
if ($p === '') {
return [
'intent' => self::NONE,
'score' => 0,
'signals' => [],
];
if ($prompt === '') {
return $this->buildDetectionResult(
intent: self::NONE,
score: 0,
signals: []
);
}
// Block support / diagnostic questions from entering the commerce flow
// unless the prompt also contains very explicit purchase / shop intent.
if ($this->isSupportOrDiagnosticQuery($p) && !$this->hasExplicitCommerceIntent($p)) {
return [
'intent' => self::NONE,
'score' => 0,
'signals' => ['support_or_diagnostic'],
];
if ($this->isSupportOrDiagnosticQuery($prompt) && !$this->hasExplicitCommerceIntent($prompt)) {
return $this->buildDetectionResult(
intent: self::NONE,
score: 0,
signals: [$this->config->getSupportOrDiagnosticSignalLabel()]
);
}
$score = 0;
$signals = [];
$strongSignals = $this->config->getStrongSignalsList();
foreach ($strongSignals as $signal) {
if (str_contains($p, mb_strtolower($signal))) {
$score += 3;
$signals[] = $signal;
}
}
// Treat long numeric identifiers as stronger product-number-like signals.
// This avoids over-triggering commerce purely because a model name contains
// a short number such as "808" in support questions.
if (preg_match('/\b\d{4,10}\b/u', $p) === 1) {
$score += 2;
$signals[] = 'sku';
}
$pricePattern = $this->config->getPricePattern();
if (preg_match('/\b\d+(?:[.,]\d+)?\s*(' . $pricePattern . ')\b/u', $p) === 1) {
$score += 2;
$signals[] = 'price';
}
$sizePattern = $this->config->getSizePattern();
if (preg_match('/\b(' . $sizePattern . ')\s*[a-z0-9.-]+\b/u', $p) === 1) {
$score += 2;
$signals[] = 'size';
}
$sizeTokenPattern = $this->config->getSizeTokenPattern();
if (preg_match('/\b(' . $sizeTokenPattern . ')\b/u', $p) === 1) {
$score += 1;
$signals[] = 'size_token';
}
$colorPattern = $this->config->getColorPattern();
if (preg_match('/\b(' . $colorPattern . ')\b/u', $p) === 1) {
$score += 1;
$signals[] = 'color';
}
$advisorySignals = $this->config->getAdvisorySignals();
foreach ($advisorySignals as $signal) {
if (str_contains($p, mb_strtolower($signal))) {
$score += 1;
$signals[] = 'advisory:' . $signal;
}
}
[$score, $signals] = $this->applyStrongSignals($prompt, $score, $signals);
[$score, $signals] = $this->applySkuSignal($prompt, $score, $signals);
[$score, $signals] = $this->applyPriceSignal($prompt, $score, $signals);
[$score, $signals] = $this->applySizeSignal($prompt, $score, $signals);
[$score, $signals] = $this->applySizeTokenSignal($prompt, $score, $signals);
[$score, $signals] = $this->applyColorSignal($prompt, $score, $signals);
[$score, $signals] = $this->applyAdvisorySignals($prompt, $score, $signals);
$signals = array_values(array_unique($signals));
if ($score >= 3) {
return [
'intent' => self::PRODUCT_SEARCH,
'score' => $score,
'signals' => $signals,
];
if ($score >= $this->config->getProductSearchMinScore()) {
return $this->buildDetectionResult(
intent: self::PRODUCT_SEARCH,
score: $score,
signals: $signals
);
}
if ($score >= 2) {
return [
'intent' => self::ADVISORY_PRODUCT_SEARCH,
'score' => $score,
'signals' => $signals,
];
if ($score >= $this->config->getAdvisoryProductSearchMinScore()) {
return $this->buildDetectionResult(
intent: self::ADVISORY_PRODUCT_SEARCH,
score: $score,
signals: $signals
);
}
return [
'intent' => self::NONE,
'score' => $score,
'signals' => $signals,
];
return $this->buildDetectionResult(
intent: self::NONE,
score: $score,
signals: $signals
);
}
private function isSupportOrDiagnosticQuery(string $prompt): bool
{
$patterns = [
'/\bfehler\b/u',
'/\bfehlercode\b/u',
'/\berror\b/u',
'/\bstörung\b/u',
'/\bstoerung\b/u',
'/\balarm\b/u',
'/\bstörungsmeldung\b/u',
'/\bstoerungsmeldung\b/u',
'/\bmeldung\b/u',
'/\bwarnung\b/u',
'/\bwarncode\b/u',
'/\bcode\b/u',
'/\bwas bedeutet\b/u',
'/\bwarum\b/u',
'/\bblinkt\b/u',
'/\bzeigt\b/u',
'/\bzeigt an\b/u',
'/\bursache\b/u',
'/\bdiagnose\b/u',
'/\bservicefall\b/u',
'/\bproblem\b/u',
'/\bstörung beheben\b/u',
'/\bstoerung beheben\b/u',
'/\be\d{1,3}\b/u',
];
foreach ($patterns as $pattern) {
if (preg_match($pattern, $prompt) === 1) {
return true;
}
}
return false;
return $this->matchesAnyPattern($prompt, $this->config->getSupportDiagnosticPatterns());
}
private function hasExplicitCommerceIntent(string $prompt): bool
{
$patterns = [
'/\bshop\b/u',
'/\bpreis\b/u',
'/\bkosten\b/u',
'/\bkostet\b/u',
'/\bkaufen\b/u',
'/\bbestellen\b/u',
'/\bprodukt\b/u',
'/\bartikel\b/u',
'/\bsku\b/u',
'/\bonline\b/u',
];
return $this->matchesAnyPattern($prompt, $this->config->getExplicitCommerceIntentPatterns());
}
/**
* @param string[] $patterns
*/
private function matchesAnyPattern(string $prompt, array $patterns): bool
{
foreach ($patterns as $pattern) {
if (preg_match($pattern, $prompt) === 1) {
return true;
@@ -181,4 +99,119 @@ final class CommerceIntentLite
return false;
}
/**
 * Adds the strong-signal score once for every configured strong signal that
 * occurs in the prompt, and records each matching signal.
 *
 * Each configured signal is lower-cased before the substring check; the prompt
 * is used as given.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applyStrongSignals(string $prompt, int $score, array $signals): array
{
    foreach ($this->config->getStrongSignalsList() as $strongSignal) {
        if (!str_contains($prompt, mb_strtolower($strongSignal))) {
            continue;
        }

        // The signal is recorded in its configured spelling, not the lower-cased one.
        $signals[] = $strongSignal;
        $score += $this->config->getStrongSignalScore();
    }

    return [$score, $signals];
}
/**
 * Adds the SKU score and label when the prompt matches the configured
 * SKU-like pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applySkuSignal(string $prompt, int $score, array $signals): array
{
    $looksLikeSku = preg_match($this->config->getSkuLikePattern(), $prompt) === 1;

    if (!$looksLikeSku) {
        return [$score, $signals];
    }

    $score += $this->config->getSkuSignalScore();
    $signals[] = $this->config->getSkuSignalLabel();

    return [$score, $signals];
}
/**
 * Adds the price score and label when the prompt matches the configured
 * price-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applyPriceSignal(string $prompt, int $score, array $signals): array
{
    $mentionsPrice = preg_match($this->config->getPriceValuePattern(), $prompt) === 1;

    if (!$mentionsPrice) {
        return [$score, $signals];
    }

    $score += $this->config->getPriceSignalScore();
    $signals[] = $this->config->getPriceSignalLabel();

    return [$score, $signals];
}
/**
 * Adds the size score and label when the prompt matches the configured
 * size-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applySizeSignal(string $prompt, int $score, array $signals): array
{
    $mentionsSize = preg_match($this->config->getSizeValuePattern(), $prompt) === 1;

    if (!$mentionsSize) {
        return [$score, $signals];
    }

    $score += $this->config->getSizeSignalScore();
    $signals[] = $this->config->getSizeSignalLabel();

    return [$score, $signals];
}
/**
 * Adds the size-token score and label when the prompt matches the configured
 * size-token pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applySizeTokenSignal(string $prompt, int $score, array $signals): array
{
    $hasSizeToken = preg_match($this->config->getSizeTokenValuePattern(), $prompt) === 1;

    if (!$hasSizeToken) {
        return [$score, $signals];
    }

    $score += $this->config->getSizeTokenSignalScore();
    $signals[] = $this->config->getSizeTokenSignalLabel();

    return [$score, $signals];
}
/**
 * Adds the color score and label when the prompt matches the configured
 * color-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applyColorSignal(string $prompt, int $score, array $signals): array
{
    $mentionsColor = preg_match($this->config->getColorValuePattern(), $prompt) === 1;

    if (!$mentionsColor) {
        return [$score, $signals];
    }

    $score += $this->config->getColorSignalScore();
    $signals[] = $this->config->getColorSignalLabel();

    return [$score, $signals];
}
/**
 * Adds the advisory score once for every configured advisory signal that
 * occurs in the prompt, and records each match with the advisory prefix.
 *
 * Each configured signal is lower-cased before the substring check; the prompt
 * is used as given.
 *
 * @param string[] $signals Signals collected so far.
 * @return array{0:int,1:string[]} Updated score and signal list.
 */
private function applyAdvisorySignals(string $prompt, int $score, array $signals): array
{
    foreach ($this->config->getAdvisorySignals() as $advisorySignal) {
        if (!str_contains($prompt, mb_strtolower($advisorySignal))) {
            continue;
        }

        $score += $this->config->getAdvisorySignalScore();
        // Recorded as "<prefix><signal>" using the configured spelling of the signal.
        $signals[] = $this->config->getAdvisorySignalPrefix() . $advisorySignal;
    }

    return [$score, $signals];
}
/**
 * Packs intent, score and signals into an array with the keys
 * `intent`, `score` and `signals`, in that order.
 *
 * @param string[] $signals
 * @return array{intent:string, score:int, signals:string[]}
 */
private function buildDetectionResult(string $intent, int $score, array $signals): array
{
    $result = [];
    $result['intent'] = $intent;
    $result['score'] = $score;
    $result['signals'] = $signals;

    return $result;
}
}

View File

@@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever
public function __construct(
private string $projectDir,
private LoggerInterface $agentLogger,
private StopWords $stopWords,
) {
}
@@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever
return true;
}
return StopWords::isStopWord($token);
return $this->stopWords->isStopWord($token);
}
private function normalizeText(string $value): string
@@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever
* token:string,
* chunk_id:string,
* document_id:string,
* chunk_index:?int,
* chunk_index:?int,
* tf:int,
* title_tf:int,
* df:int

View File

@@ -1,6 +1,5 @@
<?php
declare(strict_types=1);
namespace App\Knowledge\Retrieval;
@@ -23,8 +22,8 @@ final readonly class NdjsonLexicalIndexBuilder
public function __construct(
private string $projectDir,
private LoggerInterface $agentLogger,
)
{
private StopWords $stopWords,
) {
}
/**
@@ -345,7 +344,7 @@ final readonly class NdjsonLexicalIndexBuilder
return true;
}
return StopWords::isStopWord($token);
return $this->stopWords->isStopWord($token);
}
private function normalizeText(string $value): string

View File

@@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval;
use App\Knowledge\StopWords;
final class QueryCleaner
final readonly class QueryCleaner
{
public function __construct(
private StopWords $stopWords
) {
}
/**
* Cleans a query strictly for retrieval purposes.
*
@@ -66,7 +71,7 @@ final class QueryCleaner
}
// Remove stop words
if (StopWords::isStopWord($token)) {
if ($this->stopWords->isStopWord($token)) {
continue;
}

View File

@@ -4,62 +4,25 @@ declare(strict_types=1);
namespace App\Knowledge;
final class StopWords
use App\Config\StopWordsConfig;
final readonly class StopWords
{
/**
* Retrieval-optimierte Stopwortliste (Deutsch).
*
* WICHTIG:
* - Keine Negationen entfernen
* - Keine Fragewörter entfernen
* - Keine fachlichen Begriffe entfernen
* - Nur echte Füll- und Strukturwörter
*/
private const STOP_WORDS = [
'mit',
// Artikel
'der', 'die', 'das',
'ein', 'eine', 'einer', 'eines',
'den', 'dem', 'des',
// Konjunktionen
'und', 'oder', 'aber', 'sowie',
// Schwache Pronomen
'ich', 'du', 'er', 'sie', 'es',
'wir', 'ihr',
// Füllwörter
'halt', 'eben', 'auch', 'schon',
'noch', 'mal', 'bitte', 'danke',
// Strukturwörter
'also', 'nun', 'tja',
'dann', 'danach', 'davor',
'hier', 'dort',
// Zeit-Füller (kontextarm)
'heute', 'gestern', 'morgen',
// Höflichkeits-/Modalformen
'könnte', 'kannst', 'kann',
'würde', 'würdest', 'würden',
];
/**
* Gibt die vollständige Stopwortliste zurück.
*/
public static function getStopWords(): array
{
return self::STOP_WORDS;
public function __construct(
private StopWordsConfig $config
) {
}
/**
* Prüft, ob ein Wort ein Stopwort ist.
* @return string[]
*/
public static function isStopWord(string $word): bool
public function getStopWords(): array
{
return in_array($word, self::STOP_WORDS, true);
return $this->config->getStopWords();
}
public function isStopWord(string $word): bool
{
return in_array($word, $this->config->getStopWords(), true);
}
}