normalizeBlockText($prompt);
        $urlContent = $this->normalizeBlockText($urlContent);
        $swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);

        // Any non-empty result array switches the rule blocks below into "shop results present" mode.
        $hasShopResults = $shopResults !== [];
        $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);

        $systemBlock = $this->buildSystemBlock();
        $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
        $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
        $responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion);
        $knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
        $userBlock = $this->buildUserBlock($prompt);

        // Assemble everything except the conversation history first: this text is handed to
        // buildContextBlock(), which uses it to size the history when full context is not requested.
        $fixedPrompt = $this->implodeBlocks([
            $systemBlock,
            $shopBlock,
            $outputPriorityBlock,
            $responseFormatBlock,
            $knowledgeBlock,
            $userBlock,
        ]);

        $contextBlock = $this->buildContextBlock(
            userId: $userId,
            fixedPrompt: $fixedPrompt,
            fullContext: (bool) $fullContext
        );

        // Final order: the conversation context sits between the knowledge block and the user question.
        return $this->implodeBlocks([
            $systemBlock,
            $shopBlock,
            $outputPriorityBlock,
            $responseFormatBlock,
            $knowledgeBlock,
            $contextBlock,
            $userBlock,
        ]);
    }

    /**
     * Build the SYSTEM block from the active system prompt.
     *
     * Every literal "{% now %}" in the stored prompt content is replaced with the current
     * timestamp formatted as "Y-m-d H:i:s". The timestamp comes from a DateTimeImmutable
     * constructed without arguments, i.e. "now" in PHP's default timezone.
     *
     * @return string The normalized prompt content prefixed with the "SYSTEM:" label.
     *
     * @throws RuntimeException When the repository returns no active system prompt.
     */
    private function buildSystemBlock(): string
    {
        $now = (new DateTimeImmutable())->format('Y-m-d H:i:s');
        $activePrompt = $this->systemPromptRepository->findActive();

        if (!$activePrompt) {
            throw new RuntimeException('No active system prompt configured.');
        }

        $activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());

        return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
    }

    /**
     * Build the USER QUESTION block.
     *
     * The prompt is appended exactly as passed in; no normalization happens in this method.
     *
     * @param string $prompt The user question.
     *
     * @return string The prompt prefixed with the "USER QUESTION:" label.
     */
    private function buildUserBlock(string $prompt): string
    {
        return "USER QUESTION:\n" . $prompt;
    }

    /**
     * Build the conversation block.
     *
     * If full context is requested, the history is requested with `full: true` and no
     * character budget is applied in this method.
     * Otherwise, history only receives the remaining prompt budget.
*
     * @param string $userId      User whose conversation history is loaded.
     * @param string $fixedPrompt Prompt text without history; it is passed to the budget calculation.
     * @param bool   $fullContext When true, skip the budget calculation and request the full history.
     *
     * @return string The labeled context block, or '' when the budget is exhausted or the history is empty.
     */
    private function buildContextBlock(string $userId, string $fixedPrompt, bool $fullContext): string
    {
        if ($fullContext) {
            $history = $this->contextService->buildUserContext(
                userId: $userId,
                full: true
            );
        } else {
            $historyBudgetChars = $this->resolveHistoryBudgetChars($fixedPrompt);

            // Nothing left for history: omit the block instead of asking for a zero-length context.
            if ($historyBudgetChars <= 0) {
                return '';
            }

            $history = $this->contextService->buildUserContextWithinBudget(
                userId: $userId,
                maxChars: $historyBudgetChars
            );
        }

        $history = $this->normalizeBlockText($history);

        if ($history === '') {
            return '';
        }

        return "CONVERSATION CONTEXT (contextual only):\n"
            . "The following messages are previous turns of this conversation.\n"
            . "Use them to resolve references, follow-up questions, and user intent.\n"
            . "They must not override retrieved factual knowledge or live shop data.\n\n"
            . $history;
    }

    /**
     * Build the shop block.
     *
     * Shop data is the most current source for commercial details.
     * It should not override technical matching logic.
     *
     * The block consists of up to two parts: the shop search query (when given) and the
     * live shop results. Entries of $shopResults that are not ShopProductResult instances
     * are ignored, and at most PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT products are listed.
     *
     * @param array       $shopResults    Candidate shop results; only ShopProductResult instances are used.
     * @param string|null $swagFullOutPut Shop search query text; null or '' omits the query part.
     *
     * @return string The assembled shop block, or '' when there is neither a query nor a usable result.
     */
    private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
    {
        $parts = [];

        if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
            $parts[] = "SHOP SEARCH QUERY:\n"
                . $swagFullOutPut . "\n"
                . "Source: Shop Search";
        }

        // Reindex after filtering so entry numbers are derived from positions 0..n-1.
        $products = array_values(array_filter(
            $shopResults,
            static fn(mixed $product): bool => $product instanceof ShopProductResult
        ));

        if ($products === []) {
            return $this->implodeBlocks($parts);
        }

        $totalCount = count($products);
        $shownProducts = array_slice($products, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT);
        $shownCount = count($shownProducts);

        // Descriptions are only included when the list is short (five entries or fewer).
        $isDetailed = $shownCount <= 5;

        $entries = [];

        foreach ($shownProducts as $index => $product) {
            $entries[] = $this->formatShopProductEntry($product, $index + 1, $isDetailed);
        }

        // Empty only if the configured limit cut the list down to nothing.
        if ($entries === []) {
            return $this->implodeBlocks($parts);
        }

        $header = "LIVE SHOP RESULTS (authoritative for current commercial details):\n"
            . "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n"
            . "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n"
            . "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n"
            . "Do not infer undocumented technical specifications from shop data.\n"
            . "Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n"
            . "Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.";

        // Tell the model when the list was truncated.
        if ($totalCount > $shownCount) {
            $header .= "\n"
                . "Only the top {$shownCount} ranked shop results are shown here out of {$totalCount} total results.";
        }

        $parts[] = $header . "\n\n" . implode("\n\n", $entries);

        return $this->implodeBlocks($parts);
    }

    /**
     * Format one shop product as a multi-line prompt entry.
     *
     * Optional fields are emitted only when they are truthy; availability is emitted
     * whenever it is not null so that an explicit "no" is preserved.
     *
     * @param ShopProductResult $product    Product to render.
     * @param int               $position   1-based number shown in front of the product name.
     * @param bool              $isDetailed Whether the description may be included.
     *
     * @return string The entry lines joined with "\n".
     */
    private function formatShopProductEntry(ShopProductResult $product, int $position, bool $isDetailed): string
    {
        $entryParts = [
            "[{$position}] " . $this->normalizeBlockText($product->name),
        ];

        if ($product->productNumber) {
            $entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
        }

        if ($product->manufacturer) {
            $entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
        }

        if ($product->price) {
            $entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
        }

        if ($product->available !== null) {
            $entryParts[] = "Available: " . ($product->available ? 'yes' : 'no');
        }

        foreach ($product->highlights as $highlight) {
            $highlight = $this->normalizeBlockText((string) $highlight);

            if ($highlight !== '') {
                $entryParts[] = "- " . $highlight;
            }
        }

        if ($product->url) {
            $entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
        }

        if ($product->productImage) {
            $entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
        }

        if ($isDetailed && $product->description) {
            $entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
        }

        if ($product->customFields) {
            $entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
        }

        return implode("\n", $entryParts);
    }

    /**
     * Build a small priority block that tells the model what to surface first.
     *
     * @param bool $hasShopResults Whether shop results are part of the prompt.
     *
     * @return string The priority instructions, or '' when there are no shop results.
     */
    private function buildOutputPriorityBlock(bool $hasShopResults): string
    {
        if (!$hasShopResults) {
            return '';
        }

        return "OUTPUT PRIORITY:\n"
            . "Use retrieved knowledge first to determine the technically matching product or answer.\n"
            . "If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n"
            . "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
    }

    /**
     * Build the response-format rules.
     *
     * A fixed base list is always emitted. Further rules are appended depending on whether
     * shop results are present, whether the question was classified as technical, and
     * whether the prompt asks for an accessory or bundle.
     *
     * @param string $prompt                     User question; only used for the accessory/bundle check.
     * @param bool   $hasShopResults             Whether shop results are part of the prompt.
     * @param bool   $isTechnicalProductQuestion Whether the question was classified as technical.
     *
     * @return string All rules joined with "\n", starting with the "RESPONSE FORMAT RULES:" label.
     */
    private function buildResponseFormatBlock(
        string $prompt,
        bool $hasShopResults,
        bool $isTechnicalProductQuestion
    ): string {
        $rules = [
            "RESPONSE FORMAT RULES:",
            "- Keep normal spacing between all words. Never fuse words together.",
            "- Use short, clean paragraphs or short labeled sections.",
            "- Do not use persuasive or promotional wording.",
            "- Do not repeat the same fact in slightly different wording.",
            "- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.",
            "- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.",
            "- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.",
            "- Do not combine technical identity from one source with commercial fields from a different product.",
            "- Product number, price, availability, and URL must belong to the same explicitly grounded product.",
        ];

        if ($hasShopResults) {
            // Each rule must stay on a single physical line: a line break inside the literal
            // would split the bullet in the emitted prompt.
            $rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.";
            $rules[] = "- Keep price, availability, and URL on separate lines when they are present.";
            $rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
            $rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
            $rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
        } else {
            $rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
        }

        if ($isTechnicalProductQuestion) {
            $rules[] = "- Write like technical documentation: precise, neutral, and source-close.";
            $rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.";
            $rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
        }

        if ($this->asksForAccessoryOrBundle($prompt)) {
            $rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
            $rules[] = "- The main device must come first. The accessory must not replace the main device.";
            $rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.";
            $rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.";
        }

        return implode("\n", $rules);
    }

    /**
     * Build the knowledge block.
     *
     * Retrieved knowledge remains the main source for technical matching and explanation.
     * Shop data is preferred for current commercial fields.
*
     * The language rules and fact-grounding rules are emitted together with the retrieved
     * chunks, i.e. only when at least one chunk is non-blank after normalization. URL
     * content is appended as its own part whenever it is non-empty.
     *
     * @param array  $knowledgeChunks Retrieved chunks; each is cast to string and blank ones are skipped.
     * @param string $urlContent      Content fetched from a URL; '' omits the URL part.
     * @param string $prompt          User question; used to classify the question as technical or not.
     * @param bool   $hasShopResults  Whether shop results are part of the prompt.
     *
     * @return string The assembled knowledge block, or '' when there are no usable chunks and no URL content.
     */
    private function buildKnowledgeBlock(
        array $knowledgeChunks,
        string $urlContent,
        string $prompt,
        bool $hasShopResults
    ): string {
        $knowledgeParts = [];
        $lines = [];

        // Number chunks by position, not by the caller's array keys: sparse keys would give
        // misleading labels and non-numeric string keys would raise a TypeError on `+ 1`.
        // Skipped blank chunks still consume their position, so labels stay tied to the
        // chunk's position in the input.
        foreach (array_values($knowledgeChunks) as $index => $chunk) {
            $chunk = $this->normalizeBlockText((string) $chunk);

            if ($chunk === '') {
                continue;
            }

            $label = $index + 1;
            $lines[] = "[{$label}] {$chunk}";
        }

        if ($lines !== []) {
            // Only classify the question when the rules that depend on it are emitted.
            $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);

            $parts = [
                "LANGUAGE RULES:\n"
                    . implode("\n", $this->buildLanguageRules()),
                "FACT GROUNDING RULES:\n"
                    . implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
                "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n"
                    . "Source: Documents\n"
                    . implode("\n\n", $lines),
            ];

            $knowledgeParts[] = implode("\n\n", $parts);
        }

        if ($urlContent !== '') {
            $knowledgeParts[] = "CONTENT FROM URL (authoritative if user-provided):\n"
                . "Source: URL\n"
                . $urlContent;
        }

        return $this->implodeBlocks($knowledgeParts);
    }

    /**
     * Resolve how many characters may still be used by history.
     *
     * The active model num_ctx is converted into a conservative prompt budget.
     * Shop, knowledge and user question are fixed priority blocks.
     * History only receives the remaining space.
*
     * Calculation: num_ctx minus an output reserve and a safety reserve (each a clamped
     * ratio of num_ctx) gives the prompt budget in tokens, with a configured minimum.
     * That budget is converted to characters, then the length of the fixed prompt and a
     * padding constant are subtracted.
     *
     * @param string $fixedPrompt Prompt text without history; measured in characters with mb_strlen().
     *
     * @return int Characters available for history; never negative.
     */
    private function resolveHistoryBudgetChars(string $fixedPrompt): int
    {
        $numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();

        $outputReserveTokens = $this->clamp(
            (int) floor($numCtx * PromptBuilderConfig::OUTPUT_RESERVE_RATIO),
            PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS,
            PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS
        );

        $safetyReserveTokens = $this->clamp(
            (int) floor($numCtx * PromptBuilderConfig::SAFETY_RESERVE_RATIO),
            PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS,
            PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS
        );

        $promptBudgetTokens = max(
            PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS,
            $numCtx - $outputReserveTokens - $safetyReserveTokens
        );

        $promptBudgetChars = $promptBudgetTokens * PromptBuilderConfig::CHARS_PER_TOKEN;

        $remaining = $promptBudgetChars
            - mb_strlen($fixedPrompt)
            - PromptBuilderConfig::HISTORY_PADDING_CHARS;

        // NOTE(review): CHARS_PER_TOKEN is not visible here. If it is fractional, $remaining is a
        // float; floor it explicitly so the declared int return type always holds.
        return max(0, (int) floor($remaining));
    }

    /**
     * Rules that keep the answer in the language of the user question.
     *
     * @return string[]
     */
    private function buildLanguageRules(): array
    {
        return [
            "- Answer only in the same language as the user question.",
            "- All headings, labels, notes, and structural elements must be in the same language as the user question.",
            "- Do not switch languages unless the user does.",
            "- If headings are used, write them in the user's language.",
        ];
    }

    /**
     * Rules that restrict the answer to facts present in the provided sources.
     *
     * A fixed base list is always returned. Shop-specific rules (or their no-shop
     * counterpart) and the rules for technical product questions are appended after it.
     * Every rule is a single-line string; a line break inside a literal would split the
     * bullet in the emitted prompt.
     *
     * @param bool $isTechnicalProductQuestion Whether the question was classified as technical.
     * @param bool $hasShopResults             Whether shop results are part of the prompt.
     *
     * @return string[]
     */
    private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
    {
        $rules = [
            "- State only facts that are explicitly present in the provided sources.",
            "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
            "- Do not invent missing values.",
            "- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
            "- Do not claim that information is missing if it appears in the provided sources.",
            "- Do not compare with other products unless those products are also present in the provided sources.",
            "- Prefer source-faithful wording over persuasive wording.",
            "- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
            "- Clearly separate explicit facts from inferences.",
            "- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
            "- If a sentence cannot be traced to the provided sources, do not write it.",
            "- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.",
            "- If the sources do not identify a suitable product, do not invent one.",
        ];

        if ($hasShopResults) {
            $rules = array_merge($rules, [
                "- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
                "- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
                "- When shop results are present and relevant, include current price and the actual URL if available.",
                "- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
                "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
                "- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
                "- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.",
                "- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
                "- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
            ]);
        } else {
            $rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
            $rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
        }

        if ($isTechnicalProductQuestion) {
            $rules = array_merge($rules, [
                "- For technical product questions, answer primarily with explicitly stated facts.",
                "- Behave like a technical documentation assistant, not like a sales advisor.",
                "- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
                "- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
                "- Do not translate technical facts into business value unless the source explicitly does so.",
                "- Do not recommend process changes unless explicitly present in the source.",
                "- Do not use persuasive summaries or advisory conclusions.",
                "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
                "- Use neutral engineering language.",
                "- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
                "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
                "- If the source lists application areas, repeat only those areas and do not broaden them.",
                "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
                "- If the source states only a threshold function, do not expand it into broader control logic.",
                "- If a detail is not explicitly stated in the provided sources, say so plainly.",
                "- Prefer short, source-close sentences over explanatory expansion.",
                "- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.",
            ]);
        }

        return $rules;
    }

    /**
     * Join prompt blocks with a blank line between them.
     *
     * Each string block is normalized first; non-string entries and blocks that are
     * empty after normalization are dropped.
     *
     * @param array $blocks Candidate blocks in output order.
     *
     * @return string The remaining blocks joined with "\n\n"; '' when none remain.
     */
    private function implodeBlocks(array $blocks): string
    {
        $normalized = array_map(
            fn($block): string => is_string($block) ? $this->normalizeBlockText($block) : '',
            $blocks
        );

        $nonEmpty = array_values(array_filter(
            $normalized,
            static fn(string $block): bool => $block !== ''
        ));

        return implode("\n\n", $nonEmpty);
    }

    /**
     * Normalize optional text, mapping "nothing useful" to null.
     *
     * @param string|null $value Text to normalize.
     *
     * @return string|null The normalized text, or null when the input is null or normalizes to ''.
     */
    private function normalizeNullableBlockText(?string $value): ?string
    {
        if ($value === null) {
            return null;
        }

        $normalized = $this->normalizeBlockText($value);

        return $normalized === '' ? null : $normalized;
    }

    /**
     * Normalize whitespace in a block of prompt text.
     *
     * Steps, in order: unify line endings to "\n", turn non-breaking spaces into regular
     * spaces, trim both ends, strip spaces/tabs in front of line breaks, collapse three or
     * more consecutive line breaks into two, and collapse runs of spaces/tabs into one space.
     *
     * @param string $value Raw text.
     *
     * @return string The normalized text.
     */
    private function normalizeBlockText(string $value): string
    {
        $value = str_replace(["\r\n", "\r"], "\n", $value);
        $value = str_replace("\u{00A0}", ' ', $value);
        $value = trim($value);

        // Strip trailing spaces/tabs BEFORE collapsing blank lines. Otherwise lines that
        // contain only whitespace hide consecutive line breaks from the collapse below and
        // "a\n \n \nb" would end up as "a\n\n\nb".
        $value = preg_replace("/[ \t]+\n/", "\n", $value) ?? $value;
        $value = preg_replace("/\n{3,}/", "\n\n", $value) ?? $value;
        $value = preg_replace("/[ \t]{2,}/", " ", $value) ?? $value;

        return $value;
    }

    /**
     * Heuristic: does the prompt look like a technical question about a product?
     *
     * True when at least two configured keywords occur in the lowercased prompt, or when
     * the prompt contains two or more letters, optionally followed by one whitespace
     * character, followed by two to five digits (e.g. "abc 123").
     *
     * Keywords are compared as configured against the lowercased prompt.
     * NOTE(review): this presumably requires the keywords in PromptBuilderConfig to be
     * lowercase — confirm.
     *
     * @param string $prompt User question.
     *
     * @return bool
     */
    private function isLikelyTechnicalProductQuestion(string $prompt): bool
    {
        $normalized = mb_strtolower($prompt, 'UTF-8');
        $matches = 0;

        foreach (PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS as $keyword) {
            if (!str_contains($normalized, $keyword)) {
                continue;
            }

            // Two hits are enough; no need to scan the remaining keywords.
            if (++$matches >= 2) {
                return true;
            }
        }

        return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
    }

    /**
     * Does the prompt mention any configured accessory/bundle keyword?
     *
     * Keywords are compared as configured against the lowercased prompt.
     *
     * @param string $prompt User question.
     *
     * @return bool True on the first keyword found in the prompt.
     */
    private function asksForAccessoryOrBundle(string $prompt): bool
    {
        $normalized = mb_strtolower($prompt, 'UTF-8');

        foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $keyword) {
            if (str_contains($normalized, $keyword)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Constrain a value to the range [$min, $max].
     *
     * If $min is greater than $max, $min is returned.
     *
     * @param int $value Value to constrain.
     * @param int $min   Lower bound.
     * @param int $max   Upper bound.
     *
     * @return int
     */
    private function clamp(int $value, int $min, int $max): int
    {
        return max($min, min($max, $value));
    }
}