543 lines
24 KiB
PHP
543 lines
24 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Agent;
|
|
|
|
use App\Commerce\Dto\ShopProductResult;
|
|
use App\Config\PromptBuilderConfig;
|
|
use App\Context\ContextService;
|
|
use App\Repository\SystemPromptRepository;
|
|
use App\Service\ModelGenerationConfigProvider;
|
|
use DateTimeImmutable;
|
|
use RuntimeException;
|
|
|
|
final readonly class PromptBuilder
|
|
{
|
|
|
|
/**
 * Receive the collaborators used while assembling a prompt.
 *
 * @param ContextService $contextService Queried for the user's conversation history.
 * @param SystemPromptRepository $systemPromptRepository Queried for the active system prompt.
 * @param ModelGenerationConfigProvider $modelGenerationConfigProvider Queried for the active model's num_ctx.
 */
public function __construct(
    private ContextService $contextService,
    private SystemPromptRepository $systemPromptRepository,
    private ModelGenerationConfigProvider $modelGenerationConfigProvider,
) {}
|
|
|
|
/**
 * Assemble the complete prompt text that is sent to the LLM.
 *
 * Block order in the result: system prompt, shop data, output priority,
 * response format rules, knowledge, conversation context, user question.
 * Blocks that turn out empty are left out. The conversation context is built
 * last because its size depends on how much room the other blocks already take.
 *
 * @param string $prompt The user's question.
 * @param string $userId User whose conversation history is loaded.
 * @param string $urlContent Text taken from a URL; empty string when there is none.
 * @param string[] $knowledgeChunks Retrieved document chunks.
 * @param ShopProductResult[] $shopResults Live shop search results.
 * @param bool|null $fullContext True to load the full history instead of a budgeted one; null counts as false.
 * @param string|null $swagFullOutPut Shop search query text rendered above the shop results, if any.
 *
 * @return string The prompt, with blocks separated by one blank line.
 *
 * @throws RuntimeException When no active system prompt is configured.
 */
public function build(
    string $prompt,
    string $userId,
    string $urlContent,
    array $knowledgeChunks,
    array $shopResults = [],
    ?bool $fullContext = false,
    ?string $swagFullOutPut = ''
): string {
    $question = $this->normalizeBlockText($prompt);
    $pageText = $this->normalizeBlockText($urlContent);
    $shopQuery = $this->normalizeNullableBlockText($swagFullOutPut);

    $shopDataGiven = $shopResults !== [];
    $technicalQuestion = $this->isLikelyTechnicalProductQuestion($question);

    // Everything that precedes the conversation context, already in final order.
    $leadingBlocks = [
        $this->buildSystemBlock(),
        $this->buildShopBlock($shopResults, $shopQuery),
        $this->buildOutputPriorityBlock($shopDataGiven),
        $this->buildResponseFormatBlock($question, $shopDataGiven, $technicalQuestion),
        $this->buildKnowledgeBlock($knowledgeChunks, $pageText, $question, $shopDataGiven),
    ];
    $questionBlock = $this->buildUserBlock($question);

    // The history budget is measured against the prompt as it would look without any history.
    $historyBlock = $this->buildContextBlock(
        userId: $userId,
        fixedPrompt: $this->implodeBlocks([...$leadingBlocks, $questionBlock]),
        fullContext: (bool) $fullContext
    );

    return $this->implodeBlocks([...$leadingBlocks, $historyBlock, $questionBlock]);
}
|
|
|
|
/**
 * Render the SYSTEM block from the active system prompt.
 *
 * Every "{% now %}" placeholder in the stored prompt is replaced with the
 * current date and time formatted as "Y-m-d H:i:s".
 *
 * @return string "SYSTEM:" followed by the normalized prompt text.
 *
 * @throws RuntimeException When the repository returns no active prompt.
 */
private function buildSystemBlock(): string
{
    $timestamp = (new DateTimeImmutable())->format('Y-m-d H:i:s');

    $stored = $this->systemPromptRepository->findActive()
        ?: throw new RuntimeException('No active system prompt configured.');

    $content = str_replace('{% now %}', $timestamp, $stored->getContent());

    return "SYSTEM:\n" . $this->normalizeBlockText($content);
}
|
|
|
|
/**
 * Render the USER QUESTION block.
 *
 * @param string $prompt Question text; inserted exactly as given.
 *
 * @return string The heading line followed by the question.
 */
private function buildUserBlock(string $prompt): string
{
    return "USER QUESTION:\n{$prompt}";
}
|
|
|
|
/**
 * Render the conversation-history block.
 *
 * With $fullContext the complete history is requested from the context
 * service. Otherwise the history is requested with a character limit equal
 * to what is left of the prompt budget after $fixedPrompt; when nothing is
 * left, the context service is not called at all.
 *
 * @param string $userId User whose history is loaded.
 * @param string $fixedPrompt Prompt text without history, used to measure the space already taken.
 * @param bool $fullContext Whether to bypass the budget and load the full history.
 *
 * @return string The block including its instruction header, or '' when there is no history to show.
 */
private function buildContextBlock(string $userId, string $fixedPrompt, bool $fullContext): string
{
    // null means "no limit" (full context requested).
    $availableChars = $fullContext ? null : $this->resolveHistoryBudgetChars($fixedPrompt);

    if ($availableChars !== null && $availableChars <= 0) {
        return '';
    }

    $rawHistory = $availableChars === null
        ? $this->contextService->buildUserContext(userId: $userId, full: true)
        : $this->contextService->buildUserContextWithinBudget(userId: $userId, maxChars: $availableChars);

    $history = $this->normalizeBlockText($rawHistory);

    if ($history === '') {
        return '';
    }

    $header =
        "CONVERSATION CONTEXT (contextual only):\n" .
        "The following messages are previous turns of this conversation.\n" .
        "Use them to resolve references, follow-up questions, and user intent.\n" .
        "They must not override retrieved factual knowledge or live shop data.\n\n";

    return $header . $history;
}
|
|
|
|
/**
 * Build the shop block.
 *
 * Shop data is the most current source for commercial details.
 * It should not override technical matching logic.
 *
 * The block consists of up to two sections: the shop search query (when one
 * was given) and the numbered live shop results with their instruction
 * header. At most PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT results are
 * rendered; product descriptions are only included when five or fewer
 * results are rendered.
 *
 * Optional text fields are rendered whenever they contain visible text after
 * normalization. A value such as "0" is therefore kept, and a value made of
 * whitespace only does not produce an empty "Label:" line.
 *
 * @param array<array-key, mixed> $shopResults Shop results; entries that are not ShopProductResult instances are ignored.
 * @param string|null $swagFullOutPut Shop search query text, or null/'' when there is none.
 *
 * @return string The rendered block, or '' when there is nothing to show.
 */
private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
{
    $parts = [];

    if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
        $parts[] =
            "SHOP SEARCH QUERY:\n" .
            $swagFullOutPut . "\n" .
            "Source: Shop Search";
    }

    // array_values() re-indexes so the entry numbers below start at 1 without gaps.
    $normalizedShopResults = array_values(array_filter(
        $shopResults,
        static fn (mixed $product): bool => $product instanceof ShopProductResult
    ));

    if ($normalizedShopResults === []) {
        return $this->implodeBlocks($parts);
    }

    $totalCount = count($normalizedShopResults);
    $limitedShopResults = array_slice($normalizedShopResults, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT);
    $shownCount = count($limitedShopResults);
    $isDetailed = $shownCount <= 5;
    $lines = [];

    foreach ($limitedShopResults as $i => $product) {
        $n = $i + 1;
        $entryParts = [
            "[{$n}] " . $this->normalizeBlockText($product->name),
            $this->formatShopField('Product number', $product->productNumber),
            $this->formatShopField('Manufacturer', $product->manufacturer),
            $this->formatShopField('Price', $product->price),
        ];

        if ($product->available !== null) {
            $entryParts[] = "Available: " . ($product->available ? 'yes' : 'no');
        }

        foreach ($product->highlights as $highlight) {
            $highlight = $this->normalizeBlockText((string) $highlight);

            if ($highlight !== '') {
                $entryParts[] = "- " . $highlight;
            }
        }

        $entryParts[] = $this->formatShopField('URL', $product->url);
        $entryParts[] = $this->formatShopField('Product image', $product->productImage);

        if ($isDetailed) {
            $entryParts[] = $this->formatShopField('Description', $product->description);
        }

        $entryParts[] = $this->formatShopField('Meta information', $product->customFields);

        // formatShopField() returns null for absent values; those lines are dropped here.
        $lines[] = implode("\n", array_filter(
            $entryParts,
            static fn (?string $part): bool => $part !== null
        ));
    }

    // $lines is only empty when the configured result limit yields no entries.
    if ($lines !== []) {
        $header =
            "LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
            "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
            "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
            "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
            "Do not infer undocumented technical specifications from shop data.\n" .
            "Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
            "Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.";

        if ($totalCount > $shownCount) {
            $header .= "\n" .
                "Only the top " . $shownCount . " ranked shop results are shown here out of {$totalCount} total results.";
        }

        $parts[] = $header . "\n\n" . implode("\n\n", $lines);
    }

    return $this->implodeBlocks($parts);
}

/**
 * Format one optional "Label: value" line of a shop result entry.
 *
 * @param string $label Text placed in front of the colon.
 * @param mixed $value Raw field value; anything that is not a string counts as absent.
 *
 * @return string|null The formatted line, or null when the value holds no visible text.
 */
private function formatShopField(string $label, mixed $value): ?string
{
    if (!is_string($value)) {
        return null;
    }

    // Normalize before testing for emptiness: "0" is a real value, "   " is not.
    $text = $this->normalizeBlockText($value);

    return $text === '' ? null : "{$label}: {$text}";
}
|
|
|
|
/**
 * Render the short instruction block that tells the model in which order to
 * use retrieved knowledge and shop data.
 *
 * @param bool $hasShopResults Whether shop results were supplied for this prompt.
 *
 * @return string The instruction text (ending with a newline), or '' when there are no shop results.
 */
private function buildOutputPriorityBlock(bool $hasShopResults): string
{
    $instructions =
        "OUTPUT PRIORITY:\n" .
        "Use retrieved knowledge first to determine the technically matching product or answer.\n" .
        "If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" .
        "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";

    return $hasShopResults ? $instructions : '';
}
|
|
|
|
/**
 * Render the RESPONSE FORMAT RULES block.
 *
 * The block always starts with the general rules. It is followed by either
 * the shop-specific rules or a single no-shop rule, then the technical
 * documentation rules (for technical product questions), then the accessory
 * rules (when the question matches an accessory keyword).
 *
 * @param string $prompt User question; only inspected for accessory keywords.
 * @param bool $hasShopResults Whether shop results were supplied for this prompt.
 * @param bool $isTechnicalProductQuestion Whether the question was classified as technical.
 *
 * @return string All applicable rules, one per line.
 */
private function buildResponseFormatBlock(
    string $prompt,
    bool $hasShopResults,
    bool $isTechnicalProductQuestion
): string {
    $generalRules = [
        "RESPONSE FORMAT RULES:",
        "- Keep normal spacing between all words. Never fuse words together.",
        "- Use short, clean paragraphs or short labeled sections.",
        "- Do not use persuasive or promotional wording.",
        "- Do not repeat the same fact in slightly different wording.",
        "- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.",
        "- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.",
        "- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.",
        "- Do not combine technical identity from one source with commercial fields from a different product.",
        "- Product number, price, availability, and URL must belong to the same explicitly grounded product.",
    ];

    $shopRules = $hasShopResults
        ? [
            "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.",
            "- Keep price, availability, and URL on separate lines when they are present.",
            "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.",
            "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.",
            "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.",
        ]
        : [
            "- If no shop results are present, do not compensate by inventing external products or external manufacturers.",
        ];

    $technicalRules = $isTechnicalProductQuestion
        ? [
            "- Write like technical documentation: precise, neutral, and source-close.",
            "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.",
            "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.",
        ]
        : [];

    $accessoryRules = $this->asksForAccessoryOrBundle($prompt)
        ? [
            "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.",
            "- The main device must come first. The accessory must not replace the main device.",
            "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.",
            "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
        ]
        : [];

    return implode("\n", [...$generalRules, ...$shopRules, ...$technicalRules, ...$accessoryRules]);
}
|
|
|
|
/**
 * Build the knowledge block.
 *
 * Retrieved knowledge remains the main source for technical matching and explanation.
 * Shop data is preferred for current commercial fields.
 *
 * When at least one non-empty chunk exists, the language rules, the fact
 * grounding rules and the numbered chunks are emitted together. A URL
 * content section follows when $urlContent is not empty.
 *
 * Chunks are numbered consecutively in output order (1, 2, 3, ...). The
 * numbers do not depend on the array keys, so skipped empty chunks, sparse
 * keys or string keys cannot produce gaps or arithmetic errors.
 *
 * @param array<array-key, mixed> $knowledgeChunks Retrieved chunks; each one is cast to string.
 * @param string $urlContent URL text; '' when there is none.
 * @param string $prompt User question, used to detect technical product questions.
 * @param bool $hasShopResults Selects the shop-aware variant of the grounding rules.
 *
 * @return string The rendered block, or '' when there is neither a chunk nor URL content.
 */
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
{
    $knowledgeParts = [];
    $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);

    $lines = [];

    foreach ($knowledgeChunks as $chunk) {
        $chunk = $this->normalizeBlockText((string) $chunk);

        if ($chunk === '') {
            continue;
        }

        // Number by position in the output, not by the input key.
        $n = count($lines) + 1;
        $lines[] = "[{$n}] {$chunk}";
    }

    if ($lines !== []) {
        $parts = [
            "LANGUAGE RULES:\n" .
            implode("\n", $this->buildLanguageRules()),
            "FACT GROUNDING RULES:\n" .
            implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
            "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
            "Source: Documents\n" .
            implode("\n\n", $lines),
        ];

        $knowledgeParts[] = implode("\n\n", $parts);
    }

    if ($urlContent !== '') {
        $knowledgeParts[] =
            "CONTENT FROM URL (authoritative if user-provided):\n" .
            "Source: URL\n" .
            $urlContent;
    }

    return $this->implodeBlocks($knowledgeParts);
}
|
|
|
|
/**
 * Work out how many characters of conversation history still fit.
 *
 * Steps:
 *  1. Read the active model's num_ctx.
 *  2. Reserve a share of it for the model output and a share as safety
 *     margin; each share is a ratio of num_ctx clamped to configured bounds.
 *  3. The rest, but never less than MIN_PROMPT_BUDGET_TOKENS, is the prompt
 *     budget, converted to characters with CHARS_PER_TOKEN.
 *  4. Subtract the length of the already fixed prompt and a padding constant.
 *
 * @param string $fixedPrompt Prompt text without history; its length is measured with mb_strlen().
 *
 * @return int Remaining characters for history, never negative.
 */
private function resolveHistoryBudgetChars(string $fixedPrompt): int
{
    $contextTokens = $this->modelGenerationConfigProvider->getActiveNumCtx();

    $reservedForOutput = $this->clamp(
        (int) floor($contextTokens * PromptBuilderConfig::OUTPUT_RESERVE_RATIO),
        PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS,
        PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS
    );
    $reservedForSafety = $this->clamp(
        (int) floor($contextTokens * PromptBuilderConfig::SAFETY_RESERVE_RATIO),
        PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS,
        PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS
    );

    $usableTokens = max(
        PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS,
        $contextTokens - $reservedForOutput - $reservedForSafety
    );
    $usableChars = $usableTokens * PromptBuilderConfig::CHARS_PER_TOKEN;

    $leftover = $usableChars - mb_strlen($fixedPrompt) - PromptBuilderConfig::HISTORY_PADDING_CHARS;

    return max(0, $leftover);
}
|
|
|
|
/**
 * Language instructions that are placed in front of the retrieved knowledge.
 *
 * @return string[] One rule per element, each starting with "- ".
 */
private function buildLanguageRules(): array
{
    $rules = [
        "- Answer only in the same language as the user question.",
        "- All headings, labels, notes, and structural elements must be in the same language as the user question.",
        "- Do not switch languages unless the user does.",
        "- If headings are used, write them in the user's language.",
    ];

    return $rules;
}
|
|
|
|
/**
 * Fact grounding instructions that are placed in front of the retrieved knowledge.
 *
 * The list is composed of three groups in this order: the general rules,
 * then either the shop-aware rules or the two no-shop rules, then the extra
 * rules for technical product questions.
 *
 * @param bool $isTechnicalProductQuestion Appends the technical documentation rules when true.
 * @param bool $hasShopResults Chooses between the shop-aware and the no-shop rule group.
 *
 * @return string[] One rule per element, each starting with "- ".
 */
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
{
    $generalRules = [
        "- State only facts that are explicitly present in the provided sources.",
        "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
        "- Do not invent missing values.",
        "- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
        "- Do not claim that information is missing if it appears in the provided sources.",
        "- Do not compare with other products unless those products are also present in the provided sources.",
        "- Prefer source-faithful wording over persuasive wording.",
        "- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
        "- Clearly separate explicit facts from inferences.",
        "- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
        "- If a sentence cannot be traced to the provided sources, do not write it.",
        "- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.",
        "- If the sources do not identify a suitable product, do not invent one.",
    ];

    $sourcePriorityRules = $hasShopResults
        ? [
            "- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
            "- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
            "- When shop results are present and relevant, include current price and the actual URL if available.",
            "- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
            "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
            "- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
            "- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.",
            "- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
            "- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
        ]
        : [
            "- Use retrieved knowledge as authoritative for factual answers.",
            "- If no shop results are present, do not compensate with external recommendations or external product suggestions.",
        ];

    $technicalRules = $isTechnicalProductQuestion
        ? [
            "- For technical product questions, answer primarily with explicitly stated facts.",
            "- Behave like a technical documentation assistant, not like a sales advisor.",
            "- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
            "- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
            "- Do not translate technical facts into business value unless the source explicitly does so.",
            "- Do not recommend process changes unless explicitly present in the source.",
            "- Do not use persuasive summaries or advisory conclusions.",
            "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
            "- Use neutral engineering language.",
            "- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
            "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
            "- If the source lists application areas, repeat only those areas and do not broaden them.",
            "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
            "- If the source states only a threshold function, do not expand it into broader control logic.",
            "- If a detail is not explicitly stated in the provided sources, say so plainly.",
            "- Prefer short, source-close sentences over explanatory expansion.",
            "- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.",
        ]
        : [];

    return [...$generalRules, ...$sourcePriorityRules, ...$technicalRules];
}
|
|
|
|
/**
 * Join prompt blocks with one blank line between them.
 *
 * Each string block is normalized first. Blocks that are not strings or
 * that are empty after normalization are left out.
 *
 * @param array<array-key, mixed> $blocks Candidate blocks in output order.
 *
 * @return string The joined text, or '' when no block remains.
 */
private function implodeBlocks(array $blocks): string
{
    $kept = [];

    foreach ($blocks as $candidate) {
        if (!is_string($candidate)) {
            continue;
        }

        $text = $this->normalizeBlockText($candidate);

        if ($text !== '') {
            $kept[] = $text;
        }
    }

    return implode("\n\n", $kept);
}
|
|
|
|
/**
 * Normalize optional text, mapping "nothing to show" to null.
 *
 * @param string|null $value Raw text or null.
 *
 * @return string|null The normalized text, or null when $value is null or empty after normalization.
 */
private function normalizeNullableBlockText(?string $value): ?string
{
    $text = $value === null ? '' : $this->normalizeBlockText($value);

    return $text !== '' ? $text : null;
}
|
|
|
|
/**
 * Normalize whitespace in a piece of prompt text.
 *
 * Applied in this order:
 *  1. CRLF and lone CR become LF.
 *  2. Non-breaking spaces (U+00A0) become regular spaces.
 *  3. Leading and trailing whitespace is trimmed.
 *  4. Spaces and tabs directly in front of a line break are removed.
 *  5. Three or more consecutive line breaks collapse into one blank line.
 *  6. Runs of two or more spaces/tabs collapse into a single space.
 *
 * Step 4 deliberately runs before step 5: lines that contain only blanks
 * must become empty lines first, otherwise they hide consecutive blank lines
 * from the collapse and a second pass would change the result again.
 *
 * @param string $value Raw text.
 *
 * @return string Normalized text; passing the result in again returns it unchanged.
 */
private function normalizeBlockText(string $value): string
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);
    $value = str_replace("\u{00A0}", ' ', $value);
    $value = trim($value);

    // preg_replace() returns null on failure; keep the previous text in that case.
    $value = preg_replace("/[ \t]+\n/", "\n", $value) ?? $value;
    $value = preg_replace("/\n{3,}/", "\n\n", $value) ?? $value;
    $value = preg_replace("/[ \t]{2,}/", " ", $value) ?? $value;

    return $value;
}
|
|
|
|
/**
 * Heuristically decide whether the question is a technical product question.
 *
 * The question counts as technical when either
 *  - at least two entries of PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS
 *    occur in the lower-cased question, or
 *  - the question contains a token of two or more letters, optionally
 *    followed by one whitespace character, followed by two to five digits
 *    (presumably meant to catch model designations such as "AB 123").
 *
 * @param string $prompt User question.
 *
 * @return bool True when one of the two conditions holds.
 */
private function isLikelyTechnicalProductQuestion(string $prompt): bool
{
    $haystack = mb_strtolower($prompt, 'UTF-8');

    $keywordHits = count(array_filter(
        PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS,
        static fn ($keyword): bool => str_contains($haystack, $keyword)
    ));

    if ($keywordHits >= 2) {
        return true;
    }

    // The pattern runs on the original casing; \p{L} matches letters of any case.
    $found = preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt);

    return $found === 1;
}
|
|
|
|
/**
 * Check whether the question contains any accessory keyword.
 *
 * The question is lower-cased and searched for each entry of
 * PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as a plain substring.
 *
 * @param string $prompt User question.
 *
 * @return bool True when at least one keyword occurs.
 */
private function asksForAccessoryOrBundle(string $prompt): bool
{
    $haystack = mb_strtolower($prompt, 'UTF-8');

    $found = false;

    foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $candidate) {
        if (str_contains($haystack, $candidate)) {
            $found = true;
            break;
        }
    }

    return $found;
}
|
|
|
|
/**
 * Limit a value to the range [$min, $max].
 *
 * The upper bound is applied first and the lower bound second, so $min wins
 * if the bounds are given in the wrong order.
 *
 * @param int $value Value to limit.
 * @param int $min Lower bound.
 * @param int $max Upper bound.
 *
 * @return int The limited value.
 */
private function clamp(int $value, int $min, int $max): int
{
    $capped = $value > $max ? $max : $value;

    return $capped < $min ? $min : $capped;
}
|
|
} |