harden retrieve logic
This commit is contained in:
@@ -60,7 +60,6 @@ final readonly class PromptBuilder
|
||||
* @param ShopProductResult[] $shopResults
|
||||
* @param bool|null $fullContext
|
||||
* @param string|null $swagFullOutPut
|
||||
* @return string
|
||||
*/
|
||||
public function build(
|
||||
string $prompt,
|
||||
@@ -71,11 +70,42 @@ final readonly class PromptBuilder
|
||||
?bool $fullContext = false,
|
||||
?string $swagFullOutPut = ''
|
||||
): string {
|
||||
$prompt = $this->normalizeBlockText($prompt);
|
||||
$urlContent = $this->normalizeBlockText($urlContent);
|
||||
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
|
||||
|
||||
$systemBlock = $this->buildSystemBlock();
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt);
|
||||
$userBlock = $this->buildUserBlock($prompt);
|
||||
|
||||
// Build fixed blocks first so history only receives the remaining budget.
|
||||
$fixedPrompt = $this->implodeBlocks([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$knowledgeBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
|
||||
$contextBlock = $this->buildContextBlock(
|
||||
userId: $userId,
|
||||
fixedPrompt: $fixedPrompt,
|
||||
fullContext: (bool) $fullContext
|
||||
);
|
||||
|
||||
return $this->implodeBlocks([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$knowledgeBlock,
|
||||
$contextBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
}
|
||||
|
||||
private function buildSystemBlock(): string
|
||||
{
|
||||
$now = (new DateTimeImmutable())->format('Y-m-d H:i:s');
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 1) SYSTEM INSTRUCTIONS
|
||||
// ------------------------------------------------------------
|
||||
$activePrompt = $this->systemPromptRepository->findActive();
|
||||
|
||||
if (!$activePrompt) {
|
||||
@@ -83,46 +113,13 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
|
||||
$systemBlock = "SYSTEM:\n" . $activeSystemPrompt;
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 2) PRIORITIZED FIXED BLOCKS
|
||||
// ------------------------------------------------------------
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent);
|
||||
$userBlock = "USER QUESTION:\n" . $prompt;
|
||||
return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
|
||||
}
|
||||
|
||||
// Build all fixed blocks first so history only gets the remaining budget.
|
||||
$fixedBlocks = array_filter([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$knowledgeBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
|
||||
$fixedPrompt = implode("\n\n", $fixedBlocks);
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 3) CONVERSATION CONTEXT (AUTHORITATIVE, FILLS REMAINING SPACE)
|
||||
// ------------------------------------------------------------
|
||||
$contextBlock = $this->buildContextBlock(
|
||||
userId: $userId,
|
||||
fixedPrompt: $fixedPrompt,
|
||||
fullContext: (bool) $fullContext
|
||||
);
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// 4) FINAL PROMPT ASSEMBLY
|
||||
// ------------------------------------------------------------
|
||||
$blocks = array_filter([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$knowledgeBlock,
|
||||
$contextBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
|
||||
return implode("\n\n", $blocks);
|
||||
/**
 * Build the final prompt block that carries the user's question.
 *
 * @param string $prompt The question text, appended as given.
 *
 * @return string The "USER QUESTION:" heading followed by the question on the next line.
 */
private function buildUserBlock(string $prompt): string
{
    $heading = "USER QUESTION:\n";

    return $heading . $prompt;
}
|
||||
|
||||
/**
|
||||
@@ -151,33 +148,36 @@ final readonly class PromptBuilder
|
||||
);
|
||||
}
|
||||
|
||||
$history = $this->normalizeBlockText($history);
|
||||
|
||||
if ($history === '') {
|
||||
return '';
|
||||
}
|
||||
|
||||
return
|
||||
"CONVERSATION CONTEXT (authoritative):\n" .
|
||||
"The following messages are the previous turns of this conversation.\n" .
|
||||
"They must be considered when answering the next question.\n\n" .
|
||||
"CONVERSATION CONTEXT (contextual only):\n" .
|
||||
"The following messages are previous turns of this conversation.\n" .
|
||||
"Use them to resolve references, follow-up questions, and user intent.\n" .
|
||||
"They must not override retrieved factual knowledge or live shop data.\n\n" .
|
||||
$history;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the shop block with the highest business priority.
|
||||
* Build the shop block with the highest business priority for product facts.
|
||||
*/
|
||||
private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
|
||||
{
|
||||
$parts = [];
|
||||
|
||||
if ($swagFullOutPut !== null && trim($swagFullOutPut) !== '') {
|
||||
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
|
||||
$parts[] =
|
||||
"SHOP SEARCH QUERY:\n" .
|
||||
trim($swagFullOutPut) . "\n" .
|
||||
$swagFullOutPut . "\n" .
|
||||
"Source: Shop Search";
|
||||
}
|
||||
|
||||
if ($shopResults === []) {
|
||||
return implode("\n\n", $parts);
|
||||
return $this->implodeBlocks($parts);
|
||||
}
|
||||
|
||||
$isDetailed = count($shopResults) <= 5;
|
||||
@@ -190,19 +190,19 @@ final readonly class PromptBuilder
|
||||
|
||||
$n = $i + 1;
|
||||
$entryParts = [
|
||||
"[{$n}] " . $product->name,
|
||||
"[{$n}] " . $this->normalizeBlockText($product->name),
|
||||
];
|
||||
|
||||
if ($product->productNumber) {
|
||||
$entryParts[] = "Product number: " . $product->productNumber;
|
||||
$entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
|
||||
}
|
||||
|
||||
if ($product->manufacturer) {
|
||||
$entryParts[] = "Manufacturer: " . $product->manufacturer;
|
||||
$entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
|
||||
}
|
||||
|
||||
if ($product->price) {
|
||||
$entryParts[] = "Price: " . $product->price;
|
||||
$entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
|
||||
}
|
||||
|
||||
if ($product->available !== null) {
|
||||
@@ -210,23 +210,27 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
foreach ($product->highlights as $highlight) {
|
||||
$entryParts[] = "- " . $highlight;
|
||||
$highlight = $this->normalizeBlockText((string) $highlight);
|
||||
|
||||
if ($highlight !== '') {
|
||||
$entryParts[] = "- " . $highlight;
|
||||
}
|
||||
}
|
||||
|
||||
if ($product->url) {
|
||||
$entryParts[] = "URL: " . $product->url;
|
||||
$entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
|
||||
}
|
||||
|
||||
if ($product->productImage) {
|
||||
$entryParts[] = "Product image: " . $product->productImage;
|
||||
$entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
|
||||
}
|
||||
|
||||
if ($isDetailed && $product->description) {
|
||||
$entryParts[] = "Description: " . $product->description;
|
||||
$entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
|
||||
}
|
||||
|
||||
if ($product->customFields) {
|
||||
$entryParts[] = "Meta information: " . $product->customFields;
|
||||
$entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
|
||||
}
|
||||
|
||||
$lines[] = implode("\n", $entryParts);
|
||||
@@ -235,41 +239,75 @@ final readonly class PromptBuilder
|
||||
if ($lines !== []) {
|
||||
$parts[] =
|
||||
"LIVE SHOP RESULTS (authoritative for products):\n" .
|
||||
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
|
||||
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
|
||||
"Do not infer undocumented technical specifications from live shop data.\n\n" .
|
||||
implode("\n\n", $lines);
|
||||
}
|
||||
|
||||
return implode("\n\n", $parts);
|
||||
return $this->implodeBlocks($parts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the supporting knowledge block.
|
||||
* Build the knowledge block.
|
||||
*
|
||||
* Retrieved knowledge is authoritative for factual statements that are present in the sources.
|
||||
* Missing facts must not be invented.
|
||||
*/
|
||||
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent): string
|
||||
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt): string
|
||||
{
|
||||
$knowledgeParts = [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
|
||||
if ($knowledgeChunks !== []) {
|
||||
$lines = [];
|
||||
|
||||
foreach ($knowledgeChunks as $i => $chunk) {
|
||||
$chunk = $this->normalizeBlockText((string) $chunk);
|
||||
|
||||
if ($chunk === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$n = $i + 1;
|
||||
$lines[] = "[{$n}] {$chunk}";
|
||||
}
|
||||
|
||||
$knowledgeParts[] =
|
||||
"RETRIEVED KNOWLEDGE (supporting):\n" .
|
||||
"Source: Documents\n" .
|
||||
implode("\n\n", $lines);
|
||||
if ($lines !== []) {
|
||||
$knowledgeParts[] =
|
||||
"FACT GROUNDING RULES:\n" .
|
||||
"- Use retrieved knowledge as authoritative for factual answers.\n" .
|
||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.\n" .
|
||||
"- Do not invent missing values.\n" .
|
||||
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.\n" .
|
||||
"- Do not claim that information is missing if it appears in the provided sources.\n" .
|
||||
"- Do not compare with other products unless those products are also present in the provided sources.\n" .
|
||||
"- Prefer source-faithful wording over persuasive wording.\n" .
|
||||
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', or 'state-of-the-art'.\n" .
|
||||
"- Clearly separate explicit facts from inferences.\n" .
|
||||
"- If an inference is necessary, label it with 'Inference:'.\n" .
|
||||
($isTechnicalProductQuestion
|
||||
? "- For technical product questions, answer primarily with explicitly stated facts.\n" .
|
||||
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.\n" .
|
||||
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.\n" .
|
||||
"- Prefer neutral technical wording over evaluative summaries.\n" .
|
||||
"- If a detail is not explicitly stated in the provided sources, say so plainly.\n"
|
||||
: ""
|
||||
) . "\n" .
|
||||
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
|
||||
"Source: Documents\n" .
|
||||
implode("\n\n", $lines);
|
||||
}
|
||||
}
|
||||
|
||||
if ($urlContent !== '') {
|
||||
$knowledgeParts[] =
|
||||
"CONTENT FROM URL (supporting):\n" .
|
||||
"CONTENT FROM URL (authoritative if user-provided):\n" .
|
||||
"Source: URL\n" .
|
||||
$urlContent;
|
||||
}
|
||||
|
||||
return implode("\n\n", $knowledgeParts);
|
||||
return $this->implodeBlocks($knowledgeParts);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -309,6 +347,85 @@ final readonly class PromptBuilder
|
||||
return max(0, $remaining);
|
||||
}
|
||||
|
||||
/**
 * Join prompt blocks into a single string separated by blank lines.
 *
 * Every string entry is passed through normalizeBlockText(). Entries that are
 * not strings, or that are empty after normalization, are left out.
 *
 * @param array $blocks Candidate blocks in output order.
 *
 * @return string The kept blocks joined with "\n\n"; empty when nothing is kept.
 */
private function implodeBlocks(array $blocks): string
{
    $kept = [];

    foreach ($blocks as $candidate) {
        // Non-string entries are treated like empty blocks and dropped.
        if (!is_string($candidate)) {
            continue;
        }

        $text = $this->normalizeBlockText($candidate);

        if ($text === '') {
            continue;
        }

        $kept[] = $text;
    }

    return implode("\n\n", $kept);
}
|
||||
|
||||
/**
 * Normalize an optional text value, mapping "nothing to show" to null.
 *
 * @param string|null $value Raw text, or null when no text was supplied.
 *
 * @return string|null The text as returned by normalizeBlockText(), or null when
 *                     the input is null or normalizes to an empty string.
 */
private function normalizeNullableBlockText(?string $value): ?string
{
    $text = $value === null ? '' : $this->normalizeBlockText($value);

    return $text !== '' ? $text : null;
}
|
||||
|
||||
/**
 * Normalize free text before it is placed into a prompt block.
 *
 * Steps, in order:
 *  1. Convert CRLF / CR line endings to LF.
 *  2. Replace non-breaking spaces (U+00A0) with regular spaces.
 *  3. Trim leading and trailing whitespace.
 *  4. Remove spaces/tabs that directly precede a line break.
 *  5. Collapse runs of three or more line breaks into one blank line.
 *  6. Collapse runs of two or more spaces/tabs into a single space.
 *
 * If a regular expression call fails, the value from before that step is kept.
 *
 * @param string $value Raw text.
 *
 * @return string The normalized text; may be an empty string.
 */
private function normalizeBlockText(string $value): string
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);
    $value = str_replace("\u{00A0}", ' ', $value);
    $value = trim($value);

    // Strip trailing whitespace BEFORE collapsing blank lines. Otherwise lines
    // that contain only spaces/tabs hide consecutive line breaks from the
    // collapse step and leave more than one blank line in the result.
    $value = preg_replace("/[ \t]+\n/", "\n", $value) ?? $value;
    $value = preg_replace("/\n{3,}/", "\n\n", $value) ?? $value;
    $value = preg_replace("/[ \t]{2,}/", ' ', $value) ?? $value;

    return $value;
}
|
||||
|
||||
/**
 * Heuristically decide whether a prompt looks like a technical product question.
 *
 * The prompt is considered technical when either
 *  - at least two distinct keywords from the list below occur in the
 *    lower-cased prompt (substring match), or
 *  - the prompt contains a word of two or more letters followed, with at most
 *    one whitespace character in between, by a 2-5 digit number
 *    (for example "Testomat 2000").
 *
 * @param string $prompt The user question.
 *
 * @return bool True when the prompt is likely a technical product question.
 */
private function isLikelyTechnicalProductQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // Matching is by substring, so no keyword may be contained in another one;
    // otherwise a single word would be counted twice and reach the threshold
    // of two on its own. 'model' therefore also covers the German 'modell'.
    $keywords = [
        'technisch',
        'technical',
        'produkt',
        'product',
        'gerät',
        'device',
        'model',
        'messprinzip',
        'schnittstelle',
        'relais',
        'indikator',
        'spannung',
        'strom',
        'druck',
        'temperatur',
        'schutzart',
        'fehlercode',
        'wasserhärte',
        'testomat',
    ];

    $matches = count(array_filter(
        $keywords,
        static fn (string $keyword): bool => str_contains($normalized, $keyword),
    ));

    if ($matches >= 2) {
        return true;
    }

    // Fallback: something that looks like a model designation.
    // preg_match() returns false on error (e.g. invalid UTF-8), which is
    // treated as "no match" by the strict comparison.
    return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
}
|
||||
|
||||
private function clamp(int $value, int $min, int $max): int
|
||||
{
|
||||
return max($min, min($max, $value));
|
||||
|
||||
Reference in New Issue
Block a user