harden retrieve logic

This commit is contained in:
team 1
2026-04-17 14:52:53 +02:00
parent ae2b52ad18
commit 5c9d81adeb
4 changed files with 838 additions and 141 deletions

View File

@@ -60,7 +60,6 @@ final readonly class PromptBuilder
* @param ShopProductResult[] $shopResults
* @param bool|null $fullContext
* @param string|null $swagFullOutPut
* @return string
*/
public function build(
string $prompt,
@@ -71,11 +70,42 @@ final readonly class PromptBuilder
?bool $fullContext = false,
?string $swagFullOutPut = ''
): string {
$prompt = $this->normalizeBlockText($prompt);
$urlContent = $this->normalizeBlockText($urlContent);
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
$systemBlock = $this->buildSystemBlock();
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt);
$userBlock = $this->buildUserBlock($prompt);
// Build fixed blocks first so history only receives the remaining budget.
$fixedPrompt = $this->implodeBlocks([
$systemBlock,
$shopBlock,
$knowledgeBlock,
$userBlock,
]);
$contextBlock = $this->buildContextBlock(
userId: $userId,
fixedPrompt: $fixedPrompt,
fullContext: (bool) $fullContext
);
return $this->implodeBlocks([
$systemBlock,
$shopBlock,
$knowledgeBlock,
$contextBlock,
$userBlock,
]);
}
private function buildSystemBlock(): string
{
$now = (new DateTimeImmutable())->format('Y-m-d H:i:s');
// ------------------------------------------------------------
// 1) SYSTEM INSTRUCTIONS
// ------------------------------------------------------------
$activePrompt = $this->systemPromptRepository->findActive();
if (!$activePrompt) {
@@ -83,46 +113,13 @@ final readonly class PromptBuilder
}
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
$systemBlock = "SYSTEM:\n" . $activeSystemPrompt;
// ------------------------------------------------------------
// 2) PRIORITIZED FIXED BLOCKS
// ------------------------------------------------------------
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent);
$userBlock = "USER QUESTION:\n" . $prompt;
return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
}
// Build all fixed blocks first so history only gets the remaining budget.
$fixedBlocks = array_filter([
$systemBlock,
$shopBlock,
$knowledgeBlock,
$userBlock,
]);
$fixedPrompt = implode("\n\n", $fixedBlocks);
// ------------------------------------------------------------
// 3) CONVERSATION CONTEXT (AUTHORITATIVE, FILLS REMAINING SPACE)
// ------------------------------------------------------------
$contextBlock = $this->buildContextBlock(
userId: $userId,
fixedPrompt: $fixedPrompt,
fullContext: (bool) $fullContext
);
// ------------------------------------------------------------
// 4) FINAL PROMPT ASSEMBLY
// ------------------------------------------------------------
$blocks = array_filter([
$systemBlock,
$shopBlock,
$knowledgeBlock,
$contextBlock,
$userBlock,
]);
return implode("\n\n", $blocks);
/**
 * Prefix the user's question with the "USER QUESTION:" block label.
 *
 * @param string $prompt Question text; appended unchanged after the label line.
 */
private function buildUserBlock(string $prompt): string
{
    return sprintf("USER QUESTION:\n%s", $prompt);
}
/**
@@ -151,33 +148,36 @@ final readonly class PromptBuilder
);
}
$history = $this->normalizeBlockText($history);
if ($history === '') {
return '';
}
return
"CONVERSATION CONTEXT (authoritative):\n" .
"The following messages are the previous turns of this conversation.\n" .
"They must be considered when answering the next question.\n\n" .
"CONVERSATION CONTEXT (contextual only):\n" .
"The following messages are previous turns of this conversation.\n" .
"Use them to resolve references, follow-up questions, and user intent.\n" .
"They must not override retrieved factual knowledge or live shop data.\n\n" .
$history;
}
/**
* Build the shop block with the highest business priority.
* Build the shop block with the highest business priority for product facts.
*/
private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
{
$parts = [];
if ($swagFullOutPut !== null && trim($swagFullOutPut) !== '') {
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
$parts[] =
"SHOP SEARCH QUERY:\n" .
trim($swagFullOutPut) . "\n" .
$swagFullOutPut . "\n" .
"Source: Shop Search";
}
if ($shopResults === []) {
return implode("\n\n", $parts);
return $this->implodeBlocks($parts);
}
$isDetailed = count($shopResults) <= 5;
@@ -190,19 +190,19 @@ final readonly class PromptBuilder
$n = $i + 1;
$entryParts = [
"[{$n}] " . $product->name,
"[{$n}] " . $this->normalizeBlockText($product->name),
];
if ($product->productNumber) {
$entryParts[] = "Product number: " . $product->productNumber;
$entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
}
if ($product->manufacturer) {
$entryParts[] = "Manufacturer: " . $product->manufacturer;
$entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
}
if ($product->price) {
$entryParts[] = "Price: " . $product->price;
$entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
}
if ($product->available !== null) {
@@ -210,23 +210,27 @@ final readonly class PromptBuilder
}
foreach ($product->highlights as $highlight) {
$entryParts[] = "- " . $highlight;
$highlight = $this->normalizeBlockText((string) $highlight);
if ($highlight !== '') {
$entryParts[] = "- " . $highlight;
}
}
if ($product->url) {
$entryParts[] = "URL: " . $product->url;
$entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
}
if ($product->productImage) {
$entryParts[] = "Product image: " . $product->productImage;
$entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
}
if ($isDetailed && $product->description) {
$entryParts[] = "Description: " . $product->description;
$entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
}
if ($product->customFields) {
$entryParts[] = "Meta information: " . $product->customFields;
$entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
}
$lines[] = implode("\n", $entryParts);
@@ -235,41 +239,75 @@ final readonly class PromptBuilder
if ($lines !== []) {
$parts[] =
"LIVE SHOP RESULTS (authoritative for products):\n" .
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
"Do not infer undocumented technical specifications from live shop data.\n\n" .
implode("\n\n", $lines);
}
return implode("\n\n", $parts);
return $this->implodeBlocks($parts);
}
/**
* Build the supporting knowledge block.
* Build the knowledge block.
*
* Retrieved knowledge is authoritative for factual statements that are present in the sources.
* Missing facts must not be invented.
*/
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent): string
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt): string
{
$knowledgeParts = [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
if ($knowledgeChunks !== []) {
$lines = [];
foreach ($knowledgeChunks as $i => $chunk) {
$chunk = $this->normalizeBlockText((string) $chunk);
if ($chunk === '') {
continue;
}
$n = $i + 1;
$lines[] = "[{$n}] {$chunk}";
}
$knowledgeParts[] =
"RETRIEVED KNOWLEDGE (supporting):\n" .
"Source: Documents\n" .
implode("\n\n", $lines);
if ($lines !== []) {
$knowledgeParts[] =
"FACT GROUNDING RULES:\n" .
"- Use retrieved knowledge as authoritative for factual answers.\n" .
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.\n" .
"- Do not invent missing values.\n" .
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.\n" .
"- Do not claim that information is missing if it appears in the provided sources.\n" .
"- Do not compare with other products unless those products are also present in the provided sources.\n" .
"- Prefer source-faithful wording over persuasive wording.\n" .
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', or 'state-of-the-art'.\n" .
"- Clearly separate explicit facts from inferences.\n" .
"- If an inference is necessary, label it with 'Inference:'.\n" .
($isTechnicalProductQuestion
? "- For technical product questions, answer primarily with explicitly stated facts.\n" .
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.\n" .
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.\n" .
"- Prefer neutral technical wording over evaluative summaries.\n" .
"- If a detail is not explicitly stated in the provided sources, say so plainly.\n"
: ""
) . "\n" .
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
"Source: Documents\n" .
implode("\n\n", $lines);
}
}
if ($urlContent !== '') {
$knowledgeParts[] =
"CONTENT FROM URL (supporting):\n" .
"CONTENT FROM URL (authoritative if user-provided):\n" .
"Source: URL\n" .
$urlContent;
}
return implode("\n\n", $knowledgeParts);
return $this->implodeBlocks($knowledgeParts);
}
/**
@@ -309,6 +347,85 @@ final readonly class PromptBuilder
return max(0, $remaining);
}
/**
 * Join prompt blocks with one blank line between them.
 *
 * Every string entry is passed through normalizeBlockText(). Entries that
 * are not strings, or that normalize to an empty string, are left out.
 *
 * @param array $blocks Candidate blocks in output order.
 */
private function implodeBlocks(array $blocks): string
{
    $kept = [];

    foreach ($blocks as $candidate) {
        if (!is_string($candidate)) {
            continue;
        }

        $text = $this->normalizeBlockText($candidate);
        if ($text !== '') {
            $kept[] = $text;
        }
    }

    return implode("\n\n", $kept);
}
/**
 * Nullable variant of normalizeBlockText().
 *
 * @param string|null $value Raw text or null.
 *
 * @return string|null The normalized text, or null when the input is null
 *                     or nothing is left after normalization.
 */
private function normalizeNullableBlockText(?string $value): ?string
{
    $cleaned = $value === null ? '' : $this->normalizeBlockText($value);

    return $cleaned !== '' ? $cleaned : null;
}
/**
 * Normalize free text before it is embedded in a prompt block.
 *
 * Steps, in order:
 *  1. unify "\r\n" and "\r" line endings to "\n";
 *  2. replace non-breaking spaces (U+00A0) with plain spaces;
 *  3. trim leading and trailing whitespace;
 *  4. strip spaces/tabs at the end of every line;
 *  5. collapse three or more consecutive newlines into one blank line;
 *  6. collapse runs of two or more spaces/tabs into a single space.
 *
 * If a regular-expression step fails (preg_replace() returns null), the
 * text from the previous step is kept.
 *
 * @param string $value Raw text.
 *
 * @return string Normalized text; may be empty.
 */
private function normalizeBlockText(string $value): string
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);
    $value = str_replace("\u{00A0}", ' ', $value);
    $value = trim($value);

    // Trailing whitespace must be removed BEFORE blank lines are collapsed:
    // otherwise whitespace-only lines ("a\n \n \nb") hide a run of newlines
    // from the collapse step and the result still contains 3+ newlines.
    $value = preg_replace("/[ \t]+\n/", "\n", $value) ?? $value;
    $value = preg_replace("/\n{3,}/", "\n\n", $value) ?? $value;
    $value = preg_replace("/[ \t]{2,}/", ' ', $value) ?? $value;

    return $value;
}
/**
 * Heuristically decide whether a prompt is a technical product question.
 *
 * The prompt qualifies when either
 *  - its lower-cased text contains at least two different keywords from the
 *    list below (substring match), or
 *  - it contains something shaped like a model designation: two or more
 *    letters, an optional whitespace character, then two to five digits.
 *
 * @param string $prompt User prompt.
 *
 * @return bool True when the prompt looks like a technical product question.
 */
private function isLikelyTechnicalProductQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // Keywords are matched as substrings, so no entry may contain another
    // entry: a single word would then be counted twice and reach the
    // two-keyword threshold on its own. 'model' therefore also stands for
    // the German 'modell' (just as 'temperatur' also hits 'temperature').
    $keywords = [
        'technisch',
        'technical',
        'produkt',
        'product',
        'gerät',
        'device',
        'model',
        'messprinzip',
        'schnittstelle',
        'relais',
        'indikator',
        'spannung',
        'strom',
        'druck',
        'temperatur',
        'schutzart',
        'fehlercode',
        'wasserhärte',
        'testomat',
    ];

    $matches = 0;
    foreach ($keywords as $keyword) {
        // Stop scanning as soon as the threshold is reached.
        if (str_contains($normalized, $keyword) && ++$matches >= 2) {
            return true;
        }
    }

    // Fallback: model-number-like token. preg_match() returns false on
    // error, which the strict comparison treats as "no match".
    return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
}
private function clamp(int $value, int $min, int $max): int
{
return max($min, min($max, $value));