Harden retrieval logic, part 3

This commit is contained in:
team 1
2026-04-17 21:00:24 +02:00
parent 10a8b69930
commit c890932248
2 changed files with 73 additions and 18 deletions

View File

@@ -56,6 +56,7 @@ final readonly class PromptBuilder
'modell', 'modell',
'model', 'model',
'messprinzip', 'messprinzip',
'measurement principle',
'schnittstelle', 'schnittstelle',
'interface', 'interface',
'relais', 'relais',
@@ -111,15 +112,19 @@ final readonly class PromptBuilder
$urlContent = $this->normalizeBlockText($urlContent); $urlContent = $this->normalizeBlockText($urlContent);
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut); $swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
$hasShopResults = $shopResults !== [];
$systemBlock = $this->buildSystemBlock(); $systemBlock = $this->buildSystemBlock();
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut); $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt); $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
$userBlock = $this->buildUserBlock($prompt); $userBlock = $this->buildUserBlock($prompt);
// Build fixed blocks first so history only receives the remaining budget. // Build fixed blocks first so history only receives the remaining budget.
$fixedPrompt = $this->implodeBlocks([ $fixedPrompt = $this->implodeBlocks([
$systemBlock, $systemBlock,
$shopBlock, $shopBlock,
$outputPriorityBlock,
$knowledgeBlock, $knowledgeBlock,
$userBlock, $userBlock,
]); ]);
@@ -133,6 +138,7 @@ final readonly class PromptBuilder
return $this->implodeBlocks([ return $this->implodeBlocks([
$systemBlock, $systemBlock,
$shopBlock, $shopBlock,
$outputPriorityBlock,
$knowledgeBlock, $knowledgeBlock,
$contextBlock, $contextBlock,
$userBlock, $userBlock,
@@ -200,7 +206,10 @@ final readonly class PromptBuilder
} }
/** /**
* Build the shop block with the highest business priority for product facts. * Build the shop block.
*
* Shop data is the most current source for commercial details.
* It should not override technical matching logic.
*/ */
private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
{ {
@@ -275,24 +284,40 @@ final readonly class PromptBuilder
if ($lines !== []) { if ($lines !== []) {
$parts[] = $parts[] =
"LIVE SHOP RESULTS (authoritative for products):\n" . "LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" . "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" . "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
"Do not infer undocumented technical specifications from live shop data.\n" . "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
"Do not derive technical benefits or operational conclusions from shop data unless explicitly stated.\n\n" . "Do not infer undocumented technical specifications from shop data.\n\n" .
implode("\n\n", $lines); implode("\n\n", $lines);
} }
return $this->implodeBlocks($parts); return $this->implodeBlocks($parts);
} }
/**
 * Produce the "OUTPUT PRIORITY" instruction block for the prompt.
 *
 * The block is only emitted when shop results exist. It instructs the model to
 * determine the technical match from retrieved knowledge first and to use shop
 * data afterwards for price, availability, and the actual URL.
 *
 * @param bool $hasShopResults Whether any shop results are available.
 *
 * @return string The newline-terminated instruction text, or an empty string when there are no shop results.
 */
private function buildOutputPriorityBlock(bool $hasShopResults): string
{
    // Every entry carries its own trailing newline, so the pieces are joined without a separator.
    $instructions = [
        "OUTPUT PRIORITY:\n",
        "Use retrieved knowledge first to determine the technically matching product or answer.\n",
        "If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n",
        "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n",
    ];

    return $hasShopResults ? implode('', $instructions) : '';
}
/** /**
* Build the knowledge block. * Build the knowledge block.
* *
* Retrieved knowledge is authoritative for factual statements that are present in the sources. * Retrieved knowledge remains the main source for technical matching and explanation.
* Missing facts must not be invented. * Shop data is preferred for current commercial fields.
*/ */
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt): string private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
{ {
$knowledgeParts = []; $knowledgeParts = [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
@@ -312,12 +337,17 @@ final readonly class PromptBuilder
} }
if ($lines !== []) { if ($lines !== []) {
$knowledgeParts[] = $parts = [
"LANGUAGE RULES:\n" .
implode("\n", $this->buildLanguageRules()),
"FACT GROUNDING RULES:\n" . "FACT GROUNDING RULES:\n" .
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion)) . "\n\n" . implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" . "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
"Source: Documents\n" . "Source: Documents\n" .
implode("\n\n", $lines); implode("\n\n", $lines),
];
$knowledgeParts[] = implode("\n\n", $parts);
} }
} }
@@ -371,10 +401,22 @@ final readonly class PromptBuilder
/** /**
* @return string[] * @return string[]
*/ */
private function buildFactGroundingRules(bool $isTechnicalProductQuestion): array private function buildLanguageRules(): array
{
return [
"- Answer only in the same language as the user question.",
"- All headings, labels, notes, and structural elements must be in the same language as the user question.",
"- Do not switch languages unless the user does.",
"- If headings are used, write them in the user's language.",
];
}
/**
* @return string[]
*/
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
{ {
$rules = [ $rules = [
"- Use retrieved knowledge as authoritative for factual answers.",
"- State only facts that are explicitly present in the provided sources.", "- State only facts that are explicitly present in the provided sources.",
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.", "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
"- Do not invent missing values.", "- Do not invent missing values.",
@@ -388,6 +430,17 @@ final readonly class PromptBuilder
"- If a sentence cannot be traced to the provided sources, do not write it.", "- If a sentence cannot be traced to the provided sources, do not write it.",
]; ];
if ($hasShopResults) {
$rules = array_merge($rules, [
"- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
"- When shop results are present and relevant, include current price and the actual URL if available.",
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
]);
} else {
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
}
if ($isTechnicalProductQuestion) { if ($isTechnicalProductQuestion) {
$rules = array_merge($rules, [ $rules = array_merge($rules, [
"- For technical product questions, answer primarily with explicitly stated facts.", "- For technical product questions, answer primarily with explicitly stated facts.",
@@ -399,9 +452,11 @@ final readonly class PromptBuilder
"- Do not use persuasive summaries or advisory conclusions.", "- Do not use persuasive summaries or advisory conclusions.",
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.", "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
"- Use neutral engineering language.", "- Use neutral engineering language.",
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations unless explicitly stated.", "- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
"- If the source lists application areas, repeat only those areas and do not broaden them.", "- If the source lists application areas, repeat only those areas and do not broaden them.",
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.", "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
"- If the source states only a threshold function, do not expand it into broader control logic.",
"- If a detail is not explicitly stated in the provided sources, say so plainly.", "- If a detail is not explicitly stated in the provided sources, say so plainly.",
"- Prefer short, source-close sentences over explanatory expansion.", "- Prefer short, source-close sentences over explanatory expansion.",
]); ]);

View File

@@ -12,7 +12,7 @@ final class NdjsonHybridRetrieverConfig
* Chosen to stay selective enough for product-family-heavy data * Chosen to stay selective enough for product-family-heavy data
* while not cutting off too many useful fallback hits. * while not cutting off too many useful fallback hits.
*/ */
public const VECTOR_SCORE_THRESHOLD = 0.80; public const VECTOR_SCORE_THRESHOLD = 0.82;
/** /**
* Absolute safety caps. * Absolute safety caps.