harden retrieve logic part 2

This commit is contained in:
team 1
2026-04-17 15:06:55 +02:00
parent 5c9d81adeb
commit 10a8b69930

View File

@@ -42,6 +42,43 @@ final readonly class PromptBuilder
*/ */
private const MIN_PROMPT_BUDGET_TOKENS = 1024; private const MIN_PROMPT_BUDGET_TOKENS = 1024;
/**
 * Keywords (German and English) that mark a prompt as a technical product
 * question. Such prompts should be answered like documentation, not like
 * sales copy.
 *
 * Entries are lower-case and are listed as German/English pairs where a
 * counterpart exists.
 *
 * NOTE(review): the visible matcher checks each entry with str_contains()
 * against the lower-cased prompt, i.e. plain substring matching. Short
 * entries such as 'ip' therefore also hit inside longer words (for example
 * 'messprinzip'), and overlapping pairs such as 'modell'/'model' or
 * 'temperatur'/'temperature' are counted twice for a single word — confirm
 * this is intended.
 */
private const TECHNICAL_PRODUCT_KEYWORDS = [
    'technisch', 'technical',
    'produkt', 'product',
    'gerät', 'device',
    'modell', 'model',
    'messprinzip',
    'schnittstelle', 'interface',
    'relais', 'relay',
    'indikator', 'indicator',
    'spannung', 'voltage',
    'strom', 'current',
    'druck', 'pressure',
    'temperatur', 'temperature',
    'schutzart', 'ip',
    'fehlercode', 'error code',
    'wasserhärte', 'hardness',
    'testomat',
];
public function __construct( public function __construct(
private ContextService $contextService, private ContextService $contextService,
private SystemPromptRepository $systemPromptRepository, private SystemPromptRepository $systemPromptRepository,
@@ -241,7 +278,8 @@ final readonly class PromptBuilder
"LIVE SHOP RESULTS (authoritative for products):\n" . "LIVE SHOP RESULTS (authoritative for products):\n" .
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" . "Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" . "If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
"Do not infer undocumented technical specifications from live shop data.\n\n" . "Do not infer undocumented technical specifications from live shop data.\n" .
"Do not derive technical benefits or operational conclusions from shop data unless explicitly stated.\n\n" .
implode("\n\n", $lines); implode("\n\n", $lines);
} }
@@ -276,24 +314,7 @@ final readonly class PromptBuilder
if ($lines !== []) { if ($lines !== []) {
$knowledgeParts[] = $knowledgeParts[] =
"FACT GROUNDING RULES:\n" . "FACT GROUNDING RULES:\n" .
"- Use retrieved knowledge as authoritative for factual answers.\n" . implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion)) . "\n\n" .
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.\n" .
"- Do not invent missing values.\n" .
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.\n" .
"- Do not claim that information is missing if it appears in the provided sources.\n" .
"- Do not compare with other products unless those products are also present in the provided sources.\n" .
"- Prefer source-faithful wording over persuasive wording.\n" .
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', or 'state-of-the-art'.\n" .
"- Clearly separate explicit facts from inferences.\n" .
"- If an inference is necessary, label it with 'Inference:'.\n" .
($isTechnicalProductQuestion
? "- For technical product questions, answer primarily with explicitly stated facts.\n" .
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.\n" .
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.\n" .
"- Prefer neutral technical wording over evaluative summaries.\n" .
"- If a detail is not explicitly stated in the provided sources, say so plainly.\n"
: ""
) . "\n" .
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" . "RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
"Source: Documents\n" . "Source: Documents\n" .
implode("\n\n", $lines); implode("\n\n", $lines);
@@ -347,6 +368,48 @@ final readonly class PromptBuilder
return max(0, $remaining); return max(0, $remaining);
} }
/**
 * Assembles the bullet lines of the "FACT GROUNDING RULES" prompt section.
 *
 * The general rules are always returned. When the prompt was classified as
 * a technical product question, the stricter documentation-style rules are
 * appended after them.
 *
 * @param bool $isTechnicalProductQuestion Whether to append the technical product rules.
 *
 * @return string[] Rule lines, each starting with "- " and carrying no trailing newline.
 */
private function buildFactGroundingRules(bool $isTechnicalProductQuestion): array
{
    $generalRules = [
        "- Use retrieved knowledge as authoritative for factual answers.",
        "- State only facts that are explicitly present in the provided sources.",
        "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
        "- Do not invent missing values.",
        "- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
        "- Do not claim that information is missing if it appears in the provided sources.",
        "- Do not compare with other products unless those products are also present in the provided sources.",
        "- Prefer source-faithful wording over persuasive wording.",
        "- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
        "- Clearly separate explicit facts from inferences.",
        "- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
        "- If a sentence cannot be traced to the provided sources, do not write it.",
    ];

    // Non-technical prompts only get the general rule set.
    if (!$isTechnicalProductQuestion) {
        return $generalRules;
    }

    $technicalRules = [
        "- For technical product questions, answer primarily with explicitly stated facts.",
        "- Behave like a technical documentation assistant, not like a sales advisor.",
        "- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
        "- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
        "- Do not translate technical facts into business value unless the source explicitly does so.",
        "- Do not recommend process changes unless explicitly present in the source.",
        "- Do not use persuasive summaries or advisory conclusions.",
        "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
        "- Use neutral engineering language.",
        "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations unless explicitly stated.",
        "- If the source lists application areas, repeat only those areas and do not broaden them.",
        "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
        "- If a detail is not explicitly stated in the provided sources, say so plainly.",
        "- Prefer short, source-close sentences over explanatory expansion.",
    ];

    // Both lists are integer-keyed, so unpacking renumbers them sequentially,
    // general rules first.
    return [...$generalRules, ...$technicalRules];
}
private function implodeBlocks(array $blocks): string private function implodeBlocks(array $blocks): string
{ {
$filtered = array_values(array_filter( $filtered = array_values(array_filter(
@@ -388,32 +451,9 @@ final readonly class PromptBuilder
{ {
$normalized = mb_strtolower($prompt, 'UTF-8'); $normalized = mb_strtolower($prompt, 'UTF-8');
$keywords = [
'technisch',
'technical',
'produkt',
'product',
'gerät',
'device',
'modell',
'model',
'messprinzip',
'schnittstelle',
'relais',
'indikator',
'spannung',
'strom',
'druck',
'temperatur',
'schutzart',
'fehlercode',
'wasserhärte',
'testomat',
];
$matches = 0; $matches = 0;
foreach ($keywords as $keyword) { foreach (self::TECHNICAL_PRODUCT_KEYWORDS as $keyword) {
if (str_contains($normalized, $keyword)) { if (str_contains($normalized, $keyword)) {
$matches++; $matches++;
} }