harden retrieve logic part 2
This commit is contained in:
@@ -42,6 +42,43 @@ final readonly class PromptBuilder
|
|||||||
*/
|
*/
|
||||||
private const MIN_PROMPT_BUDGET_TOKENS = 1024;
|
private const MIN_PROMPT_BUDGET_TOKENS = 1024;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Technical product prompts should be answered like documentation,
|
||||||
|
* not like sales copy.
*
* Lower-case German and English keywords. The prompt is lower-cased with
* mb_strtolower() and every entry found in it via str_contains() increments
* a match counter.
*
* NOTE(review): matching is by substring, so short entries such as 'ip' can
* also match inside unrelated words, and a prompt containing 'temperature'
* or 'modell' also contains 'temperatur' / 'model' and appears to be counted
* twice - confirm this is intended.
*
* @var string[]
|
||||||
|
*/
|
||||||
|
private const TECHNICAL_PRODUCT_KEYWORDS = [
|
||||||
|
'technisch',
|
||||||
|
'technical',
|
||||||
|
'produkt',
|
||||||
|
'product',
|
||||||
|
'gerät',
|
||||||
|
'device',
|
||||||
|
'modell',
|
||||||
|
'model',
|
||||||
|
'messprinzip',
|
||||||
|
'schnittstelle',
|
||||||
|
'interface',
|
||||||
|
'relais',
|
||||||
|
'relay',
|
||||||
|
'indikator',
|
||||||
|
'indicator',
|
||||||
|
'spannung',
|
||||||
|
'voltage',
|
||||||
|
'strom',
|
||||||
|
'current',
|
||||||
|
'druck',
|
||||||
|
'pressure',
|
||||||
|
'temperatur',
|
||||||
|
'temperature',
|
||||||
|
'schutzart',
|
||||||
|
'ip',
|
||||||
|
'fehlercode',
|
||||||
|
'error code',
|
||||||
|
'wasserhärte',
|
||||||
|
'hardness',
|
||||||
|
'testomat',
|
||||||
|
];
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private ContextService $contextService,
|
private ContextService $contextService,
|
||||||
private SystemPromptRepository $systemPromptRepository,
|
private SystemPromptRepository $systemPromptRepository,
|
||||||
@@ -241,7 +278,8 @@ final readonly class PromptBuilder
|
|||||||
"LIVE SHOP RESULTS (authoritative for products):\n" .
|
"LIVE SHOP RESULTS (authoritative for products):\n" .
|
||||||
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
|
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
|
||||||
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
|
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
|
||||||
"Do not infer undocumented technical specifications from live shop data.\n\n" .
|
"Do not infer undocumented technical specifications from live shop data.\n" .
|
||||||
|
"Do not derive technical benefits or operational conclusions from shop data unless explicitly stated.\n\n" .
|
||||||
implode("\n\n", $lines);
|
implode("\n\n", $lines);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -276,24 +314,7 @@ final readonly class PromptBuilder
|
|||||||
if ($lines !== []) {
|
if ($lines !== []) {
|
||||||
$knowledgeParts[] =
|
$knowledgeParts[] =
|
||||||
"FACT GROUNDING RULES:\n" .
|
"FACT GROUNDING RULES:\n" .
|
||||||
"- Use retrieved knowledge as authoritative for factual answers.\n" .
|
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion)) . "\n\n" .
|
||||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.\n" .
|
|
||||||
"- Do not invent missing values.\n" .
|
|
||||||
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.\n" .
|
|
||||||
"- Do not claim that information is missing if it appears in the provided sources.\n" .
|
|
||||||
"- Do not compare with other products unless those products are also present in the provided sources.\n" .
|
|
||||||
"- Prefer source-faithful wording over persuasive wording.\n" .
|
|
||||||
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', or 'state-of-the-art'.\n" .
|
|
||||||
"- Clearly separate explicit facts from inferences.\n" .
|
|
||||||
"- If an inference is necessary, label it with 'Inference:'.\n" .
|
|
||||||
($isTechnicalProductQuestion
|
|
||||||
? "- For technical product questions, answer primarily with explicitly stated facts.\n" .
|
|
||||||
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.\n" .
|
|
||||||
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.\n" .
|
|
||||||
"- Prefer neutral technical wording over evaluative summaries.\n" .
|
|
||||||
"- If a detail is not explicitly stated in the provided sources, say so plainly.\n"
|
|
||||||
: ""
|
|
||||||
) . "\n" .
|
|
||||||
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
|
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
|
||||||
"Source: Documents\n" .
|
"Source: Documents\n" .
|
||||||
implode("\n\n", $lines);
|
implode("\n\n", $lines);
|
||||||
@@ -347,6 +368,48 @@ final readonly class PromptBuilder
|
|||||||
return max(0, $remaining);
|
return max(0, $remaining);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
* Builds the list of rule lines that the caller joins with "\n" and places
* under the "FACT GROUNDING RULES:" heading of the knowledge block.
*
* The general grounding rules are always returned. When
* $isTechnicalProductQuestion is true, the stricter technical-documentation
* rules are appended after them.
*
* @param bool $isTechnicalProductQuestion Whether to append the additional
*                                         rules for technical product questions.
|
||||||
|
* @return string[]
*         One rule per element, each starting with "- " and carrying no
*         trailing newline.
|
||||||
|
*/
|
||||||
|
private function buildFactGroundingRules(bool $isTechnicalProductQuestion): array
|
||||||
|
{
|
||||||
|
$rules = [
|
||||||
|
"- Use retrieved knowledge as authoritative for factual answers.",
|
||||||
|
"- State only facts that are explicitly present in the provided sources.",
|
||||||
|
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
|
||||||
|
"- Do not invent missing values.",
|
||||||
|
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
|
||||||
|
"- Do not claim that information is missing if it appears in the provided sources.",
|
||||||
|
"- Do not compare with other products unless those products are also present in the provided sources.",
|
||||||
|
"- Prefer source-faithful wording over persuasive wording.",
|
||||||
|
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
|
||||||
|
"- Clearly separate explicit facts from inferences.",
|
||||||
|
"- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
|
||||||
|
"- If a sentence cannot be traced to the provided sources, do not write it.",
|
||||||
|
];
|
||||||
|
|
||||||
|
if ($isTechnicalProductQuestion) {
|
||||||
|
// Appended after the general rules, so the general rules always come first.
$rules = array_merge($rules, [
|
||||||
|
"- For technical product questions, answer primarily with explicitly stated facts.",
|
||||||
|
"- Behave like a technical documentation assistant, not like a sales advisor.",
|
||||||
|
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
|
||||||
|
"- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
|
||||||
|
"- Do not translate technical facts into business value unless the source explicitly does so.",
|
||||||
|
"- Do not recommend process changes unless explicitly present in the source.",
|
||||||
|
"- Do not use persuasive summaries or advisory conclusions.",
|
||||||
|
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
|
||||||
|
"- Use neutral engineering language.",
|
||||||
|
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations unless explicitly stated.",
|
||||||
|
"- If the source lists application areas, repeat only those areas and do not broaden them.",
|
||||||
|
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
|
||||||
|
"- If a detail is not explicitly stated in the provided sources, say so plainly.",
|
||||||
|
"- Prefer short, source-close sentences over explanatory expansion.",
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $rules;
|
||||||
|
}
|
||||||
|
|
||||||
private function implodeBlocks(array $blocks): string
|
private function implodeBlocks(array $blocks): string
|
||||||
{
|
{
|
||||||
$filtered = array_values(array_filter(
|
$filtered = array_values(array_filter(
|
||||||
@@ -388,32 +451,9 @@ final readonly class PromptBuilder
|
|||||||
{
|
{
|
||||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||||
|
|
||||||
$keywords = [
|
|
||||||
'technisch',
|
|
||||||
'technical',
|
|
||||||
'produkt',
|
|
||||||
'product',
|
|
||||||
'gerät',
|
|
||||||
'device',
|
|
||||||
'modell',
|
|
||||||
'model',
|
|
||||||
'messprinzip',
|
|
||||||
'schnittstelle',
|
|
||||||
'relais',
|
|
||||||
'indikator',
|
|
||||||
'spannung',
|
|
||||||
'strom',
|
|
||||||
'druck',
|
|
||||||
'temperatur',
|
|
||||||
'schutzart',
|
|
||||||
'fehlercode',
|
|
||||||
'wasserhärte',
|
|
||||||
'testomat',
|
|
||||||
];
|
|
||||||
|
|
||||||
$matches = 0;
|
$matches = 0;
|
||||||
|
|
||||||
foreach ($keywords as $keyword) {
|
foreach (self::TECHNICAL_PRODUCT_KEYWORDS as $keyword) {
|
||||||
if (str_contains($normalized, $keyword)) {
|
if (str_contains($normalized, $keyword)) {
|
||||||
$matches++;
|
$matches++;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user