harden retrieve logic part 3
This commit is contained in:
@@ -56,6 +56,7 @@ final readonly class PromptBuilder
|
||||
'modell',
|
||||
'model',
|
||||
'messprinzip',
|
||||
'measurement principle',
|
||||
'schnittstelle',
|
||||
'interface',
|
||||
'relais',
|
||||
@@ -111,15 +112,19 @@ final readonly class PromptBuilder
|
||||
$urlContent = $this->normalizeBlockText($urlContent);
|
||||
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
|
||||
|
||||
$hasShopResults = $shopResults !== [];
|
||||
|
||||
$systemBlock = $this->buildSystemBlock();
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
|
||||
$userBlock = $this->buildUserBlock($prompt);
|
||||
|
||||
// Build fixed blocks first so history only receives the remaining budget.
|
||||
$fixedPrompt = $this->implodeBlocks([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$outputPriorityBlock,
|
||||
$knowledgeBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
@@ -133,6 +138,7 @@ final readonly class PromptBuilder
|
||||
return $this->implodeBlocks([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$outputPriorityBlock,
|
||||
$knowledgeBlock,
|
||||
$contextBlock,
|
||||
$userBlock,
|
||||
@@ -200,7 +206,10 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the shop block with the highest business priority for product facts.
|
||||
* Build the shop block.
|
||||
*
|
||||
* Shop data is the most current source for commercial details.
|
||||
* It should not override technical matching logic.
|
||||
*/
|
||||
private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string
|
||||
{
|
||||
@@ -275,24 +284,40 @@ final readonly class PromptBuilder
|
||||
|
||||
if ($lines !== []) {
|
||||
$parts[] =
|
||||
"LIVE SHOP RESULTS (authoritative for products):\n" .
|
||||
"Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" .
|
||||
"If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" .
|
||||
"Do not infer undocumented technical specifications from live shop data.\n" .
|
||||
"Do not derive technical benefits or operational conclusions from shop data unless explicitly stated.\n\n" .
|
||||
"LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
|
||||
"Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
|
||||
"If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
|
||||
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
|
||||
"Do not infer undocumented technical specifications from shop data.\n\n" .
|
||||
implode("\n\n", $lines);
|
||||
}
|
||||
|
||||
return $this->implodeBlocks($parts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a small priority block that tells the model what to surface first.
|
||||
*/
|
||||
private function buildOutputPriorityBlock(bool $hasShopResults): string
|
||||
{
|
||||
if (!$hasShopResults) {
|
||||
return '';
|
||||
}
|
||||
|
||||
return
|
||||
"OUTPUT PRIORITY:\n" .
|
||||
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
|
||||
"If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" .
|
||||
"Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the knowledge block.
|
||||
*
|
||||
* Retrieved knowledge is authoritative for factual statements that are present in the sources.
|
||||
* Missing facts must not be invented.
|
||||
* Retrieved knowledge remains the main source for technical matching and explanation.
|
||||
* Shop data is preferred for current commercial fields.
|
||||
*/
|
||||
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt): string
|
||||
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
|
||||
{
|
||||
$knowledgeParts = [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
@@ -312,12 +337,17 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
if ($lines !== []) {
|
||||
$knowledgeParts[] =
|
||||
$parts = [
|
||||
"LANGUAGE RULES:\n" .
|
||||
implode("\n", $this->buildLanguageRules()),
|
||||
"FACT GROUNDING RULES:\n" .
|
||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion)) . "\n\n" .
|
||||
"RETRIEVED KNOWLEDGE (authoritative for facts):\n" .
|
||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
|
||||
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
|
||||
"Source: Documents\n" .
|
||||
implode("\n\n", $lines);
|
||||
implode("\n\n", $lines),
|
||||
];
|
||||
|
||||
$knowledgeParts[] = implode("\n\n", $parts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -371,10 +401,22 @@ final readonly class PromptBuilder
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
private function buildFactGroundingRules(bool $isTechnicalProductQuestion): array
|
||||
private function buildLanguageRules(): array
|
||||
{
|
||||
return [
|
||||
"- Answer only in the same language as the user question.",
|
||||
"- All headings, labels, notes, and structural elements must be in the same language as the user question.",
|
||||
"- Do not switch languages unless the user does.",
|
||||
"- If headings are used, write them in the user's language.",
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
|
||||
{
|
||||
$rules = [
|
||||
"- Use retrieved knowledge as authoritative for factual answers.",
|
||||
"- State only facts that are explicitly present in the provided sources.",
|
||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
|
||||
"- Do not invent missing values.",
|
||||
@@ -388,6 +430,17 @@ final readonly class PromptBuilder
|
||||
"- If a sentence cannot be traced to the provided sources, do not write it.",
|
||||
];
|
||||
|
||||
if ($hasShopResults) {
|
||||
$rules = array_merge($rules, [
|
||||
"- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
|
||||
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
|
||||
"- When shop results are present and relevant, include current price and the actual URL if available.",
|
||||
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
|
||||
]);
|
||||
} else {
|
||||
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
|
||||
}
|
||||
|
||||
if ($isTechnicalProductQuestion) {
|
||||
$rules = array_merge($rules, [
|
||||
"- For technical product questions, answer primarily with explicitly stated facts.",
|
||||
@@ -399,9 +452,11 @@ final readonly class PromptBuilder
|
||||
"- Do not use persuasive summaries or advisory conclusions.",
|
||||
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
|
||||
"- Use neutral engineering language.",
|
||||
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations unless explicitly stated.",
|
||||
"- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
|
||||
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
|
||||
"- If the source lists application areas, repeat only those areas and do not broaden them.",
|
||||
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
|
||||
"- If the source states only a threshold function, do not expand it into broader control logic.",
|
||||
"- If a detail is not explicitly stated in the provided sources, say so plainly.",
|
||||
"- Prefer short, source-close sentences over explanatory expansion.",
|
||||
]);
|
||||
|
||||
@@ -12,7 +12,7 @@ final class NdjsonHybridRetrieverConfig
|
||||
* Chosen to stay selective enough for product-family-heavy data
|
||||
* while not cutting off too many useful fallback hits.
|
||||
*/
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.80;
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.82;
|
||||
|
||||
/**
|
||||
* Absolute safety caps.
|
||||
|
||||
Reference in New Issue
Block a user