diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php index a6c8772..6998567 100644 --- a/src/Agent/PromptBuilder.php +++ b/src/Agent/PromptBuilder.php @@ -56,6 +56,7 @@ final readonly class PromptBuilder 'modell', 'model', 'messprinzip', + 'measurement principle', 'schnittstelle', 'interface', 'relais', @@ -111,15 +112,19 @@ final readonly class PromptBuilder $urlContent = $this->normalizeBlockText($urlContent); $swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut); + $hasShopResults = $shopResults !== []; + $systemBlock = $this->buildSystemBlock(); $shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut); - $knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt); + $outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults); + $knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults); $userBlock = $this->buildUserBlock($prompt); // Build fixed blocks first so history only receives the remaining budget. $fixedPrompt = $this->implodeBlocks([ $systemBlock, $shopBlock, + $outputPriorityBlock, $knowledgeBlock, $userBlock, ]); @@ -133,6 +138,7 @@ final readonly class PromptBuilder return $this->implodeBlocks([ $systemBlock, $shopBlock, + $outputPriorityBlock, $knowledgeBlock, $contextBlock, $userBlock, @@ -200,7 +206,10 @@ final readonly class PromptBuilder } /** - * Build the shop block with the highest business priority for product facts. + * Build the shop block. + * + * Shop data is the most current source for commercial details. + * It should not override technical matching logic. */ private function buildShopBlock(array $shopResults, ?string $swagFullOutPut): string { @@ -275,24 +284,40 @@ final readonly class PromptBuilder if ($lines !== []) { $parts[] = - "LIVE SHOP RESULTS (authoritative for products):\n" . - "Use these results as authoritative for product identity, availability, pricing, and shop-visible product details.\n" . - "If retrieved documents conflict with live shop data on product availability or price, prefer the live shop data.\n" . - "Do not infer undocumented technical specifications from live shop data.\n" . - "Do not derive technical benefits or operational conclusions from shop data unless explicitly stated.\n\n" . + "LIVE SHOP RESULTS (authoritative for current commercial details):\n" . + "Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" . + "If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" . + "Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" . + "Do not infer undocumented technical specifications from shop data.\n\n" . implode("\n\n", $lines); } return $this->implodeBlocks($parts); } + /** + * Build a small priority block that tells the model what to surface first. + */ + private function buildOutputPriorityBlock(bool $hasShopResults): string + { + if (!$hasShopResults) { + return ''; + } + + return + "OUTPUT PRIORITY:\n" . + "Use retrieved knowledge first to determine the technically matching product or answer.\n" . + "If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" . + "Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n"; + } + /** * Build the knowledge block. * - * Retrieved knowledge is authoritative for factual statements that are present in the sources. - * Missing facts must not be invented. + * Retrieved knowledge remains the main source for technical matching and explanation. + * Shop data is preferred for current commercial fields. */ - private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt): string + private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string { $knowledgeParts = []; $isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt); @@ -312,12 +337,17 @@ final readonly class PromptBuilder } if ($lines !== []) { - $knowledgeParts[] = + $parts = [ + "LANGUAGE RULES:\n" . + implode("\n", $this->buildLanguageRules()), "FACT GROUNDING RULES:\n" . - implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion)) . "\n\n" . - "RETRIEVED KNOWLEDGE (authoritative for facts):\n" . + implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)), + "RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" . "Source: Documents\n" . - implode("\n\n", $lines); + implode("\n\n", $lines), + ]; + + $knowledgeParts[] = implode("\n\n", $parts); } } @@ -371,10 +401,22 @@ final readonly class PromptBuilder /** * @return string[] */ - private function buildFactGroundingRules(bool $isTechnicalProductQuestion): array + private function buildLanguageRules(): array + { + return [ + "- Answer only in the same language as the user question.", + "- All headings, labels, notes, and structural elements must be in the same language as the user question.", + "- Do not switch languages unless the user does.", + "- If headings are used, write them in the user's language.", + ]; + } + + /** + * @return string[] + */ + private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array { $rules = [ - "- Use retrieved knowledge as authoritative for factual answers.", "- State only facts that are explicitly present in the provided sources.", "- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.", "- Do not invent missing values.", @@ -388,6 +430,17 @@ final readonly class PromptBuilder "- If a sentence cannot be traced to the provided sources, do not write it.", ]; + if ($hasShopResults) { + $rules = array_merge($rules, [ + "- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.", + "- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.", + "- When shop results are present and relevant, include current price and the actual URL if available.", + "- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.", + ]); + } else { + $rules[] = "- Use retrieved knowledge as authoritative for factual answers."; + } + if ($isTechnicalProductQuestion) { $rules = array_merge($rules, [ "- For technical product questions, answer primarily with explicitly stated facts.", @@ -399,9 +452,11 @@ final readonly class PromptBuilder "- Do not use persuasive summaries or advisory conclusions.", "- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.", "- Use neutral engineering language.", - "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations unless explicitly stated.", + "- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.", + "- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.", "- If the source lists application areas, repeat only those areas and do not broaden them.", "- If the source names an indicator and threshold, reproduce that exactly without extrapolation.", + "- If the source states only a threshold function, do not expand it into broader control logic.", "- If a detail is not explicitly stated in the provided sources, say so plainly.", "- Prefer short, source-close sentences over explanatory expansion.", ]); diff --git a/src/Config/NdjsonHybridRetrieverConfig.php b/src/Config/NdjsonHybridRetrieverConfig.php index b954e59..c86054d 100644 --- a/src/Config/NdjsonHybridRetrieverConfig.php +++ b/src/Config/NdjsonHybridRetrieverConfig.php @@ -12,7 +12,7 @@ final class NdjsonHybridRetrieverConfig * Chosen to stay selective enough for product-family-heavy data * while not cutting off too many useful fallback hits. */ - public const VECTOR_SCORE_THRESHOLD = 0.80; + public const VECTOR_SCORE_THRESHOLD = 0.82; /** * Absolute safety caps.