# Listing: MtoRagSystem/config/retriex/prompt.yaml
# Listing metadata: 2026-05-07 07:52:52 +02:00, 305 lines, 26 KiB, YAML

# Prompt budget, prompt rendering limits and prompt wording rules.
# Prompt text values are mirrored from PromptBuilderConfig defaults; PHP fallbacks remain active.
parameters:
retriex.prompt.config:
budget:
chars_per_token: 4
history_padding_chars: 400
output_reserve_ratio: 0.25
output_reserve_min_tokens: 768
output_reserve_max_tokens: 6000
safety_reserve_ratio: 0.05
safety_reserve_min_tokens: 256
safety_reserve_max_tokens: 1024
min_prompt_budget_tokens: 1024
shop_results:
max_results_in_prompt: 24
detailed_max_count: 5
header_lines:
- 'LIVE SHOP RESULTS (authoritative for current commercial details):'
- Use these results as the primary source for current price, availability, URL, current shop-visible product naming, and explicitly shop-visible product suitability when the user asks which product/device can measure or monitor something.
- If retrieved documents conflict with shop data on price, availability, URL, current naming, or explicitly shop-visible product suitability, prefer the shop data for those fields.
- If retrieved documents do not identify a matching product, but a live shop result explicitly names the requested measurement parameter or application, do not conclude that no matching product exists.
- Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.
- Do not infer undocumented technical specifications from shop data.
- Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.
- Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.
- If a shop result has no price field, do not state a price for it.
- Never interpret a missing price or a zero price as free, kostenlos, gratis, or available for 0.00 EUR.
- Treat every SHOP PRODUCT RECORD as atomic: exact product name, product number, price, availability, URL, image, description, and metadata must stay together.
- When outputting a shop item, use the exact shop product name from that same SHOP PRODUCT RECORD as the heading. Never use a retrieved-knowledge device name as the heading for a different shop URL or product number.
- Product name and URL define the primary identity of a shop record. Descriptions may mention compatible devices but must not turn an accessory, indicator, reagent, kit, set, or consumable into a main-device shop hit.
- If a technical device from retrieved knowledge and a shop record are not clearly the same exact product identity, separate Fachliche Einordnung from Shop-Treffer instead of merging them.
record_header_template: '[%d] SHOP PRODUCT RECORD'
exact_product_name_label: Exact shop product name
atomic_record_note_lines:
- 'Record boundary: all fields below belong only to this exact shop product record.'
overflow_notice_template: Only the top %d ranked shop results are shown here out of %d total results.
fields:
product_number_label: Product number
manufacturer_label: Manufacturer
price_label: Price
availability_label: Available
availability_yes_label: 'yes'
availability_no_label: 'no'
highlight_prefix: '- '
url_label: URL
product_image_label: Product image
description_label: Description
meta_information_label: Meta information
requested_role_label: Requested product role
inferred_role_label: Inferred shop product role
role_compatibility_label: Role compatibility with request
role_incompatible_commercial_suppression_note: 'Commercial fields suppressed: this shop record is not a matching main-device result for the requested product role.'
technical_product_keyword_match_threshold: 2
vocabulary_views:
technical_product_keywords: prompt.technical_product_keywords
accessory_request_keywords: prompt.accessory_request_keywords
main_device_request_keywords: prompt.main_device_request_keywords
main_device_product_keywords: prompt.main_device_product_keywords
accessory_product_keywords: prompt.accessory_product_keywords
sections:
system_label: SYSTEM
user_question_label: USER QUESTION
conversation_context_label: CONVERSATION CONTEXT (contextual only)
shop_search_query_label: SHOP SEARCH QUERY
output_priority_label: OUTPUT PRIORITY
fallback_escalation_label: FALLBACK AND ESCALATION RULES
response_format_label: RESPONSE FORMAT RULES
language_rules_label: LANGUAGE RULES
fact_grounding_rules_label: FACT GROUNDING RULES
measurement_evidence_label: MEASUREMENT PARAMETER EVIDENCE CHECK
numeric_value_focus_label: EXACT NUMERIC VALUE FOCUS
retrieved_knowledge_label: RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)
url_content_label: CONTENT FROM URL (authoritative if user-provided)
conversation_context:
intro_lines:
- The following messages are previous turns of this conversation.
- Use them only to resolve references, follow-up questions, and user intent.
- Previous assistant answers are not a factual source for technical values, product compatibility, indicators, ranges, prices, or availability.
- All factual claims must come from retrieved factual knowledge, user-provided URL content, or live shop data.
- Conversation context must not override retrieved factual knowledge or live shop data.
shop_search:
source_line: 'Source: Shop Search'
role_guard:
direct_main_device_request_patterns:
- '/\b(welcher|welches|welche)\s+[^?.!,;]{0,40}(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\b/u'
- '/\b(suche|finde|empfiehl|empfehle)\s+[^?.!,;]{0,40}(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\b/u'
- '/\b(testomat|messgerät|messgeraet|analysegerät|analysegeraet|gerät|geraet|analysator)\s+[^?.!,;]{0,40}(messen|misst|überwachen|ueberwachen|kann|für|fuer)\b/u'
measurement_evidence_guard:
vocabulary_views:
accessory_lookup_guard_terms: prompt.measurement_evidence_guard.accessory_lookup_guard_terms
accessory_lookup_passthrough_terms: prompt.measurement_evidence_guard.accessory_lookup_passthrough_terms
generic_positive_context_terms: prompt.measurement_evidence_guard.generic_positive_context_terms
generic_negative_context_terms: prompt.measurement_evidence_guard.generic_negative_context_terms
vocabulary_maps:
request_terms: prompt.measurement_evidence_guard.request_terms
positive_terms: prompt.measurement_evidence_guard.positive_terms
non_equivalent_terms: prompt.measurement_evidence_guard.non_equivalent_terms
intro_rules:
- '- This block is generated from the current user question and is stricter than broad product-selection wording.'
- '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.'
- '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.'
- '- For exact numeric measurement requests, records that do not contain the requested numeric value/unit must not provide indicators, ranges, variants, accessories, or suitability details for the selected product.'
product_specific_rules:
- '- Verify every recommended product independently against the requested measurement parameter.'
- '- If a retrieved RAG record mentions several products, only use a product for the requested parameter when that product is named in the same sentence, bullet, table row, or clearly bounded product section as the parameter evidence.'
- '- Do not transfer measurement suitability from one product, variant, indicator, category, tag, heading, or nearby paragraph to another product.'
- '- Generic category, umbrella-topic, device-class, product-family, document-title, tag, or application-area terms are not enough to prove a specific measurement parameter for a specific product.'
- '- Indicator names and indicator ranges are product-specific evidence. Use them only when the same source record explicitly connects the exact product, requested value or range, and indicator code.'
- '- If a record proves the device/value but the visible excerpt does not connect an indicator code to that device/value, omit the indicator instead of borrowing one from a different product record.'
generic_request_patterns:
- '/\b(?:mit|für|fuer|zur|zum)\s+(?:dem\s+)?(?:messparameter|parameter|messwert|messgröße|messgroesse)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
- '/\b(?:messparameter|parameter|messwert|messgröße|messgroesse)\s*(?:für|fuer|von|zur|zum|:)\s*(?<parameter>[^?.!,;\n]{2,80})/iu'
- '/\b(?:messung|messen|überwachung|ueberwachung|bestimmung|analyse)\s+(?:von|der|des|für|fuer|zur|zum)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
generic_safe_no_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von {label}.
generic_safe_no_accessory_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von {label}.
rule_templates:
shop_positive_evidence: '- Shop record {index} ({product}): explicit positive evidence for {label} is present in this same record.'
shop_no_evidence: '- No shop product record shown to the model contains explicit positive evidence for {label} in the same record.'
unnamed_product: 'unnamed product'
default_requested_parameter_label: 'requested measurement parameter'
shop_record_positive_evidence_line: 'Requested measurement evidence: explicit positive evidence for {label} is present in this same SHOP PRODUCT RECORD.'
shop_record_no_evidence_line: 'Requested measurement evidence: no explicit positive evidence for {label} is present in this SHOP PRODUCT RECORD. Do not present this record as technically suitable for that measurement parameter.'
requested_parameter: '- User requested measurement parameter: {label}.'
positive_terms: '- Positive parameter terms for this request: {terms}.'
positive_context_terms: '- These parameter terms count as suitability evidence only in a measurement-purpose context such as: {terms}.'
negative_context_terms: '- These contexts are not suitability evidence by themselves: {terms}.'
non_equivalent_terms: '- Terms that must NOT be treated as equivalent positive evidence: {terms}.'
rag_url_evidence_scan: '- RAG/URL evidence scan for this exact parameter: {state}.'
rag_url_evidence_found: 'explicit positive evidence found.'
rag_url_evidence_missing: 'no explicit positive evidence found.'
deterministic_scan_no_product_specific_evidence: '- The deterministic exact-term scan did not find product-specific evidence. The answer may still use a clearly equivalent named measurement parameter from the same source record, but must not infer suitability from generic categories, document titles, tags, search terms, neighbouring products, or broad umbrella-topic wording.'
mandatory_no_recommendation: '- Mandatory answer behavior: do not recommend a product as suitable for this measurement parameter.'
start_answer_meaning: '- Start the answer with this meaning in the user language: {answer}'
accessory_mismatch: '- Do not recommend accessories for a different measurement parameter just because they are accessories. If only accessories for other parameters are present, say that only non-matching accessory hits were found.'
commercial_hits_only: '- You may list exact shop hits only as commercial/search hits under a heading such as "Shop-Treffer (technische Eignung nicht sicher belegt)".'
final_rules:
- '- Do not output measurement ranges, methods, application areas, advantages, or alternative suitable models unless the same source record contains explicit positive evidence for the requested measurement parameter.'
- '- The generated shop search query, search intent, ranking position, and user question are not factual evidence for product suitability.'
parameters:
- id: ph
label: pH / pH-Wert
negative_context_terms:
- Betriebsbereich
- Betriebsumgebung
- Einsatzbedingungen
- störungsfrei
- stoerungsfrei
- pH-Wert bei
- ph wert bei
- ph-wert bei
- bei 20 °C
- bei 20 °c
- bei 20°C
- bei 20°c
- Reagenzlösung hat
- Loesung hat
- Lösung hat
safe_no_evidence_answer_de: Ich finde in den bereitgestellten Quellen keine sicher belegten Geräte für pH-Messung.
safe_no_accessory_evidence_answer_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten pH-Indikator oder ein pH-Reagenz für Messgeräte.
- id: redox
label: Redox / ORP
safe_no_evidence_answer_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für Redox-/ORP-Messung.
safe_no_accessory_evidence_answer_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Redox-/ORP-Indikator oder ein Redox-/ORP-Reagenz für Messgeräte.
- id: free_chlorine
label: freies Chlor
safe_no_evidence_answer_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von freiem Chlor.
safe_no_accessory_evidence_answer_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von freiem Chlor.
output_priority:
rules:
- '- Use retrieved knowledge first to determine the technically matching product or answer.'
- '- For product-selection questions such as which device can measure or monitor a parameter, use relevant live shop results as a fallback when retrieved knowledge does not identify a matching product.'
- '- If shop results are present, use them afterwards to add current price, availability, and the actual URL.'
- '- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.'
technical_rules: []
numeric_value_focus:
enabled: true
max_values: 3
value_patterns:
- '/(?<value>\d+(?:[,.]\d+)?)\s*(?<unit>°?\s*dH|dh|°dh|°\s*dH)\b/iu'
rules:
- '- Exact numeric focus from the current user question: {values}.'
- '- Prefer retrieved records that explicitly contain this exact value/unit when selecting or recommending a product.'
- '- Records without the exact value/unit may provide background only; they must not provide indicator codes, indicator ranges, variants, or suitability details for the selected product.'
- '- If the selected product record proves the exact value but does not visibly connect an indicator code to that product and value, omit the indicator field rather than borrowing an indicator from another record.'
fallback_escalation:
state_line_template: '- Internal confidence state: {state}.'
base_rules:
- '- Prefer transparent uncertainty over a confident but unsupported answer.'
- '- Never present missing or weak evidence as proof that a product, value, accessory, or suitability does not exist.'
- '- A negative answer is allowed only when the provided sources explicitly support that negative finding for the asked scope.'
- '- If several products, parameters, or accessories could match, ask one focused clarification question instead of guessing.'
- '- For risky or binding product selection, state that sales or support should verify the application before a final selection.'
provided_shop_results_context_rule: '- Treat shop results as provided context only; do not imply that a live shop check was performed in this run.'
without_shop_check_rules:
- '- If the question is product-related and no live shop check was performed in this run, do not make a portfolio-wide negative statement such as "there is no product".'
- '- Phrase missing evidence narrowly, for example: "Im RAG-Wissen finde ich dazu keine belastbare Information."'
- '- If useful, say that a shop search can be used to look for matching products, but do not claim shop results were checked unless they are present in the prompt.'
states:
sicher_beantwortbar:
- '- The retrieved factual knowledge or user-provided URL content is sufficient for the core answer. Answer directly, but do not exceed the provided facts.'
wahrscheinlich_beantwortbar:
- '- Retrieved knowledge and shop data are both available. Use retrieved knowledge for technical suitability and shop data for current commercial details.'
- '- If the two source types do not clearly refer to the same product identity, separate the technical answer from commercial shop hits.'
- '- If RAG knowledge and shop data are both available but do not explicitly support the same technical suitability, start with the uncertainty instead of selecting a product confidently.'
nur_shop_treffer_kein_belastbares_fachwissen:
- '- Start the answer by making the fallback clear: "Aus den Shopdaten ergeben sich folgende Treffer; technische Eignung bitte prüfen."'
- '- If the user directly asks for accessories, cables, electrodes, buffers, kits, sets, indicators, reagents, or consumables and matching shop hits are present, do not start with a missing main-device or missing measuring-device sentence; start directly with the accessory shop hits.'
- '- Do not present shop-only matches as verified technical suitability unless the shop text explicitly states that suitability.'
- '- Do not say that RAG knowledge confirms the result. Say that no belastbares RAG-Fachwissen was available for this selection.'
keine_belastbaren_daten:
- '- State that no reliable information was found in the provided RAG knowledge, URL content, or shop results.'
- '- Do not answer with "gibt es nicht". Use narrow wording such as "Ich finde dazu keine belastbaren Daten in den vorliegenden Quellen."'
- '- Ask one focused clarification question if a parameter, product family, accessory type, or application context would make the search answerable.'
aggregatfrage_keine_belastbare_zaehlinformation:
- '- The user asks for a count or aggregate number, but the retrieved sources do not contain an explicit count/aggregate answer.'
- '- Do not present nearby product-family or portfolio mentions as proof of a concrete count.'
- '- Say narrowly: "Ich habe passende Quellen geprüft, finde darin aber keine belastbare Zählinformation für die angefragte Anzahl."'
- '- If helpful, explain that individual product mentions are not the same as a maintained aggregate count.'
semantische_rag_treffer_kein_direkter_fachbeleg:
- '- Retrieved RAG records are semantic nearest-neighbor hits only; they are not a direct factual match for the essential user term or configured synonym.'
- '- Do not present these RAG hits as fachlich belegt. Say narrowly that the RAG knowledge does not contain a direct Fachbeleg for the requested term.'
- '- You may mention that semantic neighbor hits were found only if it helps explain uncertainty, but do not infer suitability or product existence from them.'
semantische_rag_treffer_kein_direkter_fachbeleg_shopdaten_nicht_verfuegbar:
- '- Live shop data could not be loaded and retrieved RAG records are semantic nearest-neighbor hits without a direct Fachbeleg for the essential user term.'
- '- State both facts separately: shop unavailable; no direct RAG Fachbeleg. Do not make a portfolio-wide negative claim.'
- '- Do not answer with availability, price, shop portfolio, or a technical recommendation.'
shopdaten_nicht_verfuegbar:
- '- State that live shop data could not be loaded. If retrieved knowledge or URL content contains a direct Fachbeleg, still answer the factual part from that source and clearly separate it from missing shop data.'
- '- Do not draw negative conclusions about current product availability, price, or shop portfolio while the shop is unavailable.'
parameter_parsing:
split_pattern: '/\s*(?:,|;|\/|\boder\b|\bund\b|\bor\b|\band\b)\s*/iu'
trim_characters: " \t\n\r\0\x0B-–—:()[]{}\"'`“”„"
response_format:
base_rules:
- '- Keep normal spacing between all words. Never fuse words together.'
- '- Use short, clean paragraphs or short labeled sections.'
- '- Do not use persuasive or promotional wording.'
- '- Do not repeat the same fact in slightly different wording.'
- '- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content,
or conversation context.'
- '- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.'
- '- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.'
- '- Do not combine technical identity from one source with commercial fields from a different product.'
- '- Product number, price, availability, and URL must belong to the same explicitly grounded product.'
with_shop_rules:
- '- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical
facts.'
- '- Keep price, availability, and URL on separate lines when they are present.'
- '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.'
- '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the
main device.'
- '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.'
- '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.'
- '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.'
- '- Never place a shop URL, product number, price, or availability below a different heading taken from retrieved knowledge.'
- '- If technical RAG knowledge and shop records cannot be matched with high confidence, use separate sections: Fachliche Einordnung and Shop-Treffer.'
- '- For uncertain technical suitability from shop hits, use a short section like "Shop-Treffer (technische Eignung nicht sicher belegt)" and list only exact shop fields. Do not add a technical explanation or recommendation.'
without_shop_rules:
- '- If no shop results are present, do not compensate by inventing external products or external manufacturers.'
technical_rules: []
accessory_rules: []
language:
rules:
- '- Answer only in the same language as the user question.'
- '- All headings, labels, notes, and structural elements must be in the same language as the user question.'
- '- Do not switch languages unless the user does.'
- '- If headings are used, write them in the user''s language.'
fact_grounding:
base_rules:
- '- State only facts that are explicitly present in the provided sources.'
- '- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions,
counts, relay outputs, current outputs, and error codes.'
- '- Do not invent missing values.'
- '- Do not replace missing values with estimates, defaults, or typical industry assumptions.'
- '- Do not claim that information is missing if it appears in the provided sources.'
- '- Do not compare with other products unless those products are also present in the provided sources.'
- '- Prefer source-faithful wording over persuasive wording.'
- '- Avoid marketing language such as ''ideal'', ''perfect'', ''unverzichtbar'', ''entscheidend'', ''optimal'', ''kosteneffizient'', ''prozesssicher'', or ''state-of-the-art''.'
- '- Clearly separate explicit facts from inferences.'
- '- If a conclusion goes beyond the source wording, label it exactly as ''Inference:''.'
- '- If a sentence cannot be traced to the provided sources, do not write it.'
- '- For follow-up questions, use the conversation only to resolve what the user refers to; do not copy technical facts from previous assistant answers unless
the same fact is present in the current retrieved sources.'
- '- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.'
- '- If the sources do not identify a suitable product, do not invent one.'
- '- Do not turn absence of evidence into a broad portfolio statement. Use scoped wording tied to the provided sources and current search results.'
- '- Strong negative terms such as "ausschließlich", "keines", "nicht geeignet", or "gibt es nicht" require explicit source support for the full stated scope.'
with_shop_rules: []
without_shop_rules:
- '- Use retrieved knowledge as authoritative for factual answers.'
- '- If no shop results are present, do not compensate with external recommendations or external product suggestions.'
technical_rules: []
retrieved_knowledge:
source_line: 'Source: Documents'
url_content:
source_line: 'Source: URL'
technical_product_model_pattern: '/\b[\p{L}]{2,}\s?\d{2,5}\b/u'