# Agent orchestration limits, routing rules and Shopware query prompt wording.
# User-visible chat messages live in config/retriex/chat-messages.yaml.
#
# Regex values in this file are delimiter-wrapped patterns with trailing
# modifiers (for example '/.../iu'). They are single-quoted so that
# backslashes stay literal in YAML.
parameters:
  retriex.agent.config:
    commerce_history_budget_chars: 1000
    product_search_knowledge_chunk_limit: 6
    advisory_product_search_knowledge_chunk_limit: 9
    optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
    # Double-quoted on purpose: the \t, \n, \r, \0, \x0B and \" escapes are
    # only interpreted inside double quotes.
    optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"

    input_normalization:
      enabled: true
      max_input_chars: 500
      max_output_chars: 700
      max_added_tokens: 2
      max_length_ratio_percent: 150
      output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
      placeholder_outputs:
        - 'normalized user input'
        - 'corrected user input'
        - 'user input'
        - 'normalisierte nutzereingabe'
        - 'korrigierte nutzereingabe'
      skip_patterns:
        - '/https?:\/\//iu'
        - '/\bwww\./iu'
        - '/```/u'
      prompt:
        intro: 'Normalize the following user input for RetrieX routing before intent detection.'
        output_format_block: |-
          Output format:
          Return exactly one line: the corrected user input.
        current_user_input_label: 'USER INPUT'
        rules:
          - '- Output only the corrected user input text itself, never the words "normalized user input".'
          - '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
          - '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
          - '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
          - '- Preserve the input language; do not translate German into English or English into German.'
          - '- Preserve vague references such as "der indikator", "das gerät", "suche im shop", or "dazu" without resolving them from context.'
          - '- If the input is already clear or you are uncertain, return the original input unchanged.'
          - '- No introduction, no explanation, no quotation marks.'
      fuzzy_routing:
        enabled: true
        min_token_length: 4
        medium_token_length: 7
        long_token_length: 11
        max_distance_short: 1
        max_distance_medium: 2
        max_distance_long: 3
        min_similarity_percent: 72
        # Canonical routing terms only, not typo variants.
        # Resolved from config/retriex/vocabulary.yaml view
        # agent.input_normalization_fuzzy_routing_terms.
        # A local terms list may still be added here as an explicit project override.
        vocabulary_views:
          terms: agent.input_normalization_fuzzy_routing_terms

    follow_up_context:
      # NOTE(review): these patterns carry no case-insensitive modifier —
      # presumably they are matched against lowercased input; confirm.
      strong_reference_patterns:
        - '/\bder\s+wert\b/u'
        - '/\bdieser\s+wert\b/u'
        - '/\bdiesen\s+wert\b/u'
        - '/\bdem\s+wert\b/u'
        - '/\bmit\s+welche(?:m|n|r)?\b/u'
        - '/\bwomit\b/u'
        - '/\bdamit\b/u'
        - '/\bdafuer\b/u'
        - '/\bdafür\b/u'
        - '/\bdazu\b/u'
        - '/\bdaraus\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+indikator\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u'
        - '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+bereich\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u'
      explicit_commercial_signal_terms:
        - shop
        - preis
        - preise
        - preisen
        - kostet
        - kosten
        - kaufen
        - bestellen
        - warenkorb
        - lieferzeit
        - verfuegbar
        - verfügbar
        - lager
        - url
        - link
        - artikelnummer
        - sku
        - produktnummer
      commercial_table_follow_up:
        enabled: true
        prompt_patterns:
          - '/\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b.{0,80}\b(?:preis|preise|preisen|kosten|kostet|shop)\b/u'
          - '/\b(?:preis|preise|preisen|kosten|kostet|shop)\b.{0,80}\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b/u'
          - '/\b(?:mit|inkl|inklusive|plus)\s+(?:preis|preise|preisen|kosten|shopdaten)\b/u'
        history_anchor_patterns:
          - '/\bTestomat(?:®)?\s+\d{3,4}\b/iu'
          - '/\b(?:Indikatortyp|Indikator|Indikatoren|Reagenz|Reagenzien|Zubehör|Zubehoer)\b/iu'
        vocabulary_views:
          table_terms: agent.follow_up_context.commercial_table_follow_up.table_terms
        commercial_terms:
          - preis
          - preise
          - preisen
          - kosten
          - kostet
          - shop
          - shoppreis
          - shoppreise
          - shopdaten
        indicator_marker_patterns:
          - '/\b(?:Indikatortyp|Indikator(?:en)?|indicator(?:\s+type)?|Reagenz(?:ien)?)\b/iu'
        query_template_with_model: '{model} indikator'
        query_template_without_model: 'indikator'
      history_question_pattern: '/^Question:\s*(.+)$/mi'
      history_turn_split_pattern: '/(?=^Question:\s)/m'
      history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u'
      context_labels:
        previous_user_question_template: 'Vorherige Nutzerfrage: {question}'
        previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
        current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
      reference_anchor:
        product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+(?=[A-Z0-9]*[A-Z])[A-Z0-9]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
        measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'

    final_answer_guard:
      enabled: true
      # Prevents runaway final answers caused by repeated LLM output. This is a
      # runtime safety guard and does not change retrieval, ranking or product matching.
      max_output_chars: 12000
      repeated_line:
        enabled: true
        min_output_chars: 1200
        min_line_chars: 16
        max_line_repetitions: 3
        trailing_window_lines: 220
        ignore_patterns:
          - '/^\s*(?:produkt(?:-|\s)?nummer|artikel(?:-|\s)?nummer|preis|verfügbar|verfuegbar|url|quellen?)\b/iu'
          - '/^\s*(?:status|query|intent|datenbasis|shop(?:-|\s)?suche)\b/iu'

    shop_runtime:
      query_cleanup:
        current_input_preservation:
          enabled: true
          # Terms that must be preserved from the current user input in the final
          # Shopware search query. This prevents short domain terms from being
          # dropped by query optimization or generic cleanup. Adapt vocabulary view
          # defaults or add a local `terms` override instead of changing PHP code.
          vocabulary_views:
            terms: agent.shop_runtime.query_cleanup.current_input_preservation_terms
        stopword_cleanup:
          enabled: true
          min_query_tokens_after_cleanup: 2
          # Plain Shopware text search should contain product-relevant terms only.
          # These terms are UI, instruction, presentation or sorting words and are
          # removed after LLM query optimization. Keep this list simple and local.
          terms: []
        positive_token_filter:
          enabled: false
          min_query_tokens_after_filter: 1
          include_current_input_preservation_terms: true
          include_semantic_shop_search_tokens: true
          include_product_role_terms: true
          allowed_terms: []
          blocked_terms: []
          code_patterns: []
          adjacent_variant_patterns: []
          adjacent_variant_terms: []
        attribute_cleanup:
          enabled: true
          # For direct product/accessory lookups with comparative attribute
          # constraints, keep the concrete product type and application terms but
          # do not send range words/numeric thresholds to the plain text shop
          # search. Example: "Anschlusskabel pH/Redox länger 20m" becomes
          # "anschlusskabel redox" so the shop can return 25m/50m/100m cables.
          min_query_tokens_after_cleanup: 2
          # Direct product/accessory cleanup terms are resolved from
          # config/retriex/vocabulary.yaml. Local lists may still be added here
          # as project-specific overrides, but the default source is vocabulary.
          vocabulary_views:
            product_type_terms: search_repair.direct_product_type_terms
            stop_terms: search_repair.direct_product_attribute_stop_terms
          comparative_constraint_patterns: []
      context_resolution:
        context_usage:
          referential_terms: []
        history_anchor_enrichment:
          enabled: true
          max_query_terms: 2
          template: ''
          vocabulary_views:
            trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
          query_terms: []
          query_noise_terms: []
          anchor_patterns: []
        meta_query_guard:
          enabled: true
          cleanup_profile: shop_context_fallback
          context_fallback_enabled: true
          context_fallback_question_limit: 12
          context_fallback_history_budget_chars: 20000
          context_fallback_use_full_history: true
          context_fallback_max_terms: 6
          # Legacy/domain override list. Generic stopwords, user-instruction
          # phrases and presentation terms are provided by profile
          # `shop_context_fallback`. Keep shop/price/domain terms here.
          context_fallback_filter_terms: []
          meta_only_terms: []
        rag_anchor_enrichment:
          enabled: true
          # Enriches overly narrow numeric shop queries with a product anchor from
          # retrieved RAG records when the same record explicitly contains the
          # requested numeric value/unit. This prevents queries such as "0,02"
          # when RAG already identified a concrete product such as a device model.
          min_score: 120
          max_query_terms: 6
          early_chunk_bonus_max: 10
          template: '{anchor} {query} {subject}'
          scores:
            exact_value_with_unit: 120
            exact_value_only: 40
            anchor_bonus: 50
          numeric_focus_patterns: []
          product_title_patterns: []
          anchor_bonus_patterns: []
          subject_terms: []
      result_identity:
        enabled: true
        # Direct product-list answers should only list products whose primary
        # identity (name/URL) matches the requested product type. This prevents
        # devices from being listed as a requested consumable merely because the
        # description mentions such consumables as accessories.
        prefer_primary_identity_matches: true
        compound_prefix_match:
          enabled: true
          # Some Shopware product names combine the requested product type with
          # a noun suffix, for example "pH-Pufferlösung". Keep this list small
          # and explicit so broad direct-result filtering remains safe.
          terms: []
        primary_identity_repair:
          enabled: true
          min_query_tokens_after_cleanup: 2
          # Only used for a retry query when the direct-result guard would
          # otherwise suppress all shop results. Keep product words and context
          # such as brand/pH/Redox, but remove target-device wording that can
          # push Shopware ranking toward devices instead of the requested
          # accessory/consumable.
          stop_terms: []
      answer_constraints:
        length_sort:
          enabled: true
          trigger_patterns: []
          value_patterns: []
        length_filter:
          enabled: true
          min_patterns: []
          max_patterns: []
      direct_answer:
        enabled: true
        max_results: 10

    rag_evidence_guard:
      cleanup_profile: rag_evidence
      # Legacy/domain override list. Generic German stopwords are provided by
      # language cleanup profile `rag_evidence`. Keep RAG/product-role terms here.
      stop_terms:
        - ohne
        - messen
        - messung
        - tester
        - testgerät
        - testgeraet
        - gerät
        - geraet
        - messgerät
        - messgeraet
        - produkt
        - produkte
        - artikel
        - shop
      aggregate_query_patterns:
        - '/\bwie\s+viele\b/u'
        - '/\bwieviele\b/u'
        - '/\banzahl\b/u'
        - '/\bcount\b/u'
        - '/\bgesamtzahl\b/u'
      aggregate_evidence_terms:
        - anzahl
        - gesamtzahl
        - stückzahl
        - stueckzahl
        - count
        - portfolio
        - sortiment
        - bestand
        - bestände
        - bestaende
        - lieferprogramm
      aggregate_answer_evidence_patterns:
        - '/\b(?:anzahl|gesamtzahl|stückzahl|stueckzahl|count)\b.{0,80}\b\d+\b/u'
        - '/\b\d+\s+(?:[a-z0-9+\-]+\s+){0,3}(?:produkte|artikel|geräte|geraete|messgeräte|messgeraete)\b/u'
        - '/\b(?:insgesamt|gesamt|total)\b.{0,80}\b\d+\b/u'
        - '/\b(?:sortiment|portfolio|lieferprogramm)\b.{0,120}\b(?:umfasst|enthält|enthaelt|besteht\s+aus|beinhaltet)\b.{0,80}\b\d+\b/u'
      vocabulary_maps:
        synonyms: agent.rag_evidence_guard.synonyms

    no_llm_fallback:
      max_shop_results: 5

    production_ui:
      shop_results:
        max_cards: 5

    shop_prompt:
      intro: 'Generate a short search query for Shopware 6 from the following user input text.'
      output_format_block: |-
        Output format:
        Keyword1 Keyword2 Keyword3
      recent_conversation_context_label: 'RECENT CONVERSATION CONTEXT'
      current_user_input_label: 'CURRENT USER INPUT'
      rules:
        - '- Output only the final search query.'
        - '- Always convert relevant search terms to their singular form.'
        - '- No introduction, no explanation, no quotation marks.'
        - '- Use only shop-relevant search terms from the user input for a shop search.'
        - '- Maximum 6 search terms, preferably fewer.'
        - '- Remove filler words, polite phrases, and irrelevant words.'
        - '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.'
        - '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
        - '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
        - '- Preserve domain terms from the current user input or resolved context in their original language.'
        - '- Numbers that belong to a product name or model must be preserved when they are present in the CURRENT USER INPUT or a clearly resolved reference.'
        - '- Separate terms using spaces only.'
        - '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
        - '- Try to always identify all products mentioned in the user input text, even in long prompts.'
        # NOTE(review): "Tritromat" may be a misspelling of the product name
        # "Titromat" — confirm against the shop catalogue before changing it.
        - '- Look for terms such as Testomat, Horiba, Tritromat, Pockettester, Redox, ORP, or words like indicator/Indikator.'
        - '- If the current user input is vague or referential, use the recent conversation context only as support.'
        - '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".'
      conversation_context_rules:
        - '- The current user input has highest priority.'
        - '- Use the recent conversation context only to resolve omitted references.'
        - '- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.'
        - '- Do not revive older products unless the current user input clearly refers to them.'
        - '- If the current input starts a new topic, ignore older product context.'
        - '- Prefer the most recent product reference over older ones.'
      language_preservation:
        enabled: true
        # Every marker is padded with one space on each side; the padding is
        # part of the value, so keep the quotes.
        language_markers:
          de:
            - ' ä '
            - ' ö '
            - ' ü '
            - ' ß '
            - ' der '
            - ' die '
            - ' das '
            - ' ein '
            - ' eine '
            - ' einer '
            - ' einen '
            - ' welchem '
            - ' welchen '
            - ' welche '
            - ' welcher '
            - ' kann '
            - ' nutzen '
            - ' zur '
            - ' für '
            - ' fuer '
            - ' messung '
            - ' indikator '
            - ' reagenz '
            - ' chlor '
        # Keys are English phrases (some contain spaces); values are the
        # German replacements.
        translation_replacements:
          de:
            free chlorine: 'freies chlor'
            free chlor: 'freies chlor'
            total chlorine: 'gesamtchlor'
            chlorine measurement: 'chlor messung'
            water hardness: 'wasserhärte'
            measurement: 'messung'
            measuring: 'messung'
            chlorine: 'chlor'
            indicator: 'indikator'
            indicators: 'indikatoren'
            reagent: 'reagenz'
            reagents: 'reagenzien'
            accessory: 'zubehör'
            accessories: 'zubehör'