Files
MtoRagSystem/config/retriex/agent.yaml
team 1 bd62248c8d p65
2026-05-09 11:43:13 +02:00

407 lines
17 KiB
YAML

# Agent orchestration limits, routing rules and Shopware query prompt wording.
# User-visible chat messages live in config/retriex/chat-messages.yaml.
parameters:
retriex.agent.config:
# Numeric limits: a character budget for commerce history and two
# knowledge-chunk limits (regular vs. advisory product search).
# NOTE(review): exact enforcement lives in the consuming code - confirm there.
commerce_history_budget_chars: 1000
product_search_knowledge_chunk_limit: 6
advisory_product_search_knowledge_chunk_limit: 9
# Matches a leading label "keyword:", "keywords:", "suchquery:",
# "search query:" or "query:" (case-insensitive, Unicode mode), including any
# whitespace around the colon.
optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
# Double-quoted on purpose so YAML interprets the escapes: space, tab,
# newline, carriage return, NUL, vertical tab (\x0B), double quote,
# single quote and backtick.
optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"
# Settings for normalizing the raw user input before intent detection
# (see the prompt intro below).
input_normalization:
enabled: true
# Size and growth limits. NOTE(review): the names suggest caps on input
# length, output length, number of added tokens and the output/input length
# ratio in percent - confirm the exact enforcement in the consuming code.
max_input_chars: 500
max_output_chars: 700
max_added_tokens: 2
max_length_ratio_percent: 150
# Matches a leading label "normalisiert:", "korrigiert:", "corrected:" or
# "normalized:" (case-insensitive, Unicode mode).
output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
# Placeholder phrases in English and German, including the wording the prompt
# rules explicitly forbid as output.
# NOTE(review): presumably an output equal to one of these is discarded - confirm.
placeholder_outputs:
- 'normalized user input'
- 'corrected user input'
- 'user input'
- 'normalisierte nutzereingabe'
- 'korrigierte nutzereingabe'
# Match http:// or https:// URLs, a "www." prefix and triple backticks.
skip_patterns:
- '/https?:\/\//iu'
- '/\bwww\./iu'
- '/```/u'
# Prompt building blocks: intro sentence, output format block, label for the
# user input and the rule lines.
prompt:
intro: 'Normalize the following user input for RetrieX routing before intent detection.'
output_format_block: |-
Output format:
Return exactly one line: the corrected user input.
current_user_input_label: 'USER INPUT'
# Each rule is a complete prompt line including its leading "- " bullet.
rules:
- '- Output only the corrected user input text itself, never the words "normalized user input".'
- '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
- '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
- '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
- '- Preserve the input language; do not translate German into English or English into German.'
- '- Preserve vague references such as "der indikator", "das gerät", "suche im shop", or "dazu" without resolving them from context.'
- '- If the input is already clear or you are uncertain, return the original input unchanged.'
- '- No introduction, no explanation, no quotation marks.'
# Three token-length thresholds (4 / 7 / 11) paired with three maximum
# distances (1 / 2 / 3), plus a minimum similarity in percent.
# NOTE(review): presumably an edit distance per token-length class - confirm.
fuzzy_routing:
enabled: true
min_token_length: 4
medium_token_length: 7
long_token_length: 11
max_distance_short: 1
max_distance_medium: 2
max_distance_long: 3
min_similarity_percent: 72
# Canonical routing terms only, not typo variants.
# Resolved from config/retriex/vocabulary.yaml view
# agent.input_normalization_fuzzy_routing_terms.
# A local terms list may still be added here as an explicit project override.
vocabulary_views:
terms: agent.input_normalization_fuzzy_routing_terms
# Patterns, terms and templates for follow-up questions that refer back to
# earlier conversation turns (see context_labels below).
follow_up_context:
# German referential phrases ("der wert", "womit", "damit", "dazu",
# "welcher indikator", ...). Umlaut words are listed in both spellings
# (dafuer / dafür). The patterns carry only the /u flag, so they are
# case-sensitive.
# NOTE(review): presumably matched against lowercased input - confirm.
strong_reference_patterns:
- '/\bder\s+wert\b/u'
- '/\bdieser\s+wert\b/u'
- '/\bdiesen\s+wert\b/u'
- '/\bdem\s+wert\b/u'
- '/\bmit\s+welche(?:m|n|r)?\b/u'
- '/\bwomit\b/u'
- '/\bdamit\b/u'
- '/\bdafuer\b/u'
- '/\bdafür\b/u'
- '/\bdazu\b/u'
- '/\bdaraus\b/u'
- '/\bwelche(?:r|s|m|n)?\s+indikator\b/u'
- '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u'
- '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u'
- '/\bwelche(?:r|s|m|n)?\s+bereich\b/u'
- '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u'
- '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u'
# Lowercase shop/price/availability/identifier words (German plus "shop",
# "url", "link", "sku"); "verfuegbar" is listed in both spellings.
explicit_commercial_signal_terms:
- shop
- preis
- preise
- preisen
- kostet
- kosten
- kaufen
- bestellen
- warenkorb
- lieferzeit
- verfuegbar
- verfügbar
- lager
- url
- link
- artikelnummer
- sku
- produktnummer
commercial_table_follow_up:
enabled: true
# 1) table/overview/list word followed within 80 characters by a
#    price/cost/shop word, 2) the same in reverse order, 3) "mit", "inkl",
#    "inklusive" or "plus" directly followed by a price/cost word.
# Case-sensitive (only /u), like strong_reference_patterns.
prompt_patterns:
- '/\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b.{0,80}\b(?:preis|preise|preisen|kosten|kostet|shop)\b/u'
- '/\b(?:preis|preise|preisen|kosten|kostet|shop)\b.{0,80}\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b/u'
- '/\b(?:mit|inkl|inklusive|plus)\s+(?:preis|preise|preisen|kosten|shopdaten)\b/u'
# Case-insensitive: "Testomat" (optional ®) followed by a 3-4 digit number,
# or one of the indicator/reagent/accessory nouns.
history_anchor_patterns:
- '/\bTestomat(?:®)?\s+\d{3,4}\b/iu'
- '/\b(?:Indikatortyp|Indikator|Indikatoren|Reagenz|Reagenzien|Zubehör|Zubehoer)\b/iu'
vocabulary_views:
table_terms: agent.follow_up_context.commercial_table_follow_up.table_terms
commercial_terms:
- preis
- preise
- preisen
- kosten
- kostet
- shop
- shoppreis
- shoppreise
- shopdaten
# Case-insensitive indicator/reagent markers in German and English.
indicator_marker_patterns:
- '/\b(?:Indikatortyp|Indikator(?:en)?|indicator(?:\s+type)?|Reagenz(?:ien)?)\b/iu'
# Query templates; {model} is the only placeholder.
query_template_with_model: '{model} indikator'
query_template_without_model: 'indikator'
# History parsing around lines that start with "Question:":
# - history_question_pattern captures the text after "Question:" on a line
#   (multiline, case-insensitive).
# - history_turn_split_pattern is a zero-width lookahead before each line
#   that starts with "Question:" followed by whitespace.
# - history_question_strip_pattern has no /m flag, so it only matches a
#   "Question:" line at the very start of the subject, up to and including
#   its line break.
history_question_pattern: '/^Question:\s*(.+)$/mi'
history_turn_split_pattern: '/(?=^Question:\s)/m'
history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u'
# German label templates with {question} / {anchors} placeholders.
context_labels:
previous_user_question_template: 'Vorherige Nutzerfrage: {question}'
previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
reference_anchor:
# "Testomat" (optional ®) followed by a 3-4 digit model with an optional
# 2-8 letter suffix, or by an EVO / ECO / DUO / LAB variant; case-insensitive.
product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
# Number with optional decimal part (comma or dot) followed by "°dH", with
# optional whitespace around the degree sign.
measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
final_answer_guard:
enabled: true
# Prevents runaway final answers caused by repeated LLM output. This is a
# runtime safety guard and does not change retrieval, ranking or product matching.
max_output_chars: 12000
# Thresholds for the repeated-line check.
# NOTE(review): the names suggest it only applies to outputs of at least
# min_output_chars, considers lines of at least min_line_chars within the last
# trailing_window_lines lines, and tolerates max_line_repetitions - confirm.
repeated_line:
enabled: true
min_output_chars: 1200
min_line_chars: 16
max_line_repetitions: 3
trailing_window_lines: 220
# Case-insensitive patterns for lines that start (after optional whitespace)
# with a label such as Produktnummer, Artikelnummer, Preis, verfügbar, URL,
# Quelle(n), Status, Query, Intent, Datenbasis or Shop-Suche.
ignore_patterns:
- '/^\s*(?:produkt(?:-|\s)?nummer|artikel(?:-|\s)?nummer|preis|verfügbar|verfuegbar|url|quellen?)\b/iu'
- '/^\s*(?:status|query|intent|datenbasis|shop(?:-|\s)?suche)\b/iu'
shop_runtime:
# Cleanup steps for the Shopware search query.
query_cleanup:
current_input_preservation:
enabled: true
# Terms that must be preserved from the current user input in the final
# Shopware search query. This prevents short domain terms from being
# dropped by query optimization or generic cleanup. Adapt vocabulary view
# defaults or add a local `terms` override instead of changing PHP code.
vocabulary_views:
terms: agent.shop_runtime.query_cleanup.current_input_preservation_terms
stopword_cleanup:
enabled: true
min_query_tokens_after_cleanup: 2
# Plain Shopware text search should contain product-relevant terms only.
# These terms are UI, instruction, presentation or sorting words and are
# removed after LLM query optimization. Keep this list simple and local.
# The list is empty in this file.
terms: []
# Disabled in this file (enabled: false); all three term/pattern lists below
# are empty.
positive_token_filter:
enabled: false
min_query_tokens_after_filter: 1
include_current_input_preservation_terms: true
include_semantic_shop_search_tokens: true
include_product_role_terms: true
allowed_terms: []
blocked_terms: []
code_patterns: []
attribute_cleanup:
enabled: true
# For direct product/accessory lookups with comparative attribute
# constraints, keep the concrete product type and application terms but
# do not send range words/numeric thresholds to the plain text shop
# search. Example: "Anschlusskabel pH/Redox länger 20m" becomes
# "anschlusskabel redox" so the shop can return 25m/50m/100m cables.
min_query_tokens_after_cleanup: 2
# Direct product/accessory cleanup terms are resolved from
# config/retriex/vocabulary.yaml. Local lists may still be added here
# as project-specific overrides, but the default source is vocabulary.
vocabulary_views:
product_type_terms: search_repair.direct_product_type_terms
stop_terms: search_repair.direct_product_attribute_stop_terms
# No comparative-constraint patterns are defined in this file.
comparative_constraint_patterns: []
context_resolution:
context_usage:
referential_terms: []
history_anchor_enrichment:
enabled: true
max_query_terms: 2
# Empty template and empty local lists in this file; only trigger_terms is
# wired to a vocabulary view.
template: ''
vocabulary_views:
trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
query_terms: []
query_noise_terms: []
anchor_patterns: []
meta_query_guard:
enabled: true
cleanup_profile: shop_context_fallback
# Context fallback limits: question count, history character budget and the
# maximum number of terms.
context_fallback_enabled: true
context_fallback_question_limit: 12
context_fallback_history_budget_chars: 20000
context_fallback_use_full_history: true
context_fallback_max_terms: 6
# Legacy/domain override list. Generic stopwords, user-instruction
# phrases and presentation terms are provided by profile
# `shop_context_fallback`. Keep shop/price/domain terms here.
context_fallback_filter_terms: []
meta_only_terms: []
rag_anchor_enrichment:
enabled: true
# Enriches overly narrow numeric shop queries with a product anchor from
# retrieved RAG records when the same record explicitly contains the
# requested numeric value/unit. This prevents queries such as "0,02"
# when RAG already identified a concrete product such as a device model.
min_score: 120
max_query_terms: 6
early_chunk_bonus_max: 10
# Placeholders: {anchor}, {query}, {subject}.
template: '{anchor} {query} {subject}'
# exact_value_with_unit (120) equals min_score on its own, whereas
# exact_value_only (40) + anchor_bonus (50) + early_chunk_bonus_max (10)
# adds up to 100.
# NOTE(review): assumes the scores are summed and compared against
# min_score - confirm in the consuming code.
scores:
exact_value_with_unit: 120
exact_value_only: 40
anchor_bonus: 50
numeric_focus_patterns: []
product_title_patterns: []
anchor_bonus_patterns: []
subject_terms: []
result_identity:
enabled: true
# Direct product-list answers should only list products whose primary
# identity (name/URL) matches the requested product type. This prevents
# devices from being listed as a requested consumable merely because the
# description mentions such consumables as accessories.
prefer_primary_identity_matches: true
compound_prefix_match:
enabled: true
# Some Shopware product names combine the requested product type with
# a noun suffix, for example "pH-Pufferlösung". Keep this list small
# and explicit so broad direct-result filtering remains safe.
# The list is empty in this file.
terms: []
primary_identity_repair:
enabled: true
min_query_tokens_after_cleanup: 2
# Only used for a retry query when the direct-result guard would
# otherwise suppress all shop results. Keep product words and context
# such as brand/pH/Redox, but remove target-device wording that can
# push Shopware ranking toward devices instead of the requested
# accessory/consumable.
# The list is empty in this file.
stop_terms: []
# Length sorting and filtering are enabled, but every pattern list is empty in
# this file.
# NOTE(review): presumably the patterns are supplied elsewhere (override or
# vocabulary) - confirm.
answer_constraints:
length_sort:
enabled: true
trigger_patterns: []
value_patterns: []
length_filter:
enabled: true
min_patterns: []
max_patterns: []
# Direct answers are limited to 10 results.
direct_answer:
enabled: true
max_results: 10
rag_evidence_guard:
cleanup_profile: rag_evidence
# Legacy/domain override list. Generic German stopwords are provided by
# language cleanup profile `rag_evidence`. Keep RAG/product-role terms here.
# Umlaut words are listed in both spellings (gerät / geraet).
stop_terms:
- ohne
- messen
- messung
- tester
- testgerät
- testgeraet
- gerät
- geraet
- messgerät
- messgeraet
- produkt
- produkte
- artikel
- shop
# Counting questions: "wie viele", "wieviele", "anzahl", "count",
# "gesamtzahl". Only the /u flag is set, so matching is case-sensitive.
# NOTE(review): presumably applied to lowercased text - confirm.
aggregate_query_patterns:
- '/\bwie\s+viele\b/u'
- '/\bwieviele\b/u'
- '/\banzahl\b/u'
- '/\bcount\b/u'
- '/\bgesamtzahl\b/u'
aggregate_evidence_terms:
- anzahl
- gesamtzahl
- stückzahl
- stueckzahl
- count
- portfolio
- sortiment
- bestand
- bestände
- bestaende
- lieferprogramm
# Each pattern requires a number close to a count-related word:
# 1) count word followed within 80 characters by a number,
# 2) number followed by up to three tokens and a product/device noun,
# 3) "insgesamt" / "gesamt" / "total" followed within 80 characters by a number,
# 4) assortment word, then within 120 characters a "comprises/contains" verb,
#    then within 80 characters a number.
aggregate_answer_evidence_patterns:
- '/\b(?:anzahl|gesamtzahl|stückzahl|stueckzahl|count)\b.{0,80}\b\d+\b/u'
- '/\b\d+\s+(?:[a-z0-9+\-]+\s+){0,3}(?:produkte|artikel|geräte|geraete|messgeräte|messgeraete)\b/u'
- '/\b(?:insgesamt|gesamt|total)\b.{0,80}\b\d+\b/u'
- '/\b(?:sortiment|portfolio|lieferprogramm)\b.{0,120}\b(?:umfasst|enthält|enthaelt|besteht\s+aus|beinhaltet)\b.{0,80}\b\d+\b/u'
vocabulary_maps:
synonyms: agent.rag_evidence_guard.synonyms
# Result limits; both are set to 5 in this file.
no_llm_fallback:
max_shop_results: 5
production_ui:
shop_results:
max_cards: 5
# Prompt wording for generating the Shopware 6 search query: intro sentence,
# output format block, section labels and rule lines.
shop_prompt:
intro: 'Generate a short search query for Shopware 6 from the following user input text.'
output_format_block: |-
Output format:
Keyword1 Keyword2 Keyword3
recent_conversation_context_label: 'RECENT CONVERSATION CONTEXT'
current_user_input_label: 'CURRENT USER INPUT'
# Each rule is a complete prompt line including its leading "- " bullet.
rules:
- '- Output only the final search query.'
- '- Always convert relevant search terms to their singular form.'
- '- No introduction, no explanation, no quotation marks.'
- '- Use only shop-relevant search terms from the user input for a shop search.'
- '- Maximum 6 search terms, preferably fewer.'
- '- Remove filler words, polite phrases, and irrelevant words.'
- '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.'
- '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
- '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
- '- Preserve domain terms from the current user input or resolved context in their original language.'
- '- Numbers that belong to a product name or model must be preserved when they are present in the CURRENT USER INPUT or a clearly resolved reference.'
- '- Separate terms using spaces only.'
- '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
- '- Try to always identify all products mentioned in the user input text, even in long prompts.'
- '- Look for terms such as Testomat, Horiba, Tritromat, Pockettester, Redox, ORP, or words like indicator/Indikator.'
- '- If the current user input is vague or referential, use the recent conversation context only as support.'
- '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".'
# Rules about how the recent conversation context may be used; same
# "- " bullet format as the rules above.
conversation_context_rules:
- '- The current user input has highest priority.'
- '- Use the recent conversation context only to resolve omitted references.'
- '- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.'
- '- Do not revive older products unless the current user input clearly refers to them.'
- '- If the current input starts a new topic, ignore older product context.'
- '- Prefer the most recent product reference over older ones.'
language_preservation:
enabled: true
# Language markers keyed by language code; only "de" is defined here.
# Every marker is padded with one space on each side.
# NOTE(review): presumably matched as substrings of space-padded, lowercased
# input so that only whole words hit. If so, the single-letter markers
# ' ä ', ' ö ', ' ü ' and ' ß ' only fire for a stand-alone letter and not for
# umlauts inside words - confirm that this is intended.
language_markers:
de:
- ' ä '
- ' ö '
- ' ü '
- ' ß '
- ' der '
- ' die '
- ' das '
- ' ein '
- ' eine '
- ' einer '
- ' einen '
- ' welchem '
- ' welchen '
- ' welche '
- ' welcher '
- ' kann '
- ' nutzen '
- ' zur '
- ' für '
- ' fuer '
- ' messung '
- ' indikator '
- ' reagenz '
- ' chlor '
translation_replacements:
de:
free chlorine: 'freies chlor'
free chlor: 'freies chlor'
total chlorine: 'gesamtchlor'
chlorine measurement: 'chlor messung'
water hardness: 'wasserhärte'
measurement: 'messung'
measuring: 'messung'
chlorine: 'chlor'
indicator: 'indikator'
indicators: 'indikatoren'
reagent: 'reagenz'
reagents: 'reagenzien'
accessory: 'zubehör'
accessories: 'zubehör'