# Agent orchestration limits, routing rules and Shopware query prompt wording.
# User-visible chat messages live in config/retriex/chat-messages.yaml.
parameters:
  # NOTE(review): this file's indentation was lost before review. The nesting
  # below is reconstructed from key meaning, vocabulary-view paths and repeated
  # `enabled` keys; keys whose parent is a best guess carry their own note.
  # All values, patterns and prompt strings are unchanged.
  retriex.agent.config:
    commerce_history_budget_chars: 1000
    product_search_knowledge_chunk_limit: 6
    advisory_product_search_knowledge_chunk_limit: 9
    # Matches a leading label such as "Keywords:", "Suchquery:" or "Query:".
    optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
    # Double-quoted on purpose: YAML must interpret the \t, \n, \r, \0, \x0B
    # and \" escapes; single quotes would keep them as literal backslashes.
    optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"

    input_normalization:
      enabled: true
      max_input_chars: 500
      max_output_chars: 700
      max_added_tokens: 2
      max_length_ratio_percent: 150
      # Matches a leading label such as "Normalisiert:" or "Corrected:".
      output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
      placeholder_outputs:
        - 'normalized user input'
        - 'corrected user input'
        - 'user input'
        - 'normalisierte nutzereingabe'
        - 'korrigierte nutzereingabe'
      # Patterns for URLs ("http(s)://", "www.") and Markdown code fences.
      skip_patterns:
        - '/https?:\/\//iu'
        - '/\bwww\./iu'
        - '/```/u'
      prompt:
        intro: 'Normalize the following user input for RetrieX routing before intent detection.'
        output_format_block: |-
          Output format:
          Return exactly one line: the corrected user input.
        current_user_input_label: 'USER INPUT'
        rules:
          - '- Output only the corrected user input text itself, never the words "normalized user input".'
          - '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
          - '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
          - '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
          - '- Preserve the input language; do not translate German into English or English into German.'
          - '- Preserve vague references such as "der indikator", "das gerät", "suche im shop", or "dazu" without resolving them from context.'
          - '- If the input is already clear or you are uncertain, return the original input unchanged.'
          - '- No introduction, no explanation, no quotation marks.'

      # NOTE(review): placed under input_normalization because its vocabulary
      # view is named agent.input_normalization_fuzzy_routing_terms - confirm.
      fuzzy_routing:
        enabled: true
        min_token_length: 4
        medium_token_length: 7
        long_token_length: 11
        max_distance_short: 1
        max_distance_medium: 2
        max_distance_long: 3
        min_similarity_percent: 72
        # Canonical routing terms only, not typo variants.
        # Resolved from the config/retriex/vocabulary.yaml view
        # agent.input_normalization_fuzzy_routing_terms.
        # A local `terms` list may still be added here as an explicit project override.
        vocabulary_views:
          terms: agent.input_normalization_fuzzy_routing_terms

    follow_up_context:
      # NOTE(review): the patterns in this section carry only the `u` flag and
      # are therefore case-sensitive - presumably matched against lowercased
      # input; confirm against the consumer.
      strong_reference_patterns:
        - '/\bder\s+wert\b/u'
        - '/\bdieser\s+wert\b/u'
        - '/\bdiesen\s+wert\b/u'
        - '/\bdem\s+wert\b/u'
        - '/\bmit\s+welche(?:m|n|r)?\b/u'
        - '/\bwomit\b/u'
        - '/\bdamit\b/u'
        - '/\bdafuer\b/u'
        - '/\bdafür\b/u'
        - '/\bdazu\b/u'
        - '/\bdaraus\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+indikator\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u'
        - '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+bereich\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u'
        - '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u'
      # Both umlaut and transliterated spellings are listed (verfügbar/verfuegbar).
      explicit_commercial_signal_terms:
        - shop
        - preis
        - preise
        - preisen
        - kostet
        - kosten
        - kaufen
        - bestellen
        - warenkorb
        - lieferzeit
        - verfuegbar
        - verfügbar
        - lager
        - url
        - link
        - artikelnummer
        - sku
        - produktnummer

      commercial_table_follow_up:
        enabled: true
        # Table/overview wording and price/shop wording within 80 characters
        # of each other, in either order, or "mit/inkl/plus <price word>".
        prompt_patterns:
          - '/\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b.{0,80}\b(?:preis|preise|preisen|kosten|kostet|shop)\b/u'
          - '/\b(?:preis|preise|preisen|kosten|kostet|shop)\b.{0,80}\b(?:tabelle|tabellarisch|übersicht|uebersicht|liste|auflistung)\b/u'
          - '/\b(?:mit|inkl|inklusive|plus)\s+(?:preis|preise|preisen|kosten|shopdaten)\b/u'
        history_anchor_patterns:
          - '/\bTestomat(?:®)?\s+\d{3,4}\b/iu'
          - '/\b(?:Indikatortyp|Indikator|Indikatoren|Reagenz|Reagenzien|Zubehör|Zubehoer)\b/iu'
        vocabulary_views:
          table_terms: agent.follow_up_context.commercial_table_follow_up.table_terms
        commercial_terms:
          - preis
          - preise
          - preisen
          - kosten
          - kostet
          - shop
          - shoppreis
          - shoppreise
          - shopdaten
        indicator_marker_patterns:
          - '/\b(?:Indikatortyp|Indikator(?:en)?|indicator(?:\s+type)?|Reagenz(?:ien)?)\b/iu'
        # `{model}` is a placeholder in the first template only.
        query_template_with_model: '{model} indikator'
        query_template_without_model: 'indikator'

      # NOTE(review): the parent of the keys from here to `reference_anchor`
      # is a best guess (follow_up_context rather than
      # commercial_table_follow_up) - confirm.
      # All three patterns key on a line starting with "Question:".
      history_question_pattern: '/^Question:\s*(.+)$/mi'
      history_turn_split_pattern: '/(?=^Question:\s)/m'
      history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u'
      context_labels:
        previous_user_question_template: 'Vorherige Nutzerfrage: {question}'
        previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
        current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
      reference_anchor:
        product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
        # Number with optional decimal part (comma or dot) followed by "°dH".
        measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'

    final_answer_guard:
      enabled: true
      # Prevents runaway final answers caused by repeated LLM output. This is a
      # runtime safety guard and does not change retrieval, ranking or product matching.
      max_output_chars: 12000
      repeated_line:
        enabled: true
        min_output_chars: 1200
        min_line_chars: 16
        max_line_repetitions: 3
        trailing_window_lines: 220
        # NOTE(review): assumed to belong to repeated_line - confirm.
        # Match lines that start with a field label such as "Preis", "URL" or "Status".
        ignore_patterns:
          - '/^\s*(?:produkt(?:-|\s)?nummer|artikel(?:-|\s)?nummer|preis|verfügbar|verfuegbar|url|quellen?)\b/iu'
          - '/^\s*(?:status|query|intent|datenbasis|shop(?:-|\s)?suche)\b/iu'

    shop_runtime:
      query_cleanup:
        current_input_preservation:
          enabled: true
          # Terms that must be preserved from the current user input in the final
          # Shopware search query. This prevents short domain terms from being
          # dropped by query optimization or generic cleanup. Adapt vocabulary view
          # defaults or add a local `terms` override instead of changing PHP code.
          vocabulary_views:
            terms: agent.shop_runtime.query_cleanup.current_input_preservation_terms

        # NOTE(review): stopword_cleanup and positive_token_filter are assumed
        # to be children of query_cleanup - confirm.
        stopword_cleanup:
          enabled: true
          min_query_tokens_after_cleanup: 2
          # Plain Shopware text search should contain product-relevant terms only.
          # These terms are UI, instruction, presentation or sorting words and are
          # removed after LLM query optimization. Keep this list simple and local.
          terms: []

        positive_token_filter:
          enabled: false
          min_query_tokens_after_filter: 1
          include_current_input_preservation_terms: true
          include_semantic_shop_search_tokens: true
          include_product_role_terms: true
          allowed_terms: []
          blocked_terms: []
          code_patterns: []
          adjacent_variant_patterns: []
          adjacent_variant_terms: []

      # NOTE(review): assumed child of shop_runtime (sibling of query_cleanup);
      # it may instead belong inside query_cleanup - confirm.
      attribute_cleanup:
        enabled: true
        # For direct product/accessory lookups with comparative attribute
        # constraints, keep the concrete product type and application terms but
        # do not send range words/numeric thresholds to the plain text shop
        # search. Example: "Anschlusskabel pH/Redox länger 20m" becomes
        # "anschlusskabel redox" so the shop can return 25m/50m/100m cables.
        min_query_tokens_after_cleanup: 2
        # Direct product/accessory cleanup terms are resolved from
        # config/retriex/vocabulary.yaml. Local lists may still be added here
        # as project-specific overrides, but the default source is vocabulary.
        vocabulary_views:
          product_type_terms: search_repair.direct_product_type_terms
          stop_terms: search_repair.direct_product_attribute_stop_terms
        comparative_constraint_patterns: []

      context_resolution:
        context_usage:
          referential_terms: []
        history_anchor_enrichment:
          enabled: true
          max_query_terms: 2
          # NOTE(review): enabled with an empty template and empty local lists -
          # presumably supplied by an override or the vocabulary; confirm.
          template: ''
          vocabulary_views:
            trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
          query_terms: []
          query_noise_terms: []
          anchor_patterns: []
        # NOTE(review): meta_query_guard and rag_anchor_enrichment are assumed
        # to be children of context_resolution - confirm.
        meta_query_guard:
          enabled: true
          cleanup_profile: shop_context_fallback
          context_fallback_enabled: true
          context_fallback_question_limit: 12
          context_fallback_history_budget_chars: 20000
          context_fallback_use_full_history: true
          context_fallback_max_terms: 6
          # Legacy/domain override list. Generic stopwords, user-instruction
          # phrases and presentation terms are provided by profile
          # `shop_context_fallback`. Keep shop/price/domain terms here.
          context_fallback_filter_terms: []
          meta_only_terms: []
        rag_anchor_enrichment:
          enabled: true
          # Enriches overly narrow numeric shop queries with a product anchor from
          # retrieved RAG records when the same record explicitly contains the
          # requested numeric value/unit. This prevents queries such as "0,02"
          # when RAG already identified a concrete product such as a device model.
          min_score: 120
          max_query_terms: 6
          early_chunk_bonus_max: 10
          template: '{anchor} {query} {subject}'
          # `exact_value_with_unit` alone equals `min_score`; `exact_value_only`
          # plus `anchor_bonus` (90) stays below it.
          scores:
            exact_value_with_unit: 120
            exact_value_only: 40
            anchor_bonus: 50
          numeric_focus_patterns: []
          product_title_patterns: []
          anchor_bonus_patterns: []
          subject_terms: []

      # NOTE(review): result_identity, answer_constraints and direct_answer are
      # assumed to be children of shop_runtime - confirm.
      result_identity:
        enabled: true
        # Direct product-list answers should only list products whose primary
        # identity (name/URL) matches the requested product type. This prevents
        # devices from being listed as a requested consumable merely because the
        # description mentions such consumables as accessories.
        prefer_primary_identity_matches: true
        # NOTE(review): compound_prefix_match and primary_identity_repair are
        # assumed to be children of result_identity - confirm.
        compound_prefix_match:
          enabled: true
          # Some Shopware product names combine the requested product type with
          # a noun suffix, for example "pH-Pufferlösung". Keep this list small
          # and explicit so broad direct-result filtering remains safe.
          terms: []
        primary_identity_repair:
          enabled: true
          min_query_tokens_after_cleanup: 2
          # Only used for a retry query when the direct-result guard would
          # otherwise suppress all shop results. Keep product words and context
          # such as brand/pH/Redox, but remove target-device wording that can
          # push Shopware ranking toward devices instead of the requested
          # accessory/consumable.
          stop_terms: []

      answer_constraints:
        length_sort:
          enabled: true
          trigger_patterns: []
          value_patterns: []

        length_filter:
          enabled: true
          min_patterns: []
          max_patterns: []

      direct_answer:
        enabled: true
        max_results: 10

    rag_evidence_guard:
      cleanup_profile: rag_evidence
      # Legacy/domain override list. Generic German stopwords are provided by
      # language cleanup profile `rag_evidence`. Keep RAG/product-role terms here.
      stop_terms:
        - ohne
        - messen
        - messung
        - tester
        - testgerät
        - testgeraet
        - gerät
        - geraet
        - messgerät
        - messgeraet
        - produkt
        - produkte
        - artikel
        - shop
      aggregate_query_patterns:
        - '/\bwie\s+viele\b/u'
        - '/\bwieviele\b/u'
        - '/\banzahl\b/u'
        - '/\bcount\b/u'
        - '/\bgesamtzahl\b/u'
      aggregate_evidence_terms:
        - anzahl
        - gesamtzahl
        - stückzahl
        - stueckzahl
        - count
        - portfolio
        - sortiment
        - bestand
        - bestände
        - bestaende
        - lieferprogramm
      # NOTE(review): the filler-word class [a-z0-9+\-] in the second pattern
      # contains no umlauts, and none of these patterns has the `i` flag -
      # presumably matched against lowercased (possibly transliterated) text; confirm.
      aggregate_answer_evidence_patterns:
        - '/\b(?:anzahl|gesamtzahl|stückzahl|stueckzahl|count)\b.{0,80}\b\d+\b/u'
        - '/\b\d+\s+(?:[a-z0-9+\-]+\s+){0,3}(?:produkte|artikel|geräte|geraete|messgeräte|messgeraete)\b/u'
        - '/\b(?:insgesamt|gesamt|total)\b.{0,80}\b\d+\b/u'
        - '/\b(?:sortiment|portfolio|lieferprogramm)\b.{0,120}\b(?:umfasst|enthält|enthaelt|besteht\s+aus|beinhaltet)\b.{0,80}\b\d+\b/u'
      vocabulary_maps:
        synonyms: agent.rag_evidence_guard.synonyms

    # NOTE(review): no_llm_fallback and production_ui are assumed to sit
    # directly under retriex.agent.config - confirm.
    no_llm_fallback:
      max_shop_results: 5
    production_ui:
      shop_results:
        max_cards: 5

    shop_prompt:
      intro: 'Generate a short search query for Shopware 6 from the following user input text.'
      output_format_block: |-
        Output format:
        Keyword1 Keyword2 Keyword3
      recent_conversation_context_label: 'RECENT CONVERSATION CONTEXT'
      current_user_input_label: 'CURRENT USER INPUT'
      # Each rule string carries its own leading "- " bullet.
      # NOTE(review): "Tritromat" in the product-terms rule may be a
      # misspelling of "Titromat" - confirm before changing prompt text.
      rules:
        - '- Output only the final search query.'
        - '- Always convert relevant search terms to their singular form.'
        - '- No introduction, no explanation, no quotation marks.'
        - '- Use only shop-relevant search terms from the user input for a shop search.'
        - '- Maximum 6 search terms, preferably fewer.'
        - '- Remove filler words, polite phrases, and irrelevant words.'
        - '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.'
        - '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
        - '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
        - '- Preserve domain terms from the current user input or resolved context in their original language.'
        - '- Numbers that belong to a product name or model must be preserved when they are present in the CURRENT USER INPUT or a clearly resolved reference.'
        - '- Separate terms using spaces only.'
        - '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
        - '- Try to always identify all products mentioned in the user input text, even in long prompts.'
        - '- Look for terms such as Testomat, Horiba, Tritromat, Pockettester, Redox, ORP, or words like indicator/Indikator.'
        - '- If the current user input is vague or referential, use the recent conversation context only as support.'
        - '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".'
      conversation_context_rules:
        - '- The current user input has highest priority.'
        - '- Use the recent conversation context only to resolve omitted references.'
        - '- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.'
        - '- Do not revive older products unless the current user input clearly refers to them.'
        - '- If the current input starts a new topic, ignore older product context.'
        - '- Prefer the most recent product reference over older ones.'
      # NOTE(review): assumed child of shop_prompt; it may instead sit directly
      # under retriex.agent.config - confirm.
      language_preservation:
        enabled: true
        language_markers:
          # NOTE(review): every marker is padded with spaces, including the
          # single characters ' ä ', ' ö ', ' ü ' and ' ß '. With plain substring
          # matching those only hit a stand-alone character, not an umlaut
          # inside a word - confirm how the consumer applies them.
          de:
            - ' ä '
            - ' ö '
            - ' ü '
            - ' ß '
            - ' der '
            - ' die '
            - ' das '
            - ' ein '
            - ' eine '
            - ' einer '
            - ' einen '
            - ' welchem '
            - ' welchen '
            - ' welche '
            - ' welcher '
            - ' kann '
            - ' nutzen '
            - ' zur '
            - ' für '
            - ' fuer '
            - ' messung '
            - ' indikator '
            - ' reagenz '
            - ' chlor '
        translation_replacements:
          # Multi-word phrases are listed before the single words they contain
          # ("free chlorine" before "chlorine").
          # NOTE(review): presumably applied in this order - confirm, since
          # YAML mappings do not guarantee key order in every consumer.
          de:
            free chlorine: 'freies chlor'
            free chlor: 'freies chlor'
            total chlorine: 'gesamtchlor'
            chlorine measurement: 'chlor messung'
            water hardness: 'wasserhärte'
            measurement: 'messung'
            measuring: 'messung'
            chlorine: 'chlor'
            indicator: 'indikator'
            indicators: 'indikatoren'
            reagent: 'reagenz'
            reagents: 'reagenzien'
            accessory: 'zubehör'
            accessories: 'zubehör'