Files
MtoRagSystem/config/retriex/search_repair.yaml
team 1 de12386a98 p42
2026-05-05 08:16:45 +02:00

117 lines
3.7 KiB
YAML

# Shop search-repair configuration.
# YAML is the source of truth; SearchRepairConfig intentionally contains no PHP defaults.
# Root of the search-repair configuration.
# NOTE(review): indentation is flattened in this view, so nesting cannot be
# read off the file; presumably `retriex.search_repair.config` is a child of
# `parameters` and every key that follows lives beneath it - confirm.
parameters:
retriex.search_repair.config:
# Two boolean switches for the requested-accessory-code repair path; both are
# enabled. NOTE(review): their exact effect is defined by the consumer
# (presumably SearchRepairConfig) - confirm there before changing them.
strict_requested_accessory_code_repair: true
prefer_prompt_anchored_model_for_requested_accessory_code: true
# Settings for direct product-attribute lookups: an on/off switch, a minimum
# token count and a stop-term list.
direct_product_attribute_lookup:
enabled: true
# NOTE(review): by its name, the number of query tokens that must remain after
# cleanup - confirm how the consumer counts tokens.
min_query_tokens_after_cleanup: 2
# Query repair must stay on the requested product/accessory type for
# direct attribute lookups. It may relax comparative constraints, but it
# must not expand to unrelated RAG model/device candidates.
#
# German stop terms. The list mixes command and filler words (zeige, suche,
# bitte), prepositions and articles (für, mit, der, eine) and comparison
# words (länger, größer, mindestens, mehr, als). Every word containing an
# umlaut or eszett is listed twice: once as written and once in its ASCII
# transliteration (für/fuer, länger/laenger, größer/groesser, über/ueber).
stop_terms:
- zeige
- zeig
- suche
- such
- mir
- bitte
- für
- fuer
- nach
- mit
- ohne
- von
- zum
- zur
- der
- die
- das
- ein
- eine
- einen
- länger
- laenger
- lang
- kürzer
- kuerzer
- größer
- groesser
- kleiner
- über
- ueber
- unter
- mindestens
- maximal
- maximum
- minimum
- ab
- bis
- mehr
- weniger
- als
# Regex patterns (flags: case-insensitive, UTF-8) that detect a comparative
# constraint: a comparison word (länger, kürzer, größer, kleiner, über, unter,
# mindestens, maximal, maximum, minimum, ab, bis, "mehr als", "weniger als",
# including the ae/oe/ue spellings) followed by whitespace and a number.
# The named group `value` captures the number (with an optional `,` or `.`
# decimal part) plus an optional unit suffix made of letters, µ, ° or %.
# NOTE(review): the group is followed by `\b`, and there is no word boundary
# after a symbol, so a match can never end in `%` or `°`; "50 %" and "50%"
# both capture only "50". Confirm this is intended.
comparative_constraint_patterns:
- '/\b(?:länger|laenger|kürzer|kuerzer|größer|groesser|kleiner|über|ueber|unter|mindestens|maximal|maximum|minimum|ab|bis|mehr\s+als|weniger\s+als)\s+(?P<value>\d+(?:[,.]\d+)?\s*[\p{L}µ°%]*)\b/iu'
# Fallback settings for requested accessory codes.
# Each template contains the placeholders `{term}` and `{code}`.
# NOTE(review): presumably `{term}` is filled from the fallback terms below
# and `{code}` with the extracted accessory code - confirm in the consumer.
requested_accessory_code_fallback_query_templates:
- '{term} {code}'
# German and English words for indicator (type) and reagent.
requested_accessory_code_fallback_terms:
- indikatortyp
- indikator
- indicator
- reagenz
- reagent
# Holds the same five terms as requested_accessory_code_fallback_terms; the
# two lists are maintained separately, so keep them in sync if that is meant.
requested_accessory_code_context_prefix_terms:
- indikatortyp
- indikator
- indicator
- reagenz
- reagent
# NOTE(review): the unit is not stated here (presumably characters) - confirm.
requested_accessory_code_proximity_window: 1600
# Regex patterns (UTF-8, case-sensitive) for a specific model name. Group 1
# captures: a word starting with a letter (German umlauts and ß allowed, ® and
# hyphen allowed inside), up to three further words of letters/digits, then
# whitespace and a 2-5 digit number, optionally followed by whitespace and a
# suffix of 1-8 uppercase letters (A-Z, Ä, Ö, Ü).
specific_model_candidate_patterns:
- '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß0-9][A-Za-zÄÖÜäöüß0-9®\-]*){0,3}\s+\d{2,5}(?:\s+[A-ZÄÖÜ]{1,8})?)\b/u'
# Terms excluded from model candidates: the indicator/reagent words plus
# "verfügbarkeit" (availability, also in its ue spelling) and "shop".
# NOTE(review): how the exclusion is applied (whole candidate vs. single
# token) is decided by the consumer - confirm.
model_candidate_exclude_terms:
- indikatortyp
- indikator
- indicator
- reagenz
- reagent
- verfügbarkeit
- verfuegbarkeit
- shop
# Limits and sanitising settings.
# NOTE(review): indentation is flattened in this view, so it cannot be told
# whether sanitize_trim_character_codes and product_key_separator are children
# of `limits` or siblings of it - confirm against the checked-in file.
limits:
# NOTE(review): by its name, caps how many top products are written to the
# log - confirm.
top_product_log_limit: 3
# Character codes preserve the previous PHP trim charlist:
# space, tab, LF, CR, NUL, vertical tab, double quote, single quote,
# backtick, dot, comma, semicolon, colon, hyphen.
# The codes are listed in the same order as the names above.
sanitize_trim_character_codes: [32, 9, 10, 13, 0, 11, 34, 39, 96, 46, 44, 59, 58, 45]
# Quoted because a plain `|` would start a YAML literal block scalar.
product_key_separator: '|'
# Numeric weights, bonuses, caps and one threshold used for scoring.
# All values are integers except primary_query_overlap_threshold, which is a
# fraction (0.9).
# NOTE(review): how these values are combined is defined by the consumer;
# nothing in this file shows the formula - confirm there before tuning.
scores:
candidate_digit: 4
candidate_word_count_cap: 4
specificity_boost: 3
primary_query_overlap_threshold: 0.9
prompt_match_weight: 3
primary_query_match_weight: 2
repair_signal_match_weight: 4
primary_result_order_bonus: 1
token_intersection_score: 2
numeric_token_match_score: 4
# Named regex patterns. All use the UTF-8 flag; those ending in `/iu` are also
# case-insensitive. Keys ending in `_template` contain a `{terms}` placeholder.
# NOTE(review): presumably `{terms}` is replaced by a `|`-joined, escaped term
# list before the pattern is compiled - confirm in the consumer.
patterns:
# Group 1: one to three words of letters (umlauts, ß, ® and hyphen allowed),
# whitespace, then 2-5 digits with an optional alphanumeric/hyphen suffix.
model_candidate: '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u'
# Group 1: one of `{terms}`, whitespace, then 1-5 digits with an optional
# alphanumeric/hyphen suffix.
accessory_candidate_template: '/\b((?:{terms})\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu'
# An indicator/reagent word (indikator, indikatortyp, indicator,
# "indicator type", reagenz, reagent) followed by the code in group 1: up to
# three letters, optional whitespace, 1-5 digits, optional suffix.
requested_accessory_code: '/\b(?:indikator(?:typ)?|indicator(?:\s*type)?|reagenz|reagent)\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'
# Group 1: any single one of `{terms}` as a whole word.
accessory_or_bundle_template: '/\b({terms})\b/iu'
# Same expression as model_candidate, but without the capture group.
model_like: '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u'
# Any one of `{terms}` as a whole word; nothing is captured.
specificity_boost_template: '/\b(?:{terms})\b/iu'
# Matches a single digit anywhere in the subject.
contains_digit: '/\d/u'
# Matches a run of one or more whitespace characters.
whitespace_collapse: '/\s+/u'
# Matches a run of characters that are not letters, numbers, whitespace or
# hyphen.
tokenize_cleanup: '/[^\p{L}\p{N}\s\-]+/u'