# Files
# MtoRagSystem/config/retriex/governance.yaml
# team 1 707143f13e p43K
# 2026-05-05 19:13:56 +02:00
#
# 166 lines
# 4.3 KiB
# YAML

# Governance and regression guardrail configuration.
# These values are intentionally YAML-owned so developer-policy checks do not
# reintroduce domain-specific guardrail terms as PHP-only defaults.
parameters:
    # NOTE(review): the nesting below was rebuilt from the key names and the
    # inline comments because the reviewed copy had lost its indentation.
    # Confirm the parent of each group against the consuming PHP code.
    retriex.governance.config:
        # Baseline vocabulary and prompt markers referenced by the regression
        # guardrails. Measurement values use a decimal comma and are quoted so
        # they stay strings.
        regression_baseline:
            protected_short_model_tokens:
                - th
                - tc
                - tp
                - tm
                - ph
                - rx
            protected_measurement_values:
                - '0,02'
                - '0,05'
                - '0,1'
                - '0,25'
                - '0,5'
                - '1,0'
                - '2,0'
                - '2,5'
                - '5,0'
            protected_technical_prompt_keywords:
                - testomat
                - indikator
                - grenzwert
                - messbereich
                - gemessen
            technical_priority_required_markers:
                - runner-up
                - second-lowest
                - comparison
            protected_accessory_prompt_keywords:
                - indikator
                - reagenz
            protected_search_repair_specificity_terms:
                - indikator
                - testomat
                - reagenz
            protected_retrieval_reagent_words:
                - indikator
                - reagenz
            # Each group lists spelling variants of one device word
            # (ASCII transliteration and umlaut form).
            protected_retrieval_device_word_groups:
                geraet:
                    - geraet
                    - gerät
            # NOTE(review): the shop_* keys are assumed to belong to
            # regression_baseline — TODO confirm.
            shop_prompt_regression_original_query: 'testomat 808 0,02'
            shop_prompt_required_output_instruction_markers:
                - 'Output only the final search query.'
                - 'Output format:'
            shop_query_meta_guard_terms:
                - shop
                - suche
            shop_query_context_fallback_filter_terms:
                - welchem
                - kann
                - messen
            shop_query_current_input_preservation_terms:
                - ph
                - redox

        # Protected vocabulary tokens fall back to
        # regression_baseline.protected_short_model_tokens.
        # Add vocabulary.protected_short_model_tokens only for an explicit override.
        vocabulary: {}

        # Language-cleanup guardrails: protected stopword terms plus the
        # cleanup profiles and profile terms listed as required.
        language:
            protected_stopword_terms:
                - nicht
                - kein
                - keine
                - welche
                - testomat
                - indikator
                - indikatortyp
                - ph
                - rx
                - th
                - tc
                - '0,02'
            required_cleanup_profiles:
                - commerce_query
                - rag_evidence
                - shop_context_fallback
                - retrieval_reference_cleanup
            required_profile_term_defaults:
                stopwords:
                    - der
                    - dieser
                    - mit
                    - bitte
            required_profile_terms:
                commerce_query:
                    phrases:
                        - ich suche
                        - suche im shop
                # Explicit empty mapping: no profile-specific terms listed.
                rag_evidence: {}
                shop_context_fallback:
                    phrases:
                        - zeige mir
                        - suche im shop
                    # NOTE(review): meta_terms is assumed to belong to
                    # shop_context_fallback — TODO confirm.
                    meta_terms:
                        - tabelle
                        - übersicht
                        - liste

        # Settings for the core pattern audit: which paths are scanned,
        # excluded, or warned about, and which calls and terms are listed
        # as suspicious or domain-specific.
        core_pattern_audit:
            source_roots:
                - src
            excluded_path_prefixes:
                - src/Config/CorePatternAuditProvider.php
                - src/Command/ConfigPatternAuditCommand.php
                - src/Entity/
            # Regular expressions using "~" as the delimiter.
            excluded_path_patterns:
                - '~(^|/)vendor(/|$)~'
                - '~(^|/)var(/|$)~'
                - '~(^|/)node_modules(/|$)~'
            warning_path_prefixes:
                - src/Agent/
                - src/Commerce/
                - src/Intent/
                - src/Knowledge/Retrieval/
            suspicious_calls:
                - preg_match
                - preg_match_all
                - preg_replace
                - preg_split
                - str_contains
                - stripos
                - strpos
                - str_starts_with
                - str_ends_with
                - in_array
                - array_intersect
                - array_intersect_key
            domain_marker_terms:
                - testomat
                - indikator
                - indikatortyp
                - grenzwert
                - messbereich
                - reagenz
                - reagens
                - shop
                - produkt
                - artikel
                - kaufen
                - bestellen
                - geraet
                - gerät
                - messgerät
                - messgeraet
                - analysegerät
                - analysegeraet
                - analysator
                - wasserhärte
                - wasserhaerte
                - chlor
                - redox
            # Per-file exceptions, each with a documented reason. The patterns
            # are single-quoted, so every backslash is passed on literally.
            allowed_literal_patterns:
                - path: src/Knowledge/Retrieval/NdjsonChunkLookup.php
                  pattern: '/Produkt\\s\+Titel/iu'
                  reason: 'Technical markdown heading parser for product-title metadata.'
                - path: src/Knowledge/Retrieval/NdjsonHybridRetriever.php
                  pattern: '/Produkt\\s\+Titel/iu'
                  reason: 'Technical markdown heading parser for product-title metadata.'
            # NOTE(review): assumed to be a core_pattern_audit setting rather
            # than part of the last allow-list entry — TODO confirm.
            max_snippet_length: 180