From 1af158c8a904afb932bcf07401b8af388e813271 Mon Sep 17 00:00:00 2001
From: team 1 <team1@mitho-media.de>
Date: Mon, 4 May 2026 19:12:31 +0200
Subject: [PATCH] last fix and params_guide

---
 CONFIG_PARAMS.md                     | 340 +++++++++++++++++++++++++++
 src/Config/LanguageCleanupConfig.php |  11 +
 2 files changed, 351 insertions(+)
 create mode 100644 CONFIG_PARAMS.md

diff --git a/CONFIG_PARAMS.md b/CONFIG_PARAMS.md
new file mode 100644
index 0000000..0ceb51e
--- /dev/null
+++ b/CONFIG_PARAMS.md
@@ -0,0 +1,340 @@
+## **1\. `config/retriex/agent.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `retriex.agent.config.commerce_history_budget_chars` | Begrenzt, wie viel Chatverlauf für Commerce-/Shop-Kontext in die Query-Auflösung einfließt. |
+| `product_search_knowledge_chunk_limit` | Begrenzt RAG-Chunks bei normalen produktnahen Shop-/Wissensfragen. |
+| `advisory_product_search_knowledge_chunk_limit` | Begrenzt RAG-Chunks bei beratenden Produktauswahlfragen, z. B. „welches Gerät ist geeignet“. |
+| `optimized_shop_query_prefix_pattern` | Entfernt LLM-Ausgabepräfixe wie `query:` oder `keywords:` aus optimierten Shop-Suchqueries. |
+| `optimized_shop_query_trim_characters` | Trimmt unerwünschte Zeichen am Rand optimierter Shop-Queries. |
+| `input_normalization.enabled` | Schaltet die Vor-Normalisierung der Nutzereingabe ein/aus. |
+| `input_normalization.max_input_chars` | Maximale Eingabelänge, die zur Normalisierung geschickt wird. |
+| `input_normalization.max_output_chars` | Maximale akzeptierte Länge der Normalisierungsantwort. |
+| `input_normalization.max_added_tokens` | Verhindert, dass die Normalisierung zu viele neue Wörter hinzufügt. |
+| `input_normalization.max_length_ratio_percent` | Guardrail gegen aufgeblasene Normalisierungsantworten. |
+| `input_normalization.heartbeat_message` | Statusmeldung während der Eingabeoptimierung. |
+| `input_normalization.output_prefix_pattern` | Entfernt Präfixe wie `normalisiert:` aus der Normalisierungsantwort. |
+| `input_normalization.placeholder_outputs` | Erkennt ungültige Platzhalterantworten wie „normalized user input“. |
+| `input_normalization.skip_patterns` | Überspringt Normalisierung bei URLs, Codeblöcken usw. |
+| `input_normalization.prompt.*` | Steuert den Prompt für die LLM-basierte Eingabenormalisierung. |
+| `input_normalization.fuzzy_routing.*` | Steuert Typo-Toleranz für Routingbegriffe wie Shop, Preis, Zubehör, Messgerät. |
+| `follow_up_context.strong_reference_patterns` | Erkennt referenzielle Folgefragen wie „mit welchem Indikator“, „dieser Wert“, „womit“. |
+| `follow_up_context.explicit_commercial_signal_terms` | Erkennt kommerzielle Folgefragen wie Preis, Shop, kaufen, Artikelnummer. |
+| `follow_up_context.commercial_table_follow_up.*` | Erkennt Folgefragen nach Preis-/Shop-Tabellen und baut daraus Shop-Kontextqueries. |
+| `follow_up_context.history_*_pattern` | Extrahiert relevante Vorfragen aus dem Chatverlauf. |
+| `follow_up_context.context_labels.*` | Textbausteine für den intern erzeugten Follow-up-Kontext. |
+| `follow_up_context.reference_anchor.*` | Extrahiert technische Anker wie Testomat-Modell oder Härtewert aus vorherigen Antworten. |
+| `messages.*` | User-/Stream-Statusmeldungen und Fehlertexte im AgentRunner. |
+| `rag_evidence_guard.cleanup_profile` | Wählt das Sprachbereinigungsprofil für RAG-Evidence-Prüfung. |
+| `rag_evidence_guard.stop_terms` | Entfernt irrelevante Wörter aus Evidence-Vergleichen. |
+| `rag_evidence_guard.aggregate_query_patterns` | Erkennt aggregierende Fragen wie „wie viele Geräte“. |
+| `rag_evidence_guard.aggregate_evidence_terms` | Tokens, die bei Aggregatfragen als belastbare Zählinformation gelten. |
+| `rag_evidence_guard.aggregate_answer_evidence_patterns` | Prüft, ob die Antwort wirklich eine belegte Aggregat-/Zählaussage enthält. |
+| `rag_evidence_guard.synonyms` | Fachliche Synonyme für Evidence-Abgleich, z. B. Redox/ORP, Salinität/Salzgehalt. |
+| `no_llm_fallback.max_shop_results` | Begrenzt Shop-Produkte in Fallback-Antworten ohne LLM. |
+| `no_llm_fallback.messages.*` | Vorgefertigte Sicherheits-/Fallbackantworten, wenn LLM, RAG oder Shopdaten fehlen. |
+| `no_llm_fallback.product_fields.*` | Textformatierung für Produktzeilen ohne LLM. |
+| `no_llm_fallback.product_roles.*` | Unterscheidet in Fallbacks Hauptgerät vs. Zubehör. |
+| `production_ui.stage_labels.*` | Statusphasen im Frontend, z. B. „Shop wird durchsucht“. |
+| `production_ui.confidence_labels.*` | Labels für Beleglage/Confidence im UI. |
+| `production_ui.text.*` | UI-Texte für Statuskarten, Shop-Ergebnisse, Metadaten, Hinweise. |
+| `production_ui.templates.*` | Formatvorlagen für Zähler, Hinweise, Relevanztexte. |
+| `production_ui.shop_results.max_cards` | Maximale Anzahl sichtbarer Shopkarten im UI. |
+| `production_ui.follow_up_actions.*` | Folgeaktions-Chips wie „Im Shop suchen“, „Preis anzeigen“. |
+| `source_labels.*` | Quellenlabels wie RAG-Wissen, Chatverlauf, Shopsystem. |
+| `html.*` | HTML-Templates für Badges, Fehler, Think-/Info-Ausgaben. |
+| `shop_prompt.*` | Prompt, Regeln und Kontextlogik für die Shopware-Suchquery-Optimierung. |
+| `shop_prompt.current_input_preservation.*` | Schützt wichtige Begriffe aus der aktuellen Nutzereingabe vor Verlust, z. B. pH/Redox/ORP. |
+| `shop_prompt.context_usage.referential_terms` | Erkennt „suche im Shop“, „dazu“, „davon“ als Kontext-Follow-up. |
+| `shop_prompt.context_anchor_enrichment.*` | Reichert kurze Shop-Folgefragen mit Ankern aus dem Chatverlauf an. |
+| `shop_prompt.meta_query_guard.*` | Verhindert Meta-Queries wie „suche im Shop“ ohne konkreten Produkt-/Themenanker. |
+| `shop_prompt.language_preservation.*` | Bewahrt Sprache und korrigiert unerwünschte Übersetzungen in Shopqueries. |
+
+## **2\. `config/retriex/commerce.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `retriex.commerce.enabled` | Schaltet Shop-/Commerce-Anbindung grundsätzlich ein/aus. |
+| `retriex.commerce.max_shop_results` | Maximale Shopware-Trefferzahl. |
+| `retriex.commerce.shop_timeout` | Timeout für Shopware-Anfragen. |
+| `store_api_base_url` | Store-API-Basis-URL. |
+| `sales_channel_access_key` | Store-API-Zugriffsschlüssel. |
+| `retriex.commerce.search_repair.*` | Globale Steuerung, wann und wie viele Repair-Suchqueries nachgeschoben werden. |
+| `retriex.commerce_query.config.cleanup_profile` | Sprachbereinigung für Commerce-Queries. |
+| `known_brands` | Markenbegriffe, die beim Query Parsing als Produkt-/Modellkontext erhalten bleiben. |
+| `phrases_to_remove` | Entfernt Bedienphrasen aus Shop-Suchqueries. |
+| `filter_search_tokens` | Entfernt irrelevante Suchtokens. |
+| `search_control_tokens` | Tokens zur Steuerung von Suchabsicht, nicht als Produktinhalt. |
+| `search_token_corrections` | Korrigiert bekannte Tippfehler. |
+| `search_token_canonical_map` | Vereinheitlicht Varianten, z. B. Plural/Singular oder Englisch/Deutsch. |
+| `semantic_shop_search_tokens` | Erlaubt semantische Shop-Suche auch bei indirekter Produktsprache. |
+| `normalization.*` | Regex-Normalisierung für Commerce-Query-Text. |
+| `text.trim_characters` | Zeichen, die aus Suchtexten am Rand entfernt werden. |
+| `limits.*` | Tokenlängen, Modellkontextfenster, maximale Shop-Suchtoken. |
+| `patterns.*` | Regex-Logik für Preise, Modellnummern, Zubehörmuster, History-Kontext, Tokenisierung. |
+| `commerce_reference_resolver.conversation_product_patterns` | Findet Produkte/Modelle im Chatverlauf. |
+| `commerce_reference_resolver.focus_term_patterns` | Erkennt Fokusbegriffe wie Indikator, Reagenz, Zubehör. |
+| `shop_matching.top_product_log_limit` | Begrenzt Logging/Debug-Ausgabe für Top-Shopprodukte. |
+| `shop_matching.vocabulary_views` | Bindet zentrale `vocabulary.yaml`\-Views an Shop-Matching. |
+| `shop_matching.role_guard.*` | Steuert Gerät/Zubehör-Filterung bei Device-Queries. |
+| `shop_matching.scores.*` | Gewichtung für Shop-Ranking: Produktnummer, Name, Hersteller, Token-Overlap, Rollenbonus/-penalty. |
+| `shop_matching.patterns.*` | Normalisierung/Tokenisierung für Matching. |
+| `shop_matching.price.*` | Preisformatierung und Preisnormalisierung. |
+| `shop_matching.custom_fields.*` | Bildet Shopware-Custom-Fields auf Metadaten ab. |
+| `shop_matching.text.*` | Textformatierung für Custom-Field-Ausgabe. |
+| `shop_matching.description.*` | Beschreibungscleanup und Längenlimit. |
+| `shop_matching.seo.relative_prefix` | URL-/SEO-Pfadbehandlung. |
+| `shop_matching.highlight.*` | Highlighttexte für Verfügbarkeit und Produktnummer. |
+| `shop_matching.image.missing_placeholder` | Placeholder für fehlende Produktbilder. |
+| `shop_matching.deduplication.separator` | Key-Separator für Shop-Deduplizierung. |
+
+## **3\. `config/retriex/governance.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `regression_baseline.*` | Definiert geschützte Regressionstokens und Pflichtmarker für bekannte stabile Fälle. |
+| `vocabulary.protected_short_model_tokens` | Schützt kurze Modell-/Fachtokens vor falscher Cleanup-Entfernung. |
+| `language.protected_stopword_terms` | Begriffe, die trotz Stopword-Logik nicht entfernt werden dürfen. |
+| `language.required_cleanup_profiles` | Pflichtprofile, die in `language.yaml` existieren müssen. |
+| `language.required_profile_terms` | Pflichtbegriffe je Cleanup-Profil, z. B. für Regression Guardrails. |
+| `core_pattern_audit.*` | Steuert Audit auf verdächtige hardcodierte Listen/Patterns im PHP-Core. |
+
+## **4\. `config/retriex/index.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `chunk_size` | Standardgröße für Wissenschunks beim Indexing. |
+| `chunk_overlap` | Überlappung zwischen Chunks. |
+| `embedding_model` | Fallback-/Metadatenmodell für Embeddings. |
+| `embedding_dimension` | Erwartete Embedding-Dimension. |
+| `scoring_version` | Versionierung des Scoring-/Indexformats. |
+| `index_format` | Indexformat, aktuell NDJSON. |
+| `vector_backend` | Vector-Backend, aktuell FAISS. |
+
+## **5\. `config/retriex/intent.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `intent.commerce.strong_signals` | Starke Shop-/Produktabsicht. |
+| `non_product_commerce_signals` | Commerce-Signale, die nicht automatisch eine Produktsuche bedeuten. |
+| `advisory_signals` | Beratungssignale wie Empfehlung/Eignung. |
+| `advisory_product_selection_patterns` | Muster für Produktauswahlfragen. |
+| `price_terms`, `color_terms`, `size_*` | Preis-, Farb- und Größenintents. |
+| `support_diagnostic_patterns` | Trennt Support-/Diagnosefragen von Shopfragen. |
+| `explicit_commerce_intent_patterns` | Explizite Commerce-Absichten. |
+| `technical_factual_knowledge.*` | Erkennt technische Wissensfragen, die nicht als reine Shopfrage behandelt werden sollen. |
+| `patterns.*` | Regex für SKU, Preis, Größe, Farbe, Modellprodukte. |
+| `labels.*` | Interne Intent-Signallabels. |
+| `scores.*` | Gewichtung der Intent-Signale. |
+| `intent.catalog.*` | Schwellenwerte für Katalog-/Listenintents. |
+| `intent.light.quantity_words` | Mengen-/Listenwörter für leichte Intent-Erkennung. |
+| `intent.light.strong_patterns` | Starke Listen-/Mengenmuster. |
+| `intent.sales.*` | Erkennt Sales-, Vergleichs-, Einwand-, Implementierungs- und ROI-Fragen. |
+
+## **6\. `config/retriex/language.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `words` | Legacy-/Basis-Stopwords. |
+| `protected_terms` | Begriffe, die nie generisch entfernt werden sollen. |
+| `normalization.ascii_transliteration` | Zentrale Umlaut-/ASCII-Normalisierung. |
+| `normalization.word_separator_chars` | Zeichen, die als Worttrenner normalisiert werden. |
+| `normalization.dash_equivalents` | Unicode-Dash-/Bindestrich-Normalisierung. |
+| `stopword_groups.de_core` | Allgemeine deutsche Stopwords. |
+| `stopword_groups.conversation` | Dialog-/Bedienwörter wie „bitte“, „mal“. |
+| `stopword_groups.pronouns` | Pronomen für Referenz-/Cleanup-Logik. |
+| `stopword_groups.user_instruction_terms` | Bedienphrasen-Tokens wie „zeige“, „suche“. |
+| `stopword_groups.response_style` | Präsentations-/Antwortstilwörter. |
+| `stopword_groups.question_terms` | Fragewörter für Cleanup. |
+| `stopword_groups.usage_terms` | Generische Nutzungs-/Anwendungswörter. |
+| `stopword_groups.reference_fillers` | Füllwörter bei Folgefragen. |
+| `phrase_groups.user_instruction` | Ganze Bedienphrasen, die aus Queries entfernt werden können. |
+| `meta_term_groups.presentation` | Präsentationswörter wie Tabelle, Liste, Übersicht. |
+| `meta_term_groups.retrieval_reference` | Meta-Wörter für Retrieval-Referenzen. |
+| `cleanup_profiles.commerce_query` | Cleanup-Profil für Shop-/Commerce-Queries. |
+| `cleanup_profiles.rag_evidence` | Cleanup-Profil für RAG-Evidence-Prüfung. |
+| `cleanup_profiles.retrieval_reference_cleanup` | Cleanup-Profil für Retrieval-Referenzauflösung. |
+| `cleanup_profiles.shop_context_fallback` | Cleanup-Profil für Shop-Follow-up-Kontextfallback. |
+
+## **7\. `config/retriex/model.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `default_name` | Standard-LLM-Modellname. |
+| `default_stream` | Standard-Streamingverhalten. |
+| `default_temperature` | Kreativität/Varianz der Modellantwort. |
+| `default_top_k` | Sampling-Top-K. |
+| `default_top_p` | Sampling-Top-P. |
+| `default_repeat_penalty` | Wiederholungsstrafe. |
+| `default_num_ctx` | Standard-Kontextfenster. |
+| `default_retrieval_max_chunks` | Standardanzahl RAG-Chunks. |
+| `default_retrieval_vector_top_k` | Standardanzahl Vector-Kandidaten. |
+| `guardrail_min_num_ctx` | Untergrenze für Kontextfenster. |
+| `guardrail_max_retrieval_chunks` | Obergrenze für Chunk-Anzahl. |
+| `guardrail_max_vector_top_k` | Obergrenze für Vector-Kandidaten. |
+| `retriex.llm.timeout_seconds` | Timeout für LLM-Aufrufe. |
+
+## **8\. `config/retriex/prompt.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `budget.*` | Promptbudget, Outputreserve und Sicherheitsreserve. |
+| `shop_results.*` | Wie Shopprodukte in den LLM-Prompt geschrieben werden. |
+| `shop_results.fields.*` | Feldlabels für Produktnummer, Preis, Hersteller, Rollenkompatibilität usw. |
+| `technical_product_keyword_match_threshold` | Schwelle, ab wann technische Produktfrage erkannt wird. |
+| `technical_product_keywords` | Technische Produktbegriffe für Promptlogik. |
+| `accessory_request_keywords` | Erkennt Zubehöranfragen. |
+| `sections.*` | Überschriften der Promptsektionen. |
+| `conversation_context.intro_lines` | Regeln für Chatverlauf im Prompt. |
+| `shop_search.source_line` | Quellenzeile für Shop-Suchquery. |
+| `role_guard.*` | Gerät/Zubehör-Rollenprüfung im Prompt. |
+| `measurement_evidence_guard.*` | Schutz gegen falsche Eignungsaussagen bei Messparametern. |
+| `measurement_evidence_guard.rule_templates.*` | Konkrete Regeltexte für Evidence-Prüfung. |
+| `output_priority.*` | Prioritätsregeln für Antwortaufbau. |
+| `fallback_escalation.*` | Regeln je Confidence-/Evidence-State. |
+| `parameter_parsing.split_pattern` | Trennt mehrere Parameter wie „pH und Redox“. |
+| `parameter_parsing.trim_characters` | Trimmt Parameterwerte. |
+| `response_format.*` | Antwortformat-Regeln mit/ohne Shopdaten. |
+| `language.rules` | Sprachregeln für die Modellantwort. |
+| `fact_grounding.*` | Fact-Grounding-Regeln, besonders gegen Halluzinationen. |
+| `retrieved_knowledge.source_line` | Quellenzeile für Dokumentwissen. |
+| `url_content.source_line` | Quellenzeile für URL-Inhalte. |
+| `technical_product_model_pattern` | Regex zur Erkennung technischer Produktmodelle. |
+
+## **9\. `config/retriex/query_enrichment.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `max_expansions` | Maximale Anzahl Query-Erweiterungen. |
+| `rules.*` | Synonym-/Erweiterungsregeln für Retrieval, z. B. Wasserhärte → Resthärte. |
+
+## **10\. `config/retriex/retrieval.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `hard_max_chunks` | Harte Obergrenze zurückgegebener Chunks. |
+| `hard_max_vectork` | Harte Obergrenze Vector-Kandidaten. |
+| `hard_max_keywordk` | Harte Obergrenze Keyword-Kandidaten. |
+| `vector_score_threshold` | Mindestscore für Vector-Treffer. |
+| `threshold_floor`, `threshold_ceil` | Dynamischer Score-Korridor. |
+| `list_bonus` | Bonus für Listen-/Tabellenrelevanz. |
+| `rrf_k` | RRF-Fusionsparameter. |
+| `keyword_topk_multiplier` | Multiplikator für Keyword-Retrieval-Kandidaten. |
+| `keyword_score_threshold` | Mindestscore Keyword-Treffer. |
+| `keyword_rrf_weight` | Gewichtung Keyword-RRF. |
+| `scoped_vector_rrf_weight` | Gewichtung fokussierter Vector-Treffer. |
+| `scoped_keyword_rrf_weight` | Gewichtung fokussierter Keyword-Treffer. |
+| `empty_rrf_fallback_topn` | Fallback, wenn Fusion leer läuft. |
+| `max_chunks_per_doc` | Maximalzahl Chunks pro Dokument. |
+| `min_chunk_distance` | Mindestabstand zwischen Chunks. |
+| `dominant_doc_*` | Logik zur Dominanz eines Dokuments in den Treffern. |
+| `exact_document_max_chunks` | Maximalchunks bei exaktem Dokumentfokus. |
+| `focused_product_*` | Fokuslogik für ein klar erkanntes Produkt/Gerät. |
+| `catalog_list_shortcut_patterns` | Erkennt Katalog-/Listenfragen. |
+| `exact_selection_*` | Präzisionslogik für Tabellen/Indikatoren/Grenzwerte. |
+| `exact_detail_tokens` | Detailfrage-Tokens für gezielte Retrievalauswahl. |
+| `generic_exact_selection_cleanup_profile` | Cleanup-Profil für generische exakte Auswahl. |
+| `generic_product_tokens` | Allgemeine Produkttokens fürs Retrieval. |
+| `important_short_model_tokens` | Geschützte kurze Modell-/Fachtokens wie pH/RX/TC. |
+| `family_descriptor_tokens` | Produktfamilien-/Gerätebeschreibungen. |
+| `looks_like_reagent_*` | Erkennung von Reagenz-/Indikator-Dokumenten. |
+| `looks_like_safety_*` | Erkennung von Sicherheitsdatenblättern. |
+| `looks_like_document_words` | Dokumenttyp-Erkennung. |
+| `looks_like_device_words` | Geräte-/Device-Erkennung. |
+| `retriex.retrieval.inventory` | Alias auf die effektive Retrieval-Konfiguration. |
+
+## **11\. `config/retriex/runtime.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `retriex.root` | Projektwurzel. |
+| `retriex.knowledge.root` | Basisverzeichnis der Wissensdaten. |
+| `retriex.knowledge.ndjson` | Haupt-Wissensindex. |
+| `retriex.knowledge.index_meta` | Metadaten des Wissensindex. |
+| `retriex.knowledge.vector_index` | FAISS-Vectorindex für Chunks. |
+| `retriex.knowledge.vector_index_meta` | Metadaten zum Chunk-Vectorindex. |
+| `retriex.knowledge.runtime_meta` | Runtime-/Indexstatus-Datei. |
+| `retriex.knowledge.upload` | Upload-Verzeichnis. |
+| `retriex.knowledge.tags_ndjson` | Tag-Indexdaten. |
+| `retriex.knowledge.vector_tags_index` | FAISS-Vectorindex für Tags. |
+| `retriex.knowledge.vector_tags_index_meta` | Metadaten zum Tag-Vectorindex. |
+| `retriex.locks.dir` | Lock-Verzeichnis. |
+| `retriex.tags.rebuild_lock` | Lock-Datei für Tag-Rebuild. |
+| `retriex.context.config.max_visible_regular_lines` | Sichtbare Kontextzeilen im Admin-/Debug-Kontext. |
+| `retriex.context.config.max_full_lines` | Maximale vollständige Kontextzeilen. |
+
+## **12\. `config/retriex/search_repair.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `strict_requested_accessory_code_repair` | Erzwingt präzisere Repair-Logik bei angefragtem Zubehörcode. |
+| `prefer_prompt_anchored_model_for_requested_accessory_code` | Bevorzugt Modellanker aus Prompt/Verlauf bei Zubehörcode-Repair. |
+| `requested_accessory_code_fallback_query_templates` | Fallback-Query-Templates für Zubehörcodes. |
+| `requested_accessory_code_fallback_terms` | Begriffe für Zubehörcode-Erkennung. |
+| `requested_accessory_code_context_prefix_terms` | Kontextpräfixe für Zubehörcode-Suche. |
+| `requested_accessory_code_proximity_window` | Zeichenfenster für Nähe zwischen Modell und Zubehörcode. |
+| `specific_model_candidate_patterns` | Regex für Modellkandidaten. |
+| `model_candidate_exclude_terms` | Ausschlussbegriffe für falsche Modellkandidaten. |
+| `limits.top_product_log_limit` | Debug-/Loglimit für Topprodukte. |
+| `sanitize_trim_character_codes` | Zeichen-Codes für Query-Sanitizing. |
+| `product_key_separator` | Separator für Produkt-Dedupe-/Keybildung. |
+| `scores.*` | Scoring für Repair-Kandidaten, Prompt-Match, Query-Overlap, Spezifität. |
+| `patterns.*` | Regex-Templates für Modell-, Zubehör-, Bundle- und Token-Erkennung. |
+
+## **13\. `config/retriex/vector.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `vector.script_dir` | Verzeichnis der Python-Vector-Skripte. |
+| `python_bin` | Python-Binary für Vector-Tools. |
+| `control_script` | Vector-Service-Control-Skript. |
+| `ingest_script` | Chunk-Vector-Ingest-Skript. |
+| `search_script` | Chunk-Vector-Search-Skript. |
+| `ingest_tags_script` | Tag-Vector-Ingest-Skript. |
+| `search_tags_script` | Tag-Vector-Search-Skript. |
+| `host`, `port`, `service_url` | Vector-Service-Erreichbarkeit. |
+| `timeout` | Timeout für Vector-Prozesse. |
+| `vector.search.*` | Score-/Limit-/HTTP-Timeout für Chunk-Vector-Suche. |
+| `vector.tags.*` | Score-/Limit-/HTTP-Timeout für Tag-Vector-Suche. |
+| `vector.tag_routing.*` | Tag-basierte Dokumentvorauswahl: TopK, Mindestscore, Score-Drop, Kandidatenlimit, Multi-Tag-Bonus. |
+
+## **14\. `config/retriex/vocabulary.yaml`**
+
+| YAML-Parameter | Bewirkt |
+| ----- | ----- |
+| `classes.device` | Zentrale Gerätebegriffe. |
+| `classes.accessory` | Zentrale Zubehör-/Verbrauchsmaterialbegriffe. |
+| `views.shop.device_query` | Gerätebegriffe für Shop-Queries. |
+| `views.shop.accessory_query` | Zubehörbegriffe für Shop-Queries. |
+| `views.shop.accessory_product` | Zubehörerkennung in Shop-Produkten. |
+| `views.shop.device_product` | Geräteerkennung in Shop-Produkten. |
+| `views.shop.device_focus` | Fokusbegriffe für Geräteanfragen. |
+| `views.shop.accessory_focus` | Fokusbegriffe für Zubehöranfragen. |
+| `views.retrieval.*` | Vocabulary-Projektionen für Retrieval-Listen wie Reagenz, Safety, Device, Dokument. |
+| `views.search_repair.*` | Vocabulary-Projektionen für Repair-Kandidaten und Spezifitätsboost. |
+| `views.prompt.*` | Vocabulary-Projektionen für PromptBuilder-Keywords. |
+| `maps.shop.accessory_focus_variants` | Variantenmapping für Zubehörfokus, z. B. unterschiedliche Schreibweisen. |
+
+## **Pflegekompass**
+
+Wenn du künftig etwas ändern willst, wäre die Zuordnung so:
+
+| Ziel | Zuständige YAML |
+| ----- | ----- |
+| Shop-Suche erkennt Frage nicht | `agent.yaml`, `commerce.yaml`, ggf. `intent.yaml` |
+| „suche im Shop“ verliert Kontext | `agent.yaml → shop_prompt.*`, `follow_up_context.*`, `language.yaml → cleanup_profiles.shop_context_fallback` |
+| pH/Redox/ORP wird entfernt | `language.yaml → protected_terms`, `agent.yaml → shop_prompt.current_input_preservation`, `governance.yaml` |
+| Gerät wird als Zubehör gewertet | `vocabulary.yaml`, `commerce.yaml → shop_matching.role_guard/scores`, `prompt.yaml → role_guard` |
+| Zubehör wird falsch priorisiert | `vocabulary.yaml`, `commerce.yaml → shop_matching`, `search_repair.yaml` |
+| RAG findet falsches Dokument | `retrieval.yaml`, `query_enrichment.yaml`, `vocabulary.yaml` |
+| Folgefrage wird nicht verstanden | `agent.yaml → follow_up_context.*` |
+| Antwort halluziniert technische Eignung | `prompt.yaml → measurement_evidence_guard`, `fact_grounding`, `fallback_escalation` |
+| UI-/Status-/Buttontexte ändern | `agent.yaml → production_ui`, `messages`, `source_labels`, `html` |
+| Prompt-Regeln ändern | `prompt.yaml` |
+| Stopwords/Cleanup ändern | `language.yaml` |
+| Regression-/Audit-Schutz ändern | `governance.yaml` |
+| Vector-Service / FAISS / Tagrouting ändern | `vector.yaml` |
+| Modellparameter ändern | `model.yaml` |
+| Indexing-Größe ändern | `index.yaml` |
+| Pfade/Runtime-Dateien ändern | `runtime.yaml` |
+
diff --git a/src/Config/LanguageCleanupConfig.php b/src/Config/LanguageCleanupConfig.php
index 0699b16..820f442 100644
--- a/src/Config/LanguageCleanupConfig.php
+++ b/src/Config/LanguageCleanupConfig.php
@@ -237,6 +237,17 @@ final class LanguageCleanupConfig
         return $map;
     }
 
+    /** @return string[] Result of stringListFromValue() for the value at `normalization.<key>`; throws the error built by invalid() when the key is absent. */
+    private function getNormalizationStringList(string $key): array
+    {
+        $normalization = $this->requiredMap('normalization');
+        if (!array_key_exists($key, $normalization)) { // key-presence check only: a key that exists with a null value is still handed to the validation below
+            throw $this->invalid(sprintf('normalization.%s', $key), 'is missing');
+        }
+
+        return $this->stringListFromValue($normalization[$key], sprintf('normalization.%s', $key), true); // NOTE(review): meaning of the trailing `true` flag is defined in stringListFromValue (not visible in this hunk) — confirm
+    }
+
     /** @return string[] */
     private function requiredTopLevelStringList(string $key): array
     {