From 9731880cd36ee0b3742fba5a8c5a7d219fb26af3 Mon Sep 17 00:00:00 2001 From: team 1 Date: Wed, 6 May 2026 16:40:20 +0200 Subject: [PATCH] fix p54 --- ...H_54_SINGLE_GENRE_CONFIG_SURFACE_README.md | 140 +++++++++++++ config/retriex/genre.yaml | 188 ++++++++++++++++++ config/services.yaml | 5 + src/Config/ConfigSourceAuditProvider.php | 1 + src/Config/GenreConfig.php | 77 +++++++ src/Config/RetriexEffectiveConfigProvider.php | 147 ++++++++++++++ 6 files changed, 558 insertions(+) create mode 100644 RETRIEX_PATCH_54_SINGLE_GENRE_CONFIG_SURFACE_README.md create mode 100644 config/retriex/genre.yaml create mode 100644 src/Config/GenreConfig.php diff --git a/RETRIEX_PATCH_54_SINGLE_GENRE_CONFIG_SURFACE_README.md b/RETRIEX_PATCH_54_SINGLE_GENRE_CONFIG_SURFACE_README.md new file mode 100644 index 0000000..c1fad13 --- /dev/null +++ b/RETRIEX_PATCH_54_SINGLE_GENRE_CONFIG_SURFACE_README.md @@ -0,0 +1,140 @@ +# RetrieX Patch p54 - Single-Genre Configuration Surface + +## Ziel + +Dieser Patch macht die Konfiguration **genre-faehig fuer eine Installation mit genau einem Genre**. + +Er baut **keine** Multi-Genre-, Multi-Tenant- oder SaaS-Umschaltung. Es gibt keinen Request-/Host-/API-Key-Resolver und keine Laufzeit-Auswahl zwischen mehreren Genres. + +Stattdessen fuehrt der Patch eine zentrale Konfigurationsflaeche ein, die alle fuer eine spaetere Umwidmung relevanten Parametergruppen sichtbar macht: + +- Produktrollen +- Produktattribute und Constraints +- Marken und kanonische Begriffe +- Intent-/Routing-Signale +- Follow-up-/Kontextanker +- Shopquery-Runtime +- Ergebnisidentitaet und Antwortregeln +- Search Repair +- Retrieval-/Language-Begriffe +- Shopdaten-Mapping +- Governance-/Regression-Guardrails + +Damit ist klar dokumentiert, welche Parameter fuer ein anderes Genre wie Fashion, Moebel, Ersatzteile oder Elektronik geprueft und angepasst werden muessen. 
+ +## Bewusste Nicht-Ziele + +- Keine Multi-Domain-/Multi-Genre-Architektur +- Keine Tenant-Logik +- Keine Runtime-Umschaltung pro Request +- Keine neuen fachlichen Defaults +- Keine neuen Tokenlisten im PHP-Core +- Keine Ranking-, Retrieval- oder Shopware-Kriterienaenderung +- Keine LLM-Verhaltensaenderung +- Keine Umbenennung bestehender Rollen wie `device`/`accessory` + +## Geaenderte Dateien + +```text +config/retriex/genre.yaml +config/services.yaml +src/Config/GenreConfig.php +src/Config/RetriexEffectiveConfigProvider.php +src/Config/ConfigSourceAuditProvider.php +``` + +## Neue Datei `config/retriex/genre.yaml` + +Die neue Datei enthaelt: + +```yaml +retriex.genre.config: + id: water_analysis + label: 'Water analysis / measurement devices' + mode: single_installation_single_genre + adaptation_surface: + ... +``` + +`adaptation_surface` ist ein nach Umwidmungsbereichen gruppiertes Inventar. Die dort genannten `paths` zeigen auf bestehende effektive oder rohe Config-Pfade. + +Wichtig: Die Eintraege verschieben noch keine fachlichen Werte physisch in `genre.yaml`. Der Patch ist absichtlich ein sicherer Oberflaechen-/Guardrail-Schritt. Bestehende Configs bleiben weiterhin die Quelle der Runtime-Werte. + +## Neue Klasse `GenreConfig` + +`src/Config/GenreConfig.php` kapselt die neue Genre-Konfiguration: + +- `getId()` +- `getLabel()` +- `getMode()` +- `getDescription()` +- `getAdaptationSurface()` +- `toArray()` + +Die Klasse enthaelt keine PHP-only Fachlisten. + +## Effective Config / Validation + +`RetriexEffectiveConfigProvider` gibt nun zusaetzlich den Bereich `genre` aus und validiert: + +- `genre.id` ist gesetzt +- `genre.mode` ist gesetzt +- `genre.adaptation_surface` ist eine nicht-leere Map +- jede Gruppe enthaelt eine nicht-leere `paths`-Liste +- referenzierte Pfade werden gegen effektive Config und rohe Symfony-Parameterpfade geprueft + +Unbekannte Pfade erzeugen Warnungen, keine Runtime-Fehler. 
So werden veraltete Surface-Eintraege sichtbar, ohne die Installation hart zu blockieren. + +## Config Source Audit + +`ConfigSourceAuditProvider` kennt nun auch: + +```text +GenreConfig => retriex.genre.config +``` + +Damit wird die neue Config-Klasse im Audit sauber einem YAML-Parameter zugeordnet. + +## Lokale Pruefungen + +Ausgefuehrt im entpackten Patch-Arbeitsstand: + +```bash +php -l src/Config/GenreConfig.php +php -l src/Config/RetriexEffectiveConfigProvider.php +php -l src/Config/ConfigSourceAuditProvider.php +``` + +Ergebnis: alle PHP-Lints gruen. + +Zusaetzlich wurden alle YAML-Dateien unter `config/retriex/*.yaml` per Python/YAML-Parser geprueft. + +Ergebnis: YAML-Parsing gruen. + +Die `genre.adaptation_surface.paths` wurden statisch gegen die vorhandenen Config-Pfade geprueft. + +Ergebnis: keine fehlenden Pfadreferenzen. + +## Nicht lokal ausfuehrbar + +`bin/console` konnte lokal nicht ausgefuehrt werden, weil im ZIP kein `vendor/` enthalten ist: + +```text +Dependencies are missing. Try running "composer install". +``` + +## Empfohlene Projektchecks nach dem Einspielen + +```bash +bin/console cache:clear +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Naechster moeglicher Schritt + +Wenn p54 gruen ist, koennte ein spaeterer Patch einzelne besonders genreabhaengige Werte schrittweise physisch in diese Surface ueberfuehren oder von dort referenzieren. + +Das sollte separat und klein passieren, z. B. zuerst nur Produktrollen/Vocabulary-Views, ohne Runtime-Logik zu aendern. diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml new file mode 100644 index 0000000..994cddd --- /dev/null +++ b/config/retriex/genre.yaml @@ -0,0 +1,188 @@ +# Single-genre adaptation surface for this RetrieX installation. +# +# This file intentionally does not implement multi-tenant or per-request genre +# switching. 
One installation represents one genre. The entries below group the +# configuration areas that must be reviewed when the same software is repurposed +# for another genre such as fashion, furniture, spare parts or electronics. +# +# The `paths` values reference effective config dump keys or raw Symfony +# parameter paths. mto:agent:config:validate reports stale or renamed paths as +# warnings, not errors. +parameters: + retriex.genre.config: + id: water_analysis + label: 'Water analysis / measurement devices' + mode: single_installation_single_genre + description: 'Genre-specific configuration surface for one RetrieX installation.' + + adaptation_surface: + product_roles: + description: 'Main product, accessory and consumable role vocabulary used for routing, shop matching and answer guards.' + paths: + - vocabulary.classes.device + - vocabulary.classes.accessory + - vocabulary.classes.requested_accessory_code_terms + - vocabulary.views.shop.device_query.add + - vocabulary.views.shop.accessory_query.add + - vocabulary.views.shop.device_product.add + - vocabulary.views.shop.accessory_product.add + - vocabulary.views.prompt.main_device_request_keywords.add + - vocabulary.views.prompt.accessory_request_keywords.add + - vocabulary.views.prompt.main_device_product_keywords.add + - vocabulary.views.prompt.accessory_product_keywords.add + - agent.no_llm_fallback.product_roles.main_device_request_keywords + - agent.no_llm_fallback.product_roles.accessory_product_keywords + - prompt.detection.technical_product_keywords + - prompt.detection.accessory_request_keywords + + product_attributes: + description: 'Genre-specific attributes and constraints, for example measurement values now or size/color/material later.'
+ paths: + - vocabulary.classes.direct_product_attribute_stop_terms + - vocabulary.views.search_repair.direct_product_type_terms.add + - vocabulary.views.search_repair.direct_product_attribute_stop_terms.include + - agent.shop_runtime.attribute_cleanup.product_type_terms + - agent.shop_runtime.attribute_cleanup.stop_terms + - agent.shop_runtime.attribute_cleanup.comparative_constraint_patterns + - agent.shop_runtime.answer_constraints.length_sort.trigger_patterns + - agent.shop_runtime.answer_constraints.length_sort.value_patterns + - agent.shop_runtime.answer_constraints.length_filter.min_patterns + - agent.shop_runtime.answer_constraints.length_filter.max_patterns + - intent.commerce.size_token_terms + - intent.commerce.size_terms + - intent.commerce.color_terms + - intent.commerce.patterns.size_extraction_template + - intent.commerce.patterns.size_value_template + - intent.commerce.patterns.size_token_value_template + - intent.commerce.patterns.color_value_template + + brands_and_canonical_terms: + description: 'Known brands, canonical token mappings and query enrichment rules that change with the shop genre.' + paths: + - commerce_query.known_brands + - commerce_query.search_token_canonical_map + - query_enrichment.rules + - vocabulary.maps.shop.accessory_focus_variants + - vocabulary.maps.agent.rag_evidence_guard.synonyms + + intent_and_routing: + description: 'Genre-specific commerce/advisory signals and fuzzy routing terms.' 
+ paths: + - vocabulary.classes.input_normalization_fuzzy_routing_terms + - agent.input_normalization.fuzzy_routing.terms + - intent.commerce.strong_signals + - intent.commerce.advisory_signals + - intent.commerce.advisory_product_selection_patterns + - intent.commerce.explicit_commerce_intent_patterns + - intent.commerce.patterns.model_like_product + - intent.sales.sales_signals + - intent.sales.comparison_signals + - intent.sales.objection_signals + - intent.sales.implementation_signals + - intent.sales.roi_signals + + context_resolution: + description: 'Follow-up anchors and meta-query handling for referential shop questions in the current genre.' + paths: + - agent.follow_up_context.commercial_table_follow_up.history_anchor_patterns + - agent.follow_up_context.commercial_table_follow_up.indicator_marker_patterns + - agent.follow_up_context.commercial_table_follow_up.query_template_with_model + - agent.follow_up_context.commercial_table_follow_up.query_template_without_model + - agent.shop_runtime.context_resolution.context_usage.referential_terms + - agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms + - agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns + - agent.shop_runtime.context_resolution.history_anchor_enrichment.template + - agent.shop_runtime.context_resolution.meta_query_guard.meta_only_terms + - agent.shop_runtime.context_resolution.meta_query_guard.context_fallback_filter_terms + - agent.shop_runtime.context_resolution.rag_anchor_enrichment.numeric_focus_patterns + - agent.shop_runtime.context_resolution.rag_anchor_enrichment.product_title_patterns + - agent.shop_runtime.context_resolution.rag_anchor_enrichment.anchor_bonus_patterns + - agent.shop_runtime.context_resolution.rag_anchor_enrichment.subject_terms + + shop_query_runtime: + description: 'Shop query cleanup and direct Shopware search behavior that needs genre-specific terms but no PHP branching.' 
+ paths: + - vocabulary.classes.agent_shop_current_input_preservation_terms + - vocabulary.classes.agent_shop_context_anchor_trigger_terms + - agent.shop_runtime.query_cleanup.current_input_preservation.terms + - agent.shop_runtime.query_cleanup.stopword_cleanup.terms + - agent.shop_runtime.result_identity.compound_prefix_match.terms + - agent.shop_runtime.result_identity.primary_identity_repair.stop_terms + - agent.shop_runtime.direct_answer.intro + - agent.shop_runtime.direct_answer.no_results + - agent.shop_runtime.direct_answer.sorted_by_length_note + - agent.shop_runtime.direct_answer.min_length_filter_note + - agent.shop_runtime.direct_answer.max_length_filter_note + + result_identity_and_answer_policy: + description: 'Grounding, role separation and atomicity rules that must match the active product genre.' + paths: + - prompt.rules.output_priority_technical + - prompt.rules.response_format_technical + - prompt.rules.response_format_accessory + - prompt.rules.fact_grounding_technical + - prompt.rules.fact_grounding_with_shop + - vocabulary.views.prompt.measurement_evidence_guard.accessory_lookup_guard_terms.add + - vocabulary.views.prompt.measurement_evidence_guard.accessory_lookup_passthrough_terms.add + - vocabulary.views.prompt.measurement_evidence_guard.generic_positive_context_terms.add + - vocabulary.views.prompt.measurement_evidence_guard.generic_negative_context_terms.add + + search_repair: + description: 'Genre-specific repair tokens, candidate patterns and exact identifier behavior.' 
+ paths: + - search_repair.direct_product_attribute_lookup + - vocabulary.views.search_repair.requested_accessory_code_terms.include + - search_repair.specific_model_candidate_patterns + - vocabulary.views.search_repair.model_candidate_exclude_terms.include + - vocabulary.views.search_repair.generic_candidate_tokens.add + - vocabulary.views.search_repair.accessory_candidate_terms.add + - vocabulary.views.search_repair.accessory_or_bundle_terms.add + - vocabulary.views.search_repair.specificity_boost_terms.add + - search_repair.patterns.model_candidate + - search_repair.patterns.accessory_candidate_template + - search_repair.patterns.requested_accessory_code + - search_repair.patterns.accessory_or_bundle_template + - search_repair.patterns.model_like + - search_repair.patterns.specificity_boost_template + + retrieval_and_language: + description: 'Genre-specific protected terms, exact selection helpers and retrieval vocabulary. Engine parameters stay outside this surface.' + paths: + - language.protected_terms + - language.cleanup_profiles.commerce_query + - language.cleanup_profiles.rag_evidence + - language.cleanup_profiles.shop_context_fallback + - retrieval.vocabulary.generic_product_tokens + - retrieval.vocabulary.important_short_model_tokens + - retrieval.vocabulary.family_descriptor_tokens + - retrieval.vocabulary.looks_like_reagent_tokens + - retrieval.vocabulary.looks_like_device_words + - retrieval.vocabulary.looks_like_document_words + - retrieval.exact_selection_token_variant_prefixes + - retrieval.exact_selection_indicator_question_tokens + - retrieval.exact_selection_indicator_question_phrases + - retrieval.exact_selection_indicator_table_heading_patterns + - retrieval.exact_selection_indicator_table_header_patterns + - retrieval.exact_selection_indicator_table_row_patterns + - retrieval.exact_selection_indicator_table_required_primary_terms + - retrieval.exact_selection_indicator_table_required_context_terms + + shop_data_mapping: + description: 'Shop 
record fields and matching text fields that are installation-/genre-specific.' + paths: + - shop_matching.custom_fields + - shop_matching.text.custom_field_join_separator + - shop_matching.text.primary_secondary_separator + - shop_matching.role_guard + - commerce.store_api_base_url + - commerce.max_shop_results + + governance_and_regression: + description: 'Guardrails and regression expectations that intentionally protect the active genre.' + paths: + - governance.regression_baseline.protected_short_model_tokens + - governance.regression_baseline.protected_measurement_values + - governance.regression_baseline.protected_technical_prompt_keywords + - governance.regression_baseline.protected_accessory_prompt_keywords + - governance.vocabulary + - governance.core_pattern_audit diff --git a/config/services.yaml b/config/services.yaml index 9861948..0649fea 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -1,5 +1,6 @@ imports: - { resource: 'retriex/runtime.yaml' } + - { resource: 'retriex/genre.yaml' } - { resource: 'retriex/index.yaml' } - { resource: 'retriex/vector.yaml' } - { resource: 'retriex/commerce.yaml' } @@ -116,6 +117,10 @@ services: $retrievalMaxChunks: '%retriex.model.default_retrieval_max_chunks%' $retrievalVectorTopK: '%retriex.model.default_retrieval_vector_top_k%' + App\Config\GenreConfig: + arguments: + $config: '%retriex.genre.config%' + App\Config\DomainVocabularyConfig: arguments: $config: '%retriex.vocabulary.config%' diff --git a/src/Config/ConfigSourceAuditProvider.php b/src/Config/ConfigSourceAuditProvider.php index 00f5da7..80a9773 100644 --- a/src/Config/ConfigSourceAuditProvider.php +++ b/src/Config/ConfigSourceAuditProvider.php @@ -18,6 +18,7 @@ final readonly class ConfigSourceAuditProvider 'IntentLightConfig' => 'retriex.intent.light.config', 'LanguageCleanupConfig' => 'retriex.stopwords.config', 'GovernanceConfig' => 'retriex.governance.config', + 'GenreConfig' => 'retriex.genre.config', 'NdjsonHybridRetrieverConfig' => 
'retriex.retrieval.config', 'PromptBuilderConfig' => 'retriex.prompt.config', 'QueryEnricherConfig' => 'retriex.query_enrichment.config', diff --git a/src/Config/GenreConfig.php b/src/Config/GenreConfig.php new file mode 100644 index 0000000..560dc75 --- /dev/null +++ b/src/Config/GenreConfig.php @@ -0,0 +1,77 @@ + $config + */ + public function __construct(private readonly array $config = []) + { + } + + public function getId(): string + { + return $this->string('id', ''); + } + + public function getLabel(): string + { + return $this->string('label', ''); + } + + public function getMode(): string + { + return $this->string('mode', ''); + } + + public function getDescription(): string + { + return $this->string('description', ''); + } + + /** + * @return array + */ + public function getAdaptationSurface(): array + { + $surface = $this->value('adaptation_surface', []); + + return is_array($surface) ? $surface : []; + } + + /** + * @return array + */ + public function toArray(): array + { + return $this->config; + } + + private function string(string $path, string $fallback): string + { + $value = $this->value($path, $fallback); + if (!is_scalar($value)) { + return $fallback; + } + + return trim((string) $value); + } + + private function value(string $path, mixed $fallback): mixed + { + $current = $this->config; + foreach (explode('.', $path) as $segment) { + if (!is_array($current) || !array_key_exists($segment, $current)) { + return $fallback; + } + + $current = $current[$segment]; + } + + return $current; + } +} diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 3a53cb9..6ec636b 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -28,6 +28,7 @@ final readonly class RetriexEffectiveConfigProvider private LanguageCleanupConfig $languageCleanupConfig, private QueryEnricherConfig $queryEnricherConfig, private GovernanceConfig $governanceConfig, + 
private GenreConfig $genreConfig, private CatalogIntentConfig $catalogIntentConfig, private ContextServiceConfig $contextServiceConfig, ) { @@ -39,6 +40,7 @@ final readonly class RetriexEffectiveConfigProvider public function dump(): array { return [ + 'genre' => $this->genreConfig(), 'runtime' => $this->runtimeConfig(), 'index' => $this->indexConfig(), 'model_generation' => $this->modelConfig(), @@ -73,6 +75,7 @@ final readonly class RetriexEffectiveConfigProvider $warnings = []; $config = $this->dump(); + $this->validateGenre($config['genre'], $config, $errors, $warnings); $this->validateRuntime($config['runtime'], $errors, $warnings); $this->validateIndex($config['index'], $errors, $warnings); $this->validateModel($config['model_generation'], $errors, $warnings); @@ -435,6 +438,19 @@ final readonly class RetriexEffectiveConfigProvider return $key !== '' ? $key : 'value'; } + + /** @return array */ + private function genreConfig(): array + { + return [ + 'id' => $this->genreConfig->getId(), + 'label' => $this->genreConfig->getLabel(), + 'mode' => $this->genreConfig->getMode(), + 'description' => $this->genreConfig->getDescription(), + 'adaptation_surface' => $this->genreConfig->getAdaptationSurface(), + ]; + } + /** @return array */ private function runtimeConfig(): array { @@ -1084,6 +1100,137 @@ final readonly class RetriexEffectiveConfigProvider ]; } + /** + * @param array $genre + * @param array $effectiveConfig + * @param list $errors + * @param list $warnings + */ + private function validateGenre(array $genre, array $effectiveConfig, array &$errors, array &$warnings): void + { + if (trim((string) ($genre['id'] ?? '')) === '') { + $errors[] = 'genre.id must not be empty.'; + } + + if (trim((string) ($genre['mode'] ?? '')) === '') { + $errors[] = 'genre.mode must not be empty.'; + } + + $surface = $genre['adaptation_surface'] ?? 
null; + if (!is_array($surface) || $surface === []) { + $errors[] = 'genre.adaptation_surface must be a non-empty map.'; + return; + } + + $flattened = []; + $this->flattenGenreParameterPaths($flattened); + foreach ($effectiveConfig as $root => $value) { + if ($root === 'genre') { + continue; + } + + $this->flattenEffectiveConfigPath((string) $root, $value, $flattened); + } + + foreach ($surface as $group => $definition) { + if (!is_string($group) || trim($group) === '') { + $errors[] = 'genre.adaptation_surface keys must be non-empty strings.'; + continue; + } + + if (!is_array($definition)) { + $errors[] = sprintf('genre.adaptation_surface.%s must be a map.', $group); + continue; + } + + $paths = $definition['paths'] ?? null; + if (!is_array($paths) || $paths === []) { + $errors[] = sprintf('genre.adaptation_surface.%s.paths must be a non-empty list.', $group); + continue; + } + + foreach ($paths as $path) { + if (!is_string($path) || trim($path) === '') { + $errors[] = sprintf('genre.adaptation_surface.%s.paths must contain non-empty strings.', $group); + continue; + } + + if (!isset($flattened[$path])) { + $warnings[] = sprintf('genre.adaptation_surface.%s references unknown config path: %s.', $group, $path); + } + } + } + } + + /** + * @param array $paths + */ + private function flattenGenreParameterPaths(array &$paths): void + { + $configRoots = [ + 'retriex.agent.config' => 'agent', + 'retriex.commerce_query.config' => 'commerce_query', + 'retriex.governance.config' => 'governance', + 'retriex.intent.commerce.config' => 'intent.commerce', + 'retriex.intent.light.config' => 'intent.light', + 'retriex.intent.sales.config' => 'intent.sales', + 'retriex.prompt.config' => 'prompt', + 'retriex.query_enrichment.config' => 'query_enrichment', + 'retriex.retrieval.config' => 'retrieval', + 'retriex.search_repair.config' => 'search_repair', + 'retriex.shop_matching.config' => 'shop_matching', + 'retriex.stopwords.config' => 'stopwords', + 'retriex.vocabulary.config' 
=> 'vocabulary', + ]; + + $allParameters = $this->parameters->all(); + foreach ($configRoots as $parameterName => $rootPath) { + if (!array_key_exists($parameterName, $allParameters)) { + continue; + } + + $this->flattenEffectiveConfigPath($rootPath, $allParameters[$parameterName], $paths); + } + + foreach ($allParameters as $parameterName => $value) { + if (!is_string($parameterName) || !str_starts_with($parameterName, 'retriex.')) { + continue; + } + + foreach (array_keys($configRoots) as $configRoot) { + if ($parameterName === $configRoot || str_starts_with($parameterName, $configRoot . '.')) { + continue 2; + } + } + + $this->flattenEffectiveConfigPath(substr($parameterName, strlen('retriex.')), $value, $paths); + } + } + + /** + * @param array $paths + */ + private function flattenEffectiveConfigPath(string $path, mixed $value, array &$paths): void + { + $paths[$path] = true; + + if (!is_array($value)) { + return; + } + + foreach ($value as $key => $child) { + if (!is_string($key) && !is_int($key)) { + continue; + } + + if (is_int($key)) { + continue; + } + + $this->flattenEffectiveConfigPath($path . '.' . $key, $child, $paths); + } + } + /** * @param array $governance * @param list $errors