From e02f3d7143205786bef601b91ef2aecfa1c8d930 Mon Sep 17 00:00:00 2001 From: team 1 Date: Thu, 7 May 2026 15:42:35 +0200 Subject: [PATCH] p59d --- config/retriex/genre.yaml | 77 +++------- ..._GENRE_SOURCEPATH_METADATA_SPLIT_README.md | 71 +++++++++ src/Command/ConfigSourceAuditCommand.php | 7 +- src/Config/ConfigSourceAuditProvider.php | 144 +++++++++++++----- src/Config/GenreSourceOfTruthGuard.php | 97 ++++++++++-- 5 files changed, 282 insertions(+), 114 deletions(-) create mode 100644 patch_history/RETRIEX_PATCH_59D_GENRE_SOURCEPATH_METADATA_SPLIT_README.md diff --git a/config/retriex/genre.yaml b/config/retriex/genre.yaml index ca41017..84360c0 100644 --- a/config/retriex/genre.yaml +++ b/config/retriex/genre.yaml @@ -9,7 +9,9 @@ # `configuration_values` as the central value surface. p59B keeps the # adaptation surface focused on native genre value paths. p59C adds # review_path_groups so every remaining non-empty/runtime-resolved legacy -# source path is visible with its cleanup classification. +# source path is visible with its cleanup classification. p59D moves +# those legacy/runtime source declarations out of configuration_values so +# the value surface stays native and maintenance-focused. parameters: retriex.genre.config: id: water_analysis @@ -521,9 +523,7 @@ parameters: - ph-indikatoren - ph indikatoren no_llm_fallback_terms: - source_paths: - - agent.no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords - - agent.no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords + origin: genre_native main_device_request_keywords: - anlage - messanlage @@ -573,9 +573,7 @@ parameters: product_attributes: description: Current genre attributes and constraint terms. Fashion would typically replace these with size, color, material, fit and variant constraints. direct_attribute_cleanup: - source_paths: - - agent.shop_runtime.attribute_cleanup.vocabulary_views.product_type_terms - - agent.shop_runtime.attribute_cleanup.vocabulary_views.stop_terms + origin: genre_native product_type_terms: - anschlusskabel - kabel @@ -637,11 +635,7 @@ parameters: comparative_constraint_patterns: - /\b(?:länger|laenger|kürzer|kuerzer|größer|groesser|kleiner|über|ueber|unter|mindestens|maximal|maximum|minimum|ab|bis|mehr\s+als|weniger\s+als)\s+(?P\d+(?:[,.]\d+)?\s*[\p{L}µ°%]*)\b/iu size_and_color_terms: - source_paths: - - intent.commerce.patterns.size_extraction_template - - intent.commerce.patterns.size_value_template - - intent.commerce.patterns.size_token_value_template - - intent.commerce.patterns.color_value_template + origin: genre_native size_token_terms: - xs - s @@ -675,9 +669,7 @@ parameters: color_value_template: /\b(?:{color_pattern})\b/u model_like_product: /\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u numeric_length_constraints: - source_paths: - - agent.shop_runtime.answer_constraints.length_sort - - agent.shop_runtime.answer_constraints.length_filter + origin: genre_native length_sort: enabled: true trigger_patterns: @@ -791,8 +783,7 @@ parameters: intent_and_routing: description: Genre-specific words and patterns that steer commerce/advisory routing. fuzzy_routing_terms: - source_paths: - - agent.input_normalization.fuzzy_routing.vocabulary_views.terms + origin: genre_native terms: - shop - suche @@ -861,8 +852,7 @@ parameters: - empfehlen - empfiehl commerce_intent: - source_paths: - - intent.commerce.patterns.model_like_product + origin: genre_native strong_signals: - shop - alle @@ -951,12 +941,7 @@ parameters: - /\belektrodenkabel\b/u model_like_product_pattern: /\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u sales_intent: - source_paths: - - intent.sales.sales_signals - - intent.sales.comparison_signals - - intent.sales.objection_signals - - intent.sales.implementation_signals - - intent.sales.roi_signals + origin: genre_native sales_signals: - preis - preise @@ -1016,11 +1001,7 @@ parameters: context_resolution: description: Current follow-up anchors and shop meta-query handling for this genre. commercial_table_follow_up: - source_paths: - - agent.follow_up_context.commercial_table_follow_up.history_anchor_patterns - - agent.follow_up_context.commercial_table_follow_up.indicator_marker_patterns - - agent.follow_up_context.commercial_table_follow_up.query_template_with_model - - agent.follow_up_context.commercial_table_follow_up.query_template_without_model + origin: genre_native history_anchor_patterns: - /\bTestomat(?:®)?\s+\d{3,4}\b/iu - /\b(?:Indikatortyp|Indikator|Indikatoren|Reagenz|Reagenzien|Zubehör|Zubehoer)\b/iu @@ -1053,8 +1034,7 @@ parameters: - selbe - selben history_anchor_enrichment: - source_paths: - - agent.shop_runtime.context_resolution.history_anchor_enrichment.vocabulary_views.trigger_terms + origin: genre_native trigger_terms: - indikator - indikatortyp @@ -1117,8 +1097,7 @@ parameters: shop_query_runtime: description: Current direct Shopware query cleanup and deterministic answer wording for this genre. current_input_preservation_terms: - source_paths: - - agent.shop_runtime.query_cleanup.current_input_preservation.vocabulary_views.terms + origin: genre_native terms: - ph - rx @@ -1492,13 +1471,7 @@ parameters: terms: - requested_accessory_code_terms candidate_patterns: - source_paths: - - search_repair.patterns.model_candidate - - search_repair.patterns.accessory_candidate_template - - search_repair.patterns.requested_accessory_code - - search_repair.patterns.accessory_or_bundle_template - - search_repair.patterns.model_like - - search_repair.patterns.specificity_boost_template + origin: genre_native specific_model_candidate_patterns: - /\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß0-9][A-Za-zÄÖÜäöüß0-9®\-]*){0,3}\s+\d{2,5}(?:\s+[A-ZÄÖÜ]{1,8})?)\b/u patterns: @@ -1580,10 +1553,7 @@ parameters: - tc - 0,02 cleanup_profiles: - source_paths: - - language.cleanup_profiles.commerce_query - - language.cleanup_profiles.rag_evidence - - language.cleanup_profiles.shop_context_fallback + origin: genre_native commerce_query: stopword_group_sets: - de_conversation @@ -1775,29 +1745,23 @@ parameters: shop_data_mapping: description: Current Shopware field mapping and matching text behavior that changes per installation/genre. custom_fields: - source_paths: - - shop_matching.custom_fields + origin: genre_native primary: migration_Backup_product_attr1 secondary: migration_Backup_product_attr2 use_cases: migration_Backup_product_attr4 languages: migration_Backup_product_attr5 text: - source_paths: - - shop_matching.text.custom_field_join_separator - - shop_matching.text.primary_secondary_separator + origin: genre_native primary_secondary_separator: ': ' use_cases_label: 'Einsatzgebiete: ' languages_label: 'Sprachen: ' custom_field_join_separator: ' | ' role_guard: - source_paths: - - shop_matching.role_guard + origin: genre_native filter_accessory_products_for_device_queries: true keep_ambiguous_products_for_device_queries: true commerce_connection: - source_paths: - - commerce.store_api_base_url - - commerce.max_shop_results + origin: genre_native store_api_base_url: '%env(SHOPWARE_STORE_API_BASE_URL)%' max_shop_results: '%env(SHOPWARE_STORE_API_MAX_RESULT)%' governance_and_regression: @@ -1862,8 +1826,7 @@ parameters: vocabulary_guardrails: origin: genre_native core_pattern_audit: - source_paths: - - governance.core_pattern_audit + origin: genre_native source_roots: - src excluded_path_prefixes: diff --git a/patch_history/RETRIEX_PATCH_59D_GENRE_SOURCEPATH_METADATA_SPLIT_README.md b/patch_history/RETRIEX_PATCH_59D_GENRE_SOURCEPATH_METADATA_SPLIT_README.md new file mode 100644 index 0000000..072ff0b --- /dev/null +++ b/patch_history/RETRIEX_PATCH_59D_GENRE_SOURCEPATH_METADATA_SPLIT_README.md @@ -0,0 +1,71 @@ +# RetrieX Patch p59D - Genre Source Path Metadata Split + +## Goal + +Continue the p59 cleanup after p59A/p59B/p59C by separating native genre values from legacy/runtime source-path metadata. + +`genre.yaml.configuration_values` should be the maintenance surface for actual genre values. Legacy/runtime source paths are still useful for audit and migration visibility, but they no longer need to live inside individual value nodes once p59C has classified them in `adaptation_surface.*.review_path_groups`. + +## Changes + +- Removed all remaining `source_paths` declarations from `genre.yaml.configuration_values`. +- Marked the affected value nodes with `origin: genre_native`. +- Kept all 39 legacy/runtime source paths in `adaptation_surface.*.review_path_groups`. +- Updated `GenreSourceOfTruthGuard` so frozen/runtime source-path checks read declared paths from review path groups as well as the legacy value-node format. +- Updated `ConfigSourceAuditProvider` so the source audit distinguishes: + - configuration value nodes with `source_paths` + - review path groups with declared source paths + - total declared source paths +- Updated the audit command labels from “Genre value path” to “Genre metadata path” for source-path audit rows. + +## Runtime impact + +No runtime, retrieval, prompt, shop, routing, query, scoring or answer logic was changed. + +The patch only changes configuration metadata placement and audit/guard handling. + +## Expected audit shape after the patch + +- `genre.configuration_values.*.source_paths`: 0 +- `genre.adaptation_surface.*.review_path_groups.*.paths`: 39 declared paths +- frozen non-empty legacy paths: 37 +- runtime-resolved allowed paths: 2 +- source-of-truth violations: 0 + +## Local checks performed + +Because the ZIP does not include `vendor/`, Symfony console commands could not be executed locally. + +Performed locally: + +- `config/retriex/genre.yaml` parses as YAML. +- `config/retriex/governance.yaml` parses as YAML. +- PHP lint passed for changed files: + - `src/Config/GenreSourceOfTruthGuard.php` + - `src/Config/ConfigSourceAuditProvider.php` + - `src/Command/ConfigSourceAuditCommand.php` +- Local guard simulation: + - 0 `configuration_values` source-path nodes + - 39 declared review source paths + - 39 unique declared source paths + - 0 missing source paths + - 0 unregistered non-empty source paths + - 0 frozen hash mismatches + - 0 undeclared frozen hashes + +## Required project checks after applying + +Run in the real project with dependencies installed: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Notes + +This patch intentionally does not remove the legacy/runtime YAML values themselves. It only removes their duplicate declaration from `configuration_values` and keeps them visible in the adaptation surface review inventory. + +A later patch can decide, group by group, whether a legacy value is still a necessary runtime fallback, a technical config that should remain outside genre values, or removable after a dedicated runtime rewire. diff --git a/src/Command/ConfigSourceAuditCommand.php b/src/Command/ConfigSourceAuditCommand.php index 8bdac3f..58ba47a 100644 --- a/src/Command/ConfigSourceAuditCommand.php +++ b/src/Command/ConfigSourceAuditCommand.php @@ -65,6 +65,7 @@ final class ConfigSourceAuditCommand extends Command ['constructor_defaults' => (string) ($summary['constructor_defaults'] ?? 0)], ['constructor_defaults_without_yaml_mapping' => (string) ($summary['constructor_defaults_without_yaml_mapping'] ?? 0)], ['genre_value_paths_with_source_paths' => (string) ($summary['genre_value_paths_with_source_paths'] ?? 0)], + ['genre_review_path_groups_with_source_paths' => (string) ($summary['genre_review_path_groups_with_source_paths'] ?? 0)], ['genre_declared_source_paths' => (string) ($summary['genre_declared_source_paths'] ?? 0)], ['genre_source_of_truth_violations' => (string) ($summary['genre_source_of_truth_violations'] ?? 0)], ['genre_source_of_truth_fallback_empty' => (string) ($summary['genre_source_of_truth_fallback_empty'] ?? 0)], @@ -128,8 +129,8 @@ final class ConfigSourceAuditCommand extends Command } if ($genreSourceRows !== []) { - $io->section('Single-genre configuration source paths'); - $io->table(['Genre value path', 'Legacy/effective source path'], $genreSourceRows); + $io->section('Single-genre declared source paths'); + $io->table(['Genre metadata path', 'Legacy/effective source path'], $genreSourceRows); } @@ -150,7 +151,7 @@ final class ConfigSourceAuditCommand extends Command if ($sourceOfTruthRows !== []) { $io->section('Genre source-of-truth guard'); - $io->table(['Genre value path', 'Legacy/effective source path', 'State', 'Hash'], $sourceOfTruthRows); + $io->table(['Genre metadata path', 'Legacy/effective source path', 'State', 'Hash'], $sourceOfTruthRows); } } } diff --git a/src/Config/ConfigSourceAuditProvider.php b/src/Config/ConfigSourceAuditProvider.php index a22d137..0d1765d 100644 --- a/src/Config/ConfigSourceAuditProvider.php +++ b/src/Config/ConfigSourceAuditProvider.php @@ -59,7 +59,9 @@ final readonly class ConfigSourceAuditProvider public function audit(): array { $yamlPaths = $this->collectYamlParameterPaths(); - $genreSourcePaths = $this->collectGenreConfigurationSourcePaths(); + $genreConfigurationValueSourcePaths = $this->collectGenreConfigurationValueSourcePaths(); + $genreReviewSourcePaths = $this->collectGenreReviewSourcePaths(); + $genreSourcePaths = array_replace($genreConfigurationValueSourcePaths, $genreReviewSourcePaths); $genreSourceOfTruth = $this->genreSourceOfTruthGuard->auditFromFiles(); $fallbackAccessors = []; $constructorDefaults = []; @@ -142,7 +144,8 @@ final readonly class ConfigSourceAuditProvider 'fallback_accessors_missing_yaml' => count($missingYamlFallbacks), 'constructor_defaults' => count($constructorDefaults), 'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults), - 'genre_value_paths_with_source_paths' => count($genreSourcePaths), + 'genre_value_paths_with_source_paths' => count($genreConfigurationValueSourcePaths), + 'genre_review_path_groups_with_source_paths' => count($genreReviewSourcePaths), 'genre_declared_source_paths' => $this->countGenreDeclaredSourcePaths($genreSourcePaths), 'genre_source_of_truth_violations' => (int) (($genreSourceOfTruth['summary']['violations'] ?? 0)), 'genre_source_of_truth_fallback_empty' => (int) (($genreSourceOfTruth['summary']['legacy_fallback_empty'] ?? 0)), @@ -157,6 +160,8 @@ final readonly class ConfigSourceAuditProvider 'constructor_defaults' => $constructorDefaults, 'php_constants' => $phpConstants, 'genre_configuration_source_paths' => $genreSourcePaths, + 'genre_configuration_value_source_paths' => $genreConfigurationValueSourcePaths, + 'genre_review_source_paths' => $genreReviewSourcePaths, 'genre_source_of_truth' => $genreSourceOfTruth, ]; } @@ -193,7 +198,90 @@ final readonly class ConfigSourceAuditProvider /** * @return array */ - private function collectGenreConfigurationSourcePaths(): array + private function collectGenreConfigurationValueSourcePaths(): array + { + $genreConfig = $this->loadGenreConfig(); + $configurationValues = $genreConfig['configuration_values'] ?? []; + if (!is_array($configurationValues)) { + return []; + } + + $out = []; + $this->collectGenreSourcePathsRecursive($configurationValues, '', $out); + + return $out; + } + + /** + * @return array + */ + private function collectGenreReviewSourcePaths(): array + { + $genreConfig = $this->loadGenreConfig(); + $adaptationSurface = $genreConfig['adaptation_surface'] ?? []; + if (!is_array($adaptationSurface)) { + return []; + } + + $out = []; + foreach ($adaptationSurface as $group => $definition) { + if (!is_string($group) || trim($group) === '' || !is_array($definition)) { + continue; + } + + $reviewPathGroups = $definition['review_path_groups'] ?? null; + if (!is_array($reviewPathGroups)) { + continue; + } + + foreach ($reviewPathGroups as $reviewGroup => $reviewDefinition) { + if (!is_string($reviewGroup) || trim($reviewGroup) === '' || !is_array($reviewDefinition)) { + continue; + } + + $paths = $reviewDefinition['paths'] ?? null; + if (!is_array($paths)) { + continue; + } + + $clean = $this->cleanStringList($paths); + if ($clean !== []) { + $out[sprintf('adaptation_surface.%s.review_path_groups.%s', $group, $reviewGroup)] = $clean; + } + } + } + + return $out; + } + + /** + * @param array $value + * @param array $out + */ + private function collectGenreSourcePathsRecursive(array $value, string $path, array &$out): void + { + $sourcePaths = $value['source_paths'] ?? null; + if (is_array($sourcePaths)) { + $clean = $this->cleanStringList($sourcePaths); + if ($clean !== [] && $path !== '') { + $out[$path] = $clean; + } + } + + foreach ($value as $key => $child) { + if ($key === 'source_paths' || !is_string($key) || !is_array($child)) { + continue; + } + + $childPath = $path === '' ? $key : $path . '.' . $key; + $this->collectGenreSourcePathsRecursive($child, $childPath, $out); + } + } + + /** + * @return array + */ + private function loadGenreConfig(): array { $file = $this->projectDir . '/config/retriex/genre.yaml'; if (!is_file($file)) { @@ -211,54 +299,28 @@ final readonly class ConfigSourceAuditProvider } $genreConfig = $parameters['retriex.genre.config'] ?? []; - if (!is_array($genreConfig)) { - return []; - } - $configurationValues = $genreConfig['configuration_values'] ?? []; - if (!is_array($configurationValues)) { - return []; - } - - $out = []; - $this->collectGenreSourcePathsRecursive($configurationValues, '', $out); - - return $out; + return is_array($genreConfig) ? $genreConfig : []; } /** - * @param array $value - * @param array $out + * @return string[] */ - private function collectGenreSourcePathsRecursive(array $value, string $path, array &$out): void + private function cleanStringList(array $values): array { - $sourcePaths = $value['source_paths'] ?? null; - if (is_array($sourcePaths)) { - $clean = []; - foreach ($sourcePaths as $sourcePath) { - if (!is_string($sourcePath) || trim($sourcePath) === '') { - continue; - } - - $sourcePath = trim($sourcePath); - if (!in_array($sourcePath, $clean, true)) { - $clean[] = $sourcePath; - } - } - - if ($clean !== [] && $path !== '') { - $out[$path] = $clean; - } - } - - foreach ($value as $key => $child) { - if ($key === 'source_paths' || !is_string($key) || !is_array($child)) { + $clean = []; + foreach ($values as $value) { + if (!is_string($value) || trim($value) === '') { continue; } - $childPath = $path === '' ? $key : $path . '.' . $key; - $this->collectGenreSourcePathsRecursive($child, $childPath, $out); + $value = trim($value); + if (!in_array($value, $clean, true)) { + $clean[] = $value; + } } + + return $clean; } /** diff --git a/src/Config/GenreSourceOfTruthGuard.php b/src/Config/GenreSourceOfTruthGuard.php index 0165ed4..aaa4829 100644 --- a/src/Config/GenreSourceOfTruthGuard.php +++ b/src/Config/GenreSourceOfTruthGuard.php @@ -97,7 +97,9 @@ final readonly class GenreSourceOfTruthGuard array_push($errors, ...$coverageErrors); $nativeValueNodes = $this->countGenreNativeValueNodes($configurationValues); - $declaredSourcePaths = $this->collectSourcePaths($configurationValues); + $configurationValueSourcePaths = $this->collectConfigurationValueSourcePaths($configurationValues); + $reviewSourcePaths = $this->collectReviewPathGroupSourcePaths(is_array($adaptationSurface) ? $adaptationSurface : []); + $declaredSourcePaths = array_replace($configurationValueSourcePaths, $reviewSourcePaths); $uniqueSourcePaths = []; foreach ($declaredSourcePaths as $valuePath => $sourcePaths) { foreach ($sourcePaths as $sourcePath) { @@ -159,7 +161,9 @@ final readonly class GenreSourceOfTruthGuard $summary = $this->summarizeRows($rows); $summary['configuration_value_groups'] = count($configurationValues); $summary['genre_native_value_nodes'] = $nativeValueNodes; - $summary['source_path_value_nodes'] = count($declaredSourcePaths); + $summary['source_path_value_nodes'] = count($configurationValueSourcePaths); + $summary['review_path_group_nodes'] = count($reviewSourcePaths); + $summary['declared_source_path_nodes'] = count($declaredSourcePaths); $summary['declared_source_paths'] = count($uniqueSourcePaths); $summary['violations'] = count($errors); @@ -289,9 +293,25 @@ final readonly class GenreSourceOfTruthGuard /** * @param array $configurationValues + * @param array $adaptationSurface * @return array */ - private function collectSourcePaths(array $configurationValues): array + private function collectDeclaredSourcePaths(array $configurationValues, array $adaptationSurface): array + { + $out = $this->collectConfigurationValueSourcePaths($configurationValues); + + foreach ($this->collectReviewPathGroupSourcePaths($adaptationSurface) as $path => $sourcePaths) { + $out[$path] = $sourcePaths; + } + + return $out; + } + + /** + * @param array $configurationValues + * @return array + */ + private function collectConfigurationValueSourcePaths(array $configurationValues): array { $out = []; $this->collectSourcePathsRecursive($configurationValues, '', $out); @@ -307,16 +327,7 @@ final readonly class GenreSourceOfTruthGuard { $sourcePaths = $value['source_paths'] ?? null; if (is_array($sourcePaths) && $path !== '') { - $clean = []; - foreach ($sourcePaths as $sourcePath) { - if (!is_string($sourcePath) || trim($sourcePath) === '') { - continue; - } - $sourcePath = trim($sourcePath); - if (!in_array($sourcePath, $clean, true)) { - $clean[] = $sourcePath; - } - } + $clean = $this->cleanStringList($sourcePaths); if ($clean !== []) { $out[$path] = $clean; } @@ -331,6 +342,64 @@ final readonly class GenreSourceOfTruthGuard } } + /** + * @param array $adaptationSurface + * @return array + */ + private function collectReviewPathGroupSourcePaths(array $adaptationSurface): array + { + $out = []; + + foreach ($adaptationSurface as $group => $definition) { + if (!is_string($group) || trim($group) === '' || !is_array($definition)) { + continue; + } + + $reviewPathGroups = $definition['review_path_groups'] ?? null; + if (!is_array($reviewPathGroups)) { + continue; + } + + foreach ($reviewPathGroups as $reviewGroup => $reviewDefinition) { + if (!is_string($reviewGroup) || trim($reviewGroup) === '' || !is_array($reviewDefinition)) { + continue; + } + + $paths = $reviewDefinition['paths'] ?? null; + if (!is_array($paths)) { + continue; + } + + $clean = $this->cleanStringList($paths); + if ($clean !== []) { + $out[sprintf('adaptation_surface.%s.review_path_groups.%s', $group, $reviewGroup)] = $clean; + } + } + } + + return $out; + } + + /** + * @return string[] + */ + private function cleanStringList(array $values): array + { + $clean = []; + foreach ($values as $value) { + if (!is_string($value) || trim($value) === '') { + continue; + } + + $value = trim($value); + if (!in_array($value, $clean, true)) { + $clean[] = $value; + } + } + + return $clean; + } + /** * @param array $guardConfig * @return array @@ -530,6 +599,8 @@ final readonly class GenreSourceOfTruthGuard return [ 'configuration_value_groups' => 0, 'source_path_value_nodes' => 0, + 'review_path_group_nodes' => 0, + 'declared_source_path_nodes' => 0, 'declared_source_paths' => 0, 'genre_native_value_nodes' => 0, 'legacy_fallback_empty' => 0,