From a460eee42950ce8aad89ebcd2478055e7c141980 Mon Sep 17 00:00:00 2001 From: team2 Date: Wed, 29 Apr 2026 20:21:02 +0200 Subject: [PATCH] first step --- RETRIEX_CONFIG_SOURCE_AUDIT_PATCH_README.md | 44 +++ ...DENCE_GUARD_SHOP_UNAVAILABLE_FIX_README.md | 14 + src/Command/ConfigSourceAuditCommand.php | 102 ++++++ src/Config/ConfigSourceAuditProvider.php | 341 ++++++++++++++++++ 4 files changed, 501 insertions(+) create mode 100644 RETRIEX_CONFIG_SOURCE_AUDIT_PATCH_README.md create mode 100644 RETRIEX_RAG_EVIDENCE_GUARD_SHOP_UNAVAILABLE_FIX_README.md create mode 100644 src/Command/ConfigSourceAuditCommand.php create mode 100644 src/Config/ConfigSourceAuditProvider.php diff --git a/RETRIEX_CONFIG_SOURCE_AUDIT_PATCH_README.md b/RETRIEX_CONFIG_SOURCE_AUDIT_PATCH_README.md new file mode 100644 index 0000000..20dd30a --- /dev/null +++ b/RETRIEX_CONFIG_SOURCE_AUDIT_PATCH_README.md @@ -0,0 +1,44 @@ +# RetrieX Config Source Audit Patch + +This patch starts the YAML-only migration with a non-invasive diagnostic layer. It does not change retrieval, prompt generation, commerce parsing, shop matching, SSE behavior, or any answer logic. + +## Added + +- `App\Config\ConfigSourceAuditProvider` +- `mto:agent:config:audit-source` + +## Purpose + +The audit makes remaining PHP defaults visible before we remove them. It scans: + +- `config/retriex/*.yaml` parameter paths +- `src/Config/*Config.php` constants +- config helper calls with fallback arguments, for example `stringList('x', self::DEFAULT)` or `int('x', 3)` +- constructor defaults that still act as PHP fallbacks + +## Usage + +```bash +php bin/console mto:agent:config:audit-source +php bin/console mto:agent:config:audit-source --details +php bin/console mto:agent:config:audit-source --json +``` + +## Interpretation + +- `yaml_with_php_fallback`: the YAML path exists, but PHP still has a fallback argument. This is ready for a later conversion to required YAML access. 
+- `php_fallback_if_yaml_missing_or_invalid`: PHP still supplies a default and no matching YAML path was found. This must be migrated before the fallback can be removed. +- `php_only`: constants without an injected YAML config parameter. These must be classified as either technical guardrails or values that should move to YAML. + +## Migration rule for the next patches + +Do not remove PHP defaults yet. First migrate every still-needed value into YAML, then convert fallback helpers to required YAML accessors in small class groups. + +## Validation performed for this patch + +```bash +php -l src/Config/ConfigSourceAuditProvider.php +php -l src/Command/ConfigSourceAuditCommand.php +``` + +Both files are syntactically valid. Full Symfony command execution was not performed in this ZIP workspace because `vendor/` is not included in the uploaded archive. diff --git a/RETRIEX_RAG_EVIDENCE_GUARD_SHOP_UNAVAILABLE_FIX_README.md b/RETRIEX_RAG_EVIDENCE_GUARD_SHOP_UNAVAILABLE_FIX_README.md new file mode 100644 index 0000000..8d7930f --- /dev/null +++ b/RETRIEX_RAG_EVIDENCE_GUARD_SHOP_UNAVAILABLE_FIX_README.md @@ -0,0 +1,14 @@ +# RetrieX RAG Evidence Guard + Shop-Unavailable Fix + +This patch separates vector retrieval hits from direct factual evidence. + +## Changed behavior + +- RAG hits are still counted as retrieved hits, but they are only shown as `fachlich belegt` when a salient user term or configured synonym appears in the retrieved RAG/URL content. +- If retrieved chunks are only semantic nearest-neighbor hits, the UI confidence changes to `RAG-Näherungstreffer, kein direkter Fachbeleg`. +- If the shop search fails, the shop meta card is finalized as unavailable instead of staying in a running/loading state. +- Prompt rules now distinguish `semantic RAG hits without direct Fachbeleg` from reliable RAG facts. + +## Scope + +No retrieval, vector index, scoring, SSE, job, or shop-search request logic is changed. 
The patch only changes evidence classification, prompt instructions, fallback messages, and shop-unavailable meta status.
diff --git a/src/Command/ConfigSourceAuditCommand.php b/src/Command/ConfigSourceAuditCommand.php
new file mode 100644
index 0000000..c28316f
--- /dev/null
+++ b/src/Command/ConfigSourceAuditCommand.php
@@ -0,0 +1,102 @@
+addOption('json', null, InputOption::VALUE_NONE, 'Render the full audit result as JSON.')
+            ->addOption('details', null, InputOption::VALUE_NONE, 'Render detailed fallback rows in the console summary.');
+    }
+
+    /**
+     * Runs the audit and renders it as JSON (--json) or as a console summary.
+     *
+     * @return int Command::SUCCESS after rendering; Command::FAILURE when the audit
+     *             result cannot be encoded as JSON.
+     */
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        $result = $this->provider->audit();
+
+        if ((bool) $input->getOption('json')) {
+            try {
+                $json = json_encode(
+                    $result,
+                    JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR
+                );
+            } catch (\JsonException $exception) {
+                // An empty "{}" with a success exit code would look like a clean audit to
+                // scripts consuming --json, so report the encoding failure explicitly.
+                $output->writeln('Could not encode the audit result as JSON: ' . $exception->getMessage());
+
+                return Command::FAILURE;
+            }
+
+            $output->writeln($json);
+
+            return Command::SUCCESS;
+        }
+
+        $this->renderSummary(new SymfonyStyle($input, $output), $result, (bool) $input->getOption('details'));
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Renders the audit result as a human-readable console report: title, summary
+     * counters, warnings and (only with $details) one table row per fallback accessor.
+     *
+     * Every value is read defensively with `??` and type checks, so a missing or
+     * malformed key renders as a default instead of raising a notice.
+     *
+     * @param array<string, mixed> $result  audit result as returned by the provider
+     * @param bool                 $details whether to print the fallback accessor table
+     */
+    private function renderSummary(SymfonyStyle $io, array $result, bool $details): void
+    {
+        $io->title('RetrieX configuration source audit');
+
+        $summary = is_array($result['summary'] ?? null) ? $result['summary'] : [];
+        $io->definitionList(
+            ['status' => (string) ($result['status'] ?? 'UNKNOWN')],
+            ['yaml_parameter_paths' => (string) ($summary['yaml_parameter_paths'] ?? 0)],
+            ['php_constants' => (string) ($summary['php_constants'] ?? 0)],
+            ['php_only_constants' => (string) ($summary['php_only_constants'] ?? 0)],
+            ['fallback_accessors' => (string) ($summary['fallback_accessors'] ?? 0)],
+            ['fallback_accessors_with_yaml' => (string) ($summary['fallback_accessors_with_yaml'] ?? 0)],
+            ['fallback_accessors_missing_yaml' => (string) ($summary['fallback_accessors_missing_yaml'] ?? 0)],
+            ['constructor_defaults' => (string) ($summary['constructor_defaults'] ?? 0)],
+            ['constructor_defaults_without_yaml_mapping' => (string) ($summary['constructor_defaults_without_yaml_mapping'] ?? 0)]
+        );
+
+        $warnings = is_array($result['warnings'] ?? null) ? $result['warnings'] : [];
+        if ($warnings !== []) {
+            $io->section('Warnings');
+            foreach ($warnings as $warning) {
+                $io->writeln('- ' . (string) $warning);
+            }
+        }
+
+        if (!$details) {
+            $io->note('Use --details for fallback rows or --json for the complete machine-readable audit.');
+            return;
+        }
+
+        $fallbackRows = [];
+        foreach (($result['fallback_accessors'] ?? []) as $item) {
+            if (!is_array($item)) {
+                continue;
+            }
+
+            $fallbackRows[] = [
+                (string) ($item['class'] ?? ''),
+                (string) ($item['line'] ?? ''),
+                (string) ($item['key'] ?? ''),
+                ((bool) ($item['yaml_present'] ?? false)) ? 'yes' : 'no',
+                (string) ($item['source'] ?? ''),
+            ];
+        }
+
+        if ($fallbackRows !== []) {
+            $io->section('Fallback accessors');
+            $io->table(['Class', 'Line', 'Key', 'YAML', 'Source'], $fallbackRows);
+        }
+    }
+}
diff --git a/src/Config/ConfigSourceAuditProvider.php b/src/Config/ConfigSourceAuditProvider.php
new file mode 100644
index 0000000..64dd433
--- /dev/null
+++ b/src/Config/ConfigSourceAuditProvider.php
@@ -0,0 +1,341 @@
+ 'retriex.agent.config',
+        'CommerceIntentConfig' => 'retriex.intent.commerce.config',
+        'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
+        'DomainVocabularyConfig' => 'retriex.vocabulary.config',
+        'IntentLightConfig' => 'retriex.intent.light.config',
+        'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
+        'PromptBuilderConfig' => 'retriex.prompt.config',
+        'QueryEnricherConfig' => 'retriex.query_enrichment.config',
+        'SalesIntentConfig' => 'retriex.intent.sales.config',
+        'SearchRepairConfig' => 'retriex.search_repair.config',
+        'ShopServiceConfig' => 'retriex.shop_matching.config',
+        'StopWordsConfig' => 'retriex.stopwords.config',
+    ];
+
+    // Maps config class => constructor argument name => YAML parameter path that is
+    // looked up for that argument's default in audit().
+    private const CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT = [
+        'SearchRepairConfig' => [
+            'enabled' => 'retriex.commerce.search_repair.enabled',
+            'maxRepairQueries' => 'retriex.commerce.search_repair.max_queries',
+            'minPrimaryResultsWithoutRepair' => 'retriex.commerce.search_repair.min_primary_results_without_repair',
+        ],
+    ];
+
+    /**
+     * @param string $projectDir directory that contains `src/Config` and `config/retriex`
+     */
+    public function __construct(private string $projectDir)
+    {
+    }
+
+    /**
+     * Scans every `src/Config/*Config.php` file and reports where configuration
+     * values still come from PHP instead of YAML.
+     *
+     * Three kinds of findings are collected per file: class constants, helper calls
+     * that pass a fallback argument, and constructor arguments with a default value.
+     * The status is 'OK' only when there are no fallback accessors without a YAML
+     * path, no PHP-only constants and no unmapped constructor defaults; otherwise 'WARN'.
+     *
+     * @return array<string, mixed> keys: status, summary, warnings, fallback_accessors,
+     *                              constructor_defaults, php_constants
+     */
+    public function audit(): array
+    {
+        $yamlPaths = $this->collectYamlParameterPaths();
+        $fallbackAccessors = [];
+        $constructorDefaults = [];
+        $phpConstants = [];
+
+        foreach ($this->configFiles() as $filePath) {
+            // The file name without extension doubles as the short class name used in the lookup maps.
+            $class = pathinfo($filePath, PATHINFO_FILENAME);
+            $content = (string) file_get_contents($filePath);
+            $configParameter = self::CONFIG_PARAMETER_BY_CLASS[$class] ?? null;
+
+            foreach ($this->findConstants($content) as $constant) {
+                $phpConstants[] = [
+                    'class' => $class,
+                    'constant' => $constant['name'],
+                    'line' => $constant['line'],
+                    'config_parameter' => $configParameter,
+                    'source' => $configParameter === null ? 'php_only' : 'php_default_candidate',
+                ];
+            }
+
+            foreach ($this->findFallbackAccessors($content) as $accessor) {
+                $yamlPath = $configParameter !== null ? $configParameter . '.' . $accessor['key'] : null;
+                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
+
+                $fallbackAccessors[] = [
+                    'class' => $class,
+                    'line' => $accessor['line'],
+                    'helper' => $accessor['helper'],
+                    'key' => $accessor['key'],
+                    'config_parameter' => $configParameter,
+                    'yaml_path' => $yamlPath,
+                    'yaml_present' => $yamlPresent,
+                    'source' => $yamlPresent ? 'yaml_with_php_fallback' : 'php_fallback_if_yaml_missing_or_invalid',
+                    'default_expression' => $this->compactExpression($accessor['default_expression']),
+                ];
+            }
+
+            foreach ($this->findConstructorDefaults($content) as $argument) {
+                $yamlPath = self::CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT[$class][$argument['argument']] ?? null;
+                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
+
+                $constructorDefaults[] = [
+                    'class' => $class,
+                    'line' => $argument['line'],
+                    'argument' => $argument['argument'],
+                    'yaml_path' => $yamlPath,
+                    'yaml_present' => $yamlPresent,
+                    'source' => $yamlPresent ? 'yaml_service_argument_with_php_default' : 'php_constructor_default',
+                    'default_expression' => $this->compactExpression($argument['default_expression']),
+                ];
+            }
+        }
+
+        $missingYamlFallbacks = array_values(array_filter(
+            $fallbackAccessors,
+            static fn (array $item): bool => $item['yaml_present'] === false
+        ));
+        $phpOnlyConstants = array_values(array_filter(
+            $phpConstants,
+            static fn (array $item): bool => $item['source'] === 'php_only'
+        ));
+        $constructorPhpDefaults = array_values(array_filter(
+            $constructorDefaults,
+            static fn (array $item): bool => $item['source'] === 'php_constructor_default'
+        ));
+
+        $status = ($missingYamlFallbacks === [] && $phpOnlyConstants === [] && $constructorPhpDefaults === []) ? 'OK' : 'WARN';
+
+        return [
+            'status' => $status,
+            'summary' => [
+                'yaml_parameter_paths' => count($yamlPaths),
+                'php_constants' => count($phpConstants),
+                'php_only_constants' => count($phpOnlyConstants),
+                'fallback_accessors' => count($fallbackAccessors),
+                'fallback_accessors_with_yaml' => count($fallbackAccessors) - count($missingYamlFallbacks),
+                'fallback_accessors_missing_yaml' => count($missingYamlFallbacks),
+                'constructor_defaults' => count($constructorDefaults),
+                'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults),
+            ],
+            'warnings' => $this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
+            'fallback_accessors' => $fallbackAccessors,
+            'constructor_defaults' => $constructorDefaults,
+            'php_constants' => $phpConstants,
+        ];
+    }
+
+    /**
+     * Lists the config class files to audit, sorted so the report order is stable.
+     *
+     * @return list<string> paths matching `<projectDir>/src/Config/*Config.php`
+     */
+    private function configFiles(): array
+    {
+        $files = glob($this->projectDir . '/src/Config/*Config.php');
+        if (!is_array($files)) {
+            return [];
+        }
+
+        sort($files);
+
+        return $files;
+    }
+
+    /**
+     * Collects every dotted parameter path declared under `parameters:` in
+     * `config/retriex/*.yaml`.
+     *
+     * Files whose root or `parameters` entry is not an array are skipped.
+     * NOTE(review): Yaml::parseFile() is not wrapped here, so a parse error in any
+     * file presumably aborts the whole audit - confirm that this is intended.
+     *
+     * @return array<string, true> set of parameter paths, keyed by path
+     */
+    private function collectYamlParameterPaths(): array
+    {
+        $paths = [];
+        $files = glob($this->projectDir . '/config/retriex/*.yaml');
+        if (!is_array($files)) {
+            return $paths;
+        }
+
+        foreach ($files as $file) {
+            $parsed = Yaml::parseFile($file);
+            if (!is_array($parsed)) {
+                continue;
+            }
+
+            $parameters = $parsed['parameters'] ?? [];
+            if (!is_array($parameters)) {
+                continue;
+            }
+
+            foreach ($parameters as $name => $value) {
+                if (!is_string($name) || $name === '') {
+                    continue;
+                }
+
+                $this->flattenYamlPath($name, $value, $paths);
+            }
+        }
+
+        return $paths;
+    }
+
+    /**
+     * Registers $path and, recursively, `$path.<key>` for every string-keyed child.
+     *
+     * Integer-keyed entries (YAML sequences) are list items, not addressable
+     * sub-paths, so they are not descended into.
+     *
+     * @param array<string, true> $paths set of known paths, extended in place
+     */
+    private function flattenYamlPath(string $path, mixed $value, array &$paths): void
+    {
+        $paths[$path] = true;
+
+        if (!is_array($value)) {
+            return;
+        }
+
+        foreach ($value as $key => $child) {
+            // PHP array keys are either int or string; only string keys form a path segment.
+            if (!is_string($key)) {
+                continue;
+            }
+
+            $this->flattenYamlPath($path . '.' . $key, $child, $paths);
+        }
+    }
+
+    /**
+     * Finds class constant declarations by pattern matching on the source text.
+     *
+     * Matches an indented declaration with a visibility keyword, an optional `final`
+     * modifier and an optional type before the upper-case constant name. Only the
+     * first constant of a comma-separated declaration is reported.
+     *
+     * @return list<array{name: string, line: int}>
+     */
+    private function findConstants(string $content): array
+    {
+        $pattern = '/^\s+(?:final\s+)?(?:public|private|protected)\s+(?:final\s+)?const\s+(?:[^\s=]+\s+)?([A-Z0-9_]+)\s*=/m';
+        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
+            return [];
+        }
+
+        $constants = [];
+        foreach ($matches[1] as [$name, $offset]) {
+            $constants[] = [
+                'name' => (string) $name,
+                'line' => $this->lineNumber($content, (int) $offset),
+            ];
+        }
+
+        return $constants;
+    }
+
+    /**
+     * Finds `$this-><helper>('key', <fallback>)` calls, i.e. config reads that still
+     * carry a PHP fallback argument.
+     *
+     * The fallback is captured with balanced parentheses, so nested calls such as
+     * `array_merge(self::A, self::B)` are reported in full. Parentheses inside string
+     * literals are not recognised as such and can still shorten or prevent a match.
+     *
+     * @return list<array{helper: string, key: string, line: int, default_expression: string}>
+     */
+    private function findFallbackAccessors(string $content): array
+    {
+        $helpers = [
+            'bool',
+            'float',
+            'floatValue',
+            'getBool',
+            'getInt',
+            'getString',
+            'getStringList',
+            'int',
+            'intValue',
+            'raw',
+            'string',
+            'stringList',
+            'stringListMap',
+            'stringMap',
+            'value',
+        ];
+
+        // Group 3 recurses into itself via (?3) to consume nested "( ... )" pairs.
+        $pattern = '/\$this->(' . implode('|', $helpers) . ')\(\s*[\'\"]([^\'\"]+)[\'\"]\s*,\s*((?:[^()]++|\((?3)\))*+)\)/s';
+        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
+            return [];
+        }
+
+        $accessors = [];
+        foreach ($matches[0] as $index => [, $offset]) {
+            $accessors[] = [
+                'helper' => (string) $matches[1][$index][0],
+                'key' => (string) $matches[2][$index][0],
+                'line' => $this->lineNumber($content, (int) $offset),
+                'default_expression' => (string) $matches[3][$index][0],
+            ];
+        }
+
+        return $accessors;
+    }
+
+    /**
+     * Finds constructor arguments that declare a default value.
+     *
+     * Only the first `__construct` in the file is inspected. Arguments named `config`
+     * and defaults of `null` or `[]` are ignored. The default is captured up to the
+     * next comma or closing parenthesis, so multi-element array or call defaults are
+     * reported truncated.
+     *
+     * @return list<array{argument: string, line: int, default_expression: string}>
+     */
+    private function findConstructorDefaults(string $content): array
+    {
+        if (!preg_match('/function\s+__construct\s*\((.*?)\)\s*\{/s', $content, $constructorMatch, PREG_OFFSET_CAPTURE)) {
+            return [];
+        }
+
+        $constructor = (string) $constructorMatch[1][0];
+        // Offsets from the inner match are relative to the parameter list, so shift them back to the file.
+        $baseOffset = (int) $constructorMatch[1][1];
+        preg_match_all('/\$([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([^,\)]+)/', $constructor, $matches, PREG_OFFSET_CAPTURE);
+
+        $defaults = [];
+        foreach ($matches[1] as $index => [$argument, $offset]) {
+            $argument = (string) $argument;
+            $default = trim((string) $matches[2][$index][0]);
+
+            if ($argument === 'config' || $default === 'null' || $default === '[]') {
+                continue;
+            }
+
+            $defaults[] = [
+                'argument' => $argument,
+                'line' => $this->lineNumber($content, $baseOffset + (int) $offset),
+                'default_expression' => $default,
+            ];
+        }
+
+        return $defaults;
+    }
+
+    /**
+     * Converts a byte offset into a 1-based line number by counting preceding newlines.
+     */
+    private function lineNumber(string $content, int $offset): int
+    {
+        return substr_count(substr($content, 0, $offset), "\n") + 1;
+    }
+
+    /**
+     * Collapses all whitespace runs to single spaces and caps the result at 160
+     * characters (157 characters plus '...').
+     *
+     * Length is measured in characters, not bytes, so truncation never splits a
+     * multibyte character; a split character would make the string invalid UTF-8
+     * and break JSON encoding of the audit result.
+     */
+    private function compactExpression(string $expression): string
+    {
+        $expression = trim(preg_replace('/\s+/', ' ', $expression) ?? $expression);
+
+        if (mb_strlen($expression, 'UTF-8') <= 160) {
+            return $expression;
+        }
+
+        return mb_substr($expression, 0, 157, 'UTF-8') . '...';
+    }
+
+    /**
+     * Builds one human-readable warning per non-empty finding category.
+     *
+     * @param list<array<string, mixed>> $missingYamlFallbacks   fallback accessors without a YAML path
+     * @param list<array<string, mixed>> $phpOnlyConstants       constants of classes without a config parameter
+     * @param list<array<string, mixed>> $constructorPhpDefaults constructor defaults without a YAML mapping
+     * @return list<string>
+     */
+    private function buildWarnings(array $missingYamlFallbacks, array $phpOnlyConstants, array $constructorPhpDefaults): array
+    {
+        $warnings = [];
+
+        if ($missingYamlFallbacks !== []) {
+            $warnings[] = sprintf(
+                '%d config fallback accessor(s) still have no matching YAML path. These values must be migrated before PHP defaults can be removed.',
+                count($missingYamlFallbacks)
+            );
+        }
+
+        if ($phpOnlyConstants !== []) {
+            $warnings[] = sprintf(
+                '%d PHP-only config constant(s) were found. Review whether each one is technical guardrail or should move to YAML.',
+                count($phpOnlyConstants)
+            );
+        }
+
+        if ($constructorPhpDefaults !== []) {
+            $warnings[] = sprintf(
+                '%d constructor default(s) have no YAML/service-parameter mapping in the audit. Review before enabling strict YAML-only validation.',
+                count($constructorPhpDefaults)
+            );
+        }
+
+        return $warnings;
+    }
+}