first step

This commit is contained in:
team2
2026-04-29 20:21:02 +02:00
parent e39a57e00b
commit a460eee429
4 changed files with 501 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
# RetrieX Config Source Audit Patch
This patch starts the YAML-only migration with a non-invasive diagnostic layer. It does not change retrieval, prompt generation, commerce parsing, shop matching, SSE behavior, or any answer logic.
## Added
- `App\Config\ConfigSourceAuditProvider`
- `mto:agent:config:audit-source`
## Purpose
The audit makes remaining PHP defaults visible before we remove them. It scans:
- `config/retriex/*.yaml` parameter paths
- `src/Config/*Config.php` constants
- config helper calls with fallback arguments, for example `stringList('x', self::DEFAULT)` or `int('x', 3)`
- constructor defaults that still act as PHP fallbacks
## Usage
```bash
php bin/console mto:agent:config:audit-source
php bin/console mto:agent:config:audit-source --details
php bin/console mto:agent:config:audit-source --json
```
## Interpretation
- `yaml_with_php_fallback`: the YAML path exists, but PHP still has a fallback argument. This is ready for a later conversion to required YAML access.
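- `php_fallback_if_yaml_missing_or_invalid`: PHP still supplies a default and no matching YAML path was found, either because the key is absent from `config/retriex/*.yaml` or because the class has no known config parameter mapping in the audit. This must be migrated before the fallback can be removed.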
- `php_only`: constants declared in a config class for which the audit knows no injected YAML config parameter. These must be classified as either technical guardrails or values that should move to YAML.
## Migration rule for the next patches
Do not remove PHP defaults yet. First migrate every still-needed value into YAML, then convert fallback helpers to required YAML accessors in small class groups.
## Validation performed for this patch
```bash
php -l src/Config/ConfigSourceAuditProvider.php
php -l src/Command/ConfigSourceAuditCommand.php
```
Both files are syntactically valid. Full Symfony command execution was not performed in this ZIP workspace because `vendor/` is not included in the uploaded archive.

View File

@@ -0,0 +1,14 @@
# RetrieX RAG Evidence Guard + Shop-Unavailable Fix
This patch separates vector retrieval hits from direct factual evidence.
## Changed behavior
- RAG hits are still counted as retrieved hits, but they are only shown as `fachlich belegt` when a salient user term or configured synonym appears in the retrieved RAG/URL content.
- If retrieved chunks are only semantic nearest-neighbor hits, the UI confidence changes to `RAG-Näherungstreffer, kein direkter Fachbeleg`.
- If the shop search fails, the shop meta card is finalized as unavailable instead of staying in a running/loading state.
- Prompt rules now distinguish `semantic RAG hits without direct Fachbeleg` from reliable RAG facts.
## Scope
No retrieval, vector index, scoring, SSE, job, or shop-search request logic is changed. The patch only changes evidence classification, prompt instructions, fallback messages, and shop-unavailable meta status.

View File

@@ -0,0 +1,102 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Config\ConfigSourceAuditProvider;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console entry point for the configuration source audit.
 *
 * Runs {@see ConfigSourceAuditProvider::audit()} and renders the result either
 * as a human-readable summary (optionally with fallback rows via --details) or
 * as the complete JSON document via --json.
 */
#[AsCommand(
    name: 'mto:agent:config:audit-source',
    description: 'Audit YAML-backed configuration versus remaining PHP defaults'
)]
final class ConfigSourceAuditCommand extends Command
{
    public function __construct(private readonly ConfigSourceAuditProvider $provider)
    {
        parent::__construct();
    }

    /**
     * Registers the two output-mode switches; both are plain flags without a value.
     */
    protected function configure(): void
    {
        $this
            ->addOption('json', null, InputOption::VALUE_NONE, 'Render the full audit result as JSON.')
            ->addOption('details', null, InputOption::VALUE_NONE, 'Render detailed fallback rows in the console summary.');
    }

    /**
     * Runs the audit and renders it in the requested format.
     *
     * @return int Command::SUCCESS once the result was rendered, Command::FAILURE
     *             when --json was requested but the result could not be encoded.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $result = $this->provider->audit();

        if ((bool) $input->getOption('json')) {
            try {
                $json = json_encode(
                    $result,
                    JSON_PRETTY_PRINT
                    | JSON_UNESCAPED_SLASHES
                    | JSON_UNESCAPED_UNICODE
                    // Audited expressions are raw source snippets; a broken byte
                    // sequence must not discard the whole report.
                    | JSON_INVALID_UTF8_SUBSTITUTE
                    | JSON_THROW_ON_ERROR
                );
            } catch (\JsonException $exception) {
                // Report the failure instead of printing an empty "{}" with exit
                // code 0, which a machine consumer would read as "nothing found".
                $errorOutput = $output instanceof \Symfony\Component\Console\Output\ConsoleOutputInterface
                    ? $output->getErrorOutput()
                    : $output;
                $errorOutput->writeln(
                    'Could not encode the audit result as JSON: ' . $exception->getMessage(),
                    OutputInterface::OUTPUT_RAW
                );

                return Command::FAILURE;
            }

            // OUTPUT_RAW keeps the console formatter from interpreting tag-like
            // text (e.g. "<info>") that may occur inside audited expressions.
            $output->writeln($json, OutputInterface::OUTPUT_RAW);

            return Command::SUCCESS;
        }

        $this->renderSummary(new SymfonyStyle($input, $output), $result, (bool) $input->getOption('details'));

        return Command::SUCCESS;
    }

    /**
     * Prints the status, the summary counters, any warnings and, when requested,
     * one table row per fallback accessor.
     *
     * Missing or malformed sections of the result are rendered as defaults
     * ('UNKNOWN', 0, empty) rather than raising.
     *
     * @param array<string, mixed> $result
     */
    private function renderSummary(SymfonyStyle $io, array $result, bool $details): void
    {
        $io->title('RetrieX configuration source audit');

        $summary = is_array($result['summary'] ?? null) ? $result['summary'] : [];
        $io->definitionList(
            ['status' => (string) ($result['status'] ?? 'UNKNOWN')],
            ['yaml_parameter_paths' => (string) ($summary['yaml_parameter_paths'] ?? 0)],
            ['php_constants' => (string) ($summary['php_constants'] ?? 0)],
            ['php_only_constants' => (string) ($summary['php_only_constants'] ?? 0)],
            ['fallback_accessors' => (string) ($summary['fallback_accessors'] ?? 0)],
            ['fallback_accessors_with_yaml' => (string) ($summary['fallback_accessors_with_yaml'] ?? 0)],
            ['fallback_accessors_missing_yaml' => (string) ($summary['fallback_accessors_missing_yaml'] ?? 0)],
            ['constructor_defaults' => (string) ($summary['constructor_defaults'] ?? 0)],
            ['constructor_defaults_without_yaml_mapping' => (string) ($summary['constructor_defaults_without_yaml_mapping'] ?? 0)]
        );

        $warnings = is_array($result['warnings'] ?? null) ? $result['warnings'] : [];
        if ($warnings !== []) {
            $io->section('Warnings');
            foreach ($warnings as $warning) {
                $io->writeln('- ' . (string) $warning);
            }
        }

        if (!$details) {
            $io->note('Use --details for fallback rows or --json for the complete machine-readable audit.');

            return;
        }

        $fallbackRows = [];
        foreach (($result['fallback_accessors'] ?? []) as $item) {
            if (!is_array($item)) {
                continue;
            }

            $fallbackRows[] = [
                (string) ($item['class'] ?? ''),
                (string) ($item['line'] ?? ''),
                (string) ($item['key'] ?? ''),
                ((bool) ($item['yaml_present'] ?? false)) ? 'yes' : 'no',
                (string) ($item['source'] ?? ''),
            ];
        }

        if ($fallbackRows !== []) {
            $io->section('Fallback accessors');
            $io->table(['Class', 'Line', 'Key', 'YAML', 'Source'], $fallbackRows);
        }
    }
}

View File

@@ -0,0 +1,341 @@
<?php
declare(strict_types=1);
namespace App\Config;
use Symfony\Component\Yaml\Yaml;
/**
 * Static, read-only audit of where configuration values come from.
 *
 * Scans `src/Config/*Config.php` with regular expressions (the files are never
 * executed) and compares what it finds against the parameter paths declared in
 * `config/retriex/*.yaml`. Reported are class constants, `$this-><helper>('key',
 * <default>)` calls that still carry a PHP fallback, and constructor arguments
 * with default values.
 */
final readonly class ConfigSourceAuditProvider
{
    /** Short class name => YAML parameter that holds the class' config array. */
    private const CONFIG_PARAMETER_BY_CLASS = [
        'AgentRunnerConfig' => 'retriex.agent.config',
        'CommerceIntentConfig' => 'retriex.intent.commerce.config',
        'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
        'DomainVocabularyConfig' => 'retriex.vocabulary.config',
        'IntentLightConfig' => 'retriex.intent.light.config',
        'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
        'PromptBuilderConfig' => 'retriex.prompt.config',
        'QueryEnricherConfig' => 'retriex.query_enrichment.config',
        'SalesIntentConfig' => 'retriex.intent.sales.config',
        'SearchRepairConfig' => 'retriex.search_repair.config',
        'ShopServiceConfig' => 'retriex.shop_matching.config',
        'StopWordsConfig' => 'retriex.stopwords.config',
    ];

    /** Short class name => constructor argument => YAML parameter path. */
    private const CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT = [
        'SearchRepairConfig' => [
            'enabled' => 'retriex.commerce.search_repair.enabled',
            'maxRepairQueries' => 'retriex.commerce.search_repair.max_queries',
            'minPrimaryResultsWithoutRepair' => 'retriex.commerce.search_repair.min_primary_results_without_repair',
        ],
    ];

    /**
     * @param string $projectDir Directory that contains `src/Config` and `config/retriex`.
     */
    public function __construct(private string $projectDir)
    {
    }

    /**
     * Runs the complete audit.
     *
     * The status is 'OK' only when there is no fallback accessor without a YAML
     * path, no PHP-only constant and no constructor default without a YAML
     * mapping; otherwise it is 'WARN'.
     *
     * @return array<string, mixed> Keys: status, summary, warnings,
     *                              fallback_accessors, constructor_defaults, php_constants.
     */
    public function audit(): array
    {
        $yamlPaths = $this->collectYamlParameterPaths();
        $fallbackAccessors = [];
        $constructorDefaults = [];
        $phpConstants = [];

        foreach ($this->configFiles() as $filePath) {
            $content = file_get_contents($filePath);
            if ($content === false) {
                // Unreadable file: nothing can be scanned, same outcome as an empty file.
                continue;
            }

            $class = pathinfo($filePath, PATHINFO_FILENAME);
            $configParameter = self::CONFIG_PARAMETER_BY_CLASS[$class] ?? null;

            foreach ($this->findConstants($content) as $constant) {
                $phpConstants[] = [
                    'class' => $class,
                    'constant' => $constant['name'],
                    'line' => $constant['line'],
                    'config_parameter' => $configParameter,
                    'source' => $configParameter === null ? 'php_only' : 'php_default_candidate',
                ];
            }

            foreach ($this->findFallbackAccessors($content) as $accessor) {
                // Without a known config parameter no YAML path can be derived at all.
                $yamlPath = $configParameter !== null ? $configParameter . '.' . $accessor['key'] : null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
                $fallbackAccessors[] = [
                    'class' => $class,
                    'line' => $accessor['line'],
                    'helper' => $accessor['helper'],
                    'key' => $accessor['key'],
                    'config_parameter' => $configParameter,
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_with_php_fallback' : 'php_fallback_if_yaml_missing_or_invalid',
                    'default_expression' => $this->compactExpression($accessor['default_expression']),
                ];
            }

            foreach ($this->findConstructorDefaults($content) as $argument) {
                $yamlPath = self::CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT[$class][$argument['argument']] ?? null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
                $constructorDefaults[] = [
                    'class' => $class,
                    'line' => $argument['line'],
                    'argument' => $argument['argument'],
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_service_argument_with_php_default' : 'php_constructor_default',
                    'default_expression' => $this->compactExpression($argument['default_expression']),
                ];
            }
        }

        $missingYamlFallbacks = array_values(array_filter(
            $fallbackAccessors,
            static fn (array $item): bool => $item['yaml_present'] === false
        ));
        $phpOnlyConstants = array_values(array_filter(
            $phpConstants,
            static fn (array $item): bool => $item['source'] === 'php_only'
        ));
        $constructorPhpDefaults = array_values(array_filter(
            $constructorDefaults,
            static fn (array $item): bool => $item['source'] === 'php_constructor_default'
        ));

        $isClean = $missingYamlFallbacks === [] && $phpOnlyConstants === [] && $constructorPhpDefaults === [];

        return [
            'status' => $isClean ? 'OK' : 'WARN',
            'summary' => [
                'yaml_parameter_paths' => count($yamlPaths),
                'php_constants' => count($phpConstants),
                'php_only_constants' => count($phpOnlyConstants),
                'fallback_accessors' => count($fallbackAccessors),
                'fallback_accessors_with_yaml' => count($fallbackAccessors) - count($missingYamlFallbacks),
                'fallback_accessors_missing_yaml' => count($missingYamlFallbacks),
                'constructor_defaults' => count($constructorDefaults),
                'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults),
            ],
            'warnings' => $this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
            'fallback_accessors' => $fallbackAccessors,
            'constructor_defaults' => $constructorDefaults,
            'php_constants' => $phpConstants,
        ];
    }

    /**
     * Lists the config class files to scan, sorted for a stable report order.
     *
     * @return list<string> File paths matching `src/Config/*Config.php`.
     */
    private function configFiles(): array
    {
        $files = glob($this->projectDir . '/src/Config/*Config.php');
        if (!is_array($files)) {
            return [];
        }

        sort($files);

        return $files;
    }

    /**
     * Collects every parameter path declared under `parameters:` in
     * `config/retriex/*.yaml`, including dotted paths of nested string keys.
     *
     * @return array<string, true> Path => true, usable as a lookup set.
     */
    private function collectYamlParameterPaths(): array
    {
        $paths = [];
        $files = glob($this->projectDir . '/config/retriex/*.yaml');
        if (!is_array($files)) {
            return $paths;
        }

        foreach ($files as $file) {
            $parsed = Yaml::parseFile($file);
            if (!is_array($parsed)) {
                continue;
            }

            $parameters = $parsed['parameters'] ?? [];
            if (!is_array($parameters)) {
                continue;
            }

            foreach ($parameters as $name => $value) {
                if (!is_string($name) || $name === '') {
                    continue;
                }

                $this->flattenYamlPath($name, $value, $paths);
            }
        }

        return $paths;
    }

    /**
     * Records $path and recurses into string-keyed children as `$path.$key`.
     *
     * @param array<string, true> $paths Accumulator, extended in place.
     */
    private function flattenYamlPath(string $path, mixed $value, array &$paths): void
    {
        $paths[$path] = true;
        if (!is_array($value)) {
            return;
        }

        foreach ($value as $key => $child) {
            // Integer keys are list positions, not addressable config keys.
            if (!is_string($key)) {
                continue;
            }

            $this->flattenYamlPath($path . '.' . $key, $child, $paths);
        }
    }

    /**
     * Finds constant declarations in PHP source text.
     *
     * Recognises any combination of `final` and a visibility keyword (or none),
     * an optional constant type, and any valid constant name.
     *
     * @return list<array{name:string,line:int}>
     */
    private function findConstants(string $content): array
    {
        $pattern = '/^\s*(?:(?:final|public|private|protected)\s+)*const\s+'
            . '(?:[?\\\\A-Za-z_][A-Za-z0-9_\\\\|&()?]*\s+)?'
            . '([A-Za-z_][A-Za-z0-9_]*)\s*=/m';
        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        $constants = [];
        foreach ($matches[1] as [$name, $offset]) {
            $constants[] = [
                'name' => (string) $name,
                'line' => $this->lineNumber($content, (int) $offset),
            ];
        }

        return $constants;
    }

    /**
     * Finds `$this-><helper>('key', <default>)` calls.
     *
     * The default expression is everything between the first comma and the
     * call's own closing parenthesis. Nested parentheses and quoted strings are
     * skipped as units, so `max(1, self::X)` or `'a)b'` are captured whole.
     * Limitation: a quote character inside a comment within the argument list
     * is read as the start of a string.
     *
     * @return list<array{helper:string,key:string,line:int,default_expression:string}>
     */
    private function findFallbackAccessors(string $content): array
    {
        $helpers = [
            'bool',
            'float',
            'floatValue',
            'getBool',
            'getInt',
            'getString',
            'getStringList',
            'int',
            'intValue',
            'raw',
            'string',
            'stringList',
            'stringListMap',
            'stringMap',
            'value',
        ];

        // Group 3 is recursive: plain characters, a single- or double-quoted
        // string, or a parenthesised group that itself matches group 3.
        $pattern = '/\$this->(' . implode('|', $helpers) . ')\(\s*[\'"]([^\'"]+)[\'"]\s*,\s*'
            . '((?:[^()\'"]++|\'(?:[^\'\\\\]|\\\\.)*\'|"(?:[^"\\\\]|\\\\.)*"|\((?3)\))*)\)/s';
        if (!preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        $accessors = [];
        foreach ($matches[0] as $index => [, $offset]) {
            $accessors[] = [
                'helper' => (string) $matches[1][$index][0],
                'key' => (string) $matches[2][$index][0],
                'line' => $this->lineNumber($content, (int) $offset),
                'default_expression' => (string) $matches[3][$index][0],
            ];
        }

        return $accessors;
    }

    /**
     * Finds constructor arguments that declare a default value.
     *
     * Only the first `__construct` in the text is inspected. The argument named
     * `config` and the defaults `null` and `[]` are not reported.
     *
     * @return list<array{argument:string,line:int,default_expression:string}>
     */
    private function findConstructorDefaults(string $content): array
    {
        if (!preg_match('/function\s+__construct\s*\((.*?)\)\s*\{/s', $content, $constructorMatch, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        $parameterList = (string) $constructorMatch[1][0];
        $baseOffset = (int) $constructorMatch[1][1];

        $defaults = [];
        foreach ($this->splitTopLevelParameters($parameterList) as [$parameter, $parameterOffset]) {
            if (!preg_match('/\$([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.+)$/s', $parameter, $match, PREG_OFFSET_CAPTURE)) {
                continue;
            }

            $argument = (string) $match[1][0];
            $default = trim((string) $match[2][0]);
            if ($argument === 'config' || $default === 'null' || $default === '[]') {
                continue;
            }

            $defaults[] = [
                'argument' => $argument,
                // Offsets are relative to the parameter, which is relative to the list.
                'line' => $this->lineNumber($content, $baseOffset + $parameterOffset + (int) $match[1][1]),
                'default_expression' => $default,
            ];
        }

        return $defaults;
    }

    /**
     * Splits a parameter list at commas that are outside of brackets,
     * parentheses and quoted strings, so `['a', 'b']` stays in one piece.
     *
     * @return list<array{0:string,1:int}> Each parameter text with its byte offset in $parameterList.
     */
    private function splitTopLevelParameters(string $parameterList): array
    {
        $segments = [];
        $depth = 0;
        $quote = null;
        $start = 0;
        $length = strlen($parameterList);

        for ($i = 0; $i < $length; $i++) {
            $char = $parameterList[$i];

            if ($quote !== null) {
                if ($char === '\\') {
                    // Skip the escaped character so an escaped quote does not end the string.
                    $i++;
                } elseif ($char === $quote) {
                    $quote = null;
                }

                continue;
            }

            if ($char === '\'' || $char === '"') {
                $quote = $char;
            } elseif ($char === '(' || $char === '[') {
                $depth++;
            } elseif ($char === ')' || $char === ']') {
                $depth = max(0, $depth - 1);
            } elseif ($char === ',' && $depth === 0) {
                $segments[] = [substr($parameterList, $start, $i - $start), $start];
                $start = $i + 1;
            }
        }

        $segments[] = [substr($parameterList, $start), $start];

        return $segments;
    }

    /**
     * Converts a byte offset into a 1-based line number.
     */
    private function lineNumber(string $content, int $offset): int
    {
        return substr_count(substr($content, 0, $offset), "\n") + 1;
    }

    /**
     * Collapses whitespace runs to single spaces and shortens expressions of
     * more than 160 characters to their first 157 characters plus '...'.
     *
     * The cut is made on a character boundary so the result remains valid UTF-8
     * and can be JSON-encoded.
     */
    private function compactExpression(string $expression): string
    {
        $expression = trim(preg_replace('/\s+/', ' ', $expression) ?? $expression);
        if (strlen($expression) <= 160) {
            return $expression;
        }

        // Matches only when at least 161 characters exist; captures the first 157.
        $matched = preg_match('/^.{157}(?=.{4})/su', $expression, $head);
        if ($matched === 1) {
            return $head[0] . '...';
        }

        if ($matched === 0) {
            // More than 160 bytes but at most 160 characters: short enough.
            return $expression;
        }

        // Not valid UTF-8, so character boundaries are undefined; cut by bytes.
        return substr($expression, 0, 157) . '...';
    }

    /**
     * Builds one human-readable warning per non-empty finding category.
     *
     * @param list<array<string, mixed>> $missingYamlFallbacks
     * @param list<array<string, mixed>> $phpOnlyConstants
     * @param list<array<string, mixed>> $constructorPhpDefaults
     * @return list<string>
     */
    private function buildWarnings(array $missingYamlFallbacks, array $phpOnlyConstants, array $constructorPhpDefaults): array
    {
        $warnings = [];

        if ($missingYamlFallbacks !== []) {
            $warnings[] = sprintf(
                '%d config fallback accessor(s) still have no matching YAML path. These values must be migrated before PHP defaults can be removed.',
                count($missingYamlFallbacks)
            );
        }

        if ($phpOnlyConstants !== []) {
            $warnings[] = sprintf(
                '%d PHP-only config constant(s) were found. Review whether each one is technical guardrail or should move to YAML.',
                count($phpOnlyConstants)
            );
        }

        if ($constructorPhpDefaults !== []) {
            $warnings[] = sprintf(
                '%d constructor default(s) have no YAML/service-parameter mapping in the audit. Review before enabling strict YAML-only validation.',
                count($constructorPhpDefaults)
            );
        }

        return $warnings;
    }
}