Files
MtoRagSystem/src/Config/ConfigSourceAuditProvider.php
2026-05-01 17:40:48 +02:00

356 lines
13 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
use Symfony\Component\Yaml\Yaml;
/**
 * Audits whether configuration values come from YAML parameters or from PHP-side defaults.
 *
 * Every `*Config.php` file below `<projectDir>/src/Config` is scanned textually (regular
 * expressions plus a small bracket-aware scanner; the files are never loaded or reflected)
 * and the findings are cross-checked against the parameter paths declared in
 * `<projectDir>/config/retriex/*.yaml`.
 */
final readonly class ConfigSourceAuditProvider
{
    /** Maps a config class short name to the YAML parameter that holds its config array. */
    private const CONFIG_PARAMETER_BY_CLASS = [
        'AgentRunnerConfig' => 'retriex.agent.config',
        'CommerceIntentConfig' => 'retriex.intent.commerce.config',
        'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
        'ContextServiceConfig' => 'retriex.context.config',
        'CatalogIntentConfig' => 'retriex.intent.catalog.config',
        'DomainVocabularyConfig' => 'retriex.vocabulary.config',
        'IntentLightConfig' => 'retriex.intent.light.config',
        'GovernanceConfig' => 'retriex.governance.config',
        'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
        'PromptBuilderConfig' => 'retriex.prompt.config',
        'QueryEnricherConfig' => 'retriex.query_enrichment.config',
        'SalesIntentConfig' => 'retriex.intent.sales.config',
        'SearchRepairConfig' => 'retriex.search_repair.config',
        'ShopServiceConfig' => 'retriex.shop_matching.config',
        'StopWordsConfig' => 'retriex.stopwords.config',
    ];

    /** Maps class short name => constructor argument name => YAML parameter path. */
    private const CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT = [
        'ModelGenerationDefaultsConfig' => [
            'modelName' => 'retriex.model.default_name',
            'stream' => 'retriex.model.default_stream',
            'temperature' => 'retriex.model.default_temperature',
            'topK' => 'retriex.model.default_top_k',
            'topP' => 'retriex.model.default_top_p',
            'repeatPenalty' => 'retriex.model.default_repeat_penalty',
            'numCtx' => 'retriex.model.default_num_ctx',
            'retrievalMaxChunks' => 'retriex.model.default_retrieval_max_chunks',
            'retrievalVectorTopK' => 'retriex.model.default_retrieval_vector_top_k',
        ],
        'SearchRepairConfig' => [
            'enabled' => 'retriex.commerce.search_repair.enabled',
            'maxRepairQueries' => 'retriex.commerce.search_repair.max_queries',
            'minPrimaryResultsWithoutRepair' => 'retriex.commerce.search_repair.min_primary_results_without_repair',
        ],
    ];

    /** Names of the `$this->helper('key', <default>)` methods that are treated as fallback accessors. */
    private const FALLBACK_HELPERS = [
        'bool',
        'float',
        'floatValue',
        'getBool',
        'getInt',
        'getString',
        'getStringList',
        'int',
        'intValue',
        'raw',
        'string',
        'stringList',
        'stringListMap',
        'stringMap',
        'value',
    ];

    /**
     * @param string $projectDir Project root; `src/Config` and `config/retriex` are resolved below it.
     */
    public function __construct(private string $projectDir)
    {
    }

    /**
     * Runs the audit and returns the full report.
     *
     * The report contains `status` ('OK' when nothing is flagged, otherwise 'WARN'),
     * a `summary` of counters, human-readable `warnings`, and the detailed lists
     * `fallback_accessors`, `constructor_defaults` and `php_constants`.
     *
     * @return array<string, mixed>
     */
    public function audit(): array
    {
        $yamlPaths = $this->collectYamlParameterPaths();
        $fallbackAccessors = [];
        $constructorDefaults = [];
        $phpConstants = [];

        foreach ($this->configFiles() as $filePath) {
            $content = file_get_contents($filePath);
            if ($content === false) {
                // An unreadable file contributes no findings.
                continue;
            }

            $class = pathinfo($filePath, PATHINFO_FILENAME);
            $configParameter = self::CONFIG_PARAMETER_BY_CLASS[$class] ?? null;

            foreach ($this->findConstants($content) as $constant) {
                $phpConstants[] = [
                    'class' => $class,
                    'constant' => $constant['name'],
                    'line' => $constant['line'],
                    'config_parameter' => $configParameter,
                    'source' => $configParameter === null ? 'php_only' : 'php_default_candidate',
                ];
            }

            foreach ($this->findFallbackAccessors($content) as $accessor) {
                $yamlPath = $configParameter !== null ? $configParameter . '.' . $accessor['key'] : null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
                $fallbackAccessors[] = [
                    'class' => $class,
                    'line' => $accessor['line'],
                    'helper' => $accessor['helper'],
                    'key' => $accessor['key'],
                    'config_parameter' => $configParameter,
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_with_php_fallback' : 'php_fallback_if_yaml_missing_or_invalid',
                    'default_expression' => $this->compactExpression($accessor['default_expression']),
                ];
            }

            foreach ($this->findConstructorDefaults($content) as $argument) {
                $yamlPath = self::CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT[$class][$argument['argument']] ?? null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);
                $constructorDefaults[] = [
                    'class' => $class,
                    'line' => $argument['line'],
                    'argument' => $argument['argument'],
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_service_argument_with_php_default' : 'php_constructor_default',
                    'default_expression' => $this->compactExpression($argument['default_expression']),
                ];
            }
        }

        $missingYamlFallbacks = array_values(array_filter(
            $fallbackAccessors,
            static fn (array $item): bool => $item['yaml_present'] === false,
        ));
        $phpOnlyConstants = array_values(array_filter(
            $phpConstants,
            static fn (array $item): bool => $item['source'] === 'php_only',
        ));
        $constructorPhpDefaults = array_values(array_filter(
            $constructorDefaults,
            static fn (array $item): bool => $item['source'] === 'php_constructor_default',
        ));

        $clean = $missingYamlFallbacks === [] && $phpOnlyConstants === [] && $constructorPhpDefaults === [];

        return [
            'status' => $clean ? 'OK' : 'WARN',
            'summary' => [
                'yaml_parameter_paths' => count($yamlPaths),
                'php_constants' => count($phpConstants),
                'php_only_constants' => count($phpOnlyConstants),
                'fallback_accessors' => count($fallbackAccessors),
                'fallback_accessors_with_yaml' => count($fallbackAccessors) - count($missingYamlFallbacks),
                'fallback_accessors_missing_yaml' => count($missingYamlFallbacks),
                'constructor_defaults' => count($constructorDefaults),
                'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults),
            ],
            'warnings' => $this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
            'fallback_accessors' => $fallbackAccessors,
            'constructor_defaults' => $constructorDefaults,
            'php_constants' => $phpConstants,
        ];
    }

    /**
     * Lists the config class files to audit, sorted by path for a stable report order.
     *
     * @return list<string>
     */
    private function configFiles(): array
    {
        $files = glob($this->projectDir . '/src/Config/*Config.php');
        if (!is_array($files)) {
            return [];
        }

        sort($files);

        return $files;
    }

    /**
     * Collects every dotted parameter path declared under `parameters:` in the retriex YAML files.
     *
     * @return array<string, true> set of paths, keyed by path for O(1) lookups
     */
    private function collectYamlParameterPaths(): array
    {
        $paths = [];
        $files = glob($this->projectDir . '/config/retriex/*.yaml');
        if (!is_array($files)) {
            return $paths;
        }

        foreach ($files as $file) {
            $parsed = Yaml::parseFile($file);
            if (!is_array($parsed)) {
                continue;
            }

            $parameters = $parsed['parameters'] ?? [];
            if (!is_array($parameters)) {
                continue;
            }

            foreach ($parameters as $name => $value) {
                if (!is_string($name) || $name === '') {
                    continue;
                }
                $this->flattenYamlPath($name, $value, $paths);
            }
        }

        return $paths;
    }

    /**
     * Registers $path and, recursively, `$path.<key>` for every string-keyed child of $value.
     *
     * Integer-keyed children (list items) are not turned into paths.
     *
     * @param array<string, true> $paths
     */
    private function flattenYamlPath(string $path, mixed $value, array &$paths): void
    {
        $paths[$path] = true;
        if (!is_array($value)) {
            return;
        }

        foreach ($value as $key => $child) {
            // PHP array keys are always int or string; only string keys form a path segment.
            if (!is_string($key)) {
                continue;
            }
            $this->flattenYamlPath($path . '.' . $key, $child, $paths);
        }
    }

    /**
     * Finds class constants declared with an explicit visibility.
     *
     * Accepts an optional `final` modifier and an optional PHP 8.3 constant type
     * (e.g. `private const int MAX = 10;`).
     *
     * @return list<array{name:string,line:int}>
     */
    private function findConstants(string $content): array
    {
        preg_match_all(
            '/^\s+(?:final\s+)?(?:public|private|protected)\s+(?:final\s+)?const\s+(?:[?\\\\\w|&()]+\s+)?([A-Z0-9_]+)\s*=/m',
            $content,
            $matches,
            PREG_OFFSET_CAPTURE,
        );

        $constants = [];
        foreach ($matches[1] as [$name, $offset]) {
            $constants[] = [
                'name' => (string) $name,
                'line' => $this->lineNumber($content, (int) $offset),
            ];
        }

        return $constants;
    }

    /**
     * Finds `$this-><helper>('key', <default>)` calls.
     *
     * `default_expression` is everything between the comma after the key and the parenthesis
     * that really closes the call, so nested calls, arrays and string literals containing
     * `)` are reported completely. Calls without a second argument are not matched. An
     * accessor nested inside another accessor's default is reported as its own entry.
     *
     * @return list<array{helper:string,key:string,line:int,default_expression:string}>
     */
    private function findFallbackAccessors(string $content): array
    {
        $pattern = '/\$this->(' . implode('|', self::FALLBACK_HELPERS) . ')\(\s*[\'\"]([^\'\"]+)[\'\"]\s*,\s*/';
        preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

        $accessors = [];
        foreach ($matches as $match) {
            [$head, $offset] = $match[0];
            $defaultStart = (int) $offset + strlen((string) $head);
            $arguments = $this->scanArgumentList($content, $defaultStart);
            if ($arguments === null) {
                // No balanced closing parenthesis: not a complete call.
                continue;
            }

            $accessors[] = [
                'helper' => (string) $match[1][0],
                'key' => (string) $match[2][0],
                'line' => $this->lineNumber($content, (int) $offset),
                'default_expression' => substr($content, $defaultStart, $arguments['end'] - $defaultStart),
            ];
        }

        return $accessors;
    }

    /**
     * Finds the default values declared in the first `__construct` signature that has a body.
     *
     * The parameter list is split on top-level commas only, so defaults such as
     * `['a', 'b']`, `'a,b'` or `new Foo()` are reported completely. The `$config`
     * argument and `null` / `[]` defaults are skipped.
     *
     * @return list<array{argument:string,line:int,default_expression:string}>
     */
    private function findConstructorDefaults(string $content): array
    {
        if (preg_match('/function\s+__construct\s*\(/', $content, $head, PREG_OFFSET_CAPTURE) !== 1) {
            return [];
        }

        $listStart = (int) $head[0][1] + strlen((string) $head[0][0]);
        $signature = $this->scanArgumentList($content, $listStart);
        if ($signature === null) {
            return [];
        }

        // Only constructors with a body are audited: the closing ")" must be followed by "{".
        if (preg_match('/\G\)\s*\{/', $content, offset: $signature['end']) !== 1) {
            return [];
        }

        $defaults = [];
        $segmentStart = $listStart;
        foreach ([...$signature['commas'], $signature['end']] as $segmentEnd) {
            $segmentOffset = $segmentStart;
            $segment = substr($content, $segmentOffset, $segmentEnd - $segmentOffset);
            $segmentStart = $segmentEnd + 1;

            if (preg_match('/\$([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.*)$/s', $segment, $match, PREG_OFFSET_CAPTURE) !== 1) {
                continue;
            }

            $argument = (string) $match[1][0];
            $default = trim((string) $match[2][0]);
            if ($argument === 'config' || $default === 'null' || $default === '[]') {
                continue;
            }

            $defaults[] = [
                'argument' => $argument,
                'line' => $this->lineNumber($content, $segmentOffset + (int) $match[1][1]),
                'default_expression' => $default,
            ];
        }

        return $defaults;
    }

    /**
     * Scans an argument/parameter list starting right after its opening parenthesis.
     *
     * Nested `()`, `[]`, `{}`, quoted string literals and `//` or block comments are
     * skipped, so only the list's own delimiters are reported. `#` comments and heredocs
     * are not recognised.
     *
     * @return array{end:int,commas:list<int>}|null offset of the closing ")" and of every
     *                                               top-level comma, or null when unbalanced
     */
    private function scanArgumentList(string $content, int $start): ?array
    {
        $length = strlen($content);
        $depth = 0;
        $commas = [];

        for ($i = $start; $i < $length; $i++) {
            $char = $content[$i];
            $next = $content[$i + 1] ?? '';

            if ($char === '\'' || $char === '"') {
                // Skip to the matching quote; a backslash escapes the following byte.
                for ($i++; $i < $length && $content[$i] !== $char; $i++) {
                    if ($content[$i] === '\\') {
                        $i++;
                    }
                }
                continue;
            }

            if ($char === '/' && $next === '/') {
                $lineEnd = strpos($content, "\n", $i);
                if ($lineEnd === false) {
                    return null;
                }
                $i = $lineEnd;
                continue;
            }

            if ($char === '/' && $next === '*') {
                $commentEnd = strpos($content, '*/', $i + 2);
                if ($commentEnd === false) {
                    return null;
                }
                $i = $commentEnd + 1;
                continue;
            }

            if ($char === '(' || $char === '[' || $char === '{') {
                $depth++;
                continue;
            }

            if ($char === ')' || $char === ']' || $char === '}') {
                if ($depth === 0) {
                    return $char === ')' ? ['end' => $i, 'commas' => $commas] : null;
                }
                $depth--;
                continue;
            }

            if ($char === ',' && $depth === 0) {
                $commas[] = $i;
            }
        }

        return null;
    }

    /**
     * Converts a byte offset into a 1-based line number.
     */
    private function lineNumber(string $content, int $offset): int
    {
        return substr_count(substr($content, 0, $offset), "\n") + 1;
    }

    /**
     * Collapses whitespace runs to single spaces and caps the expression at 160 bytes.
     *
     * Longer expressions are cut at (at most) 157 bytes and suffixed with `...`. The cut
     * is moved back to a UTF-8 character boundary so the result stays valid UTF-8.
     */
    private function compactExpression(string $expression): string
    {
        $expression = trim(preg_replace('/\s+/', ' ', $expression) ?? $expression);
        if (strlen($expression) <= 160) {
            return $expression;
        }

        $cut = 157;
        // 0b10xxxxxx is a UTF-8 continuation byte: cutting in front of it would split a character.
        while ($cut > 0 && (ord($expression[$cut]) & 0xC0) === 0x80) {
            $cut--;
        }

        return substr($expression, 0, $cut) . '...';
    }

    /**
     * Builds one summary warning per non-empty finding category.
     *
     * @param list<array<string, mixed>> $missingYamlFallbacks
     * @param list<array<string, mixed>> $phpOnlyConstants
     * @param list<array<string, mixed>> $constructorPhpDefaults
     * @return list<string>
     */
    private function buildWarnings(array $missingYamlFallbacks, array $phpOnlyConstants, array $constructorPhpDefaults): array
    {
        $warnings = [];

        if ($missingYamlFallbacks !== []) {
            $warnings[] = sprintf(
                '%d config fallback accessor(s) still have no matching YAML path. These values must be migrated before PHP defaults can be removed.',
                count($missingYamlFallbacks),
            );
        }

        if ($phpOnlyConstants !== []) {
            $warnings[] = sprintf(
                '%d PHP-only config constant(s) were found. Review whether each one is technical guardrail or should move to YAML.',
                count($phpOnlyConstants),
            );
        }

        if ($constructorPhpDefaults !== []) {
            $warnings[] = sprintf(
                '%d constructor default(s) have no YAML/service-parameter mapping in the audit. Review before enabling strict YAML-only validation.',
                count($constructorPhpDefaults),
            );
        }

        return $warnings;
    }
}