356 lines
13 KiB
PHP
356 lines
13 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Config;
|
|
|
|
use Symfony\Component\Yaml\Yaml;
|
|
|
|
/**
 * Static audit of where configuration values come from: YAML parameters or PHP code.
 *
 * The provider reads the source text of every `src/Config/*Config.php` file below the
 * project directory (the classes are never loaded or executed) and looks for three kinds
 * of PHP-side values: class constants, `$this->helper('key', default)` fallback accessors
 * and constructor argument defaults. Each finding is cross-referenced with the parameter
 * paths declared in `config/retriex/*.yaml`.
 *
 * The source scan is text based. Comments or heredocs containing unbalanced quotes or
 * brackets inside a call or a constructor signature can confuse it.
 */
final readonly class ConfigSourceAuditProvider
{
    /**
     * Config class name (file name without extension) => YAML parameter holding its config.
     * Fallback accessor keys are appended to this parameter name to build the YAML path.
     */
    private const CONFIG_PARAMETER_BY_CLASS = [
        'AgentRunnerConfig' => 'retriex.agent.config',
        'CommerceIntentConfig' => 'retriex.intent.commerce.config',
        'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
        'ContextServiceConfig' => 'retriex.context.config',
        'CatalogIntentConfig' => 'retriex.intent.catalog.config',
        'DomainVocabularyConfig' => 'retriex.vocabulary.config',
        'IntentLightConfig' => 'retriex.intent.light.config',
        'GovernanceConfig' => 'retriex.governance.config',
        'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
        'PromptBuilderConfig' => 'retriex.prompt.config',
        'QueryEnricherConfig' => 'retriex.query_enrichment.config',
        'SalesIntentConfig' => 'retriex.intent.sales.config',
        'SearchRepairConfig' => 'retriex.search_repair.config',
        'ShopServiceConfig' => 'retriex.shop_matching.config',
        'StopWordsConfig' => 'retriex.stopwords.config',
    ];

    /**
     * Config class name => constructor argument name => YAML parameter path that is
     * expected to supply that argument.
     */
    private const CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT = [
        'ModelGenerationDefaultsConfig' => [
            'modelName' => 'retriex.model.default_name',
            'stream' => 'retriex.model.default_stream',
            'temperature' => 'retriex.model.default_temperature',
            'topK' => 'retriex.model.default_top_k',
            'topP' => 'retriex.model.default_top_p',
            'repeatPenalty' => 'retriex.model.default_repeat_penalty',
            'numCtx' => 'retriex.model.default_num_ctx',
            'retrievalMaxChunks' => 'retriex.model.default_retrieval_max_chunks',
            'retrievalVectorTopK' => 'retriex.model.default_retrieval_vector_top_k',
        ],
        'SearchRepairConfig' => [
            'enabled' => 'retriex.commerce.search_repair.enabled',
            'maxRepairQueries' => 'retriex.commerce.search_repair.max_queries',
            'minPrimaryResultsWithoutRepair' => 'retriex.commerce.search_repair.min_primary_results_without_repair',
        ],
    ];

    /** Maximum size, in bytes, of a default expression shown in the report. */
    private const MAX_EXPRESSION_BYTES = 160;

    /**
     * @param string $projectDir Directory that contains `src/Config` and `config/retriex`.
     */
    public function __construct(private string $projectDir)
    {
    }

    /**
     * Runs the audit and returns the full report.
     *
     * The report contains a `status` (`OK` when nothing PHP-only was found, `WARN`
     * otherwise), a `summary` of counters, human readable `warnings`, and the detailed
     * `fallback_accessors`, `constructor_defaults` and `php_constants` lists.
     *
     * @return array<string, mixed>
     */
    public function audit(): array
    {
        $yamlPaths = $this->collectYamlParameterPaths();
        $fallbackAccessors = [];
        $constructorDefaults = [];
        $phpConstants = [];

        foreach ($this->configFiles() as $filePath) {
            $content = file_get_contents($filePath);
            if ($content === false) {
                // An unreadable file contributes no findings (same outcome as scanning an empty file).
                continue;
            }

            $class = pathinfo($filePath, PATHINFO_FILENAME);
            $configParameter = self::CONFIG_PARAMETER_BY_CLASS[$class] ?? null;

            foreach ($this->findConstants($content) as $constant) {
                $phpConstants[] = [
                    'class' => $class,
                    'constant' => $constant['name'],
                    'line' => $constant['line'],
                    'config_parameter' => $configParameter,
                    'source' => $configParameter === null ? 'php_only' : 'php_default_candidate',
                ];
            }

            foreach ($this->findFallbackAccessors($content) as $accessor) {
                // Without a known config parameter there is no YAML path to look up.
                $yamlPath = $configParameter !== null ? $configParameter . '.' . $accessor['key'] : null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);

                $fallbackAccessors[] = [
                    'class' => $class,
                    'line' => $accessor['line'],
                    'helper' => $accessor['helper'],
                    'key' => $accessor['key'],
                    'config_parameter' => $configParameter,
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_with_php_fallback' : 'php_fallback_if_yaml_missing_or_invalid',
                    'default_expression' => $this->compactExpression($accessor['default_expression']),
                ];
            }

            foreach ($this->findConstructorDefaults($content) as $argument) {
                $yamlPath = self::CONSTRUCTOR_PARAMETER_BY_CLASS_AND_ARGUMENT[$class][$argument['argument']] ?? null;
                $yamlPresent = $yamlPath !== null && isset($yamlPaths[$yamlPath]);

                $constructorDefaults[] = [
                    'class' => $class,
                    'line' => $argument['line'],
                    'argument' => $argument['argument'],
                    'yaml_path' => $yamlPath,
                    'yaml_present' => $yamlPresent,
                    'source' => $yamlPresent ? 'yaml_service_argument_with_php_default' : 'php_constructor_default',
                    'default_expression' => $this->compactExpression($argument['default_expression']),
                ];
            }
        }

        $missingYamlFallbacks = array_values(array_filter(
            $fallbackAccessors,
            static fn (array $item): bool => $item['yaml_present'] === false
        ));
        $phpOnlyConstants = array_values(array_filter(
            $phpConstants,
            static fn (array $item): bool => $item['source'] === 'php_only'
        ));
        $constructorPhpDefaults = array_values(array_filter(
            $constructorDefaults,
            static fn (array $item): bool => $item['source'] === 'php_constructor_default'
        ));

        $status = ($missingYamlFallbacks === [] && $phpOnlyConstants === [] && $constructorPhpDefaults === []) ? 'OK' : 'WARN';

        return [
            'status' => $status,
            'summary' => [
                'yaml_parameter_paths' => count($yamlPaths),
                'php_constants' => count($phpConstants),
                'php_only_constants' => count($phpOnlyConstants),
                'fallback_accessors' => count($fallbackAccessors),
                'fallback_accessors_with_yaml' => count($fallbackAccessors) - count($missingYamlFallbacks),
                'fallback_accessors_missing_yaml' => count($missingYamlFallbacks),
                'constructor_defaults' => count($constructorDefaults),
                'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults),
            ],
            'warnings' => $this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
            'fallback_accessors' => $fallbackAccessors,
            'constructor_defaults' => $constructorDefaults,
            'php_constants' => $phpConstants,
        ];
    }

    /**
     * Lists the config class files to scan, sorted by path.
     *
     * @return list<string> Paths matching `<projectDir>/src/Config/*Config.php`.
     */
    private function configFiles(): array
    {
        $files = glob($this->projectDir . '/src/Config/*Config.php');
        if (!is_array($files)) {
            return [];
        }

        sort($files);

        return $files;
    }

    /**
     * Collects every dotted parameter path declared under the `parameters` key of the
     * YAML files in `<projectDir>/config/retriex`.
     *
     * @return array<string, true> Set of paths, keyed by path for O(1) lookups.
     */
    private function collectYamlParameterPaths(): array
    {
        $paths = [];
        $files = glob($this->projectDir . '/config/retriex/*.yaml');
        if (!is_array($files)) {
            return $paths;
        }

        foreach ($files as $file) {
            $parsed = Yaml::parseFile($file);
            if (!is_array($parsed)) {
                continue;
            }

            $parameters = $parsed['parameters'] ?? [];
            if (!is_array($parameters)) {
                continue;
            }

            foreach ($parameters as $name => $value) {
                if (!is_string($name) || $name === '') {
                    continue;
                }

                $this->flattenYamlPath($name, $value, $paths);
            }
        }

        return $paths;
    }

    /**
     * Records `$path` and, recursively, one dotted path per string key nested below it.
     *
     * Integer keys (list items) are not descended into, so a list is recorded only by
     * the path of the list itself.
     *
     * @param array<string, true> $paths Accumulator, modified in place.
     */
    private function flattenYamlPath(string $path, mixed $value, array &$paths): void
    {
        $paths[$path] = true;

        if (!is_array($value)) {
            return;
        }

        foreach ($value as $key => $child) {
            // PHP array keys are always int or string; only string keys form a path segment.
            if (!is_string($key)) {
                continue;
            }

            $this->flattenYamlPath($path . '.' . $key, $child, $paths);
        }
    }

    /**
     * Finds indented class constant declarations that carry an explicit visibility and
     * an upper-case name.
     *
     * @return list<array{name:string,line:int}>
     */
    private function findConstants(string $content): array
    {
        preg_match_all('/^\s+(?:public|private|protected)\s+const\s+([A-Z0-9_]+)\s*=/m', $content, $matches, PREG_OFFSET_CAPTURE);

        $constants = [];
        foreach ($matches[1] as [$name, $offset]) {
            $constants[] = [
                'name' => (string) $name,
                'line' => $this->lineNumber($content, (int) $offset),
            ];
        }

        return $constants;
    }

    /**
     * Finds `$this->helper('key', <default>)` calls, i.e. config reads that carry a PHP
     * fallback value.
     *
     * Calls with a single argument are ignored. The default expression is everything
     * between the first comma and the parenthesis that closes the call, so nested calls,
     * arrays and quoted parentheses are reported in full. Calls nested inside another
     * call's default expression are not reported separately.
     *
     * @return list<array{helper:string,key:string,line:int,default_expression:string}>
     */
    private function findFallbackAccessors(string $content): array
    {
        $helpers = [
            'bool',
            'float',
            'floatValue',
            'getBool',
            'getInt',
            'getString',
            'getStringList',
            'int',
            'intValue',
            'raw',
            'string',
            'stringList',
            'stringListMap',
            'stringMap',
            'value',
        ];

        // Matches only the head of the call, up to the first character of the default
        // argument. A regular expression cannot find the matching ")" of a nested
        // expression, so the end of the call is located by findExpressionEnd().
        $pattern = '/\$this->(' . implode('|', $helpers) . ')\(\s*[\'\"]([^\'\"]+)[\'\"]\s*,\s*/';

        $accessors = [];
        $searchFrom = 0;

        while (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE, $searchFrom) === 1) {
            [$head, $headOffset] = $match[0];
            $defaultStart = $headOffset + strlen($head);
            $defaultEnd = $this->findExpressionEnd($content, $defaultStart, false);

            if ($defaultEnd === null || $content[$defaultEnd] !== ')') {
                // No closing parenthesis for this call: skip it and keep looking further on.
                $searchFrom = $defaultStart;

                continue;
            }

            $accessors[] = [
                'helper' => $match[1][0],
                'key' => $match[2][0],
                'line' => $this->lineNumber($content, $headOffset),
                'default_expression' => substr($content, $defaultStart, $defaultEnd - $defaultStart),
            ];

            // Resume after the closing parenthesis so matches never overlap.
            $searchFrom = $defaultEnd + 1;
        }

        return $accessors;
    }

    /**
     * Finds the constructor arguments that declare a default value.
     *
     * Only the first `__construct` with a body is inspected. The `$config` argument and
     * defaults that are exactly `null` or `[]` are skipped. Parameters are split on
     * top-level commas only, so array, call and string defaults are reported in full.
     *
     * @return list<array{argument:string,line:int,default_expression:string}>
     */
    private function findConstructorDefaults(string $content): array
    {
        if (preg_match('/function\s+__construct\s*\((.*?)\)\s*\{/s', $content, $constructorMatch, PREG_OFFSET_CAPTURE) !== 1) {
            return [];
        }

        [$parameterList, $baseOffset] = $constructorMatch[1];
        $length = strlen($parameterList);
        $defaults = [];

        for ($start = 0; $start < $length; $start = $end + 1) {
            // A parameter ends at the next top-level comma or at the end of the list.
            $end = $this->findExpressionEnd($parameterList, $start, true) ?? $length;
            $parameter = substr($parameterList, $start, $end - $start);

            if (preg_match('/\$([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.+)/s', $parameter, $match, PREG_OFFSET_CAPTURE) !== 1) {
                continue;
            }

            $argument = $match[1][0];
            $default = trim($match[2][0]);

            if ($argument === 'config' || $default === 'null' || $default === '[]') {
                continue;
            }

            $defaults[] = [
                'argument' => $argument,
                'line' => $this->lineNumber($content, $baseOffset + $start + $match[1][1]),
                'default_expression' => $default,
            ];
        }

        return $defaults;
    }

    /**
     * Scans `$code` from `$start` and returns the offset at which the current expression ends.
     *
     * The expression ends at the first closing bracket that has no matching opener after
     * `$start`, or, when `$stopAtComma` is true, at the first comma outside any brackets.
     * Single- and double-quoted string literals are skipped, honouring backslash escapes.
     *
     * @return int|null Offset of the terminating character, or null if the end of
     *                  `$code` is reached first.
     */
    private function findExpressionEnd(string $code, int $start, bool $stopAtComma): ?int
    {
        $depth = 0;
        $length = strlen($code);

        for ($i = $start; $i < $length; ++$i) {
            $char = $code[$i];

            if ($char === '\'' || $char === '"') {
                // Jump to the closing quote; a backslash escapes the character after it.
                for (++$i; $i < $length && $code[$i] !== $char; ++$i) {
                    if ($code[$i] === '\\') {
                        ++$i;
                    }
                }

                continue;
            }

            if ($char === '(' || $char === '[' || $char === '{') {
                ++$depth;

                continue;
            }

            if ($char === ')' || $char === ']' || $char === '}') {
                if ($depth === 0) {
                    return $i;
                }

                --$depth;

                continue;
            }

            if ($stopAtComma && $depth === 0 && $char === ',') {
                return $i;
            }
        }

        return null;
    }

    /**
     * Converts a byte offset into a 1-based line number.
     */
    private function lineNumber(string $content, int $offset): int
    {
        return substr_count(substr($content, 0, $offset), "\n") + 1;
    }

    /**
     * Collapses whitespace runs to single spaces and caps the expression at
     * MAX_EXPRESSION_BYTES bytes, marking a cut with a trailing `...`.
     *
     * When the expression is valid UTF-8 the cut never splits a multibyte character, so
     * the result is valid UTF-8 as well.
     */
    private function compactExpression(string $expression): string
    {
        $expression = trim(preg_replace('/\s+/', ' ', $expression) ?? $expression);

        if (strlen($expression) <= self::MAX_EXPRESSION_BYTES) {
            return $expression;
        }

        $prefix = substr($expression, 0, self::MAX_EXPRESSION_BYTES - 3);

        // The empty pattern with the "u" modifier succeeds only on valid UTF-8 subjects.
        if (preg_match('//u', $expression) === 1) {
            // Drop the bytes of a character split by the cut (at most three).
            while (preg_match('//u', $prefix) !== 1) {
                $prefix = substr($prefix, 0, -1);
            }
        }

        return $prefix . '...';
    }

    /**
     * Builds one summary sentence for each non-empty category of findings.
     *
     * @param list<array<string, mixed>> $missingYamlFallbacks   Fallback accessors without a YAML path.
     * @param list<array<string, mixed>> $phpOnlyConstants       Constants of classes without a config parameter.
     * @param list<array<string, mixed>> $constructorPhpDefaults Constructor defaults not backed by YAML.
     *
     * @return list<string>
     */
    private function buildWarnings(array $missingYamlFallbacks, array $phpOnlyConstants, array $constructorPhpDefaults): array
    {
        $warnings = [];

        if ($missingYamlFallbacks !== []) {
            $warnings[] = sprintf(
                '%d config fallback accessor(s) still have no matching YAML path. These values must be migrated before PHP defaults can be removed.',
                count($missingYamlFallbacks)
            );
        }

        if ($phpOnlyConstants !== []) {
            $warnings[] = sprintf(
                '%d PHP-only config constant(s) were found. Review whether each one is technical guardrail or should move to YAML.',
                count($phpOnlyConstants)
            );
        }

        if ($constructorPhpDefaults !== []) {
            $warnings[] = sprintf(
                '%d constructor default(s) have no YAML/service-parameter mapping in the audit. Review before enabling strict YAML-only validation.',
                count($constructorPhpDefaults)
            );
        }

        return $warnings;
    }
}
|