This commit is contained in:
team 1
2026-05-07 08:24:16 +02:00
parent f47aa2c42a
commit 0523984274
6 changed files with 766 additions and 5 deletions

View File

@@ -43,7 +43,7 @@ final class ConfigSourceAuditCommand extends Command
$this->renderSummary(new SymfonyStyle($input, $output), $result, (bool) $input->getOption('details'));
return Command::SUCCESS;
return ($result['status'] ?? 'UNKNOWN') === 'ERROR' ? Command::FAILURE : Command::SUCCESS;
}
/**
@@ -65,9 +65,20 @@ final class ConfigSourceAuditCommand extends Command
['constructor_defaults' => (string) ($summary['constructor_defaults'] ?? 0)],
['constructor_defaults_without_yaml_mapping' => (string) ($summary['constructor_defaults_without_yaml_mapping'] ?? 0)],
['genre_value_paths_with_source_paths' => (string) ($summary['genre_value_paths_with_source_paths'] ?? 0)],
['genre_declared_source_paths' => (string) ($summary['genre_declared_source_paths'] ?? 0)]
['genre_declared_source_paths' => (string) ($summary['genre_declared_source_paths'] ?? 0)],
['genre_source_of_truth_violations' => (string) ($summary['genre_source_of_truth_violations'] ?? 0)],
['genre_source_of_truth_fallback_empty' => (string) ($summary['genre_source_of_truth_fallback_empty'] ?? 0)],
['genre_source_of_truth_frozen_non_empty' => (string) ($summary['genre_source_of_truth_frozen_non_empty'] ?? 0)]
);
$errors = is_array($result['errors'] ?? null) ? $result['errors'] : [];
if ($errors !== []) {
$io->section('Errors');
foreach ($errors as $error) {
$io->writeln('- ' . (string) $error);
}
}
$warnings = is_array($result['warnings'] ?? null) ? $result['warnings'] : [];
if ($warnings !== []) {
$io->section('Warnings');
@@ -120,5 +131,26 @@ final class ConfigSourceAuditCommand extends Command
$io->section('Single-genre configuration source paths');
$io->table(['Genre value path', 'Legacy/effective source path'], $genreSourceRows);
}
$sourceOfTruthRows = [];
$sourceOfTruth = is_array($result['genre_source_of_truth'] ?? null) ? $result['genre_source_of_truth'] : [];
foreach (($sourceOfTruth['source_path_rows'] ?? []) as $item) {
if (!is_array($item)) {
continue;
}
$sourceOfTruthRows[] = [
(string) ($item['genre_value_path'] ?? ''),
(string) ($item['source_path'] ?? ''),
(string) ($item['state'] ?? ''),
(string) ($item['hash'] ?? ''),
];
}
if ($sourceOfTruthRows !== []) {
$io->section('Genre source-of-truth guard');
$io->table(['Genre value path', 'Legacy/effective source path', 'State', 'Hash'], $sourceOfTruthRows);
}
}
}

View File

@@ -47,8 +47,10 @@ final readonly class ConfigSourceAuditProvider
],
];
public function __construct(private string $projectDir)
{
public function __construct(
private string $projectDir,
private GenreSourceOfTruthGuard $genreSourceOfTruthGuard,
) {
}
/**
@@ -58,6 +60,7 @@ final readonly class ConfigSourceAuditProvider
{
$yamlPaths = $this->collectYamlParameterPaths();
$genreSourcePaths = $this->collectGenreConfigurationSourcePaths();
$genreSourceOfTruth = $this->genreSourceOfTruthGuard->auditFromFiles();
$fallbackAccessors = [];
$constructorDefaults = [];
$phpConstants = [];
@@ -124,6 +127,9 @@ final readonly class ConfigSourceAuditProvider
));
$status = ($missingYamlFallbacks === [] && $phpOnlyConstants === [] && $constructorPhpDefaults === []) ? 'OK' : 'WARN';
if (($genreSourceOfTruth['status'] ?? 'OK') === 'ERROR') {
$status = 'ERROR';
}
return [
'status' => $status,
@@ -138,12 +144,20 @@ final readonly class ConfigSourceAuditProvider
'constructor_defaults_without_yaml_mapping' => count($constructorPhpDefaults),
'genre_value_paths_with_source_paths' => count($genreSourcePaths),
'genre_declared_source_paths' => $this->countGenreDeclaredSourcePaths($genreSourcePaths),
'genre_source_of_truth_violations' => (int) (($genreSourceOfTruth['summary']['violations'] ?? 0)),
'genre_source_of_truth_fallback_empty' => (int) (($genreSourceOfTruth['summary']['legacy_fallback_empty'] ?? 0)),
'genre_source_of_truth_frozen_non_empty' => (int) (($genreSourceOfTruth['summary']['legacy_frozen_non_empty'] ?? 0)),
],
'warnings' => $this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
'errors' => $genreSourceOfTruth['errors'] ?? [],
'warnings' => array_merge(
$this->buildWarnings($missingYamlFallbacks, $phpOnlyConstants, $constructorPhpDefaults),
$genreSourceOfTruth['warnings'] ?? []
),
'fallback_accessors' => $fallbackAccessors,
'constructor_defaults' => $constructorDefaults,
'php_constants' => $phpConstants,
'genre_configuration_source_paths' => $genreSourcePaths,
'genre_source_of_truth' => $genreSourceOfTruth,
];
}

View File

@@ -0,0 +1,525 @@
<?php
declare(strict_types=1);
namespace App\Config;
use Symfony\Component\Yaml\Yaml;
/**
 * Audits `genre.configuration_values` against the legacy source paths they declare.
 *
 * Every value node must be covered by `source_paths`, every declared source path must
 * exist in the raw configuration, and every non-empty legacy value must either be listed
 * under `runtime_resolved_source_paths` or match a frozen SHA-256 hash registered under
 * `governance.genre_source_of_truth.frozen_non_empty_legacy_source_hashes`.
 */
final readonly class GenreSourceOfTruthGuard
{
    public function __construct(private string $projectDir)
    {
    }

    /**
     * Runs the audit, preferring the raw YAML configuration over the passed-in values.
     *
     * The passed `$genre` is only used when the raw config has no `genre` array, and the
     * guard settings from `$effectiveConfig` are only used when the raw config has no
     * `governance.genre_source_of_truth` array.
     *
     * @param array<string, mixed> $genre
     * @param array<string, mixed> $effectiveConfig
     * @return array{status:string, errors:list<string>, warnings:list<string>, summary:array<string,int>, source_path_rows:list<array<string,string>>}
     */
    public function validate(array $genre, array $effectiveConfig): array
    {
        $rawConfig = $this->loadRawConfig();
        $rawGenre = is_array($rawConfig['genre'] ?? null) ? $rawConfig['genre'] : $genre;
        $rawGovernance = is_array($rawConfig['governance'] ?? null) ? $rawConfig['governance'] : [];

        $guardConfig = $rawGovernance['genre_source_of_truth'] ?? null;
        if (!is_array($guardConfig) && isset($effectiveConfig['governance']) && is_array($effectiveConfig['governance'])) {
            $guardConfig = $effectiveConfig['governance']['genre_source_of_truth'] ?? [];
        }

        return $this->auditConfig($rawGenre, $rawConfig, is_array($guardConfig) ? $guardConfig : []);
    }

    /**
     * Runs the audit using only the configuration read from the project's YAML files.
     *
     * @return array{status:string, errors:list<string>, warnings:list<string>, summary:array<string,int>, source_path_rows:list<array<string,string>>}
     */
    public function auditFromFiles(): array
    {
        $config = $this->loadRawConfig();
        $genre = $config['genre'] ?? [];
        $governance = $config['governance'] ?? [];
        $guardConfig = is_array($governance) ? ($governance['genre_source_of_truth'] ?? []) : [];

        return $this->auditConfig(is_array($genre) ? $genre : [], $config, is_array($guardConfig) ? $guardConfig : []);
    }

    /**
     * Core audit: classifies every declared source path and validates the guard settings.
     *
     * @param array<string, mixed> $genre       The `genre` config section.
     * @param array<string, mixed> $config      Full config tree the source paths are resolved against.
     * @param array<string, mixed> $guardConfig The `governance.genre_source_of_truth` section.
     * @return array{status:string, errors:list<string>, warnings:list<string>, summary:array<string,int>, source_path_rows:list<array<string,string>>}
     */
    private function auditConfig(array $genre, array $config, array $guardConfig): array
    {
        $enabled = $guardConfig['enabled'] ?? true;
        if (in_array($enabled, [false, 'false', 0, '0'], true)) {
            return [
                'status' => 'DISABLED',
                'errors' => [],
                'warnings' => ['genre source-of-truth guard is disabled.'],
                'summary' => $this->emptySummary(),
                'source_path_rows' => [],
            ];
        }

        $errors = [];
        $warnings = [];
        $rows = [];

        $configurationValues = $genre['configuration_values'] ?? null;
        if (!is_array($configurationValues) || $configurationValues === []) {
            $errors[] = 'genre.configuration_values must be a non-empty map for source-of-truth enforcement.';

            return [
                'status' => 'ERROR',
                'errors' => $errors,
                'warnings' => $warnings,
                'summary' => $this->emptySummary(),
                'source_path_rows' => $rows,
            ];
        }

        $adaptationSurface = $genre['adaptation_surface'] ?? [];
        if (!is_array($adaptationSurface) || $adaptationSurface === []) {
            $errors[] = 'genre.adaptation_surface must be a non-empty map for source-of-truth enforcement.';
        } else {
            foreach ($adaptationSurface as $group => $definition) {
                if (!is_string($group) || trim($group) === '') {
                    continue;
                }
                if (!array_key_exists($group, $configurationValues)) {
                    $errors[] = sprintf('genre.configuration_values is missing required source-of-truth group for adaptation_surface.%s.', $group);
                }
            }
        }

        array_push($errors, ...$this->validateConfigurationValueCoverage($configurationValues));

        // Guard settings do not change while auditing, so build the lookups once
        // instead of re-deriving them for every declared source path.
        $frozenEntries = $this->frozenHashEntries($guardConfig);
        $frozenHashes = array_filter($frozenEntries, is_string(...));
        $runtimeResolved = $this->runtimeResolvedSourcePaths($guardConfig);

        $declaredSourcePaths = $this->collectSourcePaths($configurationValues);
        $uniqueSourcePaths = [];
        foreach ($declaredSourcePaths as $valuePath => $sourcePaths) {
            $valuePath = (string) $valuePath;
            foreach ($sourcePaths as $sourcePath) {
                $uniqueSourcePaths[$sourcePath] = true;

                $resolved = $this->valueAtPath($config, $sourcePath);
                if (!$resolved['found']) {
                    $errors[] = sprintf('genre.configuration_values.%s references unknown source path: %s.', $valuePath, $sourcePath);
                    $rows[] = $this->row($valuePath, $sourcePath, 'missing', '');
                    continue;
                }

                $value = $resolved['value'];
                if (!$this->hasNonEmptyValue($value)) {
                    $rows[] = $this->row($valuePath, $sourcePath, 'legacy_fallback_empty', '');
                    continue;
                }

                if (isset($runtimeResolved[$sourcePath])) {
                    $rows[] = $this->row($valuePath, $sourcePath, 'legacy_runtime_resolved_allowed', '');
                    continue;
                }

                $hash = $this->hashValue($value);
                $expectedHash = $frozenHashes[$sourcePath] ?? null;
                if ($expectedHash === null) {
                    $errors[] = sprintf(
                        'Legacy source path %s is non-empty but is not declared as a frozen fallback. Move the value to genre.yaml or add an explicit frozen fallback hash.',
                        $sourcePath
                    );
                    $rows[] = $this->row($valuePath, $sourcePath, 'legacy_non_empty_unregistered', $hash);
                    continue;
                }

                if (!hash_equals($expectedHash, $hash)) {
                    $errors[] = sprintf(
                        'Legacy source path %s changed outside genre.yaml. Expected frozen hash %s, got %s.',
                        $sourcePath,
                        $expectedHash,
                        $hash
                    );
                    $rows[] = $this->row($valuePath, $sourcePath, 'legacy_frozen_hash_mismatch', $hash);
                    continue;
                }

                $rows[] = $this->row($valuePath, $sourcePath, 'legacy_frozen_non_empty', $hash);
            }
        }

        // Validate every frozen-hash entry, including those whose value is not a string:
        // such entries are unusable for comparison and must be reported, not dropped.
        foreach ($frozenEntries as $sourcePath => $hash) {
            if (!isset($uniqueSourcePaths[$sourcePath])) {
                $errors[] = sprintf('governance.genre_source_of_truth.frozen_non_empty_legacy_source_hashes contains undeclared source path: %s.', $sourcePath);
            }
            if (!is_string($hash) || preg_match('/^[a-f0-9]{64}$/', $hash) !== 1) {
                $errors[] = sprintf('governance.genre_source_of_truth.frozen_non_empty_legacy_source_hashes.%s must be a SHA-256 hex hash.', $sourcePath);
            }
        }

        $summary = $this->summarizeRows($rows);
        $summary['configuration_value_groups'] = count($configurationValues);
        $summary['source_path_value_nodes'] = count($declaredSourcePaths);
        $summary['declared_source_paths'] = count($uniqueSourcePaths);
        $summary['violations'] = count($errors);

        return [
            'status' => $errors === [] ? 'OK' : 'ERROR',
            'errors' => $errors,
            'warnings' => $warnings,
            'summary' => $summary,
            'source_path_rows' => $rows,
        ];
    }

    /**
     * Checks that every value node carries or inherits a well-formed `source_paths` list.
     *
     * @param array<string, mixed> $configurationValues
     * @return list<string> Error messages; empty when coverage is complete.
     */
    private function validateConfigurationValueCoverage(array $configurationValues): array
    {
        $errors = [];
        $this->validateCoverageRecursive($configurationValues, '', false, $errors);

        return $errors;
    }

    /**
     * Walks one node of the configuration-values tree and appends coverage errors.
     *
     * @param array<int|string, mixed> $value               Node being inspected.
     * @param string                   $path                Dotted path of the node ('' for the root).
     * @param bool                     $coveredBySourcePath Whether an ancestor already declared source paths.
     * @param list<string>             $errors              Collected error messages (appended in place).
     */
    private function validateCoverageRecursive(array $value, string $path, bool $coveredBySourcePath, array &$errors): void
    {
        $sourcePaths = $value['source_paths'] ?? null;
        $hasSourcePaths = is_array($sourcePaths) && $sourcePaths !== [];

        if (array_key_exists('source_paths', $value) && !$hasSourcePaths && $path !== '') {
            $errors[] = sprintf('genre.configuration_values.%s.source_paths must be a non-empty list when declared.', $path);
        }

        if ($hasSourcePaths) {
            $seen = [];
            foreach ($sourcePaths as $sourcePath) {
                if (!is_string($sourcePath) || trim($sourcePath) === '') {
                    $errors[] = sprintf('genre.configuration_values.%s.source_paths must contain only non-empty strings.', $path);
                    continue;
                }
                $sourcePath = trim($sourcePath);
                if (isset($seen[$sourcePath])) {
                    $errors[] = sprintf('genre.configuration_values.%s.source_paths contains duplicate source path: %s.', $path, $sourcePath);
                }
                $seen[$sourcePath] = true;
            }
        }

        $covered = $coveredBySourcePath || $hasSourcePaths;
        if ($path !== '' && !$covered && $this->hasDirectPayload($value)) {
            $errors[] = sprintf('genre.configuration_values.%s must declare source_paths or inherit them from a parent value node.', $path);
        }

        foreach ($value as $key => $child) {
            // Only named array children are value nodes; metadata keys are not descended into.
            if ($key === 'source_paths' || $key === 'description' || !is_string($key) || !is_array($child)) {
                continue;
            }
            $childPath = $path === '' ? $key : $path . '.' . $key;
            $this->validateCoverageRecursive($child, $childPath, $covered, $errors);
        }
    }

    /**
     * Tells whether a node holds at least one non-array entry besides its metadata keys.
     *
     * @param array<int|string, mixed> $value
     */
    private function hasDirectPayload(array $value): bool
    {
        foreach ($value as $key => $child) {
            if ($key === 'source_paths' || $key === 'description') {
                continue;
            }
            if (!is_array($child)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Collects the cleaned source paths declared by each value node.
     *
     * @param array<string, mixed> $configurationValues
     * @return array<string, string[]> Dotted value path => trimmed, de-duplicated source paths.
     */
    private function collectSourcePaths(array $configurationValues): array
    {
        $out = [];
        $this->collectSourcePathsRecursive($configurationValues, '', $out);

        return $out;
    }

    /**
     * Recursive worker for collectSourcePaths(); the root node's own source_paths are ignored.
     *
     * @param array<int|string, mixed> $value
     * @param array<string, string[]>  $out   Result map (filled in place).
     */
    private function collectSourcePathsRecursive(array $value, string $path, array &$out): void
    {
        $sourcePaths = $value['source_paths'] ?? null;
        if (is_array($sourcePaths) && $path !== '') {
            $clean = [];
            foreach ($sourcePaths as $sourcePath) {
                if (!is_string($sourcePath) || trim($sourcePath) === '') {
                    continue;
                }
                $sourcePath = trim($sourcePath);
                if (!in_array($sourcePath, $clean, true)) {
                    $clean[] = $sourcePath;
                }
            }
            if ($clean !== []) {
                $out[$path] = $clean;
            }
        }

        foreach ($value as $key => $child) {
            if ($key === 'source_paths' || $key === 'description' || !is_string($key) || !is_array($child)) {
                continue;
            }
            $childPath = $path === '' ? $key : $path . '.' . $key;
            $this->collectSourcePathsRecursive($child, $childPath, $out);
        }
    }

    /**
     * Reads the configured frozen-hash entries, keeping malformed values visible.
     *
     * Paths are trimmed; string hashes are trimmed and lower-cased. Non-string hash
     * values are returned untouched so the audit can report them as invalid.
     * Entries with a non-string or blank path are skipped.
     *
     * @param array<string, mixed> $guardConfig
     * @return array<string, mixed> Source path => normalized hash string, or the raw invalid value.
     */
    private function frozenHashEntries(array $guardConfig): array
    {
        $declared = $guardConfig['frozen_non_empty_legacy_source_hashes'] ?? [];
        if (!is_array($declared)) {
            return [];
        }

        $entries = [];
        foreach ($declared as $path => $hash) {
            if (!is_string($path)) {
                continue;
            }
            $path = trim($path);
            if ($path === '') {
                continue;
            }
            $entries[$path] = is_string($hash) ? strtolower(trim($hash)) : $hash;
        }

        return $entries;
    }

    /**
     * Returns only the frozen-hash entries whose hash is a string (normalized).
     *
     * @param array<string, mixed> $guardConfig
     * @return array<string, string>
     */
    private function frozenHashes(array $guardConfig): array
    {
        return array_filter($this->frozenHashEntries($guardConfig), is_string(...));
    }

    /**
     * Builds a lookup set of the source paths that are allowed to be resolved at runtime.
     *
     * @param array<string, mixed> $guardConfig
     * @return array<string, true> Trimmed source path => true.
     */
    private function runtimeResolvedSourcePaths(array $guardConfig): array
    {
        $paths = $guardConfig['runtime_resolved_source_paths'] ?? [];
        if (!is_array($paths)) {
            return [];
        }

        $set = [];
        foreach ($paths as $path) {
            if (is_string($path)) {
                $set[trim($path)] = true;
            }
        }

        return $set;
    }

    /**
     * Resolves a dotted path inside a nested array.
     *
     * `found` distinguishes a missing key from a key that is present with a null value.
     *
     * @param array<string, mixed> $config
     * @return array{found:bool, value:mixed}
     */
    private function valueAtPath(array $config, string $path): array
    {
        $current = $config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return ['found' => false, 'value' => null];
            }
            $current = $current[$segment];
        }

        return ['found' => true, 'value' => $current];
    }

    /**
     * Tells whether a value carries content: null and blank strings are empty, any other
     * scalar is non-empty, and an array is non-empty when any nested element is.
     */
    private function hasNonEmptyValue(mixed $value): bool
    {
        if ($value === null) {
            return false;
        }
        if (is_string($value)) {
            return trim($value) !== '';
        }
        if (is_scalar($value)) {
            return true;
        }
        if (is_array($value)) {
            foreach ($value as $child) {
                if ($this->hasNonEmptyValue($child)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Computes the SHA-256 fingerprint of a config value.
     *
     * The value is key-normalized and JSON-encoded before hashing. Values JSON cannot
     * represent are hashed from their serialized form instead, so two different
     * unencodable values no longer share the same fingerprint. Hashes of JSON-encodable
     * values are unaffected by that fallback.
     */
    private function hashValue(mixed $value): string
    {
        $normalized = $this->normalizeForHash($value);
        $payload = json_encode($normalized, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
        if (!is_string($payload)) {
            // The prefix keeps fallback payloads from ever colliding with a JSON document.
            $payload = 'unencodable:' . serialize($normalized);
        }

        return hash('sha256', $payload);
    }

    /**
     * Recursively sorts map keys so the hash does not depend on key order; lists keep their order.
     */
    private function normalizeForHash(mixed $value): mixed
    {
        if (!is_array($value)) {
            return $value;
        }
        if (array_is_list($value)) {
            return array_map(fn (mixed $item): mixed => $this->normalizeForHash($item), $value);
        }

        $normalized = [];
        $keys = array_keys($value);
        sort($keys, SORT_STRING);
        foreach ($keys as $key) {
            $normalized[(string) $key] = $this->normalizeForHash($value[$key]);
        }

        return $normalized;
    }

    /**
     * Loads `parameters` from every `config/retriex/*.yaml` file (in sorted order, later
     * files overriding earlier ones) and maps `retriex.*` parameters onto a nested tree.
     *
     * Exceptions raised by the YAML parser are not caught here.
     *
     * @return array<string, mixed>
     */
    private function loadRawConfig(): array
    {
        $parameters = [];
        $files = glob($this->projectDir . '/config/retriex/*.yaml');
        if (!is_array($files)) {
            return [];
        }
        sort($files);

        foreach ($files as $file) {
            $parsed = Yaml::parseFile($file);
            if (!is_array($parsed)) {
                continue;
            }
            $fileParameters = $parsed['parameters'] ?? [];
            if (is_array($fileParameters)) {
                $parameters = array_replace_recursive($parameters, $fileParameters);
            }
        }

        $config = [];
        $parameterRoots = [
            'retriex.agent.config' => 'agent',
            'retriex.commerce_query.config' => 'commerce_query',
            'retriex.governance.config' => 'governance',
            'retriex.intent.commerce.config' => 'intent.commerce',
            'retriex.intent.light.config' => 'intent.light',
            'retriex.intent.sales.config' => 'intent.sales',
            'retriex.intent.catalog.config' => 'intent.catalog',
            'retriex.prompt.config' => 'prompt',
            'retriex.query_enrichment.config' => 'query_enrichment',
            'retriex.retrieval.config' => 'retrieval',
            'retriex.search_repair.config' => 'search_repair',
            'retriex.shop_matching.config' => 'shop_matching',
            'retriex.stopwords.config' => 'language',
            'retriex.vocabulary.config' => 'vocabulary',
            'retriex.context.config' => 'context',
            'retriex.genre.config' => 'genre',
        ];

        // Known root parameters are placed at their explicitly mapped target path first.
        foreach ($parameterRoots as $parameterName => $targetPath) {
            if (!array_key_exists($parameterName, $parameters)) {
                continue;
            }
            $this->setPath($config, $targetPath, $parameters[$parameterName]);
        }

        // Any other `retriex.` parameter lands at its own name minus the prefix.
        foreach ($parameters as $parameterName => $value) {
            if (!is_string($parameterName) || !str_starts_with($parameterName, 'retriex.') || isset($parameterRoots[$parameterName])) {
                continue;
            }
            $this->setPath($config, substr($parameterName, strlen('retriex.')), $value);
        }

        return $config;
    }

    /**
     * Writes a value at a dotted path, creating (or replacing non-array) intermediate nodes.
     *
     * @param array<string, mixed> $config
     */
    private function setPath(array &$config, string $path, mixed $value): void
    {
        $current = &$config;
        foreach (explode('.', $path) as $segment) {
            if (!isset($current[$segment]) || !is_array($current[$segment])) {
                $current[$segment] = [];
            }
            $current = &$current[$segment];
        }
        $current = $value;
    }

    /**
     * Returns the summary counters, all initialized to zero.
     *
     * @return array<string, int>
     */
    private function emptySummary(): array
    {
        return [
            'configuration_value_groups' => 0,
            'source_path_value_nodes' => 0,
            'declared_source_paths' => 0,
            'legacy_fallback_empty' => 0,
            'legacy_frozen_non_empty' => 0,
            'legacy_non_empty_unregistered' => 0,
            'legacy_frozen_hash_mismatch' => 0,
            'legacy_runtime_resolved_allowed' => 0,
            'missing' => 0,
            'violations' => 0,
        ];
    }

    /**
     * Counts the rows per state on top of the zeroed summary.
     *
     * @param list<array<string, string>> $rows
     * @return array<string, int>
     */
    private function summarizeRows(array $rows): array
    {
        $summary = $this->emptySummary();
        foreach ($rows as $row) {
            $state = $row['state'] ?? '';
            if ($state !== '') {
                $summary[$state] = ($summary[$state] ?? 0) + 1;
            }
        }

        return $summary;
    }

    /**
     * Builds one report row for a (value path, source path) pair.
     *
     * @return array<string, string>
     */
    private function row(string $valuePath, string $sourcePath, string $state, string $hash): array
    {
        return [
            'genre_value_path' => $valuePath,
            'source_path' => $sourcePath,
            'state' => $state,
            'hash' => $hash,
        ];
    }
}

View File

@@ -29,6 +29,7 @@ final readonly class RetriexEffectiveConfigProvider
private QueryEnricherConfig $queryEnricherConfig,
private GovernanceConfig $governanceConfig,
private GenreConfig $genreConfig,
private GenreSourceOfTruthGuard $genreSourceOfTruthGuard,
private CatalogIntentConfig $catalogIntentConfig,
private ContextServiceConfig $contextServiceConfig,
) {
@@ -76,6 +77,9 @@ final readonly class RetriexEffectiveConfigProvider
$config = $this->dump();
$this->validateGenre($config['genre'], $config, $errors, $warnings);
$sourceOfTruth = $this->genreSourceOfTruthGuard->validate($config['genre'], $config);
array_push($errors, ...$sourceOfTruth['errors']);
array_push($warnings, ...$sourceOfTruth['warnings']);
$this->validateRuntime($config['runtime'], $errors, $warnings);
$this->validateIndex($config['index'], $errors, $warnings);
$this->validateModel($config['model_generation'], $errors, $warnings);