Files
MtoRagSystem/src/Config/GovernanceConfig.php
2026-05-07 07:52:52 +02:00

447 lines
16 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Typed, validating reader for the governance configuration tree.
 *
 * Values are addressed by dot-separated paths inside the injected config array. Several getters
 * consult the optional GenreConfig first and only fall back to the governance config when the
 * genre value is empty. Missing or malformed required values raise \InvalidArgumentException.
 */
final class GovernanceConfig
{
    /**
     * @param array<string, mixed> $config      Raw governance configuration tree.
     * @param GenreConfig|null     $genreConfig Optional genre source consulted first by the genre-aware getters.
     */
    public function __construct(
        private readonly array $config = [],
        private readonly ?GenreConfig $genreConfig = null,
    ) {
    }

    /**
     * Returns the raw governance configuration exactly as it was injected.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedShortModelTokens(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_short_model_tokens',
            'regression_baseline.protected_short_model_tokens',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedMeasurementValues(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_measurement_values',
            'regression_baseline.protected_measurement_values',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedTechnicalPromptKeywords(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_technical_prompt_keywords',
            'regression_baseline.protected_technical_prompt_keywords',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionTechnicalPriorityRequiredMarkers(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.technical_priority_required_markers',
            'regression_baseline.technical_priority_required_markers',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedAccessoryPromptKeywords(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_accessory_prompt_keywords',
            'regression_baseline.protected_accessory_prompt_keywords',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedSearchRepairSpecificityTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_search_repair_specificity_terms',
            'regression_baseline.protected_search_repair_specificity_terms',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionProtectedRetrievalReagentWords(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.protected_retrieval_reagent_words',
            'regression_baseline.protected_retrieval_reagent_words',
        );
    }

    /**
     * Reads the device word groups as a map of group name to normalized term list.
     *
     * Two input shapes are accepted: a map keyed by group name, and the older list of
     * `['key' => ..., 'terms' => [...]]` entries. Entries with an empty name or no usable
     * terms are dropped.
     *
     * @return array<string, string[]>
     * @throws \InvalidArgumentException When the value is missing, not an array, or yields no valid group.
     */
    public function getRegressionProtectedRetrievalDeviceWordGroups(): array
    {
        $configPath = 'regression_baseline.protected_retrieval_device_word_groups';

        $groups = $this->genreArray('governance_and_regression.regression_baseline.protected_retrieval_device_word_groups');
        if ($groups === []) {
            $groups = $this->requiredValue($configPath);
        }
        if (!is_array($groups)) {
            throw $this->invalid($configPath, 'must be a map of string lists');
        }

        $out = [];
        foreach ($groups as $key => $item) {
            if (!is_array($item)) {
                continue;
            }

            if (is_string($key)) {
                // Current shape: group name => list of terms.
                $groupKey = trim($key);
                $terms = $this->normalizeStringList($item);
            } else {
                // Backwards-compatible reader for the temporary p15/p15b list-of-groups shape.
                $groupKey = $this->trimmedScalar($item, 'key');
                $terms = $this->normalizeStringList($item['terms'] ?? []);
            }

            if ($groupKey !== '' && $terms !== []) {
                $out[$groupKey] = $terms;
            }
        }

        if ($out === []) {
            throw $this->invalid($configPath, 'must contain at least one valid group');
        }

        return $out;
    }

    /**
     * Returns the original query used by the shop prompt regression check.
     *
     * @throws \InvalidArgumentException When the genre value is empty and the config value is missing or blank.
     */
    public function getRegressionShopPromptOriginalQuery(): string
    {
        $fromGenre = $this->genreString('governance_and_regression.regression_baseline.shop_prompt_regression_original_query');

        // Compare against '' explicitly: a truthiness test would also discard the non-empty string "0".
        return $fromGenre !== ''
            ? $fromGenre
            : $this->requiredString('regression_baseline.shop_prompt_regression_original_query');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionShopPromptRequiredOutputInstructionMarkers(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.shop_prompt_required_output_instruction_markers',
            'regression_baseline.shop_prompt_required_output_instruction_markers',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionShopQueryMetaGuardTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.shop_query_meta_guard_terms',
            'regression_baseline.shop_query_meta_guard_terms',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionShopQueryContextFallbackFilterTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.shop_query_context_fallback_filter_terms',
            'regression_baseline.shop_query_context_fallback_filter_terms',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getRegressionShopQueryCurrentInputPreservationTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.regression_baseline.shop_query_current_input_preservation_terms',
            'regression_baseline.shop_query_current_input_preservation_terms',
        );
    }

    /**
     * Returns the vocabulary-level short model tokens, falling back to the regression baseline list.
     *
     * The fallback is resolved only when the vocabulary list is absent or empty, so a configured
     * vocabulary list never fails because the regression baseline is missing.
     *
     * @return string[]
     * @throws \InvalidArgumentException When the fallback is needed and cannot be resolved.
     */
    public function getVocabularyProtectedShortModelTokens(): array
    {
        $configured = $this->optionalStringList('vocabulary.protected_short_model_tokens');
        if ($configured !== []) {
            return $configured;
        }

        return $this->normalizeStringList($this->getRegressionProtectedShortModelTokens());
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getLanguageProtectedStopwordTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'retrieval_and_language.protected_terms.terms',
            'language.protected_stopword_terms',
        );
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getLanguageRequiredCleanupProfiles(): array
    {
        return $this->requiredStringList('language.required_cleanup_profiles');
    }

    /**
     * Reads the required terms per cleanup profile.
     *
     * Any of the three term lists a profile omits is taken from
     * `language.required_profile_term_defaults`. A profile whose three lists all end up empty
     * is rejected.
     *
     * @return array<string, array{stopwords:string[], phrases:string[], meta_terms:string[]}>
     * @throws \InvalidArgumentException When the map is missing, malformed, or contains no usable profile.
     */
    public function getLanguageRequiredProfileTerms(): array
    {
        $profiles = $this->requiredValue('language.required_profile_terms');
        if (!is_array($profiles)) {
            throw $this->invalid('language.required_profile_terms', 'must be a map of cleanup profile term lists');
        }

        $defaults = $this->languageRequiredProfileTermDefaults();
        $out = [];
        foreach ($profiles as $profileName => $profileTerms) {
            if (!is_string($profileName) || trim($profileName) === '' || !is_array($profileTerms)) {
                throw $this->invalid('language.required_profile_terms', 'must be keyed by non-empty cleanup profile names');
            }

            $name = trim($profileName);
            $terms = [
                'stopwords' => $this->normalizeStringList($profileTerms['stopwords'] ?? $defaults['stopwords']),
                'phrases' => $this->normalizeStringList($profileTerms['phrases'] ?? $defaults['phrases']),
                'meta_terms' => $this->normalizeStringList($profileTerms['meta_terms'] ?? $defaults['meta_terms']),
            ];

            if ($terms['stopwords'] === [] && $terms['phrases'] === [] && $terms['meta_terms'] === []) {
                throw $this->invalid('language.required_profile_terms.' . $name, 'must contain at least one required term');
            }

            $out[$name] = $terms;
        }

        if ($out === []) {
            throw $this->invalid('language.required_profile_terms', 'must contain at least one cleanup profile');
        }

        return $out;
    }

    /**
     * Reads the optional per-profile default term lists; every list is empty when the key is absent.
     *
     * @return array{stopwords:string[], phrases:string[], meta_terms:string[]}
     * @throws \InvalidArgumentException When the value is present but not an array.
     */
    private function languageRequiredProfileTermDefaults(): array
    {
        $defaults = $this->optionalValue('language.required_profile_term_defaults');
        if ($defaults === null) {
            return [
                'stopwords' => [],
                'phrases' => [],
                'meta_terms' => [],
            ];
        }
        if (!is_array($defaults)) {
            throw $this->invalid('language.required_profile_term_defaults', 'must be a map of cleanup profile term lists');
        }

        return [
            'stopwords' => $this->normalizeStringList($defaults['stopwords'] ?? []),
            'phrases' => $this->normalizeStringList($defaults['phrases'] ?? []),
            'meta_terms' => $this->normalizeStringList($defaults['meta_terms'] ?? []),
        ];
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getCorePatternAuditSourceRoots(): array
    {
        return $this->requiredStringList('core_pattern_audit.source_roots');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getCorePatternAuditExcludedPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_prefixes');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getCorePatternAuditExcludedPathPatterns(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_patterns');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getCorePatternAuditWarningPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.warning_path_prefixes');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the list is missing or empty.
     */
    public function getCorePatternAuditSuspiciousCalls(): array
    {
        return $this->requiredStringList('core_pattern_audit.suspicious_calls');
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When neither source provides a non-empty list.
     */
    public function getCorePatternAuditDomainMarkerTerms(): array
    {
        return $this->genreOrRequiredStringList(
            'governance_and_regression.core_pattern_audit.domain_marker_terms',
            'core_pattern_audit.domain_marker_terms',
        );
    }

    /**
     * Reads the allow-list of literal patterns, validating each entry.
     *
     * `path` and `pattern` must be non-empty and `pattern` must compile as a PCRE regex;
     * `reason` may be empty.
     *
     * @return array<int, array{path:string, pattern:string, reason:string}>
     * @throws \InvalidArgumentException When the list is missing or any entry is malformed.
     */
    public function getCorePatternAuditAllowedLiteralPatterns(): array
    {
        $entries = $this->requiredValue('core_pattern_audit.allowed_literal_patterns');
        if (!is_array($entries)) {
            throw $this->invalid('core_pattern_audit.allowed_literal_patterns', 'must be a list of maps');
        }

        $out = [];
        foreach ($entries as $index => $item) {
            $path = 'core_pattern_audit.allowed_literal_patterns.' . (string) $index;
            if (!is_array($item)) {
                throw $this->invalid($path, 'must be a map');
            }

            $pathPrefix = $this->trimmedScalar($item, 'path');
            $pattern = $this->trimmedScalar($item, 'pattern');
            $reason = $this->trimmedScalar($item, 'reason');

            if ($pathPrefix === '') {
                throw $this->invalid($path . '.path', 'must not be empty');
            }
            if ($pattern === '') {
                throw $this->invalid($path . '.pattern', 'must not be empty');
            }
            if (!$this->isValidRegexPattern($pattern)) {
                throw $this->invalid($path . '.pattern', 'must be a valid regex pattern');
            }

            $out[] = [
                'path' => $pathPrefix,
                'pattern' => $pattern,
                'reason' => $reason,
            ];
        }

        return $out;
    }

    /**
     * Returns the configured maximum snippet length, which must be at least 20.
     *
     * @throws \InvalidArgumentException When the value is missing, not an integer, or below 20.
     */
    public function getCorePatternAuditMaxSnippetLength(): int
    {
        return $this->requiredInt('core_pattern_audit.max_snippet_length', 20);
    }

    /**
     * Returns the genre string list when it is non-empty, otherwise the required config list.
     *
     * @return string[]
     * @throws \InvalidArgumentException When the genre list is empty and the config list is missing or empty.
     */
    private function genreOrRequiredStringList(string $genrePath, string $configPath): array
    {
        $fromGenre = $this->genreStringList($genrePath);

        return $fromGenre !== [] ? $fromGenre : $this->requiredStringList($configPath);
    }

    /**
     * Returns the genre string list at $path, or [] when there is no genre config or no value.
     *
     * @return string[]
     */
    private function genreStringList(string $path): array
    {
        return $this->genreConfig?->getValueStringList($path) ?? [];
    }

    /**
     * Returns the genre string at $path, or '' when there is no genre config or no value.
     */
    private function genreString(string $path): string
    {
        return $this->genreConfig?->getValueString($path) ?? '';
    }

    /**
     * Returns the genre array at $path, or [] when there is no genre config or no value.
     *
     * @return array<int|string, mixed>
     */
    private function genreArray(string $path): array
    {
        return $this->genreConfig?->getValueArray($path) ?? [];
    }

    /**
     * Reads a required integer, accepting native ints and digit-only strings.
     *
     * @param int $min Smallest accepted value (inclusive).
     * @throws \InvalidArgumentException When the value is missing, not an integer, out of int range, or below $min.
     */
    private function requiredInt(string $path, int $min = PHP_INT_MIN): int
    {
        $value = $this->requiredValue($path);

        if (is_string($value)) {
            $digits = trim($value);
            // Adding 0 converts a digit string to int, or to float when it exceeds the platform
            // int range. The float case is rejected below instead of being clamped by an (int) cast.
            $value = preg_match('/^-?\d+$/', $digits) === 1 ? $digits + 0 : null;
        }

        if (!is_int($value)) {
            throw $this->invalid($path, 'must be an integer');
        }
        if ($value < $min) {
            throw $this->invalid($path, sprintf('must be greater than or equal to %d', $min));
        }

        return $value;
    }

    /**
     * Reads a required scalar as a trimmed, non-empty string.
     *
     * @throws \InvalidArgumentException When the value is missing, not scalar, or blank after trimming.
     */
    private function requiredString(string $path): string
    {
        $value = $this->requiredValue($path);
        if (!is_scalar($value)) {
            throw $this->invalid($path, 'must be a scalar string');
        }

        $value = trim((string) $value);
        if ($value === '') {
            throw $this->invalid($path, 'must not be empty');
        }

        return $value;
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When the value is missing, not an array, or normalizes to an empty list.
     */
    private function requiredStringList(string $path): array
    {
        return $this->nonEmptyStringList($path, $this->requiredValue($path));
    }

    /**
     * Reads an optional string list, using $fallback when the value is absent or normalizes to empty.
     *
     * @param array<int|string, mixed> $fallback
     * @return string[]
     */
    private function optionalStringList(string $path, array $fallback = []): array
    {
        $configured = $this->normalizeStringList($this->optionalValue($path));

        return $configured !== [] ? $configured : $this->normalizeStringList($fallback);
    }

    /**
     * @return string[]
     * @throws \InvalidArgumentException When $value is not an array or normalizes to an empty list.
     */
    private function nonEmptyStringList(string $path, mixed $value): array
    {
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a string list');
        }

        $out = $this->normalizeStringList($value);
        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one value');
        }

        return $out;
    }

    /**
     * Converts a value to a list of unique, trimmed, non-empty strings in first-seen order.
     *
     * Non-array input yields []; non-scalar items are skipped.
     *
     * @return string[]
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];
        $seen = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = trim((string) $item);
            if ($item === '' || isset($seen[$item])) {
                continue;
            }

            // $seen is only a lookup table; $out keeps the values as strings.
            $seen[$item] = true;
            $out[] = $item;
        }

        return $out;
    }

    /**
     * Returns $map[$key] as a trimmed string, or '' when it is unset, null, or not scalar.
     *
     * @param array<int|string, mixed> $map
     */
    private function trimmedScalar(array $map, string $key): string
    {
        return isset($map[$key]) && is_scalar($map[$key]) ? trim((string) $map[$key]) : '';
    }

    /**
     * Reports whether $pattern can be used with preg_match().
     *
     * A scoped no-op error handler absorbs the warning preg_match() emits for a bad pattern,
     * so no outer error handler is invoked for it.
     */
    private function isValidRegexPattern(string $pattern): bool
    {
        set_error_handler(static fn (): bool => true);
        try {
            return preg_match($pattern, '') !== false;
        } finally {
            restore_error_handler();
        }
    }

    /**
     * Returns the value at $path.
     *
     * @throws \InvalidArgumentException When the path does not resolve or resolves to null.
     */
    private function requiredValue(string $path): mixed
    {
        $value = $this->optionalValue($path);
        if ($value === null) {
            throw $this->missing($path);
        }

        return $value;
    }

    /**
     * Walks the config array along the dot-separated $path; returns null when any segment is absent.
     */
    private function optionalValue(string $path): mixed
    {
        $value = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($value) || !array_key_exists($segment, $value)) {
                return null;
            }
            $value = $value[$segment];
        }

        return $value;
    }

    /**
     * Builds the exception reported for a missing config path.
     */
    private function missing(string $path): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" is missing.', $path));
    }

    /**
     * Builds the exception reported for an invalid config value at $path.
     */
    private function invalid(string $path, string $reason): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" %s.', $path, $reason));
    }
}