413 lines
14 KiB
PHP
413 lines
14 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Config;
|
|
|
|
/**
 * Read-only, validating accessor over a nested governance configuration array.
 *
 * Values are addressed by dot-separated paths (for example
 * "core_pattern_audit.source_roots") and are validated lazily, when the
 * corresponding getter is called. A missing or malformed value is reported
 * with an \InvalidArgumentException whose message names the offending path.
 */
final class GovernanceConfig
{
    /** Field names that make up one cleanup profile term set. */
    private const PROFILE_TERM_FIELDS = ['stopwords', 'phrases', 'meta_terms'];

    /**
     * @param array<string, mixed> $config Raw nested configuration, stored as given.
     */
    public function __construct(private readonly array $config = [])
    {
    }

    /**
     * Returns the raw configuration exactly as it was passed to the constructor.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedShortModelTokens(): array
    {
        return $this->requiredStringList('regression_baseline.protected_short_model_tokens');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedMeasurementValues(): array
    {
        return $this->requiredStringList('regression_baseline.protected_measurement_values');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedTechnicalPromptKeywords(): array
    {
        return $this->requiredStringList('regression_baseline.protected_technical_prompt_keywords');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionTechnicalPriorityRequiredMarkers(): array
    {
        return $this->requiredStringList('regression_baseline.technical_priority_required_markers');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedAccessoryPromptKeywords(): array
    {
        return $this->requiredStringList('regression_baseline.protected_accessory_prompt_keywords');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedSearchRepairSpecificityTerms(): array
    {
        return $this->requiredStringList('regression_baseline.protected_search_repair_specificity_terms');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionProtectedRetrievalReagentWords(): array
    {
        return $this->requiredStringList('regression_baseline.protected_retrieval_reagent_words');
    }

    /**
     * Reads the device word groups as a map of group key to de-duplicated term list.
     *
     * Two input shapes are accepted:
     *  - a map: `['group' => ['term', ...], ...]`
     *  - the older list-of-groups shape: `[['key' => 'group', 'terms' => ['term', ...]], ...]`
     *
     * Entries with an empty key or no usable terms are skipped; non-array entries are ignored.
     *
     * @return array<string, string[]>
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or yields no valid group.
     */
    public function getRegressionProtectedRetrievalDeviceWordGroups(): array
    {
        $path = 'regression_baseline.protected_retrieval_device_word_groups';

        $groups = $this->requiredValue($path);
        if (!is_array($groups)) {
            throw $this->invalid($path, 'must be a map of string lists');
        }

        $out = [];
        foreach ($groups as $key => $item) {
            if (!is_array($item)) {
                continue;
            }

            if (is_string($key)) {
                // Map shape: the array key names the group, the value is the term list.
                $groupKey = trim($key);
                $terms = $this->normalizeStringList($item);
            } else {
                // Backwards-compatible reader for the temporary p15/p15b list-of-groups shape.
                $groupKey = $this->trimmedScalar($item, 'key');
                $terms = $this->normalizeStringList($item['terms'] ?? []);
            }

            if ($groupKey !== '' && $terms !== []) {
                $out[$groupKey] = $terms;
            }
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one valid group');
        }

        return $out;
    }

    /**
     * @throws \InvalidArgumentException When the value is missing, not scalar, or empty after trimming.
     */
    public function getRegressionShopPromptOriginalQuery(): string
    {
        return $this->requiredString('regression_baseline.shop_prompt_regression_original_query');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionShopPromptRequiredOutputInstructionMarkers(): array
    {
        return $this->requiredStringList('regression_baseline.shop_prompt_required_output_instruction_markers');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionShopQueryMetaGuardTerms(): array
    {
        return $this->requiredStringList('regression_baseline.shop_query_meta_guard_terms');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionShopQueryContextFallbackFilterTerms(): array
    {
        return $this->requiredStringList('regression_baseline.shop_query_context_fallback_filter_terms');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getRegressionShopQueryCurrentInputPreservationTerms(): array
    {
        return $this->requiredStringList('regression_baseline.shop_query_current_input_preservation_terms');
    }

    /**
     * Returns "vocabulary.protected_short_model_tokens" when it holds at least one
     * usable entry, otherwise the regression baseline short model tokens.
     *
     * The baseline is resolved only when it is actually needed, so a configured
     * vocabulary list is returned even if the regression baseline list is absent.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException When the fallback is needed and the baseline list is missing or invalid.
     */
    public function getVocabularyProtectedShortModelTokens(): array
    {
        $override = $this->optionalStringList('vocabulary.protected_short_model_tokens');

        return $override !== [] ? $override : $this->getRegressionProtectedShortModelTokens();
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getLanguageProtectedStopwordTerms(): array
    {
        return $this->requiredStringList('language.protected_stopword_terms');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getLanguageRequiredCleanupProfiles(): array
    {
        return $this->requiredStringList('language.required_cleanup_profiles');
    }

    /**
     * Reads the required term lists per cleanup profile.
     *
     * For each profile, a field that is absent (or null) falls back to the matching
     * list from "language.required_profile_term_defaults". A profile whose three
     * lists all end up empty is rejected.
     *
     * @return array<string, array{stopwords:string[], phrases:string[], meta_terms:string[]}>
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, badly keyed,
     *                                   contains an empty profile, or contains no profile at all.
     */
    public function getLanguageRequiredProfileTerms(): array
    {
        $path = 'language.required_profile_terms';

        $profiles = $this->requiredValue($path);
        if (!is_array($profiles)) {
            throw $this->invalid($path, 'must be a map of cleanup profile term lists');
        }

        $defaults = $this->languageRequiredProfileTermDefaults();

        $out = [];
        foreach ($profiles as $profileName => $profileTerms) {
            if (!is_string($profileName) || trim($profileName) === '' || !is_array($profileTerms)) {
                throw $this->invalid($path, 'must be keyed by non-empty cleanup profile names');
            }

            $name = trim($profileName);
            $terms = $this->profileTermLists($profileTerms, $defaults);

            // array_filter() drops empty lists, so an empty result means no field has a term.
            if (array_filter($terms) === []) {
                throw $this->invalid($path . '.' . $name, 'must contain at least one required term');
            }

            $out[$name] = $terms;
        }

        if ($out === []) {
            throw $this->invalid($path, 'must contain at least one cleanup profile');
        }

        return $out;
    }

    /**
     * Reads the optional per-field defaults used by getLanguageRequiredProfileTerms().
     *
     * @return array{stopwords:string[], phrases:string[], meta_terms:string[]} Empty lists when not configured.
     *
     * @throws \InvalidArgumentException When the value is present but not an array.
     */
    private function languageRequiredProfileTermDefaults(): array
    {
        $path = 'language.required_profile_term_defaults';

        $configured = $this->optionalValue($path);
        if ($configured === null) {
            return $this->profileTermLists([], []);
        }

        if (!is_array($configured)) {
            throw $this->invalid($path, 'must be a map of cleanup profile term lists');
        }

        return $this->profileTermLists($configured, []);
    }

    /**
     * Builds the three normalized term lists of one profile.
     *
     * @param array<array-key, mixed> $source   Map that may hold the term fields.
     * @param array<string, mixed>    $fallback Per-field values used when $source lacks a field (or holds null).
     *
     * @return array{stopwords:string[], phrases:string[], meta_terms:string[]}
     */
    private function profileTermLists(array $source, array $fallback): array
    {
        $lists = [];
        foreach (self::PROFILE_TERM_FIELDS as $field) {
            $lists[$field] = $this->normalizeStringList($source[$field] ?? $fallback[$field] ?? []);
        }

        return $lists;
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditSourceRoots(): array
    {
        return $this->requiredStringList('core_pattern_audit.source_roots');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditExcludedPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_prefixes');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditExcludedPathPatterns(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_patterns');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditWarningPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.warning_path_prefixes');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditSuspiciousCalls(): array
    {
        return $this->requiredStringList('core_pattern_audit.suspicious_calls');
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    public function getCorePatternAuditDomainMarkerTerms(): array
    {
        return $this->requiredStringList('core_pattern_audit.domain_marker_terms');
    }

    /**
     * Reads the allowed literal patterns as a list of {path, pattern, reason} maps.
     *
     * "path" and "pattern" must be non-empty after trimming, and "pattern" must be
     * accepted by preg_match(). "reason" may be empty. An empty list is returned as is.
     *
     * @return array<int, array{path:string, pattern:string, reason:string}>
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or an entry is invalid.
     */
    public function getCorePatternAuditAllowedLiteralPatterns(): array
    {
        $basePath = 'core_pattern_audit.allowed_literal_patterns';

        $entries = $this->requiredValue($basePath);
        if (!is_array($entries)) {
            throw $this->invalid($basePath, 'must be a list of maps');
        }

        $out = [];
        foreach ($entries as $index => $entry) {
            $entryPath = $basePath . '.' . (string) $index;
            if (!is_array($entry)) {
                throw $this->invalid($entryPath, 'must be a map');
            }

            $pathPrefix = $this->trimmedScalar($entry, 'path');
            $pattern = $this->trimmedScalar($entry, 'pattern');
            $reason = $this->trimmedScalar($entry, 'reason');

            if ($pathPrefix === '') {
                throw $this->invalid($entryPath . '.path', 'must not be empty');
            }
            if ($pattern === '') {
                throw $this->invalid($entryPath . '.pattern', 'must not be empty');
            }
            if (!$this->isValidRegex($pattern)) {
                throw $this->invalid($entryPath . '.pattern', 'must be a valid regex pattern');
            }

            $out[] = [
                'path' => $pathPrefix,
                'pattern' => $pattern,
                'reason' => $reason,
            ];
        }

        return $out;
    }

    /**
     * @throws \InvalidArgumentException When the value is missing, not an integer, or below 20.
     */
    public function getCorePatternAuditMaxSnippetLength(): int
    {
        return $this->requiredInt('core_pattern_audit.max_snippet_length', 20);
    }

    /**
     * Reads a required integer; accepts a native int or a string of optionally signed digits.
     *
     * @param string $path Dot-separated config path.
     * @param int    $min  Smallest accepted value (inclusive).
     *
     * @throws \InvalidArgumentException When the value is missing, not an integer, or below $min.
     */
    private function requiredInt(string $path, int $min = PHP_INT_MIN): int
    {
        $raw = $this->requiredValue($path);

        if (is_int($raw)) {
            $intValue = $raw;
        } elseif (is_string($raw) && preg_match('/^-?\d+$/', trim($raw)) === 1) {
            $intValue = (int) trim($raw);
        } else {
            throw $this->invalid($path, 'must be an integer');
        }

        if ($intValue < $min) {
            throw $this->invalid($path, sprintf('must be greater than or equal to %d', $min));
        }

        return $intValue;
    }

    /**
     * Reads a required scalar and returns it as a trimmed, non-empty string.
     *
     * @throws \InvalidArgumentException When the value is missing, not scalar, or empty after trimming.
     */
    private function requiredString(string $path): string
    {
        $raw = $this->requiredValue($path);
        if (!is_scalar($raw)) {
            throw $this->invalid($path, 'must be a scalar string');
        }

        $text = trim((string) $raw);
        if ($text === '') {
            throw $this->invalid($path, 'must not be empty');
        }

        return $text;
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When the value is missing, not an array, or has no usable entry.
     */
    private function requiredStringList(string $path): array
    {
        return $this->nonEmptyStringList($path, $this->requiredValue($path));
    }

    /**
     * Reads an optional string list, using $fallback when the value is absent
     * or normalizes to an empty list.
     *
     * @param array<array-key, mixed> $fallback Normalized the same way as the configured value.
     *
     * @return string[]
     */
    private function optionalStringList(string $path, array $fallback = []): array
    {
        $configured = $this->normalizeStringList($this->optionalValue($path));

        return $configured !== [] ? $configured : $this->normalizeStringList($fallback);
    }

    /**
     * @return string[]
     *
     * @throws \InvalidArgumentException When $value is not an array or has no usable entry.
     */
    private function nonEmptyStringList(string $path, mixed $value): array
    {
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a string list');
        }

        $list = $this->normalizeStringList($value);
        if ($list === []) {
            throw $this->invalid($path, 'must contain at least one value');
        }

        return $list;
    }

    /**
     * Converts a raw value into a list of unique, trimmed, non-empty strings.
     *
     * Non-array input yields an empty list. Non-scalar items are skipped; scalar
     * items are cast to string (so true becomes "1" and false is dropped as empty).
     * First occurrence wins and input order is preserved.
     *
     * @return string[]
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];
        $seen = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $text = trim((string) $item);
            // Keyed lookup instead of in_array(): PHP only casts canonical decimal
            // strings ("1", "-5") to int keys, so distinct strings never collide.
            if ($text === '' || isset($seen[$text])) {
                continue;
            }

            $seen[$text] = true;
            $out[] = $text;
        }

        return $out;
    }

    /**
     * Returns $map[$key] as a trimmed string, or '' when it is absent, null, or not scalar.
     *
     * @param array<array-key, mixed> $map
     */
    private function trimmedScalar(array $map, string $key): string
    {
        $raw = $map[$key] ?? null;

        return is_scalar($raw) ? trim((string) $raw) : '';
    }

    /**
     * Tells whether preg_match() accepts $pattern.
     *
     * preg_match() signals a malformed pattern with an E_WARNING plus a false return
     * value. The warning is swallowed by a handler installed only for this call, so
     * the outcome does not depend on how a global error handler treats "@".
     */
    private function isValidRegex(string $pattern): bool
    {
        set_error_handler(static fn (): bool => true);

        try {
            return preg_match($pattern, '') !== false;
        } finally {
            restore_error_handler();
        }
    }

    /**
     * Like optionalValue(), but an absent (or null) value is an error.
     *
     * @throws \InvalidArgumentException When nothing is configured at $path.
     */
    private function requiredValue(string $path): mixed
    {
        return $this->optionalValue($path) ?? throw $this->missing($path);
    }

    /**
     * Walks the config along the dot-separated $path.
     *
     * @return mixed The value found, or null when any segment is absent or a
     *               non-array is reached before the path is consumed.
     */
    private function optionalValue(string $path): mixed
    {
        $cursor = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($cursor) || !array_key_exists($segment, $cursor)) {
                return null;
            }

            $cursor = $cursor[$segment];
        }

        return $cursor;
    }

    /** Builds (does not throw) the exception for an absent value. */
    private function missing(string $path): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" is missing.', $path));
    }

    /** Builds (does not throw) the exception for a present but unacceptable value. */
    private function invalid(string $path, string $reason): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" %s.', $path, $reason));
    }
}
|