$config */
    public function __construct(
        private readonly array $config = [],
        private readonly ?GenreConfig $genreConfig = null,
    ) {
        // Both arguments are promoted to readonly properties; no further setup is needed.
    }

    /**
     * Returns the raw governance configuration exactly as it was injected.
     *
     * @return array
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * Protected short model tokens of the regression baseline.
     *
     * A non-empty genre value wins; otherwise the base config entry is required.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedShortModelTokens(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_short_model_tokens',
            'regression_baseline.protected_short_model_tokens',
        );
    }

    /**
     * Protected measurement values of the regression baseline (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedMeasurementValues(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_measurement_values',
            'regression_baseline.protected_measurement_values',
        );
    }

    /**
     * Protected technical prompt keywords of the regression baseline (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedTechnicalPromptKeywords(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_technical_prompt_keywords',
            'regression_baseline.protected_technical_prompt_keywords',
        );
    }

    /**
     * Required technical-priority markers of the regression baseline (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionTechnicalPriorityRequiredMarkers(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.technical_priority_required_markers',
            'regression_baseline.technical_priority_required_markers',
        );
    }

    /**
     * Protected accessory prompt keywords of the regression baseline (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedAccessoryPromptKeywords(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_accessory_prompt_keywords',
            'regression_baseline.protected_accessory_prompt_keywords',
        );
    }

    /**
     * Protected search-repair specificity terms of the regression baseline
     * (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedSearchRepairSpecificityTerms(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_search_repair_specificity_terms',
            'regression_baseline.protected_search_repair_specificity_terms',
        );
    }

    /**
     * Protected retrieval reagent words of the regression baseline (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionProtectedRetrievalReagentWords(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.protected_retrieval_device_word_groups' === ''
                ? ''
                : 'governance_and_regression.regression_baseline.protected_retrieval_reagent_words',
            'regression_baseline.protected_retrieval_reagent_words',
        );
    }

    /**
     * Protected retrieval device word groups, keyed by group name.
     *
     * Two input shapes are accepted:
     *  - a map of group name to term list (current shape), and
     *  - a list of entries carrying "key" and "terms" (temporary p15/p15b shape).
     * Entries with an empty name or without any usable term are dropped silently.
     *
     * @return array map of trimmed group name to its normalised string[] term list
     *
     * @throws \InvalidArgumentException when the value is missing, not an array,
     *                                   or contains no valid group at all
     */
    public function getRegressionProtectedRetrievalDeviceWordGroups(): array
    {
        $value = $this->genreArray(
            'governance_and_regression.regression_baseline.protected_retrieval_device_word_groups',
        );
        if ($value === []) {
            $value = $this->requiredValue('regression_baseline.protected_retrieval_device_word_groups');
        }

        if (!is_array($value)) {
            throw $this->invalid(
                'regression_baseline.protected_retrieval_device_word_groups',
                'must be a map of string lists',
            );
        }

        $groups = [];
        foreach ($value as $key => $item) {
            if (!is_array($item)) {
                continue;
            }

            if (is_string($key)) {
                // Current shape: group name => list of terms.
                $groupKey = trim($key);
                $terms = $this->normalizeStringList($item);
            } else {
                // Backwards-compatible reader for the temporary p15/p15b list-of-groups shape.
                $groupKey = isset($item['key']) && is_scalar($item['key'])
                    ? trim((string) $item['key'])
                    : '';
                $terms = $this->normalizeStringList($item['terms'] ?? []);
            }

            if ($groupKey !== '' && $terms !== []) {
                $groups[$groupKey] = $terms;
            }
        }

        if ($groups === []) {
            throw $this->invalid(
                'regression_baseline.protected_retrieval_device_word_groups',
                'must contain at least one valid group',
            );
        }

        return $groups;
    }

    /**
     * Original query used by the shop prompt regression check.
     *
     * The genre value is used whenever it is a non-empty string. The comparison is
     * explicit so that a literal "0" is honoured instead of being treated as absent.
     *
     * @throws \InvalidArgumentException when the genre value is empty and the base
     *                                   config entry is missing, non-scalar or blank
     */
    public function getRegressionShopPromptOriginalQuery(): string
    {
        $fromGenre = $this->genreString(
            'governance_and_regression.regression_baseline.shop_prompt_regression_original_query',
        );

        return $fromGenre !== ''
            ? $fromGenre
            : $this->requiredString('regression_baseline.shop_prompt_regression_original_query');
    }

    /**
     * Required output-instruction markers for the shop prompt (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionShopPromptRequiredOutputInstructionMarkers(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.shop_prompt_required_output_instruction_markers',
            'regression_baseline.shop_prompt_required_output_instruction_markers',
        );
    }

    /**
     * Meta guard terms for shop queries (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionShopQueryMetaGuardTerms(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.shop_query_meta_guard_terms',
            'regression_baseline.shop_query_meta_guard_terms',
        );
    }

    /**
     * Context-fallback filter terms for shop queries (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionShopQueryContextFallbackFilterTerms(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.shop_query_context_fallback_filter_terms',
            'regression_baseline.shop_query_context_fallback_filter_terms',
        );
    }

    /**
     * Current-input preservation terms for shop queries (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getRegressionShopQueryCurrentInputPreservationTerms(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.regression_baseline.shop_query_current_input_preservation_terms',
            'regression_baseline.shop_query_current_input_preservation_terms',
        );
    }

    /**
     * Vocabulary-level protected short model tokens.
     *
     * Falls back to the regression baseline tokens when the vocabulary entry is
     * absent or normalises to an empty list. The fallback is resolved before the
     * lookup, so a missing regression baseline raises even if the vocabulary
     * entry is present.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the regression baseline tokens cannot be resolved
     */
    public function getVocabularyProtectedShortModelTokens(): array
    {
        return $this->optionalStringList(
            'vocabulary.protected_short_model_tokens',
            $this->getRegressionProtectedShortModelTokens(),
        );
    }

    /**
     * Protected stopword terms (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getLanguageProtectedStopwordTerms(): array
    {
        return $this->genreStringListOrRequired(
            'retrieval_and_language.protected_terms.terms',
            'language.protected_stopword_terms',
        );
    }

    /**
     * Names of the cleanup profiles that must exist, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getLanguageRequiredCleanupProfiles(): array
    {
        return $this->requiredStringList('language.required_cleanup_profiles');
    }

    /**
     * Required terms per cleanup profile.
     *
     * Each profile yields the keys "stopwords", "phrases" and "meta_terms". A key
     * the profile does not provide (or sets to null) takes the configured default
     * for that key.
     *
     * @return array map of trimmed profile name to
     *               array{stopwords:string[], phrases:string[], meta_terms:string[]}
     *
     * @throws \InvalidArgumentException when the entry is missing or not an array,
     *                                   a profile name is not a non-blank string, a
     *                                   profile is not an array, a profile ends up
     *                                   with no term at all, or no profile is defined
     */
    public function getLanguageRequiredProfileTerms(): array
    {
        $value = $this->requiredValue('language.required_profile_terms');
        if (!is_array($value)) {
            throw $this->invalid('language.required_profile_terms', 'must be a map of cleanup profile term lists');
        }

        $defaults = $this->languageRequiredProfileTermDefaults();
        $profiles = [];

        foreach ($value as $profileName => $profileTerms) {
            if (!is_string($profileName) || trim($profileName) === '' || !is_array($profileTerms)) {
                throw $this->invalid(
                    'language.required_profile_terms',
                    'must be keyed by non-empty cleanup profile names',
                );
            }

            $name = trim($profileName);
            $terms = [
                'stopwords' => $this->normalizeStringList($profileTerms['stopwords'] ?? $defaults['stopwords']),
                'phrases' => $this->normalizeStringList($profileTerms['phrases'] ?? $defaults['phrases']),
                'meta_terms' => $this->normalizeStringList($profileTerms['meta_terms'] ?? $defaults['meta_terms']),
            ];

            // A profile with three empty lists requires nothing and is treated as a config error.
            if ($terms['stopwords'] === [] && $terms['phrases'] === [] && $terms['meta_terms'] === []) {
                throw $this->invalid(
                    'language.required_profile_terms.' . $name,
                    'must contain at least one required term',
                );
            }

            $profiles[$name] = $terms;
        }

        if ($profiles === []) {
            throw $this->invalid('language.required_profile_terms', 'must contain at least one cleanup profile');
        }

        return $profiles;
    }

    /**
     * Default term lists applied to profiles that omit a category.
     *
     * A missing defaults entry yields three empty lists.
     *
     * @return array{stopwords:string[], phrases:string[], meta_terms:string[]}
     *
     * @throws \InvalidArgumentException when the defaults entry exists but is not an array
     */
    private function languageRequiredProfileTermDefaults(): array
    {
        $value = $this->optionalValue('language.required_profile_term_defaults');

        if ($value === null) {
            return [
                'stopwords' => [],
                'phrases' => [],
                'meta_terms' => [],
            ];
        }

        if (!is_array($value)) {
            throw $this->invalid(
                'language.required_profile_term_defaults',
                'must be a map of cleanup profile term lists',
            );
        }

        return [
            'stopwords' => $this->normalizeStringList($value['stopwords'] ?? []),
            'phrases' => $this->normalizeStringList($value['phrases'] ?? []),
            'meta_terms' => $this->normalizeStringList($value['meta_terms'] ?? []),
        ];
    }

    /**
     * Source roots of the core pattern audit, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getCorePatternAuditSourceRoots(): array
    {
        return $this->requiredStringList('core_pattern_audit.source_roots');
    }

    /**
     * Excluded path prefixes of the core pattern audit, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getCorePatternAuditExcludedPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_prefixes');
    }

    /**
     * Excluded path patterns of the core pattern audit, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getCorePatternAuditExcludedPathPatterns(): array
    {
        return $this->requiredStringList('core_pattern_audit.excluded_path_patterns');
    }

    /**
     * Warning path prefixes of the core pattern audit, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getCorePatternAuditWarningPathPrefixes(): array
    {
        return $this->requiredStringList('core_pattern_audit.warning_path_prefixes');
    }

    /**
     * Suspicious calls of the core pattern audit, read from the base config only.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty
     */
    public function getCorePatternAuditSuspiciousCalls(): array
    {
        return $this->requiredStringList('core_pattern_audit.suspicious_calls');
    }

    /**
     * Domain marker terms of the core pattern audit (genre value first, base config otherwise).
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when neither source yields a usable list
     */
    public function getCorePatternAuditDomainMarkerTerms(): array
    {
        return $this->genreStringListOrRequired(
            'governance_and_regression.core_pattern_audit.domain_marker_terms',
            'core_pattern_audit.domain_marker_terms',
        );
    }

    /**
     * Allowed literal patterns of the core pattern audit.
     *
     * Every entry must be a map with a non-empty "path" and a non-empty "pattern"
     * that compiles as a PCRE expression; "reason" is optional and defaults to an
     * empty string. All three values are trimmed.
     *
     * @return array list of array{path:string, pattern:string, reason:string}
     *
     * @throws \InvalidArgumentException when the entry is missing or not an array, or
     *                                   when any item fails the checks above
     */
    public function getCorePatternAuditAllowedLiteralPatterns(): array
    {
        $value = $this->requiredValue('core_pattern_audit.allowed_literal_patterns');
        if (!is_array($value)) {
            throw $this->invalid('core_pattern_audit.allowed_literal_patterns', 'must be a list of maps');
        }

        // Reads one field as a trimmed string; unset, null and non-scalar values become ''.
        $readField = static fn (array $entry, string $field): string
            => isset($entry[$field]) && is_scalar($entry[$field]) ? trim((string) $entry[$field]) : '';

        $patterns = [];
        foreach ($value as $index => $item) {
            $path = 'core_pattern_audit.allowed_literal_patterns.' . (string) $index;
            if (!is_array($item)) {
                throw $this->invalid($path, 'must be a map');
            }

            $pathPrefix = $readField($item, 'path');
            $pattern = $readField($item, 'pattern');
            $reason = $readField($item, 'reason');

            if ($pathPrefix === '') {
                throw $this->invalid($path . '.path', 'must not be empty');
            }
            if ($pattern === '') {
                throw $this->invalid($path . '.pattern', 'must not be empty');
            }

            // preg_match() raises a warning and returns false for a malformed pattern.
            // A scoped no-op handler absorbs the warning so only the return value decides.
            set_error_handler(static fn (): bool => true);
            try {
                $compiles = preg_match($pattern, '') !== false;
            } finally {
                restore_error_handler();
            }
            if (!$compiles) {
                throw $this->invalid($path . '.pattern', 'must be a valid regex pattern');
            }

            $patterns[] = [
                'path' => $pathPrefix,
                'pattern' => $pattern,
                'reason' => $reason,
            ];
        }

        return $patterns;
    }

    /**
     * Maximum snippet length of the core pattern audit; values below 20 are rejected.
     *
     * @throws \InvalidArgumentException when the entry is missing, not an integer, or below 20
     */
    public function getCorePatternAuditMaxSnippetLength(): int
    {
        return $this->requiredInt('core_pattern_audit.max_snippet_length', 20);
    }

    /**
     * Resolves a string list from the genre config, falling back to a required base config entry.
     *
     * @param string $genrePath  dot path passed to the genre config
     * @param string $configPath dot path inside the base config used when the genre list is empty
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the genre list is empty and the base
     *                                   entry is missing, not an array, or empty
     */
    private function genreStringListOrRequired(string $genrePath, string $configPath): array
    {
        $fromGenre = $this->genreStringList($genrePath);

        return $fromGenre !== [] ? $fromGenre : $this->requiredStringList($configPath);
    }

    /**
     * String list from the genre config; empty when no genre config is set or it returns null.
     *
     * @return string[]
     */
    private function genreStringList(string $path): array
    {
        return $this->genreConfig?->getValueStringList($path) ?? [];
    }

    /**
     * String from the genre config; empty when no genre config is set or it returns null.
     */
    private function genreString(string $path): string
    {
        return $this->genreConfig?->getValueString($path) ?? '';
    }

    /**
     * Array from the genre config; empty when no genre config is set or it returns null.
     *
     * @return array
     */
    private function genreArray(string $path): array
    {
        return $this->genreConfig?->getValueArray($path) ?? [];
    }

    /**
     * Reads a required integer from the base config.
     *
     * Native integers and strings made of an optional minus sign plus decimal
     * digits (surrounding whitespace ignored) are accepted. Digit strings that do
     * not fit the native integer range are rejected instead of being clamped.
     *
     * @param string $path dot path inside the base config
     * @param int    $min  smallest accepted value
     *
     * @throws \InvalidArgumentException when the entry is missing, not an integer, or below $min
     */
    private function requiredInt(string $path, int $min = PHP_INT_MIN): int
    {
        $value = $this->requiredValue($path);

        if (is_string($value)) {
            $digits = trim($value);
            if (preg_match('/^-?\d+$/', $digits) !== 1) {
                throw $this->invalid($path, 'must be an integer');
            }

            // Arithmetic on a numeric string yields an int when it fits and a float
            // when it overflows, which the is_int() check below then rejects.
            $value = $digits + 0;
        }

        if (!is_int($value)) {
            throw $this->invalid($path, 'must be an integer');
        }

        if ($value < $min) {
            throw $this->invalid($path, sprintf('must be greater than or equal to %d', $min));
        }

        return $value;
    }

    /**
     * Reads a required, non-blank scalar from the base config and returns it trimmed.
     *
     * @throws \InvalidArgumentException when the entry is missing, not scalar, or blank after trimming
     */
    private function requiredString(string $path): string
    {
        $value = $this->requiredValue($path);
        if (!is_scalar($value)) {
            throw $this->invalid($path, 'must be a scalar string');
        }

        $trimmed = trim((string) $value);
        if ($trimmed === '') {
            throw $this->invalid($path, 'must not be empty');
        }

        return $trimmed;
    }

    /**
     * Reads a required, non-empty string list from the base config.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the entry is missing, not an array, or empty after normalisation
     */
    private function requiredStringList(string $path): array
    {
        return $this->nonEmptyStringList($path, $this->requiredValue($path));
    }

    /**
     * Reads an optional string list from the base config.
     *
     * The normalised fallback is returned when the entry is missing, is not an
     * array, or normalises to an empty list.
     *
     * @param string $path     dot path inside the base config
     * @param array  $fallback values used when the entry yields nothing
     *
     * @return string[]
     */
    private function optionalStringList(string $path, array $fallback = []): array
    {
        // normalizeStringList() maps null and other non-arrays to [], so one check covers every case.
        $configured = $this->normalizeStringList($this->optionalValue($path));

        return $configured !== [] ? $configured : $this->normalizeStringList($fallback);
    }

    /**
     * Normalises $value and insists that at least one entry survives.
     *
     * @param string $path  dot path used in the error message
     * @param mixed  $value candidate list
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when $value is not an array or normalises to an empty list
     */
    private function nonEmptyStringList(string $path, mixed $value): array
    {
        if (!is_array($value)) {
            throw $this->invalid($path, 'must be a string list');
        }

        $items = $this->normalizeStringList($value);
        if ($items === []) {
            throw $this->invalid($path, 'must contain at least one value');
        }

        return $items;
    }

    /**
     * Turns an arbitrary value into a list of unique, trimmed, non-empty strings.
     *
     * Non-array input yields an empty list. Non-scalar items are skipped, scalar
     * items are cast to string, and the first occurrence of each value is kept in
     * its original order.
     *
     * @return string[]
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $unique = [];
        foreach ($value as $candidate) {
            if (!is_scalar($candidate)) {
                continue;
            }

            $text = trim((string) $candidate);
            if ($text === '' || in_array($text, $unique, true)) {
                continue;
            }

            $unique[] = $text;
        }

        return $unique;
    }

    /**
     * Looks up a base config value and fails when it is absent.
     *
     * An entry explicitly set to null counts as absent.
     *
     * @throws \InvalidArgumentException when the path does not resolve to a non-null value
     */
    private function requiredValue(string $path): mixed
    {
        return $this->optionalValue($path) ?? throw $this->missing($path);
    }

    /**
     * Walks the base config along a dot-separated path.
     *
     * @return mixed the value at the path, or null when a segment is missing or
     *               an intermediate value is not an array
     */
    private function optionalValue(string $path): mixed
    {
        $cursor = $this->config;

        foreach (explode('.', $path) as $segment) {
            if (!is_array($cursor) || !array_key_exists($segment, $cursor)) {
                return null;
            }

            $cursor = $cursor[$segment];
        }

        return $cursor;
    }

    /**
     * Builds (does not throw) the exception reporting a missing config entry.
     */
    private function missing(string $path): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" is missing.', $path));
    }

    /**
     * Builds (does not throw) the exception reporting an invalid config entry.
     *
     * @param string $path   dot path of the offending entry
     * @param string $reason predicate appended after the quoted path, without the trailing period
     */
    private function invalid(string $path, string $reason): \InvalidArgumentException
    {
        return new \InvalidArgumentException(sprintf('RetrieX governance config "%s" %s.', $path, $reason));
    }
}