From a42f8d656dcc02bf0e4c1b6426220215eaff311e Mon Sep 17 00:00:00 2001 From: team 1 Date: Fri, 1 May 2026 19:29:01 +0200 Subject: [PATCH] patch 16 --- COMMAND_REF.md | 27 ++ ...RE_PATTERN_AUDIT_NOISE_REDUCTION_README.md | 35 ++ ...ATTERN_AUDIT_ALLOWED_LITERAL_FIX_README.md | 30 ++ ..._16_CORE_PATTERN_AUDIT_GUARDRAIL_README.md | 60 ++++ config/retriex/governance.yaml | 61 ++++ src/Command/ConfigPatternAuditCommand.php | 125 +++++++ src/Config/CorePatternAuditProvider.php | 323 ++++++++++++++++++ src/Config/GovernanceConfig.php | 98 ++++++ src/Config/RetriexEffectiveConfigProvider.php | 8 + 9 files changed, 767 insertions(+) create mode 100644 RETRIEX_PATCH_16B_CORE_PATTERN_AUDIT_NOISE_REDUCTION_README.md create mode 100644 RETRIEX_PATCH_16C_CORE_PATTERN_AUDIT_ALLOWED_LITERAL_FIX_README.md create mode 100644 RETRIEX_PATCH_16_CORE_PATTERN_AUDIT_GUARDRAIL_README.md create mode 100644 src/Command/ConfigPatternAuditCommand.php create mode 100644 src/Config/CorePatternAuditProvider.php diff --git a/COMMAND_REF.md b/COMMAND_REF.md index d10f336..2744d29 100644 --- a/COMMAND_REF.md +++ b/COMMAND_REF.md @@ -18,6 +18,7 @@ Diese Referenz ist gegen den realen Codebestand abgeglichen und ersetzt die vera | `mto:agent:system:rebuild` | System | Globaler Hard-Rebuild von Chunks, Vektorindex, Tags und optional Service-Reload | | `mto:agent:config:validate` | Config / Governance | Validiert die effektive RetrieX-Konfiguration | | `mto:agent:config:audit-source` | Config / Governance | Auditiert YAML-Mappings gegen PHP-Fallbacks und Defaults | +| `mto:agent:config:audit-patterns` | Config / Governance | Auditiert fachliche Pattern-, Token- und Signalreste im PHP-Core | | `mto:agent:config:dump-effective` | Config / Governance | Gibt die effektive Konfigurationsinventur aus | | `mto:agent:regression:test` | Config / Governance | Fuehrt Offline-Regression-Guards fuer stabile Pfade aus | | `mto:agent:vector:rebuild` | Vector | Baut den Chunk-Vektorindex aus `index.ndjson` neu | @@ 
-556,3 +557,29 @@ Der aktuelle Custom-CLI-Umfang des Systems besteht aus mindestens **18 projektsp - `mto:agent:vector:health` - `mto:agent:tags:rebuild` - `mto:agent:tag:health` + +### `mto:agent:config:audit-patterns` + +Audits remaining pattern-, token- and signal-sensitive calls in PHP core files for developer-policy review. + +**Signature** +```bash +bin/console mto:agent:config:audit-patterns [--details] [--json] [--all] +``` + +**Purpose** +- finds configured calls such as `preg_match`, `str_contains`, `in_array` and related pattern/token helpers in configured source roots +- raises WARN findings when configured domain marker terms are involved +- keeps the command non-blocking and review-oriented; it does not change runtime behavior and does not activate strict validation + +**Options** +- `--details` renders reviewable warning rows +- `--json` renders machine-readable audit output +- `--all` includes lower-priority REVIEW findings in addition to WARN findings + +**Expected use before merge** +```bash +bin/console mto:agent:config:audit-patterns --details +``` + +Review WARN findings and move new semantic terms, product names, intent words, commerce signals or retrieval/follow-up patterns to YAML-backed configuration when they are not purely technical. diff --git a/RETRIEX_PATCH_16B_CORE_PATTERN_AUDIT_NOISE_REDUCTION_README.md b/RETRIEX_PATCH_16B_CORE_PATTERN_AUDIT_NOISE_REDUCTION_README.md new file mode 100644 index 0000000..4b27a80 --- /dev/null +++ b/RETRIEX_PATCH_16B_CORE_PATTERN_AUDIT_NOISE_REDUCTION_README.md @@ -0,0 +1,35 @@ +# RetrieX Patch 16b - Core Pattern Audit Noise Reduction + +## Ziel + +Patch 16b reduziert False Positives im neuen `mto:agent:config:audit-patterns` Command. + +Der erste p16-Scanner bewertete die komplette PHP-Codezeile. Dadurch wurden Marker in Methodennamen oder Config-Getter-Namen wie `get...Shop...Pattern()` als harte fachliche Core-Patterns gewertet, obwohl die eigentlichen Werte bereits YAML-backed sind. 
+ +## Änderungen + +- Domain-Marker werden nur noch in String-Literalen einer verdächtigen Call-Zeile gesucht. +- Treffer außerhalb der konfigurierten `warning_path_prefixes` werden nicht mehr als WARN eskaliert. +- Bekannte technische Markdown-Heading-Parser für `Produkt Titel` sind über `core_pattern_audit.allowed_literal_patterns` dokumentiert und allowlisted. +- `GovernanceConfig` validiert die neue Allowlist-Struktur. + +## Verhalten + +Der Command bleibt nicht-blockierend und verändert keine Runtime-Logik. + +Erwartung nach p16b: + +```bash +bin/console mto:agent:config:audit-patterns --details +``` + +soll keine WARNs mehr für reine Config-Getter-/Methodennamen melden. + +## Lokale Prüfung + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/RETRIEX_PATCH_16C_CORE_PATTERN_AUDIT_ALLOWED_LITERAL_FIX_README.md b/RETRIEX_PATCH_16C_CORE_PATTERN_AUDIT_ALLOWED_LITERAL_FIX_README.md new file mode 100644 index 0000000..d6ce2e6 --- /dev/null +++ b/RETRIEX_PATCH_16C_CORE_PATTERN_AUDIT_ALLOWED_LITERAL_FIX_README.md @@ -0,0 +1,30 @@ +# RetrieX Patch 16c - Core Pattern Audit Allowed Literal Fix + +## Purpose + +Patch 16c fixes the last two false-positive warnings from the warn-only core pattern audit. + +The remaining warnings were technical markdown-heading regex parsers for product-title metadata, not hardcoded domain decision logic. The p16b allowlist used a semantic whitespace regex and therefore did not match the escaped PHP regex literal as it appears in source code. + +## Changes + +- Updates `config/retriex/governance.yaml`. +- Adjusts the allowlist patterns for the technical `Produkt\s+Titel` markdown-heading parser so they match escaped PHP source literals. +- Adds no runtime behavior change. +- Adds no strict validation. +- Changes no retrieval, prompt, commerce, scoring, or ranking logic. 
+ +## Expected result + +`bin/console mto:agent:config:audit-patterns --details` should return `OK` unless new domain-sensitive string literals were introduced elsewhere. + +## Validation + +Run: + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` diff --git a/RETRIEX_PATCH_16_CORE_PATTERN_AUDIT_GUARDRAIL_README.md b/RETRIEX_PATCH_16_CORE_PATTERN_AUDIT_GUARDRAIL_README.md new file mode 100644 index 0000000..ea8e440 --- /dev/null +++ b/RETRIEX_PATCH_16_CORE_PATTERN_AUDIT_GUARDRAIL_README.md @@ -0,0 +1,60 @@ +# RetrieX Patch 16 - Core Pattern Audit Guardrail + +## Ziel + +Patch 16 ergänzt einen nicht-blockierenden Audit-Command, der neue oder verbliebene fachliche Pattern-, Token- und Signal-Logik im PHP-Core sichtbar macht. + +Der Patch ändert keine Runtime-Entscheidungen, aktiviert keinen Strict-Modus und externalisiert keine weiteren fachlichen Werte. Er dient als Review-Guardrail nach der abgeschlossenen YAML-only-Migration und den Patches p13b, p14 und p15c. + +## Neuer Command + +```bash +bin/console mto:agent:config:audit-patterns --details +``` + +Optionen: + +```bash +bin/console mto:agent:config:audit-patterns --json +bin/console mto:agent:config:audit-patterns --details --all +``` + +## Konfiguration + +Die Audit-Regeln liegen in `config/retriex/governance.yaml` unter: + +```yaml +retriex.governance.config.core_pattern_audit +``` + +Konfigurierbar sind: + +- Source-Roots +- ausgeschlossene Pfade +- ausgeschlossene Pfad-Patterns +- Review-Pfadpräfixe für sensible Bereiche +- zu prüfende PHP-Calls +- Domain-Marker-Terme +- maximale Snippet-Länge + +## Verhalten + +- `WARN`: konfigurierte Domain-Marker sind beteiligt. +- `REVIEW`: technische oder nicht eindeutig fachliche Treffer; nur mit `--all` sichtbar. 
+- Der Command gibt auch bei WARN-Findings `SUCCESS` zurück, damit p16 keine bestehende Pipeline blockiert. + +## Erwartete lokale Prüfung nach Einspielen + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Bewusst nicht enthalten + +- keine Strict YAML Validation +- keine Runtime-Pattern-Änderung +- keine Scoring-/Retrieval-Änderung +- keine automatische Blockade von Deployments diff --git a/config/retriex/governance.yaml b/config/retriex/governance.yaml index 95c4cdf..9bf4ab7 100644 --- a/config/retriex/governance.yaml +++ b/config/retriex/governance.yaml @@ -63,3 +63,64 @@ parameters: - testomat - indikator - '0,02' + core_pattern_audit: + source_roots: + - src + excluded_path_prefixes: + - src/Config/CorePatternAuditProvider.php + - src/Command/ConfigPatternAuditCommand.php + - src/Entity/ + excluded_path_patterns: + - '~(^|/)vendor(/|$)~' + - '~(^|/)var(/|$)~' + - '~(^|/)node_modules(/|$)~' + warning_path_prefixes: + - src/Agent/ + - src/Commerce/ + - src/Intent/ + - src/Knowledge/Retrieval/ + suspicious_calls: + - preg_match + - preg_match_all + - preg_replace + - preg_split + - str_contains + - stripos + - strpos + - str_starts_with + - str_ends_with + - in_array + - array_intersect + - array_intersect_key + domain_marker_terms: + - testomat + - indikator + - indikatortyp + - grenzwert + - messbereich + - reagenz + - reagens + - shop + - produkt + - artikel + - kaufen + - bestellen + - geraet + - gerät + - messgerät + - messgeraet + - analysegerät + - analysegeraet + - analysator + - wasserhärte + - wasserhaerte + - chlor + - redox + allowed_literal_patterns: + - path: src/Knowledge/Retrieval/NdjsonChunkLookup.php + pattern: '/Produkt\\s\+Titel/iu' + reason: 'Technical markdown heading parser for product-title metadata.' 
+ - path: src/Knowledge/Retrieval/NdjsonHybridRetriever.php + pattern: '/Produkt\\s\+Titel/iu' + reason: 'Technical markdown heading parser for product-title metadata.' + max_snippet_length: 180 diff --git a/src/Command/ConfigPatternAuditCommand.php b/src/Command/ConfigPatternAuditCommand.php new file mode 100644 index 0000000..05d2cbb --- /dev/null +++ b/src/Command/ConfigPatternAuditCommand.php @@ -0,0 +1,125 @@ +addOption('json', null, InputOption::VALUE_NONE, 'Render the full audit result as JSON.') + ->addOption('details', null, InputOption::VALUE_NONE, 'Render detailed warning rows in the console summary.') + ->addOption('all', null, InputOption::VALUE_NONE, 'Also include lower-priority REVIEW findings.'); + } + + protected function execute(InputInterface $input, OutputInterface $output): int + { + $result = $this->provider->audit((bool) $input->getOption('all')); + + if ((bool) $input->getOption('json')) { + $json = json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + $output->writeln(is_string($json) ? $json : '{}'); + + return Command::SUCCESS; + } + + $this->renderSummary( + new SymfonyStyle($input, $output), + $result, + (bool) $input->getOption('details'), + (bool) $input->getOption('all') + ); + + return Command::SUCCESS; + } + + /** @param array $result */ + private function renderSummary(SymfonyStyle $io, array $result, bool $details, bool $includeAll): void + { + $io->title('RetrieX core pattern audit'); + + $summary = is_array($result['summary'] ?? null) ? $result['summary'] : []; + $io->definitionList( + ['status' => (string) ($result['status'] ?? 'UNKNOWN')], + ['source_files' => (string) ($summary['source_files'] ?? 0)], + ['scanned_files' => (string) ($summary['scanned_files'] ?? 0)], + ['skipped_files' => (string) ($summary['skipped_files'] ?? 0)], + ['warning_findings' => (string) ($summary['warning_findings'] ?? 0)], + ['review_findings' => (string) ($summary['review_findings'] ?? 
0)], + ['total_reported_findings' => (string) ($summary['total_reported_findings'] ?? 0)] + ); + + $warnings = is_array($result['warnings'] ?? null) ? $result['warnings'] : []; + if ($warnings !== []) { + $io->section('Warnings'); + foreach ($warnings as $warning) { + $io->writeln('- ' . (string) $warning); + } + } + + if (!$details) { + $note = 'Use --details for warning rows or --json for the complete machine-readable audit.'; + if (!$includeAll) { + $note .= ' Add --all to include lower-priority REVIEW findings.'; + } + $io->note($note); + return; + } + + $this->renderFindingTable($io, 'Warning findings', $result['warning_findings'] ?? []); + + if ($includeAll) { + $this->renderFindingTable($io, 'Review findings', $result['review_findings'] ?? []); + } + } + + /** @param mixed $findings */ + private function renderFindingTable(SymfonyStyle $io, string $title, mixed $findings): void + { + if (!is_array($findings) || $findings === []) { + return; + } + + $rows = []; + foreach ($findings as $finding) { + if (!is_array($finding)) { + continue; + } + + $rows[] = [ + (string) ($finding['severity'] ?? ''), + (string) ($finding['path'] ?? ''), + (string) ($finding['line'] ?? ''), + implode(', ', is_array($finding['calls'] ?? null) ? $finding['calls'] : []), + implode(', ', is_array($finding['markers'] ?? null) ? $finding['markers'] : []), + (string) ($finding['snippet'] ?? 
''), + ]; + } + + if ($rows === []) { + return; + } + + $io->section($title); + $io->table(['Severity', 'Path', 'Line', 'Calls', 'Markers', 'Snippet'], $rows); + } +} diff --git a/src/Config/CorePatternAuditProvider.php b/src/Config/CorePatternAuditProvider.php new file mode 100644 index 0000000..88873ed --- /dev/null +++ b/src/Config/CorePatternAuditProvider.php @@ -0,0 +1,323 @@ + + */ + public function audit(bool $includeReviewFindings = false): array + { + $sourceRoots = $this->governanceConfig->getCorePatternAuditSourceRoots(); + $excludedPathPrefixes = $this->governanceConfig->getCorePatternAuditExcludedPathPrefixes(); + $excludedPathPatterns = $this->governanceConfig->getCorePatternAuditExcludedPathPatterns(); + $warningPathPrefixes = $this->governanceConfig->getCorePatternAuditWarningPathPrefixes(); + $suspiciousCalls = $this->governanceConfig->getCorePatternAuditSuspiciousCalls(); + $domainMarkers = $this->governanceConfig->getCorePatternAuditDomainMarkerTerms(); + $allowedLiteralPatterns = $this->governanceConfig->getCorePatternAuditAllowedLiteralPatterns(); + $maxSnippetLength = $this->governanceConfig->getCorePatternAuditMaxSnippetLength(); + + $sourceFiles = $this->collectSourceFiles($sourceRoots); + $skippedFiles = []; + $warningFindings = []; + $reviewFindings = []; + + foreach ($sourceFiles as $relativePath => $absolutePath) { + if ($this->isExcludedPath($relativePath, $excludedPathPrefixes, $excludedPathPatterns)) { + $skippedFiles[] = $relativePath; + continue; + } + + $content = file_get_contents($absolutePath); + if (!is_string($content)) { + continue; + } + + $lines = preg_split('/\R/u', $content) ?: []; + foreach ($lines as $index => $line) { + $calls = $this->matchingCalls((string) $line, $suspiciousCalls); + if ($calls === []) { + continue; + } + + $markers = $this->matchingMarkersInStringLiterals((string) $line, $domainMarkers); + if ($markers !== [] && $this->isAllowedLiteralFinding($relativePath, (string) $line, $allowedLiteralPatterns)) 
{ + continue; + } + + $severity = $markers !== [] && $this->isWarningPath($relativePath, $warningPathPrefixes) + ? 'WARN' + : 'REVIEW'; + $finding = [ + 'severity' => $severity, + 'path' => $relativePath, + 'line' => $index + 1, + 'calls' => $calls, + 'markers' => $markers, + 'snippet' => $this->compactSnippet((string) $line, $maxSnippetLength), + ]; + + if ($severity === 'WARN') { + $warningFindings[] = $finding; + } elseif ($includeReviewFindings) { + $reviewFindings[] = $finding; + } + } + } + + $status = $warningFindings === [] ? 'OK' : 'WARN'; + + return [ + 'status' => $status, + 'summary' => [ + 'source_files' => count($sourceFiles), + 'scanned_files' => count($sourceFiles) - count($skippedFiles), + 'skipped_files' => count($skippedFiles), + 'warning_findings' => count($warningFindings), + 'review_findings' => count($reviewFindings), + 'total_reported_findings' => count($warningFindings) + count($reviewFindings), + ], + 'warnings' => $this->buildWarnings($warningFindings), + 'warning_findings' => $warningFindings, + 'review_findings' => $reviewFindings, + 'skipped_files' => $skippedFiles, + ]; + } + + /** + * @param string[] $sourceRoots + * @return array + */ + private function collectSourceFiles(array $sourceRoots): array + { + $files = []; + + foreach ($sourceRoots as $sourceRoot) { + $sourceRoot = trim($sourceRoot, '/'); + if ($sourceRoot === '') { + continue; + } + + $absoluteRoot = $this->projectDir . '/' . 
$sourceRoot; + if (!is_dir($absoluteRoot)) { + continue; + } + + $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($absoluteRoot)); + foreach ($iterator as $file) { + if (!$file instanceof SplFileInfo || !$file->isFile()) { + continue; + } + + if ($file->getExtension() !== 'php') { + continue; + } + + $absolutePath = $file->getPathname(); + $relativePath = $this->relativePath($absolutePath); + $files[$relativePath] = $absolutePath; + } + } + + ksort($files); + + return $files; + } + + /** + * @param string[] $prefixes + * @param string[] $patterns + */ + private function isExcludedPath(string $relativePath, array $prefixes, array $patterns): bool + { + foreach ($prefixes as $prefix) { + $prefix = trim($prefix); + if ($prefix !== '' && str_starts_with($relativePath, $prefix)) { + return true; + } + } + + foreach ($patterns as $pattern) { + if (@preg_match($pattern, $relativePath) === 1) { + return true; + } + } + + return false; + } + + /** @param string[] $prefixes */ + private function isWarningPath(string $relativePath, array $prefixes): bool + { + foreach ($prefixes as $prefix) { + $prefix = trim($prefix); + if ($prefix !== '' && str_starts_with($relativePath, $prefix)) { + return true; + } + } + + return false; + } + + /** + * @param string[] $calls + * @return string[] + */ + private function matchingCalls(string $line, array $calls): array + { + $matches = []; + foreach ($calls as $call) { + $call = trim($call); + if ($call === '') { + continue; + } + + if (str_contains($line, $call . 
'(')) { + $matches[] = $call; + } + } + + return array_values(array_unique($matches)); + } + + /** + * @param string[] $markers + * @return string[] + */ + private function matchingMarkersInStringLiterals(string $line, array $markers): array + { + $literals = $this->extractStringLiterals($line); + if ($literals === []) { + return []; + } + + $normalizedLiterals = mb_strtolower(implode("\n", $literals), 'UTF-8'); + $matches = []; + + foreach ($markers as $marker) { + $marker = mb_strtolower(trim($marker), 'UTF-8'); + if ($marker === '') { + continue; + } + + if (str_contains($normalizedLiterals, $marker)) { + $matches[] = $marker; + } + } + + return array_values(array_unique($matches)); + } + + /** + * @return string[] + */ + private function extractStringLiterals(string $line): array + { + $literals = []; + $length = strlen($line); + + for ($i = 0; $i < $length; $i++) { + $quote = $line[$i]; + if ($quote !== "'" && $quote !== '"') { + continue; + } + + $buffer = ''; + for ($j = $i + 1; $j < $length; $j++) { + $char = $line[$j]; + if ($char === '\\') { + if ($j + 1 < $length) { + $buffer .= $line[$j + 1]; + $j++; + } + continue; + } + + if ($char === $quote) { + $literals[] = $buffer; + $i = $j; + break; + } + + $buffer .= $char; + } + } + + return $literals; + } + + /** + * @param array $allowedLiteralPatterns + */ + private function isAllowedLiteralFinding(string $relativePath, string $line, array $allowedLiteralPatterns): bool + { + foreach ($allowedLiteralPatterns as $allowed) { + $pathPrefix = trim($allowed['path']); + $pattern = trim($allowed['pattern']); + if ($pathPrefix === '' || $pattern === '') { + continue; + } + + if (!str_starts_with($relativePath, $pathPrefix)) { + continue; + } + + if (@preg_match($pattern, $line) === 1) { + return true; + } + } + + return false; + } + + private function relativePath(string $absolutePath): string + { + $projectDir = rtrim($this->projectDir, '/') . 
'/'; + if (str_starts_with($absolutePath, $projectDir)) { + return str_replace('\\', '/', substr($absolutePath, strlen($projectDir))); + } + + return str_replace('\\', '/', $absolutePath); + } + + private function compactSnippet(string $line, int $maxLength): string + { + $snippet = trim(preg_replace('/\s+/u', ' ', $line) ?? $line); + if ($maxLength < 20 || mb_strlen($snippet, 'UTF-8') <= $maxLength) { + return $snippet; + } + + return mb_substr($snippet, 0, $maxLength - 3, 'UTF-8') . '...'; + } + + /** + * @param array> $warningFindings + * @return string[] + */ + private function buildWarnings(array $warningFindings): array + { + if ($warningFindings === []) { + return []; + } + + return [ + sprintf( + 'Core pattern audit found %d warning finding(s). Review whether these domain-sensitive patterns belong in YAML-backed configuration.', + count($warningFindings) + ), + ]; + } +} diff --git a/src/Config/GovernanceConfig.php b/src/Config/GovernanceConfig.php index e97fbf8..7b7c6e8 100644 --- a/src/Config/GovernanceConfig.php +++ b/src/Config/GovernanceConfig.php @@ -132,6 +132,104 @@ final class GovernanceConfig return $this->requiredStringList('language.protected_stopword_terms'); } + /** @return string[] */ + public function getCorePatternAuditSourceRoots(): array + { + return $this->requiredStringList('core_pattern_audit.source_roots'); + } + + /** @return string[] */ + public function getCorePatternAuditExcludedPathPrefixes(): array + { + return $this->requiredStringList('core_pattern_audit.excluded_path_prefixes'); + } + + /** @return string[] */ + public function getCorePatternAuditExcludedPathPatterns(): array + { + return $this->requiredStringList('core_pattern_audit.excluded_path_patterns'); + } + + /** @return string[] */ + public function getCorePatternAuditWarningPathPrefixes(): array + { + return $this->requiredStringList('core_pattern_audit.warning_path_prefixes'); + } + + /** @return string[] */ + public function getCorePatternAuditSuspiciousCalls(): 
array + { + return $this->requiredStringList('core_pattern_audit.suspicious_calls'); + } + + /** @return string[] */ + public function getCorePatternAuditDomainMarkerTerms(): array + { + return $this->requiredStringList('core_pattern_audit.domain_marker_terms'); + } + + /** @return array */ + public function getCorePatternAuditAllowedLiteralPatterns(): array + { + $value = $this->requiredValue('core_pattern_audit.allowed_literal_patterns'); + if (!is_array($value)) { + throw $this->invalid('core_pattern_audit.allowed_literal_patterns', 'must be a list of maps'); + } + + $out = []; + foreach ($value as $index => $item) { + $path = 'core_pattern_audit.allowed_literal_patterns.' . (string) $index; + if (!is_array($item)) { + throw $this->invalid($path, 'must be a map'); + } + + $pathPrefix = isset($item['path']) && is_scalar($item['path']) ? trim((string) $item['path']) : ''; + $pattern = isset($item['pattern']) && is_scalar($item['pattern']) ? trim((string) $item['pattern']) : ''; + $reason = isset($item['reason']) && is_scalar($item['reason']) ? trim((string) $item['reason']) : ''; + + if ($pathPrefix === '') { + throw $this->invalid($path . '.path', 'must not be empty'); + } + if ($pattern === '') { + throw $this->invalid($path . '.pattern', 'must not be empty'); + } + if (@preg_match($pattern, '') === false) { + throw $this->invalid($path . 
'.pattern', 'must be a valid regex pattern'); + } + + $out[] = [ + 'path' => $pathPrefix, + 'pattern' => $pattern, + 'reason' => $reason, + ]; + } + + return $out; + } + + public function getCorePatternAuditMaxSnippetLength(): int + { + return $this->requiredInt('core_pattern_audit.max_snippet_length', 20); + } + + private function requiredInt(string $path, int $min = PHP_INT_MIN): int + { + $value = $this->requiredValue($path); + if (is_int($value)) { + $intValue = $value; + } elseif (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) { + $intValue = (int) trim($value); + } else { + throw $this->invalid($path, 'must be an integer'); + } + + if ($intValue < $min) { + throw $this->invalid($path, sprintf('must be greater than or equal to %d', $min)); + } + + return $intValue; + } + private function requiredString(string $path): string { $value = $this->requiredValue($path); diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 8be6785..d3007ca 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -809,6 +809,14 @@ final readonly class RetriexEffectiveConfigProvider $this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms(); $this->governanceConfig->getVocabularyProtectedShortModelTokens(); $this->governanceConfig->getLanguageProtectedStopwordTerms(); + $this->governanceConfig->getCorePatternAuditSourceRoots(); + $this->governanceConfig->getCorePatternAuditExcludedPathPrefixes(); + $this->governanceConfig->getCorePatternAuditExcludedPathPatterns(); + $this->governanceConfig->getCorePatternAuditWarningPathPrefixes(); + $this->governanceConfig->getCorePatternAuditSuspiciousCalls(); + $this->governanceConfig->getCorePatternAuditDomainMarkerTerms(); + $this->governanceConfig->getCorePatternAuditAllowedLiteralPatterns(); + $this->governanceConfig->getCorePatternAuditMaxSnippetLength(); } catch (\InvalidArgumentException $e) 
{ $errors[] = $e->getMessage(); }