This commit is contained in:
team 1
2026-05-01 19:29:01 +02:00
parent ad7cac72be
commit a42f8d656d
9 changed files with 767 additions and 0 deletions

View File

@@ -0,0 +1,323 @@
<?php
declare(strict_types=1);
namespace App\Config;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use SplFileInfo;
/**
 * Text-based audit that looks for configured "suspicious" calls in PHP sources and
 * flags the ones whose string literals mention configured domain marker terms.
 *
 * Every setting (source roots, path filters, call names, marker terms, allow-list,
 * snippet length) is read from GovernanceConfig on each audit() run. The scan works
 * line by line on raw text; the PHP code is not parsed.
 */
final readonly class CorePatternAuditProvider
{
    /**
     * @param string           $projectDir       Project root; source roots are resolved below it and
     *                                           reported paths are relative to it.
     * @param GovernanceConfig $governanceConfig Source of all audit settings.
     */
    public function __construct(
        private string $projectDir,
        private GovernanceConfig $governanceConfig
    ) {
    }

    /**
     * Runs the audit over all PHP files below the configured source roots.
     *
     * A line becomes a finding when it contains at least one suspicious call. It is rated
     * WARN when its string literals also contain a domain marker and the file lies under a
     * warning path prefix; every other finding is rated REVIEW. Lines with markers that are
     * covered by an allow-list entry are dropped. The overall status is WARN as soon as one
     * WARN finding exists, otherwise OK.
     *
     * Files that are excluded by configuration or that cannot be read are listed in
     * "skipped_files", so "scanned_files" only counts files whose content was inspected.
     *
     * @param bool $includeReviewFindings Whether REVIEW findings are collected; WARN findings always are.
     *
     * @return array<string, mixed> Keys: status, summary, warnings, warning_findings,
     *                              review_findings, skipped_files.
     */
    public function audit(bool $includeReviewFindings = false): array
    {
        $sourceRoots = $this->governanceConfig->getCorePatternAuditSourceRoots();
        $excludedPathPrefixes = $this->governanceConfig->getCorePatternAuditExcludedPathPrefixes();
        $excludedPathPatterns = $this->governanceConfig->getCorePatternAuditExcludedPathPatterns();
        $warningPathPrefixes = $this->governanceConfig->getCorePatternAuditWarningPathPrefixes();
        $suspiciousCalls = $this->governanceConfig->getCorePatternAuditSuspiciousCalls();
        $domainMarkers = $this->governanceConfig->getCorePatternAuditDomainMarkerTerms();
        $allowedLiteralPatterns = $this->governanceConfig->getCorePatternAuditAllowedLiteralPatterns();
        $maxSnippetLength = $this->governanceConfig->getCorePatternAuditMaxSnippetLength();

        $sourceFiles = $this->collectSourceFiles($sourceRoots);
        $skippedFiles = [];
        $warningFindings = [];
        $reviewFindings = [];

        foreach ($sourceFiles as $relativePath => $absolutePath) {
            // PHP turns purely numeric array keys into int; the helpers below require strings.
            $relativePath = (string) $relativePath;

            if ($this->isExcludedPath($relativePath, $excludedPathPrefixes, $excludedPathPatterns)) {
                $skippedFiles[] = $relativePath;
                continue;
            }

            $lines = $this->readLines($absolutePath);
            if ($lines === null) {
                // Unreadable file: report it as skipped instead of counting it as scanned.
                $skippedFiles[] = $relativePath;
                continue;
            }

            foreach ($lines as $index => $line) {
                $calls = $this->matchingCalls($line, $suspiciousCalls);
                if ($calls === []) {
                    continue;
                }

                $markers = $this->matchingMarkersInStringLiterals($line, $domainMarkers);
                if ($markers !== [] && $this->isAllowedLiteralFinding($relativePath, $line, $allowedLiteralPatterns)) {
                    continue;
                }

                $severity = $markers !== [] && $this->isWarningPath($relativePath, $warningPathPrefixes)
                    ? 'WARN'
                    : 'REVIEW';

                $finding = [
                    'severity' => $severity,
                    'path' => $relativePath,
                    'line' => $index + 1,
                    'calls' => $calls,
                    'markers' => $markers,
                    'snippet' => $this->compactSnippet($line, $maxSnippetLength),
                ];

                if ($severity === 'WARN') {
                    $warningFindings[] = $finding;
                } elseif ($includeReviewFindings) {
                    $reviewFindings[] = $finding;
                }
            }
        }

        $status = $warningFindings === [] ? 'OK' : 'WARN';

        return [
            'status' => $status,
            'summary' => [
                'source_files' => count($sourceFiles),
                'scanned_files' => count($sourceFiles) - count($skippedFiles),
                'skipped_files' => count($skippedFiles),
                'warning_findings' => count($warningFindings),
                'review_findings' => count($reviewFindings),
                'total_reported_findings' => count($warningFindings) + count($reviewFindings),
            ],
            'warnings' => $this->buildWarnings($warningFindings),
            'warning_findings' => $warningFindings,
            'review_findings' => $reviewFindings,
            'skipped_files' => $skippedFiles,
        ];
    }

    /**
     * Collects every *.php file below the given roots.
     *
     * Empty roots and roots that are not directories are ignored. The result is sorted
     * by relative path.
     *
     * @param string[] $sourceRoots Directories relative to the project dir.
     *
     * @return array<string, string> Relative path => absolute path.
     */
    private function collectSourceFiles(array $sourceRoots): array
    {
        // Strip trailing slashes so "root/" + "/src" cannot become "root//src", which would
        // leave a leading "/" on every relative path and defeat all prefix checks.
        $projectRoot = rtrim($this->projectDir, '/');
        $files = [];

        foreach ($sourceRoots as $sourceRoot) {
            $sourceRoot = trim($sourceRoot, '/');
            if ($sourceRoot === '') {
                continue;
            }

            $absoluteRoot = $projectRoot . '/' . $sourceRoot;
            if (!is_dir($absoluteRoot)) {
                continue;
            }

            $iterator = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($absoluteRoot));
            foreach ($iterator as $file) {
                if (!$file instanceof SplFileInfo || !$file->isFile()) {
                    continue;
                }
                if ($file->getExtension() !== 'php') {
                    continue;
                }

                $absolutePath = $file->getPathname();
                $files[$this->relativePath($absolutePath)] = $absolutePath;
            }
        }

        ksort($files);

        return $files;
    }

    /**
     * Reads a file and splits it into lines.
     *
     * @return string[]|null The lines, or null when the file cannot be read.
     */
    private function readLines(string $absolutePath): ?array
    {
        if (!is_readable($absolutePath)) {
            return null;
        }

        $content = file_get_contents($absolutePath);
        if (!is_string($content)) {
            return null;
        }

        $lines = preg_split('/\R/u', $content);
        if ($lines === false) {
            // The /u modifier fails on invalid UTF-8. Fall back to a byte-wise split on
            // ASCII line endings so that such files are still scanned.
            $lines = preg_split('/\r\n|\r|\n/', $content);
        }

        return $lines === false ? [] : $lines;
    }

    /**
     * Tells whether a file is excluded from the scan, either by path prefix or by regex.
     *
     * @param string[] $prefixes Relative path prefixes.
     * @param string[] $patterns Regex patterns including delimiters.
     */
    private function isExcludedPath(string $relativePath, array $prefixes, array $patterns): bool
    {
        if ($this->startsWithAnyPrefix($relativePath, $prefixes)) {
            return true;
        }

        foreach ($patterns as $pattern) {
            // A pattern that fails to compile yields false (warning suppressed) and simply
            // does not exclude anything.
            if (@preg_match($pattern, $relativePath) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Tells whether findings in this file may be rated WARN.
     *
     * @param string[] $prefixes Relative path prefixes.
     */
    private function isWarningPath(string $relativePath, array $prefixes): bool
    {
        return $this->startsWithAnyPrefix($relativePath, $prefixes);
    }

    /**
     * Shared prefix test; blank prefixes never match.
     *
     * @param string[] $prefixes
     */
    private function startsWithAnyPrefix(string $relativePath, array $prefixes): bool
    {
        foreach ($prefixes as $prefix) {
            $prefix = trim($prefix);
            if ($prefix !== '' && str_starts_with($relativePath, $prefix)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the configured call names that occur on the line.
     *
     * This is a plain, case-sensitive substring test for "<name>(", so a name also matches
     * when it is the tail of a longer identifier, and "name (" with a space does not match.
     *
     * @param string[] $calls
     *
     * @return string[] Matched names without duplicates, in configuration order.
     */
    private function matchingCalls(string $line, array $calls): array
    {
        $matches = [];
        foreach ($calls as $call) {
            $call = trim($call);
            if ($call === '') {
                continue;
            }
            if (str_contains($line, $call . '(')) {
                $matches[] = $call;
            }
        }

        return array_values(array_unique($matches));
    }

    /**
     * Returns the marker terms that occur inside the string literals of the line.
     *
     * The comparison is case-insensitive; markers are returned lower-cased.
     *
     * @param string[] $markers
     *
     * @return string[] Matched markers without duplicates, in configuration order.
     */
    private function matchingMarkersInStringLiterals(string $line, array $markers): array
    {
        $literals = $this->extractStringLiterals($line);
        if ($literals === []) {
            return [];
        }

        // Literals are joined with a newline so a marker cannot match across two literals
        // (unless the marker itself contains a newline).
        $normalizedLiterals = mb_strtolower(implode("\n", $literals), 'UTF-8');

        $matches = [];
        foreach ($markers as $marker) {
            $marker = mb_strtolower(trim($marker), 'UTF-8');
            if ($marker === '') {
                continue;
            }
            if (str_contains($normalizedLiterals, $marker)) {
                $matches[] = $marker;
            }
        }

        return array_values(array_unique($matches));
    }

    /**
     * Extracts the contents of single- and double-quoted literals that open and close
     * on the same line.
     *
     * A backslash drops itself and keeps the following character verbatim. A quote without
     * a closing partner on the line produces no literal; scanning then resumes right after it.
     * The scan is byte-wise, which is safe for UTF-8 because quotes and backslashes are ASCII.
     *
     * @return string[]
     */
    private function extractStringLiterals(string $line): array
    {
        $literals = [];
        $length = strlen($line);

        for ($i = 0; $i < $length; $i++) {
            $quote = $line[$i];
            if ($quote !== "'" && $quote !== '"') {
                continue;
            }

            $buffer = '';
            for ($j = $i + 1; $j < $length; $j++) {
                $char = $line[$j];

                if ($char === '\\') {
                    if ($j + 1 < $length) {
                        $buffer .= $line[$j + 1];
                        $j++;
                    }
                    continue;
                }

                if ($char === $quote) {
                    $literals[] = $buffer;
                    // Continue the outer scan after the closing quote.
                    $i = $j;
                    break;
                }

                $buffer .= $char;
            }
        }

        return $literals;
    }

    /**
     * Tells whether an allow-list entry covers the line: the file path must start with the
     * entry's path and the entry's regex must match the raw line.
     *
     * @param array<int, array{path:string, pattern:string, reason:string}> $allowedLiteralPatterns
     */
    private function isAllowedLiteralFinding(string $relativePath, string $line, array $allowedLiteralPatterns): bool
    {
        foreach ($allowedLiteralPatterns as $allowed) {
            $pathPrefix = trim($allowed['path']);
            $pattern = trim($allowed['pattern']);
            if ($pathPrefix === '' || $pattern === '') {
                continue;
            }
            if (!str_starts_with($relativePath, $pathPrefix)) {
                continue;
            }
            if (@preg_match($pattern, $line) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts an absolute path into a forward-slash path relative to the project dir.
     *
     * Paths outside the project dir are returned unchanged apart from the slash normalisation.
     */
    private function relativePath(string $absolutePath): string
    {
        // Normalise both sides first so mixed separators cannot break the prefix test.
        $normalizedPath = str_replace('\\', '/', $absolutePath);
        $projectRoot = rtrim(str_replace('\\', '/', $this->projectDir), '/') . '/';

        if (!str_starts_with($normalizedPath, $projectRoot)) {
            return $normalizedPath;
        }

        // ltrim guards against doubled separators directly after the project root.
        return ltrim(substr($normalizedPath, strlen($projectRoot)), '/');
    }

    /**
     * Collapses whitespace runs to single spaces, trims, and shortens the text to
     * $maxLength characters (ending in "...") when it is longer.
     *
     * A $maxLength below 20 disables the shortening.
     */
    private function compactSnippet(string $line, int $maxLength): string
    {
        // preg_replace() yields null on invalid UTF-8; fall back to the raw line then.
        $snippet = trim(preg_replace('/\s+/u', ' ', $line) ?? $line);
        if ($maxLength < 20 || mb_strlen($snippet, 'UTF-8') <= $maxLength) {
            return $snippet;
        }

        return mb_substr($snippet, 0, $maxLength - 3, 'UTF-8') . '...';
    }

    /**
     * Builds the human-readable warning list: empty without WARN findings, otherwise a
     * single summary message carrying their count.
     *
     * @param array<int, array<string, mixed>> $warningFindings
     *
     * @return string[]
     */
    private function buildWarnings(array $warningFindings): array
    {
        if ($warningFindings === []) {
            return [];
        }

        return [
            sprintf(
                'Core pattern audit found %d warning finding(s). Review whether these domain-sensitive patterns belong in YAML-backed configuration.',
                count($warningFindings)
            ),
        ];
    }
}

View File

@@ -132,6 +132,104 @@ final class GovernanceConfig
return $this->requiredStringList('language.protected_stopword_terms');
}
/**
 * Source roots for the core pattern audit.
 *
 * @return string[] The required string list stored under "core_pattern_audit.source_roots".
 */
public function getCorePatternAuditSourceRoots(): array
{
    $configPath = 'core_pattern_audit.source_roots';

    return $this->requiredStringList($configPath);
}
/**
 * Path prefixes that the core pattern audit excludes.
 *
 * @return string[] The required string list stored under "core_pattern_audit.excluded_path_prefixes".
 */
public function getCorePatternAuditExcludedPathPrefixes(): array
{
    $configPath = 'core_pattern_audit.excluded_path_prefixes';

    return $this->requiredStringList($configPath);
}
/**
 * Regex patterns (including delimiters) for paths that the core pattern audit excludes.
 *
 * Every entry is test-compiled here, mirroring the check applied to the allow-list patterns,
 * so that a malformed pattern surfaces as a configuration error instead of never matching.
 *
 * @return string[] The validated list stored under "core_pattern_audit.excluded_path_patterns".
 */
public function getCorePatternAuditExcludedPathPatterns(): array
{
    $configPath = 'core_pattern_audit.excluded_path_patterns';
    $patterns = $this->requiredStringList($configPath);

    foreach ($patterns as $index => $pattern) {
        // preg_match() returns false (not 0) when the pattern cannot be compiled;
        // the accompanying warning is suppressed because the exception replaces it.
        if (@preg_match($pattern, '') === false) {
            throw $this->invalid($configPath . '.' . (string) $index, 'must be a valid regex pattern');
        }
    }

    return $patterns;
}
/**
 * Path prefixes under which the core pattern audit may rate findings as warnings.
 *
 * @return string[] The required string list stored under "core_pattern_audit.warning_path_prefixes".
 */
public function getCorePatternAuditWarningPathPrefixes(): array
{
    $configPath = 'core_pattern_audit.warning_path_prefixes';

    return $this->requiredStringList($configPath);
}
/**
 * Call names the core pattern audit searches for.
 *
 * @return string[] The required string list stored under "core_pattern_audit.suspicious_calls".
 */
public function getCorePatternAuditSuspiciousCalls(): array
{
    $configPath = 'core_pattern_audit.suspicious_calls';

    return $this->requiredStringList($configPath);
}
/**
 * Domain marker terms the core pattern audit looks for.
 *
 * @return string[] The required string list stored under "core_pattern_audit.domain_marker_terms".
 */
public function getCorePatternAuditDomainMarkerTerms(): array
{
    $configPath = 'core_pattern_audit.domain_marker_terms';

    return $this->requiredStringList($configPath);
}
/**
 * Allow-list entries for the core pattern audit, read from
 * "core_pattern_audit.allowed_literal_patterns".
 *
 * Each entry must be a map with a non-empty "path" and a non-empty, compilable regex
 * "pattern"; "reason" is optional and defaults to an empty string. All three values are
 * trimmed. Any violation is raised through invalid() with the offending config path.
 *
 * @return array<int, array{path:string, pattern:string, reason:string}>
 */
public function getCorePatternAuditAllowedLiteralPatterns(): array
{
    $configPath = 'core_pattern_audit.allowed_literal_patterns';

    $rawEntries = $this->requiredValue($configPath);
    if (!is_array($rawEntries)) {
        throw $this->invalid($configPath, 'must be a list of maps');
    }

    // Missing or non-scalar fields collapse to '' so they fall into the emptiness checks below.
    $scalarField = static fn (array $map, string $key): string =>
        isset($map[$key]) && is_scalar($map[$key]) ? trim((string) $map[$key]) : '';

    $entries = [];
    foreach ($rawEntries as $position => $rawEntry) {
        $entryPath = $configPath . '.' . (string) $position;
        if (!is_array($rawEntry)) {
            throw $this->invalid($entryPath, 'must be a map');
        }

        $entry = [
            'path' => $scalarField($rawEntry, 'path'),
            'pattern' => $scalarField($rawEntry, 'pattern'),
            'reason' => $scalarField($rawEntry, 'reason'),
        ];

        if ($entry['path'] === '') {
            throw $this->invalid($entryPath . '.path', 'must not be empty');
        }
        if ($entry['pattern'] === '') {
            throw $this->invalid($entryPath . '.pattern', 'must not be empty');
        }
        // Test-compile against an empty subject; false means the regex itself is broken.
        if (@preg_match($entry['pattern'], '') === false) {
            throw $this->invalid($entryPath . '.pattern', 'must be a valid regex pattern');
        }

        $entries[] = $entry;
    }

    return $entries;
}
/**
 * Maximum snippet length for core pattern audit findings.
 *
 * @return int The integer stored under "core_pattern_audit.max_snippet_length"; values below 20 are rejected.
 */
public function getCorePatternAuditMaxSnippetLength(): int
{
    $configPath = 'core_pattern_audit.max_snippet_length';
    $minimum = 20;

    return $this->requiredInt($configPath, $minimum);
}
/**
 * Reads a required config value as an integer.
 *
 * Accepts a native int or a string made only of an optional minus sign and digits
 * (surrounding whitespace is ignored). Anything else, including digit strings that exceed
 * the platform integer range, is rejected through invalid(), as is a value below $min.
 *
 * @param string $path Config path of the value.
 * @param int    $min  Smallest accepted value (inclusive).
 */
private function requiredInt(string $path, int $min = PHP_INT_MIN): int
{
    $value = $this->requiredValue($path);

    if (is_string($value)) {
        $candidate = trim($value);
        // Adding 0 to a digit-only string yields int when it fits and float when it
        // overflows; a plain (int) cast would silently clamp to PHP_INT_MAX / PHP_INT_MIN.
        $value = preg_match('/^-?\d+$/', $candidate) === 1 ? $candidate + 0 : null;
    }

    if (!is_int($value)) {
        throw $this->invalid($path, 'must be an integer');
    }

    if ($value < $min) {
        throw $this->invalid($path, sprintf('must be greater than or equal to %d', $min));
    }

    return $value;
}
private function requiredString(string $path): string
{
$value = $this->requiredValue($path);

View File

@@ -809,6 +809,14 @@ final readonly class RetriexEffectiveConfigProvider
$this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms();
$this->governanceConfig->getVocabularyProtectedShortModelTokens();
$this->governanceConfig->getLanguageProtectedStopwordTerms();
$this->governanceConfig->getCorePatternAuditSourceRoots();
$this->governanceConfig->getCorePatternAuditExcludedPathPrefixes();
$this->governanceConfig->getCorePatternAuditExcludedPathPatterns();
$this->governanceConfig->getCorePatternAuditWarningPathPrefixes();
$this->governanceConfig->getCorePatternAuditSuspiciousCalls();
$this->governanceConfig->getCorePatternAuditDomainMarkerTerms();
$this->governanceConfig->getCorePatternAuditAllowedLiteralPatterns();
$this->governanceConfig->getCorePatternAuditMaxSnippetLength();
} catch (\InvalidArgumentException $e) {
$errors[] = $e->getMessage();
}