This commit is contained in:
team 1
2026-05-04 08:38:53 +02:00
parent c5e212f8f2
commit c00cb3a9b9
16 changed files with 482 additions and 88 deletions

View File

@@ -808,6 +808,17 @@ final class AgentRunnerConfig
return $this->getRequiredBool('shop_prompt.meta_query_guard.enabled');
}
/**
 * Returns the language cleanup profile name used by the shop query context fallback.
 *
 * Reads the optional `shop_prompt.meta_query_guard.cleanup_profile` setting. When the
 * setting is not a string, or is blank after trimming, the built-in default
 * `shop_context_fallback` is returned instead.
 *
 * @return string Trimmed configured profile name, or the default profile name.
 */
public function getShopQueryContextFallbackCleanupProfile(): string
{
    $configured = $this->optionalValue('shop_prompt.meta_query_guard.cleanup_profile');
    if (!is_string($configured)) {
        return 'shop_context_fallback';
    }

    $profile = trim($configured);

    return $profile === '' ? 'shop_context_fallback' : $profile;
}
/**
* @return string[]
*/

View File

@@ -132,6 +132,48 @@ final class GovernanceConfig
return $this->requiredStringList('language.protected_stopword_terms');
}
/**
 * Lists the language cleanup profile names that governance requires.
 *
 * @return string[] The required string list stored under `language.required_cleanup_profiles`.
 */
public function getLanguageRequiredCleanupProfiles(): array
{
    $requiredProfiles = $this->requiredStringList('language.required_cleanup_profiles');

    return $requiredProfiles;
}
/**
 * Reads `language.required_profile_terms` and normalizes it into per-profile term buckets.
 *
 * The configured value must be a non-empty map keyed by non-empty profile names. Each
 * entry must be an array; its `stopwords`, `phrases` and `meta_terms` entries (missing
 * ones count as empty lists) are passed through normalizeStringList(). A profile whose
 * three buckets are all empty is rejected.
 *
 * @return array<string, array{stopwords:string[], phrases:string[], meta_terms:string[]}>
 */
public function getLanguageRequiredProfileTerms(): array
{
    $rawProfiles = $this->requiredValue('language.required_profile_terms');
    if (!is_array($rawProfiles)) {
        throw $this->invalid('language.required_profile_terms', 'must be a map of cleanup profile term lists');
    }
    // Every loop iteration below either throws or adds an entry, so an empty result
    // is only possible for an empty input map.
    if ($rawProfiles === []) {
        throw $this->invalid('language.required_profile_terms', 'must contain at least one cleanup profile');
    }

    $termsByProfile = [];
    foreach ($rawProfiles as $rawName => $rawTerms) {
        $hasUsableName = is_string($rawName) && trim($rawName) !== '';
        if (!$hasUsableName || !is_array($rawTerms)) {
            throw $this->invalid('language.required_profile_terms', 'must be keyed by non-empty cleanup profile names');
        }

        // Buckets are normalized in a fixed order: stopwords, phrases, meta_terms.
        $buckets = [];
        foreach (['stopwords', 'phrases', 'meta_terms'] as $bucketName) {
            $buckets[$bucketName] = $this->normalizeStringList($rawTerms[$bucketName] ?? []);
        }

        $name = trim($rawName);
        $termsByProfile[$name] = $buckets;

        $hasRequiredTerm = false;
        foreach ($buckets as $bucketTerms) {
            if ($bucketTerms !== []) {
                $hasRequiredTerm = true;
                break;
            }
        }
        if (!$hasRequiredTerm) {
            throw $this->invalid('language.required_profile_terms.' . $name, 'must contain at least one required term');
        }
    }

    return $termsByProfile;
}
/** @return string[] */
public function getCorePatternAuditSourceRoots(): array
{

View File

@@ -117,6 +117,77 @@ final readonly class RetriexEffectiveConfigProvider
$warnings[] = 'Config validation warning: ' . $warning;
}
// Language cleanup guardrails. Each check stores a boolean under a derived key in
// $checks and, when it fails, appends a human-readable message to $errors.
try {
$cleanupProfileNames = $this->languageCleanupConfig->getCleanupProfileNames();
// 1) Every cleanup profile that governance lists as required must be registered.
foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $profileName) {
$key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName);
$checks[$key] = in_array($profileName, $cleanupProfileNames, true);
if (!$checks[$key]) {
$errors[] = 'Missing required language cleanup profile: ' . $profileName . '.';
continue;
}
// The return value is discarded; the call is made only for its side effects.
// NOTE(review): presumably this throws \InvalidArgumentException for a malformed profile — confirm.
$this->languageCleanupConfig->getCleanupProfile($profileName);
}
// 2) Every governance-protected term must be registered as protected, must not be a
// legacy stopword, and must not appear in any bucket of any registered cleanup profile.
$legacyStopwords = $this->stopWordsConfig->getStopWords();
foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protectedTerm) {
$key = 'language_protected_term_' . $this->guardrailCheckKey($protectedTerm);
$checks[$key . '_registered'] = $this->languageCleanupConfig->isProtectedTerm($protectedTerm);
if (!$checks[$key . '_registered']) {
$errors[] = 'Missing protected language cleanup term: ' . $protectedTerm . '.';
}
$checks[$key . '_not_legacy_stopword'] = !in_array($protectedTerm, $legacyStopwords, true);
if (!$checks[$key . '_not_legacy_stopword']) {
$errors[] = 'Protected language cleanup term is still a legacy stopword: ' . $protectedTerm . '.';
}
// NOTE(review): getCleanupProfile() is re-invoked for each protected term; the lookup
// does not depend on the term and could be hoisted if it is a pure getter — confirm.
foreach ($cleanupProfileNames as $profileName) {
$profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
foreach (['stopwords', 'phrases', 'meta_terms'] as $bucket) {
$bucketKey = $key . '_not_in_' . $this->guardrailCheckKey($profileName . '_' . $bucket);
$checks[$bucketKey] = !in_array($protectedTerm, $profile[$bucket] ?? [], true);
if (!$checks[$bucketKey]) {
$errors[] = sprintf('Protected language cleanup term %s is present in %s.%s.', $protectedTerm, $profileName, $bucket);
}
}
}
}
// 3) Every term that governance requires for a profile must be present in the
// matching bucket of that profile; a bucket missing from the profile counts as empty.
foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $requiredTerms) {
$profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
foreach ($requiredTerms as $bucket => $terms) {
foreach ($terms as $term) {
$key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName . '_' . $bucket . '_' . $term);
$checks[$key] = in_array($term, $profile[$bucket] ?? [], true);
if (!$checks[$key]) {
$errors[] = sprintf('Missing language cleanup profile term: %s.%s must contain %s.', $profileName, $bucket, $term);
}
}
}
}
// 4) Each consumer must be wired to its expected, fixed cleanup profile name.
$checks['commerce_query_cleanup_profile_wired'] = $this->commerceQueryParserConfig->getCleanupProfile() === 'commerce_query';
if (!$checks['commerce_query_cleanup_profile_wired']) {
$errors[] = 'Commerce query parser is not wired to cleanup profile commerce_query.';
}
$checks['rag_evidence_cleanup_profile_wired'] = $this->agentRunnerConfig->getRagEvidenceCleanupProfile() === 'rag_evidence';
if (!$checks['rag_evidence_cleanup_profile_wired']) {
$errors[] = 'RAG evidence guard is not wired to cleanup profile rag_evidence.';
}
$checks['shop_context_fallback_cleanup_profile_wired'] = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile() === 'shop_context_fallback';
if (!$checks['shop_context_fallback_cleanup_profile_wired']) {
$errors[] = 'Shop context fallback is not wired to cleanup profile shop_context_fallback.';
}
} catch (\InvalidArgumentException $e) {
// An invalid-configuration exception raised by any call above is reported as a failed
// check plus an error message instead of propagating; the remaining checks are skipped.
// NOTE(review): within the visible code this key is only ever written as false here;
// no success path sets it to true — confirm that consumers expect that.
$checks['language_cleanup_profile_config_valid'] = false;
$errors[] = 'Language cleanup profile guardrails failed: ' . $e->getMessage();
}
$importantShortModelTokens = $this->retrieverConfig->importantShortModelTokens();
foreach ($this->governanceConfig->getRegressionProtectedShortModelTokens() as $token) {
$key = 'important_short_model_token_' . $this->guardrailCheckKey($token);
@@ -543,6 +614,7 @@ final readonly class RetriexEffectiveConfigProvider
],
'meta_query_guard' => [
'enabled' => $this->agentRunnerConfig->isShopQueryMetaGuardEnabled(),
'cleanup_profile' => $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile(),
'context_fallback_use_full_history' => $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback(),
'meta_only_terms' => $this->agentRunnerConfig->getShopQueryMetaOnlyTerms(),
'context_fallback_enabled' => $this->agentRunnerConfig->isShopQueryContextFallbackEnabled(),
@@ -799,7 +871,17 @@ final readonly class RetriexEffectiveConfigProvider
private function languageConfig(): array
{
    // Builds the effective "language" configuration section: the legacy stopword list,
    // the protected terms, the registered cleanup profile names, and each profile's
    // definition keyed by profile name.
    //
    // The earlier stopwords-only `return` that preceded this code made everything below
    // it unreachable, so the cleanup profile data was never exported; it is removed.

    // Resolve the names once so the exported name list and the profile map are built
    // from the same list.
    $profileNames = $this->languageCleanupConfig->getCleanupProfileNames();

    $profiles = [];
    foreach ($profileNames as $profileName) {
        $profiles[$profileName] = $this->languageCleanupConfig->getCleanupProfile($profileName);
    }

    return [
        'stopwords' => $this->stopWordsConfig->getStopWords(),
        'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
        'cleanup_profile_names' => $profileNames,
        'cleanup_profiles' => $profiles,
    ];
}
/** @return array<string, mixed> */
@@ -861,6 +943,8 @@ final readonly class RetriexEffectiveConfigProvider
$this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms();
$this->governanceConfig->getVocabularyProtectedShortModelTokens();
$this->governanceConfig->getLanguageProtectedStopwordTerms();
$this->governanceConfig->getLanguageRequiredCleanupProfiles();
$this->governanceConfig->getLanguageRequiredProfileTerms();
$this->governanceConfig->getCorePatternAuditSourceRoots();
$this->governanceConfig->getCorePatternAuditExcludedPathPrefixes();
$this->governanceConfig->getCorePatternAuditExcludedPathPatterns();
@@ -1090,6 +1174,18 @@ final readonly class RetriexEffectiveConfigProvider
$this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings);
$this->validateRegexPatternList($ragEvidence['aggregate_answer_evidence_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_answer_evidence_patterns', $errors);
// Validate the cleanup profile referenced by agent.shop_prompt.meta_query_guard.
// Non-array `shop_prompt` / `meta_query_guard` entries are treated as empty maps.
$shopPrompt = is_array($agent['shop_prompt'] ?? null) ? $agent['shop_prompt'] : [];
$metaQueryGuard = is_array($shopPrompt['meta_query_guard'] ?? null) ? $shopPrompt['meta_query_guard'] : [];
$shopContextCleanupProfile = $metaQueryGuard['cleanup_profile'] ?? null;
// A missing, non-string, or blank entry falls back to the value resolved by
// $this->agentRunnerConfig; otherwise the trimmed raw value is used.
if (!is_string($shopContextCleanupProfile) || trim($shopContextCleanupProfile) === '') {
$shopContextCleanupProfile = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile();
} else {
$shopContextCleanupProfile = trim($shopContextCleanupProfile);
}
// The resolved name must be one of the profile names reported by the language cleanup config.
if (!in_array($shopContextCleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
$errors[] = 'agent.shop_prompt.meta_query_guard.cleanup_profile references unknown language cleanup profile: ' . $shopContextCleanupProfile . '.';
}
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
@@ -1319,10 +1415,40 @@ final readonly class RetriexEffectiveConfigProvider
{
$this->validateStringListMap($language, 'language', $errors, $warnings);
$stopwords = is_array($language['stopwords'] ?? null) ? $language['stopwords'] : [];
foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protected) {
if (in_array($protected, $stopwords, true)) {
$errors[] = 'language.stopwords must not contain protected term: ' . $protected . '.';
try {
$profileNames = $this->languageCleanupConfig->getCleanupProfileNames();
foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $profileName) {
if (!in_array($profileName, $profileNames, true)) {
$errors[] = 'language.cleanup_profiles must contain required profile: ' . $profileName . '.';
continue;
}
$this->languageCleanupConfig->getCleanupProfile($profileName);
}
foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protected) {
if (in_array($protected, $stopwords, true)) {
$errors[] = 'language.stopwords must not contain protected term: ' . $protected . '.';
}
if (!$this->languageCleanupConfig->isProtectedTerm($protected)) {
$errors[] = 'language.protected_terms must contain protected term: ' . $protected . '.';
}
}
foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $requiredTerms) {
$profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
foreach ($requiredTerms as $bucket => $terms) {
foreach ($terms as $term) {
if (!in_array($term, $profile[$bucket] ?? [], true)) {
$errors[] = sprintf('language.cleanup_profiles.%s.%s must contain required term: %s.', $profileName, $bucket, $term);
}
}
}
}
} catch (\InvalidArgumentException $e) {
$errors[] = $e->getMessage();
}
}