patch 15
This commit is contained in:
@@ -31,6 +31,29 @@ final class CommerceIntentConfig
|
||||
return $this->requiredStringList('advisory_product_selection_patterns');
|
||||
}
|
||||
|
||||
/**
 * Returns the string list stored under "non_product_commerce_signals".
 *
 * The value is read through requiredStringList(); that helper is outside
 * this view, so its exact validation rules are not restated here.
 *
 * @return string[]
 */
public function getNonProductCommerceSignals(): array
{
    $configKey = 'non_product_commerce_signals';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string stored under "technical_factual_knowledge.signal_label".
 *
 * The value is read through requiredString(); that helper is outside this
 * view, so its exact validation rules are not restated here.
 */
public function getTechnicalFactualKnowledgeSignalLabel(): string
{
    $configPath = 'technical_factual_knowledge.signal_label';

    return $this->requiredString($configPath);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "technical_factual_knowledge.question_marker_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function getTechnicalFactualKnowledgeQuestionMarkerPatterns(): array
{
    $configPath = 'technical_factual_knowledge.question_marker_patterns';

    return $this->requiredStringList($configPath);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "technical_factual_knowledge.fact_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function getTechnicalFactualKnowledgeFactPatterns(): array
{
    $configPath = 'technical_factual_knowledge.fact_patterns';

    return $this->requiredStringList($configPath);
}
|
||||
|
||||
/** @return string[] */
|
||||
public function getPriceTerms(): array
|
||||
{
|
||||
|
||||
@@ -16,6 +16,7 @@ final readonly class ConfigSourceAuditProvider
|
||||
'CatalogIntentConfig' => 'retriex.intent.catalog.config',
|
||||
'DomainVocabularyConfig' => 'retriex.vocabulary.config',
|
||||
'IntentLightConfig' => 'retriex.intent.light.config',
|
||||
'GovernanceConfig' => 'retriex.governance.config',
|
||||
'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
|
||||
'PromptBuilderConfig' => 'retriex.prompt.config',
|
||||
'QueryEnricherConfig' => 'retriex.query_enrichment.config',
|
||||
|
||||
216
src/Config/GovernanceConfig.php
Normal file
216
src/Config/GovernanceConfig.php
Normal file
@@ -0,0 +1,216 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Read-only accessor for the RetrieX governance configuration tree.
 *
 * Values are addressed by dot-separated paths, for example
 * "regression_baseline.protected_short_model_tokens". Every getter except
 * toArray() throws \InvalidArgumentException when its path is absent or the
 * stored value does not have the expected shape.
 */
final class GovernanceConfig
{
    /**
     * @param array<string, mixed> $config
     */
    public function __construct(private readonly array $config = [])
    {
    }

    /**
     * Returns the configuration array exactly as it was passed in.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_short_model_tokens"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedShortModelTokens(): array
    {
        $path = 'regression_baseline.protected_short_model_tokens';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_measurement_values"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedMeasurementValues(): array
    {
        $path = 'regression_baseline.protected_measurement_values';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_technical_prompt_keywords"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedTechnicalPromptKeywords(): array
    {
        $path = 'regression_baseline.protected_technical_prompt_keywords';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.technical_priority_required_markers"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionTechnicalPriorityRequiredMarkers(): array
    {
        $path = 'regression_baseline.technical_priority_required_markers';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_accessory_prompt_keywords"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedAccessoryPromptKeywords(): array
    {
        $path = 'regression_baseline.protected_accessory_prompt_keywords';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_search_repair_specificity_terms"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedSearchRepairSpecificityTerms(): array
    {
        $path = 'regression_baseline.protected_search_repair_specificity_terms';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.protected_retrieval_reagent_words"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionProtectedRetrievalReagentWords(): array
    {
        $path = 'regression_baseline.protected_retrieval_reagent_words';

        return $this->requiredStringList($path);
    }

    /**
     * Reads the device word groups as a map of group name to term list.
     *
     * Two entry shapes are understood:
     *  - string key => list of terms (the key is the group name);
     *  - any other key => ['key' => name, 'terms' => list], the temporary
     *    p15/p15b list-of-groups shape.
     *
     * Entries that are not arrays, or that end up with an empty name or no
     * usable terms, are dropped without an error. A later entry replaces an
     * earlier one with the same trimmed name.
     *
     * @return array<string, string[]>
     *
     * @throws \InvalidArgumentException when the path is missing, is not an
     *                                   array, or no entry survives cleaning
     */
    public function getRegressionProtectedRetrievalDeviceWordGroups(): array
    {
        $raw = $this->requiredValue('regression_baseline.protected_retrieval_device_word_groups');
        if (!is_array($raw)) {
            throw $this->invalid('regression_baseline.protected_retrieval_device_word_groups', 'must be a map of string lists');
        }

        $groups = [];
        foreach ($raw as $entryKey => $entry) {
            if (!is_array($entry)) {
                continue;
            }

            if (is_string($entryKey)) {
                $groupName = trim($entryKey);
                $terms = $this->normalizeStringList($entry);
            } else {
                // Backwards-compatible reader for the temporary p15/p15b list-of-groups shape.
                $rawName = $entry['key'] ?? null;
                $groupName = is_scalar($rawName) ? trim((string) $rawName) : '';
                $terms = $this->normalizeStringList($entry['terms'] ?? []);
            }

            if ($groupName !== '' && $terms !== []) {
                $groups[$groupName] = $terms;
            }
        }

        if ($groups === []) {
            throw $this->invalid('regression_baseline.protected_retrieval_device_word_groups', 'must contain at least one valid group');
        }

        return $groups;
    }

    /**
     * @return string trimmed value from "regression_baseline.shop_prompt_regression_original_query"
     *
     * @throws \InvalidArgumentException when the path is missing, not scalar, or blank
     */
    public function getRegressionShopPromptOriginalQuery(): string
    {
        $path = 'regression_baseline.shop_prompt_regression_original_query';

        return $this->requiredString($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.shop_prompt_required_output_instruction_markers"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionShopPromptRequiredOutputInstructionMarkers(): array
    {
        $path = 'regression_baseline.shop_prompt_required_output_instruction_markers';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.shop_query_meta_guard_terms"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionShopQueryMetaGuardTerms(): array
    {
        $path = 'regression_baseline.shop_query_meta_guard_terms';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "regression_baseline.shop_query_context_fallback_filter_terms"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getRegressionShopQueryContextFallbackFilterTerms(): array
    {
        $path = 'regression_baseline.shop_query_context_fallback_filter_terms';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "vocabulary.protected_short_model_tokens"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getVocabularyProtectedShortModelTokens(): array
    {
        $path = 'vocabulary.protected_short_model_tokens';

        return $this->requiredStringList($path);
    }

    /**
     * @return string[] cleaned list from "language.protected_stopword_terms"
     *
     * @throws \InvalidArgumentException when the path is missing or yields no usable value
     */
    public function getLanguageProtectedStopwordTerms(): array
    {
        $path = 'language.protected_stopword_terms';

        return $this->requiredStringList($path);
    }

    /**
     * Resolves a path to a scalar and returns it as a trimmed, non-empty string.
     *
     * Any scalar is accepted and cast with (string), so booleans and numbers
     * pass the type check as well.
     *
     * @throws \InvalidArgumentException when the value is not scalar or trims to ''
     */
    private function requiredString(string $path): string
    {
        $raw = $this->requiredValue($path);
        if (!is_scalar($raw)) {
            throw $this->invalid($path, 'must be a scalar string');
        }

        $text = trim((string) $raw);

        return $text !== '' ? $text : throw $this->invalid($path, 'must not be empty');
    }

    /**
     * Resolves a path to an array and returns its cleaned string items.
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when the value is not an array or no
     *                                   item survives normalizeStringList()
     */
    private function requiredStringList(string $path): array
    {
        $raw = $this->requiredValue($path);
        if (!is_array($raw)) {
            throw $this->invalid($path, 'must be a string list');
        }

        $items = $this->normalizeStringList($raw);

        return $items !== [] ? $items : throw $this->invalid($path, 'must contain at least one value');
    }

    /**
     * Cleans a raw list: non-scalar items are dropped, the rest are cast to
     * string and trimmed, empty strings are removed and duplicates collapse
     * onto their first occurrence. The result is re-indexed from 0.
     *
     * A non-array argument yields an empty list.
     *
     * @return string[]
     */
    private function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $trimmed = array_map(
            static fn (mixed $item): string => trim((string) $item),
            array_filter($value, is_scalar(...)),
        );
        $nonEmpty = array_filter($trimmed, static fn (string $item): bool => $item !== '');

        // array_unique() compares as strings by default and keeps the first of
        // each group of equal values, which matches an in-order de-duplication.
        return array_values(array_unique($nonEmpty));
    }

    /**
     * Walks the configuration along a dot-separated path and returns the node found.
     *
     * A key that exists with a null value counts as present.
     *
     * @throws \InvalidArgumentException when any segment cannot be resolved
     */
    private function requiredValue(string $path): mixed
    {
        $node = $this->config;
        foreach (explode('.', $path) as $segment) {
            $resolvable = is_array($node) && array_key_exists($segment, $node);
            if (!$resolvable) {
                throw $this->missing($path);
            }

            $node = $node[$segment];
        }

        return $node;
    }

    /**
     * Builds (does not throw) the exception for an unresolved path.
     */
    private function missing(string $path): \InvalidArgumentException
    {
        $message = sprintf('RetrieX governance config "%s" is missing.', $path);

        return new \InvalidArgumentException($message);
    }

    /**
     * Builds (does not throw) the exception for a value of the wrong shape.
     */
    private function invalid(string $path, string $reason): \InvalidArgumentException
    {
        $message = sprintf('RetrieX governance config "%s" %s.', $path, $reason);

        return new \InvalidArgumentException($message);
    }
}
|
||||
@@ -136,6 +136,72 @@ final class NdjsonHybridRetrieverConfig
|
||||
return $this->requiredInt('focused_product_max_chunks', 1);
|
||||
}
|
||||
|
||||
/**
 * Returns the string list stored under "catalog_list_shortcut_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function catalogListShortcutPatterns(): array
{
    $configKey = 'catalog_list_shortcut_patterns';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the map stored under "exact_selection_token_variant_prefixes",
 * read through requiredStringListMap().
 *
 * @return array<string, string[]>
 */
public function exactSelectionTokenVariantPrefixes(): array
{
    $configKey = 'exact_selection_token_variant_prefixes';

    return $this->requiredStringListMap($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_question_tokens".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorQuestionTokens(): array
{
    $configKey = 'exact_selection_indicator_question_tokens';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_question_phrases".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorQuestionPhrases(): array
{
    $configKey = 'exact_selection_indicator_question_phrases';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_table_heading_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorTableHeadingPatterns(): array
{
    $configKey = 'exact_selection_indicator_table_heading_patterns';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_table_header_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorTableHeaderPatterns(): array
{
    $configKey = 'exact_selection_indicator_table_header_patterns';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_table_row_patterns".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorTableRowPatterns(): array
{
    $configKey = 'exact_selection_indicator_table_row_patterns';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_table_required_primary_terms".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorTableRequiredPrimaryTerms(): array
{
    $configKey = 'exact_selection_indicator_table_required_primary_terms';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "exact_selection_indicator_table_required_context_terms".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactSelectionIndicatorTableRequiredContextTerms(): array
{
    $configKey = 'exact_selection_indicator_table_required_context_terms';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under "exact_detail_tokens".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function exactDetailTokens(): array
{
    $configKey = 'exact_detail_tokens';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/**
 * Returns the string list stored under "generic_exact_selection_tokens".
 *
 * The value is read through requiredStringList(), which is defined outside
 * this view.
 *
 * @return string[]
 */
public function genericExactSelectionTokens(): array
{
    $configKey = 'generic_exact_selection_tokens';

    return $this->requiredStringList($configKey);
}
|
||||
|
||||
/** @return string[] */
|
||||
public function genericProductTokens(): array
|
||||
{
|
||||
@@ -240,6 +306,17 @@ final class NdjsonHybridRetrieverConfig
|
||||
'focused_product_min_score' => $this->focusedProductMinScore(),
|
||||
'focused_product_min_gap' => $this->focusedProductMinGap(),
|
||||
'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
|
||||
'catalog_list_shortcut_patterns' => $this->catalogListShortcutPatterns(),
|
||||
'exact_selection_token_variant_prefixes' => $this->exactSelectionTokenVariantPrefixes(),
|
||||
'exact_selection_indicator_question_tokens' => $this->exactSelectionIndicatorQuestionTokens(),
|
||||
'exact_selection_indicator_question_phrases' => $this->exactSelectionIndicatorQuestionPhrases(),
|
||||
'exact_selection_indicator_table_heading_patterns' => $this->exactSelectionIndicatorTableHeadingPatterns(),
|
||||
'exact_selection_indicator_table_header_patterns' => $this->exactSelectionIndicatorTableHeaderPatterns(),
|
||||
'exact_selection_indicator_table_row_patterns' => $this->exactSelectionIndicatorTableRowPatterns(),
|
||||
'exact_selection_indicator_table_required_primary_terms' => $this->exactSelectionIndicatorTableRequiredPrimaryTerms(),
|
||||
'exact_selection_indicator_table_required_context_terms' => $this->exactSelectionIndicatorTableRequiredContextTerms(),
|
||||
'exact_detail_tokens' => $this->exactDetailTokens(),
|
||||
'generic_exact_selection_tokens' => $this->genericExactSelectionTokens(),
|
||||
'generic_product_tokens' => $this->genericProductTokens(),
|
||||
'important_short_model_tokens' => $this->importantShortModelTokens(),
|
||||
'family_descriptor_tokens' => $this->familyDescriptorTokens(),
|
||||
@@ -324,6 +401,47 @@ final class NdjsonHybridRetrieverConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Reads a required config value that must be a map of string lists.
 *
 * Each entry is kept only when its key is a string that is non-empty after
 * trimming and its value is an array. Within an entry, non-scalar items are
 * dropped, the rest are cast to string and trimmed, empty strings are removed
 * and duplicates collapse onto their first occurrence. Entries left without
 * items are dropped. Keys are stored trimmed, so a later entry replaces an
 * earlier one with the same trimmed key.
 *
 * NOTE(review): PHP stores integer-like string keys (e.g. "808") as int, so
 * such entries fail the is_string() check and are skipped. Confirm that is
 * intended for this map.
 *
 * @return array<string, string[]>
 *
 * @throws \InvalidArgumentException (built by invalid()) when the value is
 *         not an array or no entry survives cleaning
 */
private function requiredStringListMap(string $key): array
{
    $raw = $this->requiredValue($key);
    if (!is_array($raw)) {
        throw $this->invalid($key, 'must be a map of string lists');
    }

    $map = [];
    foreach ($raw as $name => $entries) {
        $usableEntry = is_string($name) && trim($name) !== '' && is_array($entries);
        if (!$usableEntry) {
            continue;
        }

        $trimmed = array_map(
            static fn (mixed $entry): string => trim((string) $entry),
            array_filter($entries, is_scalar(...)),
        );
        $nonEmpty = array_filter($trimmed, static fn (string $entry): bool => $entry !== '');

        // array_unique() compares as strings by default and keeps the first
        // of each group of equal values.
        $cleaned = array_values(array_unique($nonEmpty));
        if ($cleaned === []) {
            continue;
        }

        $map[trim($name)] = $cleaned;
    }

    if ($map === []) {
        throw $this->invalid($key, 'must contain at least one non-empty map entry');
    }

    return $map;
}
|
||||
|
||||
private function requiredValue(string $key): mixed
|
||||
{
|
||||
if (!array_key_exists($key, $this->config)) {
|
||||
|
||||
@@ -544,6 +544,14 @@ final class PromptBuilderConfig
|
||||
return $this->getRequiredStringList('role_guard.accessory_product_keywords');
|
||||
}
|
||||
|
||||
/**
 * Returns the string list stored under
 * "role_guard.direct_main_device_request_patterns".
 *
 * The value is read through getRequiredStringList(), which is defined
 * outside this view.
 *
 * @return string[]
 */
public function getDirectMainDeviceRequestPatterns(): array
{
    $configPath = 'role_guard.direct_main_device_request_patterns';

    return $this->getRequiredStringList($configPath);
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
|
||||
@@ -26,6 +26,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
private ShopServiceConfig $shopServiceConfig,
|
||||
private StopWordsConfig $stopWordsConfig,
|
||||
private QueryEnricherConfig $queryEnricherConfig,
|
||||
private GovernanceConfig $governanceConfig,
|
||||
private CatalogIntentConfig $catalogIntentConfig,
|
||||
private ContextServiceConfig $contextServiceConfig,
|
||||
) {
|
||||
@@ -51,6 +52,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'search_repair' => $this->searchRepairEffectiveConfig(),
|
||||
'intent' => $this->intentConfig(),
|
||||
'vocabulary' => $this->domainVocabularyConfig->toArray(),
|
||||
'governance' => $this->governanceConfig->toArray(),
|
||||
'language' => $this->languageConfig(),
|
||||
'query_enrichment' => $this->queryEnrichmentConfig(),
|
||||
'catalog_intent' => $this->catalogIntentConfig(),
|
||||
@@ -80,6 +82,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$this->validateSearchRepair($config['search_repair'], $errors, $warnings);
|
||||
$this->validateIntent($config['intent'], $errors, $warnings);
|
||||
$this->validateVocabulary($config['vocabulary'], $errors, $warnings);
|
||||
$this->validateGovernance($config['governance'], $errors, $warnings);
|
||||
$this->validateLanguage($config['language'], $errors, $warnings);
|
||||
$this->validateQueryEnrichment($config['query_enrichment'], $errors, $warnings);
|
||||
|
||||
@@ -114,8 +117,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$importantShortModelTokens = $this->retrieverConfig->importantShortModelTokens();
|
||||
foreach (['th', 'tc', 'tp', 'tm', 'ph', 'rx'] as $token) {
|
||||
$key = 'important_short_model_token_' . $token;
|
||||
foreach ($this->governanceConfig->getRegressionProtectedShortModelTokens() as $token) {
|
||||
$key = 'important_short_model_token_' . $this->guardrailCheckKey($token);
|
||||
$checks[$key] = in_array($token, $importantShortModelTokens, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing protected short model token: ' . $token;
|
||||
@@ -123,20 +126,23 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$measurementPattern = $this->commerceQueryParserConfig->getMeasurementValueTokenPattern();
|
||||
$checks['measurement_value_0_02_matches'] = @preg_match($measurementPattern, '0,02') === 1;
|
||||
if (!$checks['measurement_value_0_02_matches']) {
|
||||
$errors[] = 'Commerce query parser no longer recognizes 0,02 as a measurement value.';
|
||||
}
|
||||
|
||||
$filterTokens = $this->commerceQueryParserConfig->getFilterSearchTokens();
|
||||
$checks['measurement_value_0_02_not_filtered'] = !in_array('0,02', $filterTokens, true);
|
||||
if (!$checks['measurement_value_0_02_not_filtered']) {
|
||||
$errors[] = 'Commerce query parser filters the protected token 0,02.';
|
||||
foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
|
||||
$keySuffix = $this->guardrailCheckKey($measurementValue);
|
||||
$checks['measurement_value_' . $keySuffix . '_matches'] = @preg_match($measurementPattern, $measurementValue) === 1;
|
||||
if (!$checks['measurement_value_' . $keySuffix . '_matches']) {
|
||||
$errors[] = 'Commerce query parser no longer recognizes protected measurement value: ' . $measurementValue . '.';
|
||||
}
|
||||
|
||||
$checks['measurement_value_' . $keySuffix . '_not_filtered'] = !in_array($measurementValue, $filterTokens, true);
|
||||
if (!$checks['measurement_value_' . $keySuffix . '_not_filtered']) {
|
||||
$errors[] = 'Commerce query parser filters protected measurement value: ' . $measurementValue . '.';
|
||||
}
|
||||
}
|
||||
|
||||
$technicalKeywords = $this->promptConfig->getTechnicalProductKeywords();
|
||||
foreach (['testomat', 'indikator', 'grenzwert', 'messbereich', 'gemessen'] as $term) {
|
||||
$key = 'technical_keyword_' . $term;
|
||||
foreach ($this->governanceConfig->getRegressionProtectedTechnicalPromptKeywords() as $term) {
|
||||
$key = 'technical_keyword_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $technicalKeywords, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing technical prompt keyword: ' . $term;
|
||||
@@ -144,19 +150,20 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
$technicalPriorityRules = implode("\n", $this->promptConfig->getOutputPriorityTechnicalRules());
|
||||
$checks['technical_priority_rules_present'] = trim($technicalPriorityRules) !== '';
|
||||
$checks['technical_priority_prevents_runner_up'] = str_contains($technicalPriorityRules, 'runner-up')
|
||||
|| str_contains($technicalPriorityRules, 'second-lowest')
|
||||
|| str_contains($technicalPriorityRules, 'comparison');
|
||||
$checks['technical_priority_required_markers_present'] = $this->containsAnyConfiguredMarker(
|
||||
$technicalPriorityRules,
|
||||
$this->governanceConfig->getRegressionTechnicalPriorityRequiredMarkers()
|
||||
);
|
||||
if (!$checks['technical_priority_rules_present']) {
|
||||
$errors[] = 'Missing technical output priority rules.';
|
||||
}
|
||||
if (!$checks['technical_priority_prevents_runner_up']) {
|
||||
$errors[] = 'Technical output priority no longer guards against runner-up/comparison expansion.';
|
||||
if (!$checks['technical_priority_required_markers_present']) {
|
||||
$errors[] = 'Technical output priority no longer contains a required governance marker.';
|
||||
}
|
||||
|
||||
$accessoryKeywords = $this->promptConfig->getAccessoryRequestKeywords();
|
||||
foreach (['indikator', 'reagenz'] as $term) {
|
||||
$key = 'accessory_keyword_' . $term;
|
||||
foreach ($this->governanceConfig->getRegressionProtectedAccessoryPromptKeywords() as $term) {
|
||||
$key = 'accessory_keyword_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $accessoryKeywords, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing accessory prompt keyword: ' . $term;
|
||||
@@ -164,8 +171,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$searchRepairTerms = $this->searchRepairConfig->getSpecificityBoostTerms();
|
||||
foreach (['indikator', 'testomat', 'reagenz'] as $term) {
|
||||
$key = 'search_repair_specificity_' . $term;
|
||||
foreach ($this->governanceConfig->getRegressionProtectedSearchRepairSpecificityTerms() as $term) {
|
||||
$key = 'search_repair_specificity_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $searchRepairTerms, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing search repair specificity term: ' . $term;
|
||||
@@ -173,30 +180,46 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$reagentWords = $this->retrieverConfig->looksLikeReagentWords();
|
||||
$deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
|
||||
$checks['retrieval_reagent_word_indikator'] = in_array('indikator', $reagentWords, true);
|
||||
$checks['retrieval_device_word_geraet'] = in_array('geraet', $deviceWords, true) || in_array('gerät', $deviceWords, true);
|
||||
if (!$checks['retrieval_reagent_word_indikator']) {
|
||||
$errors[] = 'Missing retrieval reagent word: indikator.';
|
||||
}
|
||||
if (!$checks['retrieval_device_word_geraet']) {
|
||||
$errors[] = 'Missing retrieval device word: geraet/geraet equivalent.';
|
||||
foreach ($this->governanceConfig->getRegressionProtectedRetrievalReagentWords() as $term) {
|
||||
$key = 'retrieval_reagent_word_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $reagentWords, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing retrieval reagent word: ' . $term . '.';
|
||||
}
|
||||
}
|
||||
|
||||
$shopPrompt = $this->agentRunnerConfig->getShopPrompt('testomat 808 0,02', '');
|
||||
$checks['shop_prompt_contains_output_instruction'] = str_contains($shopPrompt, 'Output only the final search query.')
|
||||
|| str_contains($shopPrompt, 'Output format:');
|
||||
$checks['shop_prompt_contains_original_query'] = str_contains($shopPrompt, 'testomat 808 0,02');
|
||||
$deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
|
||||
foreach ($this->governanceConfig->getRegressionProtectedRetrievalDeviceWordGroups() as $groupKey => $terms) {
|
||||
$key = 'retrieval_device_word_' . $this->guardrailCheckKey((string) $groupKey);
|
||||
$checks[$key] = false;
|
||||
foreach ($terms as $term) {
|
||||
if (in_array($term, $deviceWords, true)) {
|
||||
$checks[$key] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing retrieval device word group: ' . (string) $groupKey . '.';
|
||||
}
|
||||
}
|
||||
|
||||
$shopPromptOriginalQuery = $this->governanceConfig->getRegressionShopPromptOriginalQuery();
|
||||
$shopPrompt = $this->agentRunnerConfig->getShopPrompt($shopPromptOriginalQuery, '');
|
||||
$checks['shop_prompt_contains_output_instruction'] = $this->containsAnyConfiguredMarker(
|
||||
$shopPrompt,
|
||||
$this->governanceConfig->getRegressionShopPromptRequiredOutputInstructionMarkers()
|
||||
);
|
||||
$checks['shop_prompt_contains_original_query'] = str_contains($shopPrompt, $shopPromptOriginalQuery);
|
||||
if (!$checks['shop_prompt_contains_output_instruction']) {
|
||||
$errors[] = 'Shop query optimizer prompt no longer contains the expected output instruction.';
|
||||
$errors[] = 'Shop query optimizer prompt no longer contains a required output instruction marker.';
|
||||
}
|
||||
if (!$checks['shop_prompt_contains_original_query']) {
|
||||
$errors[] = 'Shop query optimizer prompt no longer contains the original query.';
|
||||
$errors[] = 'Shop query optimizer prompt no longer contains the configured original query.';
|
||||
}
|
||||
|
||||
$metaOnlyTerms = $this->agentRunnerConfig->getShopQueryMetaOnlyTerms();
|
||||
foreach (['shop', 'suche'] as $term) {
|
||||
$key = 'shop_query_meta_guard_term_' . $term;
|
||||
foreach ($this->governanceConfig->getRegressionShopQueryMetaGuardTerms() as $term) {
|
||||
$key = 'shop_query_meta_guard_term_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $metaOnlyTerms, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing shop query meta guard term: ' . $term;
|
||||
@@ -208,8 +231,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$contextFallbackFilterTerms = $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms();
|
||||
foreach (['welchem', 'kann', 'messen'] as $term) {
|
||||
$key = 'shop_query_context_fallback_filter_' . $term;
|
||||
foreach ($this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms() as $term) {
|
||||
$key = 'shop_query_context_fallback_filter_' . $this->guardrailCheckKey($term);
|
||||
$checks[$key] = in_array($term, $contextFallbackFilterTerms, true);
|
||||
if (!$checks[$key]) {
|
||||
$errors[] = 'Missing shop query context fallback filter term: ' . $term;
|
||||
@@ -244,7 +267,26 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'warnings' => $warnings,
|
||||
];
|
||||
}
|
||||
/**
 * Tells whether at least one non-empty marker occurs in the haystack.
 *
 * Matching is a plain, case-sensitive substring test via str_contains().
 * Empty-string markers are ignored, so an empty list or a list made only of
 * empty strings yields false.
 *
 * @param string[] $markers
 */
private function containsAnyConfiguredMarker(string $haystack, array $markers): bool
{
    $found = false;

    foreach ($markers as $candidate) {
        if ($candidate === '') {
            continue;
        }

        if (str_contains($haystack, $candidate)) {
            $found = true;
            break;
        }
    }

    return $found;
}
|
||||
|
||||
/**
 * Turns an arbitrary term into a suffix usable in a guardrail check name.
 *
 * The term is lower-cased as UTF-8, every run of characters that are neither
 * Unicode letters nor Unicode digits is collapsed into one underscore, and
 * underscores at either end are stripped. "value" is returned when nothing
 * remains.
 */
private function guardrailCheckKey(string $term): string
{
    $lowered = mb_strtolower($term, 'UTF-8');

    // preg_replace() returns null on a PCRE failure; the lower-cased term is
    // then used unchanged.
    $collapsed = preg_replace('/[^\p{L}\p{N}]+/u', '_', $lowered);
    if ($collapsed === null) {
        $collapsed = $lowered;
    }

    $slug = trim($collapsed, '_');
    if ($slug === '') {
        return 'value';
    }

    return $slug;
}
|
||||
/** @return array<string, mixed> */
|
||||
private function runtimeConfig(): array
|
||||
{
|
||||
@@ -740,6 +782,37 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Validates the governance section of the effective configuration.
 *
 * An empty section is reported as a single error. Otherwise every governance
 * getter is invoked on its own and the message of each
 * \InvalidArgumentException it throws is appended to $errors, so one pass
 * reports every invalid governance path rather than only the first.
 *
 * @param array<string, mixed> $governance
 * @param list<string> $errors   receives one message per failing getter
 * @param list<string> $warnings part of the signature, never written here
 */
private function validateGovernance(array $governance, array &$errors, array &$warnings): void
{
    if ($governance === []) {
        $errors[] = 'governance config must not be empty.';

        return;
    }

    $config = $this->governanceConfig;

    // Each getter validates its own path; only the exception matters, the
    // returned value is discarded.
    $readers = [
        $config->getRegressionProtectedShortModelTokens(...),
        $config->getRegressionProtectedMeasurementValues(...),
        $config->getRegressionProtectedTechnicalPromptKeywords(...),
        $config->getRegressionTechnicalPriorityRequiredMarkers(...),
        $config->getRegressionProtectedAccessoryPromptKeywords(...),
        $config->getRegressionProtectedSearchRepairSpecificityTerms(...),
        $config->getRegressionProtectedRetrievalReagentWords(...),
        $config->getRegressionProtectedRetrievalDeviceWordGroups(...),
        $config->getRegressionShopPromptOriginalQuery(...),
        $config->getRegressionShopPromptRequiredOutputInstructionMarkers(...),
        $config->getRegressionShopQueryMetaGuardTerms(...),
        $config->getRegressionShopQueryContextFallbackFilterTerms(...),
        $config->getVocabularyProtectedShortModelTokens(...),
        $config->getLanguageProtectedStopwordTerms(...),
    ];

    foreach ($readers as $reader) {
        try {
            $reader();
        } catch (\InvalidArgumentException $e) {
            $errors[] = $e->getMessage();
        }
    }
}
|
||||
/**
|
||||
* @param array<string, mixed> $runtime
|
||||
* @param list<string> $errors
|
||||
@@ -847,6 +920,11 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$inventory = $retrieval['inventory_parameter'] ?? [];
|
||||
if (is_array($inventory)) {
|
||||
foreach ($inventory as $key => $value) {
|
||||
$key = (string) $key;
|
||||
if (!$this->shouldCompareRetrievalInventoryKey($key, $retrieval)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (array_key_exists($key, $retrieval) && $retrieval[$key] != $value) {
|
||||
$warnings[] = 'retrieval.inventory.' . $key . ' differs from active retriever config.';
|
||||
}
|
||||
@@ -854,6 +932,41 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decides whether an inventory key should be compared with the active
 * retriever config.
 *
 * Retrieval vocabulary lists can be resolved from dedicated vocabulary views,
 * while the backwards-compatible inventory parameter may still hold raw
 * legacy list values for those keys. Comparing the two would produce
 * false-positive validation warnings, so such keys are excluded.
 *
 * A key is excluded when it is listed by
 * retrievalVocabularyBackedInventoryKeys(), or when $retrieval['vocabulary']
 * is an array that contains the key.
 *
 * @param array<string, mixed> $retrieval
 */
private function shouldCompareRetrievalInventoryKey(string $key, array $retrieval): bool
{
    $vocabularyBackedKeys = $this->retrievalVocabularyBackedInventoryKeys();
    if (in_array($key, $vocabularyBackedKeys, true)) {
        return false;
    }

    $vocabulary = $retrieval['vocabulary'] ?? [];
    if (is_array($vocabulary) && array_key_exists($key, $vocabulary)) {
        return false;
    }

    return true;
}
|
||||
|
||||
/**
 * Lists the fixed set of inventory keys that this class treats as backed by
 * retrieval vocabulary.
 *
 * @return string[]
 */
private function retrievalVocabularyBackedInventoryKeys(): array
{
    $tokenListKeys = [
        'generic_product_tokens',
        'important_short_model_tokens',
        'family_descriptor_tokens',
    ];

    $looksLikeKeys = [
        'looks_like_reagent_tokens',
        'looks_like_safety_docs',
        'looks_like_reagent_words',
        'looks_like_document_words',
        'looks_like_safety_words',
        'looks_like_device_words',
    ];

    // Both parts are plain lists, so spreading them keeps the original order
    // and produces keys 0..8.
    return [...$tokenListKeys, ...$looksLikeKeys];
}
|
||||
|
||||
/**
|
||||
* @param array<string, mixed> $prompt
|
||||
* @param list<string> $errors
|
||||
@@ -964,13 +1077,15 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
|
||||
$measurementPattern = $patterns['measurement_value_token'] ?? null;
|
||||
if (is_string($measurementPattern) && @preg_match($measurementPattern, '0,02') !== 1) {
|
||||
$errors[] = 'commerce_query.patterns.measurement_value_token must match 0,02.';
|
||||
}
|
||||
|
||||
$filterTokens = $commerceQuery['filter_search_tokens'] ?? [];
|
||||
if (is_array($filterTokens) && in_array('0,02', $filterTokens, true)) {
|
||||
$errors[] = 'commerce_query.filter_search_tokens must not remove protected decimal token 0,02.';
|
||||
foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
|
||||
if (is_string($measurementPattern) && @preg_match($measurementPattern, $measurementValue) !== 1) {
|
||||
$errors[] = 'commerce_query.patterns.measurement_value_token must match protected measurement value: ' . $measurementValue . '.';
|
||||
}
|
||||
|
||||
if (is_array($filterTokens) && in_array($measurementValue, $filterTokens, true)) {
|
||||
$errors[] = 'commerce_query.filter_search_tokens must not remove protected measurement value: ' . $measurementValue . '.';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1033,7 +1148,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
if (is_array($retrievalViews)) {
|
||||
$shortModel = $retrievalViews['important_short_model_tokens']['add'] ?? [];
|
||||
if (is_array($shortModel)) {
|
||||
foreach (['th', 'tc', 'tp', 'tm', 'ph', 'rx'] as $token) {
|
||||
foreach ($this->governanceConfig->getVocabularyProtectedShortModelTokens() as $token) {
|
||||
if (!in_array($token, $shortModel, true)) {
|
||||
$warnings[] = 'vocabulary.views.retrieval.important_short_model_tokens should contain protected token ' . $token . '.';
|
||||
}
|
||||
@@ -1051,7 +1166,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
{
|
||||
$this->validateStringListMap($language, 'language', $errors, $warnings);
|
||||
$stopwords = is_array($language['stopwords'] ?? null) ? $language['stopwords'] : [];
|
||||
foreach (['nicht', 'kein', 'welche', 'testomat', 'indikator', '0,02'] as $protected) {
|
||||
foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protected) {
|
||||
if (in_array($protected, $stopwords, true)) {
|
||||
$errors[] = 'language.stopwords must not contain protected term: ' . $protected . '.';
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user