*/
    public function dump(): array
    {
        // One entry per configuration section; most sections are assembled by a private helper of this class.
        return [
            'genre' => $this->genreConfig(),
            'runtime' => $this->runtimeConfig(),
            'index' => $this->indexConfig(),
            'model_generation' => $this->modelConfig(),
            'llm' => [
                'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'),
                'num_predict' => $this->param('retriex.llm.num_predict'),
            ],
            'retrieval' => $this->retrievalConfig(),
            'prompt' => $this->promptConfig(),
            'agent' => $this->agentConfig(),
            'chat_messages' => $this->chatMessagesConfig(),
            'vector' => $this->vectorConfig(),
            'commerce' => $this->commerceConfig(),
            'commerce_query' => $this->commerceQueryConfig(),
            'shop_matching' => $this->shopMatchingConfig(),
            'search_repair' => $this->searchRepairEffectiveConfig(),
            'intent' => $this->intentConfig(),
            'vocabulary' => $this->domainVocabularyConfig->toArray(),
            'governance' => $this->governanceConfig->toArray(),
            'language' => $this->languageConfig(),
            'query_enrichment' => $this->queryEnrichmentConfig(),
            'catalog_intent' => $this->catalogIntentConfig(),
            'context' => $this->contextConfig(),
        ];
    }

    /**
     * Dumps the configuration and runs the section validators over it.
     *
     * The status is 'OK' only when no validator reported an error. The
     * 'catalog_intent' and 'context' sections of the dump are returned in
     * 'config' but have no dedicated validator call in this method.
     *
     * @return array{status:string, errors:list, warnings:list, config:array}
     */
    public function validate(): array
    {
        $errors = [];
        $warnings = [];
        $config = $this->dump();

        // The validators below are handed $errors/$warnings to append their findings to.
        $this->validateGenre($config['genre'], $config, $errors, $warnings);
        $sourceOfTruth = $this->genreSourceOfTruthGuard->validate($config['genre'], $config);
        array_push($errors, ...$sourceOfTruth['errors']);
        array_push($warnings, ...$sourceOfTruth['warnings']);
        $this->validateRuntime($config['runtime'], $errors, $warnings);
        $this->validateIndex($config['index'], $errors, $warnings);
        $this->validateModel($config['model_generation'], $errors, $warnings);
        $this->validateRetrieval($config['retrieval'], $errors, $warnings);
        $this->validatePrompt($config['prompt'], $errors, $warnings);
        $this->validateAgent($config['agent'], $errors, $warnings);
        $this->validateChatMessages($errors);
        $this->validateVector($config['vector'], $errors, $warnings);
        $this->validateCommerce($config['commerce'], $errors, $warnings);
        $this->validateCommerceQuery($config['commerce_query'], $errors, $warnings);
        $this->validateShopMatching($config['shop_matching'], $errors, $warnings);
        $this->validateSearchRepair($config['search_repair'], $errors, $warnings);
        $this->validateIntent($config['intent'], $errors, $warnings);
        $this->validateVocabulary($config['vocabulary'], $errors, $warnings);
        $this->validateGovernance($config['governance'], $errors, $warnings);
        $this->validateLanguage($config['language'], $errors, $warnings);
        $this->validateQueryEnrichment($config['query_enrichment'], $errors, $warnings);

        return [
            'status' => $errors === [] ? 'OK' : 'ERROR',
            'errors' => $errors,
            'warnings' => $warnings,
            'config' => $config,
        ];
    }

    /**
     * Offline regression guard for the stable 1.4.2-sensitive configuration paths.
     *
     * Runs validate() first, then compares the governance-configured
     * "protected" terms, markers and limits against the effective runtime
     * configuration. Every comparison is recorded in 'checks' under a
     * generated key; every failed comparison also adds a message to 'errors'.
     *
     * @return array{status:string, checks:array, errors:list, warnings:list}
     */
    public function regressionBaseline(): array
    {
        $errors = [];
        $warnings = [];
        $checks = [];

        // Step 1: the regular configuration validation must pass; its findings are re-emitted with a prefix.
        $validate = $this->validate();
        $checks['config_validate_ok'] = $validate['status'] === 'OK';
        if ($validate['status'] !== 'OK') {
            foreach ($validate['errors'] as $error) {
                $errors[] = 'Config validation failed: ' . $error;
            }
        }
        foreach ($validate['warnings'] as $warning) {
            $warnings[] = 'Config validation warning: ' . $warning;
        }

        // Step 2: language cleanup profile guardrails. An InvalidArgumentException raised anywhere in
        // this section aborts the remaining cleanup checks and is reported as a single error.
        try {
            $cleanupProfileNames = $this->languageCleanupConfig->getCleanupProfileNames();
            foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $profileName) {
                $key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName);
                $checks[$key] = in_array($profileName, $cleanupProfileNames, true);
                if (!$checks[$key]) {
                    $errors[] = 'Missing required language cleanup profile: ' . $profileName . '.';
                    continue;
                }
                // Result intentionally unused: the profile is loaded only so that a failure surfaces here.
                $this->languageCleanupConfig->getCleanupProfile($profileName);
            }

            $legacyStopwords = $this->stopWordsConfig->getStopWords();
            foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protectedTerm) {
                $key = 'language_protected_term_' . $this->guardrailCheckKey($protectedTerm);
                $checks[$key . '_registered'] = $this->languageCleanupConfig->isProtectedTerm($protectedTerm);
                if (!$checks[$key . '_registered']) {
                    $errors[] = 'Missing protected language cleanup term: ' . $protectedTerm . '.';
                }
                $checks[$key . '_not_legacy_stopword'] = !in_array($protectedTerm, $legacyStopwords, true);
                if (!$checks[$key . '_not_legacy_stopword']) {
                    $errors[] = 'Protected language cleanup term is still a legacy stopword: ' . $protectedTerm . '.';
                }
                // A protected term must not appear in any bucket of any cleanup profile.
                foreach ($cleanupProfileNames as $profileName) {
                    $profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
                    foreach (['stopwords', 'phrases', 'meta_terms'] as $bucket) {
                        $bucketKey = $key . '_not_in_' . $this->guardrailCheckKey($profileName . '_' . $bucket);
                        $checks[$bucketKey] = !in_array($protectedTerm, $profile[$bucket] ?? [], true);
                        if (!$checks[$bucketKey]) {
                            $errors[] = sprintf('Protected language cleanup term %s is present in %s.%s.', $protectedTerm, $profileName, $bucket);
                        }
                    }
                }
            }

            foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $requiredTerms) {
                $profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
                foreach ($requiredTerms as $bucket => $terms) {
                    foreach ($terms as $term) {
                        $key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName . '_' . $bucket . '_' . $term);
                        $checks[$key] = in_array($term, $profile[$bucket] ?? [], true);
                        if (!$checks[$key]) {
                            $errors[] = sprintf('Missing language cleanup profile term: %s.%s must contain %s.', $profileName, $bucket, $term);
                        }
                    }
                }
            }

            // Each consumer must point at its expected cleanup profile.
            $checks['commerce_query_cleanup_profile_wired'] = $this->commerceQueryParserConfig->getCleanupProfile() === 'commerce_query';
            if (!$checks['commerce_query_cleanup_profile_wired']) {
                $errors[] = 'Commerce query parser is not wired to cleanup profile commerce_query.';
            }
            $checks['rag_evidence_cleanup_profile_wired'] = $this->agentRunnerConfig->getRagEvidenceCleanupProfile() === 'rag_evidence';
            if (!$checks['rag_evidence_cleanup_profile_wired']) {
                $errors[] = 'RAG evidence guard is not wired to cleanup profile rag_evidence.';
            }
            $checks['shop_context_fallback_cleanup_profile_wired'] = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile() === 'shop_context_fallback';
            if (!$checks['shop_context_fallback_cleanup_profile_wired']) {
                $errors[] = 'Shop context fallback is not wired to cleanup profile shop_context_fallback.';
            }
        } catch (\InvalidArgumentException $e) {
            $checks['language_cleanup_profile_config_valid'] = false;
            $errors[] = 'Language cleanup profile guardrails failed: ' . $e->getMessage();
        }

        // Step 3: retrieval tokens that must stay registered.
        $importantShortModelTokens = $this->retrieverConfig->importantShortModelTokens();
        foreach ($this->governanceConfig->getRegressionProtectedShortModelTokens() as $token) {
            $key = 'important_short_model_token_' . $this->guardrailCheckKey($token);
            $checks[$key] = in_array($token, $importantShortModelTokens, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing protected short model token: ' . $token;
            }
        }

        // Step 4: protected measurement values must match the parser pattern and must not be filtered out.
        $measurementPattern = $this->commerceQueryParserConfig->getMeasurementValueTokenPattern();
        $filterTokens = $this->commerceQueryParserConfig->getFilterSearchTokens();
        foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
            $keySuffix = $this->guardrailCheckKey($measurementValue);
            // "@" silences the warning preg_match() raises for an invalid configured pattern; in that case
            // preg_match() returns false, so the check simply fails like a non-match.
            $checks['measurement_value_' . $keySuffix . '_matches'] = @preg_match($measurementPattern, $measurementValue) === 1;
            if (!$checks['measurement_value_' . $keySuffix . '_matches']) {
                $errors[] = 'Commerce query parser no longer recognizes protected measurement value: ' . $measurementValue . '.';
            }
            $checks['measurement_value_' . $keySuffix . '_not_filtered'] = !in_array($measurementValue, $filterTokens, true);
            if (!$checks['measurement_value_' . $keySuffix . '_not_filtered']) {
                $errors[] = 'Commerce query parser filters protected measurement value: ' . $measurementValue . '.';
            }
        }

        // Step 5: prompt keywords and technical output priority rules.
        $technicalKeywords = $this->promptConfig->getTechnicalProductKeywords();
        foreach ($this->governanceConfig->getRegressionProtectedTechnicalPromptKeywords() as $term) {
            $key = 'technical_keyword_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $technicalKeywords, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing technical prompt keyword: ' . $term;
            }
        }

        $technicalPriorityRules = implode("\n", $this->promptConfig->getOutputPriorityTechnicalRules());
        $checks['technical_priority_rules_present'] = trim($technicalPriorityRules) !== '';
        $checks['technical_priority_required_markers_present'] = $this->containsAnyConfiguredMarker(
            $technicalPriorityRules,
            $this->governanceConfig->getRegressionTechnicalPriorityRequiredMarkers()
        );
        if (!$checks['technical_priority_rules_present']) {
            $errors[] = 'Missing technical output priority rules.';
        }
        if (!$checks['technical_priority_required_markers_present']) {
            $errors[] = 'Technical output priority no longer contains a required governance marker.';
        }

        $accessoryKeywords = $this->promptConfig->getAccessoryRequestKeywords();
        foreach ($this->governanceConfig->getRegressionProtectedAccessoryPromptKeywords() as $term) {
            $key = 'accessory_keyword_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $accessoryKeywords, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing accessory prompt keyword: ' . $term;
            }
        }

        // Step 6: search repair and retrieval vocabularies.
        $searchRepairTerms = $this->searchRepairConfig->getSpecificityBoostTerms();
        foreach ($this->governanceConfig->getRegressionProtectedSearchRepairSpecificityTerms() as $term) {
            $key = 'search_repair_specificity_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $searchRepairTerms, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing search repair specificity term: ' . $term;
            }
        }

        $reagentWords = $this->retrieverConfig->looksLikeReagentWords();
        foreach ($this->governanceConfig->getRegressionProtectedRetrievalReagentWords() as $term) {
            $key = 'retrieval_reagent_word_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $reagentWords, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing retrieval reagent word: ' . $term . '.';
            }
        }

        $deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
        foreach ($this->governanceConfig->getRegressionProtectedRetrievalDeviceWordGroups() as $groupKey => $terms) {
            $key = 'retrieval_device_word_' . $this->guardrailCheckKey((string) $groupKey);
            // A group passes as soon as any one of its terms is a configured device word.
            $checks[$key] = false;
            foreach ($terms as $term) {
                if (in_array($term, $deviceWords, true)) {
                    $checks[$key] = true;
                    break;
                }
            }
            if (!$checks[$key]) {
                $errors[] = 'Missing retrieval device word group: ' . (string) $groupKey . '.';
            }
        }

        // Step 7: shop query optimizer prompt and shop query guard settings.
        $shopPromptOriginalQuery = $this->governanceConfig->getRegressionShopPromptOriginalQuery();
        $shopPrompt = $this->agentRunnerConfig->getShopPrompt($shopPromptOriginalQuery, '');
        $checks['shop_prompt_contains_output_instruction'] = $this->containsAnyConfiguredMarker(
            $shopPrompt,
            $this->governanceConfig->getRegressionShopPromptRequiredOutputInstructionMarkers()
        );
        $checks['shop_prompt_contains_original_query'] = str_contains($shopPrompt, $shopPromptOriginalQuery);
        if (!$checks['shop_prompt_contains_output_instruction']) {
            $errors[] = 'Shop query optimizer prompt no longer contains a required output instruction marker.';
        }
        if (!$checks['shop_prompt_contains_original_query']) {
            $errors[] = 'Shop query optimizer prompt no longer contains the configured original query.';
        }

        $metaOnlyTerms = $this->effectiveShopQueryMetaGuardTerms();
        foreach ($this->governanceConfig->getRegressionShopQueryMetaGuardTerms() as $term) {
            $key = 'shop_query_meta_guard_term_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $metaOnlyTerms, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing shop query meta guard term: ' . $term;
            }
        }

        $checks['shop_query_context_fallback_enabled'] = $this->agentRunnerConfig->isShopQueryContextFallbackEnabled();
        if (!$checks['shop_query_context_fallback_enabled']) {
            $errors[] = 'Shop query context fallback is disabled.';
        }

        $contextFallbackFilterTerms = $this->effectiveShopQueryContextFallbackFilterTerms();
        foreach ($this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms() as $term) {
            $key = 'shop_query_context_fallback_filter_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $contextFallbackFilterTerms, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing shop query context fallback filter term: ' . $term;
            }
        }

        $currentInputPreservationTerms = $this->effectiveShopQueryCurrentInputPreservationTerms();
        $checks['shop_query_current_input_preservation_enabled'] = $this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled();
        if (!$checks['shop_query_current_input_preservation_enabled']) {
            $errors[] = 'Shop query current-input term preservation is disabled.';
        }
        foreach ($this->governanceConfig->getRegressionShopQueryCurrentInputPreservationTerms() as $term) {
            $key = 'shop_query_current_input_preservation_' . $this->guardrailCheckKey($term);
            $checks[$key] = in_array($term, $currentInputPreservationTerms, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing shop query current-input preservation term: ' . $term;
            }
        }

        $checks['shop_query_context_fallback_history_budget_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars() > 0;
        if (!$checks['shop_query_context_fallback_history_budget_positive']) {
            $errors[] = 'Shop query context fallback history budget must be greater than zero.';
        }

        $checks['shop_query_context_fallback_full_history_enabled'] = $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback();
        if (!$checks['shop_query_context_fallback_full_history_enabled']) {
            $errors[] = 'Shop query context fallback full-history fallback is disabled.';
        }

        $checks['shop_query_context_fallback_question_limit_minimum'] = $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit() >= 6;
        if (!$checks['shop_query_context_fallback_question_limit_minimum']) {
            $errors[] = 'Shop query context fallback question limit is too low for repeated meta follow-ups.';
        }

        $checks['shop_query_context_fallback_max_terms_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms() > 0;
        if (!$checks['shop_query_context_fallback_max_terms_positive']) {
            $errors[] = 'Shop query context fallback max terms must be greater than zero.';
        }

        $status = $errors === [] ? 'OK' : 'ERROR';

        return [
            'status' => $status,
            'checks' => $checks,
            'errors' => $errors,
            'warnings' => $warnings,
        ];
    }

    /**
     * Union of the stopwords, phrases and meta terms of the shop-context-fallback
     * cleanup profile plus the agent runner's own meta-only terms.
     *
     * @return string[]
     */
    private function effectiveShopQueryMetaGuardTerms(): array
    {
        $profileName = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile();

        return $this->mergeUniqueStrings(
            $this->mergeUniqueStrings(
                $this->languageCleanupConfig->getStopWordsForProfile($profileName),
                $this->languageCleanupConfig->getPhrasesForProfile($profileName)
            ),
            $this->mergeUniqueStrings(
                $this->languageCleanupConfig->getMetaTermsForProfile($profileName),
                $this->agentRunnerConfig->getShopQueryMetaOnlyTerms()
            )
        );
    }

    /**
     * Effective meta guard terms extended by the configured context-fallback filter terms.
     *
     * The cleanup profile is resolved inside effectiveShopQueryMetaGuardTerms(),
     * so it is not looked up again here.
     *
     * @return string[]
     */
    private function effectiveShopQueryContextFallbackFilterTerms(): array
    {
        return $this->mergeUniqueStrings(
            $this->effectiveShopQueryMetaGuardTerms(),
            $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms()
        );
    }

    /**
     * Protected language cleanup terms merged with the agent runner's
     * current-input preservation terms.
     *
     * @return string[]
     */
    private function effectiveShopQueryCurrentInputPreservationTerms(): array
    {
        return $this->mergeUniqueStrings(
            $this->languageCleanupConfig->getProtectedTerms(),
            $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms()
        );
    }

    /**
     * Concatenates both lists, trims every entry, and drops empty strings and
     * duplicates while keeping first-seen order.
     *
     * @param string[] $left
     * @param string[] $right
     * @return string[]
     */
    private function mergeUniqueStrings(array $left, array $right): array
    {
        $out = [];
        foreach (array_merge($left, $right) as $item) {
            $item = trim((string) $item);
            if ($item === '' || isset($out[$item])) {
                continue;
            }
            // Keyed by the term itself so the isset() above detects duplicates.
            $out[$item] = $item;
        }

        return array_values($out);
    }

    /**
     * Returns true when at least one non-empty marker occurs in the haystack.
     *
     * @param string[] $markers
     */
    private function containsAnyConfiguredMarker(string $haystack, array $markers): bool
    {
        foreach ($markers as $marker) {
            // Empty markers are skipped: str_contains() would match them in every haystack.
            if ($marker !== '' && str_contains($haystack, $marker)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Normalizes a term into a check-key fragment: lowercased, with every run of
     * characters that are neither letters nor digits replaced by "_".
     */
    private function guardrailCheckKey(string $term): string
    {
        $key = mb_strtolower($term, 'UTF-8');
        // preg_replace() returns null on failure; the lowercased term is kept in that case.
        $key = preg_replace('/[^\p{L}\p{N}]+/u', '_', $key) ??
$key;
        $key = trim($key, '_');

        return $key !== '' ? $key : 'value';
    }

    /**
     * Snapshot of the genre configuration getters.
     *
     * @return array<string, mixed>
     */
    private function genreConfig(): array
    {
        $genre = $this->genreConfig;

        return [
            'id' => $genre->getId(),
            'label' => $genre->getLabel(),
            'mode' => $genre->getMode(),
            'description' => $genre->getDescription(),
            'adaptation_surface' => $genre->getAdaptationSurface(),
            'configuration_values' => $genre->getConfigurationValues(),
        ];
    }

    /**
     * Resolves the runtime path parameters into a dump section.
     *
     * @return array<string, mixed>
     */
    private function runtimeConfig(): array
    {
        // Dump key => container parameter name; array_map() keeps the string keys.
        $parameterNames = [
            'root' => 'retriex.root',
            'knowledge_root' => 'retriex.knowledge.root',
            'index_ndjson' => 'retriex.knowledge.ndjson',
            'index_meta' => 'retriex.knowledge.index_meta',
            'runtime_meta' => 'retriex.knowledge.runtime_meta',
            'upload_dir' => 'retriex.knowledge.upload',
            'locks_dir' => 'retriex.locks.dir',
        ];

        return array_map(fn (string $parameterName) => $this->param($parameterName), $parameterNames);
    }

    /**
     * Active index configuration from the index provider; when the provider (or
     * one of its getters) throws, the error message and the raw fallback
     * parameters are returned instead.
     *
     * @return array<string, mixed>
     */
    private function indexConfig(): array
    {
        try {
            $configuration = $this->indexProvider->getConfiguration();

            return [
                'chunk_size' => $configuration->getChunkSize(),
                'chunk_overlap' => $configuration->getChunkOverlap(),
                'embedding_model' => $configuration->getEmbeddingModel(),
                'embedding_dimension' => $configuration->getEmbeddingDimension(),
                'scoring_version' => $configuration->getScoringVersion(),
                'index_format' => $configuration->getIndexFormat(),
                'vector_backend' => $configuration->getVectorBackend(),
            ];
        } catch (\Throwable $failure) {
            return [
                'error' => $failure->getMessage(),
                'fallback_chunk_size' => $this->param('retriex.index.chunk_size'),
                'fallback_chunk_overlap' => $this->param('retriex.index.chunk_overlap'),
                'fallback_embedding_model' => $this->param('retriex.index.embedding_model'),
                'fallback_embedding_dimension' => $this->param('retriex.index.embedding_dimension'),
                'fallback_scoring_version' => $this->param('retriex.index.scoring_version'),
            ];
        }
    }

    /**
     * Active model generation settings from the model provider; when the
     * provider (or one of its getters) throws, the error message and the
     * configured defaults are returned instead.
     *
     * @return array<string, mixed>
     */
    private function modelConfig(): array
    {
        try {
            $activeModel = $this->modelProvider->getActiveForModel();

            return [
                'model_name' => $activeModel->getModelName(),
                'version' => $activeModel->getVersion(),
                'active' => $activeModel->isActive(),
                'stream' => $activeModel->isStream(),
                'temperature' => $activeModel->getTemperature(),
                'top_k' => $activeModel->getTopK(),
                'top_p' => $activeModel->getTopP(),
                'repeat_penalty' => $activeModel->getRepeatPenalty(),
                'num_ctx' => $activeModel->getNumCtx(),
                'retrieval_max_chunks' => $activeModel->getRetrievalMaxChunks(),
                'retrieval_vector_top_k' => $activeModel->getRetrievalVectorTopK(),
            ];
        } catch (\Throwable $failure) {
            return [
                'error' => $failure->getMessage(),
                'default_model_name' => $this->param('retriex.model.default_name'),
                'default_num_ctx' => $this->param('retriex.model.default_num_ctx'),
                'default_retrieval_max_chunks' => $this->param('retriex.model.default_retrieval_max_chunks'),
                'default_retrieval_vector_top_k' => $this->param('retriex.model.default_retrieval_vector_top_k'),
            ];
        }
    }

    /**
     * Retriever settings, with the 'vocabulary' and 'inventory_parameter'
     * entries written after (and therefore overriding) same-named retriever keys.
     *
     * @return array
     */
    private function retrievalConfig(): array
    {
        $retriever = $this->retrieverConfig;

        return [
            ...$retriever->toArray(),
            'vocabulary' => $retriever->vocabularyToArray(),
            'inventory_parameter' => $this->param('retriex.retrieval.inventory', []),
        ];
    }

    /**
     * Prompt builder settings: budget values first, then the 'labels', 'rules',
     * 'shop_fields' and 'detection' groups.
     *
     * @return array<string, mixed>
     */
    private function promptConfig(): array
    {
        $prompt = $this->promptConfig;

        $section = [
            'chars_per_token' => $prompt->getCharsPerToken(),
            'history_padding_chars' => $prompt->getHistoryPaddingChars(),
            'output_reserve_ratio' => $prompt->getOutputReserveRatio(),
            'output_reserve_min_tokens' => $prompt->getOutputReserveMinTokens(),
            'output_reserve_max_tokens' => $prompt->getOutputReserveMaxTokens(),
            'safety_reserve_ratio' => $prompt->getSafetyReserveRatio(),
            'safety_reserve_min_tokens' => $prompt->getSafetyReserveMinTokens(),
            'safety_reserve_max_tokens' => $prompt->getSafetyReserveMaxTokens(),
            'min_prompt_budget_tokens' => $prompt->getMinPromptBudgetTokens(),
            'max_shop_results_in_prompt' => $prompt->getMaxShopResultsInPrompt(),
            'detailed_shop_results_max_count' => $prompt->getDetailedShopResultsMaxCount(),
            'technical_product_keyword_match_threshold' => $prompt->getTechnicalProductKeywordMatchThreshold(),
        ];

        $section['labels'] = [
            'system' => $prompt->getSystemSectionLabel(),
            'user_question' => $prompt->getUserQuestionSectionLabel(),
            'conversation_context' => $prompt->getConversationContextSectionLabel(),
            'shop_search_query' => $prompt->getShopSearchQuerySectionLabel(),
            'output_priority' => $prompt->getOutputPrioritySectionLabel(),
            'response_format' => $prompt->getResponseFormatSectionLabel(),
            'language_rules' => $prompt->getLanguageRulesSectionLabel(),
            'fact_grounding_rules' => $prompt->getFactGroundingRulesSectionLabel(),
            'retrieved_knowledge' => $prompt->getRetrievedKnowledgeSectionLabel(),
            'url_content' => $prompt->getUrlContentSectionLabel(),
        ];

        $section['rules'] = [
            'conversation_context_intro_lines' => $prompt->getConversationContextIntroLines(),
            'live_shop_results_header_lines' => $prompt->getLiveShopResultsHeaderLines(),
            'output_priority' => $prompt->getOutputPriorityRules(),
            'output_priority_technical' => $prompt->getOutputPriorityTechnicalRules(),
            'response_format_base' => $prompt->getResponseFormatBaseRules(),
            'response_format_with_shop' => $prompt->getResponseFormatWithShopRules(),
            'response_format_without_shop' => $prompt->getResponseFormatWithoutShopRules(),
            'response_format_technical' => $prompt->getResponseFormatTechnicalRules(),
            'response_format_accessory' => $prompt->getResponseFormatAccessoryRules(),
            'language' => $prompt->getLanguageRules(),
            'fact_grounding_base' => $prompt->getFactGroundingBaseRules(),
            'fact_grounding_with_shop' => $prompt->getFactGroundingWithShopRules(),
            'fact_grounding_without_shop' => $prompt->getFactGroundingWithoutShopRules(),
            'fact_grounding_technical' => $prompt->getFactGroundingTechnicalRules(),
        ];

        $section['shop_fields'] = [
            'product_number_label' => $prompt->getShopProductNumberLabel(),
            'manufacturer_label' => $prompt->getShopManufacturerLabel(),
            'price_label' => $prompt->getShopPriceLabel(),
            'availability_label' => $prompt->getShopAvailabilityLabel(),
            'availability_yes_label' => $prompt->getShopAvailabilityYesLabel(),
            'availability_no_label' => $prompt->getShopAvailabilityNoLabel(),
            'highlight_prefix' => $prompt->getShopHighlightPrefix(),
            'url_label' => $prompt->getShopUrlLabel(),
            'product_image_label' => $prompt->getShopProductImageLabel(),
            'description_label' => $prompt->getShopDescriptionLabel(),
            'meta_information_label' => $prompt->getShopMetaInformationLabel(),
        ];

        $section['detection'] = [
            'technical_product_keywords' => $prompt->getTechnicalProductKeywords(),
            'accessory_request_keywords' => $prompt->getAccessoryRequestKeywords(),
            'technical_product_model_pattern' => $prompt->getTechnicalProductModelPattern(),
        ];

        return $section;
    }

    /**
     * Chat message configuration exactly as exported by the config object.
     *
     * @return array
     */
    private function chatMessagesConfig(): array
    {
        $messages = $this->chatMessagesConfig->toArray();

        return $messages;
    }

    /**
     * Appends every error reported by the chat messages config validation to $errors.
     */
    private function validateChatMessages(array &$errors): void
    {
        $report = $this->chatMessagesConfig->validate();

        foreach ($report['errors'] as $message) {
            $errors[] = $message;
        }
    }

    /** @return array */
    private function agentConfig(): array
    {
        return [
            'commerce_history_budget_chars' => $this->agentRunnerConfig->getCommerceHistoryBudgetChars(),
            'product_search_knowledge_chunk_limit' => $this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit(),
            'advisory_product_search_knowledge_chunk_limit' => $this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit(),
            'optimized_shop_query_prefix_pattern' =>
$this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), 'follow_up_context' => [ 'commercial_table_follow_up' => [ 'enabled' => $this->agentRunnerConfig->isCommercialTableFollowUpEnabled(), 'prompt_patterns' => $this->agentRunnerConfig->getCommercialTableFollowUpPromptPatterns(), 'history_anchor_patterns' => $this->agentRunnerConfig->getCommercialTableFollowUpHistoryAnchorPatterns(), 'table_terms' => $this->agentRunnerConfig->getCommercialTableFollowUpTableTerms(), 'commercial_terms' => $this->agentRunnerConfig->getCommercialTableFollowUpCommercialTerms(), 'indicator_marker_patterns' => $this->agentRunnerConfig->getCommercialTableFollowUpIndicatorMarkerPatterns(), 'query_template_with_model' => $this->agentRunnerConfig->getCommercialTableFollowUpQueryTemplateWithModel(), 'query_template_without_model' => $this->agentRunnerConfig->getCommercialTableFollowUpQueryTemplateWithoutModel(), ], ], 'input_normalization' => [ 'enabled' => $this->agentRunnerConfig->isInputNormalizationEnabled(), 'max_input_chars' => $this->agentRunnerConfig->getInputNormalizationMaxInputChars(), 'max_output_chars' => $this->agentRunnerConfig->getInputNormalizationMaxOutputChars(), 'max_added_tokens' => $this->agentRunnerConfig->getInputNormalizationMaxAddedTokens(), 'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(), 'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(), 'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(), 'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(), 'prompt' => [ 'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(), 'rules' => $this->agentRunnerConfig->getInputNormalizationRules(), 'output_format_block' => $this->agentRunnerConfig->getInputNormalizationOutputFormatBlock(), 'current_user_input_label' => $this->agentRunnerConfig->getInputNormalizationCurrentUserInputLabel(), ], 'fuzzy_routing' 
=> [ 'enabled' => $this->agentRunnerConfig->isInputNormalizationFuzzyRoutingEnabled(), 'min_token_length' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMinTokenLength(), 'medium_token_length' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMediumTokenLength(), 'long_token_length' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingLongTokenLength(), 'max_distance_short' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMaxDistanceShort(), 'max_distance_medium' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMaxDistanceMedium(), 'max_distance_long' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMaxDistanceLong(), 'min_similarity_percent' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMinSimilarityPercent(), 'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(), ], ], 'messages' => [ 'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(), 'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(), 'check_internet_sources' => $this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'retrieve_knowledge' => $this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'optimize_search' => $this->agentRunnerConfig->getOptimizeSearchMessage(), 'no_concrete_shop_query' => $this->agentRunnerConfig->getNoConcreteShopQueryMessage(), 'fetch_search_data_template' => $this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), 'analyze_all_information' => $this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'thinking_while_streaming' => $this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'no_llm_data_received' => $this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(), 'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(), ], 'final_answer_guard' => [ 'enabled' => 
$this->agentRunnerConfig->isFinalAnswerGuardEnabled(), 'max_output_chars' => $this->agentRunnerConfig->getFinalAnswerGuardMaxOutputChars(), 'truncation_message' => $this->agentRunnerConfig->getFinalAnswerGuardTruncationMessage(), 'repeated_line' => [ 'enabled' => $this->agentRunnerConfig->isFinalAnswerRepeatedLineGuardEnabled(), 'min_output_chars' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinOutputChars(), 'min_line_chars' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinLineChars(), 'max_line_repetitions' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMaxRepetitions(), 'trailing_window_lines' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineTrailingWindowLines(), 'ignore_patterns' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineIgnorePatterns(), ], ], 'shop_runtime' => [ 'query_cleanup' => [ 'current_input_preservation' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled(), 'terms' => $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms(), ], 'stopword_cleanup' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryStopwordCleanupEnabled(), 'min_query_tokens_after_cleanup' => $this->agentRunnerConfig->getShopQueryStopwordCleanupMinTokens(), 'terms' => $this->agentRunnerConfig->getShopQueryStopwordCleanupTerms(), ], 'positive_token_filter' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryPositiveTokenFilterEnabled(), 'min_query_tokens_after_filter' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterMinTokens(), 'include_current_input_preservation_terms' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(), 'include_semantic_shop_search_tokens' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeSemanticShopSearchTokens(), 'include_product_role_terms' => $this->agentRunnerConfig->shouldShopQueryPositiveTokenFilterIncludeProductRoleTerms(), 'allowed_terms' => 
$this->agentRunnerConfig->getShopQueryPositiveTokenFilterAllowedTerms(), 'blocked_terms' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterBlockedTerms(), 'code_patterns' => $this->agentRunnerConfig->getShopQueryPositiveTokenFilterCodePatterns(), ], ], 'attribute_cleanup' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryProductAttributeCleanupEnabled(), 'min_query_tokens_after_cleanup' => $this->agentRunnerConfig->getShopQueryProductAttributeCleanupMinTokens(), 'product_type_terms' => $this->agentRunnerConfig->getShopQueryProductAttributeCleanupProductTypeTerms(), 'stop_terms' => $this->agentRunnerConfig->getShopQueryProductAttributeCleanupStopTerms(), 'comparative_constraint_patterns' => $this->agentRunnerConfig->getShopQueryProductAttributeCleanupComparativeConstraintPatterns(), ], 'context_resolution' => [ 'context_usage' => [ 'referential_terms' => $this->agentRunnerConfig->getShopQueryContextUsageReferentialTerms(), ], 'history_anchor_enrichment' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), 'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(), 'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(), 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), ], 'meta_query_guard' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryMetaGuardEnabled(), 'cleanup_profile' => $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile(), 'context_fallback_use_full_history' => $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback(), 'meta_only_terms' => 
$this->agentRunnerConfig->getShopQueryMetaOnlyTerms(), 'context_fallback_enabled' => $this->agentRunnerConfig->isShopQueryContextFallbackEnabled(), 'context_fallback_question_limit' => $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit(), 'context_fallback_history_budget_chars' => $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars(), 'context_fallback_max_terms' => $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms(), 'context_fallback_filter_terms' => $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms(), ], 'rag_anchor_enrichment' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryRagAnchorEnrichmentEnabled(), 'min_score' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMinScore(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMaxQueryTerms(), 'early_chunk_bonus_max' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax(), 'template' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentTemplate(), 'scores' => [ 'exact_value_with_unit' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueUnitScore(), 'exact_value_only' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueScore(), 'anchor_bonus' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusScore(), ], 'numeric_focus_patterns' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentNumericFocusPatterns(), 'product_title_patterns' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentProductTitlePatterns(), 'anchor_bonus_patterns' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns(), 'subject_terms' => $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentSubjectTerms(), ], ], 'result_identity' => [ 'enabled' => $this->agentRunnerConfig->isDirectShopResultGuardEnabled(), 'prefer_primary_identity_matches' => 
$this->agentRunnerConfig->shouldPreferDirectShopResultGuardPrimaryIdentityMatches(), 'compound_prefix_match' => [ 'enabled' => $this->agentRunnerConfig->isDirectShopResultGuardCompoundPrefixMatchEnabled(), 'terms' => $this->agentRunnerConfig->getDirectShopResultGuardCompoundPrefixTerms(), ], 'primary_identity_repair' => [ 'enabled' => $this->agentRunnerConfig->isDirectShopResultGuardPrimaryIdentityRepairEnabled(), 'min_query_tokens_after_cleanup' => $this->agentRunnerConfig->getDirectShopResultGuardPrimaryIdentityRepairMinQueryTokens(), 'stop_terms' => $this->agentRunnerConfig->getDirectShopResultGuardPrimaryIdentityRepairStopTerms(), ], ], 'answer_constraints' => [ 'length_sort' => [ 'enabled' => $this->agentRunnerConfig->isShopResultLengthSortEnabled(), 'trigger_patterns' => $this->agentRunnerConfig->getShopResultLengthSortTriggerPatterns(), 'value_patterns' => $this->agentRunnerConfig->getShopResultLengthSortValuePatterns(), ], 'length_filter' => [ 'enabled' => $this->agentRunnerConfig->isShopResultLengthFilterEnabled(), 'min_patterns' => $this->agentRunnerConfig->getShopResultMinLengthFilterPatterns(), 'max_patterns' => $this->agentRunnerConfig->getShopResultMaxLengthFilterPatterns(), ], ], 'direct_answer' => [ 'enabled' => $this->agentRunnerConfig->isDirectShopResultAnswerEnabled(), 'max_results' => $this->agentRunnerConfig->getDirectShopResultAnswerMaxResults(), 'intro' => $this->agentRunnerConfig->getDirectShopResultAnswerIntro(), 'no_results' => $this->agentRunnerConfig->getDirectShopResultAnswerNoResultsMessage(), 'sorted_by_length_note' => $this->agentRunnerConfig->getDirectShopResultAnswerSortedByLengthNote(), 'min_length_filter_note' => $this->agentRunnerConfig->getDirectShopResultAnswerMinLengthFilterNote(), 'max_length_filter_note' => $this->agentRunnerConfig->getDirectShopResultAnswerMaxLengthFilterNote(), ], ], 'rag_evidence_guard' => [ 'cleanup_profile' => $this->agentRunnerConfig->getRagEvidenceCleanupProfile(), 'stop_terms' => 
$this->agentRunnerConfig->getRagEvidenceStopTerms(), 'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(), 'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(), 'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(), 'aggregate_answer_evidence_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateAnswerEvidencePatterns(), ], 'source_labels' => [ 'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(), 'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(), 'conversation_history' => $this->agentRunnerConfig->getConversationHistorySourceLabel(), 'shop_system' => $this->agentRunnerConfig->getShopSystemSourceLabel(), 'extended_shop_search' => $this->agentRunnerConfig->getExtendedShopSearchSourceLabel(), 'used_sources_prefix' => $this->agentRunnerConfig->getUsedSourcesPrefix(), 'sources_prefix' => $this->agentRunnerConfig->getSourcesPrefix(), ], 'html_templates' => [ 'source_badge' => $this->agentRunnerConfig->getSourceBadgeHtmlTemplate(), 'error' => $this->agentRunnerConfig->getErrorHtmlTemplate(), 'think' => $this->agentRunnerConfig->getThinkHtmlTemplate(), 'info' => $this->agentRunnerConfig->getInfoHtmlTemplate(), 'debug' => $this->agentRunnerConfig->getDebugHtmlTemplate(), ], 'shop_query_optimizer' => [ 'rules' => $this->agentRunnerConfig->getShopPromptRules(), 'conversation_context_rules' => $this->agentRunnerConfig->getConversationContextRules(), 'intro' => $this->agentRunnerConfig->getShopPromptIntro(), 'output_format_block' => $this->agentRunnerConfig->getShopPromptOutputFormatBlock(), 'recent_conversation_context_label' => $this->agentRunnerConfig->getRecentConversationContextLabel(), 'current_user_input_label' => $this->agentRunnerConfig->getCurrentUserInputLabel(), 'language_preservation' => [ 'enabled' => $this->agentRunnerConfig->isShopQueryLanguagePreservationEnabled(), 'language_markers' => 
$this->agentRunnerConfig->getShopQueryLanguageMarkers(),
                    'translation_replacements_de' => $this->agentRunnerConfig->getShopQueryTranslationReplacements('de'),
                ],
            ],
        ];
    }

    /**
     * Builds the `vector` section of the configuration dump.
     *
     * Every value is read through param() from the `retriex.vector.*`
     * namespace and passed on unchanged. The section is assembled step by
     * step: top-level settings first, then the `search`, `tags` and
     * `tag_routing` sub-sections, in that key order.
     *
     * @return array<string, mixed>
     */
    private function vectorConfig(): array
    {
        $section = [
            'service_url' => $this->param('retriex.vector.service_url'),
            'host' => $this->param('retriex.vector.host'),
            'port' => $this->param('retriex.vector.port'),
            'python_bin' => $this->param('retriex.vector.python_bin'),
            'control_script' => $this->param('retriex.vector.control_script'),
            'timeout' => $this->param('retriex.vector.timeout'),
        ];

        $section['search'] = [
            'min_score' => $this->param('retriex.vector.search.min_score'),
            'max_limit' => $this->param('retriex.vector.search.max_limit'),
            'http_timeout' => $this->param('retriex.vector.search.http_timeout'),
        ];

        $section['tags'] = [
            'min_score' => $this->param('retriex.vector.tags.min_score'),
            'default_limit' => $this->param('retriex.vector.tags.default_limit'),
            'max_limit' => $this->param('retriex.vector.tags.max_limit'),
            'http_timeout' => $this->param('retriex.vector.tags.http_timeout'),
        ];

        $section['tag_routing'] = [
            'default_topk' => $this->param('retriex.vector.tag_routing.default_topk'),
            'min_best_score' => $this->param('retriex.vector.tag_routing.min_best_score'),
            'max_score_drop_from_best' => $this->param('retriex.vector.tag_routing.max_score_drop_from_best'),
            'max_routing_tags' => $this->param('retriex.vector.tag_routing.max_routing_tags'),
            'max_candidate_docs' => $this->param('retriex.vector.tag_routing.max_candidate_docs'),
            'multi_tag_bonus_per_extra_tag' => $this->param('retriex.vector.tag_routing.multi_tag_bonus_per_extra_tag'),
            'max_multi_tag_bonus' => $this->param('retriex.vector.tag_routing.max_multi_tag_bonus'),
        ];

        return $section;
    }

    /**
     * Builds the `commerce` section of the configuration dump from the
     * `retriex.commerce.*` parameters.
     *
     * The sales channel access key is exposed only as a boolean
     * `sales_channel_access_key_configured` flag, not as its value.
     *
     * @return array<string, mixed>
     */
    private function commerceConfig(): array
    {
        return [
            'enabled' => $this->param('retriex.commerce.enabled'),
            'max_shop_results' => $this->param('retriex.commerce.max_shop_results'),
            'shop_timeout' => $this->param('retriex.commerce.shop_timeout'),
'store_api_base_url' => $this->param('retriex.commerce.store_api_base_url'),
            // Report only whether an access key is present, never the key itself.
            // A plain `!== ''` comparison is also true for null, so an unset
            // parameter would be reported as configured; normalise first, using
            // the same trim/cast idiom as the validators in this class.
            // NOTE(review): param() is not visible from here - confirm it can
            // actually return null or padded strings for this parameter.
            'sales_channel_access_key_configured' => trim((string) ($this->param('retriex.commerce.sales_channel_access_key') ?? '')) !== '',
            'search_repair' => [
                'enabled' => $this->param('retriex.commerce.search_repair.enabled'),
                'max_queries' => $this->param('retriex.commerce.search_repair.max_queries'),
                'min_primary_results_without_repair' => $this->param('retriex.commerce.search_repair.min_primary_results_without_repair'),
            ],
        ];
    }

    /**
     * Builds the `commerce_query` section of the configuration dump.
     *
     * All values come from the commerce query parser config object. The
     * search-text trim characters are not dumped themselves; only their
     * byte length (strlen) is reported under `text.trim_characters_length`.
     *
     * @return array<string, mixed>
     */
    private function commerceQueryConfig(): array
    {
        $parser = $this->commerceQueryParserConfig;

        return [
            'cleanup_profile' => $parser->getCleanupProfile(),
            'known_brands' => $parser->getKnownBrands(),
            'phrases_to_remove' => $parser->getPhrasesToRemove(),
            'filter_search_tokens' => $parser->getFilterSearchTokens(),
            'search_token_corrections' => $parser->getSearchTokenCorrections(),
            'search_token_canonical_map' => $parser->getSearchTokenCanonicalMap(),
            'semantic_shop_search_tokens' => $parser->getSemanticShopSearchTokens(),
            'normalization' => [
                'search' => $parser->getNormalizationSearch(),
                'replace' => $parser->getNormalizationReplace(),
            ],
            'text' => [
                'trim_characters_length' => strlen($parser->getSearchTextTrimCharacters()),
            ],
            'limits' => [
                'min_search_token_length' => $parser->getMinSearchTokenLength(),
                'min_direct_product_token_length' => $parser->getMinDirectProductTokenLength(),
                'direct_product_max_tokens' => $parser->getDirectProductMaxTokens(),
                'model_context_token_window' => $parser->getModelContextTokenWindow(),
                'min_meaningful_alpha_token_length' => $parser->getMinMeaningfulAlphaTokenLength(),
                'max_shop_search_tokens' => $parser->getMaxShopSearchTokens(),
            ],
            'patterns' => [
                'history_context' => $parser->getHistoryContextPattern(),
                'history_context_value' => $parser->getHistoryContextValuePattern(),
                'filter_search_tokens' => $parser->getFilterSearchTokensPattern(),
                'prompt_sanitize' => $parser->getPromptSanitizePattern(),
                'whitespace_collapse' => $parser->getWhitespaceCollapsePattern(),
                'whitespace_split' => $parser->getWhitespaceSplitPattern(),
                'history_question' => $parser->getHistoryQuestionPattern(),
                'price_between' => $parser->getPriceBetweenPattern(),
                'price_max' => $parser->getPriceMaxPattern(),
                'price_min' => $parser->getPriceMinPattern(),
                'direct_product_digit' => $parser->getDirectProductDigitPattern(),
                'model_like' => $parser->getModelLikePattern(),
                'accessory_like' => $parser->getAccessoryLikePattern(),
                'contains_digit' => $parser->getContainsDigitPattern(),
                'model_number_token' => $parser->getModelNumberTokenPattern(),
                'model_context_token' => $parser->getModelContextTokenPattern(),
                'model_suffix_token' => $parser->getModelSuffixTokenPattern(),
                'instruction_or_presentation_token' => $parser->getInstructionOrPresentationTokenPattern(),
                'measurement_value_token' => $parser->getMeasurementValueTokenPattern(),
            ],
        ];
    }

    /**
     * Builds the `shop_matching` section of the configuration dump:
     * keyword lists plus a `scores` map, all read from the shop service
     * config object.
     *
     * @return array<string, mixed>
     */
    private function shopMatchingConfig(): array
    {
        return [
            'device_focus_keywords' => $this->shopServiceConfig->getDeviceFocusKeywords(),
            'accessory_focus_keywords' => $this->shopServiceConfig->getAccessoryFocusKeywords(),
            'accessory_focus_variant_map'
=> $this->shopServiceConfig->getAccessoryFocusVariantMap(),
            'device_query_keywords' => $this->shopServiceConfig->getDeviceQueryKeywords(),
            'accessory_query_keywords' => $this->shopServiceConfig->getAccessoryQueryKeywords(),
            'accessory_product_keywords' => $this->shopServiceConfig->getAccessoryProductKeywords(),
            'device_product_keywords' => $this->shopServiceConfig->getDeviceProductKeywords(),
            'scores' => [
                'exact_product_number_phrase' => $this->shopServiceConfig->getExactProductNumberPhraseScore(),
                'exact_product_name_phrase' => $this->shopServiceConfig->getExactProductNamePhraseScore(),
                'exact_manufacturer_match' => $this->shopServiceConfig->getExactManufacturerMatchScore(),
                'brand_contained_in_name' => $this->shopServiceConfig->getBrandContainedInNameScore(),
                'name_token_overlap_weight' => $this->shopServiceConfig->getNameTokenOverlapWeight(),
                'product_number_token_overlap_weight' => $this->shopServiceConfig->getProductNumberTokenOverlapWeight(),
                'corpus_token_overlap_weight' => $this->shopServiceConfig->getCorpusTokenOverlapWeight(),
                'name_number_overlap_weight' => $this->shopServiceConfig->getNameNumberOverlapWeight(),
                'product_number_number_overlap_weight' => $this->shopServiceConfig->getProductNumberNumberOverlapWeight(),
                'corpus_number_overlap_weight' => $this->shopServiceConfig->getCorpusNumberOverlapWeight(),
                'size_match' => $this->shopServiceConfig->getSizeMatchScore(),
                'availability_bonus' => $this->shopServiceConfig->getAvailabilityBonusScore(),
                'device_query_device_product_bonus' => $this->shopServiceConfig->getDeviceQueryDeviceProductBonus(),
                'device_query_accessory_penalty' => $this->shopServiceConfig->getDeviceQueryAccessoryPenalty(),
                'accessory_query_accessory_product_bonus' => $this->shopServiceConfig->getAccessoryQueryAccessoryProductBonus(),
                'accessory_query_device_product_bonus' => $this->shopServiceConfig->getAccessoryQueryDeviceProductBonus(),
            ],
        ];
    }

    /**
     * Builds the `search_repair` section of the configuration dump.
     *
     * Unlike the `search_repair` entry inside the `commerce` section, which
     * echoes raw parameters, every value here is obtained from the search
     * repair config object's accessors.
     *
     * @return array<string, mixed>
     */
    private function searchRepairEffectiveConfig(): array
    {
        $repair = $this->searchRepairConfig;

        return [
            'enabled' => $repair->isEnabled(),
            'max_repair_queries' => $repair->getMaxRepairQueries(),
            'min_primary_results_without_repair' => $repair->getMinPrimaryResultsWithoutRepair(),
            'strict_requested_accessory_code_repair' => $repair->shouldRestrictRequestedAccessoryCodeRepair(),
            'prefer_prompt_anchored_model_for_requested_accessory_code' => $repair->shouldPreferPromptAnchoredModelForRequestedAccessoryCode(),
            'direct_product_attribute_lookup' => [
                'enabled' => $repair->isDirectProductAttributeLookupRepairEnabled(),
                'min_query_tokens_after_cleanup' => $repair->getDirectProductAttributeLookupMinTokens(),
                'product_type_terms' => $repair->getDirectProductAttributeLookupProductTypeTerms(),
                'stop_terms' => $repair->getDirectProductAttributeLookupStopTerms(),
                'comparative_constraint_patterns' => $repair->getDirectProductAttributeLookupComparativeConstraintPatterns(),
            ],
            'requested_accessory_code_fallback_query_templates' => $repair->getRequestedAccessoryCodeFallbackQueryTemplates(),
            'requested_accessory_code_fallback_terms' => $repair->getRequestedAccessoryCodeFallbackTerms(),
            'requested_accessory_code_context_prefix_terms' => $repair->getRequestedAccessoryCodeContextPrefixTerms(),
            'requested_accessory_code_proximity_window' => $repair->getRequestedAccessoryCodeProximityWindow(),
            'specific_model_candidate_patterns' => $repair->getSpecificModelCandidatePatterns(),
            'model_candidate_exclude_terms' => $repair->getModelCandidateExcludeTerms(),
            'generic_candidate_tokens' => $repair->getGenericCandidateTokens(),
            'accessory_candidate_terms' => $repair->getAccessoryCandidateTerms(),
            'accessory_or_bundle_terms' => $repair->getAccessoryOrBundleTerms(),
            'specificity_boost_terms' => $repair->getSpecificityBoostTerms(),
            'scores' => [
                'candidate_digit' => $repair->getCandidateDigitScore(),
                'candidate_word_count_cap' => $repair->getCandidateWordCountCap(),
                'specificity_boost' => $repair->getSpecificityBoostScore(),
                'primary_query_overlap_threshold' => $repair->getPrimaryQueryOverlapThreshold(),
                'prompt_match_weight' => $repair->getPromptMatchWeight(),
                'primary_query_match_weight' => $repair->getPrimaryQueryMatchWeight(),
                'repair_signal_match_weight' => $repair->getRepairSignalMatchWeight(),
                'primary_result_order_bonus' => $repair->getPrimaryResultOrderBonus(),
                'token_intersection_score' => $repair->getTokenIntersectionScore(),
                'numeric_token_match_score' => $repair->getNumericTokenMatchScore(),
            ],
            'patterns' => [
                'model_candidate' => $repair->getModelCandidatePattern(),
                'accessory_candidate' => $repair->getAccessoryCandidatePattern(),
                'requested_accessory_code' => $repair->getRequestedAccessoryCodePattern(),
                'accessory_or_bundle' => $repair->getAccessoryOrBundlePattern(),
                'model_like' => $repair->getModelLikePattern(),
                'specificity_boost' => $repair->getSpecificityBoostPattern(),
                'contains_digit' => $repair->getContainsDigitPattern(),
                'whitespace_collapse' => $repair->getWhitespaceCollapsePattern(),
                'tokenize_cleanup' => $repair->getTokenizeCleanupPattern(),
            ],
        ];
    }

    /**
     * Builds the `intent` section of the configuration dump with its
     * `commerce`, `light` and `sales` sub-sections.
     *
     * @return array<string, mixed>
     */
    private function intentConfig(): array
    {
        return [
            'commerce' => [
                'strong_signals' => $this->commerceIntentConfig->getStrongSignalsList(),
                'advisory_signals' => $this->commerceIntentConfig->getAdvisorySignals(),
                'advisory_product_selection_patterns' =>
$this->commerceIntentConfig->getAdvisoryProductSelectionPatterns(),
                'price_terms' => $this->commerceIntentConfig->getPriceTerms(),
                'color_terms' => $this->commerceIntentConfig->getColorTerms(),
                'size_token_terms' => $this->commerceIntentConfig->getSizeTokenTerms(),
                'size_terms' => $this->commerceIntentConfig->getSizeTerms(),
                'support_diagnostic_patterns' => $this->commerceIntentConfig->getSupportDiagnosticPatterns(),
                'explicit_commerce_intent_patterns' => $this->commerceIntentConfig->getExplicitCommerceIntentPatterns(),
                'patterns' => [
                    'sku_like' => $this->commerceIntentConfig->getSkuLikePattern(),
                    'price_value' => $this->commerceIntentConfig->getPriceValuePattern(),
                    'size_extraction' => $this->commerceIntentConfig->getSizeExtractionPattern(),
                    'size_value' => $this->commerceIntentConfig->getSizeValuePattern(),
                    'size_token_value' => $this->commerceIntentConfig->getSizeTokenValuePattern(),
                    'color_value' => $this->commerceIntentConfig->getColorValuePattern(),
                    'model_like_product' => $this->commerceIntentConfig->getModelLikeProductPattern(),
                ],
                'labels' => [
                    'support_or_diagnostic_signal' => $this->commerceIntentConfig->getSupportOrDiagnosticSignalLabel(),
                    'sku_signal' => $this->commerceIntentConfig->getSkuSignalLabel(),
                    'price_signal' => $this->commerceIntentConfig->getPriceSignalLabel(),
                    'size_signal' => $this->commerceIntentConfig->getSizeSignalLabel(),
                    'size_token_signal' => $this->commerceIntentConfig->getSizeTokenSignalLabel(),
                    'color_signal' => $this->commerceIntentConfig->getColorSignalLabel(),
                    'advisory_signal_prefix' => $this->commerceIntentConfig->getAdvisorySignalPrefix(),
                    'advisory_product_selection_signal' => $this->commerceIntentConfig->getAdvisoryProductSelectionSignalLabel(),
                    'model_like_product_signal' => $this->commerceIntentConfig->getModelLikeProductSignalLabel(),
                ],
                'thresholds' => [
                    'product_search_min_score' => $this->commerceIntentConfig->getProductSearchMinScore(),
                    'advisory_product_search_min_score' => $this->commerceIntentConfig->getAdvisoryProductSearchMinScore(),
                    'strong_signal_score' => $this->commerceIntentConfig->getStrongSignalScore(),
                    'sku_signal_score' => $this->commerceIntentConfig->getSkuSignalScore(),
                    'price_signal_score' => $this->commerceIntentConfig->getPriceSignalScore(),
                    'size_signal_score' => $this->commerceIntentConfig->getSizeSignalScore(),
                    'size_token_signal_score' => $this->commerceIntentConfig->getSizeTokenSignalScore(),
                    'color_signal_score' => $this->commerceIntentConfig->getColorSignalScore(),
                    'advisory_signal_score' => $this->commerceIntentConfig->getAdvisorySignalScore(),
                    'advisory_product_selection_signal_score' => $this->commerceIntentConfig->getAdvisoryProductSelectionSignalScore(),
                    'model_like_product_signal_score' => $this->commerceIntentConfig->getModelLikeProductSignalScore(),
                ],
            ],
            'light' => [
                'list_threshold' => $this->intentLightConfig->getListThreshold(),
                'quantity_words' => $this->intentLightConfig->getQuantityWords(),
                'strong_patterns' => $this->intentLightConfig->getStrongPatterns(),
            ],
            'sales' => [
                'dominance_delta' => $this->salesIntentConfig->getDominanceDelta(),
                'min_score_threshold' => $this->salesIntentConfig->getMinScoreThreshold(),
                'sales_signals' => $this->salesIntentConfig->getSalesSignals(),
                'comparison_signals' => $this->salesIntentConfig->getComparisonSignals(),
                'objection_signals' => $this->salesIntentConfig->getObjectionSignals(),
                'implementation_signals' => $this->salesIntentConfig->getImplementationSignals(),
                'roi_signals' => $this->salesIntentConfig->getRoiSignals(),
            ],
        ];
    }

    /**
     * Builds the `language` section of the configuration dump.
     *
     * The cleanup profile names are fetched once and reused, so that
     * `cleanup_profile_names` and the keys of `cleanup_profiles` always
     * come from the same list.
     *
     * @return array<string, mixed>
     */
    private function languageConfig(): array
    {
        $cleanupProfileNames = $this->languageCleanupConfig->getCleanupProfileNames();

        // Resolve each named profile into its definition, keyed by profile name.
        $profiles = [];
        foreach ($cleanupProfileNames as $profileName) {
            $profiles[$profileName] = $this->languageCleanupConfig->getCleanupProfile($profileName);
        }

        return [
            'stopwords' => $this->stopWordsConfig->getStopWords(),
            'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
            'cleanup_profile_names' => $cleanupProfileNames,
            'cleanup_profiles' => $profiles,
        ];
    }

    /**
     * Builds the `query_enrichment` section of the configuration dump
     * from the query enricher config object.
     *
     * @return array<string, mixed>
     */
    private function queryEnrichmentConfig(): array
    {
        $enricher = $this->queryEnricherConfig;

        return [
            'max_expansions' => $enricher->getMaxExpansions(),
            'has_rules' => $enricher->hasRules(),
            'rules' => $enricher->getEnrichQueryList(),
        ];
    }

    /**
     * Builds the `catalog_intent` section of the configuration dump
     * from the catalog intent config object.
     *
     * @return array<string, mixed>
     */
    private function catalogIntentConfig(): array
    {
        $catalog = $this->catalogIntentConfig;

        return [
            'min_score' => $catalog->getMinScore(),
            'ambiguity_delta' => $catalog->getAmbiguityDelta(),
            'intent_search_limit' => $catalog->getIntentSearchLimit(),
            'list_search_limit' => $catalog->getListSearchLimit(),
            'min_allowed_score' => $catalog->getMinAllowedScore(),
            'max_allowed_score' => $catalog->getMaxAllowedScore(),
        ];
    }

    /**
     * Builds the `context` section of the configuration dump from the
     * context service config object.
     *
     * @return array<string, mixed>
     */
    private function contextConfig(): array
    {
        $context = $this->contextServiceConfig;

        return [
            'max_visible_regular_lines' => $context->getMaxVisibleRegularLines(),
            'max_full_lines' => $context->getMaxFullLines(),
        ];
    }

    /**
     * @param array $genre
     * @param array $effectiveConfig
     * @param list $errors
     * @param list $warnings
     */
    private function validateGenre(array $genre, array $effectiveConfig, array &$errors, array &$warnings): void
    {
        if (trim((string) ($genre['id'] ?? '')) === '') {
            $errors[] = 'genre.id must not be empty.';
        }

        if (trim((string) ($genre['mode'] ?? '')) === '') {
            $errors[] = 'genre.mode must not be empty.';
        }

        $configurationValues = $genre['configuration_values'] ??
null; if (!is_array($configurationValues) || $configurationValues === []) { $errors[] = 'genre.configuration_values must be a non-empty map.'; return; } foreach ($configurationValues as $group => $valueDefinition) { if (!is_string($group) || trim($group) === '') { $errors[] = 'genre.configuration_values keys must be non-empty strings.'; continue; } if (!is_array($valueDefinition) || $valueDefinition === []) { $errors[] = sprintf('genre.configuration_values.%s must be a non-empty map.', $group); } } $configurationValuePaths = []; $this->flattenEffectiveConfigPath('configuration_values', $configurationValues, $configurationValuePaths); $surface = $genre['adaptation_surface'] ?? null; if (!is_array($surface) || $surface === []) { $errors[] = 'genre.adaptation_surface must be a non-empty map.'; return; } $flattened = []; $this->flattenGenreParameterPaths($flattened); $allowedReviewClassifications = [ 'legacy_compatibility_view', 'legacy_runtime_helper', 'technical_regex_template', 'legacy_signal_fallback', 'legacy_context_fallback', 'technical_cleanup_profile', 'technical_shop_mapping', 'runtime_resolved_connection', 'governance_guardrail', ]; $allowedReviewSourceStates = [ 'legacy_frozen_non_empty', 'legacy_runtime_resolved_allowed', ]; foreach ($effectiveConfig as $root => $value) { if ($root === 'genre') { continue; } $this->flattenEffectiveConfigPath((string) $root, $value, $flattened); } foreach ($surface as $group => $definition) { if (!is_string($group) || trim($group) === '') { $errors[] = 'genre.adaptation_surface keys must be non-empty strings.'; continue; } if (!is_array($definition)) { $errors[] = sprintf('genre.adaptation_surface.%s must be a map.', $group); continue; } if (!array_key_exists($group, $configurationValues)) { $warnings[] = sprintf('genre.configuration_values is missing value group for adaptation_surface.%s.', $group); } $valuePaths = $definition['value_paths'] ?? null; $legacyPaths = $definition['paths'] ?? 
null; if ((!is_array($valuePaths) || $valuePaths === []) && (!is_array($legacyPaths) || $legacyPaths === [])) { $errors[] = sprintf('genre.adaptation_surface.%s.value_paths must be a non-empty list.', $group); continue; } if (is_array($valuePaths)) { if ($valuePaths === []) { $errors[] = sprintf('genre.adaptation_surface.%s.value_paths must be a non-empty list when declared.', $group); } foreach ($valuePaths as $path) { if (!is_string($path) || trim($path) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.value_paths must contain non-empty strings.', $group); continue; } $path = trim($path); if (!str_starts_with($path, 'configuration_values.' . $group . '.')) { $warnings[] = sprintf('genre.adaptation_surface.%s.value_paths should reference configuration_values.%s.*: %s.', $group, $group, $path); } if (!isset($configurationValuePaths[$path])) { $warnings[] = sprintf('genre.adaptation_surface.%s references unknown genre value path: %s.', $group, $path); } } } if (is_array($legacyPaths)) { if ($legacyPaths === []) { $errors[] = sprintf('genre.adaptation_surface.%s.paths must be a non-empty list when declared.', $group); } foreach ($legacyPaths as $path) { if (!is_string($path) || trim($path) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.paths must contain non-empty strings.', $group); continue; } $path = trim($path); if (!isset($flattened[$path])) { $warnings[] = sprintf('genre.adaptation_surface.%s references unknown config path: %s.', $group, $path); } } } $reviewPaths = $definition['review_paths'] ?? 
null; if (array_key_exists('review_paths', $definition)) { if (!is_array($reviewPaths) || $reviewPaths === []) { $errors[] = sprintf('genre.adaptation_surface.%s.review_paths must be a non-empty list when declared.', $group); continue; } foreach ($reviewPaths as $path) { if (!is_string($path) || trim($path) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_paths must contain non-empty strings.', $group); continue; } $path = trim($path); if (!isset($flattened[$path])) { $warnings[] = sprintf('genre.adaptation_surface.%s references unknown review path: %s.', $group, $path); } } } $reviewPathGroups = $definition['review_path_groups'] ?? null; if (array_key_exists('review_path_groups', $definition)) { if (!is_array($reviewPathGroups) || $reviewPathGroups === []) { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups must be a non-empty map when declared.', $group); continue; } foreach ($reviewPathGroups as $reviewGroup => $reviewDefinition) { if (!is_string($reviewGroup) || trim($reviewGroup) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups keys must be non-empty strings.', $group); continue; } if (!is_array($reviewDefinition)) { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s must be a map.', $group, $reviewGroup); continue; } $classification = $reviewDefinition['classification'] ?? null; if (!is_string($classification) || trim($classification) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.classification must be a non-empty string.', $group, $reviewGroup); } elseif (!in_array(trim($classification), $allowedReviewClassifications, true)) { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.classification has unsupported value: %s.', $group, $reviewGroup, trim($classification)); } $sourceState = $reviewDefinition['source_state'] ?? 
null; if (!is_string($sourceState) || trim($sourceState) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.source_state must be a non-empty string.', $group, $reviewGroup); } elseif (!in_array(trim($sourceState), $allowedReviewSourceStates, true)) { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.source_state has unsupported value: %s.', $group, $reviewGroup, trim($sourceState)); } $cleanupAction = $reviewDefinition['cleanup_action'] ?? null; if (!is_string($cleanupAction) || trim($cleanupAction) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.cleanup_action must be a non-empty string.', $group, $reviewGroup); } $paths = $reviewDefinition['paths'] ?? null; if (!is_array($paths) || $paths === []) { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.paths must be a non-empty list.', $group, $reviewGroup); continue; } foreach ($paths as $path) { if (!is_string($path) || trim($path) === '') { $errors[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s.paths must contain non-empty strings.', $group, $reviewGroup); continue; } $path = trim($path); if (!isset($flattened[$path])) { $warnings[] = sprintf('genre.adaptation_surface.%s.review_path_groups.%s references unknown review path: %s.', $group, $reviewGroup, $path); } } } } } $contextResolution = is_array($configurationValues['context_resolution'] ?? null) ? $configurationValues['context_resolution'] : []; $productListFollowUp = is_array($contextResolution['product_list_followup'] ?? null) ? 
$contextResolution['product_list_followup'] : []; if ($productListFollowUp !== []) { if (array_key_exists('enabled', $productListFollowUp) && !is_bool($productListFollowUp['enabled'])) { $errors[] = 'genre.configuration_values.context_resolution.product_list_followup.enabled must be boolean.'; } foreach ([ 'weak_query_max_terms', 'weak_query_max_residual_terms', 'max_anchors', ] as $intKey) { if (array_key_exists($intKey, $productListFollowUp) && (($this->asInt($productListFollowUp[$intKey]) ?? -1) < 0)) { $errors[] = sprintf('genre.configuration_values.context_resolution.product_list_followup.%s must be numeric and non-negative.', $intKey); } } if (array_key_exists('template', $productListFollowUp) && (!is_string($productListFollowUp['template']) || trim($productListFollowUp['template']) === '')) { $errors[] = 'genre.configuration_values.context_resolution.product_list_followup.template must be a non-empty string.'; } $this->validateStringList($this->toList($productListFollowUp['product_terms'] ?? []), 'genre.configuration_values.context_resolution.product_list_followup.product_terms', $errors, $warnings); $this->validateStringList($this->toList($productListFollowUp['shop_terms'] ?? []), 'genre.configuration_values.context_resolution.product_list_followup.shop_terms', $errors, $warnings); $this->validateStringList($this->toList($productListFollowUp['noise_terms'] ?? []), 'genre.configuration_values.context_resolution.product_list_followup.noise_terms', $errors, $warnings); $this->validateStringList($this->toList($productListFollowUp['canonical_family_terms'] ?? []), 'genre.configuration_values.context_resolution.product_list_followup.canonical_family_terms', $errors, $warnings); $this->validateRegexPatternList($productListFollowUp['anchor_patterns'] ?? [], 'genre.configuration_values.context_resolution.product_list_followup.anchor_patterns', $errors); $this->validateRegexPatternList($productListFollowUp['canonical_start_patterns'] ?? 
[], 'genre.configuration_values.context_resolution.product_list_followup.canonical_start_patterns', $errors); } $shopQueryRuntime = is_array($configurationValues['shop_query_runtime'] ?? null) ? $configurationValues['shop_query_runtime'] : []; $positiveTokenFilter = is_array($shopQueryRuntime['positive_token_filter'] ?? null) ? $shopQueryRuntime['positive_token_filter'] : []; if ($positiveTokenFilter !== []) { foreach ([ 'enabled', 'include_current_input_preservation_terms', 'include_semantic_shop_search_tokens', 'include_product_role_terms', ] as $boolKey) { if (array_key_exists($boolKey, $positiveTokenFilter) && !is_bool($positiveTokenFilter[$boolKey])) { $errors[] = sprintf('genre.configuration_values.shop_query_runtime.positive_token_filter.%s must be boolean.', $boolKey); } } if (array_key_exists('min_query_tokens_after_filter', $positiveTokenFilter) && !is_numeric($positiveTokenFilter['min_query_tokens_after_filter'])) { $errors[] = 'genre.configuration_values.shop_query_runtime.positive_token_filter.min_query_tokens_after_filter must be numeric.'; } $this->validateStringList($this->toList($positiveTokenFilter['allowed_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.positive_token_filter.allowed_terms', $errors, $warnings); $this->validateStringList($this->toList($positiveTokenFilter['blocked_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.positive_token_filter.blocked_terms', $errors, $warnings); $this->validateStringList($this->toList($positiveTokenFilter['adjacent_variant_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.positive_token_filter.adjacent_variant_terms', $errors, $warnings); $this->validateRegexPatternList($positiveTokenFilter['code_patterns'] ?? [], 'genre.configuration_values.shop_query_runtime.positive_token_filter.code_patterns', $errors); $this->validateRegexPatternList($positiveTokenFilter['adjacent_variant_patterns'] ?? 
[], 'genre.configuration_values.shop_query_runtime.positive_token_filter.adjacent_variant_patterns', $errors); } $genericDeviceAnchor = is_array($shopQueryRuntime['generic_device_anchor'] ?? null) ? $shopQueryRuntime['generic_device_anchor'] : []; if ($genericDeviceAnchor !== []) { foreach (['enabled', 'remove_generic_device_terms'] as $boolKey) { if (array_key_exists($boolKey, $genericDeviceAnchor) && !is_bool($genericDeviceAnchor[$boolKey])) { $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.%s must be boolean.', $boolKey); } } if (array_key_exists('template', $genericDeviceAnchor) && (!is_string($genericDeviceAnchor['template']) || trim($genericDeviceAnchor['template']) === '')) { $errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.template must be a non-empty string.'; } $this->validateStringList($this->toList($genericDeviceAnchor['trigger_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.trigger_terms', $errors, $warnings); $this->validateStringList($this->toList($genericDeviceAnchor['suppress_if_terms'] ?? []), 'genre.configuration_values.shop_query_runtime.generic_device_anchor.suppress_if_terms', $errors, $warnings); $anchorRules = $genericDeviceAnchor['anchor_rules'] ?? []; if ($anchorRules !== [] && !is_array($anchorRules)) { $errors[] = 'genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules must be a list.'; } elseif (is_array($anchorRules)) { foreach ($anchorRules as $index => $rule) { if (!is_array($rule)) { $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s must be a map.', (string) $index); continue; } if (!is_string($rule['anchor'] ?? null) || trim((string) ($rule['anchor'] ?? 
'')) === '') { $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.anchor must be a non-empty string.', (string) $index); } if (array_key_exists('template', $rule) && (!is_string($rule['template']) || trim((string) $rule['template']) === '')) { $errors[] = sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.template must be a non-empty string when configured.', (string) $index); } $this->validateStringList( $this->toList($rule['match_terms'] ?? []), sprintf('genre.configuration_values.shop_query_runtime.generic_device_anchor.anchor_rules.%s.match_terms', (string) $index), $errors, $warnings ); } } } foreach ($this->collectGenreConfigurationValueSourcePaths($configurationValues) as $valuePath => $sourcePaths) { foreach ($sourcePaths as $sourcePath) { if (!isset($flattened[$sourcePath])) { $warnings[] = sprintf( 'genre.configuration_values.%s references unknown source path: %s.', $valuePath, $sourcePath ); } } } foreach (array_keys($surface) as $group) { if (!is_string($group) || $group === '') { continue; } if (!array_key_exists($group, $configurationValues)) { $warnings[] = sprintf('genre.configuration_values is missing value group for adaptation_surface.%s.', $group); } } }

    /**
     * Collects every `source_paths` declaration found inside the genre configuration values.
     *
     * @param array<string, mixed> $configurationValues
     * @return array<string, list<string>> Dotted value path => trimmed, de-duplicated source paths.
     */
    private function collectGenreConfigurationValueSourcePaths(array $configurationValues): array
    {
        $collected = [];
        $this->collectGenreSourcePathsRecursive($configurationValues, '', $collected);

        return $collected;
    }

    /**
     * Walks one configuration node and records its usable `source_paths` entries under $path,
     * then descends into every array child that has a string key.
     *
     * The root node ($path === '') is never recorded, and nodes whose cleaned list is empty are
     * skipped. A node is recorded before its children are visited.
     *
     * @param array<array-key, mixed> $value Current configuration node.
     * @param string $path Dotted path of the current node ('' for the root).
     * @param array<string, list<string>> $out Accumulator, written by reference.
     */
    private function collectGenreSourcePathsRecursive(array $value, string $path, array &$out): void
    {
        $declared = $value['source_paths'] ?? null;

        if (is_array($declared)) {
            $unique = [];
            foreach ($declared as $candidate) {
                if (!is_string($candidate)) {
                    continue;
                }

                $candidate = trim($candidate);
                if ($candidate === '' || in_array($candidate, $unique, true)) {
                    continue;
                }

                $unique[] = $candidate;
            }

            if ($path !== '' && $unique !== []) {
                $out[$path] = $unique;
            }
        }

        foreach ($value as $key => $child) {
            if (!is_string($key) || $key === 'source_paths' || !is_array($child)) {
                continue;
            }

            $this->collectGenreSourcePathsRecursive($child, $path === '' ? $key : $path . '.' . $key, $out);
        }
    }

    /**
     * Registers the dotted paths of the `retriex.*` container parameters in $paths.
     *
     * Parameters listed in the root map are flattened under their short alias. Every other
     * `retriex.*` parameter is flattened under its own name minus the `retriex.` prefix, unless
     * it equals a mapped root or is nested below one.
     *
     * @param array<string, true> $paths Accumulator, written by reference.
     */
    private function flattenGenreParameterPaths(array &$paths): void
    {
        $configRoots = [
            'retriex.agent.config' => 'agent',
            'retriex.chat_messages.config' => 'chat_messages',
            'retriex.commerce_query.config' => 'commerce_query',
            'retriex.governance.config' => 'governance',
            'retriex.intent.commerce.config' => 'intent.commerce',
            'retriex.intent.light.config' => 'intent.light',
            'retriex.intent.sales.config' => 'intent.sales',
            'retriex.prompt.config' => 'prompt',
            'retriex.query_enrichment.config' => 'query_enrichment',
            'retriex.retrieval.config' => 'retrieval',
            'retriex.search_repair.config' => 'search_repair',
            'retriex.shop_matching.config' => 'shop_matching',
            'retriex.stopwords.config' => 'stopwords',
            'retriex.vocabulary.config' => 'vocabulary',
        ];

        $parameters = $this->parameters->all();

        foreach ($configRoots as $parameterName => $alias) {
            if (array_key_exists($parameterName, $parameters)) {
                $this->flattenEffectiveConfigPath($alias, $parameters[$parameterName], $paths);
            }
        }

        $rootNames = array_keys($configRoots);
        $prefixLength = strlen('retriex.');

        foreach ($parameters as $parameterName => $parameterValue) {
            if (!is_string($parameterName) || !str_starts_with($parameterName, 'retriex.')) {
                continue;
            }

            // Skip anything that is a mapped root itself or lives underneath one.
            $coveredByRoot = false;
            foreach ($rootNames as $rootName) {
                if ($parameterName === $rootName || str_starts_with($parameterName, $rootName . '.')) {
                    $coveredByRoot = true;
                    break;
                }
            }

            if ($coveredByRoot) {
                continue;
            }

            $this->flattenEffectiveConfigPath(substr($parameterName, $prefixLength), $parameterValue, $paths);
        }
    }

    /**
     * Marks $path as known and recurses into the string-keyed children of $value.
     *
     * Integer-keyed entries (list items) do not produce paths of their own.
     *
     * @param array<string, true> $paths Accumulator, written by reference.
     */
    private function flattenEffectiveConfigPath(string $path, mixed $value, array &$paths): void
    {
        $paths[$path] = true;

        if (is_array($value)) {
            foreach ($value as $key => $child) {
                // PHP array keys are int or string; only named children become paths.
                if (is_string($key)) {
                    $this->flattenEffectiveConfigPath($path . '.' . $key, $child, $paths);
                }
            }
        }
    }

/** * @param array $governance * @param list $errors * @param list $warnings */ private function validateGovernance(array $governance, array &$errors, array &$warnings): void { if ($governance === []) { $errors[] = 'governance config must not be empty.'; return; } try { $this->governanceConfig->getRegressionProtectedShortModelTokens(); $this->governanceConfig->getRegressionProtectedMeasurementValues(); $this->governanceConfig->getRegressionProtectedTechnicalPromptKeywords(); $this->governanceConfig->getRegressionTechnicalPriorityRequiredMarkers(); $this->governanceConfig->getRegressionProtectedAccessoryPromptKeywords(); $this->governanceConfig->getRegressionProtectedSearchRepairSpecificityTerms(); $this->governanceConfig->getRegressionProtectedRetrievalReagentWords(); $this->governanceConfig->getRegressionProtectedRetrievalDeviceWordGroups(); $this->governanceConfig->getRegressionShopPromptOriginalQuery(); $this->governanceConfig->getRegressionShopPromptRequiredOutputInstructionMarkers(); $this->governanceConfig->getRegressionShopQueryMetaGuardTerms(); $this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms(); $this->governanceConfig->getVocabularyProtectedShortModelTokens(); $this->governanceConfig->getLanguageProtectedStopwordTerms(); $this->governanceConfig->getLanguageRequiredCleanupProfiles(); $this->governanceConfig->getLanguageRequiredProfileTerms(); $this->governanceConfig->getCorePatternAuditSourceRoots(); 
$this->governanceConfig->getCorePatternAuditExcludedPathPrefixes(); $this->governanceConfig->getCorePatternAuditExcludedPathPatterns(); $this->governanceConfig->getCorePatternAuditWarningPathPrefixes(); $this->governanceConfig->getCorePatternAuditSuspiciousCalls(); $this->governanceConfig->getCorePatternAuditDomainMarkerTerms(); $this->governanceConfig->getCorePatternAuditAllowedLiteralPatterns(); $this->governanceConfig->getCorePatternAuditMaxSnippetLength(); } catch (\InvalidArgumentException $e) { $errors[] = $e->getMessage(); } }

    /**
     * Requires every runtime path setting to be a non-blank value.
     *
     * @param array<string, mixed> $runtime
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Not written by this check.
     */
    private function validateRuntime(array $runtime, array &$errors, array &$warnings): void
    {
        $requiredKeys = ['root', 'knowledge_root', 'index_ndjson', 'index_meta', 'upload_dir'];

        foreach ($requiredKeys as $requiredKey) {
            $configured = trim((string) ($runtime[$requiredKey] ?? ''));
            if ($configured !== '') {
                continue;
            }

            $errors[] = 'runtime.' . $requiredKey . ' must not be empty.';
        }
    }

    /**
     * Checks chunking, embedding and scoring settings of the index configuration.
     *
     * When the configuration carries an `error` entry, only a warning is emitted and all other
     * checks are skipped.
     *
     * @param array<string, mixed> $index
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateIndex(array $index, array &$errors, array &$warnings): void
    {
        if (isset($index['error'])) {
            $warnings[] = 'index configuration could not be loaded from DB/provider: ' . (string) $index['error'];

            return;
        }

        $size = $this->asInt($index['chunk_size'] ?? null);
        $overlap = $this->asInt($index['chunk_overlap'] ?? null);

        if ($size === null || $size <= 0) {
            $errors[] = 'index.chunk_size must be greater than 0.';
        }

        if ($overlap === null || $overlap < 0) {
            $errors[] = 'index.chunk_overlap must be greater than or equal to 0.';
        }

        // The relation check only makes sense when both values could be resolved.
        $bothResolved = $size !== null && $overlap !== null;
        if ($bothResolved && $overlap >= $size) {
            $errors[] = 'index.chunk_overlap must be smaller than index.chunk_size.';
        }

        $embeddingModel = trim((string) ($index['embedding_model'] ?? ''));
        if ($embeddingModel === '') {
            $errors[] = 'index.embedding_model must not be empty.';
        }

        $embeddingDimension = $this->asInt($index['embedding_dimension'] ?? null) ?? 0;
        if ($embeddingDimension <= 0) {
            $errors[] = 'index.embedding_dimension must be greater than 0.';
        }

        $scoringVersion = $this->asInt($index['scoring_version'] ?? null) ?? 0;
        if ($scoringVersion <= 0) {
            $errors[] = 'index.scoring_version must be greater than 0.';
        }
    }

    /**
     * Checks the generation model settings (name, context size, retrieval limits).
     *
     * When the configuration carries an `error` entry, only a warning is emitted and all other
     * checks are skipped.
     *
     * @param array<string, mixed> $model
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateModel(array $model, array &$errors, array &$warnings): void
    {
        if (isset($model['error'])) {
            $warnings[] = 'model configuration could not be loaded from DB/provider: ' . (string) $model['error'];

            return;
        }

        $modelName = trim((string) ($model['model_name'] ?? ''));
        if ($modelName === '') {
            $errors[] = 'model_generation.model_name must not be empty.';
        }

        $contextSize = $this->asInt($model['num_ctx'] ?? null) ?? 0;
        if ($contextSize < 512) {
            $errors[] = 'model_generation.num_ctx must be at least 512.';
        }

        $maxChunks = $this->asInt($model['retrieval_max_chunks'] ?? null) ?? 0;
        if ($maxChunks < 1) {
            $errors[] = 'model_generation.retrieval_max_chunks must be greater than 0.';
        }

        $vectorTopK = $this->asInt($model['retrieval_vector_top_k'] ?? null) ?? 0;
        if ($vectorTopK < 1) {
            $errors[] = 'model_generation.retrieval_vector_top_k must be greater than 0.';
        }
    }

    /**
     * Checks retrieval thresholds, chunk limit, cleanup profile references, the vocabulary map
     * and the legacy inventory parameter.
     *
     * @param array<string, mixed> $retrieval
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateRetrieval(array $retrieval, array &$errors, array &$warnings): void
    {
        $floor = (float) ($retrieval['threshold_floor'] ?? 0.0);
        $threshold = (float) ($retrieval['vector_score_threshold'] ?? 0.0);
        $ceil = (float) ($retrieval['threshold_ceil'] ?? 1.0);

        $belowFloor = $floor > $threshold;
        $aboveCeil = $threshold > $ceil;
        if ($belowFloor || $aboveCeil) {
            $errors[] = 'retrieval threshold must satisfy threshold_floor <= vector_score_threshold <= threshold_ceil.';
        }

        $hardMaxChunks = (int) ($retrieval['hard_max_chunks'] ?? 0);
        if ($hardMaxChunks < 1) {
            $errors[] = 'retrieval.hard_max_chunks must be greater than 0.';
        }

        // Profile names are looked up only when a usable name is configured.
        $exactSelectionProfile = $retrieval['generic_exact_selection_cleanup_profile'] ?? null;
        $exactSelectionName = is_string($exactSelectionProfile) ? trim($exactSelectionProfile) : '';
        if ($exactSelectionName === '') {
            $errors[] = 'retrieval.generic_exact_selection_cleanup_profile must be a non-empty string.';
        } elseif (!in_array($exactSelectionName, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
            $errors[] = 'retrieval.generic_exact_selection_cleanup_profile references unknown language cleanup profile: ' . $exactSelectionName . '.';
        }

        $queryProfile = $retrieval['query_cleanup_profile'] ?? null;
        $queryProfileName = is_string($queryProfile) ? trim($queryProfile) : '';
        if ($queryProfileName === '') {
            $errors[] = 'retrieval.query_cleanup_profile must be a non-empty string.';
        } elseif (!in_array($queryProfileName, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
            $errors[] = 'retrieval.query_cleanup_profile references unknown language cleanup profile: ' . $queryProfileName . '.';
        }

        $this->validateStringListMap($retrieval['vocabulary'] ?? [], 'retrieval.vocabulary', $errors, $warnings);

        $inventory = $retrieval['inventory_parameter'] ?? [];
        if (!is_array($inventory)) {
            return;
        }

        foreach ($inventory as $rawKey => $inventoryValue) {
            $name = (string) $rawKey;

            if (!$this->shouldCompareRetrievalInventoryKey($name, $retrieval)) {
                continue;
            }

            if (!array_key_exists($name, $retrieval)) {
                continue;
            }

            // Loose comparison is kept as in the original check.
            if ($retrieval[$name] != $inventoryValue) {
                $warnings[] = 'retrieval.inventory.' . $name . ' differs from active retriever config.';
            }
        }
    }

    /**
     * Retrieval vocabulary lists can be resolved from dedicated vocabulary views.
     * The backwards-compatible inventory parameter may still contain raw legacy
     * list values for those keys, so comparing it against the active retriever
     * facade would produce false-positive validation warnings.
* * @param array $retrieval */
    private function shouldCompareRetrievalInventoryKey(string $key, array $retrieval): bool
    {
        // The genre-backed list is consulted only when the key is not vocabulary-backed.
        $isVocabularyBacked = in_array($key, $this->retrievalVocabularyBackedInventoryKeys(), true);
        if ($isVocabularyBacked || in_array($key, $this->retrievalGenreBackedInventoryKeys(), true)) {
            return false;
        }

        $vocabulary = $retrieval['vocabulary'] ?? [];
        if (!is_array($vocabulary)) {
            return true;
        }

        return !array_key_exists($key, $vocabulary);
    }

    /**
     * Inventory keys whose values come from the genre `exact_selection` configuration.
     *
     * Each non-empty array entry of `retrieval_and_language.exact_selection` (except the
     * `origin`, `description` and `source_paths` entries) yields `exact_selection_<key>`.
     *
     * @return string[]
     */
    private function retrievalGenreBackedInventoryKeys(): array
    {
        $exactSelection = $this->genreConfig->getValueArray('retrieval_and_language.exact_selection');
        $metadataKeys = ['origin', 'description', 'source_paths'];

        $inventoryKeys = [];
        foreach ($exactSelection as $name => $entry) {
            if (!is_string($name) || in_array($name, $metadataKeys, true)) {
                continue;
            }

            if (!is_array($entry) || $entry === []) {
                continue;
            }

            $inventoryKeys[] = 'exact_selection_' . $name;
        }

        return $inventoryKeys;
    }

    /**
     * Inventory keys that are never compared against the active retriever configuration.
     *
     * @return string[]
     */
    private function retrievalVocabularyBackedInventoryKeys(): array
    {
        $vocabularyBackedKeys = [
            'generic_product_tokens',
            'important_short_model_tokens',
            'family_descriptor_tokens',
            'looks_like_reagent_tokens',
            'looks_like_safety_docs',
            'looks_like_reagent_words',
            'looks_like_document_words',
            'looks_like_safety_words',
            'looks_like_device_words',
        ];

        return $vocabularyBackedKeys;
    }

/** * @param array $prompt * @param list $errors * @param list $warnings */ private function validatePrompt(array $prompt, array &$errors, array &$warnings): void { if ((int) ($prompt['chars_per_token'] ?? 0) < 1) { $errors[] = 'prompt.chars_per_token must be greater than 0.'; } if ((float) ($prompt['output_reserve_ratio'] ?? -1) < 0.0 || (float) ($prompt['output_reserve_ratio'] ?? 2) > 1.0) { $errors[] = 'prompt.output_reserve_ratio must be between 0 and 1.'; } if ((float) ($prompt['safety_reserve_ratio'] ?? -1) < 0.0 || (float) ($prompt['safety_reserve_ratio'] ?? 2) > 1.0) { $errors[] = 'prompt.safety_reserve_ratio must be between 0 and 1.'; } $this->validateStringListMap($prompt['rules'] ?? 
[], 'prompt.rules', $errors, $warnings); $this->validateStringListMap($prompt['detection'] ?? [], 'prompt.detection', $errors, $warnings); $this->validateRegexPattern($prompt['detection']['technical_product_model_pattern'] ?? null, 'prompt.detection.technical_product_model_pattern', $errors); } /** * @param array $agent * @param list $errors * @param list $warnings */ private function validateAgent(array $agent, array &$errors, array &$warnings): void { $this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings); $this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings); $this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings); $followUpContext = is_array($agent['follow_up_context'] ?? null) ? $agent['follow_up_context'] : []; $commercialTableFollowUp = is_array($followUpContext['commercial_table_follow_up'] ?? null) ? $followUpContext['commercial_table_follow_up'] : []; $this->validateRegexPatternList($commercialTableFollowUp['prompt_patterns'] ?? [], 'agent.follow_up_context.commercial_table_follow_up.prompt_patterns', $errors); $this->validateRegexPatternList($commercialTableFollowUp['history_anchor_patterns'] ?? [], 'agent.follow_up_context.commercial_table_follow_up.history_anchor_patterns', $errors); $this->validateStringList($this->toList($commercialTableFollowUp['table_terms'] ?? []), 'agent.follow_up_context.commercial_table_follow_up.table_terms', $errors, $warnings); $this->validateStringList($this->toList($commercialTableFollowUp['commercial_terms'] ?? []), 'agent.follow_up_context.commercial_table_follow_up.commercial_terms', $errors, $warnings); $this->validateRegexPatternList($commercialTableFollowUp['indicator_marker_patterns'] ?? [], 'agent.follow_up_context.commercial_table_follow_up.indicator_marker_patterns', $errors); if (trim((string) ($commercialTableFollowUp['query_template_with_model'] ?? 
'')) === '') { $errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_with_model must not be empty.'; } if (trim((string) ($commercialTableFollowUp['query_template_without_model'] ?? '')) === '') { $errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.'; } $ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : []; $ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null; if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') { $errors[] = 'agent.rag_evidence_guard.cleanup_profile must be a non-empty string.'; } elseif (!in_array($ragEvidenceCleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) { $errors[] = 'agent.rag_evidence_guard.cleanup_profile references unknown language cleanup profile: ' . $ragEvidenceCleanupProfile . '.'; } $this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings); $this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings); $this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors); $this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings); $this->validateRegexPatternList($ragEvidence['aggregate_answer_evidence_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_answer_evidence_patterns', $errors); $shopRuntime = is_array($agent['shop_runtime'] ?? null) ? $agent['shop_runtime'] : []; $queryCleanup = is_array($shopRuntime['query_cleanup'] ?? null) ? $shopRuntime['query_cleanup'] : []; $contextResolution = is_array($shopRuntime['context_resolution'] ?? null) ? 
$shopRuntime['context_resolution'] : []; $metaQueryGuard = is_array($contextResolution['meta_query_guard'] ?? null) ? $contextResolution['meta_query_guard'] : []; $shopContextCleanupProfile = $metaQueryGuard['cleanup_profile'] ?? null; if (!is_string($shopContextCleanupProfile) || trim($shopContextCleanupProfile) === '') { $shopContextCleanupProfile = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile(); } else { $shopContextCleanupProfile = trim($shopContextCleanupProfile); } if (!in_array($shopContextCleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) { $errors[] = 'agent.shop_runtime.context_resolution.meta_query_guard.cleanup_profile references unknown language cleanup profile: ' . $shopContextCleanupProfile . '.'; } $currentInputPreservation = is_array($queryCleanup['current_input_preservation'] ?? null) ? $queryCleanup['current_input_preservation'] : []; if (array_key_exists('enabled', $currentInputPreservation) && !is_bool($currentInputPreservation['enabled'])) { $errors[] = 'agent.shop_runtime.query_cleanup.current_input_preservation.enabled must be boolean.'; } $this->validateStringList( $this->toList($currentInputPreservation['terms'] ?? []), 'agent.shop_runtime.query_cleanup.current_input_preservation.terms', $errors, $warnings ); $this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings); $this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors); $normalization = is_array($agent['input_normalization'] ?? null) ? $agent['input_normalization'] : []; $normalizationPrompt = is_array($normalization['prompt'] ?? null) ? $normalization['prompt'] : []; if (($this->asInt($normalization['max_input_chars'] ?? null) ?? 0) < 1) { $errors[] = 'agent.input_normalization.max_input_chars must be greater than 0.'; } if (($this->asInt($normalization['max_output_chars'] ?? null) ?? 
0) < 1) { $errors[] = 'agent.input_normalization.max_output_chars must be greater than 0.'; } if (($this->asInt($normalization['max_added_tokens'] ?? null) ?? -1) < 0) { $errors[] = 'agent.input_normalization.max_added_tokens must be greater than or equal to 0.'; } if (($this->asInt($normalization['max_length_ratio_percent'] ?? null) ?? 0) < 100) { $errors[] = 'agent.input_normalization.max_length_ratio_percent must be at least 100.'; } $this->validateRegexPattern($normalization['output_prefix_pattern'] ?? null, 'agent.input_normalization.output_prefix_pattern', $errors); $this->validateRegexPatternList($normalization['skip_patterns'] ?? [], 'agent.input_normalization.skip_patterns', $errors); $this->validateStringList($this->toList($normalizationPrompt['rules'] ?? []), 'agent.input_normalization.prompt.rules', $errors, $warnings); $fuzzyRouting = is_array($normalization['fuzzy_routing'] ?? null) ? $normalization['fuzzy_routing'] : []; if (($this->asInt($fuzzyRouting['min_token_length'] ?? null) ?? 0) < 1) { $errors[] = 'agent.input_normalization.fuzzy_routing.min_token_length must be greater than 0.'; } if (($this->asInt($fuzzyRouting['medium_token_length'] ?? null) ?? 0) < 1) { $errors[] = 'agent.input_normalization.fuzzy_routing.medium_token_length must be greater than 0.'; } if (($this->asInt($fuzzyRouting['long_token_length'] ?? null) ?? 0) < 1) { $errors[] = 'agent.input_normalization.fuzzy_routing.long_token_length must be greater than 0.'; } if (($this->asInt($fuzzyRouting['max_distance_short'] ?? null) ?? -1) < 0) { $errors[] = 'agent.input_normalization.fuzzy_routing.max_distance_short must be greater than or equal to 0.'; } if (($this->asInt($fuzzyRouting['max_distance_medium'] ?? null) ?? -1) < 0) { $errors[] = 'agent.input_normalization.fuzzy_routing.max_distance_medium must be greater than or equal to 0.'; } if (($this->asInt($fuzzyRouting['max_distance_long'] ?? null) ?? 
-1) < 0) { $errors[] = 'agent.input_normalization.fuzzy_routing.max_distance_long must be greater than or equal to 0.'; } $minSimilarityPercent = $this->asInt($fuzzyRouting['min_similarity_percent'] ?? null) ?? 0; if ($minSimilarityPercent < 1 || $minSimilarityPercent > 100) { $errors[] = 'agent.input_normalization.fuzzy_routing.min_similarity_percent must be between 1 and 100.'; } $this->validateStringList($this->toList($fuzzyRouting['terms'] ?? []), 'agent.input_normalization.fuzzy_routing.terms', $errors, $warnings); if (trim((string) ($normalizationPrompt['intro'] ?? '')) === '') { $errors[] = 'agent.input_normalization.prompt.intro must not be empty.'; } if (trim((string) ($normalizationPrompt['output_format_block'] ?? '')) === '') { $errors[] = 'agent.input_normalization.prompt.output_format_block must not be empty.'; } if (trim((string) ($normalizationPrompt['current_user_input_label'] ?? '')) === '') { $errors[] = 'agent.input_normalization.prompt.current_user_input_label must not be empty.'; } $shopRuntime = is_array($agent['shop_runtime'] ?? null) ? $agent['shop_runtime'] : []; $contextResolution = is_array($shopRuntime['context_resolution'] ?? null) ? $shopRuntime['context_resolution'] : []; $contextUsage = $contextResolution['context_usage'] ?? []; if (is_array($contextUsage)) { $this->validateStringList($this->toList($contextUsage['referential_terms'] ?? []), 'agent.shop_runtime.context_resolution.context_usage.referential_terms', $errors, $warnings); } $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; if (is_array($anchorEnrichment)) { $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); $this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? 
[]), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings); $this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.'; } } }

    /**
     * Checks the vector service endpoint and the minimum score settings.
     *
     * @param array<string, mixed> $vector
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Not written by this check.
     */
    private function validateVector(array $vector, array &$errors, array &$warnings): void
    {
        if (trim((string) ($vector['service_url'] ?? '')) === '') {
            $errors[] = 'vector.service_url must not be empty.';
        }

        if (($this->asInt($vector['port'] ?? null) ?? 0) < 1) {
            $errors[] = 'vector.port must be greater than 0.';
        }

        $search = is_array($vector['search'] ?? null) ? $vector['search'] : [];
        $tags = is_array($vector['tags'] ?? null) ? $vector['tags'] : [];

        $scores = [
            'search.min_score' => $search['min_score'] ?? null,
            'tags.min_score' => $tags['min_score'] ?? null,
        ];

        foreach ($scores as $name => $value) {
            $score = $this->asFloat($value);
            if ($score === null || $score < 0.0 || $score > 1.0) {
                $errors[] = 'vector.' . $name . ' must be between 0 and 1.';
            }
        }
    }

    /**
     * Checks commerce settings; nothing is validated while commerce is disabled.
     *
     * @param array<string, mixed> $commerce
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateCommerce(array $commerce, array &$errors, array &$warnings): void
    {
        if (!$this->asBool($commerce['enabled'] ?? false)) {
            return;
        }

        if (trim((string) ($commerce['store_api_base_url'] ?? '')) === '') {
            $errors[] = 'commerce.store_api_base_url must not be empty when commerce is enabled.';
        }

        if (($this->asInt($commerce['max_shop_results'] ?? null) ?? 0) < 1) {
            $warnings[] = 'commerce.max_shop_results could not be resolved as a positive integer.';
        }
    }

    /**
     * Checks commerce query lists, patterns, the cleanup profile reference and the protection of
     * governance-listed measurement values.
     *
     * A failure while loading the protected measurement values is recorded as an error instead of
     * aborting the whole validation run. The per-value match check runs only when the measurement
     * pattern compiles; an uncompilable pattern is reported by the pattern validation instead.
     *
     * @param array<string, mixed> $commerceQuery
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateCommerceQuery(array $commerceQuery, array &$errors, array &$warnings): void
    {
        $this->validateStringListMap($commerceQuery, 'commerce_query', $errors, $warnings);

        $patterns = is_array($commerceQuery['patterns'] ?? null) ? $commerceQuery['patterns'] : [];
        if ($patterns === []) {
            $errors[] = 'commerce_query.patterns must be an array.';
        } else {
            $this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
        }

        $cleanupProfile = $commerceQuery['cleanup_profile'] ?? null;
        if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
            $errors[] = 'commerce_query.cleanup_profile must be a non-empty string.';
        } elseif (!in_array($cleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
            $errors[] = 'commerce_query.cleanup_profile references unknown language cleanup profile: ' . $cleanupProfile . '.';
        }

        $measurementPattern = $patterns['measurement_value_token'] ?? null;
        $filterTokens = $commerceQuery['filter_search_tokens'] ?? [];

        try {
            $protectedValues = $this->governanceConfig->getRegressionProtectedMeasurementValues();
        } catch (\InvalidArgumentException $e) {
            // This check runs before validateGovernance(); record the problem instead of
            // letting it abort validate().
            $errors[] = $e->getMessage();

            return;
        }

        // Probe the pattern once: preg_match() returns false for a pattern that does not compile.
        $patternCompiles = is_string($measurementPattern) && @preg_match($measurementPattern, '') !== false;

        foreach ($protectedValues as $measurementValue) {
            if ($patternCompiles && @preg_match($measurementPattern, $measurementValue) !== 1) {
                $errors[] = 'commerce_query.patterns.measurement_value_token must match protected measurement value: ' . $measurementValue . '.';
            }

            if (is_array($filterTokens) && in_array($measurementValue, $filterTokens, true)) {
                $errors[] = 'commerce_query.filter_search_tokens must not remove protected measurement value: ' . $measurementValue . '.';
            }
        }
    }

    /**
     * Checks that the shop matching configuration only holds valid string lists.
     *
     * @param array<string, mixed> $shopMatching
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateShopMatching(array $shopMatching, array &$errors, array &$warnings): void
    {
        $this->validateStringListMap($shopMatching, 'shop_matching', $errors, $warnings);
    }

    /**
     * Checks the search repair limit, its string lists and its regex pattern map.
     *
     * @param array<string, mixed> $searchRepair
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateSearchRepair(array $searchRepair, array &$errors, array &$warnings): void
    {
        if ((int) ($searchRepair['max_repair_queries'] ?? 0) < 0) {
            $errors[] = 'search_repair.max_repair_queries must be greater than or equal to 0.';
        }

        $this->validateStringListMap($searchRepair, 'search_repair', $errors, $warnings);
        $this->validateRegexPatternMap($searchRepair['patterns'] ?? [], 'search_repair.patterns', $errors);
    }

    /**
     * Checks the intent string lists and the regex pattern lists of the commerce, light and
     * sales sections.
     *
     * @param array<string, mixed> $intent
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateIntent(array $intent, array &$errors, array &$warnings): void
    {
        $this->validateStringListMap($intent, 'intent', $errors, $warnings);

        $commerce = is_array($intent['commerce'] ?? null) ? $intent['commerce'] : [];
        $this->validateRegexPatternList($commerce['support_diagnostic_patterns'] ?? [], 'intent.commerce.support_diagnostic_patterns', $errors);
        $this->validateRegexPatternList($commerce['explicit_commerce_intent_patterns'] ?? [], 'intent.commerce.explicit_commerce_intent_patterns', $errors);

        $light = is_array($intent['light'] ?? null) ? $intent['light'] : [];
        $this->validateRegexPatternList($light['strong_patterns'] ?? [], 'intent.light.strong_patterns', $errors);

        $sales = is_array($intent['sales'] ?? null) ? $intent['sales'] : [];
        $this->validateRegexPatternList($sales['comparison_signals'] ?? [], 'intent.sales.comparison_signals', $errors);
    }

    /**
     * Checks the vocabulary classes, views and maps, and warns when a governance-protected short
     * model token is in neither the configured `add` list nor the active retrieval view.
     *
     * A failure while loading the protected tokens is recorded as an error instead of aborting
     * the whole validation run.
     *
     * @param array<string, mixed> $vocabulary
     * @param list<string> $errors Appended to by reference.
     * @param list<string> $warnings Appended to by reference.
     */
    private function validateVocabulary(array $vocabulary, array &$errors, array &$warnings): void
    {
        $this->validateStringListMap($vocabulary['classes'] ?? [], 'vocabulary.classes', $errors, $warnings);
        $this->validateStringListMap($vocabulary['views'] ?? [], 'vocabulary.views', $errors, $warnings);
        $this->validateStringListMap($vocabulary['maps'] ?? [], 'vocabulary.maps', $errors, $warnings);

        $retrievalViews = $vocabulary['views']['retrieval'] ?? null;
        if (!is_array($retrievalViews)) {
            return;
        }

        $shortModel = $retrievalViews['important_short_model_tokens']['add'] ?? [];
        if (!is_array($shortModel)) {
            return;
        }

        $activeShortModel = $this->domainVocabularyConfig->view('retrieval.important_short_model_tokens', []);

        try {
            $protectedTokens = $this->governanceConfig->getVocabularyProtectedShortModelTokens();
        } catch (\InvalidArgumentException $e) {
            // This check runs before validateGovernance(); record the problem instead of
            // letting it abort validate().
            $errors[] = $e->getMessage();

            return;
        }

        foreach ($protectedTokens as $token) {
            if (!in_array($token, $shortModel, true) && !in_array($token, $activeShortModel, true)) {
                $warnings[] = 'vocabulary.views.retrieval.important_short_model_tokens should contain protected token ' . $token . '.';
            }
        }
    }

/** * @param array $language * @param list $errors * @param list $warnings */ private function validateLanguage(array $language, array &$errors, array &$warnings): void { $this->validateStringListMap($language, 'language', $errors, $warnings); $stopwords = is_array($language['stopwords'] ?? null) ? $language['stopwords'] : []; try { $profileNames = $this->languageCleanupConfig->getCleanupProfileNames(); foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $profileName) { if (!in_array($profileName, $profileNames, true)) { $errors[] = 'language.cleanup_profiles must contain required profile: ' . $profileName . '.'; continue; } $this->languageCleanupConfig->getCleanupProfile($profileName); } foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protected) { if (in_array($protected, $stopwords, true)) { $errors[] = 'language.stopwords must not contain protected term: ' . $protected . '.'; } if (!$this->languageCleanupConfig->isProtectedTerm($protected)) { $errors[] = 'language.protected_terms must contain protected term: ' . $protected . 
'.'; } } foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $requiredTerms) { $profile = $this->languageCleanupConfig->getCleanupProfile($profileName); foreach ($requiredTerms as $bucket => $terms) { foreach ($terms as $term) { if (!in_array($term, $profile[$bucket] ?? [], true)) { $errors[] = sprintf('language.cleanup_profiles.%s.%s must contain required term: %s.', $profileName, $bucket, $term); } } } } } catch (\InvalidArgumentException $e) { $errors[] = $e->getMessage(); } } /** * @param array $queryEnrichment * @param list $errors * @param list $warnings */ private function validateQueryEnrichment(array $queryEnrichment, array &$errors, array &$warnings): void { if ((int) ($queryEnrichment['max_expansions'] ?? 0) < 0) { $errors[] = 'query_enrichment.max_expansions must be greater than or equal to 0.'; } $rules = $queryEnrichment['rules'] ?? []; if (!is_array($rules)) { $errors[] = 'query_enrichment.rules must be a map.'; return; } foreach ($rules as $left => $right) { if (!is_string($left) || trim($left) === '' || !is_string($right) || trim($right) === '') { $errors[] = 'query_enrichment.rules must contain non-empty string mappings.'; return; } } } /** * @param mixed $value * @param list $errors */ private function validateRegexPattern(mixed $value, string $path, array &$errors): void { if (!is_string($value) || trim($value) === '') { $errors[] = $path . ' must be a non-empty regex string.'; return; } if (@preg_match($value, '') === false) { $errors[] = $path . 
' is not a valid regex pattern.'; } } /** * @param array $patterns * @param list $errors * @param list $warnings */ private function validateCommerceQueryPatterns(array $patterns, array &$errors, array &$warnings): void { $regexKeys = [ 'history_context_value', 'prompt_sanitize', 'whitespace_collapse', 'whitespace_split', 'history_question', 'price_between', 'price_max', 'price_min', 'direct_product_digit', 'model_like', 'accessory_like', 'contains_digit', 'model_number_token', 'model_context_token', 'model_suffix_token', 'instruction_or_presentation_token', 'measurement_value_token', ]; foreach ($regexKeys as $key) { $this->validateRegexPattern($patterns[$key] ?? null, 'commerce_query.patterns.' . $key, $errors); } $this->validateRegexFragment($patterns['history_context'] ?? null, 'commerce_query.patterns.history_context', $errors); if (array_key_exists('filter_search_tokens', $patterns)) { $this->validateStringList($this->toList($patterns['filter_search_tokens']), 'commerce_query.patterns.filter_search_tokens', $errors, $warnings); } } /** * @param list $errors */ private function validateRegexFragment(mixed $value, string $path, array &$errors): void { if (!is_string($value) || trim($value) === '') { $errors[] = $path . ' must be a non-empty regex fragment string.'; return; } if (@preg_match('/(?:' . $value . ')/u', '') === false) { $errors[] = $path . ' is not a valid regex fragment.'; } } /** * @return array */ private function toList(mixed $value): array { return is_array($value) ? $value : []; } /** * @param mixed $patterns * @param list $errors */ private function validateRegexPatternMap(mixed $patterns, string $path, array &$errors): void { if (!is_array($patterns)) { $errors[] = $path . ' must be an array of regex patterns.'; return; } foreach ($patterns as $key => $pattern) { $currentPath = $path . '.' . 
(string) $key; if (is_array($pattern)) { $this->validateRegexPatternList($pattern, $currentPath, $errors); continue; } $this->validateRegexPattern($pattern, $currentPath, $errors); } } /** * @param mixed $patterns * @param list $errors */ private function validateRegexPatternList(mixed $patterns, string $path, array &$errors): void { if (!is_array($patterns)) { $errors[] = $path . ' must be an array of regex patterns.'; return; } foreach ($patterns as $index => $pattern) { $this->validateRegexPattern($pattern, $path . '.' . (string) $index, $errors); } } /** * @param mixed $value * @param list $errors * @param list $warnings */ private function validateStringListMap(mixed $value, string $path, array &$errors, array &$warnings): void { if (!is_array($value)) { $errors[] = $path . ' must be an array.'; return; } foreach ($value as $key => $item) { $currentPath = $path . '.' . (string) $key; if (is_array($item)) { if ($this->isList($item)) { $this->validateStringList($item, $currentPath, $errors, $warnings); continue; } $this->validateStringListMap($item, $currentPath, $errors, $warnings); continue; } if (is_string($item)) { if (trim($item) === '') { $errors[] = $currentPath . ' must not be empty.'; } continue; } if (is_int($item) || is_float($item) || is_bool($item) || $item === null) { continue; } $warnings[] = $currentPath . ' contains a non-scalar value.'; } } /** * @param array $items * @param list $errors * @param list $warnings */ private function validateStringList(array $items, string $path, array &$errors, array &$warnings): void { $seen = []; foreach ($items as $index => $item) { if (!is_scalar($item)) { $errors[] = $path . '.' . (string) $index . ' must be a scalar value.'; continue; } $item = trim((string) $item); if ($item === '') { $errors[] = $path . '.' . (string) $index . ' must not be empty.'; continue; } $key = mb_strtolower($item, 'UTF-8'); if (isset($seen[$key])) { $warnings[] = $path . ' contains duplicate value: ' . $item . 
'.'; } $seen[$key] = true; } } /** @param array $value */ private function isList(array $value): bool { return array_is_list($value); } private function param(string $name, mixed $default = null): mixed { if (!$this->parameters->has($name)) { return $default; } return $this->parameters->get($name); } private function asInt(mixed $value): ?int { return is_numeric($value) ? (int) $value : null; } private function asFloat(mixed $value): ?float { return is_numeric($value) ? (float) $value : null; } private function asBool(mixed $value): bool { if (is_bool($value)) { return $value; } if (is_string($value)) { return in_array(strtolower($value), ['1', 'true', 'yes', 'on'], true); } return (bool) $value; } }