diff --git a/RETRIEX_DIAGNOSTICS_REGRESSION_FIX_README.md b/RETRIEX_DIAGNOSTICS_REGRESSION_FIX_README.md new file mode 100644 index 0000000..19e5fcb --- /dev/null +++ b/RETRIEX_DIAGNOSTICS_REGRESSION_FIX_README.md @@ -0,0 +1,58 @@ +# RetrieX Diagnostics & Regression Guard Fix + +Patch-only package for the current `rag-inprogress.zip` baseline. + +## Purpose + +This patch adds safer operational checks around the already stabilized configuration centralization work. +It does not change retrieval, prompt composition, shop search, intent scoring, or vocabulary semantics. + +## Included changes + +- Extends `RetriexEffectiveConfigProvider` so the effective config dump includes the newly centralized areas: + - `vocabulary.yaml` + - `intent.yaml` + - `agent.yaml` + - `prompt.yaml` + - `query_enrichment.yaml` + - `language.yaml` + - effective shop/search-repair/commerce-query lists +- Strengthens config validation for: + - empty list items + - duplicate list values + - invalid regex patterns + - protected stopword mistakes + - protected decimal value handling for `0,02` + - protected short model tokens `TH/TC/TP/TM/PH/RX` +- Adds an offline regression guard command: + +```bash +php bin/console mto:agent:regression:test +php bin/console mto:agent:regression:test --json +``` + +- Enhances the summary view of the effective config dump: + +```bash +php bin/console mto:agent:config:dump-effective --summary +``` + +## Included files + +```text +src/Config/RetriexEffectiveConfigProvider.php +src/Command/ConfigDumpEffectiveCommand.php +src/Command/RegressionBaselineCommand.php +RETRIEX_DIAGNOSTICS_REGRESSION_FIX_README.md +``` + +## After installing + +Clear the Symfony cache and run: + +```bash +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` + +Then run the known manual 1.4.2 regression prompts again. 
diff --git a/RETRIEX_DIAGNOSTICS_REGRESSION_HOTFIX_README.md b/RETRIEX_DIAGNOSTICS_REGRESSION_HOTFIX_README.md new file mode 100644 index 0000000..ae37d87 --- /dev/null +++ b/RETRIEX_DIAGNOSTICS_REGRESSION_HOTFIX_README.md @@ -0,0 +1,25 @@ +# RetrieX Diagnostics/Regression Hotfix + +Patch-only hotfix for the diagnostics/regression tooling. + +## Fixed + +- `commerce_query.patterns.history_context` is now validated as a regex fragment, not as a full delimited regex. +- `commerce_query.patterns.filter_search_tokens` is now validated as a normal token list, not as a list of regex patterns. +- The regression check for the Shopware query optimizer prompt no longer expects a JSON instruction. The current stable prompt intentionally asks for a plain keyword search query, so the check now validates the actual output instruction. + +## Not changed + +- No retrieval logic changed. +- No prompt wording changed. +- No shop/search/runtime logic changed. +- No YAML config changed. + +## After applying + +Clear Symfony cache, then run: + +```bash +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` diff --git a/RETRIEX_FOLLOWUP_PRECISION_FIX_README.md b/RETRIEX_FOLLOWUP_PRECISION_FIX_README.md new file mode 100644 index 0000000..b3cf2f8 --- /dev/null +++ b/RETRIEX_FOLLOWUP_PRECISION_FIX_README.md @@ -0,0 +1,24 @@ +# RetrieX Follow-up Precision Fix + +This patch tightens direct numeric follow-up handling without changing retrieval, scoring, shop, vector or database logic. + +## Why + +The follow-up query `mit welchem indikator wird der wert gemessen` could be treated too weakly as a technical product/value question because only `indikator` was a strong keyword. As a result, the stricter technical follow-up prompt rules were not always applied. + +## Changes + +- Adds `gemessen` / `measured` as technical prompt keywords. +- Adds explicit prompt rules to answer direct indicator/value follow-ups with the exact mapping first. 
+- Tells the model not to expand into the full indicator table, measurement principle, applications or advisory notes unless explicitly requested. +- Adds `gemessen` to the regression/effective-config keyword check. + +## Expected behavior + +Question: `mit welchem indikator wird der wert gemessen` after the 0,02 °dH / Testomat 808 answer. + +Expected answer starts like: + +`Der Wert 0,02 °dH wird beim Testomat 808 mit Indikatortyp 300 gemessen.` + +The full indicator table should only be shown when the user asks for all indicators or detailed device information. diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index c7e60f4..ff33649 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -91,6 +91,7 @@ parameters: technical_rules: - '- Write like technical documentation: precise, neutral, and source-close.' - '- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.' + - '- For direct follow-up questions about an indicator, value, threshold, or device, answer the resolved mapping first before any table or explanation.' - '- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.' accessory_rules: - '- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.' @@ -164,6 +165,8 @@ parameters: retrieved source.' - '- For follow-up questions such as "which indicator measures that value", first resolve the referenced value/device, then use the retrieved source entry that explicitly connects value, device and indicator.' + - '- For direct follow-up indicator/value questions, start with the exact mapping in one sentence, for example: Der Wert 0,02 °dH wird beim Testomat 808 mit Indikatortyp 300 gemessen.' 
+ - '- Do not output the full indicator table, measurement principle, application areas, or advisory notes unless the user explicitly asks for all indicators, details, a table, or device information.' - '- For numeric extreme questions, do not combine a value, device name, indicator name, range or product variant from different chunks unless the same retrieved entry explicitly connects them.' - '- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to diff --git a/config/retriex/vocabulary.yaml b/config/retriex/vocabulary.yaml index e26da38..2add447 100644 --- a/config/retriex/vocabulary.yaml +++ b/config/retriex/vocabulary.yaml @@ -558,6 +558,8 @@ parameters: - threshold - messbereich - measurement range + - gemessen + - measured - minimaler - minimum - resthärte diff --git a/src/Command/ConfigDumpEffectiveCommand.php b/src/Command/ConfigDumpEffectiveCommand.php index ed708b7..e4a59a4 100644 --- a/src/Command/ConfigDumpEffectiveCommand.php +++ b/src/Command/ConfigDumpEffectiveCommand.php @@ -58,6 +58,15 @@ final class ConfigDumpEffectiveCommand extends Command $retrieval = $this->section($config, 'retrieval'); $vector = $this->section($config, 'vector'); $commerce = $this->section($config, 'commerce'); + $prompt = $this->section($config, 'prompt'); + $agent = $this->section($config, 'agent'); + $intent = $this->section($config, 'intent'); + $vocabulary = $this->section($config, 'vocabulary'); + $searchRepair = $this->section($config, 'search_repair'); + $commerceQuery = $this->section($config, 'commerce_query'); + $shopMatching = $this->section($config, 'shop_matching'); + $language = $this->section($config, 'language'); + $queryEnrichment = $this->section($config, 'query_enrichment'); $io->section('Runtime'); $io->definitionList( @@ -86,7 +95,8 @@ final class ConfigDumpEffectiveCommand extends Command $io->definitionList( ['hard_max_chunks' => (string) 
($retrieval['hard_max_chunks'] ?? '')], ['hard_max_vectork' => (string) ($retrieval['hard_max_vectork'] ?? '')], - ['vector_score_threshold' => (string) ($retrieval['vector_score_threshold'] ?? '')] + ['vector_score_threshold' => (string) ($retrieval['vector_score_threshold'] ?? '')], + ['retrieval_vocabulary_lists' => (string) $this->countMapEntries($retrieval['vocabulary'] ?? [])] ); $io->section('Vector'); @@ -100,7 +110,21 @@ final class ConfigDumpEffectiveCommand extends Command $io->definitionList( ['enabled' => $this->formatBool($commerce['enabled'] ?? false)], ['max_shop_results' => (string) ($commerce['max_shop_results'] ?? '')], - ['store_api_base_url' => (string) ($commerce['store_api_base_url'] ?? '')] + ['store_api_base_url' => (string) ($commerce['store_api_base_url'] ?? '')], + ['commerce_query_lists' => (string) $this->countMapEntries($commerceQuery)], + ['shop_matching_lists' => (string) $this->countMapEntries($shopMatching)], + ['search_repair_lists' => (string) $this->countMapEntries($searchRepair)] + ); + + $io->section('Centralized YAML-backed configuration'); + $io->definitionList( + ['vocabulary_classes' => (string) $this->countMapEntries($this->section($vocabulary, 'classes'))], + ['vocabulary_views' => (string) $this->countMapEntries($this->section($vocabulary, 'views'))], + ['intent_sections' => (string) $this->countMapEntries($intent)], + ['prompt_rule_groups' => (string) $this->countMapEntries($this->section($prompt, 'rules'))], + ['agent_message_groups' => (string) $this->countMapEntries($this->section($agent, 'messages'))], + ['stopwords' => (string) $this->countListEntries($language['stopwords'] ?? [])], + ['query_enrichment_rules' => (string) $this->countMapEntries($queryEnrichment['rules'] ?? [])] ); } @@ -117,4 +141,14 @@ final class ConfigDumpEffectiveCommand extends Command { return filter_var($value, FILTER_VALIDATE_BOOLEAN) ? 'yes' : 'no'; } + + private function countMapEntries(mixed $value): int + { + return is_array($value) ? 
count($value) : 0; + } + + private function countListEntries(mixed $value): int + { + return is_array($value) ? count($value) : 0; + } } diff --git a/src/Command/RegressionBaselineCommand.php b/src/Command/RegressionBaselineCommand.php new file mode 100644 index 0000000..edd3013 --- /dev/null +++ b/src/Command/RegressionBaselineCommand.php @@ -0,0 +1,84 @@ +addOption('json', null, InputOption::VALUE_NONE, 'Render regression result as JSON.'); + } + + protected function execute(InputInterface $input, OutputInterface $output): int + { + $result = $this->provider->regressionBaseline(); + + if ((bool) $input->getOption('json')) { + $json = json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE); + $output->writeln(is_string($json) ? $json : '{}'); + + return $result['status'] === 'OK' ? Command::SUCCESS : Command::FAILURE; + } + + $this->renderSummary(new SymfonyStyle($input, $output), $result); + + return $result['status'] === 'OK' ? Command::SUCCESS : Command::FAILURE; + } + + /** + * @param array{status:string, checks:array, errors:list, warnings:list} $result + */ + private function renderSummary(SymfonyStyle $io, array $result): void + { + $io->title('RetrieX regression baseline'); + + $rows = []; + foreach ($result['checks'] as $name => $passed) { + $rows[] = [$name, $passed ? 'OK' : 'FAILED']; + } + + if ($rows !== []) { + $io->table(['Check', 'Result'], $rows); + } + + if ($result['errors'] !== []) { + $io->section('Errors'); + foreach ($result['errors'] as $error) { + $io->writeln('- ' . $error); + } + } + + if ($result['warnings'] !== []) { + $io->section('Warnings'); + foreach ($result['warnings'] as $warning) { + $io->writeln('- ' . 
$warning); + } + } + + if ($result['status'] === 'OK') { + $io->success('Regression baseline checks passed.'); + } else { + $io->error('Regression baseline checks failed.'); + } + } +} diff --git a/src/Config/PromptBuilderConfig.php b/src/Config/PromptBuilderConfig.php index 5a051b9..36abac0 100644 --- a/src/Config/PromptBuilderConfig.php +++ b/src/Config/PromptBuilderConfig.php @@ -27,6 +27,8 @@ final class PromptBuilderConfig 'threshold', 'messbereich', 'measurement range', + 'gemessen', + 'measured', 'minimaler', 'minimum', 'resthärte', @@ -350,6 +352,7 @@ final class PromptBuilderConfig return $this->getStringList('response_format.technical_rules', [ '- Write like technical documentation: precise, neutral, and source-close.', '- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.', + '- For direct follow-up questions about an indicator, value, threshold, or device, answer the resolved mapping first before any table or explanation.', '- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.', ]); } @@ -468,6 +471,8 @@ final class PromptBuilderConfig '- Do not add the runner-up product, second-lowest value, or adjacent range unless the user asks for it.', '- Do not add calibration, accuracy, pretreatment, temperature, or application notes unless those exact notes are requested and explicitly present in the retrieved source.', '- For follow-up questions such as "which indicator measures that value", first resolve the referenced value/device, then use the retrieved source entry that explicitly connects value, device and indicator.', + '- For direct follow-up indicator/value questions, start with the exact mapping in one sentence, for example: Der Wert 0,02 °dH wird beim Testomat 808 mit Indikatortyp 300 gemessen.', + '- Do not output the full indicator table, measurement principle, application areas, or advisory notes unless the user 
explicitly asks for all indicators, details, a table, or device information.', '- For numeric extreme questions, do not combine a value, device name, indicator name, range or product variant from different chunks unless the same retrieved entry explicitly connects them.', '- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to another.', '- If the source states only a threshold function, do not expand it into broader control logic.', diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 37ac996..37b08c7 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -5,7 +5,6 @@ declare(strict_types=1); namespace App\Config; use App\Index\IndexConfigurationProvider; -use App\Config\NdjsonHybridRetrieverConfig; use App\Service\ModelGenerationConfigProvider; use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface; @@ -17,6 +16,16 @@ final readonly class RetriexEffectiveConfigProvider private IndexConfigurationProvider $indexProvider, private PromptBuilderConfig $promptConfig, private NdjsonHybridRetrieverConfig $retrieverConfig, + private DomainVocabularyConfig $domainVocabularyConfig, + private AgentRunnerConfig $agentRunnerConfig, + private SearchRepairConfig $searchRepairConfig, + private CommerceIntentConfig $commerceIntentConfig, + private CommerceQueryParserConfig $commerceQueryParserConfig, + private IntentLightConfig $intentLightConfig, + private SalesIntentConfig $salesIntentConfig, + private ShopServiceConfig $shopServiceConfig, + private StopWordsConfig $stopWordsConfig, + private QueryEnricherConfig $queryEnricherConfig, ) { } @@ -32,8 +41,18 @@ final readonly class RetriexEffectiveConfigProvider 'llm' => ['timeout_seconds' => $this->param('retriex.llm.timeout_seconds')], 'retrieval' => $this->retrievalConfig(), 'prompt' => 
$this->promptConfig(), + 'agent' => $this->agentConfig(), 'vector' => $this->vectorConfig(), 'commerce' => $this->commerceConfig(), + 'commerce_query' => $this->commerceQueryConfig(), + 'shop_matching' => $this->shopMatchingConfig(), + 'search_repair' => $this->searchRepairEffectiveConfig(), + 'intent' => $this->intentConfig(), + 'vocabulary' => $this->domainVocabularyConfig->toArray(), + 'language' => $this->languageConfig(), + 'query_enrichment' => $this->queryEnrichmentConfig(), + 'catalog_intent' => $this->catalogIntentConfig(), + 'context' => $this->contextConfig(), ]; } @@ -51,8 +70,16 @@ final readonly class RetriexEffectiveConfigProvider $this->validateModel($config['model_generation'], $errors, $warnings); $this->validateRetrieval($config['retrieval'], $errors, $warnings); $this->validatePrompt($config['prompt'], $errors, $warnings); + $this->validateAgent($config['agent'], $errors, $warnings); $this->validateVector($config['vector'], $errors, $warnings); $this->validateCommerce($config['commerce'], $errors, $warnings); + $this->validateCommerceQuery($config['commerce_query'], $errors, $warnings); + $this->validateShopMatching($config['shop_matching'], $errors, $warnings); + $this->validateSearchRepair($config['search_repair'], $errors, $warnings); + $this->validateIntent($config['intent'], $errors, $warnings); + $this->validateVocabulary($config['vocabulary'], $errors, $warnings); + $this->validateLanguage($config['language'], $errors, $warnings); + $this->validateQueryEnrichment($config['query_enrichment'], $errors, $warnings); return [ 'status' => $errors === [] ? 'OK' : 'ERROR', @@ -63,8 +90,108 @@ final readonly class RetriexEffectiveConfigProvider } /** - * @return array + * Offline regression guard for the stable 1.4.2-sensitive configuration paths. 
+ * + * @return array{status:string, checks:array, errors:list, warnings:list} */ + public function regressionBaseline(): array + { + $errors = []; + $warnings = []; + $checks = []; + + $validate = $this->validate(); + $checks['config_validate_ok'] = $validate['status'] === 'OK'; + if ($validate['status'] !== 'OK') { + foreach ($validate['errors'] as $error) { + $errors[] = 'Config validation failed: ' . $error; + } + } + foreach ($validate['warnings'] as $warning) { + $warnings[] = 'Config validation warning: ' . $warning; + } + + $importantShortModelTokens = $this->retrieverConfig->importantShortModelTokens(); + foreach (['th', 'tc', 'tp', 'tm', 'ph', 'rx'] as $token) { + $key = 'important_short_model_token_' . $token; + $checks[$key] = in_array($token, $importantShortModelTokens, true); + if (!$checks[$key]) { + $errors[] = 'Missing protected short model token: ' . $token; + } + } + + $measurementPattern = $this->commerceQueryParserConfig->getMeasurementValueTokenPattern(); + $checks['measurement_value_0_02_matches'] = @preg_match($measurementPattern, '0,02') === 1; + if (!$checks['measurement_value_0_02_matches']) { + $errors[] = 'Commerce query parser no longer recognizes 0,02 as a measurement value.'; + } + + $filterTokens = $this->commerceQueryParserConfig->getFilterSearchTokens(); + $checks['measurement_value_0_02_not_filtered'] = !in_array('0,02', $filterTokens, true); + if (!$checks['measurement_value_0_02_not_filtered']) { + $errors[] = 'Commerce query parser filters the protected token 0,02.'; + } + + $technicalKeywords = $this->promptConfig->getTechnicalProductKeywords(); + foreach (['testomat', 'indikator', 'grenzwert', 'messbereich', 'gemessen'] as $term) { + $key = 'technical_keyword_' . $term; + $checks[$key] = in_array($term, $technicalKeywords, true); + if (!$checks[$key]) { + $errors[] = 'Missing technical prompt keyword: ' . 
$term; + } + } + + $accessoryKeywords = $this->promptConfig->getAccessoryRequestKeywords(); + foreach (['indikator', 'reagenz'] as $term) { + $key = 'accessory_keyword_' . $term; + $checks[$key] = in_array($term, $accessoryKeywords, true); + if (!$checks[$key]) { + $errors[] = 'Missing accessory prompt keyword: ' . $term; + } + } + + $searchRepairTerms = $this->searchRepairConfig->getSpecificityBoostTerms(); + foreach (['indikator', 'testomat', 'reagenz'] as $term) { + $key = 'search_repair_specificity_' . $term; + $checks[$key] = in_array($term, $searchRepairTerms, true); + if (!$checks[$key]) { + $errors[] = 'Missing search repair specificity term: ' . $term; + } + } + + $reagentWords = $this->retrieverConfig->looksLikeReagentWords(); + $deviceWords = $this->retrieverConfig->looksLikeDeviceWords(); + $checks['retrieval_reagent_word_indikator'] = in_array('indikator', $reagentWords, true); + $checks['retrieval_device_word_geraet'] = in_array('geraet', $deviceWords, true) || in_array('gerät', $deviceWords, true); + if (!$checks['retrieval_reagent_word_indikator']) { + $errors[] = 'Missing retrieval reagent word: indikator.'; + } + if (!$checks['retrieval_device_word_geraet']) { + $errors[] = 'Missing retrieval device word: geraet/gerät equivalent.'; + } + + $shopPrompt = $this->agentRunnerConfig->getShopPrompt('testomat 808 0,02', ''); + $checks['shop_prompt_contains_output_instruction'] = str_contains($shopPrompt, 'Output only the final search query.') + || str_contains($shopPrompt, 'Output format:'); + $checks['shop_prompt_contains_original_query'] = str_contains($shopPrompt, 'testomat 808 0,02'); + if (!$checks['shop_prompt_contains_output_instruction']) { + $errors[] = 'Shop query optimizer prompt no longer contains the expected output instruction.'; + } + if (!$checks['shop_prompt_contains_original_query']) { + $errors[] = 'Shop query optimizer prompt no longer contains the original query.'; + } + + $status = $errors === [] ? 
'OK' : 'ERROR'; + + return [ + 'status' => $status, + 'checks' => $checks, + 'errors' => $errors, + 'warnings' => $warnings, + ]; + } + + /** @return array */ private function runtimeConfig(): array { return [ @@ -78,9 +205,7 @@ final readonly class RetriexEffectiveConfigProvider ]; } - /** - * @return array - */ + /** @return array */ private function indexConfig(): array { try { @@ -107,9 +232,7 @@ final readonly class RetriexEffectiveConfigProvider } } - /** - * @return array - */ + /** @return array */ private function modelConfig(): array { try { @@ -139,9 +262,7 @@ final readonly class RetriexEffectiveConfigProvider } } - /** - * @return array - */ + /** @return array */ private function retrievalConfig(): array { return [ @@ -151,9 +272,7 @@ final readonly class RetriexEffectiveConfigProvider ]; } - /** - * @return array - */ + /** @return array */ private function promptConfig(): array { return [ @@ -169,12 +288,103 @@ final readonly class RetriexEffectiveConfigProvider 'max_shop_results_in_prompt' => $this->promptConfig->getMaxShopResultsInPrompt(), 'detailed_shop_results_max_count' => $this->promptConfig->getDetailedShopResultsMaxCount(), 'technical_product_keyword_match_threshold' => $this->promptConfig->getTechnicalProductKeywordMatchThreshold(), + 'labels' => [ + 'system' => $this->promptConfig->getSystemSectionLabel(), + 'user_question' => $this->promptConfig->getUserQuestionSectionLabel(), + 'conversation_context' => $this->promptConfig->getConversationContextSectionLabel(), + 'shop_search_query' => $this->promptConfig->getShopSearchQuerySectionLabel(), + 'output_priority' => $this->promptConfig->getOutputPrioritySectionLabel(), + 'response_format' => $this->promptConfig->getResponseFormatSectionLabel(), + 'language_rules' => $this->promptConfig->getLanguageRulesSectionLabel(), + 'fact_grounding_rules' => $this->promptConfig->getFactGroundingRulesSectionLabel(), + 'retrieved_knowledge' => $this->promptConfig->getRetrievedKnowledgeSectionLabel(), + 
'url_content' => $this->promptConfig->getUrlContentSectionLabel(), + ], + 'rules' => [ + 'conversation_context_intro_lines' => $this->promptConfig->getConversationContextIntroLines(), + 'live_shop_results_header_lines' => $this->promptConfig->getLiveShopResultsHeaderLines(), + 'output_priority' => $this->promptConfig->getOutputPriorityRules(), + 'response_format_base' => $this->promptConfig->getResponseFormatBaseRules(), + 'response_format_with_shop' => $this->promptConfig->getResponseFormatWithShopRules(), + 'response_format_without_shop' => $this->promptConfig->getResponseFormatWithoutShopRules(), + 'response_format_technical' => $this->promptConfig->getResponseFormatTechnicalRules(), + 'response_format_accessory' => $this->promptConfig->getResponseFormatAccessoryRules(), + 'language' => $this->promptConfig->getLanguageRules(), + 'fact_grounding_base' => $this->promptConfig->getFactGroundingBaseRules(), + 'fact_grounding_with_shop' => $this->promptConfig->getFactGroundingWithShopRules(), + 'fact_grounding_without_shop' => $this->promptConfig->getFactGroundingWithoutShopRules(), + 'fact_grounding_technical' => $this->promptConfig->getFactGroundingTechnicalRules(), + ], + 'shop_fields' => [ + 'product_number_label' => $this->promptConfig->getShopProductNumberLabel(), + 'manufacturer_label' => $this->promptConfig->getShopManufacturerLabel(), + 'price_label' => $this->promptConfig->getShopPriceLabel(), + 'availability_label' => $this->promptConfig->getShopAvailabilityLabel(), + 'availability_yes_label' => $this->promptConfig->getShopAvailabilityYesLabel(), + 'availability_no_label' => $this->promptConfig->getShopAvailabilityNoLabel(), + 'highlight_prefix' => $this->promptConfig->getShopHighlightPrefix(), + 'url_label' => $this->promptConfig->getShopUrlLabel(), + 'product_image_label' => $this->promptConfig->getShopProductImageLabel(), + 'description_label' => $this->promptConfig->getShopDescriptionLabel(), + 'meta_information_label' => 
$this->promptConfig->getShopMetaInformationLabel(), + ], + 'detection' => [ + 'technical_product_keywords' => $this->promptConfig->getTechnicalProductKeywords(), + 'accessory_request_keywords' => $this->promptConfig->getAccessoryRequestKeywords(), + 'technical_product_model_pattern' => $this->promptConfig->getTechnicalProductModelPattern(), + ], ]; } - /** - * @return array - */ + /** @return array */ + private function agentConfig(): array + { + return [ + 'commerce_history_budget_chars' => $this->agentRunnerConfig->getCommerceHistoryBudgetChars(), + 'product_search_knowledge_chunk_limit' => $this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit(), + 'advisory_product_search_knowledge_chunk_limit' => $this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit(), + 'optimized_shop_query_prefix_pattern' => $this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), + 'messages' => [ + 'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(), + 'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(), + 'check_internet_sources' => $this->agentRunnerConfig->getCheckInternetSourcesMessage(), + 'retrieve_knowledge' => $this->agentRunnerConfig->getRetrieveKnowledgeMessage(), + 'optimize_search' => $this->agentRunnerConfig->getOptimizeSearchMessage(), + 'fetch_search_data_template' => $this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), + 'analyze_all_information' => $this->agentRunnerConfig->getAnalyzeAllInformationMessage(), + 'thinking_while_streaming' => $this->agentRunnerConfig->getThinkingWhileStreamingMessage(), + 'no_llm_data_received' => $this->agentRunnerConfig->getNoLlmDataReceivedMessage(), + 'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(), + 'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(), + ], + 'source_labels' => [ + 'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(), + 'rag_knowledge' => 
$this->agentRunnerConfig->getRagKnowledgeSourceLabel(), + 'conversation_history' => $this->agentRunnerConfig->getConversationHistorySourceLabel(), + 'shop_system' => $this->agentRunnerConfig->getShopSystemSourceLabel(), + 'extended_shop_search' => $this->agentRunnerConfig->getExtendedShopSearchSourceLabel(), + 'used_sources_prefix' => $this->agentRunnerConfig->getUsedSourcesPrefix(), + 'sources_prefix' => $this->agentRunnerConfig->getSourcesPrefix(), + ], + 'html_templates' => [ + 'source_badge' => $this->agentRunnerConfig->getSourceBadgeHtmlTemplate(), + 'error' => $this->agentRunnerConfig->getErrorHtmlTemplate(), + 'think' => $this->agentRunnerConfig->getThinkHtmlTemplate(), + 'info' => $this->agentRunnerConfig->getInfoHtmlTemplate(), + 'debug' => $this->agentRunnerConfig->getDebugHtmlTemplate(), + ], + 'shop_query_optimizer' => [ + 'rules' => $this->agentRunnerConfig->getShopPromptRules(), + 'conversation_context_rules' => $this->agentRunnerConfig->getConversationContextRules(), + 'intro' => $this->agentRunnerConfig->getShopPromptIntro(), + 'output_format_block' => $this->agentRunnerConfig->getShopPromptOutputFormatBlock(), + 'recent_conversation_context_label' => $this->agentRunnerConfig->getRecentConversationContextLabel(), + 'current_user_input_label' => $this->agentRunnerConfig->getCurrentUserInputLabel(), + ], + ]; + } + + /** @return array */ private function vectorConfig(): array { return [ @@ -207,9 +417,7 @@ final readonly class RetriexEffectiveConfigProvider ]; } - /** - * @return array - */ + /** @return array */ private function commerceConfig(): array { return [ @@ -226,6 +434,196 @@ final readonly class RetriexEffectiveConfigProvider ]; } + /** @return array */ + private function commerceQueryConfig(): array + { + return [ + 'known_brands' => $this->commerceQueryParserConfig->getKnownBrands(), + 'phrases_to_remove' => $this->commerceQueryParserConfig->getPhrasesToRemove(), + 'filter_search_tokens' => 
$this->commerceQueryParserConfig->getFilterSearchTokens(), + 'search_token_corrections' => $this->commerceQueryParserConfig->getSearchTokenCorrections(), + 'search_token_canonical_map' => $this->commerceQueryParserConfig->getSearchTokenCanonicalMap(), + 'semantic_shop_search_tokens' => $this->commerceQueryParserConfig->getSemanticShopSearchTokens(), + 'limits' => [ + 'min_search_token_length' => $this->commerceQueryParserConfig->getMinSearchTokenLength(), + 'min_direct_product_token_length' => $this->commerceQueryParserConfig->getMinDirectProductTokenLength(), + 'direct_product_max_tokens' => $this->commerceQueryParserConfig->getDirectProductMaxTokens(), + 'model_context_token_window' => $this->commerceQueryParserConfig->getModelContextTokenWindow(), + 'min_meaningful_alpha_token_length' => $this->commerceQueryParserConfig->getMinMeaningfulAlphaTokenLength(), + 'max_shop_search_tokens' => $this->commerceQueryParserConfig->getMaxShopSearchTokens(), + ], + 'patterns' => [ + 'history_context' => $this->commerceQueryParserConfig->getHistoryContextPattern(), + 'history_context_value' => $this->commerceQueryParserConfig->getHistoryContextValuePattern(), + 'filter_search_tokens' => $this->commerceQueryParserConfig->getFilterSearchTokensPattern(), + 'prompt_sanitize' => $this->commerceQueryParserConfig->getPromptSanitizePattern(), + 'whitespace_collapse' => $this->commerceQueryParserConfig->getWhitespaceCollapsePattern(), + 'whitespace_split' => $this->commerceQueryParserConfig->getWhitespaceSplitPattern(), + 'history_question' => $this->commerceQueryParserConfig->getHistoryQuestionPattern(), + 'price_between' => $this->commerceQueryParserConfig->getPriceBetweenPattern(), + 'price_max' => $this->commerceQueryParserConfig->getPriceMaxPattern(), + 'price_min' => $this->commerceQueryParserConfig->getPriceMinPattern(), + 'direct_product_digit' => $this->commerceQueryParserConfig->getDirectProductDigitPattern(), + 'model_like' => 
$this->commerceQueryParserConfig->getModelLikePattern(), + 'accessory_like' => $this->commerceQueryParserConfig->getAccessoryLikePattern(), + 'contains_digit' => $this->commerceQueryParserConfig->getContainsDigitPattern(), + 'model_number_token' => $this->commerceQueryParserConfig->getModelNumberTokenPattern(), + 'model_context_token' => $this->commerceQueryParserConfig->getModelContextTokenPattern(), + 'model_suffix_token' => $this->commerceQueryParserConfig->getModelSuffixTokenPattern(), + 'instruction_or_presentation_token' => $this->commerceQueryParserConfig->getInstructionOrPresentationTokenPattern(), + 'measurement_value_token' => $this->commerceQueryParserConfig->getMeasurementValueTokenPattern(), + ], + ]; + } + + /** @return array */ + private function shopMatchingConfig(): array + { + return [ + 'device_focus_keywords' => $this->shopServiceConfig->getDeviceFocusKeywords(), + 'accessory_focus_keywords' => $this->shopServiceConfig->getAccessoryFocusKeywords(), + 'accessory_focus_variant_map' => $this->shopServiceConfig->getAccessoryFocusVariantMap(), + 'device_query_keywords' => $this->shopServiceConfig->getDeviceQueryKeywords(), + 'accessory_query_keywords' => $this->shopServiceConfig->getAccessoryQueryKeywords(), + 'accessory_product_keywords' => $this->shopServiceConfig->getAccessoryProductKeywords(), + 'device_product_keywords' => $this->shopServiceConfig->getDeviceProductKeywords(), + 'scores' => [ + 'exact_product_number_phrase' => $this->shopServiceConfig->getExactProductNumberPhraseScore(), + 'exact_product_name_phrase' => $this->shopServiceConfig->getExactProductNamePhraseScore(), + 'exact_manufacturer_match' => $this->shopServiceConfig->getExactManufacturerMatchScore(), + 'brand_contained_in_name' => $this->shopServiceConfig->getBrandContainedInNameScore(), + 'name_token_overlap_weight' => $this->shopServiceConfig->getNameTokenOverlapWeight(), + 'product_number_token_overlap_weight' => 
$this->shopServiceConfig->getProductNumberTokenOverlapWeight(), + 'corpus_token_overlap_weight' => $this->shopServiceConfig->getCorpusTokenOverlapWeight(), + 'name_number_overlap_weight' => $this->shopServiceConfig->getNameNumberOverlapWeight(), + 'product_number_number_overlap_weight' => $this->shopServiceConfig->getProductNumberNumberOverlapWeight(), + 'corpus_number_overlap_weight' => $this->shopServiceConfig->getCorpusNumberOverlapWeight(), + 'size_match' => $this->shopServiceConfig->getSizeMatchScore(), + 'availability_bonus' => $this->shopServiceConfig->getAvailabilityBonusScore(), + 'device_query_device_product_bonus' => $this->shopServiceConfig->getDeviceQueryDeviceProductBonus(), + 'device_query_accessory_penalty' => $this->shopServiceConfig->getDeviceQueryAccessoryPenalty(), + 'accessory_query_accessory_product_bonus' => $this->shopServiceConfig->getAccessoryQueryAccessoryProductBonus(), + 'accessory_query_device_product_bonus' => $this->shopServiceConfig->getAccessoryQueryDeviceProductBonus(), + ], + ]; + } + + /** Collects the search-repair settings exposed by the search repair config getters: the enabled flag, query limits, term lists and nested 'scores' and 'patterns' maps. @return array<string, mixed> */ + private function searchRepairEffectiveConfig(): array + { + return [ + 'enabled' => $this->searchRepairConfig->isEnabled(), + 'max_repair_queries' => $this->searchRepairConfig->getMaxRepairQueries(), + 'min_primary_results_without_repair' => $this->searchRepairConfig->getMinPrimaryResultsWithoutRepair(), + 'generic_candidate_tokens' => $this->searchRepairConfig->getGenericCandidateTokens(), + 'accessory_candidate_terms' => $this->searchRepairConfig->getAccessoryCandidateTerms(), + 'accessory_or_bundle_terms' => $this->searchRepairConfig->getAccessoryOrBundleTerms(), + 'specificity_boost_terms' => $this->searchRepairConfig->getSpecificityBoostTerms(), + 'scores' => [ + 'candidate_digit' => $this->searchRepairConfig->getCandidateDigitScore(), + 'candidate_word_count_cap' => $this->searchRepairConfig->getCandidateWordCountCap(), + 'specificity_boost' => $this->searchRepairConfig->getSpecificityBoostScore(), + 
'primary_query_overlap_threshold' => $this->searchRepairConfig->getPrimaryQueryOverlapThreshold(), + 'prompt_match_weight' => $this->searchRepairConfig->getPromptMatchWeight(), + 'primary_query_match_weight' => $this->searchRepairConfig->getPrimaryQueryMatchWeight(), + 'repair_signal_match_weight' => $this->searchRepairConfig->getRepairSignalMatchWeight(), + 'primary_result_order_bonus' => $this->searchRepairConfig->getPrimaryResultOrderBonus(), + 'token_intersection_score' => $this->searchRepairConfig->getTokenIntersectionScore(), + 'numeric_token_match_score' => $this->searchRepairConfig->getNumericTokenMatchScore(), + ], + 'patterns' => [ + 'model_candidate' => $this->searchRepairConfig->getModelCandidatePattern(), + 'accessory_candidate' => $this->searchRepairConfig->getAccessoryCandidatePattern(), + 'accessory_or_bundle' => $this->searchRepairConfig->getAccessoryOrBundlePattern(), + 'model_like' => $this->searchRepairConfig->getModelLikePattern(), + 'specificity_boost' => $this->searchRepairConfig->getSpecificityBoostPattern(), + 'contains_digit' => $this->searchRepairConfig->getContainsDigitPattern(), + 'whitespace_collapse' => $this->searchRepairConfig->getWhitespaceCollapsePattern(), + 'tokenize_cleanup' => $this->searchRepairConfig->getTokenizeCleanupPattern(), + ], + ]; + } + + /** Collects the intent settings in three sections: 'commerce' (signal/term lists, patterns and a 'thresholds' map from the commerce intent config), 'light' and 'sales' (class constants plus lists from their config objects). @return array<string, mixed> */ + private function intentConfig(): array + { + return [ + 'commerce' => [ + 'strong_signals' => $this->commerceIntentConfig->getStrongSignalsList(), + 'advisory_signals' => $this->commerceIntentConfig->getAdvisorySignals(), + 'price_terms' => $this->commerceIntentConfig->getPriceTerms(), + 'color_terms' => $this->commerceIntentConfig->getColorTerms(), + 'size_token_terms' => $this->commerceIntentConfig->getSizeTokenTerms(), + 'size_terms' => $this->commerceIntentConfig->getSizeTerms(), + 'support_diagnostic_patterns' => $this->commerceIntentConfig->getSupportDiagnosticPatterns(), + 'explicit_commerce_intent_patterns' => 
$this->commerceIntentConfig->getExplicitCommerceIntentPatterns(), + 'thresholds' => [ + 'product_search_min_score' => $this->commerceIntentConfig->getProductSearchMinScore(), + 'advisory_product_search_min_score' => $this->commerceIntentConfig->getAdvisoryProductSearchMinScore(), + 'strong_signal_score' => $this->commerceIntentConfig->getStrongSignalScore(), + 'sku_signal_score' => $this->commerceIntentConfig->getSkuSignalScore(), + 'price_signal_score' => $this->commerceIntentConfig->getPriceSignalScore(), + 'size_signal_score' => $this->commerceIntentConfig->getSizeSignalScore(), + 'size_token_signal_score' => $this->commerceIntentConfig->getSizeTokenSignalScore(), + 'color_signal_score' => $this->commerceIntentConfig->getColorSignalScore(), + 'advisory_signal_score' => $this->commerceIntentConfig->getAdvisorySignalScore(), + 'model_like_product_signal_score' => $this->commerceIntentConfig->getModelLikeProductSignalScore(), + ], + ], + 'light' => [ + 'list_threshold' => IntentLightConfig::LIST_THRESHOLD, + 'quantity_words' => $this->intentLightConfig->getQuantityWords(), + 'strong_patterns' => $this->intentLightConfig->getStrongPatterns(), + ], + 'sales' => [ + 'dominance_delta' => SalesIntentConfig::DOMINANCE_DELTA, + 'min_score_threshold' => SalesIntentConfig::MIN_SCORE_THRESHOLD, + 'sales_signals' => $this->salesIntentConfig->getSalesSignals(), + 'comparison_signals' => $this->salesIntentConfig->getComparisonSignals(), + 'objection_signals' => $this->salesIntentConfig->getObjectionSignals(), + 'implementation_signals' => $this->salesIntentConfig->getImplementationSignals(), + 'roi_signals' => $this->salesIntentConfig->getRoiSignals(), + ], + ]; + } + + /** Wraps the stop word list from the stop words config under the single key 'stopwords'. @return array<string, mixed> */ + private function languageConfig(): array + { + return ['stopwords' => $this->stopWordsConfig->getStopWords()]; + } + + /** Collects the query enrichment settings: 'max_expansions', the 'has_rules' flag and the 'rules' returned by getEnrichQueryList(). @return array<string, mixed> */ + private function queryEnrichmentConfig(): array + { + return [ + 'max_expansions' => $this->queryEnricherConfig->getMaxExpansions(), + 'has_rules' 
=> $this->queryEnricherConfig->hasRules(), + 'rules' => $this->queryEnricherConfig->getEnrichQueryList(), + ]; + } + + /** Exposes the CatalogIntentConfig class constants (score bounds, ambiguity delta, search limit) as a map. @return array<string, mixed> */ + private function catalogIntentConfig(): array + { + return [ + 'min_score' => CatalogIntentConfig::MIN_SCORE, + 'ambiguity_delta' => CatalogIntentConfig::AMBIGUITY_DELTA, + 'search_limit' => CatalogIntentConfig::SEARCH_LIMIT, + 'min_allowed_score' => CatalogIntentConfig::MIN_ALLOWED_SCORE, + 'max_allowed_score' => CatalogIntentConfig::MAX_ALLOWED_SCORE, + ]; + } + + /** Exposes the two ContextServiceConfig line-limit constants as a map. @return array<string, mixed> */ + private function contextConfig(): array + { + return [ + 'max_visible_regular_lines' => ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES, + 'max_full_lines' => ContextServiceConfig::MAX_FULL_LINES, + ]; + } + /** * @param array $runtime * @param list $errors @@ -316,23 +714,25 @@ final readonly class RetriexEffectiveConfigProvider */ private function validateRetrieval(array $retrieval, array &$errors, array &$warnings): void { - $floor = (float) $retrieval['threshold_floor']; - $threshold = (float) $retrieval['vector_score_threshold']; - $ceil = (float) $retrieval['threshold_ceil']; + $floor = (float) ($retrieval['threshold_floor'] ?? 0.0); + $threshold = (float) ($retrieval['vector_score_threshold'] ?? 0.0); + $ceil = (float) ($retrieval['threshold_ceil'] ?? 1.0); if ($floor > $threshold || $threshold > $ceil) { $errors[] = 'retrieval threshold must satisfy threshold_floor <= vector_score_threshold <= threshold_ceil.'; } - if ((int) $retrieval['hard_max_chunks'] < 1) { + if ((int) ($retrieval['hard_max_chunks'] ?? 0) < 1) { $errors[] = 'retrieval.hard_max_chunks must be greater than 0.'; } + $this->validateStringListMap($retrieval['vocabulary'] ?? [], 'retrieval.vocabulary', $errors, $warnings); + $inventory = $retrieval['inventory_parameter'] ?? []; if (is_array($inventory)) { foreach ($inventory as $key => $value) { if (array_key_exists($key, $retrieval) && $retrieval[$key] != $value) { - $warnings[] = 'retrieval.inventory.' . $key . 
' differs from active retriever constant.'; + $warnings[] = 'retrieval.inventory.' . $key . ' differs from active retriever config.'; } } } @@ -345,17 +745,35 @@ final readonly class RetriexEffectiveConfigProvider */ private function validatePrompt(array $prompt, array &$errors, array &$warnings): void { - if ((int) $prompt['chars_per_token'] < 1) { + if ((int) ($prompt['chars_per_token'] ?? 0) < 1) { $errors[] = 'prompt.chars_per_token must be greater than 0.'; } - if ((float) $prompt['output_reserve_ratio'] < 0.0 || (float) $prompt['output_reserve_ratio'] > 1.0) { + if ((float) ($prompt['output_reserve_ratio'] ?? -1) < 0.0 || (float) ($prompt['output_reserve_ratio'] ?? 2) > 1.0) { $errors[] = 'prompt.output_reserve_ratio must be between 0 and 1.'; } - if ((float) $prompt['safety_reserve_ratio'] < 0.0 || (float) $prompt['safety_reserve_ratio'] > 1.0) { + if ((float) ($prompt['safety_reserve_ratio'] ?? -1) < 0.0 || (float) ($prompt['safety_reserve_ratio'] ?? 2) > 1.0) { $errors[] = 'prompt.safety_reserve_ratio must be between 0 and 1.'; } + + $this->validateStringListMap($prompt['rules'] ?? [], 'prompt.rules', $errors, $warnings); + $this->validateStringListMap($prompt['detection'] ?? [], 'prompt.detection', $errors, $warnings); + $this->validateRegexPattern($prompt['detection']['technical_product_model_pattern'] ?? null, 'prompt.detection.technical_product_model_pattern', $errors); + } + + /** + * @param array $agent + * @param list $errors + * @param list $warnings + */ + private function validateAgent(array $agent, array &$errors, array &$warnings): void + { + $this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings); + $this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings); + $this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings); + $this->validateStringListMap($agent['shop_query_optimizer'] ?? 
[], 'agent.shop_query_optimizer', $errors, $warnings); + $this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors); } /** @@ -404,6 +822,331 @@ final readonly class RetriexEffectiveConfigProvider } } + /** Validates the commerce_query section: generic string list/map checks, the 'patterns' sub-map, and two guards for the protected decimal token 0,02. The measurement_value_token guard fires only when the pattern compiles but does not match (preg_match returns 0); a pattern that fails to compile (false) is already reported by the pattern validation and is not reported a second time here. + * @param array<string, mixed> $commerceQuery Commerce query section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateCommerceQuery(array $commerceQuery, array &$errors, array &$warnings): void + { + $this->validateStringListMap($commerceQuery, 'commerce_query', $errors, $warnings); + + $patterns = is_array($commerceQuery['patterns'] ?? null) ? $commerceQuery['patterns'] : []; + if ($patterns === []) { + $errors[] = 'commerce_query.patterns must be an array.'; + } else { + $this->validateCommerceQueryPatterns($patterns, $errors, $warnings); + } + + $measurementPattern = $patterns['measurement_value_token'] ?? null; + if (is_string($measurementPattern) && @preg_match($measurementPattern, '0,02') === 0) { + $errors[] = 'commerce_query.patterns.measurement_value_token must match 0,02.'; + } + + $filterTokens = $commerceQuery['filter_search_tokens'] ?? []; + if (is_array($filterTokens) && in_array('0,02', $filterTokens, true)) { + $errors[] = 'commerce_query.filter_search_tokens must not remove protected decimal token 0,02.'; + } + } + + /** Validates the shop_matching section with the generic string list/map checks only. + * @param array<string, mixed> $shopMatching Shop matching section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateShopMatching(array $shopMatching, array &$errors, array &$warnings): void + { + $this->validateStringListMap($shopMatching, 'shop_matching', $errors, $warnings); + } + + /** Validates the search_repair section: max_repair_queries must not be negative, then generic string list/map checks and regex checks on the 'patterns' map. + * @param array<string, mixed> $searchRepair Search repair section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateSearchRepair(array $searchRepair, array &$errors, array &$warnings): void + { + if ((int) ($searchRepair['max_repair_queries'] ?? 
0) < 0) { + $errors[] = 'search_repair.max_repair_queries must be greater than or equal to 0.'; + } + + $this->validateStringListMap($searchRepair, 'search_repair', $errors, $warnings); + $this->validateRegexPatternMap($searchRepair['patterns'] ?? [], 'search_repair.patterns', $errors); + } + + /** Validates the intent section: generic string list/map checks on the whole section, then regex checks on the commerce pattern lists, light.strong_patterns and sales.comparison_signals. + * @param array<string, mixed> $intent Intent section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateIntent(array $intent, array &$errors, array &$warnings): void + { + $this->validateStringListMap($intent, 'intent', $errors, $warnings); + $commerce = is_array($intent['commerce'] ?? null) ? $intent['commerce'] : []; + $this->validateRegexPatternList($commerce['support_diagnostic_patterns'] ?? [], 'intent.commerce.support_diagnostic_patterns', $errors); + $this->validateRegexPatternList($commerce['explicit_commerce_intent_patterns'] ?? [], 'intent.commerce.explicit_commerce_intent_patterns', $errors); + + $light = is_array($intent['light'] ?? null) ? $intent['light'] : []; + $this->validateRegexPatternList($light['strong_patterns'] ?? [], 'intent.light.strong_patterns', $errors); + + $sales = is_array($intent['sales'] ?? null) ? $intent['sales'] : []; + $this->validateRegexPatternList($sales['comparison_signals'] ?? [], 'intent.sales.comparison_signals', $errors); + } + + /** Validates the vocabulary section: generic string list/map checks on classes, views and maps, then warns for each protected short model token missing from views.retrieval.important_short_model_tokens.add. The lookup is a strict, case-sensitive in_array against lower-case tokens. + * @param array<string, mixed> $vocabulary Vocabulary section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateVocabulary(array $vocabulary, array &$errors, array &$warnings): void + { + $this->validateStringListMap($vocabulary['classes'] ?? [], 'vocabulary.classes', $errors, $warnings); + $this->validateStringListMap($vocabulary['views'] ?? [], 'vocabulary.views', $errors, $warnings); + $this->validateStringListMap($vocabulary['maps'] ?? [], 'vocabulary.maps', $errors, $warnings); + + $retrievalViews = $vocabulary['views']['retrieval'] ?? null; + if (is_array($retrievalViews)) { + $shortModel = $retrievalViews['important_short_model_tokens']['add'] ?? 
[]; + if (is_array($shortModel)) { + foreach (['th', 'tc', 'tp', 'tm', 'ph', 'rx'] as $token) { + if (!in_array($token, $shortModel, true)) { + $warnings[] = 'vocabulary.views.retrieval.important_short_model_tokens should contain protected token ' . $token . '.'; + } + } + } + } + } + + /** Validates the language section: generic string list/map checks, then an error for each protected term found in the stopword list. The lookup is a strict, case-sensitive in_array against lower-case terms. + * @param array<string, mixed> $language Language section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateLanguage(array $language, array &$errors, array &$warnings): void + { + $this->validateStringListMap($language, 'language', $errors, $warnings); + $stopwords = is_array($language['stopwords'] ?? null) ? $language['stopwords'] : []; + foreach (['nicht', 'kein', 'welche', 'testomat', 'indikator', '0,02'] as $protected) { + if (in_array($protected, $stopwords, true)) { + $errors[] = 'language.stopwords must not contain protected term: ' . $protected . '.'; + } + } + } + + /** Validates the query_enrichment section: max_expansions must not be negative and rules must be a map of non-empty strings. Rule keys are cast to string before the emptiness check because PHP stores numeric-string array keys (e.g. a rule keyed by "2000") as int; a non-empty list-shaped rules value is rejected up front as not being a map. Validation stops at the first invalid rule. + * @param array<string, mixed> $queryEnrichment Query enrichment section of the effective config. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; not written to by this method. + */ + private function validateQueryEnrichment(array $queryEnrichment, array &$errors, array &$warnings): void + { + if ((int) ($queryEnrichment['max_expansions'] ?? 0) < 0) { + $errors[] = 'query_enrichment.max_expansions must be greater than or equal to 0.'; + } + + $rules = $queryEnrichment['rules'] ?? []; + if (!is_array($rules) || ($rules !== [] && $this->isList($rules))) { + $errors[] = 'query_enrichment.rules must be a map.'; + return; + } + + foreach ($rules as $left => $right) { + if (trim((string) $left) === '' || !is_string($right) || trim($right) === '') { + $errors[] = 'query_enrichment.rules must contain non-empty string mappings.'; + return; + } + } + } + + /** Records an error when the value is not a non-empty string, or when preg_match() returns false for it against an empty subject (the pattern does not compile). + * @param mixed $value Candidate full regex, delimiters included. + * @param list<string> $errors Error messages; appended to by reference. + */ + private function validateRegexPattern(mixed $value, string $path, array &$errors): void + { + if (!is_string($value) || trim($value) === '') { + $errors[] = $path . ' must be a non-empty regex string.'; + return; + } + + if (@preg_match($value, '') === false) { + $errors[] = $path . 
' is not a valid regex pattern.'; + } + } + + /** Validates the commerce_query.patterns map: every key in $regexKeys as a full regex, history_context as a regex fragment, and filter_search_tokens (when present) as a plain string list. + * @param array<string, mixed> $patterns The commerce_query.patterns map. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateCommerceQueryPatterns(array $patterns, array &$errors, array &$warnings): void + { + $regexKeys = [ + 'history_context_value', + 'prompt_sanitize', + 'whitespace_collapse', + 'whitespace_split', + 'history_question', + 'price_between', + 'price_max', + 'price_min', + 'direct_product_digit', + 'model_like', + 'accessory_like', + 'contains_digit', + 'model_number_token', + 'model_context_token', + 'model_suffix_token', + 'instruction_or_presentation_token', + 'measurement_value_token', + ]; + + foreach ($regexKeys as $key) { + $this->validateRegexPattern($patterns[$key] ?? null, 'commerce_query.patterns.' . $key, $errors); + } + + $this->validateRegexFragment($patterns['history_context'] ?? null, 'commerce_query.patterns.history_context', $errors); + + if (array_key_exists('filter_search_tokens', $patterns)) { + $this->validateStringList($this->toList($patterns['filter_search_tokens']), 'commerce_query.patterns.filter_search_tokens', $errors, $warnings); + } + } + + /** Records an error when the value is not a non-empty string, or when it does not compile once wrapped as '/(?:' . $value . ')/u'. + * @param list<string> $errors Error messages; appended to by reference. + */ + private function validateRegexFragment(mixed $value, string $path, array &$errors): void + { + if (!is_string($value) || trim($value) === '') { + $errors[] = $path . ' must be a non-empty regex fragment string.'; + return; + } + + if (@preg_match('/(?:' . $value . ')/u', '') === false) { + $errors[] = $path . ' is not a valid regex fragment.'; + } + } + + /** Returns $value unchanged when it is an array, otherwise an empty array. + * @return array<array-key, mixed> + */ + private function toList(mixed $value): array + { + return is_array($value) ? $value : []; + } + + + /** Validates a map of regex patterns: array entries are checked as pattern lists, every other entry as a single pattern. A non-array input is recorded as an error. + * @param mixed $patterns Map of pattern name to a regex string or a list of regex strings. + * @param list<string> $errors Error messages; appended to by reference. + */ + private function validateRegexPatternMap(mixed $patterns, string $path, array &$errors): void + { + if (!is_array($patterns)) { + $errors[] = $path . ' must be an array of regex patterns.'; + return; + } + + foreach ($patterns as $key => $pattern) { + $currentPath = $path . 
'.' . (string) $key; + if (is_array($pattern)) { + $this->validateRegexPatternList($pattern, $currentPath, $errors); + continue; + } + + $this->validateRegexPattern($pattern, $currentPath, $errors); + } + } + + /** Validates every entry of the given array as a single regex pattern, using the entry index as the path suffix. A non-array input is recorded as an error. + * @param mixed $patterns List of regex strings. + * @param list<string> $errors Error messages; appended to by reference. + */ + private function validateRegexPatternList(mixed $patterns, string $path, array &$errors): void + { + if (!is_array($patterns)) { + $errors[] = $path . ' must be an array of regex patterns.'; + return; + } + + foreach ($patterns as $index => $pattern) { + $this->validateRegexPattern($pattern, $path . '.' . (string) $index, $errors); + } + } + + /** Walks a nested config array: list-shaped arrays go to validateStringList(), other arrays are recursed into, blank strings are errors, int/float/bool/null values are accepted, and anything else produces a warning. A non-array input is recorded as an error. + * @param mixed $value Config section or sub-section to walk. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateStringListMap(mixed $value, string $path, array &$errors, array &$warnings): void + { + if (!is_array($value)) { + $errors[] = $path . ' must be an array.'; + return; + } + + foreach ($value as $key => $item) { + $currentPath = $path . '.' . (string) $key; + if (is_array($item)) { + if ($this->isList($item)) { + $this->validateStringList($item, $currentPath, $errors, $warnings); + continue; + } + + $this->validateStringListMap($item, $currentPath, $errors, $warnings); + continue; + } + + if (is_string($item)) { + if (trim($item) === '') { + $errors[] = $currentPath . ' must not be empty.'; + } + continue; + } + + if (is_int($item) || is_float($item) || is_bool($item) || $item === null) { + continue; + } + + $warnings[] = $currentPath . ' contains a non-scalar value.'; + } + } + + /** Validates a flat list: non-scalar items and items that are empty after string cast and trim are errors; values that repeat after lower-casing produce a warning. + * @param array<array-key, mixed> $items List to check. + * @param list<string> $errors Error messages; appended to by reference. + * @param list<string> $warnings Warning messages; appended to by reference. + */ + private function validateStringList(array $items, string $path, array &$errors, array &$warnings): void + { + $seen = []; + foreach ($items as $index => $item) { + if (!is_scalar($item)) { + $errors[] = $path . '.' . (string) $index . ' must be a scalar value.'; + continue; + } + + $item = trim((string) $item); + if ($item === '') { + $errors[] = $path . '.' . (string) $index . 
' must not be empty.'; + continue; + } + + $key = mb_strtolower($item, 'UTF-8'); + if (isset($seen[$key])) { + $warnings[] = $path . ' contains duplicate value: ' . $item . '.'; + } + $seen[$key] = true; + } + } + + /** Thin wrapper around array_is_list(). @param array<array-key, mixed> $value */ + private function isList(array $value): bool + { + return array_is_list($value); + } + private function param(string $name, mixed $default = null): mixed { if (!$this->parameters->has($name)) {