1892 lines
102 KiB
PHP
1892 lines
102 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Config;
|
|
|
|
use App\Index\IndexConfigurationProvider;
|
|
use App\Service\ModelGenerationConfigProvider;
|
|
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
|
|
|
|
final readonly class RetriexEffectiveConfigProvider
|
|
{
|
|
/**
 * Receives the parameter bag, the model and index providers, and the individual
 * configuration objects this provider reports on.
 *
 * Every argument is stored as a promoted private property; the constructor does
 * no other work.
 */
public function __construct(
    private ParameterBagInterface $parameters,
    private ModelGenerationConfigProvider $modelProvider,
    private IndexConfigurationProvider $indexProvider,
    private PromptBuilderConfig $promptConfig,
    private NdjsonHybridRetrieverConfig $retrieverConfig,
    private DomainVocabularyConfig $domainVocabularyConfig,
    private AgentRunnerConfig $agentRunnerConfig,
    private SearchRepairConfig $searchRepairConfig,
    private CommerceIntentConfig $commerceIntentConfig,
    private CommerceQueryParserConfig $commerceQueryParserConfig,
    private IntentLightConfig $intentLightConfig,
    private SalesIntentConfig $salesIntentConfig,
    private ShopServiceConfig $shopServiceConfig,
    private StopWordsConfig $stopWordsConfig,
    private LanguageCleanupConfig $languageCleanupConfig,
    private QueryEnricherConfig $queryEnricherConfig,
    private GovernanceConfig $governanceConfig,
    private CatalogIntentConfig $catalogIntentConfig,
    private ContextServiceConfig $contextServiceConfig,
) {}
|
|
|
|
/**
 * Assembles the effective configuration as one array with a top-level key per section.
 *
 * Sections are filled in a fixed order; most come from a dedicated private builder,
 * while 'llm' is read straight from parameters and 'vocabulary' / 'governance' are
 * the toArray() exports of their config objects.
 *
 * @return array<string, mixed>
 */
public function dump(): array
{
    $sections = [];

    $sections['runtime'] = $this->runtimeConfig();
    $sections['index'] = $this->indexConfig();
    $sections['model_generation'] = $this->modelConfig();
    $sections['llm'] = [
        'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'),
        'num_predict' => $this->param('retriex.llm.num_predict'),
    ];
    $sections['retrieval'] = $this->retrievalConfig();
    $sections['prompt'] = $this->promptConfig();
    $sections['agent'] = $this->agentConfig();
    $sections['vector'] = $this->vectorConfig();
    $sections['commerce'] = $this->commerceConfig();
    $sections['commerce_query'] = $this->commerceQueryConfig();
    $sections['shop_matching'] = $this->shopMatchingConfig();
    $sections['search_repair'] = $this->searchRepairEffectiveConfig();
    $sections['intent'] = $this->intentConfig();
    $sections['vocabulary'] = $this->domainVocabularyConfig->toArray();
    $sections['governance'] = $this->governanceConfig->toArray();
    $sections['language'] = $this->languageConfig();
    $sections['query_enrichment'] = $this->queryEnrichmentConfig();
    $sections['catalog_intent'] = $this->catalogIntentConfig();
    $sections['context'] = $this->contextConfig();

    return $sections;
}
|
|
|
|
/**
 * Dumps the effective configuration and runs the per-section validators over it.
 *
 * Each validator is handed its section plus the shared problem and notice lists.
 * NOTE(review): the validators are defined outside this view; they presumably take
 * both lists by reference and append to them — confirm.
 * NOTE(review): dump() also produces 'catalog_intent' and 'context', but no validator
 * is invoked for those two sections here — confirm that this is intentional.
 *
 * The status is 'OK' only when no error was collected; the full dump is returned
 * alongside the findings.
 *
 * @return array{status:string, errors:list<string>, warnings:list<string>, config:array<string,mixed>}
 */
public function validate(): array
{
    $config = $this->dump();
    $problems = [];
    $notices = [];

    $this->validateRuntime($config['runtime'], $problems, $notices);
    $this->validateIndex($config['index'], $problems, $notices);
    $this->validateModel($config['model_generation'], $problems, $notices);
    $this->validateRetrieval($config['retrieval'], $problems, $notices);
    $this->validatePrompt($config['prompt'], $problems, $notices);
    $this->validateAgent($config['agent'], $problems, $notices);
    $this->validateVector($config['vector'], $problems, $notices);
    $this->validateCommerce($config['commerce'], $problems, $notices);
    $this->validateCommerceQuery($config['commerce_query'], $problems, $notices);
    $this->validateShopMatching($config['shop_matching'], $problems, $notices);
    $this->validateSearchRepair($config['search_repair'], $problems, $notices);
    $this->validateIntent($config['intent'], $problems, $notices);
    $this->validateVocabulary($config['vocabulary'], $problems, $notices);
    $this->validateGovernance($config['governance'], $problems, $notices);
    $this->validateLanguage($config['language'], $problems, $notices);
    $this->validateQueryEnrichment($config['query_enrichment'], $problems, $notices);

    if ($problems === []) {
        $status = 'OK';
    } else {
        $status = 'ERROR';
    }

    return [
        'status' => $status,
        'errors' => $problems,
        'warnings' => $notices,
        'config' => $config,
    ];
}
|
|
|
|
/**
 * Offline regression guard for the stable 1.4.2-sensitive configuration paths.
 *
 * Runs validate() first, then compares the values that the governance config lists
 * as protected or required against the live configuration objects. Every comparison
 * is stored under its own key in `checks`; every failed comparison appends a message
 * to `errors`. Warnings are only the ones forwarded from validate().
 *
 * The language cleanup guardrails run inside a try block: an InvalidArgumentException
 * raised there is reported as an error and recorded as
 * `language_cleanup_profile_config_valid => false`. When the block completes without
 * an exception the same key is recorded as true, so the key is always present.
 *
 * @return array{status:string, checks:array<string,bool>, errors:list<string>, warnings:list<string>}
 */
public function regressionBaseline(): array
{
    $errors = [];
    $warnings = [];
    $checks = [];

    $validate = $this->validate();
    $checks['config_validate_ok'] = $validate['status'] === 'OK';
    if ($validate['status'] !== 'OK') {
        foreach ($validate['errors'] as $error) {
            $errors[] = 'Config validation failed: ' . $error;
        }
    }
    foreach ($validate['warnings'] as $warning) {
        $warnings[] = 'Config validation warning: ' . $warning;
    }

    try {
        $cleanupProfileNames = $this->languageCleanupConfig->getCleanupProfileNames();
        foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $profileName) {
            $key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName);
            $checks[$key] = in_array($profileName, $cleanupProfileNames, true);
            if (!$checks[$key]) {
                $errors[] = 'Missing required language cleanup profile: ' . $profileName . '.';
                continue;
            }

            // The result is discarded. NOTE(review): presumably the call is made so that a
            // broken profile raises InvalidArgumentException and lands in the catch below.
            $this->languageCleanupConfig->getCleanupProfile($profileName);
        }

        $legacyStopwords = $this->stopWordsConfig->getStopWords();
        // Profiles are fetched on first use and reused for every protected term.
        $profilesByName = [];
        foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protectedTerm) {
            $key = 'language_protected_term_' . $this->guardrailCheckKey($protectedTerm);

            $checks[$key . '_registered'] = $this->languageCleanupConfig->isProtectedTerm($protectedTerm);
            if (!$checks[$key . '_registered']) {
                $errors[] = 'Missing protected language cleanup term: ' . $protectedTerm . '.';
            }

            $checks[$key . '_not_legacy_stopword'] = !in_array($protectedTerm, $legacyStopwords, true);
            if (!$checks[$key . '_not_legacy_stopword']) {
                $errors[] = 'Protected language cleanup term is still a legacy stopword: ' . $protectedTerm . '.';
            }

            // A protected term must not appear in any removal bucket of any cleanup profile.
            foreach ($cleanupProfileNames as $profileName) {
                $profile = $profilesByName[$profileName] ??= $this->languageCleanupConfig->getCleanupProfile($profileName);
                foreach (['stopwords', 'phrases', 'meta_terms'] as $bucket) {
                    $bucketKey = $key . '_not_in_' . $this->guardrailCheckKey($profileName . '_' . $bucket);
                    $checks[$bucketKey] = !in_array($protectedTerm, $profile[$bucket] ?? [], true);
                    if (!$checks[$bucketKey]) {
                        $errors[] = sprintf('Protected language cleanup term %s is present in %s.%s.', $protectedTerm, $profileName, $bucket);
                    }
                }
            }
        }

        foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $requiredTerms) {
            $profile = $this->languageCleanupConfig->getCleanupProfile($profileName);
            foreach ($requiredTerms as $bucket => $terms) {
                foreach ($terms as $term) {
                    $key = 'language_cleanup_profile_' . $this->guardrailCheckKey($profileName . '_' . $bucket . '_' . $term);
                    $checks[$key] = in_array($term, $profile[$bucket] ?? [], true);
                    if (!$checks[$key]) {
                        $errors[] = sprintf('Missing language cleanup profile term: %s.%s must contain %s.', $profileName, $bucket, $term);
                    }
                }
            }
        }

        $checks['commerce_query_cleanup_profile_wired'] = $this->commerceQueryParserConfig->getCleanupProfile() === 'commerce_query';
        if (!$checks['commerce_query_cleanup_profile_wired']) {
            $errors[] = 'Commerce query parser is not wired to cleanup profile commerce_query.';
        }

        $checks['rag_evidence_cleanup_profile_wired'] = $this->agentRunnerConfig->getRagEvidenceCleanupProfile() === 'rag_evidence';
        if (!$checks['rag_evidence_cleanup_profile_wired']) {
            $errors[] = 'RAG evidence guard is not wired to cleanup profile rag_evidence.';
        }

        $checks['shop_context_fallback_cleanup_profile_wired'] = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile() === 'shop_context_fallback';
        if (!$checks['shop_context_fallback_cleanup_profile_wired']) {
            $errors[] = 'Shop context fallback is not wired to cleanup profile shop_context_fallback.';
        }

        // Reached only when no guardrail call above threw.
        $checks['language_cleanup_profile_config_valid'] = true;
    } catch (\InvalidArgumentException $e) {
        $checks['language_cleanup_profile_config_valid'] = false;
        $errors[] = 'Language cleanup profile guardrails failed: ' . $e->getMessage();
    }

    $importantShortModelTokens = $this->retrieverConfig->importantShortModelTokens();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionProtectedShortModelTokens(),
        $importantShortModelTokens,
        'important_short_model_token_',
        'Missing protected short model token: ',
        '',
        $checks,
        $errors,
    );

    $measurementPattern = $this->commerceQueryParserConfig->getMeasurementValueTokenPattern();
    $filterTokens = $this->commerceQueryParserConfig->getFilterSearchTokens();
    foreach ($this->governanceConfig->getRegressionProtectedMeasurementValues() as $measurementValue) {
        $keySuffix = $this->guardrailCheckKey($measurementValue);
        $matchesKey = 'measurement_value_' . $keySuffix . '_matches';
        $notFilteredKey = 'measurement_value_' . $keySuffix . '_not_filtered';

        // The pattern comes from configuration. "@" keeps a pattern that cannot be compiled
        // from raising a warning; the failure is reported through the false return value instead.
        $matchResult = @preg_match($measurementPattern, $measurementValue);
        $checks[$matchesKey] = $matchResult === 1;
        if ($matchResult === false) {
            // Distinguish "pattern unusable" from "pattern did not match".
            $errors[] = sprintf(
                'Commerce query parser measurement value pattern could not be evaluated for protected measurement value: %s (%s).',
                $measurementValue,
                preg_last_error_msg(),
            );
        } elseif ($matchResult !== 1) {
            $errors[] = 'Commerce query parser no longer recognizes protected measurement value: ' . $measurementValue . '.';
        }

        $checks[$notFilteredKey] = !in_array($measurementValue, $filterTokens, true);
        if (!$checks[$notFilteredKey]) {
            $errors[] = 'Commerce query parser filters protected measurement value: ' . $measurementValue . '.';
        }
    }

    $technicalKeywords = $this->promptConfig->getTechnicalProductKeywords();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionProtectedTechnicalPromptKeywords(),
        $technicalKeywords,
        'technical_keyword_',
        'Missing technical prompt keyword: ',
        '',
        $checks,
        $errors,
    );
    $technicalPriorityRules = implode("\n", $this->promptConfig->getOutputPriorityTechnicalRules());
    $checks['technical_priority_rules_present'] = trim($technicalPriorityRules) !== '';
    $checks['technical_priority_required_markers_present'] = $this->containsAnyConfiguredMarker(
        $technicalPriorityRules,
        $this->governanceConfig->getRegressionTechnicalPriorityRequiredMarkers()
    );
    if (!$checks['technical_priority_rules_present']) {
        $errors[] = 'Missing technical output priority rules.';
    }
    if (!$checks['technical_priority_required_markers_present']) {
        $errors[] = 'Technical output priority no longer contains a required governance marker.';
    }

    $accessoryKeywords = $this->promptConfig->getAccessoryRequestKeywords();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionProtectedAccessoryPromptKeywords(),
        $accessoryKeywords,
        'accessory_keyword_',
        'Missing accessory prompt keyword: ',
        '',
        $checks,
        $errors,
    );

    $searchRepairTerms = $this->searchRepairConfig->getSpecificityBoostTerms();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionProtectedSearchRepairSpecificityTerms(),
        $searchRepairTerms,
        'search_repair_specificity_',
        'Missing search repair specificity term: ',
        '',
        $checks,
        $errors,
    );

    $reagentWords = $this->retrieverConfig->looksLikeReagentWords();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionProtectedRetrievalReagentWords(),
        $reagentWords,
        'retrieval_reagent_word_',
        'Missing retrieval reagent word: ',
        '.',
        $checks,
        $errors,
    );

    // A device word group passes as soon as any one of its terms is configured.
    $deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
    foreach ($this->governanceConfig->getRegressionProtectedRetrievalDeviceWordGroups() as $groupKey => $terms) {
        $key = 'retrieval_device_word_' . $this->guardrailCheckKey((string) $groupKey);
        $checks[$key] = false;
        foreach ($terms as $term) {
            if (in_array($term, $deviceWords, true)) {
                $checks[$key] = true;
                break;
            }
        }
        if (!$checks[$key]) {
            $errors[] = 'Missing retrieval device word group: ' . (string) $groupKey . '.';
        }
    }

    $shopPromptOriginalQuery = $this->governanceConfig->getRegressionShopPromptOriginalQuery();
    $shopPrompt = $this->agentRunnerConfig->getShopPrompt($shopPromptOriginalQuery, '');
    $checks['shop_prompt_contains_output_instruction'] = $this->containsAnyConfiguredMarker(
        $shopPrompt,
        $this->governanceConfig->getRegressionShopPromptRequiredOutputInstructionMarkers()
    );
    $checks['shop_prompt_contains_original_query'] = str_contains($shopPrompt, $shopPromptOriginalQuery);
    if (!$checks['shop_prompt_contains_output_instruction']) {
        $errors[] = 'Shop query optimizer prompt no longer contains a required output instruction marker.';
    }
    if (!$checks['shop_prompt_contains_original_query']) {
        $errors[] = 'Shop query optimizer prompt no longer contains the configured original query.';
    }

    // NOTE(review): the effective*Terms() helpers below query the language cleanup config
    // with the profile name configured for the shop context fallback, outside the try block
    // above. If those lookups can throw for an unknown profile, the exception would escape
    // this method instead of being reported as an error — confirm.
    $metaOnlyTerms = $this->effectiveShopQueryMetaGuardTerms();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionShopQueryMetaGuardTerms(),
        $metaOnlyTerms,
        'shop_query_meta_guard_term_',
        'Missing shop query meta guard term: ',
        '',
        $checks,
        $errors,
    );
    $checks['shop_query_context_fallback_enabled'] = $this->agentRunnerConfig->isShopQueryContextFallbackEnabled();
    if (!$checks['shop_query_context_fallback_enabled']) {
        $errors[] = 'Shop query context fallback is disabled.';
    }

    $contextFallbackFilterTerms = $this->effectiveShopQueryContextFallbackFilterTerms();
    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionShopQueryContextFallbackFilterTerms(),
        $contextFallbackFilterTerms,
        'shop_query_context_fallback_filter_',
        'Missing shop query context fallback filter term: ',
        '',
        $checks,
        $errors,
    );
    $currentInputPreservationTerms = $this->effectiveShopQueryCurrentInputPreservationTerms();
    $checks['shop_query_current_input_preservation_enabled'] = $this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled();
    if (!$checks['shop_query_current_input_preservation_enabled']) {
        $errors[] = 'Shop query current-input term preservation is disabled.';
    }

    $this->recordRequiredTermChecks(
        $this->governanceConfig->getRegressionShopQueryCurrentInputPreservationTerms(),
        $currentInputPreservationTerms,
        'shop_query_current_input_preservation_',
        'Missing shop query current-input preservation term: ',
        '',
        $checks,
        $errors,
    );

    $checks['shop_query_context_fallback_history_budget_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars() > 0;
    if (!$checks['shop_query_context_fallback_history_budget_positive']) {
        $errors[] = 'Shop query context fallback history budget must be greater than zero.';
    }

    $checks['shop_query_context_fallback_full_history_enabled'] = $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback();
    if (!$checks['shop_query_context_fallback_full_history_enabled']) {
        $errors[] = 'Shop query context fallback full-history fallback is disabled.';
    }

    $checks['shop_query_context_fallback_question_limit_minimum'] = $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit() >= 6;
    if (!$checks['shop_query_context_fallback_question_limit_minimum']) {
        $errors[] = 'Shop query context fallback question limit is too low for repeated meta follow-ups.';
    }

    $checks['shop_query_context_fallback_max_terms_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms() > 0;
    if (!$checks['shop_query_context_fallback_max_terms_positive']) {
        $errors[] = 'Shop query context fallback max terms must be greater than zero.';
    }

    $status = $errors === [] ? 'OK' : 'ERROR';

    return [
        'status' => $status,
        'checks' => $checks,
        'errors' => $errors,
        'warnings' => $warnings,
    ];
}

/**
 * Records one strict membership check per required term and an error for each missing term.
 *
 * The check key is $checkKeyPrefix followed by guardrailCheckKey($term); the error text is
 * $errorPrefix, the term, then $errorSuffix.
 *
 * @param iterable<string> $requiredTerms terms that must be present
 * @param string[] $availableTerms terms that are actually configured
 * @param array<string,bool> $checks check results, appended to in place
 * @param list<string> $errors error messages, appended to in place
 */
private function recordRequiredTermChecks(
    iterable $requiredTerms,
    array $availableTerms,
    string $checkKeyPrefix,
    string $errorPrefix,
    string $errorSuffix,
    array &$checks,
    array &$errors,
): void {
    foreach ($requiredTerms as $term) {
        $key = $checkKeyPrefix . $this->guardrailCheckKey($term);
        $checks[$key] = in_array($term, $availableTerms, true);
        if (!$checks[$key]) {
            $errors[] = $errorPrefix . $term . $errorSuffix;
        }
    }
}
|
|
|
|
|
|
/**
 * Combines the stop words, phrases and meta terms of the cleanup profile configured
 * for the shop query context fallback with the agent runner's meta-only terms.
 *
 * The result is trimmed and de-duplicated by mergeUniqueStrings(), keeping
 * first-occurrence order.
 *
 * @return string[]
 */
private function effectiveShopQueryMetaGuardTerms(): array
{
    $cleanup = $this->languageCleanupConfig;
    $profile = $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile();

    $stopWordsAndPhrases = $this->mergeUniqueStrings(
        $cleanup->getStopWordsForProfile($profile),
        $cleanup->getPhrasesForProfile($profile)
    );
    $metaTerms = $this->mergeUniqueStrings(
        $cleanup->getMetaTermsForProfile($profile),
        $this->agentRunnerConfig->getShopQueryMetaOnlyTerms()
    );

    return $this->mergeUniqueStrings($stopWordsAndPhrases, $metaTerms);
}
|
|
|
|
/**
 * Returns the effective meta guard terms followed by the agent runner's configured
 * context fallback filter terms, trimmed and de-duplicated by mergeUniqueStrings().
 *
 * The cleanup profile name is resolved inside effectiveShopQueryMetaGuardTerms(),
 * so it is not fetched again here.
 *
 * @return string[]
 */
private function effectiveShopQueryContextFallbackFilterTerms(): array
{
    return $this->mergeUniqueStrings(
        $this->effectiveShopQueryMetaGuardTerms(),
        $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms()
    );
}
|
|
|
|
/**
 * Returns the language cleanup protected terms followed by the agent runner's
 * current-input preservation terms, trimmed and de-duplicated by mergeUniqueStrings().
 *
 * @return string[]
 */
private function effectiveShopQueryCurrentInputPreservationTerms(): array
{
    $protectedTerms = $this->languageCleanupConfig->getProtectedTerms();
    $preservationTerms = $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms();

    return $this->mergeUniqueStrings($protectedTerms, $preservationTerms);
}
|
|
|
|
/**
 * Merges two string lists into one list of trimmed, non-empty, unique values.
 *
 * Values keep the order of their first occurrence in array_merge($left, $right).
 * Uniqueness is decided on the trimmed value.
 *
 * @param string[] $left
 * @param string[] $right
 * @return string[]
 */
private function mergeUniqueStrings(array $left, array $right): array
{
    $seen = [];
    $unique = [];

    foreach (array_merge($left, $right) as $candidate) {
        $value = trim((string) $candidate);
        if ($value !== '' && !isset($seen[$value])) {
            // Set-style lookup on the trimmed value; the list keeps insertion order.
            $seen[$value] = true;
            $unique[] = $value;
        }
    }

    return $unique;
}
|
|
|
|
/**
 * Reports whether at least one non-empty marker occurs in the haystack.
 *
 * Empty markers are ignored, so an empty marker list (or a list of only empty
 * strings) yields false.
 *
 * @param string[] $markers
 */
private function containsAnyConfiguredMarker(string $haystack, array $markers): bool
{
    foreach ($markers as $candidate) {
        if ($candidate === '') {
            continue;
        }

        if (str_contains($haystack, $candidate)) {
            return true;
        }
    }

    return false;
}
|
|
|
|
/**
 * Turns a term into a check-key fragment.
 *
 * The term is lower-cased, every run of characters that are neither Unicode letters
 * nor Unicode digits becomes a single underscore, and leading/trailing underscores
 * are removed. If nothing remains, 'value' is returned.
 */
private function guardrailCheckKey(string $term): string
{
    $lowered = mb_strtolower($term, 'UTF-8');

    $collapsed = preg_replace('/[^\p{L}\p{N}]+/u', '_', $lowered);
    if ($collapsed === null) {
        // preg_replace() yields null on failure; fall back to the lower-cased term.
        $collapsed = $lowered;
    }

    $fragment = trim($collapsed, '_');
    if ($fragment === '') {
        return 'value';
    }

    return $fragment;
}
|
|
/**
 * Reads the runtime path parameters and returns them under short keys.
 *
 * @return array<string, mixed>
 */
private function runtimeConfig(): array
{
    // Output key => parameter name, looked up in this order.
    $parameterNames = [
        'root' => 'retriex.root',
        'knowledge_root' => 'retriex.knowledge.root',
        'index_ndjson' => 'retriex.knowledge.ndjson',
        'index_meta' => 'retriex.knowledge.index_meta',
        'runtime_meta' => 'retriex.knowledge.runtime_meta',
        'upload_dir' => 'retriex.knowledge.upload',
        'locks_dir' => 'retriex.locks.dir',
    ];

    $runtime = [];
    foreach ($parameterNames as $outputKey => $parameterName) {
        $runtime[$outputKey] = $this->param($parameterName);
    }

    return $runtime;
}
|
|
|
|
/**
 * Describes the index configuration supplied by the index provider.
 *
 * If obtaining or reading the configuration throws, the exception message is
 * returned under 'error' together with the fallback_* index parameters instead.
 *
 * @return array<string, mixed>
 */
private function indexConfig(): array
{
    try {
        $configuration = $this->indexProvider->getConfiguration();

        $snapshot = [
            'chunk_size' => $configuration->getChunkSize(),
            'chunk_overlap' => $configuration->getChunkOverlap(),
            'embedding_model' => $configuration->getEmbeddingModel(),
            'embedding_dimension' => $configuration->getEmbeddingDimension(),
            'scoring_version' => $configuration->getScoringVersion(),
            'index_format' => $configuration->getIndexFormat(),
            'vector_backend' => $configuration->getVectorBackend(),
        ];
    } catch (\Throwable $failure) {
        // Best effort: the dump should still show something when the provider fails.
        $snapshot = [
            'error' => $failure->getMessage(),
            'fallback_chunk_size' => $this->param('retriex.index.chunk_size'),
            'fallback_chunk_overlap' => $this->param('retriex.index.chunk_overlap'),
            'fallback_embedding_model' => $this->param('retriex.index.embedding_model'),
            'fallback_embedding_dimension' => $this->param('retriex.index.embedding_dimension'),
            'fallback_scoring_version' => $this->param('retriex.index.scoring_version'),
        ];
    }

    return $snapshot;
}
|
|
|
|
/**
 * Describes the active model generation settings supplied by the model provider.
 *
 * If obtaining or reading the active model throws, the exception message is
 * returned under 'error' together with the default_* model parameters instead.
 *
 * @return array<string, mixed>
 */
private function modelConfig(): array
{
    try {
        $active = $this->modelProvider->getActiveForModel();

        $snapshot = [
            'model_name' => $active->getModelName(),
            'version' => $active->getVersion(),
            'active' => $active->isActive(),
            'stream' => $active->isStream(),
            'temperature' => $active->getTemperature(),
            'top_k' => $active->getTopK(),
            'top_p' => $active->getTopP(),
            'repeat_penalty' => $active->getRepeatPenalty(),
            'num_ctx' => $active->getNumCtx(),
            'retrieval_max_chunks' => $active->getRetrievalMaxChunks(),
            'retrieval_vector_top_k' => $active->getRetrievalVectorTopK(),
        ];
    } catch (\Throwable $failure) {
        // Best effort: the dump should still show something when the provider fails.
        $snapshot = [
            'error' => $failure->getMessage(),
            'default_model_name' => $this->param('retriex.model.default_name'),
            'default_num_ctx' => $this->param('retriex.model.default_num_ctx'),
            'default_retrieval_max_chunks' => $this->param('retriex.model.default_retrieval_max_chunks'),
            'default_retrieval_vector_top_k' => $this->param('retriex.model.default_retrieval_vector_top_k'),
        ];
    }

    return $snapshot;
}
|
|
|
|
/**
 * Returns the retriever config export, extended with its vocabulary export and the
 * 'retriex.retrieval.inventory' parameter.
 *
 * The two added keys are written after the export is spread, so they win over
 * same-named keys coming from toArray().
 *
 * @return array<string, mixed>
 */
private function retrievalConfig(): array
{
    $retrieverExport = $this->retrieverConfig->toArray();
    $vocabularyExport = $this->retrieverConfig->vocabularyToArray();
    $inventoryParameter = $this->param('retriex.retrieval.inventory', []);

    return [
        ...$retrieverExport,
        'vocabulary' => $vocabularyExport,
        'inventory_parameter' => $inventoryParameter,
    ];
}
|
|
|
|
/**
 * Exports the prompt builder configuration.
 *
 * Budget and limit values sit at the top level; section labels, rule lists, shop
 * field labels and detection settings are grouped under 'labels', 'rules',
 * 'shop_fields' and 'detection'.
 *
 * @return array<string, mixed>
 */
private function promptConfig(): array
{
    $prompt = $this->promptConfig;

    $export = [
        'chars_per_token' => $prompt->getCharsPerToken(),
        'history_padding_chars' => $prompt->getHistoryPaddingChars(),
        'output_reserve_ratio' => $prompt->getOutputReserveRatio(),
        'output_reserve_min_tokens' => $prompt->getOutputReserveMinTokens(),
        'output_reserve_max_tokens' => $prompt->getOutputReserveMaxTokens(),
        'safety_reserve_ratio' => $prompt->getSafetyReserveRatio(),
        'safety_reserve_min_tokens' => $prompt->getSafetyReserveMinTokens(),
        'safety_reserve_max_tokens' => $prompt->getSafetyReserveMaxTokens(),
        'min_prompt_budget_tokens' => $prompt->getMinPromptBudgetTokens(),
        'max_shop_results_in_prompt' => $prompt->getMaxShopResultsInPrompt(),
        'detailed_shop_results_max_count' => $prompt->getDetailedShopResultsMaxCount(),
        'technical_product_keyword_match_threshold' => $prompt->getTechnicalProductKeywordMatchThreshold(),
    ];

    $export['labels'] = [
        'system' => $prompt->getSystemSectionLabel(),
        'user_question' => $prompt->getUserQuestionSectionLabel(),
        'conversation_context' => $prompt->getConversationContextSectionLabel(),
        'shop_search_query' => $prompt->getShopSearchQuerySectionLabel(),
        'output_priority' => $prompt->getOutputPrioritySectionLabel(),
        'response_format' => $prompt->getResponseFormatSectionLabel(),
        'language_rules' => $prompt->getLanguageRulesSectionLabel(),
        'fact_grounding_rules' => $prompt->getFactGroundingRulesSectionLabel(),
        'retrieved_knowledge' => $prompt->getRetrievedKnowledgeSectionLabel(),
        'url_content' => $prompt->getUrlContentSectionLabel(),
    ];

    $export['rules'] = [
        'conversation_context_intro_lines' => $prompt->getConversationContextIntroLines(),
        'live_shop_results_header_lines' => $prompt->getLiveShopResultsHeaderLines(),
        'output_priority' => $prompt->getOutputPriorityRules(),
        'output_priority_technical' => $prompt->getOutputPriorityTechnicalRules(),
        'response_format_base' => $prompt->getResponseFormatBaseRules(),
        'response_format_with_shop' => $prompt->getResponseFormatWithShopRules(),
        'response_format_without_shop' => $prompt->getResponseFormatWithoutShopRules(),
        'response_format_technical' => $prompt->getResponseFormatTechnicalRules(),
        'response_format_accessory' => $prompt->getResponseFormatAccessoryRules(),
        'language' => $prompt->getLanguageRules(),
        'fact_grounding_base' => $prompt->getFactGroundingBaseRules(),
        'fact_grounding_with_shop' => $prompt->getFactGroundingWithShopRules(),
        'fact_grounding_without_shop' => $prompt->getFactGroundingWithoutShopRules(),
        'fact_grounding_technical' => $prompt->getFactGroundingTechnicalRules(),
    ];

    $export['shop_fields'] = [
        'product_number_label' => $prompt->getShopProductNumberLabel(),
        'manufacturer_label' => $prompt->getShopManufacturerLabel(),
        'price_label' => $prompt->getShopPriceLabel(),
        'availability_label' => $prompt->getShopAvailabilityLabel(),
        'availability_yes_label' => $prompt->getShopAvailabilityYesLabel(),
        'availability_no_label' => $prompt->getShopAvailabilityNoLabel(),
        'highlight_prefix' => $prompt->getShopHighlightPrefix(),
        'url_label' => $prompt->getShopUrlLabel(),
        'product_image_label' => $prompt->getShopProductImageLabel(),
        'description_label' => $prompt->getShopDescriptionLabel(),
        'meta_information_label' => $prompt->getShopMetaInformationLabel(),
    ];

    $export['detection'] = [
        'technical_product_keywords' => $prompt->getTechnicalProductKeywords(),
        'accessory_request_keywords' => $prompt->getAccessoryRequestKeywords(),
        'technical_product_model_pattern' => $prompt->getTechnicalProductModelPattern(),
    ];

    return $export;
}
|
|
|
|
/**
 * Collects the agent runner settings into one nested array.
 *
 * Every value is read from the injected AgentRunnerConfig through its
 * getters; nothing is computed, validated or transformed here. dump()
 * publishes the result under the 'agent' key.
 *
 * @return array<string, mixed>
 */
private function agentConfig(): array
{
    // Local alias only; it keeps the long getter lines readable.
    $runner = $this->agentRunnerConfig;

    return [
        'commerce_history_budget_chars' => $runner->getCommerceHistoryBudgetChars(),
        'product_search_knowledge_chunk_limit' => $runner->getProductSearchKnowledgeChunkLimit(),
        'advisory_product_search_knowledge_chunk_limit' => $runner->getAdvisoryProductSearchKnowledgeChunkLimit(),
        'optimized_shop_query_prefix_pattern' => $runner->getOptimizedShopQueryPrefixPattern(),
        'follow_up_context' => [
            'commercial_table_follow_up' => [
                'enabled' => $runner->isCommercialTableFollowUpEnabled(),
                'prompt_patterns' => $runner->getCommercialTableFollowUpPromptPatterns(),
                'history_anchor_patterns' => $runner->getCommercialTableFollowUpHistoryAnchorPatterns(),
                'table_terms' => $runner->getCommercialTableFollowUpTableTerms(),
                'commercial_terms' => $runner->getCommercialTableFollowUpCommercialTerms(),
                'indicator_marker_patterns' => $runner->getCommercialTableFollowUpIndicatorMarkerPatterns(),
                'query_template_with_model' => $runner->getCommercialTableFollowUpQueryTemplateWithModel(),
                'query_template_without_model' => $runner->getCommercialTableFollowUpQueryTemplateWithoutModel(),
            ],
        ],
        'input_normalization' => [
            'enabled' => $runner->isInputNormalizationEnabled(),
            'max_input_chars' => $runner->getInputNormalizationMaxInputChars(),
            'max_output_chars' => $runner->getInputNormalizationMaxOutputChars(),
            'max_added_tokens' => $runner->getInputNormalizationMaxAddedTokens(),
            'max_length_ratio_percent' => $runner->getInputNormalizationMaxLengthRatioPercent(),
            'heartbeat_message' => $runner->getInputNormalizationHeartbeatMessage(),
            'output_prefix_pattern' => $runner->getInputNormalizationOutputPrefixPattern(),
            'skip_patterns' => $runner->getInputNormalizationSkipPatterns(),
            'prompt' => [
                'intro' => $runner->getInputNormalizationIntro(),
                'rules' => $runner->getInputNormalizationRules(),
                'output_format_block' => $runner->getInputNormalizationOutputFormatBlock(),
                'current_user_input_label' => $runner->getInputNormalizationCurrentUserInputLabel(),
            ],
            'fuzzy_routing' => [
                'enabled' => $runner->isInputNormalizationFuzzyRoutingEnabled(),
                'min_token_length' => $runner->getInputNormalizationFuzzyRoutingMinTokenLength(),
                'medium_token_length' => $runner->getInputNormalizationFuzzyRoutingMediumTokenLength(),
                'long_token_length' => $runner->getInputNormalizationFuzzyRoutingLongTokenLength(),
                'max_distance_short' => $runner->getInputNormalizationFuzzyRoutingMaxDistanceShort(),
                'max_distance_medium' => $runner->getInputNormalizationFuzzyRoutingMaxDistanceMedium(),
                'max_distance_long' => $runner->getInputNormalizationFuzzyRoutingMaxDistanceLong(),
                'min_similarity_percent' => $runner->getInputNormalizationFuzzyRoutingMinSimilarityPercent(),
                'terms' => $runner->getInputNormalizationFuzzyRoutingTerms(),
            ],
        ],
        'messages' => [
            'empty_prompt' => $runner->getEmptyPromptMessage(),
            'analyze_request' => $runner->getAnalyzeRequestMessage(),
            'check_internet_sources' => $runner->getCheckInternetSourcesMessage(),
            'retrieve_knowledge' => $runner->getRetrieveKnowledgeMessage(),
            'optimize_search' => $runner->getOptimizeSearchMessage(),
            'no_concrete_shop_query' => $runner->getNoConcreteShopQueryMessage(),
            'fetch_search_data_template' => $runner->getFetchSearchDataMessageTemplate(),
            'analyze_all_information' => $runner->getAnalyzeAllInformationMessage(),
            'thinking_while_streaming' => $runner->getThinkingWhileStreamingMessage(),
            'no_llm_data_received' => $runner->getNoLlmDataReceivedMessage(),
            'generic_internal_error' => $runner->getGenericInternalErrorMessage(),
            'debug_internal_error_prefix' => $runner->getDebugInternalErrorPrefix(),
        ],
        'final_answer_guard' => [
            'enabled' => $runner->isFinalAnswerGuardEnabled(),
            'max_output_chars' => $runner->getFinalAnswerGuardMaxOutputChars(),
            'truncation_message' => $runner->getFinalAnswerGuardTruncationMessage(),
            'repeated_line' => [
                'enabled' => $runner->isFinalAnswerRepeatedLineGuardEnabled(),
                'min_output_chars' => $runner->getFinalAnswerRepeatedLineMinOutputChars(),
                'min_line_chars' => $runner->getFinalAnswerRepeatedLineMinLineChars(),
                'max_line_repetitions' => $runner->getFinalAnswerRepeatedLineMaxRepetitions(),
                'trailing_window_lines' => $runner->getFinalAnswerRepeatedLineTrailingWindowLines(),
                'ignore_patterns' => $runner->getFinalAnswerRepeatedLineIgnorePatterns(),
            ],
        ],
        'shop_runtime' => [
            'query_cleanup' => [
                'current_input_preservation' => [
                    'enabled' => $runner->isShopQueryCurrentInputPreservationEnabled(),
                    'terms' => $runner->getShopQueryCurrentInputPreservationTerms(),
                ],
                'stopword_cleanup' => [
                    'enabled' => $runner->isShopQueryStopwordCleanupEnabled(),
                    'min_query_tokens_after_cleanup' => $runner->getShopQueryStopwordCleanupMinTokens(),
                    'terms' => $runner->getShopQueryStopwordCleanupTerms(),
                ],
            ],
            'attribute_cleanup' => [
                'enabled' => $runner->isShopQueryProductAttributeCleanupEnabled(),
                'min_query_tokens_after_cleanup' => $runner->getShopQueryProductAttributeCleanupMinTokens(),
                'product_type_terms' => $runner->getShopQueryProductAttributeCleanupProductTypeTerms(),
                'stop_terms' => $runner->getShopQueryProductAttributeCleanupStopTerms(),
                'comparative_constraint_patterns' => $runner->getShopQueryProductAttributeCleanupComparativeConstraintPatterns(),
            ],
            'context_resolution' => [
                'context_usage' => [
                    'referential_terms' => $runner->getShopQueryContextUsageReferentialTerms(),
                ],
                'history_anchor_enrichment' => [
                    'enabled' => $runner->isShopQueryContextAnchorEnrichmentEnabled(),
                    'max_query_terms' => $runner->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
                    'trigger_terms' => $runner->getShopQueryContextAnchorEnrichmentTriggerTerms(),
                    'anchor_patterns' => $runner->getShopQueryContextAnchorEnrichmentPatterns(),
                    'template' => $runner->getShopQueryContextAnchorEnrichmentTemplate(),
                ],
                'meta_query_guard' => [
                    'enabled' => $runner->isShopQueryMetaGuardEnabled(),
                    'cleanup_profile' => $runner->getShopQueryContextFallbackCleanupProfile(),
                    'context_fallback_use_full_history' => $runner->shouldUseFullHistoryForShopQueryContextFallback(),
                    'meta_only_terms' => $runner->getShopQueryMetaOnlyTerms(),
                    'context_fallback_enabled' => $runner->isShopQueryContextFallbackEnabled(),
                    'context_fallback_question_limit' => $runner->getShopQueryContextFallbackQuestionLimit(),
                    'context_fallback_history_budget_chars' => $runner->getShopQueryContextFallbackHistoryBudgetChars(),
                    'context_fallback_max_terms' => $runner->getShopQueryContextFallbackMaxTerms(),
                    'context_fallback_filter_terms' => $runner->getShopQueryContextFallbackFilterTerms(),
                ],
                'rag_anchor_enrichment' => [
                    'enabled' => $runner->isShopQueryRagAnchorEnrichmentEnabled(),
                    'min_score' => $runner->getShopQueryRagAnchorEnrichmentMinScore(),
                    'max_query_terms' => $runner->getShopQueryRagAnchorEnrichmentMaxQueryTerms(),
                    'early_chunk_bonus_max' => $runner->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax(),
                    'template' => $runner->getShopQueryRagAnchorEnrichmentTemplate(),
                    'scores' => [
                        'exact_value_with_unit' => $runner->getShopQueryRagAnchorEnrichmentExactValueUnitScore(),
                        'exact_value_only' => $runner->getShopQueryRagAnchorEnrichmentExactValueScore(),
                        'anchor_bonus' => $runner->getShopQueryRagAnchorEnrichmentAnchorBonusScore(),
                    ],
                    'numeric_focus_patterns' => $runner->getShopQueryRagAnchorEnrichmentNumericFocusPatterns(),
                    'product_title_patterns' => $runner->getShopQueryRagAnchorEnrichmentProductTitlePatterns(),
                    'anchor_bonus_patterns' => $runner->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns(),
                    'subject_terms' => $runner->getShopQueryRagAnchorEnrichmentSubjectTerms(),
                ],
            ],
            'result_identity' => [
                'enabled' => $runner->isDirectShopResultGuardEnabled(),
                'prefer_primary_identity_matches' => $runner->shouldPreferDirectShopResultGuardPrimaryIdentityMatches(),
                'compound_prefix_match' => [
                    'enabled' => $runner->isDirectShopResultGuardCompoundPrefixMatchEnabled(),
                    'terms' => $runner->getDirectShopResultGuardCompoundPrefixTerms(),
                ],
                'primary_identity_repair' => [
                    'enabled' => $runner->isDirectShopResultGuardPrimaryIdentityRepairEnabled(),
                    'min_query_tokens_after_cleanup' => $runner->getDirectShopResultGuardPrimaryIdentityRepairMinQueryTokens(),
                    'stop_terms' => $runner->getDirectShopResultGuardPrimaryIdentityRepairStopTerms(),
                ],
            ],
            'answer_constraints' => [
                'length_sort' => [
                    'enabled' => $runner->isShopResultLengthSortEnabled(),
                    'trigger_patterns' => $runner->getShopResultLengthSortTriggerPatterns(),
                    'value_patterns' => $runner->getShopResultLengthSortValuePatterns(),
                ],
                'length_filter' => [
                    'enabled' => $runner->isShopResultLengthFilterEnabled(),
                    'min_patterns' => $runner->getShopResultMinLengthFilterPatterns(),
                    'max_patterns' => $runner->getShopResultMaxLengthFilterPatterns(),
                ],
            ],
            'direct_answer' => [
                'enabled' => $runner->isDirectShopResultAnswerEnabled(),
                'max_results' => $runner->getDirectShopResultAnswerMaxResults(),
                'intro' => $runner->getDirectShopResultAnswerIntro(),
                'no_results' => $runner->getDirectShopResultAnswerNoResultsMessage(),
                'sorted_by_length_note' => $runner->getDirectShopResultAnswerSortedByLengthNote(),
                'min_length_filter_note' => $runner->getDirectShopResultAnswerMinLengthFilterNote(),
                'max_length_filter_note' => $runner->getDirectShopResultAnswerMaxLengthFilterNote(),
            ],
        ],
        'rag_evidence_guard' => [
            'cleanup_profile' => $runner->getRagEvidenceCleanupProfile(),
            'stop_terms' => $runner->getRagEvidenceStopTerms(),
            'synonyms' => $runner->getRagEvidenceSynonyms(),
            'aggregate_query_patterns' => $runner->getRagEvidenceAggregateQueryPatterns(),
            'aggregate_evidence_terms' => $runner->getRagEvidenceAggregateEvidenceTerms(),
            'aggregate_answer_evidence_patterns' => $runner->getRagEvidenceAggregateAnswerEvidencePatterns(),
        ],
        'source_labels' => [
            'external_url' => $runner->getExternalUrlSourceLabel(),
            'rag_knowledge' => $runner->getRagKnowledgeSourceLabel(),
            'conversation_history' => $runner->getConversationHistorySourceLabel(),
            'shop_system' => $runner->getShopSystemSourceLabel(),
            'extended_shop_search' => $runner->getExtendedShopSearchSourceLabel(),
            'used_sources_prefix' => $runner->getUsedSourcesPrefix(),
            'sources_prefix' => $runner->getSourcesPrefix(),
        ],
        'html_templates' => [
            'source_badge' => $runner->getSourceBadgeHtmlTemplate(),
            'error' => $runner->getErrorHtmlTemplate(),
            'think' => $runner->getThinkHtmlTemplate(),
            'info' => $runner->getInfoHtmlTemplate(),
            'debug' => $runner->getDebugHtmlTemplate(),
        ],
        'shop_query_optimizer' => [
            'rules' => $runner->getShopPromptRules(),
            'conversation_context_rules' => $runner->getConversationContextRules(),
            'intro' => $runner->getShopPromptIntro(),
            'output_format_block' => $runner->getShopPromptOutputFormatBlock(),
            'recent_conversation_context_label' => $runner->getRecentConversationContextLabel(),
            'current_user_input_label' => $runner->getCurrentUserInputLabel(),
            'language_preservation' => [
                'enabled' => $runner->isShopQueryLanguagePreservationEnabled(),
                'language_markers' => $runner->getShopQueryLanguageMarkers(),
                // Only the 'de' replacement table is exported here.
                'translation_replacements_de' => $runner->getShopQueryTranslationReplacements('de'),
            ],
        ],
    ];
}
|
|
|
|
/**
 * Gathers the vector service settings.
 *
 * All values are looked up through this class's param() helper using the
 * 'retriex.vector.*' parameter names. dump() publishes the result under
 * the 'vector' key.
 *
 * @return array<string, mixed>
 */
private function vectorConfig(): array
{
    // Top-level connection and process settings come first so the key order
    // and the lookup order stay the same as in a single array literal.
    $vector = [
        'service_url' => $this->param('retriex.vector.service_url'),
        'host' => $this->param('retriex.vector.host'),
        'port' => $this->param('retriex.vector.port'),
        'python_bin' => $this->param('retriex.vector.python_bin'),
        'control_script' => $this->param('retriex.vector.control_script'),
        'timeout' => $this->param('retriex.vector.timeout'),
    ];

    $vector['search'] = [
        'min_score' => $this->param('retriex.vector.search.min_score'),
        'max_limit' => $this->param('retriex.vector.search.max_limit'),
        'http_timeout' => $this->param('retriex.vector.search.http_timeout'),
    ];

    $vector['tags'] = [
        'min_score' => $this->param('retriex.vector.tags.min_score'),
        'default_limit' => $this->param('retriex.vector.tags.default_limit'),
        'max_limit' => $this->param('retriex.vector.tags.max_limit'),
        'http_timeout' => $this->param('retriex.vector.tags.http_timeout'),
    ];

    $vector['tag_routing'] = [
        'default_topk' => $this->param('retriex.vector.tag_routing.default_topk'),
        'min_best_score' => $this->param('retriex.vector.tag_routing.min_best_score'),
        'max_score_drop_from_best' => $this->param('retriex.vector.tag_routing.max_score_drop_from_best'),
        'max_routing_tags' => $this->param('retriex.vector.tag_routing.max_routing_tags'),
        'max_candidate_docs' => $this->param('retriex.vector.tag_routing.max_candidate_docs'),
        'multi_tag_bonus_per_extra_tag' => $this->param('retriex.vector.tag_routing.multi_tag_bonus_per_extra_tag'),
        'max_multi_tag_bonus' => $this->param('retriex.vector.tag_routing.max_multi_tag_bonus'),
    ];

    return $vector;
}
|
|
|
|
/**
 * Gathers the commerce settings from the 'retriex.commerce.*' parameters.
 *
 * The sales channel access key itself is never exported; only a boolean
 * telling whether a key is present ends up in the result. dump() publishes
 * the result under the 'commerce' key.
 *
 * @return array<string, mixed>
 */
private function commerceConfig(): array
{
    $commerce = [
        'enabled' => $this->param('retriex.commerce.enabled'),
        'max_shop_results' => $this->param('retriex.commerce.max_shop_results'),
        'shop_timeout' => $this->param('retriex.commerce.shop_timeout'),
        'store_api_base_url' => $this->param('retriex.commerce.store_api_base_url'),
    ];

    // A plain `!== ''` check reports an unset (null) key as configured.
    // Treat both null and the empty string as "not configured"; every other
    // value keeps its previous result.
    // NOTE(review): param() is defined outside this block — confirm it can return null.
    $accessKey = $this->param('retriex.commerce.sales_channel_access_key');
    $commerce['sales_channel_access_key_configured'] = $accessKey !== null && $accessKey !== '';

    $commerce['search_repair'] = [
        'enabled' => $this->param('retriex.commerce.search_repair.enabled'),
        'max_queries' => $this->param('retriex.commerce.search_repair.max_queries'),
        'min_primary_results_without_repair' => $this->param('retriex.commerce.search_repair.min_primary_results_without_repair'),
    ];

    return $commerce;
}
|
|
|
|
/**
 * Collects the commerce query parser settings into one nested array.
 *
 * Values are read from the injected CommerceQueryParserConfig. The only
 * derived value is 'text.trim_characters_length', which exports the length
 * of the trim character string instead of the string itself.
 *
 * @return array<string, mixed>
 */
private function commerceQueryConfig(): array
{
    $parser = $this->commerceQueryParserConfig;

    return [
        'cleanup_profile' => $parser->getCleanupProfile(),
        'known_brands' => $parser->getKnownBrands(),
        'phrases_to_remove' => $parser->getPhrasesToRemove(),
        'filter_search_tokens' => $parser->getFilterSearchTokens(),
        'search_token_corrections' => $parser->getSearchTokenCorrections(),
        'search_token_canonical_map' => $parser->getSearchTokenCanonicalMap(),
        'semantic_shop_search_tokens' => $parser->getSemanticShopSearchTokens(),
        'normalization' => [
            'search' => $parser->getNormalizationSearch(),
            'replace' => $parser->getNormalizationReplace(),
        ],
        'text' => [
            // strlen() counts bytes, so a multibyte trim character contributes
            // more than one to this number.
            'trim_characters_length' => strlen($parser->getSearchTextTrimCharacters()),
        ],
        'limits' => [
            'min_search_token_length' => $parser->getMinSearchTokenLength(),
            'min_direct_product_token_length' => $parser->getMinDirectProductTokenLength(),
            'direct_product_max_tokens' => $parser->getDirectProductMaxTokens(),
            'model_context_token_window' => $parser->getModelContextTokenWindow(),
            'min_meaningful_alpha_token_length' => $parser->getMinMeaningfulAlphaTokenLength(),
            'max_shop_search_tokens' => $parser->getMaxShopSearchTokens(),
        ],
        'patterns' => [
            'history_context' => $parser->getHistoryContextPattern(),
            'history_context_value' => $parser->getHistoryContextValuePattern(),
            'filter_search_tokens' => $parser->getFilterSearchTokensPattern(),
            'prompt_sanitize' => $parser->getPromptSanitizePattern(),
            'whitespace_collapse' => $parser->getWhitespaceCollapsePattern(),
            'whitespace_split' => $parser->getWhitespaceSplitPattern(),
            'history_question' => $parser->getHistoryQuestionPattern(),
            'price_between' => $parser->getPriceBetweenPattern(),
            'price_max' => $parser->getPriceMaxPattern(),
            'price_min' => $parser->getPriceMinPattern(),
            'direct_product_digit' => $parser->getDirectProductDigitPattern(),
            'model_like' => $parser->getModelLikePattern(),
            'accessory_like' => $parser->getAccessoryLikePattern(),
            'contains_digit' => $parser->getContainsDigitPattern(),
            'model_number_token' => $parser->getModelNumberTokenPattern(),
            'model_context_token' => $parser->getModelContextTokenPattern(),
            'model_suffix_token' => $parser->getModelSuffixTokenPattern(),
            'instruction_or_presentation_token' => $parser->getInstructionOrPresentationTokenPattern(),
            'measurement_value_token' => $parser->getMeasurementValueTokenPattern(),
        ],
    ];
}
|
|
|
|
/**
 * Collects the shop matching keyword lists and scoring values.
 *
 * Everything is read from the injected ShopServiceConfig through its
 * getters; no value is modified on the way.
 *
 * @return array<string, mixed>
 */
private function shopMatchingConfig(): array
{
    $shop = $this->shopServiceConfig;

    return [
        'device_focus_keywords' => $shop->getDeviceFocusKeywords(),
        'accessory_focus_keywords' => $shop->getAccessoryFocusKeywords(),
        'accessory_focus_variant_map' => $shop->getAccessoryFocusVariantMap(),
        'device_query_keywords' => $shop->getDeviceQueryKeywords(),
        'accessory_query_keywords' => $shop->getAccessoryQueryKeywords(),
        'accessory_product_keywords' => $shop->getAccessoryProductKeywords(),
        'device_product_keywords' => $shop->getDeviceProductKeywords(),
        'scores' => [
            'exact_product_number_phrase' => $shop->getExactProductNumberPhraseScore(),
            'exact_product_name_phrase' => $shop->getExactProductNamePhraseScore(),
            'exact_manufacturer_match' => $shop->getExactManufacturerMatchScore(),
            'brand_contained_in_name' => $shop->getBrandContainedInNameScore(),
            'name_token_overlap_weight' => $shop->getNameTokenOverlapWeight(),
            'product_number_token_overlap_weight' => $shop->getProductNumberTokenOverlapWeight(),
            'corpus_token_overlap_weight' => $shop->getCorpusTokenOverlapWeight(),
            'name_number_overlap_weight' => $shop->getNameNumberOverlapWeight(),
            'product_number_number_overlap_weight' => $shop->getProductNumberNumberOverlapWeight(),
            'corpus_number_overlap_weight' => $shop->getCorpusNumberOverlapWeight(),
            'size_match' => $shop->getSizeMatchScore(),
            'availability_bonus' => $shop->getAvailabilityBonusScore(),
            'device_query_device_product_bonus' => $shop->getDeviceQueryDeviceProductBonus(),
            'device_query_accessory_penalty' => $shop->getDeviceQueryAccessoryPenalty(),
            'accessory_query_accessory_product_bonus' => $shop->getAccessoryQueryAccessoryProductBonus(),
            'accessory_query_device_product_bonus' => $shop->getAccessoryQueryDeviceProductBonus(),
        ],
    ];
}
|
|
|
|
/**
 * Collects the search repair settings as exposed by SearchRepairConfig.
 *
 * Unlike the 'search_repair' section built from raw parameters in
 * commerceConfig(), every value here comes from the injected
 * SearchRepairConfig object's getters.
 *
 * @return array<string, mixed>
 */
private function searchRepairEffectiveConfig(): array
{
    $repair = $this->searchRepairConfig;

    return [
        'enabled' => $repair->isEnabled(),
        'max_repair_queries' => $repair->getMaxRepairQueries(),
        'min_primary_results_without_repair' => $repair->getMinPrimaryResultsWithoutRepair(),
        'strict_requested_accessory_code_repair' => $repair->shouldRestrictRequestedAccessoryCodeRepair(),
        'prefer_prompt_anchored_model_for_requested_accessory_code' => $repair->shouldPreferPromptAnchoredModelForRequestedAccessoryCode(),
        'direct_product_attribute_lookup' => [
            'enabled' => $repair->isDirectProductAttributeLookupRepairEnabled(),
            'min_query_tokens_after_cleanup' => $repair->getDirectProductAttributeLookupMinTokens(),
            'product_type_terms' => $repair->getDirectProductAttributeLookupProductTypeTerms(),
            'stop_terms' => $repair->getDirectProductAttributeLookupStopTerms(),
            'comparative_constraint_patterns' => $repair->getDirectProductAttributeLookupComparativeConstraintPatterns(),
        ],
        'requested_accessory_code_fallback_query_templates' => $repair->getRequestedAccessoryCodeFallbackQueryTemplates(),
        'requested_accessory_code_fallback_terms' => $repair->getRequestedAccessoryCodeFallbackTerms(),
        'requested_accessory_code_context_prefix_terms' => $repair->getRequestedAccessoryCodeContextPrefixTerms(),
        'requested_accessory_code_proximity_window' => $repair->getRequestedAccessoryCodeProximityWindow(),
        'specific_model_candidate_patterns' => $repair->getSpecificModelCandidatePatterns(),
        'model_candidate_exclude_terms' => $repair->getModelCandidateExcludeTerms(),
        'generic_candidate_tokens' => $repair->getGenericCandidateTokens(),
        'accessory_candidate_terms' => $repair->getAccessoryCandidateTerms(),
        'accessory_or_bundle_terms' => $repair->getAccessoryOrBundleTerms(),
        'specificity_boost_terms' => $repair->getSpecificityBoostTerms(),
        'scores' => [
            'candidate_digit' => $repair->getCandidateDigitScore(),
            'candidate_word_count_cap' => $repair->getCandidateWordCountCap(),
            'specificity_boost' => $repair->getSpecificityBoostScore(),
            'primary_query_overlap_threshold' => $repair->getPrimaryQueryOverlapThreshold(),
            'prompt_match_weight' => $repair->getPromptMatchWeight(),
            'primary_query_match_weight' => $repair->getPrimaryQueryMatchWeight(),
            'repair_signal_match_weight' => $repair->getRepairSignalMatchWeight(),
            'primary_result_order_bonus' => $repair->getPrimaryResultOrderBonus(),
            'token_intersection_score' => $repair->getTokenIntersectionScore(),
            'numeric_token_match_score' => $repair->getNumericTokenMatchScore(),
        ],
        'patterns' => [
            'model_candidate' => $repair->getModelCandidatePattern(),
            'accessory_candidate' => $repair->getAccessoryCandidatePattern(),
            'requested_accessory_code' => $repair->getRequestedAccessoryCodePattern(),
            'accessory_or_bundle' => $repair->getAccessoryOrBundlePattern(),
            'model_like' => $repair->getModelLikePattern(),
            'specificity_boost' => $repair->getSpecificityBoostPattern(),
            'contains_digit' => $repair->getContainsDigitPattern(),
            'whitespace_collapse' => $repair->getWhitespaceCollapsePattern(),
            'tokenize_cleanup' => $repair->getTokenizeCleanupPattern(),
        ],
    ];
}
|
|
|
|
/**
 * Collects the intent detection settings of three config objects.
 *
 * The result has one section per source: 'commerce' (CommerceIntentConfig),
 * 'light' (IntentLightConfig) and 'sales' (SalesIntentConfig). Values are
 * passed through from the getters unchanged.
 *
 * @return array<string, mixed>
 */
private function intentConfig(): array
{
    $commerce = $this->commerceIntentConfig;
    $light = $this->intentLightConfig;
    $sales = $this->salesIntentConfig;

    return [
        'commerce' => [
            'strong_signals' => $commerce->getStrongSignalsList(),
            'advisory_signals' => $commerce->getAdvisorySignals(),
            'advisory_product_selection_patterns' => $commerce->getAdvisoryProductSelectionPatterns(),
            'price_terms' => $commerce->getPriceTerms(),
            'color_terms' => $commerce->getColorTerms(),
            'size_token_terms' => $commerce->getSizeTokenTerms(),
            'size_terms' => $commerce->getSizeTerms(),
            'support_diagnostic_patterns' => $commerce->getSupportDiagnosticPatterns(),
            'explicit_commerce_intent_patterns' => $commerce->getExplicitCommerceIntentPatterns(),
            'patterns' => [
                'sku_like' => $commerce->getSkuLikePattern(),
                'price_value' => $commerce->getPriceValuePattern(),
                'size_extraction' => $commerce->getSizeExtractionPattern(),
                'size_value' => $commerce->getSizeValuePattern(),
                'size_token_value' => $commerce->getSizeTokenValuePattern(),
                'color_value' => $commerce->getColorValuePattern(),
                'model_like_product' => $commerce->getModelLikeProductPattern(),
            ],
            'labels' => [
                'support_or_diagnostic_signal' => $commerce->getSupportOrDiagnosticSignalLabel(),
                'sku_signal' => $commerce->getSkuSignalLabel(),
                'price_signal' => $commerce->getPriceSignalLabel(),
                'size_signal' => $commerce->getSizeSignalLabel(),
                'size_token_signal' => $commerce->getSizeTokenSignalLabel(),
                'color_signal' => $commerce->getColorSignalLabel(),
                'advisory_signal_prefix' => $commerce->getAdvisorySignalPrefix(),
                'advisory_product_selection_signal' => $commerce->getAdvisoryProductSelectionSignalLabel(),
                'model_like_product_signal' => $commerce->getModelLikeProductSignalLabel(),
            ],
            'thresholds' => [
                'product_search_min_score' => $commerce->getProductSearchMinScore(),
                'advisory_product_search_min_score' => $commerce->getAdvisoryProductSearchMinScore(),
                'strong_signal_score' => $commerce->getStrongSignalScore(),
                'sku_signal_score' => $commerce->getSkuSignalScore(),
                'price_signal_score' => $commerce->getPriceSignalScore(),
                'size_signal_score' => $commerce->getSizeSignalScore(),
                'size_token_signal_score' => $commerce->getSizeTokenSignalScore(),
                'color_signal_score' => $commerce->getColorSignalScore(),
                'advisory_signal_score' => $commerce->getAdvisorySignalScore(),
                'advisory_product_selection_signal_score' => $commerce->getAdvisoryProductSelectionSignalScore(),
                'model_like_product_signal_score' => $commerce->getModelLikeProductSignalScore(),
            ],
        ],
        'light' => [
            'list_threshold' => $light->getListThreshold(),
            'quantity_words' => $light->getQuantityWords(),
            'strong_patterns' => $light->getStrongPatterns(),
        ],
        'sales' => [
            'dominance_delta' => $sales->getDominanceDelta(),
            'min_score_threshold' => $sales->getMinScoreThreshold(),
            'sales_signals' => $sales->getSalesSignals(),
            'comparison_signals' => $sales->getComparisonSignals(),
            'objection_signals' => $sales->getObjectionSignals(),
            'implementation_signals' => $sales->getImplementationSignals(),
            'roi_signals' => $sales->getRoiSignals(),
        ],
    ];
}
|
|
|
|
/**
 * Collects the language cleanup settings: stop words, protected terms and
 * every cleanup profile keyed by its name.
 *
 * The profile names are fetched once and used both for the exported name
 * list and for resolving the profiles, so the two cannot diverge.
 *
 * @return array<string, mixed>
 */
private function languageConfig(): array
{
    $cleanup = $this->languageCleanupConfig;
    $profileNames = $cleanup->getCleanupProfileNames();

    // Resolve each named profile into a name => profile map.
    $profiles = [];
    foreach ($profileNames as $profileName) {
        $profiles[$profileName] = $cleanup->getCleanupProfile($profileName);
    }

    return [
        'stopwords' => $this->stopWordsConfig->getStopWords(),
        'protected_terms' => $cleanup->getProtectedTerms(),
        'cleanup_profile_names' => $profileNames,
        'cleanup_profiles' => $profiles,
    ];
}
|
|
|
|
/**
 * Collects the query enrichment settings from QueryEnricherConfig: the
 * expansion limit, whether any rules exist, and the rule list itself.
 *
 * @return array<string, mixed>
 */
private function queryEnrichmentConfig(): array
{
    $enricher = $this->queryEnricherConfig;

    return [
        'max_expansions' => $enricher->getMaxExpansions(),
        'has_rules' => $enricher->hasRules(),
        'rules' => $enricher->getEnrichQueryList(),
    ];
}
|
|
|
|
/**
 * Collects the catalog intent scoring and search limit settings.
 *
 * The method shares its name with the injected $catalogIntentConfig
 * property; PHP keeps methods and properties in separate namespaces, so
 * the two do not clash.
 *
 * @return array<string, mixed>
 */
private function catalogIntentConfig(): array
{
    $catalog = $this->catalogIntentConfig;

    return [
        'min_score' => $catalog->getMinScore(),
        'ambiguity_delta' => $catalog->getAmbiguityDelta(),
        'intent_search_limit' => $catalog->getIntentSearchLimit(),
        'list_search_limit' => $catalog->getListSearchLimit(),
        'min_allowed_score' => $catalog->getMinAllowedScore(),
        'max_allowed_score' => $catalog->getMaxAllowedScore(),
    ];
}
|
|
|
|
/**
 * Collects the two line limits exposed by ContextServiceConfig.
 *
 * @return array<string, mixed>
 */
private function contextConfig(): array
{
    $context = $this->contextServiceConfig;

    return [
        'max_visible_regular_lines' => $context->getMaxVisibleRegularLines(),
        'max_full_lines' => $context->getMaxFullLines(),
    ];
}
|
|
|
|
/**
 * Validates the governance section.
 *
 * An empty section is reported as an error. Otherwise every governance
 * getter listed below is invoked in order and its return value discarded;
 * the first \InvalidArgumentException raised by a getter is recorded as an
 * error and stops the remaining calls.
 *
 * @param array<string, mixed> $governance
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (not written here)
 */
private function validateGovernance(array $governance, array &$errors, array &$warnings): void
{
    if ([] === $governance) {
        $errors[] = 'governance config must not be empty.';

        return;
    }

    $config = $this->governanceConfig;

    // First-class callables: nothing is invoked until the loop below runs.
    $accessors = [
        $config->getRegressionProtectedShortModelTokens(...),
        $config->getRegressionProtectedMeasurementValues(...),
        $config->getRegressionProtectedTechnicalPromptKeywords(...),
        $config->getRegressionTechnicalPriorityRequiredMarkers(...),
        $config->getRegressionProtectedAccessoryPromptKeywords(...),
        $config->getRegressionProtectedSearchRepairSpecificityTerms(...),
        $config->getRegressionProtectedRetrievalReagentWords(...),
        $config->getRegressionProtectedRetrievalDeviceWordGroups(...),
        $config->getRegressionShopPromptOriginalQuery(...),
        $config->getRegressionShopPromptRequiredOutputInstructionMarkers(...),
        $config->getRegressionShopQueryMetaGuardTerms(...),
        $config->getRegressionShopQueryContextFallbackFilterTerms(...),
        $config->getVocabularyProtectedShortModelTokens(...),
        $config->getLanguageProtectedStopwordTerms(...),
        $config->getLanguageRequiredCleanupProfiles(...),
        $config->getLanguageRequiredProfileTerms(...),
        $config->getCorePatternAuditSourceRoots(...),
        $config->getCorePatternAuditExcludedPathPrefixes(...),
        $config->getCorePatternAuditExcludedPathPatterns(...),
        $config->getCorePatternAuditWarningPathPrefixes(...),
        $config->getCorePatternAuditSuspiciousCalls(...),
        $config->getCorePatternAuditDomainMarkerTerms(...),
        $config->getCorePatternAuditAllowedLiteralPatterns(...),
        $config->getCorePatternAuditMaxSnippetLength(...),
    ];

    try {
        foreach ($accessors as $readValue) {
            $readValue();
        }
    } catch (\InvalidArgumentException $invalid) {
        $errors[] = $invalid->getMessage();
    }
}
|
|
/**
 * Validates the runtime section: each required path-like entry must be
 * present and non-blank after string conversion and trimming.
 *
 * @param array<string, mixed> $runtime
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (not written here)
 */
private function validateRuntime(array $runtime, array &$errors, array &$warnings): void
{
    $requiredKeys = ['root', 'knowledge_root', 'index_ndjson', 'index_meta', 'upload_dir'];

    foreach ($requiredKeys as $requiredKey) {
        $configured = (string) ($runtime[$requiredKey] ?? '');
        if ('' !== trim($configured)) {
            continue;
        }

        $errors[] = 'runtime.' . $requiredKey . ' must not be empty.';
    }
}
|
|
|
|
/**
 * Validates the index section.
 *
 * When the section carries an "error" entry only a warning is emitted and
 * no further checks run. Otherwise chunk size/overlap, the embedding model
 * name, the embedding dimension and the scoring version are checked.
 *
 * @param array<string, mixed> $index
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateIndex(array $index, array &$errors, array &$warnings): void
{
    if (isset($index['error'])) {
        $warnings[] = 'index configuration could not be loaded from DB/provider: ' . (string) $index['error'];

        return;
    }

    $size = $this->asInt($index['chunk_size'] ?? null);
    $overlap = $this->asInt($index['chunk_overlap'] ?? null);

    if (null === $size || $size <= 0) {
        $errors[] = 'index.chunk_size must be greater than 0.';
    }

    if (null === $overlap || $overlap < 0) {
        $errors[] = 'index.chunk_overlap must be greater than or equal to 0.';
    }

    // Only comparable when both values resolved to integers.
    if (null !== $size && null !== $overlap && $overlap >= $size) {
        $errors[] = 'index.chunk_overlap must be smaller than index.chunk_size.';
    }

    $embeddingModel = (string) ($index['embedding_model'] ?? '');
    if ('' === trim($embeddingModel)) {
        $errors[] = 'index.embedding_model must not be empty.';
    }

    // Both entries share the same "strictly positive integer" rule.
    foreach (['embedding_dimension', 'scoring_version'] as $positiveKey) {
        $resolved = $this->asInt($index[$positiveKey] ?? null) ?? 0;
        if ($resolved <= 0) {
            $errors[] = 'index.' . $positiveKey . ' must be greater than 0.';
        }
    }
}
|
|
|
|
/**
 * Validates the model generation section.
 *
 * When the section carries an "error" entry only a warning is emitted and
 * no further checks run. Otherwise the model name, the context size and the
 * two retrieval limits are checked.
 *
 * @param array<string, mixed> $model
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateModel(array $model, array &$errors, array &$warnings): void
{
    if (isset($model['error'])) {
        $warnings[] = 'model configuration could not be loaded from DB/provider: ' . (string) $model['error'];

        return;
    }

    $modelName = (string) ($model['model_name'] ?? '');
    if ('' === trim($modelName)) {
        $errors[] = 'model_generation.model_name must not be empty.';
    }

    $contextSize = $this->asInt($model['num_ctx'] ?? null) ?? 0;
    if ($contextSize < 512) {
        $errors[] = 'model_generation.num_ctx must be at least 512.';
    }

    // Both retrieval limits must resolve to an integer of at least 1.
    foreach (['retrieval_max_chunks', 'retrieval_vector_top_k'] as $limitKey) {
        $limit = $this->asInt($model[$limitKey] ?? null) ?? 0;
        if ($limit < 1) {
            $errors[] = 'model_generation.' . $limitKey . ' must be greater than 0.';
        }
    }
}
|
|
|
|
/**
 * Validates the retrieval section.
 *
 * Checks the threshold ordering, the hard chunk limit, the cleanup profile
 * reference and the vocabulary map, then compares the inventory parameter
 * against the active retrieval values and warns about differences.
 *
 * @param array<string, mixed> $retrieval
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateRetrieval(array $retrieval, array &$errors, array &$warnings): void
{
    $lowerBound = (float) ($retrieval['threshold_floor'] ?? 0.0);
    $configured = (float) ($retrieval['vector_score_threshold'] ?? 0.0);
    $upperBound = (float) ($retrieval['threshold_ceil'] ?? 1.0);

    if ($lowerBound > $configured || $configured > $upperBound) {
        $errors[] = 'retrieval threshold must satisfy threshold_floor <= vector_score_threshold <= threshold_ceil.';
    }

    $hardMaxChunks = (int) ($retrieval['hard_max_chunks'] ?? 0);
    if ($hardMaxChunks < 1) {
        $errors[] = 'retrieval.hard_max_chunks must be greater than 0.';
    }

    // The profile name is compared in trimmed form; non-strings count as blank.
    $rawProfile = $retrieval['generic_exact_selection_cleanup_profile'] ?? null;
    $profileName = is_string($rawProfile) ? trim($rawProfile) : '';
    if ('' === $profileName) {
        $errors[] = 'retrieval.generic_exact_selection_cleanup_profile must be a non-empty string.';
    } elseif (!in_array($profileName, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
        $errors[] = 'retrieval.generic_exact_selection_cleanup_profile references unknown language cleanup profile: ' . $profileName . '.';
    }

    $this->validateStringListMap($retrieval['vocabulary'] ?? [], 'retrieval.vocabulary', $errors, $warnings);

    $inventory = $retrieval['inventory_parameter'] ?? [];
    if (!is_array($inventory)) {
        return;
    }

    foreach ($inventory as $rawKey => $inventoryValue) {
        $inventoryKey = (string) $rawKey;

        $comparable = $this->shouldCompareRetrievalInventoryKey($inventoryKey, $retrieval)
            && array_key_exists($inventoryKey, $retrieval);

        // Loose comparison is kept on purpose: it is what the original check used.
        if ($comparable && $retrieval[$inventoryKey] != $inventoryValue) {
            $warnings[] = 'retrieval.inventory.' . $inventoryKey . ' differs from active retriever config.';
        }
    }
}
|
|
|
|
/**
 * Decides whether an inventory key should be compared with the active
 * retrieval configuration.
 *
 * Retrieval vocabulary lists can be resolved from dedicated vocabulary
 * views. The backwards-compatible inventory parameter may still hold raw
 * legacy list values for those keys, so comparing them against the active
 * retriever facade would produce false-positive validation warnings.
 *
 * @param array<string, mixed> $retrieval
 *
 * @return bool false for vocabulary-backed keys and for keys present in
 *              $retrieval['vocabulary']; true otherwise
 */
private function shouldCompareRetrievalInventoryKey(string $key, array $retrieval): bool
{
    $vocabularyBacked = in_array($key, $this->retrievalVocabularyBackedInventoryKeys(), true);
    if ($vocabularyBacked) {
        return false;
    }

    $vocabulary = $retrieval['vocabulary'] ?? [];
    if (!is_array($vocabulary)) {
        return true;
    }

    return !array_key_exists($key, $vocabulary);
}
|
|
|
|
/**
 * Lists the inventory keys that are excluded from the inventory comparison
 * because they are treated as vocabulary-backed.
 *
 * @return string[]
 */
private function retrievalVocabularyBackedInventoryKeys(): array
{
    $tokenLists = [
        'generic_product_tokens',
        'important_short_model_tokens',
        'family_descriptor_tokens',
    ];

    $looksLikeLists = [
        'looks_like_reagent_tokens',
        'looks_like_safety_docs',
        'looks_like_reagent_words',
        'looks_like_document_words',
        'looks_like_safety_words',
        'looks_like_device_words',
    ];

    return [...$tokenLists, ...$looksLikeLists];
}
|
|
|
|
/**
 * Validates the prompt section: characters-per-token, the two reserve
 * ratios, the rules/detection maps and the technical product model regex.
 *
 * @param array<string, mixed> $prompt
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validatePrompt(array $prompt, array &$errors, array &$warnings): void
{
    $charsPerToken = (int) ($prompt['chars_per_token'] ?? 0);
    if ($charsPerToken < 1) {
        $errors[] = 'prompt.chars_per_token must be greater than 0.';
    }

    // A missing (or null) ratio is rejected, as is any value outside [0, 1].
    foreach (['output_reserve_ratio', 'safety_reserve_ratio'] as $ratioKey) {
        $rawRatio = $prompt[$ratioKey] ?? null;
        if (null === $rawRatio || (float) $rawRatio < 0.0 || (float) $rawRatio > 1.0) {
            $errors[] = 'prompt.' . $ratioKey . ' must be between 0 and 1.';
        }
    }

    foreach (['rules', 'detection'] as $mapKey) {
        $this->validateStringListMap($prompt[$mapKey] ?? [], 'prompt.' . $mapKey, $errors, $warnings);
    }

    $modelPattern = $prompt['detection']['technical_product_model_pattern'] ?? null;
    $this->validateRegexPattern($modelPattern, 'prompt.detection.technical_product_model_pattern', $errors);
}
|
|
|
|
/**
 * Validates the agent section.
 *
 * Checks, in this order: the top-level message/label/template maps, the
 * commercial table follow-up settings, the RAG evidence guard, the shop
 * runtime cleanup profile and current-input preservation, the shop query
 * optimizer, input normalization (limits, patterns, fuzzy routing, prompt
 * texts) and finally the shop context-resolution term and anchor settings.
 * Sub-sections that are not arrays are treated as empty.
 *
 * @param array<string, mixed> $agent
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateAgent(array $agent, array &$errors, array &$warnings): void
{
    // Returns $parent[$key] when it is an array, otherwise an empty array.
    $section = static fn (array $parent, string $key): array => is_array($parent[$key] ?? null) ? $parent[$key] : [];

    foreach (['messages', 'source_labels', 'html_templates'] as $mapKey) {
        $this->validateStringListMap($agent[$mapKey] ?? [], 'agent.' . $mapKey, $errors, $warnings);
    }

    // follow_up_context.commercial_table_follow_up
    $tableFollowUp = $section($section($agent, 'follow_up_context'), 'commercial_table_follow_up');
    $tablePath = 'agent.follow_up_context.commercial_table_follow_up.';

    $this->validateRegexPatternList($tableFollowUp['prompt_patterns'] ?? [], $tablePath . 'prompt_patterns', $errors);
    $this->validateRegexPatternList($tableFollowUp['history_anchor_patterns'] ?? [], $tablePath . 'history_anchor_patterns', $errors);
    $this->validateStringList($this->toList($tableFollowUp['table_terms'] ?? []), $tablePath . 'table_terms', $errors, $warnings);
    $this->validateStringList($this->toList($tableFollowUp['commercial_terms'] ?? []), $tablePath . 'commercial_terms', $errors, $warnings);
    $this->validateRegexPatternList($tableFollowUp['indicator_marker_patterns'] ?? [], $tablePath . 'indicator_marker_patterns', $errors);

    foreach (['query_template_with_model', 'query_template_without_model'] as $templateKey) {
        if ('' === trim((string) ($tableFollowUp[$templateKey] ?? ''))) {
            $errors[] = $tablePath . $templateKey . ' must not be empty.';
        }
    }

    // rag_evidence_guard
    $ragGuard = $section($agent, 'rag_evidence_guard');
    $ragPath = 'agent.rag_evidence_guard.';

    $ragProfile = $ragGuard['cleanup_profile'] ?? null;
    if (!is_string($ragProfile) || '' === trim($ragProfile)) {
        $errors[] = $ragPath . 'cleanup_profile must be a non-empty string.';
    } elseif (!in_array($ragProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
        // The untrimmed value is compared and reported here.
        $errors[] = $ragPath . 'cleanup_profile references unknown language cleanup profile: ' . $ragProfile . '.';
    }

    $this->validateStringList($this->toList($ragGuard['stop_terms'] ?? []), $ragPath . 'stop_terms', $errors, $warnings);
    $this->validateStringListMap($ragGuard['synonyms'] ?? [], $ragPath . 'synonyms', $errors, $warnings);
    $this->validateRegexPatternList($ragGuard['aggregate_query_patterns'] ?? [], $ragPath . 'aggregate_query_patterns', $errors);
    $this->validateStringList($this->toList($ragGuard['aggregate_evidence_terms'] ?? []), $ragPath . 'aggregate_evidence_terms', $errors, $warnings);
    $this->validateRegexPatternList($ragGuard['aggregate_answer_evidence_patterns'] ?? [], $ragPath . 'aggregate_answer_evidence_patterns', $errors);

    // shop_runtime: meta query guard cleanup profile
    $shopRuntime = $section($agent, 'shop_runtime');
    $queryCleanup = $section($shopRuntime, 'query_cleanup');
    $contextResolution = $section($shopRuntime, 'context_resolution');
    $metaGuard = $section($contextResolution, 'meta_query_guard');
    $contextPath = 'agent.shop_runtime.context_resolution.';

    // A missing or blank profile falls back to the agent runner's configured value.
    $guardProfile = $metaGuard['cleanup_profile'] ?? null;
    $guardProfile = is_string($guardProfile) && '' !== trim($guardProfile)
        ? trim($guardProfile)
        : $this->agentRunnerConfig->getShopQueryContextFallbackCleanupProfile();

    if (!in_array($guardProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
        $errors[] = $contextPath . 'meta_query_guard.cleanup_profile references unknown language cleanup profile: ' . $guardProfile . '.';
    }

    // shop_runtime: current input preservation
    $preservation = $section($queryCleanup, 'current_input_preservation');
    $preservationPath = 'agent.shop_runtime.query_cleanup.current_input_preservation.';

    if (array_key_exists('enabled', $preservation) && !is_bool($preservation['enabled'])) {
        $errors[] = $preservationPath . 'enabled must be boolean.';
    }
    $this->validateStringList($this->toList($preservation['terms'] ?? []), $preservationPath . 'terms', $errors, $warnings);

    // shop query optimizer
    $this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
    $this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);

    // input_normalization: limits and patterns
    $normalization = $section($agent, 'input_normalization');
    $normalizationPrompt = $section($normalization, 'prompt');
    $normalizationPath = 'agent.input_normalization.';

    foreach (['max_input_chars', 'max_output_chars'] as $limitKey) {
        if (($this->asInt($normalization[$limitKey] ?? null) ?? 0) < 1) {
            $errors[] = $normalizationPath . $limitKey . ' must be greater than 0.';
        }
    }
    if (($this->asInt($normalization['max_added_tokens'] ?? null) ?? -1) < 0) {
        $errors[] = $normalizationPath . 'max_added_tokens must be greater than or equal to 0.';
    }
    if (($this->asInt($normalization['max_length_ratio_percent'] ?? null) ?? 0) < 100) {
        $errors[] = $normalizationPath . 'max_length_ratio_percent must be at least 100.';
    }

    $this->validateRegexPattern($normalization['output_prefix_pattern'] ?? null, $normalizationPath . 'output_prefix_pattern', $errors);
    $this->validateRegexPatternList($normalization['skip_patterns'] ?? [], $normalizationPath . 'skip_patterns', $errors);
    $this->validateStringList($this->toList($normalizationPrompt['rules'] ?? []), $normalizationPath . 'prompt.rules', $errors, $warnings);

    // input_normalization.fuzzy_routing
    $fuzzy = $section($normalization, 'fuzzy_routing');
    $fuzzyPath = $normalizationPath . 'fuzzy_routing.';

    foreach (['min_token_length', 'medium_token_length', 'long_token_length'] as $lengthKey) {
        if (($this->asInt($fuzzy[$lengthKey] ?? null) ?? 0) < 1) {
            $errors[] = $fuzzyPath . $lengthKey . ' must be greater than 0.';
        }
    }
    foreach (['max_distance_short', 'max_distance_medium', 'max_distance_long'] as $distanceKey) {
        // Unresolvable values default to -1 so they are rejected; 0 is allowed.
        if (($this->asInt($fuzzy[$distanceKey] ?? null) ?? -1) < 0) {
            $errors[] = $fuzzyPath . $distanceKey . ' must be greater than or equal to 0.';
        }
    }

    $similarity = $this->asInt($fuzzy['min_similarity_percent'] ?? null) ?? 0;
    if ($similarity < 1 || $similarity > 100) {
        $errors[] = $fuzzyPath . 'min_similarity_percent must be between 1 and 100.';
    }
    $this->validateStringList($this->toList($fuzzy['terms'] ?? []), $fuzzyPath . 'terms', $errors, $warnings);

    // input_normalization.prompt texts
    foreach (['intro', 'output_format_block', 'current_user_input_label'] as $promptKey) {
        if ('' === trim((string) ($normalizationPrompt[$promptKey] ?? ''))) {
            $errors[] = $normalizationPath . 'prompt.' . $promptKey . ' must not be empty.';
        }
    }

    // shop_runtime.context_resolution: usage terms and history anchors.
    // Entries that are present but not arrays are skipped without a message.
    $contextUsage = $contextResolution['context_usage'] ?? [];
    if (is_array($contextUsage)) {
        $this->validateStringList($this->toList($contextUsage['referential_terms'] ?? []), $contextPath . 'context_usage.referential_terms', $errors, $warnings);
    }

    $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? [];
    if (is_array($anchorEnrichment)) {
        $anchorPath = $contextPath . 'history_anchor_enrichment.';

        $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), $anchorPath . 'trigger_terms', $errors, $warnings);
        $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], $anchorPath . 'anchor_patterns', $errors);

        if ('' === trim((string) ($anchorEnrichment['template'] ?? ''))) {
            $errors[] = $anchorPath . 'template must not be empty.';
        }
    }
}
|
|
|
|
/**
 * Validates the vector section: service URL, port and the two minimum
 * scores (search and tags), each of which must resolve to a float in [0, 1].
 *
 * @param array<string, mixed> $vector
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (not written here)
 */
private function validateVector(array $vector, array &$errors, array &$warnings): void
{
    $serviceUrl = (string) ($vector['service_url'] ?? '');
    if ('' === trim($serviceUrl)) {
        $errors[] = 'vector.service_url must not be empty.';
    }

    $port = $this->asInt($vector['port'] ?? null) ?? 0;
    if ($port < 1) {
        $errors[] = 'vector.port must be greater than 0.';
    }

    // Non-array sub-sections are treated as empty.
    $searchSection = is_array($vector['search'] ?? null) ? $vector['search'] : [];
    $tagSection = is_array($vector['tags'] ?? null) ? $vector['tags'] : [];

    $minScores = [
        'search.min_score' => $searchSection['min_score'] ?? null,
        'tags.min_score' => $tagSection['min_score'] ?? null,
    ];

    foreach ($minScores as $label => $rawScore) {
        $resolved = $this->asFloat($rawScore);
        if (null === $resolved || $resolved < 0.0 || $resolved > 1.0) {
            $errors[] = 'vector.' . $label . ' must be between 0 and 1.';
        }
    }
}
|
|
|
|
/**
 * Validates the commerce section. Nothing is checked unless commerce is
 * enabled; when it is, the store API base URL is required and an
 * unresolvable result limit produces a warning.
 *
 * @param array<string, mixed> $commerce
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateCommerce(array $commerce, array &$errors, array &$warnings): void
{
    $enabled = $this->asBool($commerce['enabled'] ?? false);

    if ($enabled) {
        $baseUrl = (string) ($commerce['store_api_base_url'] ?? '');
        if ('' === trim($baseUrl)) {
            $errors[] = 'commerce.store_api_base_url must not be empty when commerce is enabled.';
        }

        $maxResults = $this->asInt($commerce['max_shop_results'] ?? null) ?? 0;
        if ($maxResults < 1) {
            $warnings[] = 'commerce.max_shop_results could not be resolved as a positive integer.';
        }
    }
}
|
|
|
|
/**
 * Validates the commerce query section.
 *
 * Checks the nested string lists, the regex patterns, the cleanup profile
 * reference and that the protected measurement values from the governance
 * configuration are matched by the measurement pattern and are not listed
 * among the filter search tokens.
 *
 * The governance getter is treated elsewhere in this class as able to throw
 * \InvalidArgumentException; such an exception is recorded as an error here
 * instead of aborting the whole validation run.
 *
 * @param array<string, mixed> $commerceQuery
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateCommerceQuery(array $commerceQuery, array &$errors, array &$warnings): void
{
    $this->validateStringListMap($commerceQuery, 'commerce_query', $errors, $warnings);

    // A missing, non-array or empty "patterns" entry is reported the same way.
    $patterns = is_array($commerceQuery['patterns'] ?? null) ? $commerceQuery['patterns'] : [];
    if ($patterns === []) {
        $errors[] = 'commerce_query.patterns must be an array.';
    } else {
        $this->validateCommerceQueryPatterns($patterns, $errors, $warnings);
    }

    $cleanupProfile = $commerceQuery['cleanup_profile'] ?? null;
    if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
        $errors[] = 'commerce_query.cleanup_profile must be a non-empty string.';
    } elseif (!in_array($cleanupProfile, $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
        $errors[] = 'commerce_query.cleanup_profile references unknown language cleanup profile: ' . $cleanupProfile . '.';
    }

    try {
        $protectedValues = $this->governanceConfig->getRegressionProtectedMeasurementValues();
    } catch (\InvalidArgumentException $e) {
        // Same handling as the other validators in this class that read governance values.
        $errors[] = $e->getMessage();

        return;
    }

    $measurementPattern = $patterns['measurement_value_token'] ?? null;
    $filterTokens = $commerceQuery['filter_search_tokens'] ?? [];

    foreach ($protectedValues as $measurementValue) {
        // preg_match yields 1 only on a match; 0 (no match) and false (regex error) both count as failures.
        if (is_string($measurementPattern) && @preg_match($measurementPattern, $measurementValue) !== 1) {
            $errors[] = 'commerce_query.patterns.measurement_value_token must match protected measurement value: ' . $measurementValue . '.';
        }

        if (is_array($filterTokens) && in_array($measurementValue, $filterTokens, true)) {
            $errors[] = 'commerce_query.filter_search_tokens must not remove protected measurement value: ' . $measurementValue . '.';
        }
    }
}
|
|
|
|
/**
 * Validates the shop matching section by checking it as a nested map of
 * string lists under the "shop_matching" path.
 *
 * @param array<string, mixed> $shopMatching
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateShopMatching(array $shopMatching, array &$errors, array &$warnings): void
{
    $sectionPath = 'shop_matching';

    $this->validateStringListMap($shopMatching, $sectionPath, $errors, $warnings);
}
|
|
|
|
/**
 * Validates the search repair section: the repair query limit must not be
 * negative, nested string lists must be well-formed and every entry under
 * "patterns" must be a valid regex (or list of regexes).
 *
 * @param array<string, mixed> $searchRepair
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateSearchRepair(array $searchRepair, array &$errors, array &$warnings): void
{
    $maxRepairQueries = (int) ($searchRepair['max_repair_queries'] ?? 0);
    if ($maxRepairQueries < 0) {
        $errors[] = 'search_repair.max_repair_queries must be greater than or equal to 0.';
    }

    $this->validateStringListMap($searchRepair, 'search_repair', $errors, $warnings);

    $repairPatterns = $searchRepair['patterns'] ?? [];
    $this->validateRegexPatternMap($repairPatterns, 'search_repair.patterns', $errors);
}
|
|
|
|
/**
 * Validates the intent section: nested string lists first, then the regex
 * pattern lists of the commerce, light and sales sub-sections.
 * Sub-sections that are not arrays are treated as empty.
 *
 * @param array<string, mixed> $intent
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateIntent(array $intent, array &$errors, array &$warnings): void
{
    $this->validateStringListMap($intent, 'intent', $errors, $warnings);

    // Sub-section name => keys that must hold lists of regex patterns.
    $regexListKeys = [
        'commerce' => ['support_diagnostic_patterns', 'explicit_commerce_intent_patterns'],
        'light' => ['strong_patterns'],
        'sales' => ['comparison_signals'],
    ];

    foreach ($regexListKeys as $sectionName => $listKeys) {
        $subSection = is_array($intent[$sectionName] ?? null) ? $intent[$sectionName] : [];

        foreach ($listKeys as $listKey) {
            $this->validateRegexPatternList(
                $subSection[$listKey] ?? [],
                'intent.' . $sectionName . '.' . $listKey,
                $errors
            );
        }
    }
}
|
|
|
|
/**
 * Validates the vocabulary section.
 *
 * Checks the classes/views/maps entries as nested string lists and warns
 * when the retrieval view's "important_short_model_tokens.add" list lacks a
 * protected short model token from the governance configuration.
 *
 * The governance getter is treated elsewhere in this class as able to throw
 * \InvalidArgumentException; such an exception is recorded as an error here
 * instead of aborting the whole validation run.
 *
 * @param array<string, mixed> $vocabulary
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateVocabulary(array $vocabulary, array &$errors, array &$warnings): void
{
    $this->validateStringListMap($vocabulary['classes'] ?? [], 'vocabulary.classes', $errors, $warnings);
    $this->validateStringListMap($vocabulary['views'] ?? [], 'vocabulary.views', $errors, $warnings);
    $this->validateStringListMap($vocabulary['maps'] ?? [], 'vocabulary.maps', $errors, $warnings);

    // The protected-token check only applies when both levels are arrays.
    $retrievalViews = $vocabulary['views']['retrieval'] ?? null;
    if (!is_array($retrievalViews)) {
        return;
    }

    $shortModel = $retrievalViews['important_short_model_tokens']['add'] ?? [];
    if (!is_array($shortModel)) {
        return;
    }

    try {
        $protectedTokens = $this->governanceConfig->getVocabularyProtectedShortModelTokens();
    } catch (\InvalidArgumentException $e) {
        // Same handling as the other validators in this class that read governance values.
        $errors[] = $e->getMessage();

        return;
    }

    foreach ($protectedTokens as $token) {
        if (!in_array($token, $shortModel, true)) {
            $warnings[] = 'vocabulary.views.retrieval.important_short_model_tokens should contain protected token ' . $token . '.';
        }
    }
}
|
|
|
|
/**
 * Validates the language section.
 *
 * After checking the section as nested string lists, it verifies against
 * the governance configuration that required cleanup profiles exist, that
 * protected terms are not stop words and are registered as protected, and
 * that required terms are present in their profile buckets. The first
 * \InvalidArgumentException raised during these checks is recorded as an
 * error and ends them.
 *
 * @param array<string, mixed> $language
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateLanguage(array $language, array &$errors, array &$warnings): void
{
    $this->validateStringListMap($language, 'language', $errors, $warnings);

    $configuredStopwords = $language['stopwords'] ?? null;
    $stopwords = is_array($configuredStopwords) ? $configuredStopwords : [];

    try {
        $knownProfiles = $this->languageCleanupConfig->getCleanupProfileNames();

        foreach ($this->governanceConfig->getLanguageRequiredCleanupProfiles() as $requiredProfile) {
            if (in_array($requiredProfile, $knownProfiles, true)) {
                // Result is discarded; presumably the lookup is made so a malformed profile raises.
                $this->languageCleanupConfig->getCleanupProfile($requiredProfile);
            } else {
                $errors[] = 'language.cleanup_profiles must contain required profile: ' . $requiredProfile . '.';
            }
        }

        foreach ($this->governanceConfig->getLanguageProtectedStopwordTerms() as $protectedTerm) {
            $listedAsStopword = in_array($protectedTerm, $stopwords, true);
            if ($listedAsStopword) {
                $errors[] = 'language.stopwords must not contain protected term: ' . $protectedTerm . '.';
            }

            $registered = $this->languageCleanupConfig->isProtectedTerm($protectedTerm);
            if (!$registered) {
                $errors[] = 'language.protected_terms must contain protected term: ' . $protectedTerm . '.';
            }
        }

        foreach ($this->governanceConfig->getLanguageRequiredProfileTerms() as $profileName => $bucketsWithTerms) {
            $profile = $this->languageCleanupConfig->getCleanupProfile($profileName);

            foreach ($bucketsWithTerms as $bucket => $requiredTerms) {
                $available = $profile[$bucket] ?? [];

                foreach ($requiredTerms as $requiredTerm) {
                    if (in_array($requiredTerm, $available, true)) {
                        continue;
                    }

                    $errors[] = sprintf('language.cleanup_profiles.%s.%s must contain required term: %s.', $profileName, $bucket, $requiredTerm);
                }
            }
        }
    } catch (\InvalidArgumentException $invalid) {
        $errors[] = $invalid->getMessage();
    }
}
|
|
|
|
/**
 * Validates the query enrichment section: the expansion limit must not be
 * negative and "rules" must be a map of non-empty strings to non-empty
 * strings. Only the first malformed rule is reported.
 *
 * @param array<string, mixed> $queryEnrichment
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (not written here)
 */
private function validateQueryEnrichment(array $queryEnrichment, array &$errors, array &$warnings): void
{
    $maxExpansions = (int) ($queryEnrichment['max_expansions'] ?? 0);
    if ($maxExpansions < 0) {
        $errors[] = 'query_enrichment.max_expansions must be greater than or equal to 0.';
    }

    $rules = $queryEnrichment['rules'] ?? [];
    if (!is_array($rules)) {
        $errors[] = 'query_enrichment.rules must be a map.';

        return;
    }

    foreach ($rules as $source => $target) {
        $sourceOk = is_string($source) && '' !== trim($source);
        $targetOk = is_string($target) && '' !== trim($target);

        if ($sourceOk && $targetOk) {
            continue;
        }

        $errors[] = 'query_enrichment.rules must contain non-empty string mappings.';

        return;
    }
}
|
|
|
|
/**
 * Checks that a value is a non-blank string that PCRE accepts as a pattern.
 *
 * The pattern is test-run against an empty subject; a `false` result from
 * preg_match() is reported as an invalid pattern.
 *
 * @param mixed $value
 * @param list<string> $errors collected error messages (appended to)
 */
private function validateRegexPattern(mixed $value, string $path, array &$errors): void
{
    $usable = is_string($value) && '' !== trim($value);
    if (!$usable) {
        $errors[] = $path . ' must be a non-empty regex string.';

        return;
    }

    // Warnings from a malformed pattern are silenced; only the result matters.
    $outcome = @preg_match($value, '');
    if (false === $outcome) {
        $errors[] = $path . ' is not a valid regex pattern.';
    }
}
|
|
|
|
/**
 * Validates the commerce query pattern map: a fixed set of keys must hold
 * valid regex patterns, "history_context" must be a valid regex fragment
 * and, when present, "filter_search_tokens" is checked as a string list.
 *
 * @param array<string, mixed> $patterns
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateCommerceQueryPatterns(array $patterns, array &$errors, array &$warnings): void
{
    $basePath = 'commerce_query.patterns.';

    $fullPatternKeys = [
        'history_context_value',
        'prompt_sanitize',
        'whitespace_collapse',
        'whitespace_split',
        'history_question',
        'price_between',
        'price_max',
        'price_min',
        'direct_product_digit',
        'model_like',
        'accessory_like',
        'contains_digit',
        'model_number_token',
        'model_context_token',
        'model_suffix_token',
        'instruction_or_presentation_token',
        'measurement_value_token',
    ];

    foreach ($fullPatternKeys as $patternKey) {
        $candidate = $patterns[$patternKey] ?? null;
        $this->validateRegexPattern($candidate, $basePath . $patternKey, $errors);
    }

    // Checked as a fragment (no delimiters of its own), unlike the keys above.
    $this->validateRegexFragment($patterns['history_context'] ?? null, $basePath . 'history_context', $errors);

    if (!array_key_exists('filter_search_tokens', $patterns)) {
        return;
    }

    $tokens = $this->toList($patterns['filter_search_tokens']);
    $this->validateStringList($tokens, $basePath . 'filter_search_tokens', $errors, $warnings);
}
|
|
|
|
/**
 * Checks that a value is a non-blank string usable as a regex fragment.
 *
 * The fragment is wrapped as `/(?:…)/u` and test-run against an empty
 * subject; a `false` result from preg_match() is reported as invalid.
 *
 * @param list<string> $errors collected error messages (appended to)
 */
private function validateRegexFragment(mixed $value, string $path, array &$errors): void
{
    $usable = is_string($value) && '' !== trim($value);
    if (!$usable) {
        $errors[] = $path . ' must be a non-empty regex fragment string.';

        return;
    }

    // Warnings from a malformed fragment are silenced; only the result matters.
    $wrapped = sprintf('/(?:%s)/u', $value);
    $outcome = @preg_match($wrapped, '');
    if (false === $outcome) {
        $errors[] = $path . ' is not a valid regex fragment.';
    }
}
|
|
|
|
/**
 * Returns the value unchanged when it is an array, otherwise an empty
 * array. Keys are not reindexed.
 *
 * @return array<int|string, mixed>
 */
private function toList(mixed $value): array
{
    if (!is_array($value)) {
        return [];
    }

    return $value;
}
|
|
|
|
|
|
/**
 * Validates a map whose entries are either a single regex pattern or an
 * array of regex patterns. A non-array input is reported as an error.
 *
 * @param mixed $patterns
 * @param list<string> $errors collected error messages (appended to)
 */
private function validateRegexPatternMap(mixed $patterns, string $path, array &$errors): void
{
    if (!is_array($patterns)) {
        $errors[] = $path . ' must be an array of regex patterns.';

        return;
    }

    foreach ($patterns as $name => $entry) {
        $entryPath = $path . '.' . (string) $name;

        if (is_array($entry)) {
            // Nested arrays are checked element by element.
            $this->validateRegexPatternList($entry, $entryPath, $errors);
        } else {
            $this->validateRegexPattern($entry, $entryPath, $errors);
        }
    }
}
|
|
|
|
/**
 * Validates every element of an array as a regex pattern, reporting each
 * failure under "<path>.<index>". A non-array input is reported as an error.
 *
 * @param mixed $patterns
 * @param list<string> $errors collected error messages (appended to)
 */
private function validateRegexPatternList(mixed $patterns, string $path, array &$errors): void
{
    if (!is_array($patterns)) {
        $errors[] = $path . ' must be an array of regex patterns.';

        return;
    }

    foreach ($patterns as $position => $candidate) {
        $candidatePath = $path . '.' . (string) $position;
        $this->validateRegexPattern($candidate, $candidatePath, $errors);
    }
}
|
|
|
|
/**
 * Recursively validates a nested configuration map.
 *
 * List-shaped arrays are checked as string lists, other arrays are walked
 * recursively, blank strings are errors, and any remaining value that is
 * neither scalar nor null produces a warning.
 *
 * @param mixed $value
 * @param list<string> $errors   collected error messages (appended to)
 * @param list<string> $warnings collected warning messages (appended to)
 */
private function validateStringListMap(mixed $value, string $path, array &$errors, array &$warnings): void
{
    if (!is_array($value)) {
        $errors[] = $path . ' must be an array.';

        return;
    }

    foreach ($value as $name => $entry) {
        $entryPath = $path . '.' . (string) $name;

        if (is_array($entry)) {
            if ($this->isList($entry)) {
                $this->validateStringList($entry, $entryPath, $errors, $warnings);
            } else {
                $this->validateStringListMap($entry, $entryPath, $errors, $warnings);
            }

            continue;
        }

        if (is_string($entry)) {
            if ('' === trim($entry)) {
                $errors[] = $entryPath . ' must not be empty.';
            }

            continue;
        }

        // Strings were handled above, so this accepts int, float, bool and null.
        if (is_scalar($entry) || null === $entry) {
            continue;
        }

        $warnings[] = $entryPath . ' contains a non-scalar value.';
    }
}
|
|
|
|
/**
 * Validates the items of a string list.
 *
 * For each item (addressed as "<path>.<index>"):
 * - a non-scalar value records an error;
 * - a value that is blank after string cast and trim records an error;
 * - a value already seen, compared case-insensitively on the trimmed text,
 *   records a warning against $path.
 *
 * @param array<int|string, mixed> $items    Items to inspect.
 * @param string                   $path     Configuration path used as the message prefix.
 * @param list<string>             $errors   Error collector, appended to in place.
 * @param list<string>             $warnings Warning collector, appended to in place.
 */
private function validateStringList(array $items, string $path, array &$errors, array &$warnings): void
{
    // Lower-cased values encountered so far, stored as array keys.
    $encountered = [];

    foreach ($items as $position => $raw) {
        $itemPath = $path . '.' . (string) $position;

        if (!is_scalar($raw)) {
            $errors[] = $itemPath . ' must be a scalar value.';

            continue;
        }

        $text = trim((string) $raw);
        if ($text === '') {
            $errors[] = $itemPath . ' must not be empty.';

            continue;
        }

        $folded = mb_strtolower($text, 'UTF-8');
        if (array_key_exists($folded, $encountered)) {
            $warnings[] = $path . ' contains duplicate value: ' . $text . '.';
        }

        $encountered[$folded] = true;
    }
}
|
|
|
|
/**
 * Tells whether an array is a list, as defined by array_is_list().
 *
 * @param array<int|string, mixed> $value
 */
private function isList(array $value): bool
{
    $hasSequentialKeys = array_is_list($value);

    return $hasSequentialKeys;
}
|
|
|
|
/**
 * Reads a parameter from the injected parameter bag.
 *
 * @param string $name    Parameter name to look up.
 * @param mixed  $default Returned when the bag does not have $name.
 *
 * @return mixed The stored parameter value, or $default when it is absent.
 */
private function param(string $name, mixed $default = null): mixed
{
    return $this->parameters->has($name)
        ? $this->parameters->get($name)
        : $default;
}
|
|
|
|
/**
 * Casts a numeric value to int.
 *
 * @return int|null The integer cast of $value, or null when is_numeric() rejects it.
 */
private function asInt(mixed $value): ?int
{
    if (!is_numeric($value)) {
        return null;
    }

    return (int) $value;
}
|
|
|
|
/**
 * Casts a numeric value to float.
 *
 * @return float|null The float cast of $value, or null when is_numeric() rejects it.
 */
private function asFloat(mixed $value): ?float
{
    if (!is_numeric($value)) {
        return null;
    }

    return (float) $value;
}
|
|
|
|
/**
 * Interprets a loosely typed value as a boolean.
 *
 * - Booleans are returned unchanged.
 * - Strings are true only for "1", "true", "yes" or "on", compared
 *   case-insensitively with surrounding whitespace ignored; every other
 *   string (including the empty string) is false.
 * - Any other type goes through PHP's regular boolean cast.
 */
private function asBool(mixed $value): bool
{
    if (is_bool($value)) {
        return $value;
    }

    if (is_string($value)) {
        // Trim before comparing so padded values such as "true\n" or " on "
        // are not silently read as false.
        $normalized = strtolower(trim($value));

        return in_array($normalized, ['1', 'true', 'yes', 'on'], true);
    }

    return (bool) $value;
}
|
|
}
|