$config */
    public function __construct(
        private array $config = [],
        private ?DomainVocabularyConfig $vocabulary = null,
        private ?GenreConfig $genreConfig = null,
    ) {
    }

    // ---------------------------------------------------------------------
    // Scalar settings. Every accessor reads a required top-level config key
    // and throws InvalidArgumentException when it is missing or out of range.
    // ---------------------------------------------------------------------

    public function hardMaxChunks(): int
    {
        return $this->requiredInt('hard_max_chunks', 1);
    }

    public function hardMaxVectorK(): int
    {
        return $this->requiredInt('hard_max_vectork', 1);
    }

    public function hardMaxKeywordK(): int
    {
        return $this->requiredInt('hard_max_keywordk', 1);
    }

    public function vectorScoreThreshold(): float
    {
        return $this->requiredFloat('vector_score_threshold', 0.0, 1.0);
    }

    public function thresholdFloor(): float
    {
        return $this->requiredFloat('threshold_floor', 0.0, 1.0);
    }

    public function thresholdCeil(): float
    {
        return $this->requiredFloat('threshold_ceil', 0.0, 1.0);
    }

    public function listBonus(): float
    {
        return $this->requiredFloat('list_bonus', 1.0);
    }

    public function rrfK(): int
    {
        return $this->requiredInt('rrf_k', 1);
    }

    public function keywordTopKMultiplier(): float
    {
        return $this->requiredFloat('keyword_topk_multiplier', 0.1);
    }

    public function keywordScoreThreshold(): float
    {
        return $this->requiredFloat('keyword_score_threshold', 0.0, 1.0);
    }

    public function keywordRrfWeight(): float
    {
        return $this->requiredFloat('keyword_rrf_weight', 0.0);
    }

    public function scopedVectorRrfWeight(): float
    {
        return $this->requiredFloat('scoped_vector_rrf_weight', 0.0);
    }

    public function scopedKeywordRrfWeight(): float
    {
        return $this->requiredFloat('scoped_keyword_rrf_weight', 0.0);
    }

    public function emptyRrfFallbackTopN(): int
    {
        return $this->requiredInt('empty_rrf_fallback_topn', 1);
    }

    public function maxChunksPerDoc(): int
    {
        return $this->requiredInt('max_chunks_per_doc', 1);
    }

    public function minChunkDistance(): int
    {
        return $this->requiredInt('min_chunk_distance', 0);
    }

    public function dominantDocWindow(): int
    {
        return $this->requiredInt('dominant_doc_window', 1);
    }

    public function dominantDocMinHits(): int
    {
        return $this->requiredInt('dominant_doc_min_hits', 1);
    }

    public function dominantDocMaxChunks(): int
    {
        return $this->requiredInt('dominant_doc_max_chunks', 1);
    }

    public function exactDocumentMaxChunks(): int
    {
        return $this->requiredInt('exact_document_max_chunks', 1);
    }

    public function focusedProductWindow(): int
    {
        return $this->requiredInt('focused_product_window', 1);
    }

    public function focusedProductMinScore(): float
    {
        return $this->requiredFloat('focused_product_min_score', 0.0);
    }

    public function focusedProductMinGap(): float
    {
        return $this->requiredFloat('focused_product_min_gap', 0.0);
    }

    public function focusedProductMaxChunks(): int
    {
        return $this->requiredInt('focused_product_max_chunks', 1);
    }

    // ---------------------------------------------------------------------
    // String-list settings. Accessors written as "genre ?: required" use the
    // genre config value when it is non-empty and otherwise fall back to the
    // required top-level config key.
    // ---------------------------------------------------------------------

    /** @return string[] */
    public function catalogListShortcutPatterns(): array
    {
        return $this->requiredStringList('catalog_list_shortcut_patterns');
    }

    /** @return array<string, string[]> */
    public function exactSelectionTokenVariantPrefixes(): array
    {
        return $this->genreStringListMap('retrieval_and_language.exact_selection.token_variant_prefixes')
            ?: $this->requiredStringListMap('exact_selection_token_variant_prefixes');
    }

    /** @return string[] */
    public function exactSelectionTokenVariantSuffixes(): array
    {
        return $this->requiredStringList('exact_selection_token_variant_suffixes');
    }

    /** @return string[] */
    public function exactSelectionIndicatorQuestionTokens(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_question_tokens')
            ?: $this->requiredStringList('exact_selection_indicator_question_tokens');
    }

    /** @return string[] */
    public function exactSelectionIndicatorQuestionPhrases(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_question_phrases')
            ?: $this->requiredStringList('exact_selection_indicator_question_phrases');
    }

    /** @return string[] */
    public function exactSelectionIndicatorTableHeadingPatterns(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_table_heading_patterns')
            ?: $this->requiredStringList('exact_selection_indicator_table_heading_patterns');
    }

    /** @return string[] */
    public function exactSelectionIndicatorTableHeaderPatterns(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_table_header_patterns')
            ?: $this->requiredStringList('exact_selection_indicator_table_header_patterns');
    }

    /** @return string[] */
    public function exactSelectionIndicatorTableRowPatterns(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_table_row_patterns')
            ?: $this->requiredStringList('exact_selection_indicator_table_row_patterns');
    }

    /** @return string[] */
    public function exactSelectionIndicatorTableRequiredPrimaryTerms(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_table_required_primary_terms')
            ?: $this->requiredStringList('exact_selection_indicator_table_required_primary_terms');
    }

    /** @return string[] */
    public function exactSelectionIndicatorTableRequiredContextTerms(): array
    {
        return $this->genreStringList('retrieval_and_language.exact_selection.indicator_table_required_context_terms')
            ?: $this->requiredStringList('exact_selection_indicator_table_required_context_terms');
    }

    /** @return string[] */
    public function exactDetailTokens(): array
    {
        return $this->requiredStringList('exact_detail_tokens');
    }

    public function genericExactSelectionCleanupProfile(): string
    {
        return $this->requiredString('generic_exact_selection_cleanup_profile');
    }

    /** @return string[] */
    public function genericExactSelectionTokens(): array
    {
        return $this->requiredStringList('generic_exact_selection_tokens');
    }

    // ---------------------------------------------------------------------
    // Vocabulary-backed lists. An explicit config list wins; otherwise the
    // list is resolved through the vocabulary view named in the config.
    // ---------------------------------------------------------------------

    /** @return string[] */
    public function genericProductTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'generic_product_tokens',
            'vocabulary_views.generic_product_tokens'
        );
    }

    /** @return string[] */
    public function importantShortModelTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'important_short_model_tokens',
            'vocabulary_views.important_short_model_tokens'
        );
    }

    /** @return string[] */
    public function familyDescriptorTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'family_descriptor_tokens',
            'vocabulary_views.family_descriptor_tokens'
        );
    }

    /** @return string[] */
    public function looksLikeReagentTokens(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_reagent_tokens',
            'vocabulary_views.looks_like_reagent_tokens'
        );
    }

    /** @return string[] */
    public function looksLikeSafetyDocs(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_safety_docs',
            'vocabulary_views.looks_like_safety_docs'
        );
    }

    /** @return string[] */
    public function looksLikeReagentWords(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_reagent_words',
            'vocabulary_views.looks_like_reagent_words'
        );
    }

    /** @return string[] */
    public function looksLikeDocumentWords(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_document_words',
            'vocabulary_views.looks_like_document_words'
        );
    }

    /** @return string[] */
    public function looksLikeSafetyWords(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_safety_words',
            'vocabulary_views.looks_like_safety_words'
        );
    }

    /** @return string[] */
    public function looksLikeDeviceWords(): array
    {
        return $this->configuredStringListOrVocabularyView(
            'looks_like_device_words',
            'vocabulary_views.looks_like_device_words'
        );
    }

    /**
     * Effective retrieval vocabulary as a dedicated structure for diagnostics and config dumps.
     *
     * @return array<string, string[]>
     */
    public function vocabularyToArray(): array
    {
        return [
            'generic_product_tokens' => $this->genericProductTokens(),
            'important_short_model_tokens' => $this->importantShortModelTokens(),
            'family_descriptor_tokens' => $this->familyDescriptorTokens(),
            'looks_like_reagent_tokens' => $this->looksLikeReagentTokens(),
            'looks_like_safety_docs' => $this->looksLikeSafetyDocs(),
            'looks_like_reagent_words' => $this->looksLikeReagentWords(),
            'looks_like_document_words' => $this->looksLikeDocumentWords(),
            'looks_like_safety_words' => $this->looksLikeSafetyWords(),
            'looks_like_device_words' => $this->looksLikeDeviceWords(),
        ];
    }

    /**
     * Resolves every setting through its accessor and returns them keyed by config name.
     *
     * Because each accessor validates its value, this throws InvalidArgumentException
     * as soon as one setting is missing or invalid.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        $settings = [
            'hard_max_chunks' => $this->hardMaxChunks(),
            'hard_max_vectork' => $this->hardMaxVectorK(),
            'hard_max_keywordk' => $this->hardMaxKeywordK(),
            'vector_score_threshold' => $this->vectorScoreThreshold(),
            'threshold_floor' => $this->thresholdFloor(),
            'threshold_ceil' => $this->thresholdCeil(),
            'list_bonus' => $this->listBonus(),
            'rrf_k' => $this->rrfK(),
            'keyword_topk_multiplier' => $this->keywordTopKMultiplier(),
            'keyword_score_threshold' => $this->keywordScoreThreshold(),
            'keyword_rrf_weight' => $this->keywordRrfWeight(),
            'scoped_vector_rrf_weight' => $this->scopedVectorRrfWeight(),
            'scoped_keyword_rrf_weight' => $this->scopedKeywordRrfWeight(),
            'empty_rrf_fallback_topn' => $this->emptyRrfFallbackTopN(),
            'max_chunks_per_doc' => $this->maxChunksPerDoc(),
            'min_chunk_distance' => $this->minChunkDistance(),
            'dominant_doc_window' => $this->dominantDocWindow(),
            'dominant_doc_min_hits' => $this->dominantDocMinHits(),
            'dominant_doc_max_chunks' => $this->dominantDocMaxChunks(),
            'exact_document_max_chunks' => $this->exactDocumentMaxChunks(),
            'focused_product_window' => $this->focusedProductWindow(),
            'focused_product_min_score' => $this->focusedProductMinScore(),
            'focused_product_min_gap' => $this->focusedProductMinGap(),
            'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
            'catalog_list_shortcut_patterns' => $this->catalogListShortcutPatterns(),
            'exact_selection_token_variant_prefixes' => $this->exactSelectionTokenVariantPrefixes(),
            'exact_selection_token_variant_suffixes' => $this->exactSelectionTokenVariantSuffixes(),
            'exact_selection_indicator_question_tokens' => $this->exactSelectionIndicatorQuestionTokens(),
            'exact_selection_indicator_question_phrases' => $this->exactSelectionIndicatorQuestionPhrases(),
            'exact_selection_indicator_table_heading_patterns' => $this->exactSelectionIndicatorTableHeadingPatterns(),
            'exact_selection_indicator_table_header_patterns' => $this->exactSelectionIndicatorTableHeaderPatterns(),
            'exact_selection_indicator_table_row_patterns' => $this->exactSelectionIndicatorTableRowPatterns(),
            'exact_selection_indicator_table_required_primary_terms' => $this->exactSelectionIndicatorTableRequiredPrimaryTerms(),
            'exact_selection_indicator_table_required_context_terms' => $this->exactSelectionIndicatorTableRequiredContextTerms(),
            'exact_detail_tokens' => $this->exactDetailTokens(),
            'generic_exact_selection_cleanup_profile' => $this->genericExactSelectionCleanupProfile(),
            'generic_exact_selection_tokens' => $this->genericExactSelectionTokens(),
        ];

        // Array union appends the vocabulary entries after the settings above; the key
        // sets are disjoint, so the resulting keys and their order match a flat literal.
        return $settings + $this->vocabularyToArray();
    }

    /**
     * Reads a string list from the genre config, or [] when no genre config is set
     * or the lookup yields null.
     *
     * @return string[]
     */
    private function genreStringList(string $path): array
    {
        return $this->genreConfig?->getValueStringList($path) ?? [];
    }

    /**
     * Reads a map of string lists from the genre config and normalizes it.
     *
     * Returns [] when no genre config is set, the lookup yields null or an empty
     * array, or nothing survives normalization.
     *
     * @return array<string, string[]>
     */
    private function genreStringListMap(string $path): array
    {
        $raw = $this->genreConfig?->getValueArray($path) ?? [];

        return $raw === [] ? [] : $this->normalizeStringListMap($raw);
    }

    /**
     * Reads a required integer setting and validates it against inclusive bounds.
     *
     * Numeric strings and floats are accepted and cast with (int), so fractional
     * values are truncated toward zero.
     *
     * @throws InvalidArgumentException when the key is missing, not numeric, not finite, or out of range
     */
    private function requiredInt(string $key, int $min = PHP_INT_MIN, ?int $max = null): int
    {
        $raw = $this->requiredValue($key);
        if (!is_numeric($raw)) {
            throw $this->invalid($key, 'must be numeric');
        }

        // INF, NAN and overflowing numeric strings such as "1e999" pass is_numeric(),
        // but an (int) cast turns them into 0, which would slip through a min of 0.
        if (!is_finite((float) $raw)) {
            throw $this->invalid($key, 'must be a finite number');
        }

        $number = (int) $raw;
        if ($number < $min) {
            throw $this->invalid($key, sprintf('must be greater than or equal to %d', $min));
        }

        if ($max !== null && $number > $max) {
            throw $this->invalid($key, sprintf('must be less than or equal to %d', $max));
        }

        return $number;
    }

    /**
     * Reads a required float setting and validates it against inclusive bounds.
     *
     * @throws InvalidArgumentException when the key is missing, not numeric, NaN, or out of range
     */
    private function requiredFloat(string $key, float $min = -INF, ?float $max = null): float
    {
        $raw = $this->requiredValue($key);
        if (!is_numeric($raw)) {
            throw $this->invalid($key, 'must be numeric');
        }

        $number = (float) $raw;

        // Every ordered comparison with NaN is false, so NaN would bypass both range checks.
        if (is_nan($number)) {
            throw $this->invalid($key, 'must not be NaN');
        }

        if ($number < $min) {
            throw $this->invalid($key, sprintf('must be greater than or equal to %s', (string) $min));
        }

        if ($max !== null && $number > $max) {
            throw $this->invalid($key, sprintf('must be less than or equal to %s', (string) $max));
        }

        return $number;
    }

    /**
     * Reads a required scalar setting as a trimmed, non-empty string.
     *
     * @throws InvalidArgumentException when the key is missing, not scalar, or blank after trimming
     */
    private function requiredString(string $key): string
    {
        $raw = $this->requiredValue($key);
        if (!is_scalar($raw)) {
            throw $this->invalid($key, 'must be a non-empty string');
        }

        $text = trim((string) $raw);
        if ($text === '') {
            throw $this->invalid($key, 'must be a non-empty string');
        }

        return $text;
    }

    /**
     * Reads a required list setting as trimmed, de-duplicated, non-empty strings.
     *
     * Non-scalar and blank items are dropped silently; first-occurrence order is kept.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when the key is missing, not an array, or no item survives
     */
    private function requiredStringList(string $key): array
    {
        $raw = $this->requiredValue($key);
        if (!is_array($raw)) {
            throw $this->invalid($key, 'must be a list of non-empty strings');
        }

        $strings = [];
        foreach ($raw as $entry) {
            if (!is_scalar($entry)) {
                continue;
            }

            $text = trim((string) $entry);
            if ($text !== '' && !in_array($text, $strings, true)) {
                $strings[] = $text;
            }
        }

        if ($strings === []) {
            throw $this->invalid($key, 'must contain at least one non-empty string');
        }

        return $strings;
    }

    /**
     * Normalizes a map of string lists.
     *
     * Entries whose key is not a non-blank string or whose value is not an array are
     * skipped. Items are trimmed, blank and non-scalar items are dropped, duplicates
     * are removed, and entries left with no items are omitted. Keys are trimmed.
     *
     * @param array<mixed> $value
     *
     * @return array<string, string[]>
     */
    private function normalizeStringListMap(array $value): array
    {
        $normalized = [];
        foreach ($value as $mapKey => $items) {
            if (!is_string($mapKey) || trim($mapKey) === '' || !is_array($items)) {
                continue;
            }

            $cleanItems = [];
            foreach ($items as $item) {
                if (!is_scalar($item)) {
                    continue;
                }

                $text = trim((string) $item);
                if ($text !== '' && !in_array($text, $cleanItems, true)) {
                    $cleanItems[] = $text;
                }
            }

            if ($cleanItems !== []) {
                $normalized[trim($mapKey)] = $cleanItems;
            }
        }

        return $normalized;
    }

    /**
     * Reads a required map of string lists, normalized by normalizeStringListMap().
     *
     * @return array<string, string[]>
     *
     * @throws InvalidArgumentException when the key is missing, not an array, or no entry survives
     */
    private function requiredStringListMap(string $key): array
    {
        $raw = $this->requiredValue($key);
        if (!is_array($raw)) {
            throw $this->invalid($key, 'must be a map of string lists');
        }

        $normalized = $this->normalizeStringListMap($raw);
        if ($normalized === []) {
            throw $this->invalid($key, 'must contain at least one non-empty map entry');
        }

        return $normalized;
    }

    /**
     * Resolves a vocabulary-backed list.
     *
     * When $configPath exists in the config it is read as a required string list.
     * Otherwise the vocabulary view whose name is stored at the dotted config path
     * $viewPathConfigPath is used.
     *
     * @return string[]
     *
     * @throws InvalidArgumentException when neither source is available, or the view is empty
     */
    private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
    {
        if ($this->hasKey($configPath)) {
            return $this->requiredStringList($configPath);
        }

        if ($this->vocabulary === null) {
            throw $this->missing($configPath);
        }

        $viewPath = $this->requiredPathString($viewPathConfigPath);
        $terms = $this->vocabulary->view($viewPath, []);
        if ($terms === []) {
            throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewPath));
        }

        return $terms;
    }

    /**
     * Reads a required scalar at a dotted config path as a trimmed, non-empty string.
     *
     * @throws InvalidArgumentException when the path is missing, not scalar, or blank after trimming
     */
    private function requiredPathString(string $key): string
    {
        $raw = $this->requiredPathValue($key);
        if (!is_scalar($raw)) {
            throw $this->invalid($key, 'must be a non-empty string');
        }

        $text = trim((string) $raw);
        if ($text === '') {
            throw $this->invalid($key, 'must be a non-empty string');
        }

        return $text;
    }

    /**
     * Walks the config along a dot-separated path and returns the value found there.
     *
     * @throws InvalidArgumentException when any segment of the path does not exist
     */
    private function requiredPathValue(string $key): mixed
    {
        $node = $this->config;
        foreach (explode('.', $key) as $segment) {
            if (!is_array($node) || !array_key_exists($segment, $node)) {
                throw $this->missing($key);
            }

            $node = $node[$segment];
        }

        return $node;
    }

    /**
     * Reports whether a dot-separated path exists in the config; a null value still counts as present.
     */
    private function hasKey(string $key): bool
    {
        $node = $this->config;
        foreach (explode('.', $key) as $segment) {
            if (!is_array($node) || !array_key_exists($segment, $node)) {
                return false;
            }

            $node = $node[$segment];
        }

        return true;
    }

    /**
     * Returns the value stored under a top-level config key (no dotted-path traversal).
     *
     * @throws InvalidArgumentException when the key does not exist
     */
    private function requiredValue(string $key): mixed
    {
        if (!array_key_exists($key, $this->config)) {
            throw $this->missing($key);
        }

        return $this->config[$key];
    }

    /** Builds (does not throw) the exception for an absent config key. */
    private function missing(string $key): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX retrieval config "%s" is missing.', $key));
    }

    /** Builds (does not throw) the exception for a config key that fails validation. */
    private function invalid(string $key, string $reason): InvalidArgumentException
    {
        return new InvalidArgumentException(sprintf('RetrieX retrieval config "%s" %s.', $key, $reason));
    }
}