$config */
    public function __construct(
        private array $config = [],
        private readonly ?DomainVocabularyConfig $vocabulary = null,
    ) {
    }

    // ---------------------------------------------------------------------
    // Numeric settings.
    //
    // Every accessor below reads one key from $config, falls back to the
    // matching class constant when the key is absent or not numeric, and
    // clamps the result to the bounds passed to intValue()/floatValue().
    // ---------------------------------------------------------------------

    /** 'hard_max_chunks' (default self::HARD_MAX_CHUNKS), never below 1. */
    public function hardMaxChunks(): int
    {
        return $this->intValue('hard_max_chunks', self::HARD_MAX_CHUNKS, 1);
    }

    /** 'hard_max_vectork' (default self::HARD_MAX_VECTORK), never below 1. */
    public function hardMaxVectorK(): int
    {
        return $this->intValue('hard_max_vectork', self::HARD_MAX_VECTORK, 1);
    }

    /** 'hard_max_keywordk' (default self::HARD_MAX_KEYWORDK), never below 1. */
    public function hardMaxKeywordK(): int
    {
        return $this->intValue('hard_max_keywordk', self::HARD_MAX_KEYWORDK, 1);
    }

    /** 'vector_score_threshold' (default self::VECTOR_SCORE_THRESHOLD), clamped to [0.0, 1.0]. */
    public function vectorScoreThreshold(): float
    {
        return $this->floatValue('vector_score_threshold', self::VECTOR_SCORE_THRESHOLD, 0.0, 1.0);
    }

    /** 'threshold_floor' (default self::THRESHOLD_FLOOR), clamped to [0.0, 1.0]. */
    public function thresholdFloor(): float
    {
        return $this->floatValue('threshold_floor', self::THRESHOLD_FLOOR, 0.0, 1.0);
    }

    /** 'threshold_ceil' (default self::THRESHOLD_CEIL), clamped to [0.0, 1.0]. */
    public function thresholdCeil(): float
    {
        return $this->floatValue('threshold_ceil', self::THRESHOLD_CEIL, 0.0, 1.0);
    }

    /** 'list_bonus' (default self::LIST_BONUS), never below 1.0. */
    public function listBonus(): float
    {
        return $this->floatValue('list_bonus', self::LIST_BONUS, 1.0);
    }

    /** 'rrf_k' (default self::RRF_K), never below 1. */
    public function rrfK(): int
    {
        return $this->intValue('rrf_k', self::RRF_K, 1);
    }

    /** 'keyword_topk_multiplier' (default self::KEYWORD_TOPK_MULTIPLIER), never below 0.1. */
    public function keywordTopKMultiplier(): float
    {
        return $this->floatValue('keyword_topk_multiplier', self::KEYWORD_TOPK_MULTIPLIER, 0.1);
    }

    /** 'keyword_score_threshold' (default self::KEYWORD_SCORE_THRESHOLD), clamped to [0.0, 1.0]. */
    public function keywordScoreThreshold(): float
    {
        return $this->floatValue('keyword_score_threshold', self::KEYWORD_SCORE_THRESHOLD, 0.0, 1.0);
    }

    /** 'keyword_rrf_weight' (default self::KEYWORD_RRF_WEIGHT), never below 0.0. */
    public function keywordRrfWeight(): float
    {
        return $this->floatValue('keyword_rrf_weight', self::KEYWORD_RRF_WEIGHT, 0.0);
    }

    /** 'scoped_vector_rrf_weight' (default self::SCOPED_VECTOR_RRF_WEIGHT), never below 0.0. */
    public function scopedVectorRrfWeight(): float
    {
        return $this->floatValue('scoped_vector_rrf_weight', self::SCOPED_VECTOR_RRF_WEIGHT, 0.0);
    }

    /** 'scoped_keyword_rrf_weight' (default self::SCOPED_KEYWORD_RRF_WEIGHT), never below 0.0. */
    public function scopedKeywordRrfWeight(): float
    {
        return $this->floatValue('scoped_keyword_rrf_weight', self::SCOPED_KEYWORD_RRF_WEIGHT, 0.0);
    }

    /** 'empty_rrf_fallback_topn' (default self::EMPTY_RRF_FALLBACK_TOPN), never below 1. */
    public function emptyRrfFallbackTopN(): int
    {
        return $this->intValue('empty_rrf_fallback_topn', self::EMPTY_RRF_FALLBACK_TOPN, 1);
    }

    /** 'max_chunks_per_doc' (default self::MAX_CHUNKS_PER_DOC), never below 1. */
    public function maxChunksPerDoc(): int
    {
        return $this->intValue('max_chunks_per_doc', self::MAX_CHUNKS_PER_DOC, 1);
    }

    /** 'min_chunk_distance' (default self::MIN_CHUNK_DISTANCE), never below 0. */
    public function minChunkDistance(): int
    {
        return $this->intValue('min_chunk_distance', self::MIN_CHUNK_DISTANCE, 0);
    }

    /** 'dominant_doc_window' (default self::DOMINANT_DOC_WINDOW), never below 1. */
    public function dominantDocWindow(): int
    {
        return $this->intValue('dominant_doc_window', self::DOMINANT_DOC_WINDOW, 1);
    }

    /** 'dominant_doc_min_hits' (default self::DOMINANT_DOC_MIN_HITS), never below 1. */
    public function dominantDocMinHits(): int
    {
        return $this->intValue('dominant_doc_min_hits', self::DOMINANT_DOC_MIN_HITS, 1);
    }

    /** 'dominant_doc_max_chunks' (default self::DOMINANT_DOC_MAX_CHUNKS), never below 1. */
    public function dominantDocMaxChunks(): int
    {
        return $this->intValue('dominant_doc_max_chunks', self::DOMINANT_DOC_MAX_CHUNKS, 1);
    }

    /** 'exact_document_max_chunks' (default self::EXACT_DOCUMENT_MAX_CHUNKS), never below 1. */
    public function exactDocumentMaxChunks(): int
    {
        return $this->intValue('exact_document_max_chunks', self::EXACT_DOCUMENT_MAX_CHUNKS, 1);
    }

    /** 'focused_product_window' (default self::FOCUSED_PRODUCT_WINDOW), never below 1. */
    public function focusedProductWindow(): int
    {
        return $this->intValue('focused_product_window', self::FOCUSED_PRODUCT_WINDOW, 1);
    }

    /** 'focused_product_min_score' (default self::FOCUSED_PRODUCT_MIN_SCORE), never below 0.0. */
    public function focusedProductMinScore(): float
    {
        return $this->floatValue('focused_product_min_score', self::FOCUSED_PRODUCT_MIN_SCORE, 0.0);
    }

    /** 'focused_product_min_gap' (default self::FOCUSED_PRODUCT_MIN_GAP), never below 0.0. */
    public function focusedProductMinGap(): float
    {
        return $this->floatValue('focused_product_min_gap', self::FOCUSED_PRODUCT_MIN_GAP, 0.0);
    }

    /** 'focused_product_max_chunks' (default self::FOCUSED_PRODUCT_MAX_CHUNKS), never below 1. */
    public function focusedProductMaxChunks(): int
    {
        return $this->intValue('focused_product_max_chunks', self::FOCUSED_PRODUCT_MAX_CHUNKS, 1);
    }

    // ---------------------------------------------------------------------
    // Vocabulary lists.
    //
    // Resolution order for each list: the $config entry under the list's key
    // wins when it is an array with at least one usable item; otherwise the
    // result of vocabularyView() for the 'retrieval.*' path is used, which
    // itself falls back to the class constant.
    // ---------------------------------------------------------------------

    /**
     * 'generic_product_tokens' list.
     *
     * @return string[]
     */
    public function genericProductTokens(): array
    {
        return $this->stringList(
            'generic_product_tokens',
            $this->vocabularyView('retrieval.generic_product_tokens', self::GENERIC_PRODUCT_TOKEN),
        );
    }

    /**
     * 'important_short_model_tokens' list.
     *
     * @return string[]
     */
    public function importantShortModelTokens(): array
    {
        return $this->stringList(
            'important_short_model_tokens',
            $this->vocabularyView('retrieval.important_short_model_tokens', self::IMPORTANT_SHORT_MODEL_TOKEN),
        );
    }

    /**
     * 'family_descriptor_tokens' list.
     *
     * @return string[]
     */
    public function familyDescriptorTokens(): array
    {
        return $this->stringList(
            'family_descriptor_tokens',
            $this->vocabularyView('retrieval.family_descriptor_tokens', self::FAMILY_DESCRIPTOR_TOKEN),
        );
    }

    /**
     * 'looks_like_reagent_tokens' list.
     *
     * @return string[]
     */
    public function looksLikeReagentTokens(): array
    {
        return $this->stringList(
            'looks_like_reagent_tokens',
            $this->vocabularyView('retrieval.looks_like_reagent_tokens', self::LOOKS_LIKE_REAGENT_TOKENS),
        );
    }

    /**
     * 'looks_like_safety_docs' list.
     *
     * @return string[]
     */
    public function looksLikeSafetyDocs(): array
    {
        return $this->stringList(
            'looks_like_safety_docs',
            $this->vocabularyView('retrieval.looks_like_safety_docs', self::LOOKS_LIKE_SAFETY_DOCS),
        );
    }

    /**
     * 'looks_like_reagent_words' list.
     *
     * @return string[]
     */
    public function looksLikeReagentWords(): array
    {
        return $this->stringList(
            'looks_like_reagent_words',
            $this->vocabularyView('retrieval.looks_like_reagent_words', self::LOOKS_LIKE_REAGENT_WORDS),
        );
    }

    /**
     * 'looks_like_document_words' list.
     *
     * @return string[]
     */
    public function looksLikeDocumentWords(): array
    {
        return $this->stringList(
            'looks_like_document_words',
            $this->vocabularyView('retrieval.looks_like_document_words', self::LOOKS_LIKE_DOCUMENT_WORDS),
        );
    }

    /**
     * 'looks_like_safety_words' list.
     *
     * @return string[]
     */
    public function looksLikeSafetyWords(): array
    {
        return $this->stringList(
            'looks_like_safety_words',
            $this->vocabularyView('retrieval.looks_like_safety_words', self::LOOKS_LIKE_SAFETY_WORDS),
        );
    }

    /**
     * 'looks_like_device_words' list.
     *
     * @return string[]
     */
    public function looksLikeDeviceWords(): array
    {
        return $this->stringList(
            'looks_like_device_words',
            $this->vocabularyView('retrieval.looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS),
        );
    }

    /**
     * Effective retrieval vocabulary as a dedicated structure for diagnostics and config dumps.
     *
     * @return array<string, string[]> Resolved token/word lists keyed by their config key.
     */
    public function vocabularyToArray(): array
    {
        return [
            'generic_product_tokens' => $this->genericProductTokens(),
            'important_short_model_tokens' => $this->importantShortModelTokens(),
            'family_descriptor_tokens' => $this->familyDescriptorTokens(),
            'looks_like_reagent_tokens' => $this->looksLikeReagentTokens(),
            'looks_like_safety_docs' => $this->looksLikeSafetyDocs(),
            'looks_like_reagent_words' => $this->looksLikeReagentWords(),
            'looks_like_document_words' => $this->looksLikeDocumentWords(),
            'looks_like_safety_words' => $this->looksLikeSafetyWords(),
            'looks_like_device_words' => $this->looksLikeDeviceWords(),
        ];
    }

    /**
     * Every effective setting: the numeric values first, followed by the
     * vocabulary lists in the order produced by vocabularyToArray().
     *
     * @return array<string, int|float|string[]>
     */
    public function toArray(): array
    {
        return [
            'hard_max_chunks' => $this->hardMaxChunks(),
            'hard_max_vectork' => $this->hardMaxVectorK(),
            'hard_max_keywordk' => $this->hardMaxKeywordK(),
            'vector_score_threshold' => $this->vectorScoreThreshold(),
            'threshold_floor' => $this->thresholdFloor(),
            'threshold_ceil' => $this->thresholdCeil(),
            'list_bonus' => $this->listBonus(),
            'rrf_k' => $this->rrfK(),
            'keyword_topk_multiplier' => $this->keywordTopKMultiplier(),
            'keyword_score_threshold' => $this->keywordScoreThreshold(),
            'keyword_rrf_weight' => $this->keywordRrfWeight(),
            'scoped_vector_rrf_weight' => $this->scopedVectorRrfWeight(),
            'scoped_keyword_rrf_weight' => $this->scopedKeywordRrfWeight(),
            'empty_rrf_fallback_topn' => $this->emptyRrfFallbackTopN(),
            'max_chunks_per_doc' => $this->maxChunksPerDoc(),
            'min_chunk_distance' => $this->minChunkDistance(),
            'dominant_doc_window' => $this->dominantDocWindow(),
            'dominant_doc_min_hits' => $this->dominantDocMinHits(),
            'dominant_doc_max_chunks' => $this->dominantDocMaxChunks(),
            'exact_document_max_chunks' => $this->exactDocumentMaxChunks(),
            'focused_product_window' => $this->focusedProductWindow(),
            'focused_product_min_score' => $this->focusedProductMinScore(),
            'focused_product_min_gap' => $this->focusedProductMinGap(),
            'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
            // Single source of truth for the vocabulary section; string-keyed
            // unpacking keeps the same keys in the same order.
            ...$this->vocabularyToArray(),
        ];
    }

    /**
     * Reads an integer setting and clamps it to [$min, $max].
     *
     * Returns $default unclamped when the stored value is not numeric, or is
     * numeric but not finite (NAN, +/-INF, or a numeric string that overflows
     * to infinity): such values have no integer representation and the (int)
     * cast would collapse them to 0.
     *
     * @param string   $key     Key looked up in $config.
     * @param int      $default Value used when the key is missing or unusable.
     * @param int      $min     Inclusive lower bound.
     * @param int|null $max     Inclusive upper bound, or null for none.
     */
    private function intValue(string $key, int $default, int $min = PHP_INT_MIN, ?int $max = null): int
    {
        $value = $this->raw($key, $default);
        if (!is_numeric($value) || !is_finite((float) $value)) {
            return $default;
        }

        $number = max($min, (int) $value);
        if ($max !== null) {
            $number = min($max, $number);
        }

        return $number;
    }

    /**
     * Reads a float setting and clamps it to [$min, $max].
     *
     * Returns $default unclamped when the stored value is not numeric or is
     * NAN. NAN must be rejected explicitly: every ordering comparison against
     * it is false, so max()/min() cannot be relied on to clamp it. Infinite
     * values are kept and clamped like any other number.
     *
     * @param string     $key     Key looked up in $config.
     * @param float      $default Value used when the key is missing or unusable.
     * @param float      $min     Inclusive lower bound.
     * @param float|null $max     Inclusive upper bound, or null for none.
     */
    private function floatValue(string $key, float $default, float $min = -INF, ?float $max = null): float
    {
        $value = $this->raw($key, $default);
        if (!is_numeric($value)) {
            return $default;
        }

        $number = (float) $value;
        if (is_nan($number)) {
            return $default;
        }

        $number = max($min, $number);
        if ($max !== null) {
            $number = min($max, $number);
        }

        return $number;
    }

    /**
     * Asks the optional vocabulary object for the list stored at $path.
     *
     * Returns $fallback when no vocabulary was injected or when view() yields
     * null. How view() itself uses the fallback it receives is not visible
     * from this class.
     *
     * @param string[] $fallback
     * @return string[]
     */
    private function vocabularyView(string $path, array $fallback): array
    {
        return $this->vocabulary?->view($path, $fallback) ?? $fallback;
    }

    /**
     * Reads a list setting and normalises it: non-scalar items are dropped,
     * the rest are cast to string and trimmed, empty strings and duplicates
     * are removed, and first-seen order is preserved.
     *
     * $default is returned as-is when the stored value is not an array or
     * when nothing survives normalisation.
     *
     * @param string[] $default
     * @return string[]
     */
    private function stringList(string $key, array $default): array
    {
        $value = $this->raw($key, $default);
        if (!is_array($value)) {
            return $default;
        }

        $seen = [];
        $out = [];
        foreach ($value as $item) {
            if (!is_scalar($item)) {
                continue;
            }

            $item = trim((string) $item);
            // Keyed lookup replaces a linear in_array() scan per item.
            if ($item === '' || isset($seen[$item])) {
                continue;
            }

            $seen[$item] = true;
            $out[] = $item;
        }

        return $out !== [] ? $out : $default;
    }

    /**
     * Returns the stored value for $key, or $default when the key is absent.
     * array_key_exists() is used so an explicitly stored null is returned
     * rather than replaced by the default.
     */
    private function raw(string $key, mixed $default): mixed
    {
        if (array_key_exists($key, $this->config)) {
            return $this->config[$key];
        }

        return $default;
    }
}