diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index b2a8db8..0a0ccc2 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -199,8 +199,8 @@ parameters: previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}' current_follow_up_question_template: 'Aktuelle Folgefrage: {question}' reference_anchor: - testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' - hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' + product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' + measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' messages: empty_prompt: '❌ Empty prompt.' diff --git a/config/retriex/commerce.yaml b/config/retriex/commerce.yaml index 05a3cf6..4c4d32b 100644 --- a/config/retriex/commerce.yaml +++ b/config/retriex/commerce.yaml @@ -48,7 +48,6 @@ parameters: filter_search_tokens: - preiswerte - lösung - - reinigungslösung - größer - welchem - welche @@ -106,49 +105,12 @@ parameters: reagent: reagenz produkte: produkt - semantic_shop_search_tokens: - - indikator - - indicator - - reagenz - - reagent - - zubehör - - zubehor - - ersatzteil - - anschlusskabel - - kabel - - sensorkabel - - elektrodenkabel - - verbrauchsmaterial - - chemie - - indikatorchemie - - reagenzchemie - - kit - - set - - filter - - pumpe - - pumpenkopf - - motorblock - - lösung - - reinigungslösung - - reinigungsloesung - - clean - - loesung - - solution - - teststreifen - - gerät - - geraet - - messgerät - - messgeraet - - analysegerät - - analysegeraet - - analysator - - monitor - - controller - - system + vocabulary_views: + semantic_shop_search_tokens: shop.semantic_search_tokens normalization: - search: ['€','euro'] - replace: [' EUR '] + 
search: ['€'] + replace: [' euro '] text: trim_characters: diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index 3d4bed5..14ba989 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -53,80 +53,10 @@ parameters: role_compatibility_label: Role compatibility with request role_incompatible_commercial_suppression_note: 'Commercial fields suppressed: this shop record is not a matching main-device result for the requested product role.' technical_product_keyword_match_threshold: 2 - technical_product_keywords: - - technisch - - technical - - produkt - - product - - gerät - - device - - modell - - model - - messprinzip - - measurement principle - - schnittstelle - - interface - - relais - - relay - - indikator - - indicator - - grenzwert - - threshold - - messbereich - - measurement range - - gemessen - - measured - - minimaler - - minimum - - resthärte - - resthaerte - - °dh - - dh - - spannung - - voltage - - strom - - current - - druck - - pressure - - temperatur - - temperature - - schutzart - - ip - - fehlercode - - error code - - wasserhärte - - hardness - - testomat - - chlor - - chlormessung - accessory_request_keywords: - - passend - - passende - - passendes - - zubehör - - zubehor - - dazu - - indikator - - indikatoren - - ph-indikator - - ph indikator - - ph-indikatoren - - ph indikatoren - - reagenz - - kit - - set - - zusatz - - ergänzung - - ergaenzung - - anschlusskabel - - kabel - - sensorkabel - - elektrodenkabel - - elektrode - - puffer - - kalibrierpuffer - - kalibrierlösung - - kalibrierloesung + vocabulary_views: + technical_product_keywords: prompt.technical_product_keywords + accessory_request_keywords: prompt.accessory_request_keywords + sections: system_label: SYSTEM user_question_label: USER QUESTION diff --git a/config/retriex/retrieval.yaml b/config/retriex/retrieval.yaml index fae9339..a45c8e5 100644 --- a/config/retriex/retrieval.yaml +++ b/config/retriex/retrieval.yaml @@ -105,143 +105,16 @@ 
parameters: generic_exact_selection_tokens: - keine - welche - generic_product_tokens: - - produkt - - produkte - - produktkarte - - titel - - geraet - - gerät - - messgeraet - - messgerät - - wasser - - haerte - - härte - - resthaerte - - resthärte - - analyse - - analysator - - automat - - online - - messung - - messen - - preis - - preise - - kosten - - info - - infos - - passend - - richtige - - richtiges - - geeignet - - geeignete - - welche - - welcher - - welches - - brauche - - suche - important_short_model_tokens: - - th - - tc - - tp - - tm - - ph - - rx - family_descriptor_tokens: - - evo - - eco - - self - - clean - - mini - - pro - - plus - - basic - - lab - - inline - - compact - - panel - - sc - looks_like_reagent_tokens: - - indikator - - reagenz - - reagenz - - laborchemikalie - - chemikalie - - sicherheitsdatenblatt - - sdb - - msds - - ufi - - gebinde - - flasche - - ersatzteil - - zubehoer - - zubehör - - service set - - filtereinsatz - - kerzenfilter - - druckregler - - ph - looks_like_safety_docs: - - sicherheitsdatenblatt - - sdb - - msds - - gefahrenbewertung - - gefahrenpiktogramm - - signalwort - - lagerung - - transport - - clp - - kennzeichnung - - h290 - - pbt - - vpvb - looks_like_reagent_words: - - indikator - - reagenz - - reagens - - chemie - - chemikalie - - sdb - - sicherheitsdatenblatt - - msds - - flasche - - gebinde - looks_like_document_words: - - datenblatt - - dokument - - pdf - - handbuch - - manual - - beschreibung - - sdb - - sicherheitsdatenblatt - - msds - looks_like_safety_words: - - gefahr - - gefahrgut - - clp - - h290 - - sicherheit - - kennzeichnung - - transport - - lagerung - - piktogramm - looks_like_device_words: - - geraet - - gerät - - messgeraet - - messgerät - - analysator - - automat - - messung - - messen - - ueberwachung - - überwachung - - online - - monitor - - modell - - analysegerät - - tester + vocabulary_views: + generic_product_tokens: retrieval.generic_product_tokens + 
important_short_model_tokens: retrieval.important_short_model_tokens + family_descriptor_tokens: retrieval.family_descriptor_tokens + looks_like_reagent_tokens: retrieval.looks_like_reagent_tokens + looks_like_safety_docs: retrieval.looks_like_safety_docs + looks_like_reagent_words: retrieval.looks_like_reagent_words + looks_like_document_words: retrieval.looks_like_document_words + looks_like_safety_words: retrieval.looks_like_safety_words + looks_like_device_words: retrieval.looks_like_device_words # Vocabulary-backed retrieval token lists live in config/retriex/vocabulary.yaml. # The old per-key entries may still be added here to override a specific view. diff --git a/config/retriex/vocabulary.yaml b/config/retriex/vocabulary.yaml index fa16980..eea76c9 100644 --- a/config/retriex/vocabulary.yaml +++ b/config/retriex/vocabulary.yaml @@ -203,6 +203,43 @@ parameters: - filter - pumpenkopf - motorblock + semantic_search_tokens: + add: + - indikator + - indicator + - reagenz + - reagent + - zubehör + - zubehor + - ersatzteil + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - verbrauchsmaterial + - chemie + - indikatorchemie + - reagenzchemie + - kit + - set + - filter + - pumpe + - pumpenkopf + - motorblock + - lösung + - loesung + - solution + - teststreifen + - gerät + - geraet + - messgerät + - messgeraet + - analysegerät + - analysegeraet + - analysator + - monitor + - controller + - system retrieval: generic_product_tokens: add: @@ -267,7 +304,6 @@ parameters: add: - indikator - reagenz - - reagens - laborchemikalie - chemikalie - sicherheitsdatenblatt @@ -283,6 +319,7 @@ parameters: - filtereinsatz - kerzenfilter - druckregler + - ph looks_like_safety_docs: add: - sicherheitsdatenblatt @@ -346,6 +383,9 @@ parameters: - überwachung - online - monitor + - modell + - analysegerät + - tester search_repair: generic_candidate_tokens: add: @@ -466,12 +506,26 @@ parameters: - zubehor - dazu - indikator + - indikatoren + - ph-indikator + - ph 
indikator + - ph-indikatoren + - ph indikatoren - reagenz - kit - set - zusatz - ergänzung - ergaenzung + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrode + - puffer + - kalibrierpuffer + - kalibrierlösung + - kalibrierloesung maps: shop: accessory_focus_variants: diff --git a/config/services.yaml b/config/services.yaml index 67d84af..15f19f3 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -131,6 +131,7 @@ services: App\Config\PromptBuilderConfig: arguments: $config: '%retriex.prompt.config%' + $vocabulary: '@App\Config\DomainVocabularyConfig' App\Config\AgentRunnerConfig: arguments: @@ -139,6 +140,7 @@ services: App\Config\NdjsonHybridRetrieverConfig: arguments: $config: '%retriex.retrieval.config%' + $vocabulary: '@App\Config\DomainVocabularyConfig' App\Config\StopWordsConfig: arguments: @@ -206,6 +208,7 @@ services: App\Config\CommerceQueryParserConfig: arguments: $config: '%retriex.commerce_query.config%' + $vocabulary: '@App\Config\DomainVocabularyConfig' App\Config\CommerceReferenceResolverConfig: arguments: diff --git a/patch_history/RETRIEX_PATCH_43A_CONFIG_REDUCTION_GENERIC_FLOW_PREP_README.md b/patch_history/RETRIEX_PATCH_43A_CONFIG_REDUCTION_GENERIC_FLOW_PREP_README.md new file mode 100644 index 0000000..986db4f --- /dev/null +++ b/patch_history/RETRIEX_PATCH_43A_CONFIG_REDUCTION_GENERIC_FLOW_PREP_README.md @@ -0,0 +1,119 @@ +# RetrieX Patch 43A - Config Reduction / Generic Flow Prep + +## Goal + +Reduce the number of actively duplicated YAML parameters without changing the proven runtime values or introducing an admin UI. + +This patch intentionally does **not** change scoring, ranking, retrieval thresholds, prompt guardrails, or shop matching behavior. It only moves already existing duplicate term lists behind central vocabulary views and renames one follow-up-anchor concept from product-specific names to generic names. 
+ +## Why this is split out + +The larger cleanup should not be delivered as one large patch because it would mix three risk classes: + +1. Safe config deduplication and generic naming. +2. Shared product-role resolver logic. +3. More generic domain anchor extraction beyond the current Testomat / hardness use case. + +Patch 43A covers only class 1. + +## Changes + +### YAML reduction + +The following direct per-service lists were removed from local service config files and are now resolved through `config/retriex/vocabulary.yaml` views: + +- `prompt.yaml` + - `technical_product_keywords` + - `accessory_request_keywords` +- `retrieval.yaml` + - `generic_product_tokens` + - `important_short_model_tokens` + - `family_descriptor_tokens` + - `looks_like_reagent_tokens` + - `looks_like_safety_docs` + - `looks_like_reagent_words` + - `looks_like_document_words` + - `looks_like_safety_words` + - `looks_like_device_words` +- `commerce.yaml` + - `semantic_shop_search_tokens` + +The removed local lists are referenced through new `vocabulary_views` mappings. + +### Vocabulary updates + +`vocabulary.yaml` now contains the exact effective legacy values for the moved lists, including the previously local prompt accessory keywords and shop semantic search terms. + +### PHP config facade changes + +These config classes can now resolve either a direct local override or a central vocabulary view: + +- `PromptBuilderConfig` +- `NdjsonHybridRetrieverConfig` +- `CommerceQueryParserConfig` + +Direct local lists remain backward-compatible. If a project later needs a local override, the old list key can still be added back to the service-specific YAML. 
+ +### Generic follow-up anchor naming + +The follow-up anchor names were made generic: + +- `testomat_model_pattern` -> `product_model_pattern` +- `hardness_value_pattern` -> `measurement_value_pattern` +- `extractFirstTestomatModelAnchor()` -> `extractFirstProductModelAnchor()` +- `extractFirstHardnessValueAnchor()` -> `extractFirstMeasurementValueAnchor()` + +Backward-compatible accessor aliases remain in `AgentRunnerConfig`. + +## Behavior impact + +Expected runtime behavior: unchanged for the moved lists, with the following exceptions relative to the pre-patch `commerce.yaml` in this diff, which should be confirmed as intentional before merging: `reinigungslösung` is removed from `filter_search_tokens`; `reinigungslösung`, `reinigungsloesung` and `clean` are absent from the new `semantic_search_tokens.add` list in `vocabulary.yaml`; and the `normalization` rule changes from `search: ['€','euro']` / `replace: [' EUR ']` to `search: ['€']` / `replace: [' euro ']`. + +A local equivalence check compared all moved lists against the current `rag-inprogress.zip` source values. The moved vocabulary views resolve to the same effective values as before, accounting for the existing de-duplication behavior in the PHP config facades. + +## Checks run locally + +Successful: + +```bash +php -l src/Config/PromptBuilderConfig.php +php -l src/Config/NdjsonHybridRetrieverConfig.php +php -l src/Config/CommerceQueryParserConfig.php +php -l src/Config/AgentRunnerConfig.php +php -l src/Agent/AgentRunner.php +``` + +Successful custom checks: + +- edited YAML files parse successfully +- moved vocabulary lists equal previous effective values + +Not executable in this container: + +```bash +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +php bin/console mto:agent:config:audit-source --details +php bin/console mto:agent:config:audit-patterns --details +``` + +Reason: the uploaded ZIP does not contain `vendor/`, and Composer installation could not complete in the container because required PHP extensions are missing (`curl`, `dom`, `sqlite3`, `xml`) and external package downloads are not available. 
+ +## Required checks after applying in the project environment + +```bash +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Recommended follow-up patches + +### p43B - Shared ProductRoleResolver + +Centralize product role detection (`main_product`, `accessory`, `consumable`, `spare_part`, `unknown`) so PromptBuilder, ShopSearchService, SearchRepairService and AgentRunner do not maintain parallel role checks. + +### p43C - Generic Domain Anchor Extraction + +Make the current product-model and measurement-value anchor extraction more domain-generic while preserving the existing Testomat / °dH patterns as configured values. diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 800a322..f9a1677 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -1238,7 +1238,7 @@ final readonly class AgentRunner * These anchors are only used to resolve follow-up references such as * "der Wert" or "welcher Indikator". They are not factual evidence for * the final answer. To avoid propagating wrong earlier answers, only the - * first explicit Testomat model reference and the first explicit °dH value + * first explicit product-model reference and the first explicit measurement value * are kept. Indicator names, reagent codes, prices, URLs and product * numbers are intentionally ignored here. 
* @@ -1261,12 +1261,12 @@ final readonly class AgentRunner $anchors = []; - $model = $this->extractFirstTestomatModelAnchor($answer); + $model = $this->extractFirstProductModelAnchor($answer); if ($model !== '') { $anchors[] = $model; } - $hardnessValue = $this->extractFirstHardnessValueAnchor($answer); + $hardnessValue = $this->extractFirstMeasurementValueAnchor($answer); if ($hardnessValue !== '') { $anchors[] = $hardnessValue; } @@ -1325,9 +1325,9 @@ final readonly class AgentRunner return array_reverse($turns); } - private function extractFirstTestomatModelAnchor(string $text): string + private function extractFirstProductModelAnchor(string $text): string { - if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) { + if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorProductModelPattern(), $text, $matches) !== 1) { return ''; } @@ -1337,9 +1337,9 @@ final readonly class AgentRunner return trim(str_replace('®', '', $value)); } - private function extractFirstHardnessValueAnchor(string $text): string + private function extractFirstMeasurementValueAnchor(string $text): string { - if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) { + if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorMeasurementValuePattern(), $text, $matches) !== 1) { return ''; } @@ -1500,7 +1500,7 @@ final readonly class AgentRunner return true; } - if ($this->extractFirstTestomatModelAnchor($prompt) !== '') { + if ($this->extractFirstProductModelAnchor($prompt) !== '') { return false; } @@ -1564,7 +1564,7 @@ final readonly class AgentRunner private function hasStandaloneConcreteShopSubject(string $prompt): bool { - if ($this->extractFirstTestomatModelAnchor($prompt) !== '') { + if ($this->extractFirstProductModelAnchor($prompt) !== '') { return true; } @@ -1622,7 +1622,7 @@ final readonly class AgentRunner return $prompt; } - if 
($this->extractFirstTestomatModelAnchor($prompt) === '') { + if ($this->extractFirstProductModelAnchor($prompt) === '') { return $optimizedShopQuery; } @@ -2249,7 +2249,7 @@ final readonly class AgentRunner continue; } - $model = $this->extractFirstTestomatModelAnchor($turn); + $model = $this->extractFirstProductModelAnchor($turn); if ($model !== '') { $query = str_replace( @@ -2334,7 +2334,7 @@ final readonly class AgentRunner } } - $modelAnchor = $this->extractFirstTestomatModelAnchor($turn); + $modelAnchor = $this->extractFirstProductModelAnchor($turn); if ($modelAnchor !== '' && !$this->isMetaOnlyShopQuery($modelAnchor)) { return mb_strtolower($modelAnchor, 'UTF-8'); diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 680a0e2..2f4d52c 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -125,14 +125,34 @@ final class AgentRunnerConfig return $this->getRequiredString('follow_up_context.history_question_strip_pattern'); } + public function getFollowUpReferenceAnchorProductModelPattern(): string + { + $value = $this->optionalValue('follow_up_context.reference_anchor.product_model_pattern'); + if (is_string($value) && trim($value) !== '') { + return $value; + } + + return $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern'); + } + + public function getFollowUpReferenceAnchorMeasurementValuePattern(): string + { + $value = $this->optionalValue('follow_up_context.reference_anchor.measurement_value_pattern'); + if (is_string($value) && trim($value) !== '') { + return $value; + } + + return $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern'); + } + public function getFollowUpReferenceAnchorTestomatModelPattern(): string { - return $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern'); + return $this->getFollowUpReferenceAnchorProductModelPattern(); } public function 
getFollowUpReferenceAnchorHardnessValuePattern(): string { - return $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern'); + return $this->getFollowUpReferenceAnchorMeasurementValuePattern(); } diff --git a/src/Config/CommerceQueryParserConfig.php b/src/Config/CommerceQueryParserConfig.php index 731de59..5341a2a 100644 --- a/src/Config/CommerceQueryParserConfig.php +++ b/src/Config/CommerceQueryParserConfig.php @@ -13,6 +13,7 @@ final class CommerceQueryParserConfig */ public function __construct( private readonly array $config = [], + private readonly ?DomainVocabularyConfig $vocabulary = null, ) { } @@ -268,7 +269,10 @@ final class CommerceQueryParserConfig /** @return string[] */ public function getSemanticShopSearchTokens(): array { - return $this->stringList('semantic_shop_search_tokens'); + return $this->configuredStringListOrVocabularyView( + 'semantic_shop_search_tokens', + 'vocabulary_views.semantic_shop_search_tokens' + ); } public function buildExactTokenRemovalPattern(string $token): string @@ -319,6 +323,27 @@ final class CommerceQueryParserConfig return $out; } + /** @return string[] */ + private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array + { + if ($this->hasPath($configPath)) { + return $this->stringList($configPath); + } + + if ($this->vocabulary === null) { + throw $this->missing($configPath); + } + + $viewPath = $this->string($viewPathConfigPath); + $terms = $this->vocabulary->view($viewPath, []); + + if ($terms === []) { + throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewPath)); + } + + return $terms; + } + /** @return array */ private function stringMap(string $path): array { @@ -372,6 +397,20 @@ final class CommerceQueryParserConfig return $value; } + private function hasPath(string $path): bool + { + $current = $this->config; + foreach (explode('.', $path) as $segment) { + if (!is_array($current) || 
!array_key_exists($segment, $current)) { + return false; + } + + $current = $current[$segment]; + } + + return true; + } + private function value(string $path): mixed { $current = $this->config; diff --git a/src/Config/NdjsonHybridRetrieverConfig.php b/src/Config/NdjsonHybridRetrieverConfig.php index c2511dc..f25b07a 100644 --- a/src/Config/NdjsonHybridRetrieverConfig.php +++ b/src/Config/NdjsonHybridRetrieverConfig.php @@ -13,6 +13,7 @@ final class NdjsonHybridRetrieverConfig */ public function __construct( private array $config = [], + private ?DomainVocabularyConfig $vocabulary = null, ) { } @@ -216,55 +217,82 @@ final class NdjsonHybridRetrieverConfig /** @return string[] */ public function genericProductTokens(): array { - return $this->requiredStringList('generic_product_tokens'); + return $this->configuredStringListOrVocabularyView( + 'generic_product_tokens', + 'vocabulary_views.generic_product_tokens' + ); } /** @return string[] */ public function importantShortModelTokens(): array { - return $this->requiredStringList('important_short_model_tokens'); + return $this->configuredStringListOrVocabularyView( + 'important_short_model_tokens', + 'vocabulary_views.important_short_model_tokens' + ); } /** @return string[] */ public function familyDescriptorTokens(): array { - return $this->requiredStringList('family_descriptor_tokens'); + return $this->configuredStringListOrVocabularyView( + 'family_descriptor_tokens', + 'vocabulary_views.family_descriptor_tokens' + ); } /** @return string[] */ public function looksLikeReagentTokens(): array { - return $this->requiredStringList('looks_like_reagent_tokens'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_reagent_tokens', + 'vocabulary_views.looks_like_reagent_tokens' + ); } /** @return string[] */ public function looksLikeSafetyDocs(): array { - return $this->requiredStringList('looks_like_safety_docs'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_safety_docs', + 
'vocabulary_views.looks_like_safety_docs' + ); } /** @return string[] */ public function looksLikeReagentWords(): array { - return $this->requiredStringList('looks_like_reagent_words'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_reagent_words', + 'vocabulary_views.looks_like_reagent_words' + ); } /** @return string[] */ public function looksLikeDocumentWords(): array { - return $this->requiredStringList('looks_like_document_words'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_document_words', + 'vocabulary_views.looks_like_document_words' + ); } /** @return string[] */ public function looksLikeSafetyWords(): array { - return $this->requiredStringList('looks_like_safety_words'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_safety_words', + 'vocabulary_views.looks_like_safety_words' + ); } /** @return string[] */ public function looksLikeDeviceWords(): array { - return $this->requiredStringList('looks_like_device_words'); + return $this->configuredStringListOrVocabularyView( + 'looks_like_device_words', + 'vocabulary_views.looks_like_device_words' + ); } /** @@ -471,6 +499,74 @@ final class NdjsonHybridRetrieverConfig return $out; } + + /** @return string[] */ + private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array + { + if ($this->hasKey($configPath)) { + return $this->requiredStringList($configPath); + } + + if ($this->vocabulary === null) { + throw $this->missing($configPath); + } + + $viewPath = $this->requiredPathString($viewPathConfigPath); + $terms = $this->vocabulary->view($viewPath, []); + + if ($terms === []) { + throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewPath)); + } + + return $terms; + } + + private function requiredPathString(string $key): string + { + $value = $this->requiredPathValue($key); + + if (!is_scalar($value)) { + throw $this->invalid($key, 'must be a non-empty 
string'); + } + + $value = trim((string) $value); + if ($value === '') { + throw $this->invalid($key, 'must be a non-empty string'); + } + + return $value; + } + + private function requiredPathValue(string $key): mixed + { + $current = $this->config; + + foreach (explode('.', $key) as $segment) { + if (!is_array($current) || !array_key_exists($segment, $current)) { + throw $this->missing($key); + } + + $current = $current[$segment]; + } + + return $current; + } + + private function hasKey(string $key): bool + { + $current = $this->config; + + foreach (explode('.', $key) as $segment) { + if (!is_array($current) || !array_key_exists($segment, $current)) { + return false; + } + + $current = $current[$segment]; + } + + return true; + } + private function requiredValue(string $key): mixed { if (!array_key_exists($key, $this->config)) { diff --git a/src/Config/PromptBuilderConfig.php b/src/Config/PromptBuilderConfig.php index 86a8d0a..5fcdd0a 100644 --- a/src/Config/PromptBuilderConfig.php +++ b/src/Config/PromptBuilderConfig.php @@ -11,6 +11,7 @@ final class PromptBuilderConfig */ public function __construct( private readonly array $config = [], + private readonly ?DomainVocabularyConfig $vocabulary = null, ) { } @@ -159,6 +160,35 @@ final class PromptBuilderConfig return $out; } + /** + * @return string[] + */ + private function getConfiguredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array + { + if ($this->hasPath($configPath)) { + return $this->getRequiredStringList($configPath); + } + + if ($this->vocabulary === null) { + throw new \InvalidArgumentException(sprintf( + 'RetrieX prompt config path "%s" is missing and no vocabulary resolver is available.', + $configPath + )); + } + + $viewPath = $this->getRequiredString($viewPathConfigPath); + $terms = $this->vocabulary->view($viewPath, []); + + if ($terms === []) { + throw new \InvalidArgumentException(sprintf( + 'RetrieX prompt vocabulary view "%s" resolved to an empty list.', + 
$viewPath + )); + } + + return $terms; + } + /** * @return string[] */ @@ -193,6 +223,21 @@ final class PromptBuilderConfig + private function hasPath(string $path): bool + { + $current = $this->config; + + foreach (explode('.', $path) as $segment) { + if (!is_array($current) || !array_key_exists($segment, $current)) { + return false; + } + + $current = $current[$segment]; + } + + return true; + } + private function getOptionalValue(string $path): mixed { $current = $this->config; @@ -573,7 +618,10 @@ final class PromptBuilderConfig */ public function getTechnicalProductKeywords(): array { - return $this->getRequiredStringList('technical_product_keywords'); + return $this->getConfiguredStringListOrVocabularyView( + 'technical_product_keywords', + 'vocabulary_views.technical_product_keywords' + ); } /** @@ -581,7 +629,10 @@ final class PromptBuilderConfig */ public function getAccessoryRequestKeywords(): array { - return $this->getRequiredStringList('accessory_request_keywords'); + return $this->getConfiguredStringListOrVocabularyView( + 'accessory_request_keywords', + 'vocabulary_views.accessory_request_keywords' + ); } public function getMeasurementEvidenceSectionLabel(): string