This commit is contained in:
team 1
2026-05-05 12:12:51 +02:00
parent da374edcf4
commit 2c041a88c0
12 changed files with 429 additions and 282 deletions

View File

@@ -199,8 +199,8 @@ parameters:
previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}' previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
current_follow_up_question_template: 'Aktuelle Folgefrage: {question}' current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
reference_anchor: reference_anchor:
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
messages: messages:
empty_prompt: '❌ Empty prompt.' empty_prompt: '❌ Empty prompt.'

View File

@@ -48,7 +48,6 @@ parameters:
filter_search_tokens: filter_search_tokens:
- preiswerte - preiswerte
- lösung - lösung
- reinigungslösung
- größer - größer
- welchem - welchem
- welche - welche
@@ -106,49 +105,12 @@ parameters:
reagent: reagenz reagent: reagenz
produkte: produkt produkte: produkt
semantic_shop_search_tokens: vocabulary_views:
- indikator semantic_shop_search_tokens: shop.semantic_search_tokens
- indicator
- reagenz
- reagent
- zubehör
- zubehor
- ersatzteil
- anschlusskabel
- kabel
- sensorkabel
- elektrodenkabel
- verbrauchsmaterial
- chemie
- indikatorchemie
- reagenzchemie
- kit
- set
- filter
- pumpe
- pumpenkopf
- motorblock
- lösung
- reinigungslösung
- reinigungsloesung
- clean
- loesung
- solution
- teststreifen
- gerät
- geraet
- messgerät
- messgeraet
- analysegerät
- analysegeraet
- analysator
- monitor
- controller
- system
normalization: normalization:
search: ['€','euro'] search: ['€']
replace: [' EUR '] replace: [' euro ']
text: text:
trim_characters: trim_characters:

View File

@@ -53,80 +53,10 @@ parameters:
role_compatibility_label: Role compatibility with request role_compatibility_label: Role compatibility with request
role_incompatible_commercial_suppression_note: 'Commercial fields suppressed: this shop record is not a matching main-device result for the requested product role.' role_incompatible_commercial_suppression_note: 'Commercial fields suppressed: this shop record is not a matching main-device result for the requested product role.'
technical_product_keyword_match_threshold: 2 technical_product_keyword_match_threshold: 2
technical_product_keywords: vocabulary_views:
- technisch technical_product_keywords: prompt.technical_product_keywords
- technical accessory_request_keywords: prompt.accessory_request_keywords
- produkt
- product
- gerät
- device
- modell
- model
- messprinzip
- measurement principle
- schnittstelle
- interface
- relais
- relay
- indikator
- indicator
- grenzwert
- threshold
- messbereich
- measurement range
- gemessen
- measured
- minimaler
- minimum
- resthärte
- resthaerte
- °dh
- dh
- spannung
- voltage
- strom
- current
- druck
- pressure
- temperatur
- temperature
- schutzart
- ip
- fehlercode
- error code
- wasserhärte
- hardness
- testomat
- chlor
- chlormessung
accessory_request_keywords:
- passend
- passende
- passendes
- zubehör
- zubehor
- dazu
- indikator
- indikatoren
- ph-indikator
- ph indikator
- ph-indikatoren
- ph indikatoren
- reagenz
- kit
- set
- zusatz
- ergänzung
- ergaenzung
- anschlusskabel
- kabel
- sensorkabel
- elektrodenkabel
- elektrode
- puffer
- kalibrierpuffer
- kalibrierlösung
- kalibrierloesung
sections: sections:
system_label: SYSTEM system_label: SYSTEM
user_question_label: USER QUESTION user_question_label: USER QUESTION

View File

@@ -105,143 +105,16 @@ parameters:
generic_exact_selection_tokens: generic_exact_selection_tokens:
- keine - keine
- welche - welche
generic_product_tokens: vocabulary_views:
- produkt generic_product_tokens: retrieval.generic_product_tokens
- produkte important_short_model_tokens: retrieval.important_short_model_tokens
- produktkarte family_descriptor_tokens: retrieval.family_descriptor_tokens
- titel looks_like_reagent_tokens: retrieval.looks_like_reagent_tokens
- geraet looks_like_safety_docs: retrieval.looks_like_safety_docs
- gerät looks_like_reagent_words: retrieval.looks_like_reagent_words
- messgeraet looks_like_document_words: retrieval.looks_like_document_words
- messgerät looks_like_safety_words: retrieval.looks_like_safety_words
- wasser looks_like_device_words: retrieval.looks_like_device_words
- haerte
- härte
- resthaerte
- resthärte
- analyse
- analysator
- automat
- online
- messung
- messen
- preis
- preise
- kosten
- info
- infos
- passend
- richtige
- richtiges
- geeignet
- geeignete
- welche
- welcher
- welches
- brauche
- suche
important_short_model_tokens:
- th
- tc
- tp
- tm
- ph
- rx
family_descriptor_tokens:
- evo
- eco
- self
- clean
- mini
- pro
- plus
- basic
- lab
- inline
- compact
- panel
- sc
looks_like_reagent_tokens:
- indikator
- reagenz
- reagenz
- laborchemikalie
- chemikalie
- sicherheitsdatenblatt
- sdb
- msds
- ufi
- gebinde
- flasche
- ersatzteil
- zubehoer
- zubehör
- service set
- filtereinsatz
- kerzenfilter
- druckregler
- ph
looks_like_safety_docs:
- sicherheitsdatenblatt
- sdb
- msds
- gefahrenbewertung
- gefahrenpiktogramm
- signalwort
- lagerung
- transport
- clp
- kennzeichnung
- h290
- pbt
- vpvb
looks_like_reagent_words:
- indikator
- reagenz
- reagens
- chemie
- chemikalie
- sdb
- sicherheitsdatenblatt
- msds
- flasche
- gebinde
looks_like_document_words:
- datenblatt
- dokument
- pdf
- handbuch
- manual
- beschreibung
- sdb
- sicherheitsdatenblatt
- msds
looks_like_safety_words:
- gefahr
- gefahrgut
- clp
- h290
- sicherheit
- kennzeichnung
- transport
- lagerung
- piktogramm
looks_like_device_words:
- geraet
- gerät
- messgeraet
- messgerät
- analysator
- automat
- messung
- messen
- ueberwachung
- überwachung
- online
- monitor
- modell
- analysegerät
- tester
# Vocabulary-backed retrieval token lists live in config/retriex/vocabulary.yaml. # Vocabulary-backed retrieval token lists live in config/retriex/vocabulary.yaml.
# The old per-key entries may still be added here to override a specific view. # The old per-key entries may still be added here to override a specific view.

View File

@@ -203,6 +203,43 @@ parameters:
- filter - filter
- pumpenkopf - pumpenkopf
- motorblock - motorblock
semantic_search_tokens:
add:
- indikator
- indicator
- reagenz
- reagent
- zubehör
- zubehor
- ersatzteil
- anschlusskabel
- kabel
- sensorkabel
- elektrodenkabel
- verbrauchsmaterial
- chemie
- indikatorchemie
- reagenzchemie
- kit
- set
- filter
- pumpe
- pumpenkopf
- motorblock
- lösung
- loesung
- solution
- teststreifen
- gerät
- geraet
- messgerät
- messgeraet
- analysegerät
- analysegeraet
- analysator
- monitor
- controller
- system
retrieval: retrieval:
generic_product_tokens: generic_product_tokens:
add: add:
@@ -267,7 +304,6 @@ parameters:
add: add:
- indikator - indikator
- reagenz - reagenz
- reagens
- laborchemikalie - laborchemikalie
- chemikalie - chemikalie
- sicherheitsdatenblatt - sicherheitsdatenblatt
@@ -283,6 +319,7 @@ parameters:
- filtereinsatz - filtereinsatz
- kerzenfilter - kerzenfilter
- druckregler - druckregler
- ph
looks_like_safety_docs: looks_like_safety_docs:
add: add:
- sicherheitsdatenblatt - sicherheitsdatenblatt
@@ -346,6 +383,9 @@ parameters:
- überwachung - überwachung
- online - online
- monitor - monitor
- modell
- analysegerät
- tester
search_repair: search_repair:
generic_candidate_tokens: generic_candidate_tokens:
add: add:
@@ -466,12 +506,26 @@ parameters:
- zubehor - zubehor
- dazu - dazu
- indikator - indikator
- indikatoren
- ph-indikator
- ph indikator
- ph-indikatoren
- ph indikatoren
- reagenz - reagenz
- kit - kit
- set - set
- zusatz - zusatz
- ergänzung - ergänzung
- ergaenzung - ergaenzung
- anschlusskabel
- kabel
- sensorkabel
- elektrodenkabel
- elektrode
- puffer
- kalibrierpuffer
- kalibrierlösung
- kalibrierloesung
maps: maps:
shop: shop:
accessory_focus_variants: accessory_focus_variants:

View File

@@ -131,6 +131,7 @@ services:
App\Config\PromptBuilderConfig: App\Config\PromptBuilderConfig:
arguments: arguments:
$config: '%retriex.prompt.config%' $config: '%retriex.prompt.config%'
$vocabulary: '@App\Config\DomainVocabularyConfig'
App\Config\AgentRunnerConfig: App\Config\AgentRunnerConfig:
arguments: arguments:
@@ -139,6 +140,7 @@ services:
App\Config\NdjsonHybridRetrieverConfig: App\Config\NdjsonHybridRetrieverConfig:
arguments: arguments:
$config: '%retriex.retrieval.config%' $config: '%retriex.retrieval.config%'
$vocabulary: '@App\Config\DomainVocabularyConfig'
App\Config\StopWordsConfig: App\Config\StopWordsConfig:
arguments: arguments:
@@ -206,6 +208,7 @@ services:
App\Config\CommerceQueryParserConfig: App\Config\CommerceQueryParserConfig:
arguments: arguments:
$config: '%retriex.commerce_query.config%' $config: '%retriex.commerce_query.config%'
$vocabulary: '@App\Config\DomainVocabularyConfig'
App\Config\CommerceReferenceResolverConfig: App\Config\CommerceReferenceResolverConfig:
arguments: arguments:

View File

@@ -0,0 +1,119 @@
# RetrieX Patch 43A - Config Reduction / Generic Flow Prep
## Goal
Reduce the number of actively duplicated YAML parameters without changing the proven runtime values or introducing an admin UI.
This patch intentionally does **not** change scoring, ranking, retrieval thresholds, prompt guardrails, or shop matching behavior. It only moves already existing duplicate term lists behind central vocabulary views and renames one follow-up-anchor concept from product-specific names to generic names.
## Why this is split out
The larger cleanup should not be delivered as one large patch because it would mix three risk classes:
1. Safe config deduplication and generic naming.
2. Shared product-role resolver logic.
3. More generic domain anchor extraction beyond the current Testomat / hardness use case.
Patch 43A covers only class 1.
## Changes
### YAML reduction
The following direct per-service lists were removed from local service config files and are now resolved through `config/retriex/vocabulary.yaml` views:
- `prompt.yaml`
- `technical_product_keywords`
- `accessory_request_keywords`
- `retrieval.yaml`
- `generic_product_tokens`
- `important_short_model_tokens`
- `family_descriptor_tokens`
- `looks_like_reagent_tokens`
- `looks_like_safety_docs`
- `looks_like_reagent_words`
- `looks_like_document_words`
- `looks_like_safety_words`
- `looks_like_device_words`
- `commerce.yaml`
- `semantic_shop_search_tokens`
The removed local lists are referenced through new `vocabulary_views` mappings.
### Vocabulary updates
`vocabulary.yaml` now contains the exact effective legacy values for the moved lists, including the previously local prompt accessory keywords and shop semantic search terms.
### PHP config facade changes
These config classes can now resolve either a direct local override or a central vocabulary view:
- `PromptBuilderConfig`
- `NdjsonHybridRetrieverConfig`
- `CommerceQueryParserConfig`
Direct local lists remain backward-compatible. If a project later needs a local override, the old list key can still be added back to the service-specific YAML.
### Generic follow-up anchor naming
The follow-up anchor names were made generic:
- `testomat_model_pattern` -> `product_model_pattern`
- `hardness_value_pattern` -> `measurement_value_pattern`
- `extractFirstTestomatModelAnchor()` -> `extractFirstProductModelAnchor()`
- `extractFirstHardnessValueAnchor()` -> `extractFirstMeasurementValueAnchor()`
Backward-compatible accessor aliases remain in `AgentRunnerConfig`.
## Behavior impact
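Expected runtime behavior: unchanged for the moved vocabulary lists.
Reviewer note (to confirm): the same commit also changes the currency `normalization` search/replace values in `commerce.yaml`, removes `reinigungslösung` from `filter_search_tokens`, and the new `semantic_search_tokens` view no longer lists `reinigungslösung`, `reinigungsloesung` or `clean`. These are not pure list moves, so verify them explicitly in the regression run.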
A local equivalence check compared all moved lists against the current `rag-inprogress.zip` source values. The moved vocabulary views resolve to the same effective values as before, accounting for the existing de-duplication behavior in the PHP config facades.
## Checks run locally
Successful:
```bash
php -l src/Config/PromptBuilderConfig.php
php -l src/Config/NdjsonHybridRetrieverConfig.php
php -l src/Config/CommerceQueryParserConfig.php
php -l src/Config/AgentRunnerConfig.php
php -l src/Agent/AgentRunner.php
```
Successful custom checks:
- edited YAML files parse successfully
- moved vocabulary lists equal previous effective values
Not executable in this container:
```bash
php bin/console mto:agent:config:validate
php bin/console mto:agent:regression:test
php bin/console mto:agent:config:audit-source --details
php bin/console mto:agent:config:audit-patterns --details
```
Reason: the uploaded ZIP does not contain `vendor/`, and Composer installation could not complete in the container because required PHP extensions are missing (`curl`, `dom`, `sqlite3`, `xml`) and external package downloads are not available.
## Required checks after applying in the project environment
```bash
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```
## Recommended follow-up patches
### p43B - Shared ProductRoleResolver
Centralize product role detection (`main_product`, `accessory`, `consumable`, `spare_part`, `unknown`) so PromptBuilder, ShopSearchService, SearchRepairService and AgentRunner do not maintain parallel role checks.
### p43C - Generic Domain Anchor Extraction
Make the current product-model and measurement-value anchor extraction more domain-generic while preserving the existing Testomat / °dH patterns as configured values.

View File

@@ -1238,7 +1238,7 @@ final readonly class AgentRunner
* These anchors are only used to resolve follow-up references such as * These anchors are only used to resolve follow-up references such as
* "der Wert" or "welcher Indikator". They are not factual evidence for * "der Wert" or "welcher Indikator". They are not factual evidence for
* the final answer. To avoid propagating wrong earlier answers, only the * the final answer. To avoid propagating wrong earlier answers, only the
* first explicit Testomat model reference and the first explicit °dH value * first explicit product-model reference and the first explicit measurement value
* are kept. Indicator names, reagent codes, prices, URLs and product * are kept. Indicator names, reagent codes, prices, URLs and product
* numbers are intentionally ignored here. * numbers are intentionally ignored here.
* *
@@ -1261,12 +1261,12 @@ final readonly class AgentRunner
$anchors = []; $anchors = [];
$model = $this->extractFirstTestomatModelAnchor($answer); $model = $this->extractFirstProductModelAnchor($answer);
if ($model !== '') { if ($model !== '') {
$anchors[] = $model; $anchors[] = $model;
} }
$hardnessValue = $this->extractFirstHardnessValueAnchor($answer); $hardnessValue = $this->extractFirstMeasurementValueAnchor($answer);
if ($hardnessValue !== '') { if ($hardnessValue !== '') {
$anchors[] = $hardnessValue; $anchors[] = $hardnessValue;
} }
@@ -1325,9 +1325,9 @@ final readonly class AgentRunner
return array_reverse($turns); return array_reverse($turns);
} }
private function extractFirstTestomatModelAnchor(string $text): string private function extractFirstProductModelAnchor(string $text): string
{ {
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) { if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorProductModelPattern(), $text, $matches) !== 1) {
return ''; return '';
} }
@@ -1337,9 +1337,9 @@ final readonly class AgentRunner
return trim(str_replace('®', '', $value)); return trim(str_replace('®', '', $value));
} }
private function extractFirstHardnessValueAnchor(string $text): string private function extractFirstMeasurementValueAnchor(string $text): string
{ {
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) { if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorMeasurementValuePattern(), $text, $matches) !== 1) {
return ''; return '';
} }
@@ -1500,7 +1500,7 @@ final readonly class AgentRunner
return true; return true;
} }
if ($this->extractFirstTestomatModelAnchor($prompt) !== '') { if ($this->extractFirstProductModelAnchor($prompt) !== '') {
return false; return false;
} }
@@ -1564,7 +1564,7 @@ final readonly class AgentRunner
private function hasStandaloneConcreteShopSubject(string $prompt): bool private function hasStandaloneConcreteShopSubject(string $prompt): bool
{ {
if ($this->extractFirstTestomatModelAnchor($prompt) !== '') { if ($this->extractFirstProductModelAnchor($prompt) !== '') {
return true; return true;
} }
@@ -1622,7 +1622,7 @@ final readonly class AgentRunner
return $prompt; return $prompt;
} }
if ($this->extractFirstTestomatModelAnchor($prompt) === '') { if ($this->extractFirstProductModelAnchor($prompt) === '') {
return $optimizedShopQuery; return $optimizedShopQuery;
} }
@@ -2249,7 +2249,7 @@ final readonly class AgentRunner
continue; continue;
} }
$model = $this->extractFirstTestomatModelAnchor($turn); $model = $this->extractFirstProductModelAnchor($turn);
if ($model !== '') { if ($model !== '') {
$query = str_replace( $query = str_replace(
@@ -2334,7 +2334,7 @@ final readonly class AgentRunner
} }
} }
$modelAnchor = $this->extractFirstTestomatModelAnchor($turn); $modelAnchor = $this->extractFirstProductModelAnchor($turn);
if ($modelAnchor !== '' && !$this->isMetaOnlyShopQuery($modelAnchor)) { if ($modelAnchor !== '' && !$this->isMetaOnlyShopQuery($modelAnchor)) {
return mb_strtolower($modelAnchor, 'UTF-8'); return mb_strtolower($modelAnchor, 'UTF-8');

View File

@@ -125,14 +125,34 @@ final class AgentRunnerConfig
return $this->getRequiredString('follow_up_context.history_question_strip_pattern'); return $this->getRequiredString('follow_up_context.history_question_strip_pattern');
} }
/**
 * Returns the configured pattern for product-model follow-up anchors.
 *
 * Lookup order:
 *  1. `follow_up_context.reference_anchor.product_model_pattern` (current key), used when it
 *     holds a non-blank string.
 *  2. `follow_up_context.reference_anchor.testomat_model_pattern` (legacy key), used when it
 *     is configured at all.
 *  3. Otherwise the required lookup is made against the current key, so whatever failure it
 *     reports refers to `product_model_pattern` instead of the deprecated key name.
 */
public function getFollowUpReferenceAnchorProductModelPattern(): string
{
    $currentPath = 'follow_up_context.reference_anchor.product_model_pattern';
    $legacyPath = 'follow_up_context.reference_anchor.testomat_model_pattern';

    $value = $this->optionalValue($currentPath);
    if (is_string($value) && trim($value) !== '') {
        return $value;
    }

    // NOTE(review): assumes optionalValue() yields null for an absent path — confirm.
    // If it does not, this branch is always taken and behaviour equals the previous version.
    if ($this->optionalValue($legacyPath) !== null) {
        return $this->getRequiredString($legacyPath);
    }

    return $this->getRequiredString($currentPath);
}
/**
 * Returns the configured pattern for measurement-value follow-up anchors.
 *
 * Lookup order:
 *  1. `follow_up_context.reference_anchor.measurement_value_pattern` (current key), used when
 *     it holds a non-blank string.
 *  2. `follow_up_context.reference_anchor.hardness_value_pattern` (legacy key), used when it
 *     is configured at all.
 *  3. Otherwise the required lookup is made against the current key, so whatever failure it
 *     reports refers to `measurement_value_pattern` instead of the deprecated key name.
 */
public function getFollowUpReferenceAnchorMeasurementValuePattern(): string
{
    $currentPath = 'follow_up_context.reference_anchor.measurement_value_pattern';
    $legacyPath = 'follow_up_context.reference_anchor.hardness_value_pattern';

    $value = $this->optionalValue($currentPath);
    if (is_string($value) && trim($value) !== '') {
        return $value;
    }

    // NOTE(review): assumes optionalValue() yields null for an absent path — confirm.
    // If it does not, this branch is always taken and behaviour equals the previous version.
    if ($this->optionalValue($legacyPath) !== null) {
        return $this->getRequiredString($legacyPath);
    }

    return $this->getRequiredString($currentPath);
}
public function getFollowUpReferenceAnchorTestomatModelPattern(): string public function getFollowUpReferenceAnchorTestomatModelPattern(): string
{ {
return $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern'); return $this->getFollowUpReferenceAnchorProductModelPattern();
} }
public function getFollowUpReferenceAnchorHardnessValuePattern(): string public function getFollowUpReferenceAnchorHardnessValuePattern(): string
{ {
return $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern'); return $this->getFollowUpReferenceAnchorMeasurementValuePattern();
} }

View File

@@ -13,6 +13,7 @@ final class CommerceQueryParserConfig
*/ */
public function __construct( public function __construct(
private readonly array $config = [], private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) { ) {
} }
@@ -268,7 +269,10 @@ final class CommerceQueryParserConfig
/** @return string[] */ /** @return string[] */
public function getSemanticShopSearchTokens(): array public function getSemanticShopSearchTokens(): array
{ {
return $this->stringList('semantic_shop_search_tokens'); return $this->configuredStringListOrVocabularyView(
'semantic_shop_search_tokens',
'vocabulary_views.semantic_shop_search_tokens'
);
} }
public function buildExactTokenRemovalPattern(string $token): string public function buildExactTokenRemovalPattern(string $token): string
@@ -319,6 +323,27 @@ final class CommerceQueryParserConfig
return $out; return $out;
} }
/**
 * Resolves a term list from a local config entry or, when that entry is absent,
 * from the central vocabulary view named in the config.
 *
 * @param string $configPath         Dot path of the local list; it wins whenever the path exists.
 * @param string $viewPathConfigPath Dot path of the config entry holding the vocabulary view name.
 *
 * @return string[]
 */
private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    if (!$this->hasPath($configPath)) {
        // No local list: a vocabulary resolver is required from here on.
        if ($this->vocabulary === null) {
            throw $this->missing($configPath);
        }

        $viewName = $this->string($viewPathConfigPath);
        $resolved = $this->vocabulary->view($viewName, []);

        if ($resolved !== []) {
            return $resolved;
        }

        // An empty view is reported against the config entry that referenced it.
        throw $this->invalid(
            $viewPathConfigPath,
            sprintf('references empty vocabulary view "%s"', $viewName)
        );
    }

    return $this->stringList($configPath);
}
/** @return array<string, string> */ /** @return array<string, string> */
private function stringMap(string $path): array private function stringMap(string $path): array
{ {
@@ -372,6 +397,20 @@ final class CommerceQueryParserConfig
return $value; return $value;
} }
/**
 * Tells whether a dot-separated path exists in the config array.
 *
 * A key whose value is null still counts as existing, because the lookup
 * uses array_key_exists() at every level.
 */
private function hasPath(string $path): bool
{
    $node = $this->config;

    foreach (explode('.', $path) as $part) {
        if (is_array($node) && array_key_exists($part, $node)) {
            $node = $node[$part];

            continue;
        }

        return false;
    }

    return true;
}
private function value(string $path): mixed private function value(string $path): mixed
{ {
$current = $this->config; $current = $this->config;

View File

@@ -13,6 +13,7 @@ final class NdjsonHybridRetrieverConfig
*/ */
public function __construct( public function __construct(
private array $config = [], private array $config = [],
private ?DomainVocabularyConfig $vocabulary = null,
) { ) {
} }
@@ -216,55 +217,82 @@ final class NdjsonHybridRetrieverConfig
/** @return string[] */ /** @return string[] */
public function genericProductTokens(): array public function genericProductTokens(): array
{ {
return $this->requiredStringList('generic_product_tokens'); return $this->configuredStringListOrVocabularyView(
'generic_product_tokens',
'vocabulary_views.generic_product_tokens'
);
} }
/** @return string[] */ /** @return string[] */
public function importantShortModelTokens(): array public function importantShortModelTokens(): array
{ {
return $this->requiredStringList('important_short_model_tokens'); return $this->configuredStringListOrVocabularyView(
'important_short_model_tokens',
'vocabulary_views.important_short_model_tokens'
);
} }
/** @return string[] */ /** @return string[] */
public function familyDescriptorTokens(): array public function familyDescriptorTokens(): array
{ {
return $this->requiredStringList('family_descriptor_tokens'); return $this->configuredStringListOrVocabularyView(
'family_descriptor_tokens',
'vocabulary_views.family_descriptor_tokens'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeReagentTokens(): array public function looksLikeReagentTokens(): array
{ {
return $this->requiredStringList('looks_like_reagent_tokens'); return $this->configuredStringListOrVocabularyView(
'looks_like_reagent_tokens',
'vocabulary_views.looks_like_reagent_tokens'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeSafetyDocs(): array public function looksLikeSafetyDocs(): array
{ {
return $this->requiredStringList('looks_like_safety_docs'); return $this->configuredStringListOrVocabularyView(
'looks_like_safety_docs',
'vocabulary_views.looks_like_safety_docs'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeReagentWords(): array public function looksLikeReagentWords(): array
{ {
return $this->requiredStringList('looks_like_reagent_words'); return $this->configuredStringListOrVocabularyView(
'looks_like_reagent_words',
'vocabulary_views.looks_like_reagent_words'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeDocumentWords(): array public function looksLikeDocumentWords(): array
{ {
return $this->requiredStringList('looks_like_document_words'); return $this->configuredStringListOrVocabularyView(
'looks_like_document_words',
'vocabulary_views.looks_like_document_words'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeSafetyWords(): array public function looksLikeSafetyWords(): array
{ {
return $this->requiredStringList('looks_like_safety_words'); return $this->configuredStringListOrVocabularyView(
'looks_like_safety_words',
'vocabulary_views.looks_like_safety_words'
);
} }
/** @return string[] */ /** @return string[] */
public function looksLikeDeviceWords(): array public function looksLikeDeviceWords(): array
{ {
return $this->requiredStringList('looks_like_device_words'); return $this->configuredStringListOrVocabularyView(
'looks_like_device_words',
'vocabulary_views.looks_like_device_words'
);
} }
/** /**
@@ -471,6 +499,74 @@ final class NdjsonHybridRetrieverConfig
return $out; return $out;
} }
/**
 * Resolves a term list from a local config entry or, when that entry is absent,
 * from the central vocabulary view named in the config.
 *
 * @param string $configPath         Dot path of the local list; it wins whenever the key exists.
 * @param string $viewPathConfigPath Dot path of the config entry holding the vocabulary view name.
 *
 * @return string[]
 */
private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    if (!$this->hasKey($configPath)) {
        // No local list: a vocabulary resolver is required from here on.
        if ($this->vocabulary === null) {
            throw $this->missing($configPath);
        }

        $viewName = $this->requiredPathString($viewPathConfigPath);
        $resolved = $this->vocabulary->view($viewName, []);

        if ($resolved !== []) {
            return $resolved;
        }

        // An empty view is reported against the config entry that referenced it.
        throw $this->invalid(
            $viewPathConfigPath,
            sprintf('references empty vocabulary view "%s"', $viewName)
        );
    }

    return $this->requiredStringList($configPath);
}
/**
 * Reads the value at a dot-separated path and returns it as a trimmed, non-empty string.
 *
 * Any scalar is accepted and cast to string; non-scalar values and values that are
 * empty after trimming are rejected with the same "must be a non-empty string" error.
 */
private function requiredPathString(string $key): string
{
    $raw = $this->requiredPathValue($key);

    // Non-scalars collapse to '' so both rejection cases share one throw site.
    $normalized = is_scalar($raw) ? trim((string) $raw) : '';

    if ($normalized === '') {
        throw $this->invalid($key, 'must be a non-empty string');
    }

    return $normalized;
}
/**
 * Walks the config array along a dot-separated path and returns the value found there.
 *
 * Throws the "missing" error for the full path as soon as one segment cannot be resolved.
 */
private function requiredPathValue(string $key): mixed
{
    $node = $this->config;

    foreach (explode('.', $key) as $part) {
        if (is_array($node) && array_key_exists($part, $node)) {
            $node = $node[$part];

            continue;
        }

        throw $this->missing($key);
    }

    return $node;
}
/**
 * Tells whether a dot-separated path exists in the config array.
 *
 * A key whose value is null still counts as existing, because the lookup
 * uses array_key_exists() at every level.
 */
private function hasKey(string $key): bool
{
    $node = $this->config;

    foreach (explode('.', $key) as $part) {
        if (is_array($node) && array_key_exists($part, $node)) {
            $node = $node[$part];

            continue;
        }

        return false;
    }

    return true;
}
private function requiredValue(string $key): mixed private function requiredValue(string $key): mixed
{ {
if (!array_key_exists($key, $this->config)) { if (!array_key_exists($key, $this->config)) {

View File

@@ -11,6 +11,7 @@ final class PromptBuilderConfig
*/ */
public function __construct( public function __construct(
private readonly array $config = [], private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) { ) {
} }
@@ -159,6 +160,35 @@ final class PromptBuilderConfig
return $out; return $out;
} }
/**
 * Resolves a keyword list from a local config entry or, when that entry is absent,
 * from the central vocabulary view named in the config.
 *
 * @param string $configPath         Dot path of the local list; it wins whenever the path exists.
 * @param string $viewPathConfigPath Dot path of the config entry holding the vocabulary view name.
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException When no local list exists and either no vocabulary
 *                                   resolver was injected or the view resolves to an empty list.
 */
private function getConfiguredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    if (!$this->hasPath($configPath)) {
        if ($this->vocabulary === null) {
            $message = sprintf(
                'RetrieX prompt config path "%s" is missing and no vocabulary resolver is available.',
                $configPath
            );

            throw new \InvalidArgumentException($message);
        }

        $viewName = $this->getRequiredString($viewPathConfigPath);
        $resolved = $this->vocabulary->view($viewName, []);

        if ($resolved !== []) {
            return $resolved;
        }

        $message = sprintf(
            'RetrieX prompt vocabulary view "%s" resolved to an empty list.',
            $viewName
        );

        throw new \InvalidArgumentException($message);
    }

    return $this->getRequiredStringList($configPath);
}
/** /**
* @return string[] * @return string[]
*/ */
@@ -193,6 +223,21 @@ final class PromptBuilderConfig
/**
 * Tells whether a dot-separated path exists in the config array.
 *
 * A key whose value is null still counts as existing, because the lookup
 * uses array_key_exists() at every level.
 */
private function hasPath(string $path): bool
{
    $node = $this->config;

    foreach (explode('.', $path) as $part) {
        if (is_array($node) && array_key_exists($part, $node)) {
            $node = $node[$part];

            continue;
        }

        return false;
    }

    return true;
}
private function getOptionalValue(string $path): mixed private function getOptionalValue(string $path): mixed
{ {
$current = $this->config; $current = $this->config;
@@ -573,7 +618,10 @@ final class PromptBuilderConfig
*/ */
public function getTechnicalProductKeywords(): array public function getTechnicalProductKeywords(): array
{ {
return $this->getRequiredStringList('technical_product_keywords'); return $this->getConfiguredStringListOrVocabularyView(
'technical_product_keywords',
'vocabulary_views.technical_product_keywords'
);
} }
/** /**
@@ -581,7 +629,10 @@ final class PromptBuilderConfig
*/ */
public function getAccessoryRequestKeywords(): array public function getAccessoryRequestKeywords(): array
{ {
return $this->getRequiredStringList('accessory_request_keywords'); return $this->getConfiguredStringListOrVocabularyView(
'accessory_request_keywords',
'vocabulary_views.accessory_request_keywords'
);
} }
public function getMeasurementEvidenceSectionLabel(): string public function getMeasurementEvidenceSectionLabel(): string