p43A
This commit is contained in:
@@ -199,8 +199,8 @@ parameters:
|
||||
previous_reference_anchors_template: 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): {anchors}'
|
||||
current_follow_up_question_template: 'Aktuelle Folgefrage: {question}'
|
||||
reference_anchor:
|
||||
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
||||
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
||||
product_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
||||
measurement_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
||||
|
||||
messages:
|
||||
empty_prompt: '❌ Empty prompt.'
|
||||
|
||||
@@ -48,7 +48,6 @@ parameters:
|
||||
filter_search_tokens:
|
||||
- preiswerte
|
||||
- lösung
|
||||
- reinigungslösung
|
||||
- größer
|
||||
- welchem
|
||||
- welche
|
||||
@@ -106,49 +105,12 @@ parameters:
|
||||
reagent: reagenz
|
||||
produkte: produkt
|
||||
|
||||
semantic_shop_search_tokens:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrodenkabel
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
- reagenzchemie
|
||||
- kit
|
||||
- set
|
||||
- filter
|
||||
- pumpe
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
- lösung
|
||||
- reinigungslösung
|
||||
- reinigungsloesung
|
||||
- clean
|
||||
- loesung
|
||||
- solution
|
||||
- teststreifen
|
||||
- gerät
|
||||
- geraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
- controller
|
||||
- system
|
||||
vocabulary_views:
|
||||
semantic_shop_search_tokens: shop.semantic_search_tokens
|
||||
|
||||
normalization:
|
||||
search: ['€','euro']
|
||||
replace: [' EUR ']
|
||||
search: ['€']
|
||||
replace: [' euro ']
|
||||
|
||||
text:
|
||||
trim_characters:
|
||||
|
||||
@@ -53,80 +53,10 @@ parameters:
|
||||
role_compatibility_label: Role compatibility with request
|
||||
role_incompatible_commercial_suppression_note: 'Commercial fields suppressed: this shop record is not a matching main-device result for the requested product role.'
|
||||
technical_product_keyword_match_threshold: 2
|
||||
technical_product_keywords:
|
||||
- technisch
|
||||
- technical
|
||||
- produkt
|
||||
- product
|
||||
- gerät
|
||||
- device
|
||||
- modell
|
||||
- model
|
||||
- messprinzip
|
||||
- measurement principle
|
||||
- schnittstelle
|
||||
- interface
|
||||
- relais
|
||||
- relay
|
||||
- indikator
|
||||
- indicator
|
||||
- grenzwert
|
||||
- threshold
|
||||
- messbereich
|
||||
- measurement range
|
||||
- gemessen
|
||||
- measured
|
||||
- minimaler
|
||||
- minimum
|
||||
- resthärte
|
||||
- resthaerte
|
||||
- °dh
|
||||
- dh
|
||||
- spannung
|
||||
- voltage
|
||||
- strom
|
||||
- current
|
||||
- druck
|
||||
- pressure
|
||||
- temperatur
|
||||
- temperature
|
||||
- schutzart
|
||||
- ip
|
||||
- fehlercode
|
||||
- error code
|
||||
- wasserhärte
|
||||
- hardness
|
||||
- testomat
|
||||
- chlor
|
||||
- chlormessung
|
||||
accessory_request_keywords:
|
||||
- passend
|
||||
- passende
|
||||
- passendes
|
||||
- zubehör
|
||||
- zubehor
|
||||
- dazu
|
||||
- indikator
|
||||
- indikatoren
|
||||
- ph-indikator
|
||||
- ph indikator
|
||||
- ph-indikatoren
|
||||
- ph indikatoren
|
||||
- reagenz
|
||||
- kit
|
||||
- set
|
||||
- zusatz
|
||||
- ergänzung
|
||||
- ergaenzung
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrodenkabel
|
||||
- elektrode
|
||||
- puffer
|
||||
- kalibrierpuffer
|
||||
- kalibrierlösung
|
||||
- kalibrierloesung
|
||||
vocabulary_views:
|
||||
technical_product_keywords: prompt.technical_product_keywords
|
||||
accessory_request_keywords: prompt.accessory_request_keywords
|
||||
|
||||
sections:
|
||||
system_label: SYSTEM
|
||||
user_question_label: USER QUESTION
|
||||
|
||||
@@ -105,143 +105,16 @@ parameters:
|
||||
generic_exact_selection_tokens:
|
||||
- keine
|
||||
- welche
|
||||
generic_product_tokens:
|
||||
- produkt
|
||||
- produkte
|
||||
- produktkarte
|
||||
- titel
|
||||
- geraet
|
||||
- gerät
|
||||
- messgeraet
|
||||
- messgerät
|
||||
- wasser
|
||||
- haerte
|
||||
- härte
|
||||
- resthaerte
|
||||
- resthärte
|
||||
- analyse
|
||||
- analysator
|
||||
- automat
|
||||
- online
|
||||
- messung
|
||||
- messen
|
||||
- preis
|
||||
- preise
|
||||
- kosten
|
||||
- info
|
||||
- infos
|
||||
- passend
|
||||
- richtige
|
||||
- richtiges
|
||||
- geeignet
|
||||
- geeignete
|
||||
- welche
|
||||
- welcher
|
||||
- welches
|
||||
- brauche
|
||||
- suche
|
||||
important_short_model_tokens:
|
||||
- th
|
||||
- tc
|
||||
- tp
|
||||
- tm
|
||||
- ph
|
||||
- rx
|
||||
family_descriptor_tokens:
|
||||
- evo
|
||||
- eco
|
||||
- self
|
||||
- clean
|
||||
- mini
|
||||
- pro
|
||||
- plus
|
||||
- basic
|
||||
- lab
|
||||
- inline
|
||||
- compact
|
||||
- panel
|
||||
- sc
|
||||
looks_like_reagent_tokens:
|
||||
- indikator
|
||||
- reagenz
|
||||
- reagenz
|
||||
- laborchemikalie
|
||||
- chemikalie
|
||||
- sicherheitsdatenblatt
|
||||
- sdb
|
||||
- msds
|
||||
- ufi
|
||||
- gebinde
|
||||
- flasche
|
||||
- ersatzteil
|
||||
- zubehoer
|
||||
- zubehör
|
||||
- service set
|
||||
- filtereinsatz
|
||||
- kerzenfilter
|
||||
- druckregler
|
||||
- ph
|
||||
looks_like_safety_docs:
|
||||
- sicherheitsdatenblatt
|
||||
- sdb
|
||||
- msds
|
||||
- gefahrenbewertung
|
||||
- gefahrenpiktogramm
|
||||
- signalwort
|
||||
- lagerung
|
||||
- transport
|
||||
- clp
|
||||
- kennzeichnung
|
||||
- h290
|
||||
- pbt
|
||||
- vpvb
|
||||
looks_like_reagent_words:
|
||||
- indikator
|
||||
- reagenz
|
||||
- reagens
|
||||
- chemie
|
||||
- chemikalie
|
||||
- sdb
|
||||
- sicherheitsdatenblatt
|
||||
- msds
|
||||
- flasche
|
||||
- gebinde
|
||||
looks_like_document_words:
|
||||
- datenblatt
|
||||
- dokument
|
||||
- pdf
|
||||
- handbuch
|
||||
- manual
|
||||
- beschreibung
|
||||
- sdb
|
||||
- sicherheitsdatenblatt
|
||||
- msds
|
||||
looks_like_safety_words:
|
||||
- gefahr
|
||||
- gefahrgut
|
||||
- clp
|
||||
- h290
|
||||
- sicherheit
|
||||
- kennzeichnung
|
||||
- transport
|
||||
- lagerung
|
||||
- piktogramm
|
||||
looks_like_device_words:
|
||||
- geraet
|
||||
- gerät
|
||||
- messgeraet
|
||||
- messgerät
|
||||
- analysator
|
||||
- automat
|
||||
- messung
|
||||
- messen
|
||||
- ueberwachung
|
||||
- überwachung
|
||||
- online
|
||||
- monitor
|
||||
- modell
|
||||
- analysegerät
|
||||
- tester
|
||||
vocabulary_views:
|
||||
generic_product_tokens: retrieval.generic_product_tokens
|
||||
important_short_model_tokens: retrieval.important_short_model_tokens
|
||||
family_descriptor_tokens: retrieval.family_descriptor_tokens
|
||||
looks_like_reagent_tokens: retrieval.looks_like_reagent_tokens
|
||||
looks_like_safety_docs: retrieval.looks_like_safety_docs
|
||||
looks_like_reagent_words: retrieval.looks_like_reagent_words
|
||||
looks_like_document_words: retrieval.looks_like_document_words
|
||||
looks_like_safety_words: retrieval.looks_like_safety_words
|
||||
looks_like_device_words: retrieval.looks_like_device_words
|
||||
|
||||
# Vocabulary-backed retrieval token lists live in config/retriex/vocabulary.yaml.
|
||||
# The old per-key entries may still be added here to override a specific view.
|
||||
|
||||
@@ -203,6 +203,43 @@ parameters:
|
||||
- filter
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
semantic_search_tokens:
|
||||
add:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrodenkabel
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
- reagenzchemie
|
||||
- kit
|
||||
- set
|
||||
- filter
|
||||
- pumpe
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
- lösung
|
||||
- loesung
|
||||
- solution
|
||||
- teststreifen
|
||||
- gerät
|
||||
- geraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
- controller
|
||||
- system
|
||||
retrieval:
|
||||
generic_product_tokens:
|
||||
add:
|
||||
@@ -267,7 +304,6 @@ parameters:
|
||||
add:
|
||||
- indikator
|
||||
- reagenz
|
||||
- reagens
|
||||
- laborchemikalie
|
||||
- chemikalie
|
||||
- sicherheitsdatenblatt
|
||||
@@ -283,6 +319,7 @@ parameters:
|
||||
- filtereinsatz
|
||||
- kerzenfilter
|
||||
- druckregler
|
||||
- ph
|
||||
looks_like_safety_docs:
|
||||
add:
|
||||
- sicherheitsdatenblatt
|
||||
@@ -346,6 +383,9 @@ parameters:
|
||||
- überwachung
|
||||
- online
|
||||
- monitor
|
||||
- modell
|
||||
- analysegerät
|
||||
- tester
|
||||
search_repair:
|
||||
generic_candidate_tokens:
|
||||
add:
|
||||
@@ -466,12 +506,26 @@ parameters:
|
||||
- zubehor
|
||||
- dazu
|
||||
- indikator
|
||||
- indikatoren
|
||||
- ph-indikator
|
||||
- ph indikator
|
||||
- ph-indikatoren
|
||||
- ph indikatoren
|
||||
- reagenz
|
||||
- kit
|
||||
- set
|
||||
- zusatz
|
||||
- ergänzung
|
||||
- ergaenzung
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrodenkabel
|
||||
- elektrode
|
||||
- puffer
|
||||
- kalibrierpuffer
|
||||
- kalibrierlösung
|
||||
- kalibrierloesung
|
||||
maps:
|
||||
shop:
|
||||
accessory_focus_variants:
|
||||
|
||||
@@ -131,6 +131,7 @@ services:
|
||||
App\Config\PromptBuilderConfig:
|
||||
arguments:
|
||||
$config: '%retriex.prompt.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Config\AgentRunnerConfig:
|
||||
arguments:
|
||||
@@ -139,6 +140,7 @@ services:
|
||||
App\Config\NdjsonHybridRetrieverConfig:
|
||||
arguments:
|
||||
$config: '%retriex.retrieval.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Config\StopWordsConfig:
|
||||
arguments:
|
||||
@@ -206,6 +208,7 @@ services:
|
||||
App\Config\CommerceQueryParserConfig:
|
||||
arguments:
|
||||
$config: '%retriex.commerce_query.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Config\CommerceReferenceResolverConfig:
|
||||
arguments:
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
# RetrieX Patch 43A - Config Reduction / Generic Flow Prep
|
||||
|
||||
## Goal
|
||||
|
||||
Reduce the number of actively duplicated YAML parameters without changing the proven runtime values or introducing an admin UI.
|
||||
|
||||
This patch intentionally does **not** change scoring, ranking, retrieval thresholds, prompt guardrails, or shop matching behavior. It only moves already existing duplicate term lists behind central vocabulary views and renames one follow-up-anchor concept from product-specific names to generic names.
|
||||
|
||||
## Why this is split out
|
||||
|
||||
The larger cleanup should not be delivered as one large patch because it would mix three risk classes:
|
||||
|
||||
1. Safe config deduplication and generic naming.
|
||||
2. Shared product-role resolver logic.
|
||||
3. More generic domain anchor extraction beyond the current Testomat / hardness use case.
|
||||
|
||||
Patch 43A covers only class 1.
|
||||
|
||||
## Changes
|
||||
|
||||
### YAML reduction
|
||||
|
||||
The following direct per-service lists were removed from local service config files and are now resolved through `config/retriex/vocabulary.yaml` views:
|
||||
|
||||
- `prompt.yaml`
|
||||
- `technical_product_keywords`
|
||||
- `accessory_request_keywords`
|
||||
- `retrieval.yaml`
|
||||
- `generic_product_tokens`
|
||||
- `important_short_model_tokens`
|
||||
- `family_descriptor_tokens`
|
||||
- `looks_like_reagent_tokens`
|
||||
- `looks_like_safety_docs`
|
||||
- `looks_like_reagent_words`
|
||||
- `looks_like_document_words`
|
||||
- `looks_like_safety_words`
|
||||
- `looks_like_device_words`
|
||||
- `commerce.yaml`
|
||||
- `semantic_shop_search_tokens`
|
||||
|
||||
The removed local lists are referenced through new `vocabulary_views` mappings.
|
||||
|
||||
### Vocabulary updates
|
||||
|
||||
`vocabulary.yaml` now contains the exact effective legacy values for the moved lists, including the previously local prompt accessory keywords and shop semantic search terms.
|
||||
|
||||
### PHP config facade changes
|
||||
|
||||
These config classes can now resolve either a direct local override or a central vocabulary view:
|
||||
|
||||
- `PromptBuilderConfig`
|
||||
- `NdjsonHybridRetrieverConfig`
|
||||
- `CommerceQueryParserConfig`
|
||||
|
||||
Direct local lists remain backward-compatible. If a project later needs a local override, the old list key can still be added back to the service-specific YAML.
|
||||
|
||||
### Generic follow-up anchor naming
|
||||
|
||||
The follow-up anchor names were made generic:
|
||||
|
||||
- `testomat_model_pattern` -> `product_model_pattern`
|
||||
- `hardness_value_pattern` -> `measurement_value_pattern`
|
||||
- `extractFirstTestomatModelAnchor()` -> `extractFirstProductModelAnchor()`
|
||||
- `extractFirstHardnessValueAnchor()` -> `extractFirstMeasurementValueAnchor()`
|
||||
|
||||
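Backward-compatible accessor aliases remain in `AgentRunnerConfig`. The new accessors also fall back to the legacy `testomat_model_pattern` / `hardness_value_pattern` YAML keys when the generic keys are absent or blank, so existing configuration files keep working.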
|
||||
|
||||
## Behavior impact
|
||||
|
||||
Expected runtime behavior: unchanged.
|
||||
|
||||
A local equivalence check compared all moved lists against the current `rag-inprogress.zip` source values. The moved vocabulary views resolve to the same effective values as before, accounting for the existing de-duplication behavior in the PHP config facades.
|
||||
|
||||
## Checks run locally
|
||||
|
||||
Successful:
|
||||
|
||||
```bash
|
||||
php -l src/Config/PromptBuilderConfig.php
|
||||
php -l src/Config/NdjsonHybridRetrieverConfig.php
|
||||
php -l src/Config/CommerceQueryParserConfig.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Agent/AgentRunner.php
|
||||
```
|
||||
|
||||
Successful custom checks:
|
||||
|
||||
- edited YAML files parse successfully
|
||||
- moved vocabulary lists equal previous effective values
|
||||
|
||||
Not executable in this container:
|
||||
|
||||
```bash
|
||||
php bin/console mto:agent:config:validate
|
||||
php bin/console mto:agent:regression:test
|
||||
php bin/console mto:agent:config:audit-source --details
|
||||
php bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
Reason: the uploaded ZIP does not contain `vendor/`, and Composer installation could not complete in the container because required PHP extensions are missing (`curl`, `dom`, `sqlite3`, `xml`) and external package downloads are not available.
|
||||
|
||||
## Required checks after applying in the project environment
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
## Recommended follow-up patches
|
||||
|
||||
### p43B - Shared ProductRoleResolver
|
||||
|
||||
Centralize product role detection (`main_product`, `accessory`, `consumable`, `spare_part`, `unknown`) so PromptBuilder, ShopSearchService, SearchRepairService and AgentRunner do not maintain parallel role checks.
|
||||
|
||||
### p43C - Generic Domain Anchor Extraction
|
||||
|
||||
Make the current product-model and measurement-value anchor extraction more domain-generic while preserving the existing Testomat / °dH patterns as configured values.
|
||||
@@ -1238,7 +1238,7 @@ final readonly class AgentRunner
|
||||
* These anchors are only used to resolve follow-up references such as
|
||||
* "der Wert" or "welcher Indikator". They are not factual evidence for
|
||||
* the final answer. To avoid propagating wrong earlier answers, only the
|
||||
* first explicit Testomat model reference and the first explicit °dH value
|
||||
* first explicit product-model reference and the first explicit measurement value
|
||||
* are kept. Indicator names, reagent codes, prices, URLs and product
|
||||
* numbers are intentionally ignored here.
|
||||
*
|
||||
@@ -1261,12 +1261,12 @@ final readonly class AgentRunner
|
||||
|
||||
$anchors = [];
|
||||
|
||||
$model = $this->extractFirstTestomatModelAnchor($answer);
|
||||
$model = $this->extractFirstProductModelAnchor($answer);
|
||||
if ($model !== '') {
|
||||
$anchors[] = $model;
|
||||
}
|
||||
|
||||
$hardnessValue = $this->extractFirstHardnessValueAnchor($answer);
|
||||
$hardnessValue = $this->extractFirstMeasurementValueAnchor($answer);
|
||||
if ($hardnessValue !== '') {
|
||||
$anchors[] = $hardnessValue;
|
||||
}
|
||||
@@ -1325,9 +1325,9 @@ final readonly class AgentRunner
|
||||
return array_reverse($turns);
|
||||
}
|
||||
|
||||
private function extractFirstTestomatModelAnchor(string $text): string
|
||||
private function extractFirstProductModelAnchor(string $text): string
|
||||
{
|
||||
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) {
|
||||
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorProductModelPattern(), $text, $matches) !== 1) {
|
||||
return '';
|
||||
}
|
||||
|
||||
@@ -1337,9 +1337,9 @@ final readonly class AgentRunner
|
||||
return trim(str_replace('®', '', $value));
|
||||
}
|
||||
|
||||
private function extractFirstHardnessValueAnchor(string $text): string
|
||||
private function extractFirstMeasurementValueAnchor(string $text): string
|
||||
{
|
||||
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) {
|
||||
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorMeasurementValuePattern(), $text, $matches) !== 1) {
|
||||
return '';
|
||||
}
|
||||
|
||||
@@ -1500,7 +1500,7 @@ final readonly class AgentRunner
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($this->extractFirstTestomatModelAnchor($prompt) !== '') {
|
||||
if ($this->extractFirstProductModelAnchor($prompt) !== '') {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1564,7 +1564,7 @@ final readonly class AgentRunner
|
||||
|
||||
private function hasStandaloneConcreteShopSubject(string $prompt): bool
|
||||
{
|
||||
if ($this->extractFirstTestomatModelAnchor($prompt) !== '') {
|
||||
if ($this->extractFirstProductModelAnchor($prompt) !== '') {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1622,7 +1622,7 @@ final readonly class AgentRunner
|
||||
return $prompt;
|
||||
}
|
||||
|
||||
if ($this->extractFirstTestomatModelAnchor($prompt) === '') {
|
||||
if ($this->extractFirstProductModelAnchor($prompt) === '') {
|
||||
return $optimizedShopQuery;
|
||||
}
|
||||
|
||||
@@ -2249,7 +2249,7 @@ final readonly class AgentRunner
|
||||
continue;
|
||||
}
|
||||
|
||||
$model = $this->extractFirstTestomatModelAnchor($turn);
|
||||
$model = $this->extractFirstProductModelAnchor($turn);
|
||||
|
||||
if ($model !== '') {
|
||||
$query = str_replace(
|
||||
@@ -2334,7 +2334,7 @@ final readonly class AgentRunner
|
||||
}
|
||||
}
|
||||
|
||||
$modelAnchor = $this->extractFirstTestomatModelAnchor($turn);
|
||||
$modelAnchor = $this->extractFirstProductModelAnchor($turn);
|
||||
|
||||
if ($modelAnchor !== '' && !$this->isMetaOnlyShopQuery($modelAnchor)) {
|
||||
return mb_strtolower($modelAnchor, 'UTF-8');
|
||||
|
||||
@@ -125,14 +125,34 @@ final class AgentRunnerConfig
|
||||
return $this->getRequiredString('follow_up_context.history_question_strip_pattern');
|
||||
}
|
||||
|
||||
/**
 * Returns the regex used to pick the product-model reference anchor.
 *
 * The generic key `follow_up_context.reference_anchor.product_model_pattern`
 * wins when it holds a non-blank string; otherwise the legacy
 * `testomat_model_pattern` key is read as a required string.
 */
public function getFollowUpReferenceAnchorProductModelPattern(): string
{
    $configured = $this->optionalValue('follow_up_context.reference_anchor.product_model_pattern');
    $hasGenericPattern = is_string($configured) && trim($configured) !== '';

    // The configured value is returned untrimmed; trim() is only used for the blank check.
    return $hasGenericPattern
        ? $configured
        : $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern');
}
|
||||
|
||||
/**
 * Returns the regex used to pick the measurement-value reference anchor.
 *
 * The generic key `follow_up_context.reference_anchor.measurement_value_pattern`
 * wins when it holds a non-blank string; otherwise the legacy
 * `hardness_value_pattern` key is read as a required string.
 */
public function getFollowUpReferenceAnchorMeasurementValuePattern(): string
{
    $configured = $this->optionalValue('follow_up_context.reference_anchor.measurement_value_pattern');
    $hasGenericPattern = is_string($configured) && trim($configured) !== '';

    // The configured value is returned untrimmed; trim() is only used for the blank check.
    return $hasGenericPattern
        ? $configured
        : $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern');
}
|
||||
|
||||
/**
 * Legacy accessor kept so existing callers keep working.
 *
 * Delegates to getFollowUpReferenceAnchorProductModelPattern(), which reads
 * the generic `product_model_pattern` key first and only then the legacy
 * `testomat_model_pattern` key. The earlier direct read of the legacy key
 * was left ahead of the delegation and made it unreachable; it is removed.
 */
public function getFollowUpReferenceAnchorTestomatModelPattern(): string
{
    return $this->getFollowUpReferenceAnchorProductModelPattern();
}
|
||||
|
||||
/**
 * Legacy accessor kept so existing callers keep working.
 *
 * Delegates to getFollowUpReferenceAnchorMeasurementValuePattern(), which
 * reads the generic `measurement_value_pattern` key first and only then the
 * legacy `hardness_value_pattern` key. The earlier direct read of the legacy
 * key was left ahead of the delegation and made it unreachable; it is removed.
 */
public function getFollowUpReferenceAnchorHardnessValuePattern(): string
{
    return $this->getFollowUpReferenceAnchorMeasurementValuePattern();
}
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ final class CommerceQueryParserConfig
|
||||
*/
|
||||
public function __construct(
|
||||
private readonly array $config = [],
|
||||
private readonly ?DomainVocabularyConfig $vocabulary = null,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -268,7 +269,10 @@ final class CommerceQueryParserConfig
|
||||
/**
 * Returns the semantic shop search tokens.
 *
 * A local `semantic_shop_search_tokens` list takes precedence; without one,
 * the list is resolved through the vocabulary view whose path is configured
 * under `vocabulary_views.semantic_shop_search_tokens`. The former direct
 * `stringList()` return preceded this lookup and made it unreachable.
 *
 * @return string[]
 */
public function getSemanticShopSearchTokens(): array
{
    return $this->configuredStringListOrVocabularyView(
        'semantic_shop_search_tokens',
        'vocabulary_views.semantic_shop_search_tokens',
    );
}
|
||||
|
||||
public function buildExactTokenRemovalPattern(string $token): string
|
||||
@@ -319,6 +323,27 @@ final class CommerceQueryParserConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Resolves a term list from the local config or, failing that, from a vocabulary view.
 *
 * Order of resolution:
 *  1. If $configPath exists in the local config, its string list is returned.
 *  2. Otherwise the view path stored at $viewPathConfigPath is looked up in
 *     the injected vocabulary.
 *
 * Raises the exception built by missing() when no vocabulary is injected and
 * the one built by invalid() when the referenced view yields an empty list.
 *
 * @return string[]
 */
private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    // A locally configured list always overrides the central vocabulary.
    if ($this->hasPath($configPath)) {
        return $this->stringList($configPath);
    }

    $resolver = $this->vocabulary;
    if ($resolver === null) {
        throw $this->missing($configPath);
    }

    $viewName = $this->string($viewPathConfigPath);
    $resolved = $resolver->view($viewName, []);

    if ($resolved !== []) {
        return $resolved;
    }

    throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewName));
}
|
||||
|
||||
/** @return array<string, string> */
|
||||
private function stringMap(string $path): array
|
||||
{
|
||||
@@ -372,6 +397,20 @@ final class CommerceQueryParserConfig
|
||||
return $value;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a dot-separated path exists in the config array.
 *
 * Existence is checked with array_key_exists(), so a key whose value is
 * null still counts as present.
 */
private function hasPath(string $path): bool
{
    $node = $this->config;

    foreach (explode('.', $path) as $name) {
        if (is_array($node) && array_key_exists($name, $node)) {
            $node = $node[$name];

            continue;
        }

        return false;
    }

    return true;
}
|
||||
|
||||
private function value(string $path): mixed
|
||||
{
|
||||
$current = $this->config;
|
||||
|
||||
@@ -13,6 +13,7 @@ final class NdjsonHybridRetrieverConfig
|
||||
*/
|
||||
public function __construct(
|
||||
private array $config = [],
|
||||
private ?DomainVocabularyConfig $vocabulary = null,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -216,55 +217,82 @@ final class NdjsonHybridRetrieverConfig
|
||||
/**
 * Generic product tokens: local `generic_product_tokens` list if present,
 * otherwise the vocabulary view configured under `vocabulary_views`.
 * (The stale direct requiredStringList() return that shadowed this lookup is removed.)
 *
 * @return string[]
 */
public function genericProductTokens(): array
{
    return $this->configuredStringListOrVocabularyView(
        'generic_product_tokens',
        'vocabulary_views.generic_product_tokens',
    );
}

/**
 * Important short model tokens: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function importantShortModelTokens(): array
{
    return $this->configuredStringListOrVocabularyView(
        'important_short_model_tokens',
        'vocabulary_views.important_short_model_tokens',
    );
}

/**
 * Family descriptor tokens: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function familyDescriptorTokens(): array
{
    return $this->configuredStringListOrVocabularyView(
        'family_descriptor_tokens',
        'vocabulary_views.family_descriptor_tokens',
    );
}

/**
 * Reagent-like tokens: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeReagentTokens(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_reagent_tokens',
        'vocabulary_views.looks_like_reagent_tokens',
    );
}

/**
 * Safety-document markers: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeSafetyDocs(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_safety_docs',
        'vocabulary_views.looks_like_safety_docs',
    );
}

/**
 * Reagent-like words: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeReagentWords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_reagent_words',
        'vocabulary_views.looks_like_reagent_words',
    );
}

/**
 * Document-like words: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeDocumentWords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_document_words',
        'vocabulary_views.looks_like_document_words',
    );
}

/**
 * Safety-like words: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeSafetyWords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_safety_words',
        'vocabulary_views.looks_like_safety_words',
    );
}

/**
 * Device-like words: local list if present, otherwise the configured vocabulary view.
 *
 * @return string[]
 */
public function looksLikeDeviceWords(): array
{
    return $this->configuredStringListOrVocabularyView(
        'looks_like_device_words',
        'vocabulary_views.looks_like_device_words',
    );
}
|
||||
|
||||
/**
|
||||
@@ -471,6 +499,74 @@ final class NdjsonHybridRetrieverConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Resolves a term list from the local config or, failing that, from a vocabulary view.
 *
 * A key present at $configPath is read as a required string list and returned.
 * Otherwise the view path stored at $viewPathConfigPath is resolved through
 * the injected vocabulary.
 *
 * Raises the exception built by missing() when no vocabulary is injected and
 * the one built by invalid() when the referenced view yields an empty list.
 *
 * @return string[]
 */
private function configuredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    // A locally configured list always overrides the central vocabulary.
    if ($this->hasKey($configPath)) {
        return $this->requiredStringList($configPath);
    }

    $resolver = $this->vocabulary;
    if ($resolver === null) {
        throw $this->missing($configPath);
    }

    $viewName = $this->requiredPathString($viewPathConfigPath);
    $resolved = $resolver->view($viewName, []);

    if ($resolved !== []) {
        return $resolved;
    }

    throw $this->invalid($viewPathConfigPath, sprintf('references empty vocabulary view "%s"', $viewName));
}
|
||||
|
||||
/**
 * Reads a dot-separated config path and returns it as a trimmed, non-empty string.
 *
 * Any scalar is accepted and cast to string; non-scalars and values that are
 * blank after trimming both raise the exception built by invalid().
 */
private function requiredPathString(string $key): string
{
    $raw = $this->requiredPathValue($key);

    // Non-scalars collapse to '' so both failure modes share one throw site.
    $normalized = is_scalar($raw) ? trim((string) $raw) : '';

    if ($normalized === '') {
        throw $this->invalid($key, 'must be a non-empty string');
    }

    return $normalized;
}
|
||||
|
||||
/**
 * Walks the config array along a dot-separated path and returns the value found there.
 *
 * Raises the exception built by missing() as soon as a segment cannot be
 * resolved. A key holding null counts as present and null is returned.
 */
private function requiredPathValue(string $key): mixed
{
    $node = $this->config;

    foreach (explode('.', $key) as $name) {
        if (is_array($node) && array_key_exists($name, $node)) {
            $node = $node[$name];

            continue;
        }

        throw $this->missing($key);
    }

    return $node;
}
|
||||
|
||||
/**
 * Tells whether a dot-separated path exists in the config array.
 *
 * Existence is checked with array_key_exists(), so a key whose value is
 * null still counts as present.
 */
private function hasKey(string $key): bool
{
    $node = $this->config;

    foreach (explode('.', $key) as $name) {
        if (is_array($node) && array_key_exists($name, $node)) {
            $node = $node[$name];

            continue;
        }

        return false;
    }

    return true;
}
|
||||
|
||||
private function requiredValue(string $key): mixed
|
||||
{
|
||||
if (!array_key_exists($key, $this->config)) {
|
||||
|
||||
@@ -11,6 +11,7 @@ final class PromptBuilderConfig
|
||||
*/
|
||||
public function __construct(
|
||||
private readonly array $config = [],
|
||||
private readonly ?DomainVocabularyConfig $vocabulary = null,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -159,6 +160,35 @@ final class PromptBuilderConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Resolves a term list from the local prompt config or, failing that, from a vocabulary view.
 *
 * A key present at $configPath is read as a required string list and returned.
 * Otherwise the view path stored at $viewPathConfigPath is resolved through
 * the injected vocabulary.
 *
 * @return string[]
 *
 * @throws \InvalidArgumentException when no vocabulary is injected or the view resolves to an empty list
 */
private function getConfiguredStringListOrVocabularyView(string $configPath, string $viewPathConfigPath): array
{
    // A locally configured list always overrides the central vocabulary.
    if ($this->hasPath($configPath)) {
        return $this->getRequiredStringList($configPath);
    }

    $resolver = $this->vocabulary;
    if ($resolver === null) {
        $message = sprintf(
            'RetrieX prompt config path "%s" is missing and no vocabulary resolver is available.',
            $configPath
        );

        throw new \InvalidArgumentException($message);
    }

    $viewName = $this->getRequiredString($viewPathConfigPath);
    $resolved = $resolver->view($viewName, []);

    if ($resolved !== []) {
        return $resolved;
    }

    throw new \InvalidArgumentException(sprintf(
        'RetrieX prompt vocabulary view "%s" resolved to an empty list.',
        $viewName
    ));
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
@@ -193,6 +223,21 @@ final class PromptBuilderConfig
|
||||
|
||||
|
||||
|
||||
/**
 * Tells whether a dot-separated path exists in the prompt config array.
 *
 * Existence is checked with array_key_exists(), so a key whose value is
 * null still counts as present.
 */
private function hasPath(string $path): bool
{
    $node = $this->config;

    foreach (explode('.', $path) as $name) {
        if (is_array($node) && array_key_exists($name, $node)) {
            $node = $node[$name];

            continue;
        }

        return false;
    }

    return true;
}
|
||||
|
||||
private function getOptionalValue(string $path): mixed
|
||||
{
|
||||
$current = $this->config;
|
||||
@@ -573,7 +618,10 @@ final class PromptBuilderConfig
|
||||
*/
|
||||
public function getTechnicalProductKeywords(): array
{
    // A local `technical_product_keywords` list wins; otherwise the list comes
    // from the vocabulary view configured under `vocabulary_views`. The former
    // direct getRequiredStringList() return made this lookup unreachable.
    return $this->getConfiguredStringListOrVocabularyView(
        'technical_product_keywords',
        'vocabulary_views.technical_product_keywords',
    );
}
|
||||
|
||||
/**
|
||||
@@ -581,7 +629,10 @@ final class PromptBuilderConfig
|
||||
*/
|
||||
public function getAccessoryRequestKeywords(): array
{
    // A local `accessory_request_keywords` list wins; otherwise the list comes
    // from the vocabulary view configured under `vocabulary_views`. The former
    // direct getRequiredStringList() return made this lookup unreachable.
    return $this->getConfiguredStringListOrVocabularyView(
        'accessory_request_keywords',
        'vocabulary_views.accessory_request_keywords',
    );
}
|
||||
|
||||
public function getMeasurementEvidenceSectionLabel(): string
|
||||
|
||||
Reference in New Issue
Block a user