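Last step: move the commerce query parser and search-repair settings out of the domain vocabulary into dedicated YAML configuration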
This commit is contained in:
@@ -11,6 +11,215 @@ parameters:
|
||||
retriex.commerce.search_repair.max_queries: 2
|
||||
retriex.commerce.search_repair.min_primary_results_without_repair: 2
|
||||
|
||||
# Commerce query parser configuration.
|
||||
# YAML is the only operative source of truth; PHP must not contain parser defaults.
|
||||
retriex.commerce_query.config:
|
||||
known_brands:
|
||||
- heyl
|
||||
- horiba
|
||||
- neomeris
|
||||
|
||||
phrases_to_remove:
|
||||
- ich suche
|
||||
- suche
|
||||
- habt ihr
|
||||
- gibt es
|
||||
- gebe mir
|
||||
- gib mir
|
||||
- zeige mir
|
||||
- welches gerät
|
||||
- welche gerät
|
||||
- welches modell
|
||||
- welches ist besser
|
||||
- welches ist am besten
|
||||
- alternative
|
||||
- alternativen
|
||||
- unter anderem
|
||||
- u a
|
||||
- welche
|
||||
- welcher
|
||||
- welches
|
||||
- welchen
|
||||
- sind
|
||||
- ist
|
||||
- geeignet
|
||||
- geeigent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
|
||||
filter_search_tokens:
|
||||
- auch
|
||||
- noch
|
||||
- nochmal
|
||||
- zusätzlich
|
||||
- dazu
|
||||
- davon
|
||||
- stattdessen
|
||||
- bitte
|
||||
- gern
|
||||
- gerne
|
||||
- zeige
|
||||
- zeig
|
||||
- such
|
||||
- suche
|
||||
- finde
|
||||
- find
|
||||
- mir
|
||||
- mal
|
||||
- von
|
||||
- im
|
||||
- in
|
||||
- für
|
||||
- fuer
|
||||
- welche
|
||||
- welcher
|
||||
- welches
|
||||
- welchen
|
||||
- sind
|
||||
- ist
|
||||
- geeignet
|
||||
- geeigent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
- prüfe
|
||||
- pruefe
|
||||
- den
|
||||
- die
|
||||
- das
|
||||
- der
|
||||
- dem
|
||||
- des
|
||||
- und
|
||||
- oder
|
||||
- sowie
|
||||
- seine
|
||||
- seinen
|
||||
- seiner
|
||||
- seinem
|
||||
- seines
|
||||
- siene
|
||||
- sienen
|
||||
- siener
|
||||
- sienem
|
||||
- sienes
|
||||
- gebe
|
||||
- gib
|
||||
- nenne
|
||||
- nenn
|
||||
- preis
|
||||
- preise
|
||||
- preisen
|
||||
- kostet
|
||||
- kosten
|
||||
- ua
|
||||
- also
|
||||
- gut
|
||||
- gute
|
||||
- guten
|
||||
- guter
|
||||
- gutes
|
||||
- passen
|
||||
- passend
|
||||
|
||||
search_token_corrections:
|
||||
siene: seine
|
||||
sienen: seinen
|
||||
siener: seiner
|
||||
sienem: seinem
|
||||
sienes: seines
|
||||
indicatoren: indikatoren
|
||||
|
||||
search_token_canonical_map:
|
||||
indikatoren: indikator
|
||||
indicators: indikator
|
||||
indicator: indikator
|
||||
reagenzien: reagenz
|
||||
reagents: reagenz
|
||||
reagent: reagenz
|
||||
produkte: produkt
|
||||
|
||||
semantic_shop_search_tokens:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
- reagenzchemie
|
||||
- kit
|
||||
- set
|
||||
- filter
|
||||
- pumpe
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
- lösung
|
||||
- loesung
|
||||
- solution
|
||||
- teststreifen
|
||||
- gerät
|
||||
- geraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
- controller
|
||||
- system
|
||||
|
||||
normalization:
|
||||
search: ['€']
|
||||
replace: [' euro ']
|
||||
|
||||
text:
|
||||
trim_characters:
|
||||
- space
|
||||
- tab
|
||||
- lf
|
||||
- cr
|
||||
- nul
|
||||
- vertical_tab
|
||||
- '-'
|
||||
- '.'
|
||||
- ','
|
||||
|
||||
limits:
|
||||
min_search_token_length: 1
|
||||
min_direct_product_token_length: 1
|
||||
direct_product_max_tokens: 4
|
||||
model_context_token_window: 4
|
||||
min_meaningful_alpha_token_length: 2
|
||||
max_shop_search_tokens: 6
|
||||
|
||||
patterns:
|
||||
history_context: 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt'
|
||||
history_context_value_template: '/\b({fragment})\b/u'
|
||||
prompt_sanitize: '/[^\p{L}\p{N}\s.,\-]/u'
|
||||
whitespace_collapse: '/\s+/u'
|
||||
whitespace_split: '/\s+/u'
|
||||
history_question: '/^Question:\s*(.+)$/m'
|
||||
price_between: '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
|
||||
price_max: '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
|
||||
price_min: '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
|
||||
price_removal_between: '/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u'
|
||||
price_removal_minmax: '/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u'
|
||||
price_removal_intent_template: '/\b(?:{price_pattern})\b/u'
|
||||
direct_product_digit: '/\d/u'
|
||||
model_like: '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u'
|
||||
accessory_like: '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u'
|
||||
contains_digit: '/\d/u'
|
||||
model_number_token: '/^(?:\d{2,5}[a-z0-9\-]*|[a-z]{1,6}\d{1,5}[a-z0-9\-]*)$/u'
|
||||
model_context_token: '/^[\p{L}][\p{L}0-9®\-]{2,}$/u'
|
||||
model_suffix_token: '/^[a-z]{1,4}\d{0,3}$/u'
|
||||
instruction_or_presentation_token: '/^(?:zeig(?:e)?|such(?:e)?|find(?:e)?|gib|gebe|nenn(?:e)?|liefer(?:e)?|erstelle?|mach(?:e)?|brauch(?:e)?|will|möchte|moechte|hätte|haette|kannst|bitte|mal|alle|alles|komplett|vollständig|vollstaendig|gesamt|ganze|ganzen|liste|listung|auflistung|tabelle|tabellarisch|übersicht|uebersicht|anzeigen?|ausgeben?|darstellen?|antwort(?:e)?|erklär(?:e)?|erklaer(?:e)?|info|infos|informationen|dazu|hierzu|damit|davon|an|als|mit|ohne|inkl|inklusive|also|gut|gute|guten|guter|gutes|passend|passen)$/u'
|
||||
measurement_value_token: '/^\d+[.,]\d+$/u'
|
||||
exact_token_removal_template: '/\b{token}\b/u'
|
||||
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
|
||||
|
||||
# Shop matching and presentation configuration.
|
||||
# Defaults are intentionally identical to the previous PHP values.
|
||||
retriex.shop_matching.config:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Query enrichment vocabulary.
|
||||
# Defaults are intentionally identical to the previous PHP mapping.
|
||||
# YAML is the source of truth; QueryEnricherConfig intentionally contains no PHP defaults.
|
||||
parameters:
|
||||
retriex.query_enrichment.config:
|
||||
max_expansions: 4
|
||||
|
||||
66
config/retriex/search_repair.yaml
Normal file
66
config/retriex/search_repair.yaml
Normal file
@@ -0,0 +1,66 @@
|
||||
# Shop search-repair configuration.
|
||||
# YAML is the source of truth; SearchRepairConfig intentionally contains no PHP defaults.
|
||||
parameters:
|
||||
retriex.search_repair.config:
|
||||
strict_requested_accessory_code_repair: true
|
||||
prefer_prompt_anchored_model_for_requested_accessory_code: true
|
||||
|
||||
requested_accessory_code_fallback_query_templates:
|
||||
- '{term} {code}'
|
||||
requested_accessory_code_fallback_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
requested_accessory_code_context_prefix_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
requested_accessory_code_proximity_window: 1600
|
||||
|
||||
specific_model_candidate_patterns:
|
||||
- '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß0-9][A-Za-zÄÖÜäöüß0-9®\-]*){0,3}\s+\d{2,5}(?:\s+[A-ZÄÖÜ]{1,8})?)\b/u'
|
||||
model_candidate_exclude_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
- shop
|
||||
|
||||
limits:
|
||||
top_product_log_limit: 3
|
||||
|
||||
# Character codes preserve the previous PHP trim charlist:
|
||||
# space, tab, LF, CR, NUL, vertical tab, double quote, single quote,
|
||||
# backtick, dot, comma, semicolon, colon, hyphen.
|
||||
sanitize_trim_character_codes: [32, 9, 10, 13, 0, 11, 34, 39, 96, 46, 44, 59, 58, 45]
|
||||
product_key_separator: '|'
|
||||
|
||||
scores:
|
||||
candidate_digit: 4
|
||||
candidate_word_count_cap: 4
|
||||
specificity_boost: 3
|
||||
primary_query_overlap_threshold: 0.9
|
||||
prompt_match_weight: 3
|
||||
primary_query_match_weight: 2
|
||||
repair_signal_match_weight: 4
|
||||
primary_result_order_bonus: 1
|
||||
token_intersection_score: 2
|
||||
numeric_token_match_score: 4
|
||||
|
||||
patterns:
|
||||
model_candidate: '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u'
|
||||
accessory_candidate_template: '/\b((?:{terms})\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu'
|
||||
requested_accessory_code: '/\b(?:indikator(?:typ)?|indicator(?:\s*type)?|reagenz|reagent)\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'
|
||||
accessory_or_bundle_template: '/\b({terms})\b/iu'
|
||||
model_like: '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u'
|
||||
specificity_boost_template: '/\b(?:{terms})\b/iu'
|
||||
contains_digit: '/\d/u'
|
||||
whitespace_collapse: '/\s+/u'
|
||||
tokenize_cleanup: '/[^\p{L}\p{N}\s\-]+/u'
|
||||
@@ -1,37 +1,6 @@
|
||||
# Central domain vocabulary for RetrieX.
|
||||
# Views preserve the previous 1.4.2-tuned ordering exactly; per-service configs may still override them.
|
||||
parameters:
|
||||
retriex.commerce_query.config: {}
|
||||
retriex.search_repair.config:
|
||||
strict_requested_accessory_code_repair: true
|
||||
prefer_prompt_anchored_model_for_requested_accessory_code: true
|
||||
requested_accessory_code_pattern: '/\b(?:indikator(?:typ)?|indicator(?:\s*type)?|reagenz|reagent)\s*([A-Za-z]{0,3}\s*\d{1,5}[A-Za-z0-9\-]*)\b/iu'
|
||||
requested_accessory_code_fallback_query_templates:
|
||||
- '{term} {code}'
|
||||
requested_accessory_code_fallback_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
requested_accessory_code_context_prefix_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
requested_accessory_code_proximity_window: 1600
|
||||
specific_model_candidate_patterns:
|
||||
- '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß0-9][A-Za-zÄÖÜäöüß0-9®\-]*){0,3}\s+\d{2,5}(?:\s+[A-ZÄÖÜ]{1,8})?)\b/u'
|
||||
model_candidate_exclude_terms:
|
||||
- indikatortyp
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
- shop
|
||||
retriex.vocabulary.config:
|
||||
classes:
|
||||
device:
|
||||
@@ -231,147 +200,6 @@ parameters:
|
||||
- filter
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
commerce_query:
|
||||
known_brands:
|
||||
add:
|
||||
- heyl
|
||||
- horiba
|
||||
- neomeris
|
||||
phrases_to_remove:
|
||||
add:
|
||||
- ich suche
|
||||
- suche
|
||||
- habt ihr
|
||||
- gibt es
|
||||
- gebe mir
|
||||
- gib mir
|
||||
- zeige mir
|
||||
- welches gerät
|
||||
- welche gerät
|
||||
- welches modell
|
||||
- welches ist besser
|
||||
- welches ist am besten
|
||||
- alternative
|
||||
- alternativen
|
||||
- unter anderem
|
||||
- u a
|
||||
- welche
|
||||
- welcher
|
||||
- welches
|
||||
- welchen
|
||||
- sind
|
||||
- ist
|
||||
- geeignet
|
||||
- geeigent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
filter_search_tokens:
|
||||
add:
|
||||
- auch
|
||||
- noch
|
||||
- nochmal
|
||||
- zusätzlich
|
||||
- dazu
|
||||
- davon
|
||||
- stattdessen
|
||||
- bitte
|
||||
- gern
|
||||
- gerne
|
||||
- zeige
|
||||
- zeig
|
||||
- such
|
||||
- suche
|
||||
- finde
|
||||
- find
|
||||
- mir
|
||||
- mal
|
||||
- von
|
||||
- im
|
||||
- in
|
||||
- für
|
||||
- fuer
|
||||
- welche
|
||||
- welcher
|
||||
- welches
|
||||
- welchen
|
||||
- sind
|
||||
- ist
|
||||
- geeignet
|
||||
- geeigent
|
||||
- verfügbarkeit
|
||||
- verfuegbarkeit
|
||||
- prüfe
|
||||
- pruefe
|
||||
- den
|
||||
- die
|
||||
- das
|
||||
- der
|
||||
- dem
|
||||
- des
|
||||
- und
|
||||
- oder
|
||||
- sowie
|
||||
- seine
|
||||
- seinen
|
||||
- seiner
|
||||
- seinem
|
||||
- seines
|
||||
- siene
|
||||
- sienen
|
||||
- siener
|
||||
- sienem
|
||||
- sienes
|
||||
- gebe
|
||||
- gib
|
||||
- nenne
|
||||
- nenn
|
||||
- preis
|
||||
- preise
|
||||
- preisen
|
||||
- kostet
|
||||
- kosten
|
||||
- ua
|
||||
- also
|
||||
- gut
|
||||
- gute
|
||||
- guten
|
||||
- guter
|
||||
- gutes
|
||||
- passen
|
||||
- passend
|
||||
semantic_shop_search_tokens:
|
||||
add:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
- reagenzchemie
|
||||
- kit
|
||||
- set
|
||||
- filter
|
||||
- pumpe
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
- lösung
|
||||
- loesung
|
||||
- solution
|
||||
- teststreifen
|
||||
- gerät
|
||||
- geraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
- controller
|
||||
- system
|
||||
retrieval:
|
||||
generic_product_tokens:
|
||||
add:
|
||||
@@ -660,19 +488,3 @@ parameters:
|
||||
- service set
|
||||
- serviceset
|
||||
- service-set
|
||||
commerce_query:
|
||||
search_token_corrections:
|
||||
siene: seine
|
||||
sienen: seinen
|
||||
siener: seiner
|
||||
sienem: seinem
|
||||
sienes: seines
|
||||
indicatoren: indikatoren
|
||||
search_token_canonical:
|
||||
indikatoren: indikator
|
||||
indicators: indikator
|
||||
indicator: indikator
|
||||
reagenzien: reagenz
|
||||
reagents: reagenz
|
||||
reagent: reagenz
|
||||
produkte: produkt
|
||||
|
||||
@@ -9,6 +9,7 @@ imports:
|
||||
- { resource: 'retriex/retrieval.yaml' }
|
||||
- { resource: 'retriex/language.yaml' }
|
||||
- { resource: 'retriex/query_enrichment.yaml' }
|
||||
- { resource: 'retriex/search_repair.yaml' }
|
||||
- { resource: 'retriex/vocabulary.yaml' }
|
||||
- { resource: 'retriex/intent.yaml' }
|
||||
|
||||
@@ -129,7 +130,6 @@ services:
|
||||
App\Config\PromptBuilderConfig:
|
||||
arguments:
|
||||
$config: '%retriex.prompt.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Config\AgentRunnerConfig:
|
||||
arguments:
|
||||
@@ -138,7 +138,6 @@ services:
|
||||
App\Config\NdjsonHybridRetrieverConfig:
|
||||
arguments:
|
||||
$config: '%retriex.retrieval.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Config\StopWordsConfig:
|
||||
arguments:
|
||||
@@ -151,7 +150,6 @@ services:
|
||||
App\Config\ShopServiceConfig:
|
||||
arguments:
|
||||
$config: '%retriex.shop_matching.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Infrastructure\OllamaClient:
|
||||
arguments:
|
||||
@@ -199,7 +197,6 @@ services:
|
||||
App\Config\CommerceQueryParserConfig:
|
||||
arguments:
|
||||
$config: '%retriex.commerce_query.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Commerce\CommerceQueryParser: ~
|
||||
|
||||
@@ -209,7 +206,6 @@ services:
|
||||
$maxRepairQueries: '%retriex.commerce.search_repair.max_queries%'
|
||||
$minPrimaryResultsWithoutRepair: '%retriex.commerce.search_repair.min_primary_results_without_repair%'
|
||||
$config: '%retriex.search_repair.config%'
|
||||
$vocabulary: '@App\Config\DomainVocabularyConfig'
|
||||
|
||||
App\Commerce\SearchRepairService: ~
|
||||
|
||||
|
||||
Reference in New Issue
Block a user