last step
This commit is contained in:
@@ -11,6 +11,215 @@ parameters:
|
||||
# Two integer parameters in the retriex.commerce.search_repair namespace; both are set to 2.
# NOTE(review): judging by the key names, max_queries presumably caps the number of
# repair queries and min_primary_results_without_repair is presumably the primary
# result count at which no repair is attempted — confirm against the consumer.
retriex.commerce.search_repair.max_queries: 2
|
||||
retriex.commerce.search_repair.min_primary_results_without_repair: 2
|
||||
|
||||
# Commerce query parser configuration.
|
||||
# YAML is the only operative source of truth; PHP must not contain parser defaults.
|
||||
retriex.commerce_query.config:
|
||||
# Brand names known to the commerce query parser, written in lowercase.
# NOTE(review): presumably these are the values substituted for {brand} in
# patterns.brand_part_of_model_template — confirm against the consumer.
known_brands:
|
||||
- heyl
|
||||
- horiba
|
||||
- neomeris
|
||||
|
||||
# Lowercase German phrases and single words that, per the key name, are removed
# from the query text.
# Multi-word phrases are listed before the single words they contain
# (e.g. 'ich suche' before 'suche', 'welches gerät' before 'welches').
# NOTE(review): if the consumer removes entries in list order, this ordering is
# significant and must be kept — confirm.
phrases_to_remove:
|
||||
- ich suche
|
||||
- suche
|
||||
- habt ihr
|
||||
- gibt es
|
||||
- gebe mir
|
||||
- gib mir
|
||||
- zeige mir
|
||||
- welches gerät
|
||||
# NOTE(review): 'welche gerät' looks like a deliberately tolerated user
# misspelling of the entry above — confirm.
|
||||
- welche gerät
|
||||
- welches modell
|
||||
|
||||
- welches ist besser
|
||||
|
||||
- welches ist am besten
|
||||
|
||||
- alternative
|
||||
|
||||
- alternativen
|
||||
|
||||
- unter anderem
|
||||
# 'u a' is listed with a space; filter_search_tokens lists the joined form 'ua'.
|
||||
- u a
|
||||
|
||||
- welche
|
||||
|
||||
- welcher
|
||||
|
||||
- welches
|
||||
|
||||
- welchen
|
||||
|
||||
- sind
|
||||
|
||||
- ist
|
||||
|
||||
- geeignet
|
||||
# 'geeigent' is a transposed-letter variant of 'geeignet' listed alongside it.
|
||||
- geeigent
|
||||
|
||||
- verfügbarkeit
|
||||
# ASCII (ue) spelling of the entry above.
|
||||
- verfuegbarkeit
|
||||
|
||||
# Lowercase single-word tokens that, per the key name, are filtered out of the
# search tokens: follow-up words, polite fillers, imperative verbs, prepositions,
# articles, conjunctions, possessives, price words and quality adjectives.
# Several entries appear in both umlaut and ASCII spelling (für/fuer,
# verfügbarkeit/verfuegbarkeit, prüfe/pruefe).
# NOTE(review): 'zusätzlich' has no ASCII counterpart ('zusaetzlich') in this
# list — confirm whether that is intended.
filter_search_tokens:
|
||||
- auch
|
||||
- noch
|
||||
- nochmal
|
||||
- zusätzlich
|
||||
- dazu
|
||||
- davon
|
||||
- stattdessen
|
||||
- bitte
|
||||
- gern
|
||||
- gerne
|
||||
- zeige
|
||||
- zeig
|
||||
- such
|
||||
- suche
|
||||
- finde
|
||||
- find
|
||||
- mir
|
||||
- mal
|
||||
- von
|
||||
- im
|
||||
- in
|
||||
- für
|
||||
- fuer
|
||||
# The next ten entries also appear at the end of phrases_to_remove.
|
||||
- welche
|
||||
- welcher
|
||||
|
||||
- welches
|
||||
|
||||
- welchen
|
||||
|
||||
- sind
|
||||
|
||||
- ist
|
||||
|
||||
- geeignet
|
||||
|
||||
- geeigent
|
||||
|
||||
- verfügbarkeit
|
||||
|
||||
- verfuegbarkeit
|
||||
|
||||
- prüfe
|
||||
|
||||
- pruefe
|
||||
|
||||
- den
|
||||
|
||||
- die
|
||||
|
||||
- das
|
||||
|
||||
- der
|
||||
|
||||
- dem
|
||||
|
||||
- des
|
||||
|
||||
- und
|
||||
|
||||
- oder
|
||||
|
||||
- sowie
|
||||
|
||||
- seine
|
||||
|
||||
- seinen
|
||||
|
||||
- seiner
|
||||
|
||||
- seinem
|
||||
|
||||
- seines
|
||||
# The five 'sien*' entries below are the misspellings that
# search_token_corrections maps to the 'sein*' forms above.
|
||||
- siene
|
||||
- sienen
|
||||
|
||||
- siener
|
||||
|
||||
- sienem
|
||||
|
||||
- sienes
|
||||
|
||||
- gebe
|
||||
|
||||
- gib
|
||||
|
||||
- nenne
|
||||
|
||||
- nenn
|
||||
|
||||
- preis
|
||||
|
||||
- preise
|
||||
|
||||
- preisen
|
||||
|
||||
- kostet
|
||||
|
||||
- kosten
|
||||
|
||||
- ua
|
||||
|
||||
- also
|
||||
|
||||
- gut
|
||||
|
||||
- gute
|
||||
|
||||
- guten
|
||||
|
||||
- guter
|
||||
|
||||
- gutes
|
||||
|
||||
- passen
|
||||
|
||||
- passend
|
||||
|
||||
# Mapping from a misspelled token (key) to its corrected spelling (value).
# The five 'sien*' keys are transposed-letter variants of 'sein*';
# 'indicatoren' is corrected to 'indikatoren', which search_token_canonical_map
# in turn maps to 'indikator'.
# NOTE(review): that chain only works if corrections are applied before the
# canonical map — confirm the order in the consumer.
search_token_corrections:
|
||||
siene: seine
|
||||
sienen: seinen
|
||||
siener: seiner
|
||||
sienem: seinem
|
||||
sienes: seines
|
||||
indicatoren: indikatoren
|
||||
|
||||
# Mapping from a token variant (key) to one canonical token (value).
# German plurals and English singular/plural forms collapse to the German
# singular: indikator, reagenz, produkt.
search_token_canonical_map:
|
||||
indikatoren: indikator
|
||||
indicators: indikator
|
||||
indicator: indikator
|
||||
reagenzien: reagenz
|
||||
reagents: reagenz
|
||||
reagent: reagenz
|
||||
produkte: produkt
|
||||
|
||||
# Lowercase product-type tokens (consumables, accessories, parts, devices),
# given in German and partly in English.
# NOTE(review): per the key name these are presumably the tokens treated as
# semantically meaningful for the shop search — confirm against the consumer.
# Umlaut words are listed together with an ASCII fallback spelling.
semantic_shop_search_tokens:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
# 'zubehoer' added: every other ASCII fallback in this file uses the ae/oe/ue
# transliteration (loesung, geraet, fuer, pruefe), so the 'oe' spelling of
# 'zubehör' was not recognised. 'zubehor' is kept for backward compatibility.
|
||||
- zubehoer
|
||||
- ersatzteil
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
- reagenzchemie
|
||||
- kit
|
||||
- set
|
||||
- filter
|
||||
- pumpe
|
||||
- pumpenkopf
|
||||
- motorblock
|
||||
- lösung
|
||||
- loesung
|
||||
- solution
|
||||
- teststreifen
|
||||
- gerät
|
||||
- geraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- analysator
|
||||
- monitor
|
||||
- controller
|
||||
- system
|
||||
|
||||
# Character normalization expressed as two single-element lists:
# search holds '€' and replace holds ' euro ' (with a leading and trailing space).
# The price_* patterns in this file match the literal word 'euro'.
# NOTE(review): presumably the lists are applied pairwise by position
# (search[i] -> replace[i]) — confirm against the consumer.
normalization:
|
||||
search: ['€']
|
||||
replace: [' euro ']
|
||||
|
||||
# Characters to trim, per the key name. Six are given as symbolic names
# (space, tab, lf, cr, nul, vertical_tab) and three as quoted literals
# ('-', '.', ',').
# NOTE(review): the symbolic names presumably have to be translated to the
# actual characters by the consumer — confirm which names it accepts.
text:
|
||||
trim_characters:
|
||||
- space
|
||||
- tab
|
||||
- lf
|
||||
- cr
|
||||
- nul
|
||||
- vertical_tab
|
||||
- '-'
|
||||
- '.'
|
||||
- ','
|
||||
|
||||
# Integer limits for the query parser: two minimum token lengths of 1, a
# minimum "meaningful alpha" token length of 2, two values of 4 (direct product
# max tokens, model context token window) and a maximum of 6 shop search tokens.
# NOTE(review): the exact semantics (inclusive/exclusive, unit of "length")
# are defined by the consumer and are not visible here — confirm before changing.
limits:
|
||||
min_search_token_length: 1
|
||||
min_direct_product_token_length: 1
|
||||
direct_product_max_tokens: 4
|
||||
model_context_token_window: 4
|
||||
min_meaningful_alpha_token_length: 2
|
||||
max_shop_search_tokens: 6
|
||||
|
||||
# Regular expressions and regex templates for the query parser.
# All values are single-quoted, so backslashes are literal YAML characters.
# Most patterns are delimited as /.../u; templates contain {placeholder} markers.
# The letter classes are lowercase only and no pattern carries an 'i' flag.
# NOTE(review): this assumes the input is lowercased before matching — confirm.
patterns:
# Bare alternation of German follow-up words, without delimiters.
# NOTE(review): presumably inserted as {fragment} into
# history_context_value_template — confirm.
|
||||
history_context: 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt'
# Template: wraps {fragment} in a capture group between word boundaries.
|
||||
history_context_value_template: '/\b({fragment})\b/u'
# Matches any single character that is not a letter, a number, whitespace,
# '.', ',' or '-'.
|
||||
prompt_sanitize: '/[^\p{L}\p{N}\s.,\-]/u'
# whitespace_collapse and whitespace_split are the same expression: one or
# more whitespace characters.
|
||||
whitespace_collapse: '/\s+/u'
|
||||
whitespace_split: '/\s+/u'
# Multiline mode: a line starting with 'Question:'; captures the rest of the
# line after optional whitespace.
|
||||
history_question: '/^Question:\s*(.+)$/m'
# 'zwischen <n> und <n> euro'; captures both numbers. A number is digits with
# an optional '.' or ',' decimal part.
|
||||
price_between: '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
# 'unter|bis|max|maximal <n> euro'; captures the number.
|
||||
price_max: '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
# 'ab|mindestens|min <n> euro'; captures the number.
|
||||
price_min: '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u'
# The two price_removal_* patterns match the same phrases as the extraction
# patterns above but without capture groups, and they allow zero whitespace
# before 'euro' (\s*) where the extraction patterns require at least one (\s+).
# NOTE(review): input such as 'unter 50euro' would match removal but not
# extraction — confirm whether that difference is intended.
|
||||
price_removal_between: '/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u'
|
||||
price_removal_minmax: '/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u'
# Template: {price_pattern} inside a non-capturing group between word boundaries.
|
||||
price_removal_intent_template: '/\b(?:{price_pattern})\b/u'
# direct_product_digit and contains_digit (further below) are the same
# expression: any single digit.
|
||||
direct_product_digit: '/\d/u'
# One to three lowercase words (a-z, äöüß; later characters may also be ® or
# '-'), then whitespace and a token that starts with 2-5 digits followed by
# optional lowercase letters, digits or hyphens.
|
||||
model_like: '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u'
# One of indikator|indicator|reagenz|reagent|kit|set, then whitespace and a
# token that starts with 1-5 digits.
|
||||
accessory_like: '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u'
|
||||
contains_digit: '/\d/u'
# Whole-token match: either 2-5 digits plus an optional [a-z0-9-] suffix, or
# 1-6 lowercase ASCII letters, then 1-5 digits, then an optional suffix.
|
||||
model_number_token: '/^(?:\d{2,5}[a-z0-9\-]*|[a-z]{1,6}\d{1,5}[a-z0-9\-]*)$/u'
# Whole-token match: a letter followed by at least two more characters from
# letters, ASCII digits, ® and '-' (minimum length 3).
|
||||
model_context_token: '/^[\p{L}][\p{L}0-9®\-]{2,}$/u'
# Whole-token match: 1-4 lowercase ASCII letters followed by 0-3 digits.
|
||||
model_suffix_token: '/^[a-z]{1,4}\d{0,3}$/u'
# Whole-token match against a fixed alternation of German instruction,
# presentation and filler words (umlaut and ASCII spellings).
|
||||
instruction_or_presentation_token: '/^(?:zeig(?:e)?|such(?:e)?|find(?:e)?|gib|gebe|nenn(?:e)?|liefer(?:e)?|erstelle?|mach(?:e)?|brauch(?:e)?|will|möchte|moechte|hätte|haette|kannst|bitte|mal|alle|alles|komplett|vollständig|vollstaendig|gesamt|ganze|ganzen|liste|listung|auflistung|tabelle|tabellarisch|übersicht|uebersicht|anzeigen?|ausgeben?|darstellen?|antwort(?:e)?|erklär(?:e)?|erklaer(?:e)?|info|infos|informationen|dazu|hierzu|damit|davon|an|als|mit|ohne|inkl|inklusive|also|gut|gute|guten|guter|gutes|passend|passen)$/u'
# Whole-token match: digits, one '.' or ',', digits (a decimal number).
|
||||
measurement_value_token: '/^\d+[.,]\d+$/u'
# Template: {token} between word boundaries.
# NOTE(review): if the consumer inserts the token unescaped, regex
# metacharacters in it would be interpreted — confirm it is quoted first.
|
||||
exact_token_removal_template: '/\b{token}\b/u'
# Template: {brand}, then whitespace and a token that starts with 2-5 digits.
|
||||
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
|
||||
|
||||
# Shop matching and presentation configuration.
|
||||
# Defaults are intentionally identical to the previous PHP values.
|
||||
retriex.shop_matching.config:
|
||||
|
||||
Reference in New Issue
Block a user