cleanup code

This commit is contained in:
team 1
2026-04-21 18:22:57 +02:00
parent ce859b9662
commit 86caae5552
2 changed files with 84 additions and 65 deletions

View File

@@ -99,4 +99,69 @@ final class NdjsonHybridRetrieverConfig
* - still allow relevant continuation when needed
*/
public const MIN_CHUNK_DISTANCE = 2;
/**
* When one document clearly dominates the top-ranked window,
* temporarily switch from "spread" mode to "dominant document" mode.
*
* Presumably the size of that top-ranked window (number of ranked results
* inspected) — TODO confirm against the code that reads this constant.
*/
public const DOMINANT_DOC_WINDOW = 6;
/**
* Presumably the minimum number of hits a single document needs inside the
* window to be treated as dominant — TODO confirm against the consumer.
*/
public const DOMINANT_DOC_MIN_HITS = 3;
/**
* Presumably the maximum number of chunks taken from the dominant document
* while "dominant document" mode is active — TODO confirm.
*/
public const DOMINANT_DOC_MAX_CHUNKS = 4;
/**
* NOTE(review): judging by the name, the cap on chunks returned when a
* query targets one exact document; the consuming code is not visible
* here — confirm.
*/
public const EXACT_DOCUMENT_MAX_CHUNKS = 6;
/**
* Tuning values for a "focused product" selection mode.
* NOTE(review): the meanings below are inferred from the constant names
* only; the code that reads them is not visible here — confirm.
*
* Presumably the number of top-ranked results inspected.
*/
public const FOCUSED_PRODUCT_WINDOW = 8;
/**
* Presumably the minimum score (float) the leading product must reach
* — TODO confirm.
*/
public const FOCUSED_PRODUCT_MIN_SCORE = 10.0;
/**
* Presumably the minimum score gap (float) between the leading product and
* the next candidate — TODO confirm.
*/
public const FOCUSED_PRODUCT_MIN_GAP = 4.0;
/**
* Presumably the maximum number of chunks returned for the focused product
* — TODO confirm.
*/
public const FOCUSED_PRODUCT_MAX_CHUNKS = 4;
/**
* Lower-case German tokens that are too generic to identify a product:
* articles, conjunctions and prepositions ('der', 'und', 'mit', ...),
* generic product/device vocabulary ('produkt', 'geraet', 'messung', ...)
* and common request wording ('preis', 'welche', 'suche', 'bitte', ...).
* Both ASCII transliterations and umlaut spellings are listed
* ('fuer' / 'für', 'geraet' / 'gerät').
*
* Each token appears exactly once; the earlier revision repeated
* 'fuer', 'und' and 'oder' at the end of the list.
*
* NOTE(review): presumably these tokens are ignored when extracting
* product-identifying terms from a query — confirm against the consumer.
*/
public const GENERIC_PRODUCT_TOKEN = [
    'der', 'die', 'das', 'ein', 'eine', 'einen', 'einem', 'und', 'oder', 'mit',
    'fuer', 'für', 'von', 'im', 'in', 'am', 'an', 'auf', 'zu', 'zum', 'zur',
    'produkt', 'produkte', 'produktkarte', 'titel', 'geraet', 'gerät',
    'messgeraet', 'messgerät', 'wasser', 'haerte', 'härte', 'resthaerte',
    'resthärte', 'analyse', 'analysator', 'automat', 'online', 'messung',
    'messen', 'preis', 'preise', 'kosten', 'info', 'infos', 'passend',
    'richtige', 'richtiges', 'geeignet', 'geeignete', 'welche', 'welcher',
    'welches', 'brauche', 'suche', 'bitte', 'gegen',
];
/**
* Two-letter lower-case tokens: 'th', 'tc', 'tp', 'tm', 'ph', 'rx'.
* NOTE(review): judging by the name, these are short model designators
* that must be kept even though they are very short — confirm against
* the consumer.
*
* Visibility is spelled out to match the other constants of this class;
* a class constant without a modifier is public, so access is unchanged.
*/
public const IMPORTANT_SHORT_MODEL_TOKEN = ['th', 'tc', 'tp', 'tm', 'ph', 'rx'];
/**
* Lower-case descriptor tokens such as 'evo', 'eco', 'mini', 'pro',
* 'plus', 'compact' and 'sc'.
* NOTE(review): judging by the name, these qualify a product family or
* variant rather than naming a model — confirm against the consumer.
*
* Visibility is spelled out to match the other constants of this class;
* a class constant without a modifier is public, so access is unchanged.
*/
public const FAMILY_DESCRIPTOR_TOKEN = [
    'evo', 'eco', 'self', 'clean', 'mini', 'pro', 'plus', 'basic', 'lab',
    'inline', 'compact', 'panel', 'sc',
];
/**
* Lower-case German terms for reagents, chemicals, safety data sheets,
* containers, spare parts and accessories ('indikator', 'reagenz',
* 'sicherheitsdatenblatt', 'ersatzteil', 'zubehoer', ...).
* The list contains one multi-word entry, 'service set'.
*
* NOTE(review): presumably used to recognise content that is about a
* reagent or accessory rather than a device — confirm against the
* consumer. Several entries also appear in LOOKS_LIKE_REAGENT_WORDS;
* check whether both lists are still needed.
*
* Visibility is spelled out to match the other constants of this class;
* a class constant without a modifier is public, so access is unchanged.
*/
public const LOOKS_LIKE_REAGENT_TOKENS = [
    'indikator', 'reagenz', 'reagens', 'laborchemikalie', 'chemikalie',
    'sicherheitsdatenblatt', 'sdb', 'msds', 'ufi', 'gebinde', 'flasche',
    'ersatzteil', 'zubehoer', 'zubehör', 'service set', 'filtereinsatz',
    'kerzenfilter', 'druckregler',
];
/**
* Lower-case terms typical of safety documentation: safety data sheet
* names ('sicherheitsdatenblatt', 'sdb', 'msds'), hazard labelling
* vocabulary ('gefahrenpiktogramm', 'signalwort', 'clp', 'kennzeichnung'),
* storage/transport ('lagerung', 'transport') and codes such as 'h290',
* 'pbt' and 'vpvb'.
*
* NOTE(review): presumably used to recognise safety-document content —
* confirm against the consumer. Several entries also appear in
* LOOKS_LIKE_SAFETY_WORDS; check whether both lists are still needed.
*
* Visibility is spelled out to match the other constants of this class;
* a class constant without a modifier is public, so access is unchanged.
*/
public const LOOKS_LIKE_SAFETY_DOCS = [
    'sicherheitsdatenblatt', 'sdb', 'msds', 'gefahrenbewertung',
    'gefahrenpiktogramm', 'signalwort', 'lagerung', 'transport', 'clp',
    'kennzeichnung', 'h290', 'pbt', 'vpvb',
];
/**
* Lower-case German words and abbreviations for reagents, chemicals,
* safety data sheets and containers ('indikator', 'reagenz', 'sdb',
* 'flasche', 'gebinde', ...).
* NOTE(review): presumably matched against query or chunk text to detect
* reagent-related content; the consuming code is not visible here —
* confirm. Overlaps with LOOKS_LIKE_REAGENT_TOKENS in this class.
*/
public const LOOKS_LIKE_REAGENT_WORDS = [
'indikator', 'reagenz', 'reagens', 'chemie', 'chemikalie', 'sdb',
'sicherheitsdatenblatt', 'msds', 'flasche', 'gebinde',
];
/**
* Lower-case words that refer to a document rather than a product:
* 'datenblatt', 'dokument', 'pdf', 'handbuch', 'manual', 'beschreibung'
* and the safety data sheet names 'sdb', 'sicherheitsdatenblatt', 'msds'.
* NOTE(review): presumably used to detect that a query asks for a
* document — confirm against the consumer.
*/
public const LOOKS_LIKE_DOCUMENT_WORDS = [
'datenblatt', 'dokument', 'pdf', 'handbuch', 'manual', 'beschreibung',
'sdb', 'sicherheitsdatenblatt', 'msds',
];
/**
* Lower-case safety and hazard vocabulary: 'gefahr', 'gefahrgut', 'clp',
* 'h290', 'sicherheit', 'kennzeichnung', 'transport', 'lagerung',
* 'piktogramm'.
* NOTE(review): presumably used to detect safety-related queries or
* content — confirm against the consumer. Overlaps with
* LOOKS_LIKE_SAFETY_DOCS in this class.
*/
public const LOOKS_LIKE_SAFETY_WORDS = [
'gefahr', 'gefahrgut', 'clp', 'h290', 'sicherheit', 'kennzeichnung',
'transport', 'lagerung', 'piktogramm',
];
/**
* Lower-case German device and measurement vocabulary, listed in both
* ASCII transliteration and umlaut spelling where applicable
* ('geraet' / 'gerät', 'ueberwachung' / 'überwachung'), plus 'analysator',
* 'automat', 'messung', 'messen', 'online' and 'monitor'.
* NOTE(review): presumably used to detect that a query or chunk is about
* a measuring device — confirm against the consumer.
*/
public const LOOKS_LIKE_DEVICE_WORDS = [
'geraet', 'gerät', 'messgeraet', 'messgerät', 'analysator', 'automat',
'messung', 'messen', 'ueberwachung', 'überwachung', 'online', 'monitor',
];
}