getHistoryContextPattern() . ')\b/u';
    }

    /**
     * Returns the tokens exposed as search filter words.
     *
     * The list is mostly German function words, request verbs, price
     * vocabulary and a few misspelled variants. How callers apply the list
     * is not visible in this section of the file.
     *
     * @return string[]
     */
    public function getFilterSearchTokens(): array
    {
        $tokens = [
            // Follow-up and connective words.
            'auch', 'noch', 'nochmal', 'zusätzlich', 'dazu', 'davon', 'stattdessen',
            // Politeness.
            'bitte', 'gern', 'gerne',
            // Request verbs and fillers.
            'zeige', 'zeig', 'such', 'suche', 'finde', 'find', 'mir', 'mal',
            // Prepositions (with ASCII transliteration).
            'von', 'im', 'in', 'für', 'fuer',
            // Question words and suitability phrasing ('geeigent' is a misspelled variant).
            'welche', 'welcher', 'welches', 'welchen', 'sind', 'ist', 'geeignet', 'geeigent',
            // Availability checks.
            'verfügbarkeit', 'verfuegbarkeit', 'prüfe', 'pruefe',
            // Articles and conjunctions.
            'den', 'die', 'das', 'der', 'dem', 'des', 'und', 'oder', 'sowie',
            // Possessive pronouns, followed by their transposed-letter misspellings.
            'seine', 'seinen', 'seiner', 'seinem', 'seines',
            'siene', 'sienen', 'siener', 'sienem', 'sienes',
            // "Give me" / "name" verbs.
            'gebe', 'gib', 'nenne', 'nenn',
            // Price vocabulary.
            'preis', 'preise', 'preisen', 'kostet', 'kosten',
            // Abbreviation.
            'ua',
        ];

        return $tokens;
    }

    /**
     * Returns a lookup of misspelled tokens to their corrected spelling.
     *
     * @return array<string, string> misspelled token => corrected token
     */
    public function getSearchTokenCorrections(): array
    {
        $corrections = [
            'siene' => 'seine',
            'sienen' => 'seinen',
            'siener' => 'seiner',
            'sienem' => 'seinem',
            'sienes' => 'seines',
            'indicatoren' => 'indikatoren',
        ];

        return $corrections;
    }

    /**
     * Returns a lookup that folds plural and English token variants into a
     * single canonical German singular form.
     *
     * @return array<string, string> token variant => canonical token
     */
    public function getSearchTokenCanonicalMap(): array
    {
        $canonicalForms = [
            'indikatoren' => 'indikator',
            'indicators' => 'indikator',
            'indicator' => 'indikator',
            'reagenzien' => 'reagenz',
            'reagents' => 'reagenz',
            'reagent' => 'reagenz',
            'produkte' => 'produkt',
        ];

        return $canonicalForms;
    }

    /**
     * Backward-compatible alias for older callers.
*
     * Forwards to getFilterSearchTokens() and returns its result unchanged.
     *
     * @return string[]
     */
    public function getFilterSearchTokensPattern(): array
    {
        $tokens = $this->getFilterSearchTokens();

        return $tokens;
    }

    /**
     * Returns the literal substrings to look for during normalization.
     *
     * Presumably paired index-by-index with getNormalizationReplace() in a
     * str_replace()-style call; the call site is not visible here.
     *
     * @return string[]
     */
    public function getNormalizationSearch(): array
    {
        return ['€'];
    }

    /**
     * Returns the replacement strings for normalization.
     *
     * Presumably paired index-by-index with getNormalizationSearch(); the
     * call site is not visible here. The replacement is padded with spaces.
     *
     * @return string[]
     */
    public function getNormalizationReplace(): array
    {
        return [' euro '];
    }

    /**
     * Regex matching a single character that is not a Unicode letter, a
     * Unicode number, whitespace, '.', ',' or '-'.
     */
    public function getPromptSanitizePattern(): string
    {
        return '/[^\p{L}\p{N}\s.,\-]/u';
    }

    /**
     * Regex matching one or more consecutive whitespace characters.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Regex matching one or more consecutive whitespace characters
     * (same expression as the collapse pattern).
     */
    public function getWhitespaceSplitPattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Character list containing space, tab, newline, carriage return, NUL,
     * vertical tab, '-', '.' and ','.
     *
     * Presumably handed to trim() as its character mask; confirm at the call site.
     */
    public function getSearchTextTrimCharacters(): string
    {
        return " \t\n\r\0\x0B-.,";
    }

    /**
     * Minimum length for a search token; currently 1.
     */
    public function getMinSearchTokenLength(): int
    {
        return 1;
    }

    /**
     * Minimum length for a direct-product token; currently 1.
     */
    public function getMinDirectProductTokenLength(): int
    {
        return 1;
    }

    /**
     * Multiline regex matching a line that starts with "Question:" and
     * capturing the non-empty remainder of that line in group 1.
     */
    public function getHistoryQuestionPattern(): string
    {
        return '/^Question:\s*(.+)$/m';
    }

    /**
     * Regex for "zwischen <number> und <number> euro".
     *
     * Group 1 is the first number and group 2 the second; each may carry a
     * '.' or ',' decimal part. Whitespace is required before "euro".
     */
    public function getPriceBetweenPattern(): string
    {
        return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Regex for an upper price bound: "unter", "bis", "max" or "maximal",
     * then a number (group 1), then "euro".
     *
     * Whitespace is required before "euro".
     */
    public function getPriceMaxPattern(): string
    {
        return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Regex for a lower price bound: "ab", "mindestens" or "min", then a
     * number (group 1), then "euro".
     *
     * Whitespace is required before "euro".
     */
    public function getPriceMinPattern(): string
    {
        return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Returns the regexes describing price phrases, in this order: the
     * "zwischen ... und ... euro" range, the single-bound forms, and a
     * word-bounded alternation built from the intent config's price pattern.
     *
     * NOTE(review): the first two expressions accept zero whitespace before
     * "euro" (`\s*euro`), whereas getPriceBetweenPattern(),
     * getPriceMaxPattern() and getPriceMinPattern() require at least one
     * (`\s+euro`). Confirm whether that difference is intended.
     *
     * @param CommerceIntentConfig $intentConfig supplies getPricePattern(), which is
     *                                           embedded unescaped inside a non-capturing group
     *
     * @return string[]
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        $rangePattern = '/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u';
        $boundPattern = '/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u';
        $intentPattern = '/\b(?:' . $intentConfig->getPricePattern() . ')\b/u';

        return [
            $rangePattern,
            $boundPattern,
            $intentPattern,
        ];
    }

    /**
     * Regex matching a single digit.
     */
    public function getDirectProductDigitPattern(): string
    {
        return '/\d/u';
    }

    /**
     * Maximum token count for a direct-product query; currently 4.
     */
    public function getDirectProductMaxTokens(): int
    {
        return 4;
    }

    /**
     * Regex for a model-like phrase: one to three words followed by a number.
     *
     * Each word starts with a lowercase letter (a-z, ä, ö, ü, ß) and may
     * continue with those letters, '®' or '-'. The number has 2 to 5 digits
     * and an optional suffix of lowercase letters, digits or hyphens.
     * The expression has no case-insensitive flag, so it presumably expects
     * lowercased input; confirm at the call site.
     */
    public function getModelLikePattern(): string
    {
        return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Regex for an accessory keyword (indikator, indicator, reagenz, reagent,
     * kit, set) followed by whitespace and a 1 to 5 digit number with an
     * optional suffix of lowercase letters, digits or hyphens.
     */
    public function getAccessoryLikePattern(): string
    {
        return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Regex matching a single digit (same expression as the direct-product
     * digit pattern).
     */
    public function getContainsDigitPattern(): string
    {
        return '/\d/u';
    }

    /**
     * Anchored regex for a whole token that looks like a model number.
     *
     * Accepts either 2 to 5 digits, or 1 to 6 lowercase ASCII letters
     * followed by 1 to 5 digits; both forms allow a trailing suffix of
     * lowercase letters, digits or hyphens.
     */
    public function getModelNumberTokenPattern(): string
    {
        return '/^(?:\d{2,5}[a-z0-9\-]*|[a-z]{1,6}\d{1,5}[a-z0-9\-]*)$/u';
    }

    /**
     * Anchored regex for a whole token that starts with a Unicode letter and
     * continues with at least two letters, ASCII digits, '®' or '-'
     * (three characters minimum).
     */
    public function getModelContextTokenPattern(): string
    {
        return '/^[\p{L}][\p{L}0-9®\-]{2,}$/u';
    }

    /**
     * Anchored regex for a whole token made of 1 to 4 lowercase ASCII letters
     * followed by at most three digits.
     */
    public function getModelSuffixTokenPattern(): string
    {
        return '/^[a-z]{1,4}\d{0,3}$/u';
    }

    /**
     * Window size for model context tokens; currently 2.
     */
    public function getModelContextTokenWindow(): int
    {
        return 2;
    }

    /**
     * Minimum length for a meaningful alphabetic token; currently 2.
     */
    public function getMinMeaningfulAlphaTokenLength(): int
    {
        return 2;
    }

    /**
     * Maximum number of shop search tokens; currently 6.
     */
    public function getMaxShopSearchTokens(): int
    {
        return 6;
    }

    /**
     * Anchored regex for a whole token that is an instruction or
     * presentation word.
     *
     * The alternation covers German request verbs, modal forms, quantity
     * words, output-format words, explanation and info words, and a few
     * prepositions, several of them with ASCII transliterations.
     */
    public function getInstructionOrPresentationTokenPattern(): string
    {
        return '/^(?:zeig(?:e)?|such(?:e)?|find(?:e)?|gib|gebe|nenn(?:e)?|liefer(?:e)?|erstelle?|mach(?:e)?|brauch(?:e)?|will|möchte|moechte|hätte|haette|kannst|bitte|mal|alle|alles|komplett|vollständig|vollstaendig|gesamt|ganze|ganzen|liste|listung|auflistung|tabelle|tabellarisch|übersicht|uebersicht|anzeigen?|ausgeben?|darstellen?|antwort(?:e)?|erklär(?:e)?|erklaer(?:e)?|info|infos|informationen|dazu|hierzu|damit|davon|an|als|mit|ohne|inkl|inklusive)$/u';
    }

    /**
     * Product/category tokens that are useful for Store API search even when they are not next to a model number.
     * This is intentionally a semantic allowlist, not a spelling-error blocklist.
*
     * NOTE(review): 'zubehor' drops the umlaut, while the other ASCII
     * fallbacks in this list transliterate it ('loesung', 'geraet').
     * Confirm that spelling is intended.
     *
     * @return string[]
     */
    public function getSemanticShopSearchTokens(): array
    {
        $allowlist = [
            // Consumables and accessories.
            'indikator', 'indicator', 'reagenz', 'reagent',
            'zubehör', 'zubehor', 'ersatzteil', 'verbrauchsmaterial',
            'kit', 'set',
            // Components.
            'filter', 'pumpe', 'pumpenkopf', 'motorblock',
            // Solutions and test strips.
            'lösung', 'loesung', 'solution', 'teststreifen',
            // Devices.
            'gerät', 'geraet', 'messgerät', 'messgeraet',
            'analysegerät', 'analysegeraet', 'analysator',
            'monitor', 'controller', 'system',
        ];

        return $allowlist;
    }

    /**
     * Builds a regex matching the given token between word boundaries.
     *
     * @param string $token literal text; regex metacharacters and the '/' delimiter are escaped
     *
     * @return string a '/'-delimited pattern carrying the 'u' modifier
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        $quotedToken = preg_quote($token, '/');

        return '/\b' . $quotedToken . '\b/u';
    }

    /**
     * Builds a regex matching the given brand followed by whitespace and a
     * 2 to 5 digit number with an optional suffix of lowercase letters,
     * digits or hyphens.
     *
     * @param string $brand literal text; regex metacharacters and the '/' delimiter are escaped
     *
     * @return string a '/'-delimited pattern carrying the 'u' modifier
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        $quotedBrand = preg_quote($brand, '/');

        return '/\b' . $quotedBrand . '\s+\d{2,5}[a-z0-9\-]*\b/u';
    }
}