Files
MtoRagSystem/src/Config/CommerceQueryParserConfig.php
2026-04-25 21:41:39 +02:00

287 lines
6.4 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Static configuration (word lists, regex patterns and numeric limits) for
 * parsing German-language commerce search queries.
 *
 * The class holds no state: every method returns a literal value or a pattern
 * string assembled from literals and its arguments.
 */
final class CommerceQueryParserConfig
{
    /**
     * Decimal amount with an optional fractional part separated by "." or ",".
     * Shared by all price patterns so extraction and removal stay in sync.
     */
    private const PRICE_AMOUNT = '\d+(?:[.,]\d+)?';

    /** Keywords that introduce an upper price bound. */
    private const PRICE_MAX_KEYWORDS = 'unter|bis|max(?:imal)?';

    /** Keywords that introduce a lower price bound. */
    private const PRICE_MIN_KEYWORDS = 'ab|mindestens|min';

    /**
     * Brand names, all lowercase.
     *
     * @return string[]
     */
    public function getKnownBrands(): array
    {
        return [
            'heyl',
            'horiba',
            'neomeris',
        ];
    }

    /**
     * Filler phrases and words to strip from a query.
     *
     * The list order is kept as-is: multi-word phrases come before the single
     * words they start with (e.g. 'ich suche' before 'suche').
     * NOTE(review): callers presumably apply the entries in order - confirm
     * before re-sorting.
     *
     * @return string[]
     */
    public function getPhrasesToRemove(): array
    {
        return [
            'ich suche',
            'suche',
            'habt ihr',
            'gibt es',
            'gebe mir',
            'gib mir',
            'zeige mir',
            'welches gerät',
            'welche gerät',
            'welches modell',
            'welches ist besser',
            'welches ist am besten',
            'alternative',
            'alternativen',
            'unter anderem',
            'u a',
            'welche',
            'welcher',
            'welches',
            'welchen',
            'sind',
            'ist',
            'geeignet',
            'geeigent', // apparently a deliberate misspelling of 'geeignet'
            'verfügbarkeit',
            'verfuegbarkeit',
        ];
    }

    /**
     * Regex alternation of words and phrases, returned WITHOUT delimiters or
     * anchors so it can be embedded into a larger pattern
     * (see getHistoryContextValuePattern()).
     */
    public function getHistoryContextPattern(): string
    {
        return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
    }

    /**
     * Complete, delimited pattern (with the "u" modifier) that wraps
     * getHistoryContextPattern() in word boundaries and captures the matched
     * alternative in group 1.
     */
    public function getHistoryContextValuePattern(): string
    {
        return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
    }

    /**
     * Single-word tokens to filter out of the search text: filler words,
     * articles, conjunctions, price words and a few misspelled variants
     * (e.g. 'siene', 'geeigent').
     *
     * @return string[]
     */
    public function getFilterSearchTokens(): array
    {
        return [
            'auch',
            'noch',
            'nochmal',
            'zusätzlich',
            'dazu',
            'davon',
            'stattdessen',
            'bitte',
            'gern',
            'gerne',
            'zeige',
            'zeig',
            'such',
            'suche',
            'finde',
            'find',
            'mir',
            'mal',
            'von',
            'im',
            'in',
            'für',
            'fuer',
            'welche',
            'welcher',
            'welches',
            'welchen',
            'sind',
            'ist',
            'geeignet',
            'geeigent',
            'verfügbarkeit',
            'verfuegbarkeit',
            'prüfe',
            'pruefe',
            'den',
            'die',
            'das',
            'der',
            'dem',
            'des',
            'und',
            'oder',
            'sowie',
            'seine',
            'seinen',
            'seiner',
            'seinem',
            'seines',
            'siene',
            'sienen',
            'siener',
            'sienem',
            'sienes',
            'gebe',
            'gib',
            'nenne',
            'nenn',
            'preis',
            'preise',
            'preisen',
            'kostet',
            'kosten',
            'ua',
        ];
    }

    /**
     * Spelling corrections, keyed by the misspelled token.
     *
     * @return array<string, string> misspelled token => corrected token
     */
    public function getSearchTokenCorrections(): array
    {
        return [
            'siene' => 'seine',
            'sienen' => 'seinen',
            'siener' => 'seiner',
            'sienem' => 'seinem',
            'sienes' => 'seines',
            'indicatoren' => 'indikatoren',
        ];
    }

    /**
     * Maps plural and English variants of a token to one canonical German
     * singular form.
     *
     * @return array<string, string> variant token => canonical token
     */
    public function getSearchTokenCanonicalMap(): array
    {
        return [
            'indikatoren' => 'indikator',
            'indicators' => 'indikator',
            'indicator' => 'indikator',
            'reagenzien' => 'reagenz',
            'reagents' => 'reagenz',
            'reagent' => 'reagenz',
            'produkte' => 'produkt',
        ];
    }

    /**
     * Backward-compatible alias for older callers.
     *
     * Despite the name this returns the plain token list, not a regex.
     *
     * @see self::getFilterSearchTokens()
     *
     * @return string[]
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->getFilterSearchTokens();
    }

    /**
     * Substrings to replace during normalization; index-aligned with
     * getNormalizationReplace().
     *
     * @return string[]
     */
    public function getNormalizationSearch(): array
    {
        return ['€'];
    }

    /**
     * Replacements for getNormalizationSearch(), index-aligned. The value is
     * padded with spaces on both sides.
     *
     * @return string[]
     */
    public function getNormalizationReplace(): array
    {
        return [' euro '];
    }

    /**
     * Pattern matching every character that is NOT a Unicode letter, a Unicode
     * number, whitespace, ".", "," or "-".
     */
    public function getPromptSanitizePattern(): string
    {
        return '/[^\p{L}\p{N}\s.,\-]/u';
    }

    /**
     * Pattern matching one or more consecutive whitespace characters.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Pattern matching one or more consecutive whitespace characters
     * (same expression as getWhitespaceCollapsePattern()).
     */
    public function getWhitespaceSplitPattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Character list containing space, tab, LF, CR, NUL, vertical tab, "-",
     * "." and ",".
     */
    public function getSearchTextTrimCharacters(): string
    {
        return " \t\n\r\0\x0B-.,";
    }

    /**
     * Minimum length for a search token.
     */
    public function getMinSearchTokenLength(): int
    {
        return 1;
    }

    /**
     * Minimum length for a direct-product token.
     */
    public function getMinDirectProductTokenLength(): int
    {
        return 1;
    }

    /**
     * Multiline pattern matching a line that starts with "Question:" and
     * capturing the text after it in group 1.
     */
    public function getHistoryQuestionPattern(): string
    {
        return '/^Question:\s*(.+)$/m';
    }

    /**
     * Pattern for "zwischen <min> und <max> euro"; group 1 is the lower
     * amount, group 2 the upper amount.
     *
     * Whitespace before "euro" is optional so that everything matched by
     * getPriceRemovalPatterns() (e.g. "zwischen 10 und 20euro") is also
     * captured here.
     */
    public function getPriceBetweenPattern(): string
    {
        return '/\bzwischen\s+(' . self::PRICE_AMOUNT . ')\s+und\s+(' . self::PRICE_AMOUNT . ')\s*euro\b/u';
    }

    /**
     * Pattern for an upper price bound ("unter|bis|max|maximal <amount> euro");
     * group 1 is the amount.
     *
     * Whitespace before "euro" is optional, matching getPriceRemovalPatterns().
     */
    public function getPriceMaxPattern(): string
    {
        return '/\b(?:' . self::PRICE_MAX_KEYWORDS . ')\s+(' . self::PRICE_AMOUNT . ')\s*euro\b/u';
    }

    /**
     * Pattern for a lower price bound ("ab|mindestens|min <amount> euro");
     * group 1 is the amount.
     *
     * Whitespace before "euro" is optional, matching getPriceRemovalPatterns().
     */
    public function getPriceMinPattern(): string
    {
        return '/\b(?:' . self::PRICE_MIN_KEYWORDS . ')\s+(' . self::PRICE_AMOUNT . ')\s*euro\b/u';
    }

    /**
     * Patterns for removing price expressions: the "zwischen ... und ..."
     * range, the single-bound forms, and whatever the intent configuration
     * supplies as its price pattern.
     *
     * NOTE(review): CommerceIntentConfig::getPricePattern() is embedded
     * verbatim inside a non-capturing group; it is presumably an undelimited
     * alternation - confirm against that class.
     *
     * @return string[]
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        $amount = self::PRICE_AMOUNT;
        $boundKeywords = self::PRICE_MAX_KEYWORDS . '|' . self::PRICE_MIN_KEYWORDS;

        return [
            '/\bzwischen\s+' . $amount . '\s+und\s+' . $amount . '\s*euro\b/u',
            '/\b(?:' . $boundKeywords . ')\s+' . $amount . '\s*euro\b/u',
            '/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
        ];
    }

    /**
     * Pattern matching a single digit.
     */
    public function getDirectProductDigitPattern(): string
    {
        return '/\d/u';
    }

    /**
     * Maximum number of tokens for a direct-product query.
     */
    public function getDirectProductMaxTokens(): int
    {
        return 4;
    }

    /**
     * Pattern for model-like text: one to three lowercase words (letters,
     * umlauts, "ß", "®", "-") followed by whitespace and a token that starts
     * with two to five digits and may continue with lowercase letters, digits
     * or "-".
     */
    public function getModelLikePattern(): string
    {
        return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Pattern for accessory-like text: one of the keywords indikator,
     * indicator, reagenz, reagent, kit or set, followed by whitespace and a
     * token that starts with one to five digits.
     */
    public function getAccessoryLikePattern(): string
    {
        return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Builds a pattern matching $token literally (regex metacharacters are
     * escaped) between word boundaries.
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        return '/\b' . preg_quote($token, '/') . '\b/u';
    }

    /**
     * Builds a pattern matching $brand literally, followed by whitespace and a
     * token that starts with two to five digits (e.g. "<brand> 1234-a").
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u';
    }
}