move intent and config values into config files
This commit is contained in:
@@ -4,28 +4,18 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Static configuration for the commerce query parser.
 *
 * Every value is hard-coded in this class; nothing is injected, so an
 * instance can be created without constructor arguments. Methods prefixed
 * with "get" return fixed values; methods prefixed with "build" assemble a
 * regular expression around a caller-supplied string.
 */
final class CommerceQueryParserConfig
{
    /**
     * Brand names known to the parser, written in lowercase.
     *
     * @return string[]
     */
    public function getKnownBrands(): array
    {
        return [
            'heyl',
            'horiba',
            'neomeris',
        ];
    }

    /**
     * Phrases that are meant to be removed from a query.
     *
     * The list is returned as plain strings; how they are matched against
     * the query is decided by the caller.
     *
     * @return string[]
     */
    public function getPhrasesToRemove(): array
    {
        return [
            'ich suche',
            'suche',
            'habt ihr',
            'gibt es',
            'zeige mir',
            'welches gerät',
            'welche gerät',
            'welches modell',
            'welches ist besser',
            'welches ist am besten',
            'alternative',
            'alternativen',
        ];
    }

    /**
     * Regex alternation (without delimiters) of history-context markers.
     *
     * The fragment contains regex syntax (e.g. an optional non-capturing
     * suffix group), so it must not be passed through preg_quote().
     * getHistoryContextValuePattern() wraps it into a complete pattern.
     */
    public function getHistoryContextPattern(): string
    {
        return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
    }

    /**
     * Complete, delimited regex built from getHistoryContextPattern().
     *
     * The alternation is placed in a capturing group between word
     * boundaries and compiled with the "u" modifier.
     */
    public function getHistoryContextValuePattern(): string
    {
        return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
    }

    /**
     * Tokens to be filtered out of the search text.
     *
     * @return string[]
     */
    public function getFilterSearchTokens(): array
    {
        return [
            'auch',
            'noch',
            'nochmal',
            'zusätzlich',
            'dazu',
            'davon',
            'stattdessen',
            'bitte',
            'gern',
            'gerne',
            'zeige',
            'zeig',
            'such',
            'suche',
            'finde',
            'find',
            'mir',
            'mal',
            'von',
        ];
    }

    /**
     * Backward-compatible alias for older callers.
     *
     * Returns exactly what getFilterSearchTokens() returns.
     *
     * @return string[]
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->getFilterSearchTokens();
    }

    /**
     * Strings to search for during normalization (currently only "€").
     *
     * NOTE(review): presumably paired index-by-index with
     * getNormalizationReplace() in a replace call — confirm against caller.
     *
     * @return string[]
     */
    public function getNormalizationSearch(): array
    {
        return ['€'];
    }

    /**
     * Replacement strings for normalization (currently only " euro ",
     * padded with one space on each side).
     *
     * @return string[]
     */
    public function getNormalizationReplace(): array
    {
        return [' euro '];
    }

    /**
     * Regex matching every character that is not a Unicode letter, a
     * Unicode number, whitespace, ".", "," or "-".
     */
    public function getPromptSanitizePattern(): string
    {
        return '/[^\p{L}\p{N}\s.,\-]/u';
    }

    /**
     * Regex matching one or more consecutive whitespace characters.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Regex matching one or more consecutive whitespace characters.
     *
     * Currently the same expression as getWhitespaceCollapsePattern().
     */
    public function getWhitespaceSplitPattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Character list for trimming search text: space, tab, newline,
     * carriage return, NUL, vertical tab, "-", "." and ",".
     */
    public function getSearchTextTrimCharacters(): string
    {
        return " \t\n\r\0\x0B-.,";
    }

    /**
     * Minimum length of a search token (currently 1).
     */
    public function getMinSearchTokenLength(): int
    {
        return 1;
    }

    /**
     * Minimum length of a direct-product token (currently 1).
     */
    public function getMinDirectProductTokenLength(): int
    {
        return 1;
    }

    /**
     * Multi-line regex capturing the text that follows a "Question:" prefix
     * at the start of a line (group 1).
     */
    public function getHistoryQuestionPattern(): string
    {
        return '/^Question:\s*(.+)$/m';
    }

    /**
     * Regex for "zwischen <min> und <max> euro".
     *
     * Group 1 is the lower bound, group 2 the upper bound; both accept an
     * optional fractional part separated by "." or ",".
     */
    public function getPriceBetweenPattern(): string
    {
        return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Regex for an upper price bound ("unter", "bis", "max", "maximal"
     * followed by an amount and "euro"); group 1 is the amount.
     */
    public function getPriceMaxPattern(): string
    {
        return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Regex for a lower price bound ("ab", "mindestens", "min" followed by
     * an amount and "euro"); group 1 is the amount.
     */
    public function getPriceMinPattern(): string
    {
        return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
    }

    /**
     * Delimited regexes for removing price expressions from text.
     *
     * Every entry is a complete pattern: a price range, a single price
     * bound, and the alternation supplied by the intent configuration.
     * Unlike the extraction patterns in this class, whitespace between the
     * amount and "euro" is optional here.
     *
     * @param CommerceIntentConfig $intentConfig Supplies the undelimited price alternation via getPricePattern().
     *
     * @return string[]
     */
    public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
    {
        return [
            '/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
            '/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
            '/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
        ];
    }

    /**
     * Regex matching a single digit.
     */
    public function getDirectProductDigitPattern(): string
    {
        return '/\d/u';
    }

    /**
     * Maximum number of tokens for a direct-product query (currently 4).
     */
    public function getDirectProductMaxTokens(): int
    {
        return 4;
    }

    /**
     * Regex for model-like text: one to three lowercase words followed by a
     * number of two to five digits with an optional alphanumeric/hyphen
     * suffix.
     */
    public function getModelLikePattern(): string
    {
        return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Regex for accessory-like text: one of a fixed set of accessory words
     * followed by a number of one to five digits with an optional
     * alphanumeric/hyphen suffix.
     */
    public function getAccessoryLikePattern(): string
    {
        return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
    }

    /**
     * Builds a regex that matches $token literally between word boundaries.
     *
     * @param string $token Raw token; it is escaped with preg_quote() for the "/" delimiter.
     */
    public function buildExactTokenRemovalPattern(string $token): string
    {
        $escapedToken = preg_quote($token, '/');

        return '/\b' . $escapedToken . '\b/u';
    }

    /**
     * Builds a regex that matches $brand followed by whitespace and a
     * number of two to five digits with an optional alphanumeric/hyphen
     * suffix.
     *
     * @param string $brand Raw brand name; it is escaped with preg_quote() for the "/" delimiter.
     */
    public function buildBrandPartOfModelPattern(string $brand): string
    {
        $escapedBrand = preg_quote($brand, '/');

        return '/\b' . $escapedBrand . '\s+\d{2,5}[a-z0-9\-]*\b/u';
    }
}
|
||||
Reference in New Issue
Block a user