Move intent and config values into config files
This commit is contained in:
@@ -6,50 +6,249 @@ namespace App\Config;
|
||||
|
||||
final class AgentRunnerConfig
|
||||
{
|
||||
public function getCommerceHistoryBudgetChars(): int
|
||||
{
|
||||
return 1000;
|
||||
}
|
||||
|
||||
public function getProductSearchKnowledgeChunkLimit(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getAdvisoryProductSearchKnowledgeChunkLimit(): int
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
public function getOptimizedShopQueryPrefixPattern(): string
|
||||
{
|
||||
return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu';
|
||||
}
|
||||
|
||||
public function getOptimizedShopQueryTrimCharacters(): string
|
||||
{
|
||||
return " \t\n\r\0\x0B\"'`";
|
||||
}
|
||||
|
||||
public function getEmptyPromptMessage(): string
|
||||
{
|
||||
return '❌ Empty prompt.';
|
||||
}
|
||||
|
||||
public function getAnalyzeRequestMessage(): string
|
||||
{
|
||||
return 'Ich analysiere deine Anfrage...';
|
||||
}
|
||||
|
||||
public function getCheckInternetSourcesMessage(): string
|
||||
{
|
||||
return 'Ich prüfe auf Internetquellen...';
|
||||
}
|
||||
|
||||
public function getRetrieveKnowledgeMessage(): string
|
||||
{
|
||||
return 'Ich hole relevante Daten aus meinem RAG-Wissen...';
|
||||
}
|
||||
|
||||
public function getOptimizeSearchMessage(): string
|
||||
{
|
||||
return 'Ich optimiere die Recherche...';
|
||||
}
|
||||
|
||||
public function getFetchSearchDataMessageTemplate(): string
|
||||
{
|
||||
return 'Ich rufe Recherchedaten ab (type: %s)';
|
||||
}
|
||||
|
||||
public function getAnalyzeAllInformationMessage(): string
|
||||
{
|
||||
return 'Ich analysiere alle Informationen...';
|
||||
}
|
||||
|
||||
public function getThinkingWhileStreamingMessage(): string
|
||||
{
|
||||
return 'Denke nach...';
|
||||
}
|
||||
|
||||
public function getNoLlmDataReceivedMessage(): string
|
||||
{
|
||||
return '❌ Es wurden keine Daten vom LLM empfangen.';
|
||||
}
|
||||
|
||||
public function getGenericInternalErrorMessage(): string
|
||||
{
|
||||
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
|
||||
}
|
||||
|
||||
public function getDebugInternalErrorPrefix(): string
|
||||
{
|
||||
return '❌ Interner Fehler: ';
|
||||
}
|
||||
|
||||
public function getExternalUrlSourceLabel(): string
|
||||
{
|
||||
return 'Externe URL';
|
||||
}
|
||||
|
||||
public function getRagKnowledgeSourceLabel(): string
|
||||
{
|
||||
return 'RAG Wissen';
|
||||
}
|
||||
|
||||
public function getConversationHistorySourceLabel(): string
|
||||
{
|
||||
return 'Chatverlauf';
|
||||
}
|
||||
|
||||
public function getShopSystemSourceLabel(): string
|
||||
{
|
||||
return 'Shopsystem';
|
||||
}
|
||||
|
||||
public function getExtendedShopSearchSourceLabel(): string
|
||||
{
|
||||
return 'Erweiterte Shopsuche';
|
||||
}
|
||||
|
||||
public function getUsedSourcesPrefix(): string
|
||||
{
|
||||
return 'Genutzte Quellen: ';
|
||||
}
|
||||
|
||||
public function getSourcesPrefix(): string
|
||||
{
|
||||
return 'Quellen: ';
|
||||
}
|
||||
|
||||
public function getSourceBadgeHtmlTemplate(): string
|
||||
{
|
||||
return '<span class="badge bg-info text-black">%s</span>';
|
||||
}
|
||||
|
||||
public function getErrorHtmlTemplate(): string
|
||||
{
|
||||
return '<span class="text-danger">%s</span>' . "\n<hr>\n";
|
||||
}
|
||||
|
||||
public function getThinkHtmlTemplate(): string
|
||||
{
|
||||
return '<span class="text-info think">%s</span>' . "\n";
|
||||
}
|
||||
|
||||
public function getInfoHtmlTemplate(): string
|
||||
{
|
||||
return "\n\n" . '<span class="text-info fw-bolder">%s</span>' . "\n";
|
||||
}
|
||||
|
||||
public function getDebugHtmlTemplate(): string
|
||||
{
|
||||
return "\n\nDEBUG: <code>%s</code>\n";
|
||||
}
|
||||
|
||||
public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
|
||||
{
|
||||
$historyBlock = '';
|
||||
|
||||
if (trim($commerceHistoryContext) !== '') {
|
||||
$historyBlock = '
|
||||
RECENT CONVERSATION CONTEXT:
|
||||
' . $commerceHistoryContext . '
|
||||
|
||||
Additional rules for conversation context:
|
||||
- The current user input has highest priority.
|
||||
- Use the recent conversation context only to resolve omitted references.
|
||||
- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.
|
||||
- Do not revive older products unless the current user input clearly refers to them.
|
||||
- If the current input starts a new topic, ignore older product context.
|
||||
- Prefer the most recent product reference over older ones.
|
||||
';
|
||||
$historyBlock = $this->buildHistoryBlock($commerceHistoryContext);
|
||||
}
|
||||
|
||||
return '
|
||||
Generate a short search query for Shopware 6 from the following user input text.
|
||||
|
||||
Rules:
|
||||
- Output only the final search query.
|
||||
- Always convert relevant search terms to their singular form.
|
||||
- No introduction, no explanation, no quotation marks.
|
||||
- Use only shop-relevant search terms from the user input for a shop search.
|
||||
- Maximum 6 search terms, preferably fewer.
|
||||
- Remove filler words, polite phrases, and irrelevant words.
|
||||
- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.
|
||||
- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).
|
||||
- Separate terms using spaces only.
|
||||
- If a relevant product name is present, it must be placed at the beginning of the final search query.
|
||||
- Try to always identify all products mentioned in the user input text, even in long prompts.
|
||||
- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.
|
||||
- If the current user input is vague or referential, use the recent conversation context only as support.
|
||||
- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".
|
||||
|
||||
Output format:
|
||||
Keyword1 Keyword2 Keyword3
|
||||
' . $historyBlock . '
|
||||
|
||||
CURRENT USER INPUT:
|
||||
' . $prompt . '
|
||||
';
|
||||
return $this->implodePromptBlocks([
|
||||
$this->getShopPromptIntro(),
|
||||
$this->buildRulesBlock($this->getShopPromptRules()),
|
||||
$this->getShopPromptOutputFormatBlock(),
|
||||
$historyBlock,
|
||||
$this->getCurrentUserInputLabel() . ':',
|
||||
trim($prompt),
|
||||
]);
|
||||
}
|
||||
|
||||
private function buildHistoryBlock(string $commerceHistoryContext): string
|
||||
{
|
||||
return $this->implodePromptBlocks([
|
||||
$this->getRecentConversationContextLabel() . ':',
|
||||
trim($commerceHistoryContext),
|
||||
$this->buildRulesBlock($this->getConversationContextRules(), 'Additional rules for conversation context:'),
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getShopPromptRules(): array
|
||||
{
|
||||
return [
|
||||
'- Output only the final search query.',
|
||||
'- Always convert relevant search terms to their singular form.',
|
||||
'- No introduction, no explanation, no quotation marks.',
|
||||
'- Use only shop-relevant search terms from the user input for a shop search.',
|
||||
'- Maximum 6 search terms, preferably fewer.',
|
||||
'- Remove filler words, polite phrases, and irrelevant words.',
|
||||
'- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.',
|
||||
'- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).',
|
||||
'- Separate terms using spaces only.',
|
||||
'- If a relevant product name is present, it must be placed at the beginning of the final search query.',
|
||||
'- Try to always identify all products mentioned in the user input text, even in long prompts.',
|
||||
'- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.',
|
||||
'- If the current user input is vague or referential, use the recent conversation context only as support.',
|
||||
'- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getConversationContextRules(): array
|
||||
{
|
||||
return [
|
||||
'- The current user input has highest priority.',
|
||||
'- Use the recent conversation context only to resolve omitted references.',
|
||||
'- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.',
|
||||
'- Do not revive older products unless the current user input clearly refers to them.',
|
||||
'- If the current input starts a new topic, ignore older product context.',
|
||||
'- Prefer the most recent product reference over older ones.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getShopPromptIntro(): string
|
||||
{
|
||||
return 'Generate a short search query for Shopware 6 from the following user input text.';
|
||||
}
|
||||
|
||||
public function getShopPromptOutputFormatBlock(): string
|
||||
{
|
||||
return "Output format:\nKeyword1 Keyword2 Keyword3";
|
||||
}
|
||||
|
||||
public function getRecentConversationContextLabel(): string
|
||||
{
|
||||
return 'RECENT CONVERSATION CONTEXT';
|
||||
}
|
||||
|
||||
public function getCurrentUserInputLabel(): string
|
||||
{
|
||||
return 'CURRENT USER INPUT';
|
||||
}
|
||||
|
||||
private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string
|
||||
{
|
||||
return $headline . "\n" . implode("\n", $rules);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $blocks
|
||||
*/
|
||||
private function implodePromptBlocks(array $blocks): string
|
||||
{
|
||||
$normalized = array_values(array_filter(
|
||||
array_map(
|
||||
static fn(string $block): string => trim($block),
|
||||
$blocks
|
||||
),
|
||||
static fn(string $block): bool => $block !== ''
|
||||
));
|
||||
|
||||
return implode("\n\n", $normalized);
|
||||
}
|
||||
}
|
||||
@@ -6,57 +6,38 @@ namespace App\Config;
|
||||
|
||||
final class CommerceIntentConfig
|
||||
{
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getStrongSignalsList(): array
|
||||
{
|
||||
return [
|
||||
'shop',
|
||||
'alle',
|
||||
'preis',
|
||||
'preise',
|
||||
'kunde',
|
||||
'online',
|
||||
'produkt',
|
||||
'produkte',
|
||||
'artikel',
|
||||
'sku',
|
||||
'kaufen',
|
||||
'kostet',
|
||||
'kosten',
|
||||
'verfügbarkeit',
|
||||
'verfuegbarkeit',
|
||||
|
||||
// Search / product discovery signals
|
||||
'suche',
|
||||
'such',
|
||||
'finde',
|
||||
'finden',
|
||||
'welche',
|
||||
'welcher',
|
||||
'welches',
|
||||
|
||||
// Device / system signals
|
||||
'analysegerät',
|
||||
'analysegeraet',
|
||||
'analysegeräte',
|
||||
'analysegeraete',
|
||||
'messgerät',
|
||||
'messgeraet',
|
||||
'messgeräte',
|
||||
'messgeraete',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'geräte',
|
||||
'geraete',
|
||||
'analysator',
|
||||
'analysatoren',
|
||||
'analyzer',
|
||||
'system',
|
||||
'systeme',
|
||||
'anlage',
|
||||
'anlagen',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getAdvisorySignals(): array
|
||||
{
|
||||
return [
|
||||
@@ -67,30 +48,36 @@ final class CommerceIntentConfig
|
||||
'geeignet',
|
||||
'empfiehl',
|
||||
'empfehl',
|
||||
'vergleich',
|
||||
'vergleichen',
|
||||
];
|
||||
}
|
||||
|
||||
public function getPricePattern(): string
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getPriceTerms(): array
|
||||
{
|
||||
$pattern = [
|
||||
return [
|
||||
'euro',
|
||||
'€',
|
||||
'eur',
|
||||
'teuer',
|
||||
'preis',
|
||||
'preise',
|
||||
'kosten',
|
||||
'kostet',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getColorPattern(): string
|
||||
public function getPricePattern(): string
|
||||
{
|
||||
$pattern = [
|
||||
return implode('|', $this->getPriceTerms());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getColorTerms(): array
|
||||
{
|
||||
return [
|
||||
'schwarz',
|
||||
'weiß',
|
||||
'weis',
|
||||
@@ -103,13 +90,19 @@ final class CommerceIntentConfig
|
||||
'orange',
|
||||
'braun',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getSizeTokenPattern(): string
|
||||
public function getColorPattern(): string
|
||||
{
|
||||
$pattern = [
|
||||
return implode('|', $this->getColorTerms());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSizeTokenTerms(): array
|
||||
{
|
||||
return [
|
||||
'xs',
|
||||
's',
|
||||
'm',
|
||||
@@ -118,18 +111,189 @@ final class CommerceIntentConfig
|
||||
'xxl',
|
||||
'xxxxl',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getSizePattern(): string
|
||||
public function getSizeTokenPattern(): string
|
||||
{
|
||||
$pattern = [
|
||||
return implode('|', $this->getSizeTokenTerms());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSizeTerms(): array
|
||||
{
|
||||
return [
|
||||
'größe',
|
||||
'groesse',
|
||||
'grösse',
|
||||
];
|
||||
}
|
||||
|
||||
return implode('|', $pattern);
|
||||
public function getSizePattern(): string
|
||||
{
|
||||
return implode('|', $this->getSizeTerms());
|
||||
}
|
||||
|
||||
/**
 * Builds the regex that extracts the value following a size keyword.
 *
 * The keyword alternation comes from getSizePattern(). Capture group 1 holds
 * the size value (lowercase letters, digits, "." or "-").
 *
 * The "\b" directly after the keyword group stops a keyword from matching as
 * the prefix of a longer word. Without it, "\s*" may match nothing, so inputs
 * such as "größer" or "größen" would be read as keyword "größe" followed by
 * the value "r" or "n".
 *
 * @return string PCRE pattern including delimiters and the "u" modifier
 */
public function getSizeExtractionPattern(): string
{
    return '/\b(?:' . $this->getSizePattern() . ')\b\s*([a-z0-9.-]+)\b/u';
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportDiagnosticPatterns(): array
|
||||
{
|
||||
return [
|
||||
'/\bfehler\b/u',
|
||||
'/\bfehlercode\b/u',
|
||||
'/\berror\b/u',
|
||||
'/\bstörung\b/u',
|
||||
'/\bstoerung\b/u',
|
||||
'/\balarm\b/u',
|
||||
'/\bstörungsmeldung\b/u',
|
||||
'/\bstoerungsmeldung\b/u',
|
||||
'/\bmeldung\b/u',
|
||||
'/\bwarnung\b/u',
|
||||
'/\bwarncode\b/u',
|
||||
'/\bcode\b/u',
|
||||
'/\bwas bedeutet\b/u',
|
||||
'/\bwarum\b/u',
|
||||
'/\bblinkt\b/u',
|
||||
'/\bzeigt\b/u',
|
||||
'/\bzeigt an\b/u',
|
||||
'/\bursache\b/u',
|
||||
'/\bdiagnose\b/u',
|
||||
'/\bservicefall\b/u',
|
||||
'/\bproblem\b/u',
|
||||
'/\bstörung beheben\b/u',
|
||||
'/\bstoerung beheben\b/u',
|
||||
'/\be\d{1,3}\b/u',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getExplicitCommerceIntentPatterns(): array
|
||||
{
|
||||
return [
|
||||
'/\bshop\b/u',
|
||||
'/\bpreis\b/u',
|
||||
'/\bkosten\b/u',
|
||||
'/\bkostet\b/u',
|
||||
'/\bkaufen\b/u',
|
||||
'/\bbestellen\b/u',
|
||||
'/\bprodukt\b/u',
|
||||
'/\bartikel\b/u',
|
||||
'/\bsku\b/u',
|
||||
'/\bonline\b/u',
|
||||
];
|
||||
}
|
||||
|
||||
public function getSkuLikePattern(): string
|
||||
{
|
||||
return '/\b\d{4,10}\b/u';
|
||||
}
|
||||
|
||||
/**
 * Builds the regex that detects a numeric amount followed by a price term.
 *
 * The term alternation comes from getPricePattern().
 *
 * The closing guard is a negative lookahead rather than "\b". A word boundary
 * needs a word character on one side, so "\b" after the non-word term "€"
 * only succeeds when a letter or digit follows directly, and "100 €" would
 * never match. For terms that end in a word character, "(?!\w)" accepts
 * exactly the same positions as "\b".
 *
 * @return string PCRE pattern including delimiters and the "u" modifier
 */
public function getPriceValuePattern(): string
{
    return '/\b\d+(?:[.,]\d+)?\s*(?:' . $this->getPricePattern() . ')(?!\w)/u';
}
|
||||
|
||||
/**
 * Builds the regex that detects a size keyword followed by a size value.
 *
 * The keyword alternation comes from getSizePattern(); the value part accepts
 * lowercase letters, digits, "." and "-".
 *
 * The "\b" directly after the keyword group stops a keyword from matching as
 * the prefix of a longer word. Without it, "\s*" may match nothing, so inputs
 * such as "größer" or "größen" would count as a size keyword plus value.
 *
 * @return string PCRE pattern including delimiters and the "u" modifier
 */
public function getSizeValuePattern(): string
{
    return '/\b(?:' . $this->getSizePattern() . ')\b\s*[a-z0-9.-]+\b/u';
}
|
||||
|
||||
public function getSizeTokenValuePattern(): string
|
||||
{
|
||||
return '/\b(?:' . $this->getSizeTokenPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
public function getColorValuePattern(): string
|
||||
{
|
||||
return '/\b(?:' . $this->getColorPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
public function getSupportOrDiagnosticSignalLabel(): string
|
||||
{
|
||||
return 'support_or_diagnostic';
|
||||
}
|
||||
|
||||
public function getSkuSignalLabel(): string
|
||||
{
|
||||
return 'sku';
|
||||
}
|
||||
|
||||
public function getPriceSignalLabel(): string
|
||||
{
|
||||
return 'price';
|
||||
}
|
||||
|
||||
public function getSizeSignalLabel(): string
|
||||
{
|
||||
return 'size';
|
||||
}
|
||||
|
||||
public function getSizeTokenSignalLabel(): string
|
||||
{
|
||||
return 'size_token';
|
||||
}
|
||||
|
||||
public function getColorSignalLabel(): string
|
||||
{
|
||||
return 'color';
|
||||
}
|
||||
|
||||
public function getAdvisorySignalPrefix(): string
|
||||
{
|
||||
return 'advisory:';
|
||||
}
|
||||
|
||||
public function getProductSearchMinScore(): int
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
public function getAdvisoryProductSearchMinScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getStrongSignalScore(): int
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
public function getSkuSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getPriceSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSizeSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSizeTokenSignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getColorSignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getAdvisorySignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -4,28 +4,18 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
final readonly class CommerceQueryParserConfig
|
||||
final class CommerceQueryParserConfig
|
||||
{
|
||||
/**
|
||||
* @param string[] $knownBrands
|
||||
* @param string[] $phrasesToRemove
|
||||
* @param string[] $filterSearchTokensPattern
|
||||
* @param string[] $referenceOnlyTokens
|
||||
*/
|
||||
public function __construct(
|
||||
private array $knownBrands = [],
|
||||
private array $phrasesToRemove = [],
|
||||
private array $filterSearchTokensPattern = [],
|
||||
private array $referenceOnlyTokens = [],
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getKnownBrands(): array
|
||||
{
|
||||
return $this->knownBrands;
|
||||
return [
|
||||
'heyl',
|
||||
'horiba',
|
||||
'neomeris',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig
|
||||
*/
|
||||
public function getPhrasesToRemove(): array
|
||||
{
|
||||
return $this->phrasesToRemove;
|
||||
return [
|
||||
'ich suche',
|
||||
'suche',
|
||||
'habt ihr',
|
||||
'gibt es',
|
||||
'zeige mir',
|
||||
'welches gerät',
|
||||
'welche gerät',
|
||||
'welches modell',
|
||||
'welches ist besser',
|
||||
'welches ist am besten',
|
||||
'alternative',
|
||||
'alternativen',
|
||||
];
|
||||
}
|
||||
|
||||
public function getHistoryContextPattern(): string
|
||||
{
|
||||
return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
||||
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
|
||||
}
|
||||
|
||||
public function getReferenceFollowUpPattern(): string
|
||||
public function getHistoryContextValuePattern(): string
|
||||
{
|
||||
return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
||||
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFilterSearchTokens(): array
|
||||
{
|
||||
return [
|
||||
'auch',
|
||||
'noch',
|
||||
'nochmal',
|
||||
'zusätzlich',
|
||||
'dazu',
|
||||
'davon',
|
||||
'stattdessen',
|
||||
'bitte',
|
||||
'gern',
|
||||
'gerne',
|
||||
'zeige',
|
||||
'zeig',
|
||||
'such',
|
||||
'suche',
|
||||
'finde',
|
||||
'find',
|
||||
'mir',
|
||||
'mal',
|
||||
'von',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Backward-compatible alias for older callers.
|
||||
*
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFilterSearchTokensPattern(): array
|
||||
{
|
||||
return $this->filterSearchTokensPattern;
|
||||
return $this->getFilterSearchTokens();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getReferenceOnlyTokens(): array
|
||||
public function getNormalizationSearch(): array
|
||||
{
|
||||
if ($this->referenceOnlyTokens !== []) {
|
||||
return $this->referenceOnlyTokens;
|
||||
}
|
||||
return ['€'];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getNormalizationReplace(): array
|
||||
{
|
||||
return [' euro '];
|
||||
}
|
||||
|
||||
public function getPromptSanitizePattern(): string
|
||||
{
|
||||
return '/[^\p{L}\p{N}\s.,\-]/u';
|
||||
}
|
||||
|
||||
public function getWhitespaceCollapsePattern(): string
|
||||
{
|
||||
return '/\s+/u';
|
||||
}
|
||||
|
||||
public function getWhitespaceSplitPattern(): string
|
||||
{
|
||||
return '/\s+/u';
|
||||
}
|
||||
|
||||
public function getSearchTextTrimCharacters(): string
|
||||
{
|
||||
return " \t\n\r\0\x0B-.,";
|
||||
}
|
||||
|
||||
public function getMinSearchTokenLength(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getMinDirectProductTokenLength(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getHistoryQuestionPattern(): string
|
||||
{
|
||||
return '/^Question:\s*(.+)$/m';
|
||||
}
|
||||
|
||||
public function getPriceBetweenPattern(): string
|
||||
{
|
||||
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
public function getPriceMaxPattern(): string
|
||||
{
|
||||
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
public function getPriceMinPattern(): string
|
||||
{
|
||||
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
|
||||
{
|
||||
return [
|
||||
'preis',
|
||||
'preise',
|
||||
'kosten',
|
||||
'kostet',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'modell',
|
||||
'produkt',
|
||||
'artikel',
|
||||
'dafür',
|
||||
'dafuer',
|
||||
'dazu',
|
||||
'davon',
|
||||
'verfügbarkeit',
|
||||
'verfuegbarkeit',
|
||||
'shop',
|
||||
'link',
|
||||
'zum',
|
||||
'zur',
|
||||
'das',
|
||||
'dieses',
|
||||
'den',
|
||||
'dem',
|
||||
'bitte',
|
||||
'und',
|
||||
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
|
||||
];
|
||||
}
|
||||
|
||||
public function getDirectProductDigitPattern(): string
|
||||
{
|
||||
return '/\d/u';
|
||||
}
|
||||
|
||||
public function getDirectProductMaxTokens(): int
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
public function getModelLikePattern(): string
|
||||
{
|
||||
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
|
||||
public function getAccessoryLikePattern(): string
|
||||
{
|
||||
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
|
||||
public function buildExactTokenRemovalPattern(string $token): string
|
||||
{
|
||||
return '/\b' . preg_quote($token, '/') . '\b/u';
|
||||
}
|
||||
|
||||
public function buildBrandPartOfModelPattern(string $brand): string
|
||||
{
|
||||
return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
}
|
||||
@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
|
||||
* - the system now has more safeguards:
|
||||
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
|
||||
*/
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.82;
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.83;
|
||||
|
||||
/**
|
||||
* Lower safety boundary for dynamic threshold adjustments.
|
||||
|
||||
@@ -1,97 +1,459 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
class PromptBuilderConfig{
|
||||
/**
|
||||
* Approximate character-to-token ratio for conservative prompt budgeting.
|
||||
*/
|
||||
public const CHARS_PER_TOKEN = 4;
|
||||
final class PromptBuilderConfig
|
||||
{
|
||||
public function getCharsPerToken(): int
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
public function getHistoryPaddingChars(): int
|
||||
{
|
||||
return 400;
|
||||
}
|
||||
|
||||
public function getOutputReserveRatio(): float
|
||||
{
|
||||
return 0.25;
|
||||
}
|
||||
|
||||
public function getOutputReserveMinTokens(): int
|
||||
{
|
||||
return 768;
|
||||
}
|
||||
|
||||
public function getOutputReserveMaxTokens(): int
|
||||
{
|
||||
return 6000;
|
||||
}
|
||||
|
||||
public function getSafetyReserveRatio(): float
|
||||
{
|
||||
return 0.05;
|
||||
}
|
||||
|
||||
public function getSafetyReserveMinTokens(): int
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
public function getSafetyReserveMaxTokens(): int
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
public function getMinPromptBudgetTokens(): int
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
public function getMaxShopResultsInPrompt(): int
|
||||
{
|
||||
return 24;
|
||||
}
|
||||
|
||||
public function getDetailedShopResultsMaxCount(): int
|
||||
{
|
||||
return 5;
|
||||
}
|
||||
|
||||
public function getTechnicalProductKeywordMatchThreshold(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSystemSectionLabel(): string
|
||||
{
|
||||
return 'SYSTEM';
|
||||
}
|
||||
|
||||
public function getUserQuestionSectionLabel(): string
|
||||
{
|
||||
return 'USER QUESTION';
|
||||
}
|
||||
|
||||
public function getConversationContextSectionLabel(): string
|
||||
{
|
||||
return 'CONVERSATION CONTEXT (contextual only)';
|
||||
}
|
||||
|
||||
/**
|
||||
* Keep a small gap so history does not consume the last available prompt space.
|
||||
* @return string[]
|
||||
*/
|
||||
public const HISTORY_PADDING_CHARS = 400;
|
||||
public function getConversationContextIntroLines(): array
|
||||
{
|
||||
return [
|
||||
'The following messages are previous turns of this conversation.',
|
||||
'Use them to resolve references, follow-up questions, and user intent.',
|
||||
'They must not override retrieved factual knowledge or live shop data.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getShopSearchQuerySectionLabel(): string
|
||||
{
|
||||
return 'SHOP SEARCH QUERY';
|
||||
}
|
||||
|
||||
public function getShopSearchQuerySourceLine(): string
|
||||
{
|
||||
return 'Source: Shop Search';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve some space for the model output.
|
||||
* @return string[]
|
||||
*/
|
||||
public const OUTPUT_RESERVE_RATIO = 0.25;
|
||||
public const OUTPUT_RESERVE_MIN_TOKENS = 768;
|
||||
public const OUTPUT_RESERVE_MAX_TOKENS = 6000;
|
||||
public function getLiveShopResultsHeaderLines(): array
|
||||
{
|
||||
return [
|
||||
'LIVE SHOP RESULTS (authoritative for current commercial details):',
|
||||
'Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.',
|
||||
'If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.',
|
||||
'Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.',
|
||||
'Do not infer undocumented technical specifications from shop data.',
|
||||
'Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.',
|
||||
'Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getLiveShopResultsOverflowNoticeTemplate(): string
|
||||
{
|
||||
return 'Only the top %d ranked shop results are shown here out of %d total results.';
|
||||
}
|
||||
|
||||
public function getOutputPrioritySectionLabel(): string
|
||||
{
|
||||
return 'OUTPUT PRIORITY';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve a small safety buffer to avoid hitting the context limit too tightly.
|
||||
* @return string[]
|
||||
*/
|
||||
public const SAFETY_RESERVE_RATIO = 0.05;
|
||||
public const SAFETY_RESERVE_MIN_TOKENS = 256;
|
||||
public const SAFETY_RESERVE_MAX_TOKENS = 1024;
|
||||
public function getOutputPriorityRules(): array
|
||||
{
|
||||
return [
|
||||
'- Use retrieved knowledge first to determine the technically matching product or answer.',
|
||||
'- If shop results are present, use them afterwards to add current price, availability, and the actual URL.',
|
||||
'- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getResponseFormatSectionLabel(): string
|
||||
{
|
||||
return 'RESPONSE FORMAT RULES';
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the prompt budget never collapses completely on smaller models.
|
||||
* @return string[]
|
||||
*/
|
||||
public const MIN_PROMPT_BUDGET_TOKENS = 1024;
|
||||
public function getResponseFormatBaseRules(): array
|
||||
{
|
||||
return [
|
||||
'- Keep normal spacing between all words. Never fuse words together.',
|
||||
'- Use short, clean paragraphs or short labeled sections.',
|
||||
'- Do not use persuasive or promotional wording.',
|
||||
'- Do not repeat the same fact in slightly different wording.',
|
||||
'- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.',
|
||||
'- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.',
|
||||
'- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.',
|
||||
'- Do not combine technical identity from one source with commercial fields from a different product.',
|
||||
'- Product number, price, availability, and URL must belong to the same explicitly grounded product.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Limit how many ranked shop results are passed into the final prompt.
|
||||
* The shop search may return many candidates, but the LLM should only see
|
||||
* the most relevant top subset after local reranking.
|
||||
* @return string[]
|
||||
*/
|
||||
public const MAX_SHOP_RESULTS_IN_PROMPT = 24;
|
||||
public function getResponseFormatWithShopRules(): array
|
||||
{
|
||||
return [
|
||||
'- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.',
|
||||
'- Keep price, availability, and URL on separate lines when they are present.',
|
||||
'- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.',
|
||||
'- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.',
|
||||
'- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Keyword list for technical product prompts (German and English terms).
 *
 * Technical product prompts should be answered like documentation,
 * not like sales copy.
 * NOTE(review): how the keywords are matched (substring vs. whole word)
 * is not visible here; short entries such as 'ip' deserve a check.
 *
 * @var string[]
 */
public const TECHNICAL_PRODUCT_KEYWORDS = [
    'technisch', 'technical',
    'produkt', 'product',
    'gerät', 'device',
    'modell', 'model',
    'messprinzip', 'measurement principle',
    'schnittstelle', 'interface',
    'relais', 'relay',
    'indikator', 'indicator',
    'spannung', 'voltage',
    'strom', 'current',
    'druck', 'pressure',
    'temperatur', 'temperature',
    'schutzart', 'ip',
    'fehlercode', 'error code',
    'wasserhärte', 'hardness',
    'testomat',
    'chlor',
    'chlormessung',
];
|
||||
/**
 * Fixed response-format instruction lines for the "without shop" case.
 *
 * Currently a single bullet line telling the model not to invent
 * external products or manufacturers.
 *
 * @return string[]
 */
public function getResponseFormatWithoutShopRules(): array
{
    $rules = [];
    $rules[] = '- If no shop results are present, do not compensate by inventing external products or external manufacturers.';

    return $rules;
}
|
||||
|
||||
/**
 * German keyword list for accessory-style requests.
 *
 * Umlaut words are listed together with an ASCII spelling
 * ('zubehör'/'zubehor', 'ergänzung'/'ergaenzung').
 *
 * @var string[]
 */
public const ACCESSORY_REQUEST_KEYWORDS = [
    'passend', 'passende', 'passendes',
    'zubehör', 'zubehor',
    'dazu',
    'indikator', 'reagenz',
    'kit', 'set',
    'zusatz',
    'ergänzung', 'ergaenzung',
];
|
||||
}
|
||||
/**
 * Fixed response-format instruction lines for technical answers.
 *
 * Each entry is a ready-made bullet line (prefixed with "- ").
 *
 * @return string[]
 */
public function getResponseFormatTechnicalRules(): array
{
    $rules = [
        '- Write like technical documentation: precise, neutral, and source-close.',
        '- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.',
        '- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Fixed response-format instruction lines for accessory questions.
 *
 * Each entry is a ready-made bullet line (prefixed with "- ").
 *
 * @return string[]
 */
public function getResponseFormatAccessoryRules(): array
{
    $rules = [
        '- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.',
        '- The main device must come first. The accessory must not replace the main device.',
        '- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.',
        '- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Heading text for the language-rules section.
 */
public function getLanguageRulesSectionLabel(): string
{
    $label = 'LANGUAGE RULES';

    return $label;
}
|
||||
|
||||
/**
 * Fixed instruction lines that pin the answer language to the language
 * of the user question.
 *
 * Each entry is a ready-made bullet line (prefixed with "- ").
 *
 * @return string[]
 */
public function getLanguageRules(): array
{
    $rules = [
        '- Answer only in the same language as the user question.',
        '- All headings, labels, notes, and structural elements must be in the same language as the user question.',
        '- Do not switch languages unless the user does.',
        '- If headings are used, write them in the user\'s language.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Heading text for the fact-grounding section.
 */
public function getFactGroundingRulesSectionLabel(): string
{
    $label = 'FACT GROUNDING RULES';

    return $label;
}
|
||||
|
||||
/**
 * Base fact-grounding instruction lines.
 *
 * Each entry is a ready-made bullet line (prefixed with "- "). The wording
 * restricts answers to facts present in the provided sources.
 *
 * @return string[]
 */
public function getFactGroundingBaseRules(): array
{
    $rules = [
        '- State only facts that are explicitly present in the provided sources.',
        '- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.',
        '- Do not invent missing values.',
        '- Do not replace missing values with estimates, defaults, or typical industry assumptions.',
        '- Do not claim that information is missing if it appears in the provided sources.',
        '- Do not compare with other products unless those products are also present in the provided sources.',
        '- Prefer source-faithful wording over persuasive wording.',
        '- Avoid marketing language such as \'ideal\', \'perfect\', \'unverzichtbar\', \'entscheidend\', \'optimal\', \'kosteneffizient\', \'prozesssicher\', or \'state-of-the-art\'.',
        '- Clearly separate explicit facts from inferences.',
        '- If a conclusion goes beyond the source wording, label it exactly as \'Inference:\'.',
        '- If a sentence cannot be traced to the provided sources, do not write it.',
        '- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.',
        '- If the sources do not identify a suitable product, do not invent one.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Fact-grounding instruction lines for the "with shop" case.
 *
 * Each entry is a ready-made bullet line (prefixed with "- "). The wording
 * covers how shop data and retrieved knowledge are prioritised.
 *
 * @return string[]
 */
public function getFactGroundingWithShopRules(): array
{
    $rules = [
        '- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.',
        '- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.',
        '- When shop results are present and relevant, include current price and the actual URL if available.',
        '- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.',
        '- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
        '- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.',
        '- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.',
        '- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.',
        '- If the shop match is ambiguous, keep the technical identification and commercial details separate.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Fact-grounding instruction lines for the "without shop" case.
 *
 * Each entry is a ready-made bullet line (prefixed with "- ").
 *
 * @return string[]
 */
public function getFactGroundingWithoutShopRules(): array
{
    $rules = [
        '- Use retrieved knowledge as authoritative for factual answers.',
        '- If no shop results are present, do not compensate with external recommendations or external product suggestions.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Fact-grounding instruction lines for technical product questions.
 *
 * Each entry is a ready-made bullet line (prefixed with "- "). The wording
 * asks for source-close, non-advisory answers.
 *
 * @return string[]
 */
public function getFactGroundingTechnicalRules(): array
{
    $rules = [
        '- For technical product questions, answer primarily with explicitly stated facts.',
        '- Behave like a technical documentation assistant, not like a sales advisor.',
        '- Keep interpretations minimal and do not generalize application areas beyond the provided sources.',
        '- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.',
        '- Do not translate technical facts into business value unless the source explicitly does so.',
        '- Do not recommend process changes unless explicitly present in the source.',
        '- Do not use persuasive summaries or advisory conclusions.',
        '- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.',
        '- Use neutral engineering language.',
        '- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.',
        '- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.',
        '- If the source lists application areas, repeat only those areas and do not broaden them.',
        '- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
        '- If the source states only a threshold function, do not expand it into broader control logic.',
        '- If a detail is not explicitly stated in the provided sources, say so plainly.',
        '- Prefer short, source-close sentences over explanatory expansion.',
        '- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.',
    ];

    return $rules;
}
|
||||
|
||||
/**
 * Heading text for the retrieved-knowledge section.
 */
public function getRetrievedKnowledgeSectionLabel(): string
{
    $label = 'RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)';

    return $label;
}
|
||||
|
||||
/**
 * Source attribution line for retrieved knowledge.
 */
public function getRetrievedKnowledgeSourceLine(): string
{
    $line = 'Source: Documents';

    return $line;
}
|
||||
|
||||
/**
 * Heading text for the URL-content section.
 */
public function getUrlContentSectionLabel(): string
{
    $label = 'CONTENT FROM URL (authoritative if user-provided)';

    return $label;
}
|
||||
|
||||
/**
 * Source attribution line for URL content.
 */
public function getUrlContentSourceLine(): string
{
    $line = 'Source: URL';

    return $line;
}
|
||||
|
||||
/**
 * Label text for a shop item's product number field.
 */
public function getShopProductNumberLabel(): string
{
    $label = 'Product number';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's manufacturer field.
 */
public function getShopManufacturerLabel(): string
{
    $label = 'Manufacturer';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's price field.
 */
public function getShopPriceLabel(): string
{
    $label = 'Price';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's availability field.
 */
public function getShopAvailabilityLabel(): string
{
    $label = 'Available';

    return $label;
}
|
||||
|
||||
/**
 * Text used for the positive availability value.
 */
public function getShopAvailabilityYesLabel(): string
{
    $label = 'yes';

    return $label;
}
|
||||
|
||||
/**
 * Text used for the negative availability value.
 */
public function getShopAvailabilityNoLabel(): string
{
    $label = 'no';

    return $label;
}
|
||||
|
||||
/**
 * Prefix string for shop highlight lines: a dash followed by one space.
 */
public function getShopHighlightPrefix(): string
{
    $prefix = '- ';

    return $prefix;
}
|
||||
|
||||
/**
 * Label text for a shop item's URL field.
 */
public function getShopUrlLabel(): string
{
    $label = 'URL';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's product image field.
 */
public function getShopProductImageLabel(): string
{
    $label = 'Product image';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's description field.
 */
public function getShopDescriptionLabel(): string
{
    $label = 'Description';

    return $label;
}
|
||||
|
||||
/**
 * Label text for a shop item's meta information field.
 */
public function getShopMetaInformationLabel(): string
{
    $label = 'Meta information';

    return $label;
}
|
||||
|
||||
/**
 * Keyword list for technical product prompts (German and English terms,
 * mostly listed as adjacent pairs).
 *
 * NOTE(review): how the keywords are matched (substring vs. whole word)
 * is not visible here; short entries such as 'ip' deserve a check.
 *
 * @return string[]
 */
public function getTechnicalProductKeywords(): array
{
    $keywords = [
        'technisch', 'technical',
        'produkt', 'product',
        'gerät', 'device',
        'modell', 'model',
        'messprinzip', 'measurement principle',
        'schnittstelle', 'interface',
        'relais', 'relay',
        'indikator', 'indicator',
        'spannung', 'voltage',
        'strom', 'current',
        'druck', 'pressure',
        'temperatur', 'temperature',
        'schutzart', 'ip',
        'fehlercode', 'error code',
        'wasserhärte', 'hardness',
        'testomat',
        'chlor',
        'chlormessung',
    ];

    return $keywords;
}
|
||||
|
||||
/**
 * German keyword list for accessory-style requests.
 *
 * Umlaut words are listed together with an ASCII spelling
 * ('zubehör'/'zubehor', 'ergänzung'/'ergaenzung').
 *
 * @return string[]
 */
public function getAccessoryRequestKeywords(): array
{
    $keywords = [
        'passend', 'passende', 'passendes',
        'zubehör', 'zubehor',
        'dazu',
        'indikator', 'reagenz',
        'kit', 'set',
        'zusatz',
        'ergänzung', 'ergaenzung',
    ];

    return $keywords;
}
|
||||
|
||||
/**
 * PCRE pattern for model-like tokens.
 *
 * Matches at least two Unicode letters, an optional single whitespace
 * character, then two to five digits, all enclosed by word boundaries
 * (UTF-8 mode via the "u" modifier).
 */
public function getTechnicalProductModelPattern(): string
{
    $pattern = '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';

    return $pattern;
}
|
||||
}
|
||||
204
src/Config/SearchRepairConfig.php
Normal file
204
src/Config/SearchRepairConfig.php
Normal file
@@ -0,0 +1,204 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Static configuration values for search repair: limits, scoring weights,
 * term lists and the PCRE patterns derived from them.
 *
 * All getters return fixed values; nothing is read from the environment.
 */
final class SearchRepairConfig
{
    /** One name word: a letter followed by any mix of letters, "®" and hyphens. */
    private const NAME_WORD = '[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*';

    /**
     * One to three name words, whitespace, then a 2-5 digit number with an
     * optional alphanumeric/hyphen suffix. Shared by both model patterns.
     */
    private const MODEL_BODY = self::NAME_WORD
        . '(?:\s+' . self::NAME_WORD . '){0,2}'
        . '\s+\d{2,5}[A-Za-z0-9\-]*';

    /** Master switch; currently always on. */
    public function isEnabled(): bool
    {
        return true;
    }

    /** Upper bound on repair queries. */
    public function getMaxRepairQueries(): int
    {
        return 3;
    }

    /**
     * Primary-result count threshold.
     * NOTE(review): by its name, repair is skipped at or above this count; confirm with caller.
     */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return 2;
    }

    /** Number of top products to include in logs. */
    public function getTopProductLogLimit(): int
    {
        return 3;
    }

    /** Model pattern with the whole match wrapped in capture group 1. */
    public function getModelCandidatePattern(): string
    {
        return '/\b(' . self::MODEL_BODY . ')\b/u';
    }

    /**
     * Pattern for "<accessory term> <1-5 digit number + optional suffix>",
     * case-insensitive, captured as group 1.
     */
    public function getAccessoryCandidatePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryCandidateTerms());

        return '/\b((?:' . $alternation . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
    }

    /** Case-insensitive whole-word pattern over the accessory/bundle terms (group 1). */
    public function getAccessoryOrBundlePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryOrBundleTerms());

        return '/\b(' . $alternation . ')\b/iu';
    }

    /** Same model pattern as the candidate pattern, but without a capture group. */
    public function getModelLikePattern(): string
    {
        return '/\b' . self::MODEL_BODY . '\b/u';
    }

    /** Case-insensitive whole-word pattern over the specificity boost terms (non-capturing). */
    public function getSpecificityBoostPattern(): string
    {
        $alternation = implode('|', $this->getSpecificityBoostTerms());

        return '/\b(?:' . $alternation . ')\b/iu';
    }

    /**
     * Generic German tokens, listed with umlaut and "ae"-style spellings.
     *
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return [
            'wasser',
            'messgerät', 'messgeraet',
            'produkt',
            'geräte', 'geraete',
            'gerät', 'geraet',
            'resthärte', 'resthaerte',
            'preis',
            'infos',
            'wissen',
        ];
    }

    /** Character list (whitespace, NUL, quotes, backtick, ". , ; : -") for trimming. */
    public function getSanitizeTrimCharacters(): string
    {
        return " \t\n\r\0\x0B\"'`.,;:-";
    }

    /** Pattern matching a single digit anywhere in the subject. */
    public function getContainsDigitPattern(): string
    {
        return '/\d/u';
    }

    /** Pattern matching one or more whitespace characters. */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /** Pattern matching runs of characters that are not letters, numbers, whitespace or hyphens. */
    public function getTokenizeCleanupPattern(): string
    {
        return '/[^\p{L}\p{N}\s\-]+/u';
    }

    /** Separator character used in product keys. */
    public function getProductKeySeparator(): string
    {
        return '|';
    }

    /** Score value for candidates with digits (usage not visible here). */
    public function getCandidateDigitScore(): int
    {
        return 4;
    }

    /** Cap applied to a candidate's word count (usage not visible here). */
    public function getCandidateWordCountCap(): int
    {
        return 4;
    }

    /** Score value for specificity boost matches (usage not visible here). */
    public function getSpecificityBoostScore(): int
    {
        return 3;
    }

    /** Overlap threshold against the primary query, as a fraction. */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return 0.9;
    }

    /** Weight for prompt matches. */
    public function getPromptMatchWeight(): int
    {
        return 3;
    }

    /** Weight for primary-query matches. */
    public function getPrimaryQueryMatchWeight(): int
    {
        return 2;
    }

    /** Weight for repair-signal matches. */
    public function getRepairSignalMatchWeight(): int
    {
        return 4;
    }

    /** Bonus related to primary result order (usage not visible here). */
    public function getPrimaryResultOrderBonus(): int
    {
        return 1;
    }

    /** Score value for token intersections (usage not visible here). */
    public function getTokenIntersectionScore(): int
    {
        return 2;
    }

    /** Score value for numeric token matches (usage not visible here). */
    public function getNumericTokenMatchScore(): int
    {
        return 4;
    }

    /**
     * Terms joined into the accessory candidate pattern.
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return ['indikator', 'indicator', 'reagenz', 'reagent', 'kit', 'set'];
    }

    /**
     * Terms joined into the accessory/bundle pattern.
     *
     * Entries are regex fragments (two contain "\s+"), so they are inserted
     * into the pattern verbatim and must not be passed through preg_quote().
     * NOTE(review): 'passendes' and the "ae" spelling 'ergaenzung' are not
     * listed here - confirm that is intentional.
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return [
            'passend', 'passende',
            'zubehor', 'zubehör',
            'dazu',
            'zusatz',
            'erganzung', 'ergänzung',
            'indikator', 'reagenz',
            'kit', 'set',
            'auch\s+das',
            'mit\s+preis\s+und\s+allen\s+infos',
        ];
    }

    /**
     * Terms joined into the specificity boost pattern.
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return [
            'indikator', 'indicator',
            'testomat',
            'tritromat', 'titromat',
            'reagenz', 'reagent',
        ];
    }
}
|
||||
40
src/Config/StopWordsConfig.php
Normal file
40
src/Config/StopWordsConfig.php
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Holds the German stop-word list used for retrieval.
 */
final class StopWordsConfig
{
    /**
     * Retrieval-optimized stop-word list.
     *
     * Design rules for maintaining the list:
     * - negations stay out of the list (they carry meaning)
     * - question words stay out of the list
     * - domain terms stay out of the list
     * - only structural filler words are listed
     *
     * @return string[]
     */
    public function getStopWords(): array
    {
        $stopWords = [
            // preposition
            'mit',
            // articles
            'der',
            'die',
            'das',
            'ein',
            'eine',
            'einer',
            'eines',
            'den',
            'dem',
            'des',
            // conjunctions
            'und',
            'oder',
            'aber',
            'sowie',
            // pronouns
            'ich',
            'du',
            'er',
            'sie',
            'es',
            'wir',
            'ihr',
            // fillers and politeness words
            'halt',
            'eben',
            'auch',
            'schon',
            'noch',
            'mal',
            'bitte',
            'danke',
            'also',
            'nun',
            'tja',
            // sequence, place and time adverbs
            'dann',
            'danach',
            'davor',
            'hier',
            'dort',
            'heute',
            'gestern',
            'morgen',
            // modal verb forms
            'könnte',
            'kannst',
            'kann',
            'würde',
            'würdest',
            'würden',
        ];

        return $stopWords;
    }
}
|
||||
Reference in New Issue
Block a user