Move intent and config values into config files

This commit is contained in:
team2
2026-04-23 21:49:54 +02:00
parent 87417febf4
commit fce44e971d
17 changed files with 1937 additions and 1133 deletions

View File

@@ -6,50 +6,249 @@ namespace App\Config;
final class AgentRunnerConfig
{
public function getCommerceHistoryBudgetChars(): int
{
return 1000;
}
public function getProductSearchKnowledgeChunkLimit(): int
{
return 2;
}
public function getAdvisoryProductSearchKnowledgeChunkLimit(): int
{
return 3;
}
/**
 * Regex matching a leading label followed by a colon.
 *
 * Accepted labels are "keyword"/"keywords", "suchquery", "search query"
 * (with optional whitespace between the two words) and "query". The pattern
 * is anchored at the start of the string, is case-insensitive (i) and
 * UTF-8 aware (u), and also consumes the whitespace around the colon.
 */
public function getOptimizedShopQueryPrefixPattern(): string
{
return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu';
}
/**
 * Character list containing space, tab, line feed, carriage return, NUL,
 * vertical tab, double quote, single quote and backtick.
 *
 * NOTE(review): presumably passed as the character mask to trim() — confirm
 * with the caller.
 */
public function getOptimizedShopQueryTrimCharacters(): string
{
return " \t\n\r\0\x0B\"'`";
}
public function getEmptyPromptMessage(): string
{
return '❌ Empty prompt.';
}
public function getAnalyzeRequestMessage(): string
{
return 'Ich analysiere deine Anfrage...';
}
public function getCheckInternetSourcesMessage(): string
{
return 'Ich prüfe auf Internetquellen...';
}
public function getRetrieveKnowledgeMessage(): string
{
return 'Ich hole relevante Daten aus meinem RAG-Wissen...';
}
public function getOptimizeSearchMessage(): string
{
return 'Ich optimiere die Recherche...';
}
public function getFetchSearchDataMessageTemplate(): string
{
return 'Ich rufe Recherchedaten ab (type: %s)';
}
public function getAnalyzeAllInformationMessage(): string
{
return 'Ich analysiere alle Informationen...';
}
public function getThinkingWhileStreamingMessage(): string
{
return 'Denke nach...';
}
public function getNoLlmDataReceivedMessage(): string
{
return '❌ Es wurden keine Daten vom LLM empfangen.';
}
public function getGenericInternalErrorMessage(): string
{
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
}
public function getDebugInternalErrorPrefix(): string
{
return '❌ Interner Fehler: ';
}
public function getExternalUrlSourceLabel(): string
{
return 'Externe URL';
}
public function getRagKnowledgeSourceLabel(): string
{
return 'RAG Wissen';
}
public function getConversationHistorySourceLabel(): string
{
return 'Chatverlauf';
}
public function getShopSystemSourceLabel(): string
{
return 'Shopsystem';
}
public function getExtendedShopSearchSourceLabel(): string
{
return 'Erweiterte Shopsuche';
}
public function getUsedSourcesPrefix(): string
{
return 'Genutzte Quellen: ';
}
public function getSourcesPrefix(): string
{
return 'Quellen: ';
}
public function getSourceBadgeHtmlTemplate(): string
{
return '<span class="badge bg-info text-black">%s</span>';
}
public function getErrorHtmlTemplate(): string
{
return '<span class="text-danger">%s</span>' . "\n<hr>\n";
}
public function getThinkHtmlTemplate(): string
{
return '<span class="text-info think">%s</span>' . "\n";
}
public function getInfoHtmlTemplate(): string
{
return "\n\n" . '<span class="text-info fw-bolder">%s</span>' . "\n";
}
public function getDebugHtmlTemplate(): string
{
return "\n\nDEBUG: <code>%s</code>\n";
}
public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
{
$historyBlock = '';
if (trim($commerceHistoryContext) !== '') {
$historyBlock = '
RECENT CONVERSATION CONTEXT:
' . $commerceHistoryContext . '
Additional rules for conversation context:
- The current user input has highest priority.
- Use the recent conversation context only to resolve omitted references.
- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.
- Do not revive older products unless the current user input clearly refers to them.
- If the current input starts a new topic, ignore older product context.
- Prefer the most recent product reference over older ones.
';
$historyBlock = $this->buildHistoryBlock($commerceHistoryContext);
}
return '
Generate a short search query for Shopware 6 from the following user input text.
Rules:
- Output only the final search query.
- Always convert relevant search terms to their singular form.
- No introduction, no explanation, no quotation marks.
- Use only shop-relevant search terms from the user input for a shop search.
- Maximum 6 search terms, preferably fewer.
- Remove filler words, polite phrases, and irrelevant words.
- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.
- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).
- Separate terms using spaces only.
- If a relevant product name is present, it must be placed at the beginning of the final search query.
- Try to always identify all products mentioned in the user input text, even in long prompts.
- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.
- If the current user input is vague or referential, use the recent conversation context only as support.
- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".
Output format:
Keyword1 Keyword2 Keyword3
' . $historyBlock . '
CURRENT USER INPUT:
' . $prompt . '
';
return $this->implodePromptBlocks([
$this->getShopPromptIntro(),
$this->buildRulesBlock($this->getShopPromptRules()),
$this->getShopPromptOutputFormatBlock(),
$historyBlock,
$this->getCurrentUserInputLabel() . ':',
trim($prompt),
]);
}
/**
 * Assembles the conversation-history section of the shop prompt.
 *
 * The section consists of the context label followed by a colon, the trimmed
 * history text, and the conversation-context rules under their own headline.
 * The three parts are joined by implodePromptBlocks().
 */
private function buildHistoryBlock(string $commerceHistoryContext): string
{
    $heading = $this->getRecentConversationContextLabel() . ':';
    $history = trim($commerceHistoryContext);
    $rules = $this->buildRulesBlock(
        $this->getConversationContextRules(),
        'Additional rules for conversation context:'
    );

    return $this->implodePromptBlocks([$heading, $history, $rules]);
}
/**
 * Rule lines for the shop search-query prompt.
 *
 * Every entry already carries its "- " bullet prefix. getShopPrompt() hands
 * this list to buildRulesBlock(), which joins the entries with newlines
 * under the default "Rules:" headline.
 *
 * @return string[]
 */
public function getShopPromptRules(): array
{
return [
'- Output only the final search query.',
'- Always convert relevant search terms to their singular form.',
'- No introduction, no explanation, no quotation marks.',
'- Use only shop-relevant search terms from the user input for a shop search.',
'- Maximum 6 search terms, preferably fewer.',
'- Remove filler words, polite phrases, and irrelevant words.',
'- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.',
'- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).',
'- Separate terms using spaces only.',
'- If a relevant product name is present, it must be placed at the beginning of the final search query.',
'- Try to always identify all products mentioned in the user input text, even in long prompts.',
'- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.',
'- If the current user input is vague or referential, use the recent conversation context only as support.',
'- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".',
];
}
/**
 * Rule lines that constrain how recent conversation context may be used.
 *
 * Every entry already carries its "- " bullet prefix. buildHistoryBlock()
 * renders this list under the headline
 * "Additional rules for conversation context:".
 *
 * @return string[]
 */
public function getConversationContextRules(): array
{
return [
'- The current user input has highest priority.',
'- Use the recent conversation context only to resolve omitted references.',
'- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.',
'- Do not revive older products unless the current user input clearly refers to them.',
'- If the current input starts a new topic, ignore older product context.',
'- Prefer the most recent product reference over older ones.',
];
}
public function getShopPromptIntro(): string
{
return 'Generate a short search query for Shopware 6 from the following user input text.';
}
/**
 * Two-line prompt block: the "Output format:" headline followed by an
 * example line of space-separated keywords.
 */
public function getShopPromptOutputFormatBlock(): string
{
    $lines = [
        'Output format:',
        'Keyword1 Keyword2 Keyword3',
    ];

    return implode("\n", $lines);
}
public function getRecentConversationContextLabel(): string
{
return 'RECENT CONVERSATION CONTEXT';
}
public function getCurrentUserInputLabel(): string
{
return 'CURRENT USER INPUT';
}
/**
 * Renders a headline followed by one rule per line.
 *
 * The headline is always followed by a newline, even when $rules is empty.
 *
 * @param string[] $rules    Rule lines, emitted verbatim and joined with "\n".
 * @param string   $headline First line of the block.
 */
private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string
{
    $body = implode("\n", $rules);

    return "{$headline}\n{$body}";
}
/**
 * Joins prompt blocks with a blank line between them.
 *
 * Each block is trimmed first; blocks that are empty after trimming are
 * dropped so that no stray blank sections appear in the result.
 *
 * @param string[] $blocks
 */
private function implodePromptBlocks(array $blocks): string
{
    $trimmedBlocks = array_map(
        static fn(string $block): string => trim($block),
        $blocks
    );

    $kept = [];
    foreach ($trimmedBlocks as $candidate) {
        if ($candidate === '') {
            continue;
        }
        $kept[] = $candidate;
    }

    return implode("\n\n", $kept);
}
}

View File

@@ -6,57 +6,38 @@ namespace App\Config;
final class CommerceIntentConfig
{
/**
* @return string[]
*/
public function getStrongSignalsList(): array
{
return [
'shop',
'alle',
'preis',
'preise',
'kunde',
'online',
'produkt',
'produkte',
'artikel',
'sku',
'kaufen',
'kostet',
'kosten',
'verfügbarkeit',
'verfuegbarkeit',
// Search / product discovery signals
'suche',
'such',
'finde',
'finden',
'welche',
'welcher',
'welches',
// Device / system signals
'analysegerät',
'analysegeraet',
'analysegeräte',
'analysegeraete',
'messgerät',
'messgeraet',
'messgeräte',
'messgeraete',
'gerät',
'geraet',
'geräte',
'geraete',
'analysator',
'analysatoren',
'analyzer',
'system',
'systeme',
'anlage',
'anlagen',
];
}
/**
* @return string[]
*/
public function getAdvisorySignals(): array
{
return [
@@ -67,30 +48,36 @@ final class CommerceIntentConfig
'geeignet',
'empfiehl',
'empfehl',
'vergleich',
'vergleichen',
];
}
public function getPricePattern(): string
/**
* @return string[]
*/
public function getPriceTerms(): array
{
$pattern = [
return [
'euro',
'€',
'eur',
'teuer',
'preis',
'preise',
'kosten',
'kostet',
];
return implode('|', $pattern);
}
public function getColorPattern(): string
public function getPricePattern(): string
{
$pattern = [
return implode('|', $this->getPriceTerms());
}
/**
* @return string[]
*/
public function getColorTerms(): array
{
return [
'schwarz',
'weiß',
'weis',
@@ -103,13 +90,19 @@ final class CommerceIntentConfig
'orange',
'braun',
];
return implode('|', $pattern);
}
public function getSizeTokenPattern(): string
public function getColorPattern(): string
{
$pattern = [
return implode('|', $this->getColorTerms());
}
/**
* @return string[]
*/
public function getSizeTokenTerms(): array
{
return [
'xs',
's',
'm',
@@ -118,18 +111,189 @@ final class CommerceIntentConfig
'xxl',
'xxxxl',
];
return implode('|', $pattern);
}
public function getSizePattern(): string
public function getSizeTokenPattern(): string
{
$pattern = [
return implode('|', $this->getSizeTokenTerms());
}
/**
* @return string[]
*/
public function getSizeTerms(): array
{
return [
'größe',
'groesse',
'grösse',
];
}
return implode('|', $pattern);
public function getSizePattern(): string
{
return implode('|', $this->getSizeTerms());
}
/**
 * Regex that finds one of the size words from getSizePattern() and captures
 * the value following it (letters a-z, digits, dot, hyphen) as group 1.
 *
 * NOTE(review): whitespace between the size word and the value is optional,
 * so a word such as "größer" would match with "r" captured as the value —
 * confirm whether that is intended.
 */
public function getSizeExtractionPattern(): string
{
    return sprintf('/\b(?:%s)\s*([a-z0-9.-]+)\b/u', $this->getSizePattern());
}
/**
 * Word-boundary regexes for support and diagnostic vocabulary.
 *
 * Each fragment below is wrapped as /\b<fragment>\b/u. The final fragment
 * matches codes of the form "e" followed by one to three digits.
 *
 * @return string[]
 */
public function getSupportDiagnosticPatterns(): array
{
    $fragments = [
        'fehler',
        'fehlercode',
        'error',
        'störung',
        'stoerung',
        'alarm',
        'störungsmeldung',
        'stoerungsmeldung',
        'meldung',
        'warnung',
        'warncode',
        'code',
        'was bedeutet',
        'warum',
        'blinkt',
        'zeigt',
        'zeigt an',
        'ursache',
        'diagnose',
        'servicefall',
        'problem',
        'störung beheben',
        'stoerung beheben',
        'e\d{1,3}',
    ];

    return array_map(
        static fn(string $fragment): string => '/\b' . $fragment . '\b/u',
        $fragments
    );
}
/**
 * Word-boundary regexes for words that state commerce intent explicitly.
 *
 * Each word below is wrapped as /\b<word>\b/u.
 *
 * @return string[]
 */
public function getExplicitCommerceIntentPatterns(): array
{
    $words = [
        'shop',
        'preis',
        'kosten',
        'kostet',
        'kaufen',
        'bestellen',
        'produkt',
        'artikel',
        'sku',
        'online',
    ];

    $patterns = [];
    foreach ($words as $word) {
        $patterns[] = '/\b' . $word . '\b/u';
    }

    return $patterns;
}
/**
 * Regex matching a standalone run of 4 to 10 digits, delimited by word
 * boundaries on both sides.
 */
public function getSkuLikePattern(): string
{
return '/\b\d{4,10}\b/u';
}
/**
 * Regex for a numeric amount followed by one of the price terms.
 *
 * Matches an integer or decimal number (dot or comma as separator), optional
 * whitespace, and then any alternative from getPricePattern().
 *
 * The closing boundary is a negative lookahead rather than `\b`. `\b` only
 * matches next to a word character, so a term that ends in a symbol such as
 * "€" could never match when followed by whitespace, punctuation or the end
 * of the input (e.g. "10 €"). The lookahead still rejects terms that run
 * straight into further letters, digits or underscores.
 */
public function getPriceValuePattern(): string
{
    return sprintf(
        '/\b\d+(?:[.,]\d+)?\s*(?:%s)(?![\p{L}\p{N}_])/u',
        $this->getPricePattern()
    );
}
/**
 * Regex that detects one of the size words from getSizePattern() followed by
 * a value made of letters a-z, digits, dots or hyphens. Nothing is captured.
 */
public function getSizeValuePattern(): string
{
    return sprintf('/\b(?:%s)\s*[a-z0-9.-]+\b/u', $this->getSizePattern());
}
/**
 * Regex matching any alternative from getSizeTokenPattern() as a whole word.
 */
public function getSizeTokenValuePattern(): string
{
    return sprintf('/\b(?:%s)\b/u', $this->getSizeTokenPattern());
}
/**
 * Regex matching any alternative from getColorPattern() as a whole word.
 */
public function getColorValuePattern(): string
{
    return sprintf('/\b(?:%s)\b/u', $this->getColorPattern());
}
public function getSupportOrDiagnosticSignalLabel(): string
{
return 'support_or_diagnostic';
}
public function getSkuSignalLabel(): string
{
return 'sku';
}
public function getPriceSignalLabel(): string
{
return 'price';
}
public function getSizeSignalLabel(): string
{
return 'size';
}
public function getSizeTokenSignalLabel(): string
{
return 'size_token';
}
public function getColorSignalLabel(): string
{
return 'color';
}
public function getAdvisorySignalPrefix(): string
{
return 'advisory:';
}
public function getProductSearchMinScore(): int
{
return 3;
}
public function getAdvisoryProductSearchMinScore(): int
{
return 2;
}
public function getStrongSignalScore(): int
{
return 3;
}
public function getSkuSignalScore(): int
{
return 2;
}
public function getPriceSignalScore(): int
{
return 2;
}
public function getSizeSignalScore(): int
{
return 2;
}
public function getSizeTokenSignalScore(): int
{
return 1;
}
public function getColorSignalScore(): int
{
return 1;
}
public function getAdvisorySignalScore(): int
{
return 1;
}
}

View File

@@ -4,28 +4,18 @@ declare(strict_types=1);
namespace App\Config;
final readonly class CommerceQueryParserConfig
final class CommerceQueryParserConfig
{
/**
* @param string[] $knownBrands
* @param string[] $phrasesToRemove
* @param string[] $filterSearchTokensPattern
* @param string[] $referenceOnlyTokens
*/
public function __construct(
private array $knownBrands = [],
private array $phrasesToRemove = [],
private array $filterSearchTokensPattern = [],
private array $referenceOnlyTokens = [],
) {
}
/**
* @return string[]
*/
public function getKnownBrands(): array
{
return $this->knownBrands;
return [
'heyl',
'horiba',
'neomeris',
];
}
/**
@@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig
*/
public function getPhrasesToRemove(): array
{
return $this->phrasesToRemove;
return [
'ich suche',
'suche',
'habt ihr',
'gibt es',
'zeige mir',
'welches gerät',
'welche gerät',
'welches modell',
'welches ist besser',
'welches ist am besten',
'alternative',
'alternativen',
];
}
public function getHistoryContextPattern(): string
{
return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
}
public function getReferenceFollowUpPattern(): string
public function getHistoryContextValuePattern(): string
{
return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
}
/**
 * Lowercase German filler and follow-up words (e.g. "auch", "bitte",
 * "zeige", "suche").
 *
 * getFilterSearchTokensPattern() returns this same list for older callers.
 *
 * NOTE(review): presumably these tokens are dropped from the search text
 * before the shop query is built — confirm with the parser that consumes them.
 *
 * @return string[]
 */
public function getFilterSearchTokens(): array
{
return [
'auch',
'noch',
'nochmal',
'zusätzlich',
'dazu',
'davon',
'stattdessen',
'bitte',
'gern',
'gerne',
'zeige',
'zeig',
'such',
'suche',
'finde',
'find',
'mir',
'mal',
'von',
];
}
/**
* Backward-compatible alias for older callers.
*
* @return string[]
*/
public function getFilterSearchTokensPattern(): array
{
return $this->filterSearchTokensPattern;
return $this->getFilterSearchTokens();
}
/**
* @return string[]
*/
public function getReferenceOnlyTokens(): array
public function getNormalizationSearch(): array
{
if ($this->referenceOnlyTokens !== []) {
return $this->referenceOnlyTokens;
}
return ['€'];
}
/**
* @return string[]
*/
public function getNormalizationReplace(): array
{
return [' euro '];
}
public function getPromptSanitizePattern(): string
{
return '/[^\p{L}\p{N}\s.,\-]/u';
}
public function getWhitespaceCollapsePattern(): string
{
return '/\s+/u';
}
public function getWhitespaceSplitPattern(): string
{
return '/\s+/u';
}
public function getSearchTextTrimCharacters(): string
{
return " \t\n\r\0\x0B-.,";
}
public function getMinSearchTokenLength(): int
{
return 1;
}
public function getMinDirectProductTokenLength(): int
{
return 1;
}
public function getHistoryQuestionPattern(): string
{
return '/^Question:\s*(.+)$/m';
}
/**
 * Regex for a price range of the form "zwischen <number> und <number> euro".
 *
 * Group 1 captures the first number and group 2 the second; both may carry a
 * decimal part separated by "." or ",". The literal word "euro" is required;
 * the "€" symbol is not matched by this pattern.
 *
 * NOTE(review): there is no case-insensitive flag, so the input is presumably
 * lowercased before matching — confirm with the caller.
 */
public function getPriceBetweenPattern(): string
{
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
 * Regex for an upper price bound: "unter", "bis", "max" or "maximal",
 * followed by a number and the literal word "euro".
 *
 * Group 1 captures the number, which may carry a decimal part separated by
 * "." or ",".
 */
public function getPriceMaxPattern(): string
{
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
 * Regex for a lower price bound: "ab", "mindestens" or "min", followed by a
 * number and the literal word "euro".
 *
 * Group 1 captures the number, which may carry a decimal part separated by
 * "." or ",".
 */
public function getPriceMinPattern(): string
{
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
}
/**
* @return string[]
*/
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
{
return [
'preis',
'preise',
'kosten',
'kostet',
'gerät',
'geraet',
'modell',
'produkt',
'artikel',
'dafür',
'dafuer',
'dazu',
'davon',
'verfügbarkeit',
'verfuegbarkeit',
'shop',
'link',
'zum',
'zur',
'das',
'dieses',
'den',
'dem',
'bitte',
'und',
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
];
}
public function getDirectProductDigitPattern(): string
{
return '/\d/u';
}
public function getDirectProductMaxTokens(): int
{
return 4;
}
/**
 * Regex for a model-like phrase: one to three words followed by a number.
 *
 * Each word starts with a lowercase letter (a-z, ä, ö, ü, ß) and may continue
 * with those letters, "®" or "-". The number has 2 to 5 digits and may be
 * followed directly by further letters, digits or hyphens.
 *
 * NOTE(review): only lowercase letters are accepted and there is no
 * case-insensitive flag, so the input is presumably lowercased first —
 * confirm with the caller.
 */
public function getModelLikePattern(): string
{
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
}
public function getAccessoryLikePattern(): string
{
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
}
/**
 * Builds a regex that matches $token literally as a whole word.
 *
 * The token is escaped with preg_quote() using "/" as the delimiter, so regex
 * metacharacters in it are treated as plain text.
 */
public function buildExactTokenRemovalPattern(string $token): string
{
    $escapedToken = preg_quote($token, '/');

    return sprintf('/\b%s\b/u', $escapedToken);
}
/**
 * Builds a regex that matches $brand directly followed by a model number.
 *
 * The brand is escaped with preg_quote(). It must be followed by whitespace
 * and a 2 to 5 digit number, which may carry a suffix of letters, digits or
 * hyphens.
 */
public function buildBrandPartOfModelPattern(string $brand): string
{
    $escapedBrand = preg_quote($brand, '/');

    return sprintf('/\b%s\s+\d{2,5}[a-z0-9\-]*\b/u', $escapedBrand);
}
}

View File

@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
* - the system now has more safeguards:
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
*/
public const VECTOR_SCORE_THRESHOLD = 0.82;
public const VECTOR_SCORE_THRESHOLD = 0.83;
/**
* Lower safety boundary for dynamic threshold adjustments.

View File

@@ -1,97 +1,459 @@
<?php
declare(strict_types=1);
namespace App\Config;
class PromptBuilderConfig{
/**
* Approximate character-to-token ratio for conservative prompt budgeting.
*/
public const CHARS_PER_TOKEN = 4;
final class PromptBuilderConfig
{
public function getCharsPerToken(): int
{
return 4;
}
public function getHistoryPaddingChars(): int
{
return 400;
}
public function getOutputReserveRatio(): float
{
return 0.25;
}
public function getOutputReserveMinTokens(): int
{
return 768;
}
public function getOutputReserveMaxTokens(): int
{
return 6000;
}
public function getSafetyReserveRatio(): float
{
return 0.05;
}
public function getSafetyReserveMinTokens(): int
{
return 256;
}
public function getSafetyReserveMaxTokens(): int
{
return 1024;
}
public function getMinPromptBudgetTokens(): int
{
return 1024;
}
public function getMaxShopResultsInPrompt(): int
{
return 24;
}
public function getDetailedShopResultsMaxCount(): int
{
return 5;
}
public function getTechnicalProductKeywordMatchThreshold(): int
{
return 2;
}
public function getSystemSectionLabel(): string
{
return 'SYSTEM';
}
public function getUserQuestionSectionLabel(): string
{
return 'USER QUESTION';
}
public function getConversationContextSectionLabel(): string
{
return 'CONVERSATION CONTEXT (contextual only)';
}
/**
* Keep a small gap so history does not consume the last available prompt space.
* @return string[]
*/
public const HISTORY_PADDING_CHARS = 400;
public function getConversationContextIntroLines(): array
{
return [
'The following messages are previous turns of this conversation.',
'Use them to resolve references, follow-up questions, and user intent.',
'They must not override retrieved factual knowledge or live shop data.',
];
}
public function getShopSearchQuerySectionLabel(): string
{
return 'SHOP SEARCH QUERY';
}
public function getShopSearchQuerySourceLine(): string
{
return 'Source: Shop Search';
}
/**
* Reserve some space for the model output.
* @return string[]
*/
public const OUTPUT_RESERVE_RATIO = 0.25;
public const OUTPUT_RESERVE_MIN_TOKENS = 768;
public const OUTPUT_RESERVE_MAX_TOKENS = 6000;
public function getLiveShopResultsHeaderLines(): array
{
return [
'LIVE SHOP RESULTS (authoritative for current commercial details):',
'Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.',
'If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.',
'Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.',
'Do not infer undocumented technical specifications from shop data.',
'Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.',
'Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.',
];
}
/**
 * printf-style template for the notice that the shop result list was cut.
 *
 * It has two %d placeholders, in this order: the number of top-ranked results
 * shown, then the total number of results.
 */
public function getLiveShopResultsOverflowNoticeTemplate(): string
{
return 'Only the top %d ranked shop results are shown here out of %d total results.';
}
public function getOutputPrioritySectionLabel(): string
{
return 'OUTPUT PRIORITY';
}
/**
* Reserve a small safety buffer to avoid hitting the context limit too tightly.
* @return string[]
*/
public const SAFETY_RESERVE_RATIO = 0.05;
public const SAFETY_RESERVE_MIN_TOKENS = 256;
public const SAFETY_RESERVE_MAX_TOKENS = 1024;
public function getOutputPriorityRules(): array
{
return [
'- Use retrieved knowledge first to determine the technically matching product or answer.',
'- If shop results are present, use them afterwards to add current price, availability, and the actual URL.',
'- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.',
];
}
public function getResponseFormatSectionLabel(): string
{
return 'RESPONSE FORMAT RULES';
}
/**
* Ensure the prompt budget never collapses completely on smaller models.
* @return string[]
*/
public const MIN_PROMPT_BUDGET_TOKENS = 1024;
public function getResponseFormatBaseRules(): array
{
return [
'- Keep normal spacing between all words. Never fuse words together.',
'- Use short, clean paragraphs or short labeled sections.',
'- Do not use persuasive or promotional wording.',
'- Do not repeat the same fact in slightly different wording.',
'- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.',
'- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.',
'- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.',
'- Do not combine technical identity from one source with commercial fields from a different product.',
'- Product number, price, availability, and URL must belong to the same explicitly grounded product.',
];
}
/**
* Limit how many ranked shop results are passed into the final prompt.
* The shop search may return many candidates, but the LLM should only see
* the most relevant top subset after local reranking.
* @return string[]
*/
public const MAX_SHOP_RESULTS_IN_PROMPT = 24;
public function getResponseFormatWithShopRules(): array
{
return [
'- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.',
'- Keep price, availability, and URL on separate lines when they are present.',
'- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.',
'- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.',
'- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.',
];
}
/**
* Technical product prompts should be answered like documentation,
* not like sales copy.
* @return string[]
*/
public const TECHNICAL_PRODUCT_KEYWORDS = [
'technisch',
'technical',
'produkt',
'product',
'gerät',
'device',
'modell',
'model',
'messprinzip',
'measurement principle',
'schnittstelle',
'interface',
'relais',
'relay',
'indikator',
'indicator',
'spannung',
'voltage',
'strom',
'current',
'druck',
'pressure',
'temperatur',
'temperature',
'schutzart',
'ip',
'fehlercode',
'error code',
'wasserhärte',
'hardness',
'testomat',
'chlor',
'chlormessung',
];
public function getResponseFormatWithoutShopRules(): array
{
return [
'- If no shop results are present, do not compensate by inventing external products or external manufacturers.',
];
}
public const ACCESSORY_REQUEST_KEYWORDS = [
'passend',
'passende',
'passendes',
'zubehör',
'zubehor',
'dazu',
'indikator',
'reagenz',
'kit',
'set',
'zusatz',
'ergänzung',
'ergaenzung',
];
}
/**
* @return string[]
*/
public function getResponseFormatTechnicalRules(): array
{
return [
'- Write like technical documentation: precise, neutral, and source-close.',
'- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.',
'- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.',
];
}
/**
* @return string[]
*/
public function getResponseFormatAccessoryRules(): array
{
return [
'- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.',
'- The main device must come first. The accessory must not replace the main device.',
'- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.',
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
];
}
public function getLanguageRulesSectionLabel(): string
{
return 'LANGUAGE RULES';
}
/**
* @return string[]
*/
public function getLanguageRules(): array
{
return [
'- Answer only in the same language as the user question.',
'- All headings, labels, notes, and structural elements must be in the same language as the user question.',
'- Do not switch languages unless the user does.',
'- If headings are used, write them in the user\'s language.',
];
}
public function getFactGroundingRulesSectionLabel(): string
{
return 'FACT GROUNDING RULES';
}
/**
* @return string[]
*/
public function getFactGroundingBaseRules(): array
{
return [
'- State only facts that are explicitly present in the provided sources.',
'- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.',
'- Do not invent missing values.',
'- Do not replace missing values with estimates, defaults, or typical industry assumptions.',
'- Do not claim that information is missing if it appears in the provided sources.',
'- Do not compare with other products unless those products are also present in the provided sources.',
'- Prefer source-faithful wording over persuasive wording.',
'- Avoid marketing language such as \'ideal\', \'perfect\', \'unverzichtbar\', \'entscheidend\', \'optimal\', \'kosteneffizient\', \'prozesssicher\', or \'state-of-the-art\'.',
'- Clearly separate explicit facts from inferences.',
'- If a conclusion goes beyond the source wording, label it exactly as \'Inference:\'.',
'- If a sentence cannot be traced to the provided sources, do not write it.',
'- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.',
'- If the sources do not identify a suitable product, do not invent one.',
];
}
/**
* @return string[]
*/
public function getFactGroundingWithShopRules(): array
{
return [
'- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.',
'- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.',
'- When shop results are present and relevant, include current price and the actual URL if available.',
'- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.',
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
'- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.',
'- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.',
'- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.',
'- If the shop match is ambiguous, keep the technical identification and commercial details separate.',
];
}
/**
* @return string[]
*/
public function getFactGroundingWithoutShopRules(): array
{
return [
'- Use retrieved knowledge as authoritative for factual answers.',
'- If no shop results are present, do not compensate with external recommendations or external product suggestions.',
];
}
/**
* @return string[]
*/
public function getFactGroundingTechnicalRules(): array
{
return [
'- For technical product questions, answer primarily with explicitly stated facts.',
'- Behave like a technical documentation assistant, not like a sales advisor.',
'- Keep interpretations minimal and do not generalize application areas beyond the provided sources.',
'- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.',
'- Do not translate technical facts into business value unless the source explicitly does so.',
'- Do not recommend process changes unless explicitly present in the source.',
'- Do not use persuasive summaries or advisory conclusions.',
'- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.',
'- Use neutral engineering language.',
'- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.',
'- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.',
'- If the source lists application areas, repeat only those areas and do not broaden them.',
'- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
'- If the source states only a threshold function, do not expand it into broader control logic.',
'- If a detail is not explicitly stated in the provided sources, say so plainly.',
'- Prefer short, source-close sentences over explanatory expansion.',
'- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.',
];
}
/**
 * Returns the fixed heading text for the retrieved-knowledge section.
 *
 * NOTE(review): presumably placed above the retrieved knowledge in the prompt context — confirm at the call site.
 */
public function getRetrievedKnowledgeSectionLabel(): string
{
    $heading = 'RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)';

    return $heading;
}
/**
 * Returns the fixed source-attribution line "Source: Documents".
 *
 * NOTE(review): presumably printed alongside retrieved-knowledge content — confirm at the call site.
 */
public function getRetrievedKnowledgeSourceLine(): string
{
    $sourceLine = 'Source: Documents';

    return $sourceLine;
}
/**
 * Returns the fixed heading text for the URL-content section.
 *
 * NOTE(review): presumably placed above content fetched from a URL — confirm at the call site.
 */
public function getUrlContentSectionLabel(): string
{
    $heading = 'CONTENT FROM URL (authoritative if user-provided)';

    return $heading;
}
/**
 * Returns the fixed source-attribution line "Source: URL".
 *
 * NOTE(review): presumably printed alongside URL-derived content — confirm at the call site.
 */
public function getUrlContentSourceLine(): string
{
    $sourceLine = 'Source: URL';

    return $sourceLine;
}
/**
 * Returns the fixed label text "Product number".
 *
 * NOTE(review): presumably the field label for a shop product's number — confirm at the call site.
 */
public function getShopProductNumberLabel(): string
{
    $label = 'Product number';

    return $label;
}
/**
 * Returns the fixed label text "Manufacturer".
 *
 * NOTE(review): presumably the field label for a shop product's manufacturer — confirm at the call site.
 */
public function getShopManufacturerLabel(): string
{
    $label = 'Manufacturer';

    return $label;
}
/**
 * Returns the fixed label text "Price".
 *
 * NOTE(review): presumably the field label for a shop product's price — confirm at the call site.
 */
public function getShopPriceLabel(): string
{
    $label = 'Price';

    return $label;
}
/**
 * Returns the fixed label text "Available".
 *
 * NOTE(review): presumably paired with the yes/no availability values — confirm at the call site.
 */
public function getShopAvailabilityLabel(): string
{
    $label = 'Available';

    return $label;
}
/**
 * Returns the fixed text "yes".
 *
 * NOTE(review): presumably the value shown for an available product — confirm at the call site.
 */
public function getShopAvailabilityYesLabel(): string
{
    $affirmative = 'yes';

    return $affirmative;
}
/**
 * Returns the fixed text "no".
 *
 * NOTE(review): presumably the value shown for an unavailable product — confirm at the call site.
 */
public function getShopAvailabilityNoLabel(): string
{
    $negative = 'no';

    return $negative;
}
/**
 * Returns the fixed two-character prefix "- " (hyphen followed by one space).
 *
 * NOTE(review): presumably prepended to each product highlight line — confirm at the call site.
 */
public function getShopHighlightPrefix(): string
{
    $bulletPrefix = '- ';

    return $bulletPrefix;
}
/**
 * Returns the fixed label text "URL".
 *
 * NOTE(review): presumably the field label for a shop product's link — confirm at the call site.
 */
public function getShopUrlLabel(): string
{
    $label = 'URL';

    return $label;
}
/**
 * Returns the fixed label text "Product image".
 *
 * NOTE(review): presumably the field label for a shop product's image reference — confirm at the call site.
 */
public function getShopProductImageLabel(): string
{
    $label = 'Product image';

    return $label;
}
/**
 * Returns the fixed label text "Description".
 *
 * NOTE(review): presumably the field label for a shop product's description — confirm at the call site.
 */
public function getShopDescriptionLabel(): string
{
    $label = 'Description';

    return $label;
}
/**
 * Returns the fixed label text "Meta information".
 *
 * NOTE(review): presumably the field label for additional shop product metadata — confirm at the call site.
 */
public function getShopMetaInformationLabel(): string
{
    $label = 'Meta information';

    return $label;
}
/**
 * Provides the lowercase German/English keyword list for technical product topics.
 *
 * The keywords are assembled from thematic groups; the merged result is a plain
 * zero-indexed list whose order is fixed.
 * NOTE(review): how the keywords are matched (substring vs. token) is not visible here — the
 * two-letter entry 'ip' would be very broad under substring matching; confirm at the call site.
 *
 * @return string[]
 */
public function getTechnicalProductKeywords(): array
{
    $genericTerms = ['technisch', 'technical', 'produkt', 'product', 'gerät', 'device', 'modell', 'model'];
    $functionTerms = [
        'messprinzip', 'measurement principle',
        'schnittstelle', 'interface',
        'relais', 'relay',
        'indikator', 'indicator',
    ];
    $physicalTerms = ['spannung', 'voltage', 'strom', 'current', 'druck', 'pressure', 'temperatur', 'temperature'];
    $ratingTerms = ['schutzart', 'ip', 'fehlercode', 'error code'];
    $domainTerms = ['wasserhärte', 'hardness', 'testomat', 'chlor', 'chlormessung'];

    // array_merge() on integer-keyed lists appends and renumbers, so the result is one flat list.
    return array_merge($genericTerms, $functionTerms, $physicalTerms, $ratingTerms, $domainTerms);
}
/**
 * Provides the lowercase keyword list for accessory-style requests.
 *
 * Umlaut words appear next to ASCII spellings ('zubehör'/'zubehor', 'ergänzung'/'ergaenzung').
 * NOTE(review): presumably used to detect prompts asking for matching accessories — confirm at the call site.
 *
 * @return string[]
 */
public function getAccessoryRequestKeywords(): array
{
    $accessoryKeywords = [
        'passend', 'passende', 'passendes',
        'zubehör', 'zubehor',
        'dazu',
        'indikator', 'reagenz',
        'kit', 'set',
        'zusatz',
        'ergänzung', 'ergaenzung',
    ];

    return $accessoryKeywords;
}
/**
 * Returns a PCRE pattern (UTF-8 mode) matching a model-like token: at least two letters,
 * an optional single whitespace character, then two to five digits, delimited by word boundaries.
 */
public function getTechnicalProductModelPattern(): string
{
    $modelPattern = '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';

    return $modelPattern;
}
}

View File

@@ -0,0 +1,204 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Static configuration values (limits, score weights, regex patterns and term lists)
 * exposed through getters. Every getter returns a constant; nothing here reads state.
 *
 * NOTE(review): the names suggest these values drive a search "repair" step that retries
 * with alternative queries — the consumer is not visible here, confirm before relying on it.
 */
final class SearchRepairConfig
{
    /**
     * Regex fragment shared by the model-candidate and model-like patterns:
     * one to three letter-led words followed by a token starting with 2-5 digits.
     */
    private const MODEL_PHRASE_FRAGMENT = '[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*';

    /** Returns true unconditionally. */
    public function isEnabled(): bool
    {
        return true;
    }

    /** Returns 3. NOTE(review): presumably the cap on repair queries — confirm at the consumer. */
    public function getMaxRepairQueries(): int
    {
        return 3;
    }

    /** Returns 2. NOTE(review): presumably the primary result count at which repair is skipped — confirm. */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return 2;
    }

    /** Returns 3. NOTE(review): presumably how many top products are written to the log — confirm. */
    public function getTopProductLogLimit(): int
    {
        return 3;
    }

    /**
     * Returns the model phrase pattern with the whole phrase in capture group 1
     * (word-boundary delimited, UTF-8 mode).
     */
    public function getModelCandidatePattern(): string
    {
        return '/\b(' . self::MODEL_PHRASE_FRAGMENT . ')\b/u';
    }

    /**
     * Returns a case-insensitive pattern capturing "<accessory term> <1-5 digits + optional suffix>"
     * in group 1; the alternation is built from getAccessoryCandidateTerms().
     */
    public function getAccessoryCandidatePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryCandidateTerms());

        return sprintf('/\b((?:%s)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu', $alternation);
    }

    /**
     * Returns a case-insensitive pattern capturing any term from getAccessoryOrBundleTerms()
     * as a whole word/phrase in group 1.
     */
    public function getAccessoryOrBundlePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryOrBundleTerms());

        return sprintf('/\b(%s)\b/iu', $alternation);
    }

    /**
     * Returns the same model phrase pattern as getModelCandidatePattern(), but without a capture group.
     */
    public function getModelLikePattern(): string
    {
        return '/\b' . self::MODEL_PHRASE_FRAGMENT . '\b/u';
    }

    /**
     * Returns a case-insensitive, non-capturing whole-word pattern built from getSpecificityBoostTerms().
     */
    public function getSpecificityBoostPattern(): string
    {
        $alternation = implode('|', $this->getSpecificityBoostTerms());

        return sprintf('/\b(?:%s)\b/iu', $alternation);
    }

    /**
     * Returns lowercase German tokens, with umlaut and "ae"-transliterated spellings side by side.
     * NOTE(review): presumably tokens considered too generic to form a useful candidate — confirm.
     *
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return [
            'wasser',
            'messgerät', 'messgeraet',
            'produkt',
            'geräte', 'geraete',
            'gerät', 'geraet',
            'resthärte', 'resthaerte',
            'preis',
            'infos',
            'wissen',
        ];
    }

    /**
     * Returns a character list: space, tab, LF, CR, NUL, vertical tab, the three quote
     * characters, and the punctuation . , ; : -
     * NOTE(review): presumably passed as the character mask to trim() — confirm at the call site.
     */
    public function getSanitizeTrimCharacters(): string
    {
        return " \t\n\r\0\x0B\"'`.,;:-";
    }

    /** Returns a pattern that matches any single digit (UTF-8 mode). */
    public function getContainsDigitPattern(): string
    {
        return '/\d/u';
    }

    /** Returns a pattern that matches one or more whitespace characters (UTF-8 mode). */
    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /** Returns a pattern matching runs of characters that are not letters, numbers, whitespace or hyphen. */
    public function getTokenizeCleanupPattern(): string
    {
        return '/[^\p{L}\p{N}\s\-]+/u';
    }

    /** Returns the pipe character "|". NOTE(review): presumably joins parts of a product key — confirm. */
    public function getProductKeySeparator(): string
    {
        return '|';
    }

    /** Returns 4. NOTE(review): presumably the score added when a candidate contains a digit — confirm. */
    public function getCandidateDigitScore(): int
    {
        return 4;
    }

    /** Returns 4. NOTE(review): presumably the maximum word count credited per candidate — confirm. */
    public function getCandidateWordCountCap(): int
    {
        return 4;
    }

    /** Returns 3. NOTE(review): presumably the score added on a specificity-boost match — confirm. */
    public function getSpecificityBoostScore(): int
    {
        return 3;
    }

    /** Returns 0.9. NOTE(review): presumably an overlap ratio against the primary query — confirm. */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return 0.9;
    }

    /** Returns 3. NOTE(review): presumably the weight of a match against the user prompt — confirm. */
    public function getPromptMatchWeight(): int
    {
        return 3;
    }

    /** Returns 2. NOTE(review): presumably the weight of a match against the primary query — confirm. */
    public function getPrimaryQueryMatchWeight(): int
    {
        return 2;
    }

    /** Returns 4. NOTE(review): presumably the weight of a match against a repair signal — confirm. */
    public function getRepairSignalMatchWeight(): int
    {
        return 4;
    }

    /** Returns 1. NOTE(review): presumably a ranking bonus for primary results — confirm. */
    public function getPrimaryResultOrderBonus(): int
    {
        return 1;
    }

    /** Returns 2. NOTE(review): presumably the score per intersecting token — confirm. */
    public function getTokenIntersectionScore(): int
    {
        return 2;
    }

    /** Returns 4. NOTE(review): presumably the score per matching numeric token — confirm. */
    public function getNumericTokenMatchScore(): int
    {
        return 4;
    }

    /**
     * Returns the terms used as alternation in getAccessoryCandidatePattern().
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return ['indikator', 'indicator', 'reagenz', 'reagent', 'kit', 'set'];
    }

    /**
     * Returns the terms used as alternation in getAccessoryOrBundlePattern().
     *
     * Entries are regex fragments, not plain words (two of them contain `\s+`), so they
     * must not be escaped before being joined into the pattern.
     * NOTE(review): 'erganzung' is listed but 'ergaenzung' is not, and there is no 'passendes';
     * whether that is intentional cannot be determined from this class — verify.
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return [
            'passend', 'passende',
            'zubehor', 'zubehör',
            'dazu',
            'zusatz',
            'erganzung', 'ergänzung',
            'indikator', 'reagenz',
            'kit', 'set',
            'auch\s+das',
            'mit\s+preis\s+und\s+allen\s+infos',
        ];
    }

    /**
     * Returns the terms used as alternation in getSpecificityBoostPattern().
     *
     * NOTE(review): 'tritromat' sits next to 'titromat'; possibly a deliberate misspelling
     * variant — verify before removing either.
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return ['indikator', 'indicator', 'testomat', 'tritromat', 'titromat', 'reagenz', 'reagent'];
    }
}

View File

@@ -0,0 +1,40 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Holds the German stop-word list as a constant and exposes it through a getter.
 */
final class StopWordsConfig
{
    /**
     * Stop words in their fixed order, grouped by word class for readability.
     */
    private const STOP_WORDS = [
        // Preposition
        'mit',
        // Articles
        'der', 'die', 'das',
        'ein', 'eine', 'einer', 'eines',
        'den', 'dem', 'des',
        // Conjunctions
        'und', 'oder', 'aber', 'sowie',
        // Pronouns
        'ich', 'du', 'er', 'sie', 'es',
        'wir', 'ihr',
        // Filler and politeness words
        'halt', 'eben', 'auch', 'schon',
        'noch', 'mal', 'bitte', 'danke',
        'also', 'nun', 'tja',
        // Temporal and local adverbs
        'dann', 'danach', 'davor',
        'hier', 'dort',
        'heute', 'gestern', 'morgen',
        // Modal verb forms
        'könnte', 'kannst', 'kann',
        'würde', 'würdest', 'würden',
    ];

    /**
     * Returns the retrieval-optimized stop-word list.
     *
     * Maintenance rules for this list:
     * - keep negations
     * - keep question words
     * - keep domain terms
     * - remove only structural filler words
     *
     * @return string[]
     */
    public function getStopWords(): array
    {
        return self::STOP_WORDS;
    }
}