move intent an config value into config files
This commit is contained in:
@@ -120,12 +120,7 @@ services:
|
||||
|
||||
App\Commerce\CommerceQueryParser: ~
|
||||
|
||||
App\Commerce\SearchRepairService:
|
||||
arguments:
|
||||
$logger: '@monolog.logger.agent'
|
||||
$enabled: '%mto.commerce.search_repair.enabled%'
|
||||
$maxRepairQueries: '%mto.commerce.search_repair.max_queries%'
|
||||
$minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%'
|
||||
App\Commerce\SearchRepairService: ~
|
||||
|
||||
App\Shopware\ShopwareCriteriaBuilder: ~
|
||||
|
||||
|
||||
@@ -4,9 +4,6 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Agent;
|
||||
|
||||
use App\Commerce\CommerceReferenceResolver;
|
||||
use App\Commerce\CommerceReferenceStore;
|
||||
use App\Commerce\Dto\CommerceReferenceContext;
|
||||
use App\Commerce\SearchRepairService;
|
||||
use App\Commerce\ShopSearchService;
|
||||
use App\Config\AgentRunnerConfig;
|
||||
@@ -21,8 +18,6 @@ use Throwable;
|
||||
|
||||
final readonly class AgentRunner
|
||||
{
|
||||
private const COMMERCE_HISTORY_BUDGET_CHARS = 1000;
|
||||
|
||||
private bool $systemMsgOn;
|
||||
|
||||
public function __construct(
|
||||
@@ -33,8 +28,6 @@ final readonly class AgentRunner
|
||||
private RetrieverInterface $retriever,
|
||||
private ShopSearchService $shopSearchService,
|
||||
private SearchRepairService $searchRepairService,
|
||||
private CommerceReferenceStore $commerceReferenceStore,
|
||||
private CommerceReferenceResolver $commerceReferenceResolver,
|
||||
private CommerceIntentLite $commerceIntentLite,
|
||||
private OllamaClient $ollamaClient,
|
||||
private LoggerInterface $agentLogger,
|
||||
@@ -51,14 +44,13 @@ final readonly class AgentRunner
|
||||
$prompt = trim($prompt);
|
||||
|
||||
if ($prompt === '') {
|
||||
yield $this->systemMsg('❌ Empty prompt.', 'err');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
|
||||
return;
|
||||
}
|
||||
|
||||
$shopResults = [];
|
||||
$primaryShopResults = [];
|
||||
$factSources = [];
|
||||
$contextSignals = [];
|
||||
$sources = [];
|
||||
$optimizedShopQuery = '';
|
||||
$shopSearchQuery = '';
|
||||
$commerceIntent = CommerceIntentLite::NONE;
|
||||
@@ -66,8 +58,6 @@ final readonly class AgentRunner
|
||||
$attemptedShopRepair = false;
|
||||
$usedShopRepair = false;
|
||||
$shopRepairQueries = [];
|
||||
$activeCommerceReference = null;
|
||||
$shopChecked = false;
|
||||
|
||||
$this->agentLogger->info('Agent run started', [
|
||||
'userId' => $userId,
|
||||
@@ -79,74 +69,39 @@ final readonly class AgentRunner
|
||||
// Additional context strategies can be added here later.
|
||||
}
|
||||
|
||||
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
|
||||
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');
|
||||
|
||||
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
|
||||
if ($urlContent !== '') {
|
||||
$this->addBadge($factSources, 'Externe URL');
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
|
||||
}
|
||||
|
||||
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
|
||||
|
||||
$knowledgeChunks = $this->retriever->retrieve($prompt);
|
||||
if ($knowledgeChunks !== []) {
|
||||
$this->addBadge($factSources, 'RAG Wissen');
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
|
||||
}
|
||||
|
||||
$commerceIntent = $this->detectCommerceIntent($prompt);
|
||||
|
||||
if ($this->isCommerceIntent($commerceIntent)) {
|
||||
yield $this->systemMsg('Ich optimiere die Recherche...', 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
||||
|
||||
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
|
||||
$activeCommerceReference = $this->loadCommerceReference($userId);
|
||||
|
||||
if ($commerceHistoryContext !== '') {
|
||||
$this->addBadge($contextSignals, 'Gesprächskontext');
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
|
||||
}
|
||||
|
||||
if ($activeCommerceReference !== null) {
|
||||
$this->addBadge($contextSignals, 'Commerce-Referenz');
|
||||
}
|
||||
|
||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp(
|
||||
$optimizedShopQuery = $this->buildOptimizedShopQuery(
|
||||
$prompt,
|
||||
$activeCommerceReference
|
||||
$userId,
|
||||
$commerceHistoryContext
|
||||
);
|
||||
|
||||
if ($isReferenceOnlyFollowUp) {
|
||||
$shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference);
|
||||
|
||||
if ($shopSearchQuery !== '') {
|
||||
$this->addBadge($contextSignals, 'Deterministische Referenzsuche');
|
||||
}
|
||||
|
||||
$this->agentLogger->info('Using deterministic reference shop query', [
|
||||
'userId' => $userId,
|
||||
'commerceIntent' => $commerceIntent,
|
||||
'prompt' => $prompt,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'referenceProductName' => $activeCommerceReference?->productName,
|
||||
'referenceFocusTerms' => $activeCommerceReference?->focusTerms,
|
||||
]);
|
||||
} else {
|
||||
$optimizedShopQuery = $this->buildOptimizedShopQuery(
|
||||
$prompt,
|
||||
$userId,
|
||||
$commerceHistoryContext
|
||||
);
|
||||
|
||||
if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) {
|
||||
$this->addBadge($contextSignals, 'Query-Optimierung');
|
||||
}
|
||||
|
||||
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
|
||||
}
|
||||
|
||||
if ($shopSearchQuery === '') {
|
||||
$shopSearchQuery = $prompt;
|
||||
}
|
||||
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
|
||||
|
||||
$this->agentLogger->info('Commerce search prepared', [
|
||||
'userId' => $userId,
|
||||
@@ -154,26 +109,20 @@ final readonly class AgentRunner
|
||||
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
|
||||
'optimizedShopQuery' => $optimizedShopQuery,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp,
|
||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||
'hasActiveCommerceReference' => $activeCommerceReference !== null,
|
||||
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
|
||||
]);
|
||||
|
||||
yield $this->systemMsg(
|
||||
'Ich rufe Recherchedaten ab (type: ' . $commerceIntent . ')',
|
||||
sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent),
|
||||
'think'
|
||||
);
|
||||
|
||||
$shopChecked = true;
|
||||
|
||||
$primaryShopResults = $this->searchShop(
|
||||
$shopSearchQuery,
|
||||
$commerceIntent,
|
||||
$userId,
|
||||
$commerceHistoryContext,
|
||||
$activeCommerceReference
|
||||
$commerceHistoryContext
|
||||
);
|
||||
|
||||
$repairPayload = $this->repairShopResults(
|
||||
@@ -192,13 +141,11 @@ final readonly class AgentRunner
|
||||
$shopRepairQueries = $repairPayload['repairQueries'];
|
||||
|
||||
if ($shopResults !== []) {
|
||||
$this->addBadge($factSources, 'Shopsystem');
|
||||
} elseif ($shopChecked) {
|
||||
$this->addBadge($factSources, 'Shopsystem geprüft');
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel());
|
||||
}
|
||||
|
||||
if ($attemptedShopRepair) {
|
||||
$this->addBadge($contextSignals, 'Erweiterte Shopsuche');
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,7 +153,7 @@ final readonly class AgentRunner
|
||||
$knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
|
||||
}
|
||||
|
||||
yield $this->systemMsg('Ich analysiere alle Informationen...', 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think');
|
||||
|
||||
$finalPrompt = $this->promptBuilder->build(
|
||||
prompt: $prompt,
|
||||
@@ -226,7 +173,6 @@ final readonly class AgentRunner
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'primaryShopResultsCount' => count($primaryShopResults),
|
||||
'shopResultsCount' => count($shopResults),
|
||||
'shopChecked' => $shopChecked,
|
||||
'attemptedShopRepair' => $attemptedShopRepair,
|
||||
'usedShopRepair' => $usedShopRepair,
|
||||
'shopRepairQueries' => $shopRepairQueries,
|
||||
@@ -243,21 +189,19 @@ final readonly class AgentRunner
|
||||
]);
|
||||
}
|
||||
|
||||
if ($factSources !== [] || $contextSignals !== []) {
|
||||
yield $this->emitSourceSummary(
|
||||
$factSources,
|
||||
$contextSignals,
|
||||
'Genutzte Datenpfade'
|
||||
if ($sources !== []) {
|
||||
yield $this->emitSources(
|
||||
$sources,
|
||||
$this->agentRunnerConfig->getUsedSourcesPrefix()
|
||||
);
|
||||
}
|
||||
|
||||
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
|
||||
|
||||
if ($factSources !== [] || $contextSignals !== []) {
|
||||
yield $this->emitSourceSummary(
|
||||
$factSources,
|
||||
$contextSignals,
|
||||
'Quellen und Signale'
|
||||
if ($sources !== []) {
|
||||
yield $this->emitSources(
|
||||
$sources,
|
||||
$this->agentRunnerConfig->getSourcesPrefix()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -266,11 +210,10 @@ final readonly class AgentRunner
|
||||
}
|
||||
|
||||
if ($fullOutput !== '') {
|
||||
$this->persistConversationState(
|
||||
userId: $userId,
|
||||
prompt: $prompt,
|
||||
fullOutput: $fullOutput,
|
||||
shopResults: $shopResults
|
||||
$this->contextService->appendHistory(
|
||||
$userId,
|
||||
$prompt,
|
||||
$fullOutput
|
||||
);
|
||||
}
|
||||
|
||||
@@ -281,7 +224,6 @@ final readonly class AgentRunner
|
||||
'commerceIntent' => $commerceIntent,
|
||||
'primaryShopResultsCount' => count($primaryShopResults),
|
||||
'shopResultsCount' => count($shopResults),
|
||||
'shopChecked' => $shopChecked,
|
||||
'attemptedShopRepair' => $attemptedShopRepair,
|
||||
'usedShopRepair' => $usedShopRepair,
|
||||
'shopRepairQueries' => $shopRepairQueries,
|
||||
@@ -292,8 +234,6 @@ final readonly class AgentRunner
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||
'hasActiveCommerceReference' => $activeCommerceReference !== null,
|
||||
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
|
||||
]);
|
||||
} catch (Throwable $e) {
|
||||
$this->agentLogger->error('Agent run failed', [
|
||||
@@ -361,42 +301,6 @@ final readonly class AgentRunner
|
||||
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
|
||||
}
|
||||
|
||||
private function isReferenceOnlyCommerceFollowUp(
|
||||
string $prompt,
|
||||
?CommerceReferenceContext $referenceContext
|
||||
): bool {
|
||||
if ($referenceContext === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$normalizedPrompt = mb_strtolower(trim($prompt), 'UTF-8');
|
||||
$normalizedPrompt = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
|
||||
$normalizedPrompt = preg_replace('/\s+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
|
||||
$normalizedPrompt = trim($normalizedPrompt);
|
||||
|
||||
if ($normalizedPrompt === '') {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $normalizedPrompt) === 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return preg_match(
|
||||
'/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u',
|
||||
$normalizedPrompt
|
||||
) === 1;
|
||||
}
|
||||
|
||||
private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string
|
||||
{
|
||||
if ($referenceContext === null) {
|
||||
return '';
|
||||
}
|
||||
|
||||
return trim($referenceContext->buildReferenceSearchText());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array{
|
||||
* results: array,
|
||||
@@ -445,15 +349,13 @@ final readonly class AgentRunner
|
||||
string $query,
|
||||
string $commerceIntent,
|
||||
string $userId,
|
||||
string $commerceHistoryContext = '',
|
||||
?CommerceReferenceContext $referenceContext = null
|
||||
string $commerceHistoryContext = ''
|
||||
): array {
|
||||
try {
|
||||
return $this->shopSearchService->search(
|
||||
$query,
|
||||
$commerceIntent,
|
||||
$commerceHistoryContext,
|
||||
$referenceContext
|
||||
$commerceHistoryContext
|
||||
);
|
||||
} catch (Throwable $e) {
|
||||
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
|
||||
@@ -462,8 +364,6 @@ final readonly class AgentRunner
|
||||
'query' => $query,
|
||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||
'hasReferenceContext' => $referenceContext !== null,
|
||||
'referenceProductName' => $referenceContext?->productName,
|
||||
'exception' => $e,
|
||||
]);
|
||||
|
||||
@@ -475,73 +375,23 @@ final readonly class AgentRunner
|
||||
{
|
||||
return $this->contextService->buildUserContextWithinBudget(
|
||||
$userId,
|
||||
self::COMMERCE_HISTORY_BUDGET_CHARS
|
||||
);
|
||||
}
|
||||
|
||||
private function loadCommerceReference(string $userId): ?CommerceReferenceContext
|
||||
{
|
||||
try {
|
||||
return $this->commerceReferenceStore->load($userId);
|
||||
} catch (Throwable $e) {
|
||||
$this->agentLogger->warning('Failed to load commerce reference context', [
|
||||
'userId' => $userId,
|
||||
'exception' => $e,
|
||||
]);
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, mixed> $shopResults
|
||||
*/
|
||||
private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void
|
||||
{
|
||||
try {
|
||||
$referenceContext = $this->commerceReferenceResolver->resolveFromCommerceTurn(
|
||||
$prompt,
|
||||
$answer,
|
||||
$shopResults
|
||||
);
|
||||
|
||||
if ($referenceContext === null) {
|
||||
return;
|
||||
}
|
||||
|
||||
$this->commerceReferenceStore->save($userId, $referenceContext);
|
||||
} catch (Throwable $e) {
|
||||
$this->agentLogger->warning('Failed to persist commerce reference context', [
|
||||
'userId' => $userId,
|
||||
'exception' => $e,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, mixed> $shopResults
|
||||
*/
|
||||
private function persistConversationState(
|
||||
string $userId,
|
||||
string $prompt,
|
||||
string $fullOutput,
|
||||
array $shopResults
|
||||
): void {
|
||||
$this->contextService->appendHistory($userId, $prompt, $fullOutput);
|
||||
|
||||
$this->storeCommerceReference(
|
||||
userId: $userId,
|
||||
prompt: $prompt,
|
||||
answer: $fullOutput,
|
||||
shopResults: $shopResults
|
||||
$this->agentRunnerConfig->getCommerceHistoryBudgetChars()
|
||||
);
|
||||
}
|
||||
|
||||
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
|
||||
{
|
||||
return match ($commerceIntent) {
|
||||
CommerceIntentLite::PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 2),
|
||||
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 3),
|
||||
CommerceIntentLite::PRODUCT_SEARCH => array_slice(
|
||||
$knowledgeChunks,
|
||||
0,
|
||||
$this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit()
|
||||
),
|
||||
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice(
|
||||
$knowledgeChunks,
|
||||
0,
|
||||
$this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit()
|
||||
),
|
||||
default => $knowledgeChunks,
|
||||
};
|
||||
}
|
||||
@@ -555,8 +405,8 @@ final readonly class AgentRunner
|
||||
}
|
||||
|
||||
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query;
|
||||
$query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query;
|
||||
$query = trim($query, " \t\n\r\0\x0B\"'`");
|
||||
$query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query;
|
||||
$query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters());
|
||||
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
|
||||
|
||||
return trim($query);
|
||||
@@ -582,7 +432,7 @@ final readonly class AgentRunner
|
||||
|
||||
if ($cleanToken === '') {
|
||||
if ($firstThinkLoop) {
|
||||
yield $this->systemMsg('Denke nach...', 'think');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think');
|
||||
$firstThinkLoop = false;
|
||||
}
|
||||
|
||||
@@ -601,60 +451,46 @@ final readonly class AgentRunner
|
||||
if ($finalChunk !== null) {
|
||||
yield $this->systemMsg($finalChunk, 'answer');
|
||||
} elseif ($fullOutput === '') {
|
||||
yield $this->systemMsg('❌ Es wurden keine Daten vom LLM empfangen.', 'err');
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err');
|
||||
}
|
||||
|
||||
return $fullOutput;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $factSources
|
||||
* @param string[] $contextSignals
|
||||
* @param string[] $sources
|
||||
*/
|
||||
private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string
|
||||
private function emitSources(array $sources, string $prefix): string
|
||||
{
|
||||
$parts = [];
|
||||
|
||||
if ($factSources !== []) {
|
||||
$parts[] = 'Fakten: ' . implode(' ', $factSources);
|
||||
}
|
||||
|
||||
if ($contextSignals !== []) {
|
||||
$parts[] = 'Kontext: ' . implode(' ', $contextSignals);
|
||||
}
|
||||
|
||||
return $this->systemMsg(
|
||||
$label . ': ' . implode(' ', $parts),
|
||||
'info'
|
||||
);
|
||||
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $target
|
||||
* @param string[] $sources
|
||||
*/
|
||||
private function addBadge(array &$target, string $label): void
|
||||
private function addSource(array &$sources, string $label): void
|
||||
{
|
||||
$badge = $this->badge($label);
|
||||
|
||||
if (!in_array($badge, $target, true)) {
|
||||
$target[] = $badge;
|
||||
if (!in_array($badge, $sources, true)) {
|
||||
$sources[] = $badge;
|
||||
}
|
||||
}
|
||||
|
||||
private function buildUserErrorMessage(Throwable $e): string
|
||||
{
|
||||
if (!$this->debug) {
|
||||
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
|
||||
return $this->agentRunnerConfig->getGenericInternalErrorMessage();
|
||||
}
|
||||
|
||||
return '❌ Interner Fehler: '
|
||||
return $this->agentRunnerConfig->getDebugInternalErrorPrefix()
|
||||
. htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
||||
}
|
||||
|
||||
private function badge(string $label): string
|
||||
{
|
||||
return sprintf(
|
||||
'<span class="badge bg-info text-black">%s</span>',
|
||||
$this->agentRunnerConfig->getSourceBadgeHtmlTemplate(),
|
||||
htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
|
||||
);
|
||||
}
|
||||
@@ -667,10 +503,13 @@ final readonly class AgentRunner
|
||||
|
||||
return match ($type) {
|
||||
'answer' => $msg,
|
||||
'err' => '<span class="text-danger">' . $msg . "</span>\n<hr>\n",
|
||||
'think' => '<span class="text-info think">' . $msg . "</span>\n",
|
||||
'info' => "\n\n<span class=\"text-info fw-bolder\">" . $msg . "</span>\n",
|
||||
'debug' => "\n\nDEBUG: <code>" . htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</code>\n",
|
||||
'err' => sprintf($this->agentRunnerConfig->getErrorHtmlTemplate(), $msg),
|
||||
'think' => sprintf($this->agentRunnerConfig->getThinkHtmlTemplate(), $msg),
|
||||
'info' => sprintf($this->agentRunnerConfig->getInfoHtmlTemplate(), $msg),
|
||||
'debug' => sprintf(
|
||||
$this->agentRunnerConfig->getDebugHtmlTemplate(),
|
||||
htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
|
||||
),
|
||||
default => $msg,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ final readonly class PromptBuilder
|
||||
private ContextService $contextService,
|
||||
private SystemPromptRepository $systemPromptRepository,
|
||||
private ModelGenerationConfigProvider $modelGenerationConfigProvider,
|
||||
private PromptBuilderConfig $config,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -31,7 +32,6 @@ final readonly class PromptBuilder
|
||||
* @param ShopProductResult[] $shopResults
|
||||
* @param bool|null $fullContext
|
||||
* @param string|null $swagFullOutPut
|
||||
* @return string
|
||||
*/
|
||||
public function build(
|
||||
string $prompt,
|
||||
@@ -48,23 +48,21 @@ final readonly class PromptBuilder
|
||||
|
||||
$hasShopResults = $shopResults !== [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
|
||||
$asksForAccessoryOrBundle = $this->asksForAccessoryOrBundle($prompt);
|
||||
|
||||
$systemBlock = $this->buildSystemBlock();
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
|
||||
$responseFormatBlock = $this->buildResponseFormatBlock(
|
||||
$prompt,
|
||||
$hasShopResults,
|
||||
$isTechnicalProductQuestion,
|
||||
$isPriceDrivenQuestion
|
||||
hasShopResults: $hasShopResults,
|
||||
isTechnicalProductQuestion: $isTechnicalProductQuestion,
|
||||
asksForAccessoryOrBundle: $asksForAccessoryOrBundle
|
||||
);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock(
|
||||
$knowledgeChunks,
|
||||
$urlContent,
|
||||
$prompt,
|
||||
$hasShopResults,
|
||||
$isPriceDrivenQuestion
|
||||
knowledgeChunks: $knowledgeChunks,
|
||||
urlContent: $urlContent,
|
||||
hasShopResults: $hasShopResults,
|
||||
isTechnicalProductQuestion: $isTechnicalProductQuestion
|
||||
);
|
||||
$userBlock = $this->buildUserBlock($prompt);
|
||||
|
||||
@@ -106,12 +104,12 @@ final readonly class PromptBuilder
|
||||
|
||||
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
|
||||
|
||||
return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
|
||||
return $this->config->getSystemSectionLabel() . ":\n" . $this->normalizeBlockText($activeSystemPrompt);
|
||||
}
|
||||
|
||||
private function buildUserBlock(string $prompt): string
|
||||
{
|
||||
return "USER QUESTION:\n" . $prompt;
|
||||
return $this->config->getUserQuestionSectionLabel() . ":\n" . $prompt;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -146,12 +144,11 @@ final readonly class PromptBuilder
|
||||
return '';
|
||||
}
|
||||
|
||||
return
|
||||
"CONVERSATION CONTEXT (contextual only):\n" .
|
||||
"The following messages are previous turns of this conversation.\n" .
|
||||
"Use them to resolve references, follow-up questions, and user intent.\n" .
|
||||
"They must not override retrieved factual knowledge or live shop data.\n\n" .
|
||||
$history;
|
||||
return $this->implodeBlocks([
|
||||
$this->config->getConversationContextSectionLabel() . ':',
|
||||
$this->implodeLines($this->config->getConversationContextIntroLines()),
|
||||
$history,
|
||||
]);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -165,10 +162,11 @@ final readonly class PromptBuilder
|
||||
$parts = [];
|
||||
|
||||
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
|
||||
$parts[] =
|
||||
"SHOP SEARCH QUERY:\n" .
|
||||
$swagFullOutPut . "\n" .
|
||||
"Source: Shop Search";
|
||||
$parts[] = $this->implodeBlocks([
|
||||
$this->config->getShopSearchQuerySectionLabel() . ':',
|
||||
$swagFullOutPut,
|
||||
$this->config->getShopSearchQuerySourceLine(),
|
||||
]);
|
||||
}
|
||||
|
||||
$normalizedShopResults = array_values(array_filter(
|
||||
@@ -181,77 +179,33 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
$totalCount = count($normalizedShopResults);
|
||||
$limitedShopResults = array_slice($normalizedShopResults, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT);
|
||||
$isDetailed = count($limitedShopResults) <= 5;
|
||||
$limitedShopResults = array_slice($normalizedShopResults, 0, $this->config->getMaxShopResultsInPrompt());
|
||||
$isDetailed = count($limitedShopResults) <= $this->config->getDetailedShopResultsMaxCount();
|
||||
$lines = [];
|
||||
|
||||
foreach ($limitedShopResults as $i => $product) {
|
||||
$n = $i + 1;
|
||||
$entryParts = [
|
||||
"[{$n}] " . $this->normalizeBlockText($product->name),
|
||||
];
|
||||
|
||||
if ($product->productNumber) {
|
||||
$entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
|
||||
}
|
||||
|
||||
if ($product->manufacturer) {
|
||||
$entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
|
||||
}
|
||||
|
||||
if ($product->price) {
|
||||
$entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
|
||||
}
|
||||
|
||||
if ($product->available !== null) {
|
||||
$entryParts[] = "Available: " . ($product->available ? 'yes' : 'no');
|
||||
}
|
||||
|
||||
foreach ($product->highlights as $highlight) {
|
||||
$highlight = $this->normalizeBlockText((string) $highlight);
|
||||
|
||||
if ($highlight !== '') {
|
||||
$entryParts[] = "- " . $highlight;
|
||||
}
|
||||
}
|
||||
|
||||
if ($product->url) {
|
||||
$entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
|
||||
}
|
||||
|
||||
if ($product->productImage) {
|
||||
$entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
|
||||
}
|
||||
|
||||
if ($isDetailed && $product->description) {
|
||||
$entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
|
||||
}
|
||||
|
||||
if ($product->customFields) {
|
||||
$entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
|
||||
}
|
||||
|
||||
$lines[] = implode("\n", $entryParts);
|
||||
$lines[] = $this->buildShopProductEntry(
|
||||
product: $product,
|
||||
index: $i + 1,
|
||||
isDetailed: $isDetailed
|
||||
);
|
||||
}
|
||||
|
||||
if ($lines !== []) {
|
||||
$header =
|
||||
"LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
|
||||
"Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
|
||||
"If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
|
||||
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
|
||||
"Do not infer undocumented technical specifications from shop data.\n" .
|
||||
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
|
||||
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
|
||||
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
|
||||
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
|
||||
$headerLines = $this->config->getLiveShopResultsHeaderLines();
|
||||
|
||||
if ($totalCount > count($limitedShopResults)) {
|
||||
$header .= "\n" .
|
||||
"Only the top " . count($limitedShopResults) . " ranked shop results are shown here out of {$totalCount} total results.";
|
||||
$headerLines[] = sprintf(
|
||||
$this->config->getLiveShopResultsOverflowNoticeTemplate(),
|
||||
count($limitedShopResults),
|
||||
$totalCount
|
||||
);
|
||||
}
|
||||
|
||||
$parts[] = $header . "\n\n" . implode("\n\n", $lines);
|
||||
$parts[] = $this->implodeBlocks([
|
||||
$this->implodeLines($headerLines),
|
||||
implode("\n\n", $lines),
|
||||
]);
|
||||
}
|
||||
|
||||
return $this->implodeBlocks($parts);
|
||||
@@ -260,89 +214,60 @@ final readonly class PromptBuilder
|
||||
/**
|
||||
* Build a small priority block that tells the model what to surface first.
|
||||
*/
|
||||
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
|
||||
private function buildOutputPriorityBlock(bool $hasShopResults): string
|
||||
{
|
||||
if (!$hasShopResults) {
|
||||
return '';
|
||||
}
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
return
|
||||
"OUTPUT PRIORITY:\n" .
|
||||
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
|
||||
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
|
||||
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
|
||||
}
|
||||
|
||||
return
|
||||
"OUTPUT PRIORITY:\n" .
|
||||
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
|
||||
"If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" .
|
||||
"Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
|
||||
return $this->buildRuleBlock(
|
||||
$this->config->getOutputPrioritySectionLabel(),
|
||||
$this->config->getOutputPriorityRules()
|
||||
);
|
||||
}
|
||||
|
||||
private function buildResponseFormatBlock(
|
||||
string $prompt,
|
||||
bool $hasShopResults,
|
||||
bool $isTechnicalProductQuestion,
|
||||
bool $isPriceDrivenQuestion
|
||||
bool $asksForAccessoryOrBundle
|
||||
): string {
|
||||
$rules = [
|
||||
"RESPONSE FORMAT RULES:",
|
||||
"- Keep normal spacing between all words. Never fuse words together.",
|
||||
"- Use short, clean paragraphs or short labeled sections.",
|
||||
"- Do not use persuasive or promotional wording.",
|
||||
"- Do not repeat the same fact in slightly different wording.",
|
||||
"- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.",
|
||||
"- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.",
|
||||
"- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.",
|
||||
"- Do not combine technical identity from one source with commercial fields from a different product.",
|
||||
"- Product number, price, availability, and URL must belong to the same explicitly grounded product.",
|
||||
];
|
||||
$rules = $this->config->getResponseFormatBaseRules();
|
||||
|
||||
if ($hasShopResults) {
|
||||
$rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.";
|
||||
$rules[] = "- Keep price, availability, and URL on separate lines when they are present.";
|
||||
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
|
||||
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
|
||||
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
|
||||
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
|
||||
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
|
||||
$rules = array_merge($rules, $this->config->getResponseFormatWithShopRules());
|
||||
} else {
|
||||
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
|
||||
$rules = array_merge($rules, $this->config->getResponseFormatWithoutShopRules());
|
||||
}
|
||||
|
||||
if ($isTechnicalProductQuestion) {
|
||||
$rules[] = "- Write like technical documentation: precise, neutral, and source-close.";
|
||||
$rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.";
|
||||
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
|
||||
$rules = array_merge($rules, $this->config->getResponseFormatTechnicalRules());
|
||||
}
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
$rules[] = "- For price-driven questions, answer the threshold result first.";
|
||||
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
|
||||
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
|
||||
if ($asksForAccessoryOrBundle) {
|
||||
$rules = array_merge($rules, $this->config->getResponseFormatAccessoryRules());
|
||||
}
|
||||
|
||||
if ($this->asksForAccessoryOrBundle($prompt)) {
|
||||
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
|
||||
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
|
||||
$rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.";
|
||||
$rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.";
|
||||
}
|
||||
|
||||
return implode("\n", $rules);
|
||||
return $this->buildRuleBlock(
|
||||
$this->config->getResponseFormatSectionLabel(),
|
||||
$rules
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the knowledge block.
|
||||
*
|
||||
* Retrieved knowledge remains the main source for technical matching and explanation.
|
||||
* Shop data is preferred for current commercial fields.
|
||||
*
|
||||
* @param string[] $knowledgeChunks
|
||||
*/
|
||||
private function buildKnowledgeBlock(
|
||||
array $knowledgeChunks,
|
||||
string $urlContent,
|
||||
string $prompt,
|
||||
bool $hasShopResults,
|
||||
bool $isPriceDrivenQuestion
|
||||
bool $isTechnicalProductQuestion
|
||||
): string {
|
||||
$knowledgeParts = [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
|
||||
if ($knowledgeChunks !== []) {
|
||||
$lines = [];
|
||||
@@ -359,56 +284,71 @@ final readonly class PromptBuilder
|
||||
}
|
||||
|
||||
if ($lines !== []) {
|
||||
$parts = [
|
||||
"LANGUAGE RULES:\n" .
|
||||
implode("\n", $this->buildLanguageRules()),
|
||||
"FACT GROUNDING RULES:\n" .
|
||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
|
||||
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
|
||||
"Source: Documents\n" .
|
||||
implode("\n\n", $lines),
|
||||
];
|
||||
|
||||
$knowledgeParts[] = implode("\n\n", $parts);
|
||||
$knowledgeParts[] = $this->implodeBlocks([
|
||||
$this->buildRuleBlock(
|
||||
$this->config->getLanguageRulesSectionLabel(),
|
||||
$this->config->getLanguageRules()
|
||||
),
|
||||
$this->buildRuleBlock(
|
||||
$this->config->getFactGroundingRulesSectionLabel(),
|
||||
$this->buildFactGroundingRules(
|
||||
hasShopResults: $hasShopResults,
|
||||
isTechnicalProductQuestion: $isTechnicalProductQuestion
|
||||
)
|
||||
),
|
||||
$this->implodeBlocks([
|
||||
$this->config->getRetrievedKnowledgeSectionLabel() . ':',
|
||||
$this->config->getRetrievedKnowledgeSourceLine(),
|
||||
implode("\n\n", $lines),
|
||||
]),
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
if ($urlContent !== '') {
|
||||
$knowledgeParts[] =
|
||||
"CONTENT FROM URL (authoritative if user-provided):\n" .
|
||||
"Source: URL\n" .
|
||||
$urlContent;
|
||||
$knowledgeParts[] = $this->implodeBlocks([
|
||||
$this->config->getUrlContentSectionLabel() . ':',
|
||||
$this->config->getUrlContentSourceLine(),
|
||||
$urlContent,
|
||||
]);
|
||||
}
|
||||
|
||||
return $this->implodeBlocks($knowledgeParts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve how many characters may still be used by history.
|
||||
*
|
||||
* The active model num_ctx is converted into a conservative prompt budget.
|
||||
* Shop, knowledge and user question are fixed priority blocks.
|
||||
* History only receives the remaining space.
|
||||
*/
|
||||
private function resolveHistoryBudgetChars(string $fixedPrompt): int
|
||||
{
|
||||
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
|
||||
|
||||
$outputReserveTokens = $this->clamp(
|
||||
(int) floor($numCtx * PromptBuilderConfig::OUTPUT_RESERVE_RATIO),
|
||||
PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS,
|
||||
PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS
|
||||
(int) floor($numCtx * $this->config->getOutputReserveRatio()),
|
||||
$this->config->getOutputReserveMinTokens(),
|
||||
$this->config->getOutputReserveMaxTokens()
|
||||
);
|
||||
|
||||
$safetyReserveTokens = $this->clamp(
|
||||
(int) floor($numCtx * PromptBuilderConfig::SAFETY_RESERVE_RATIO),
|
||||
PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS,
|
||||
PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS
|
||||
(int) floor($numCtx * $this->config->getSafetyReserveRatio()),
|
||||
$this->config->getSafetyReserveMinTokens(),
|
||||
$this->config->getSafetyReserveMaxTokens()
|
||||
);
|
||||
|
||||
$promptBudgetTokens = max(
|
||||
PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS,
|
||||
$this->config->getMinPromptBudgetTokens(),
|
||||
$numCtx - $outputReserveTokens - $safetyReserveTokens
|
||||
);
|
||||
|
||||
$promptBudgetChars = $promptBudgetTokens * PromptBuilderConfig::CHARS_PER_TOKEN;
|
||||
$promptBudgetChars = $promptBudgetTokens * $this->config->getCharsPerToken();
|
||||
|
||||
$remaining = $promptBudgetChars
|
||||
- mb_strlen($fixedPrompt)
|
||||
- PromptBuilderConfig::HISTORY_PADDING_CHARS;
|
||||
- $this->config->getHistoryPaddingChars();
|
||||
|
||||
return max(0, $remaining);
|
||||
}
|
||||
@@ -416,87 +356,118 @@ final readonly class PromptBuilder
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
private function buildLanguageRules(): array
|
||||
private function buildFactGroundingRules(bool $hasShopResults, bool $isTechnicalProductQuestion): array
|
||||
{
|
||||
return [
|
||||
"- Answer only in the same language as the user question.",
|
||||
"- All headings, labels, notes, and structural elements must be in the same language as the user question.",
|
||||
"- Do not switch languages unless the user does.",
|
||||
"- If headings are used, write them in the user's language.",
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
private function buildFactGroundingRules(
|
||||
bool $isTechnicalProductQuestion,
|
||||
bool $hasShopResults,
|
||||
bool $isPriceDrivenQuestion
|
||||
): array {
|
||||
$rules = [
|
||||
"- State only facts that are explicitly present in the provided sources.",
|
||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
|
||||
"- Do not invent missing values.",
|
||||
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
|
||||
"- Do not claim that information is missing if it appears in the provided sources.",
|
||||
"- Do not compare with other products unless those products are also present in the provided sources.",
|
||||
"- Prefer source-faithful wording over persuasive wording.",
|
||||
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
|
||||
"- Clearly separate explicit facts from inferences.",
|
||||
"- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
|
||||
"- If a sentence cannot be traced to the provided sources, do not write it.",
|
||||
"- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.",
|
||||
"- If the sources do not identify a suitable product, do not invent one.",
|
||||
];
|
||||
$rules = $this->config->getFactGroundingBaseRules();
|
||||
|
||||
if ($hasShopResults) {
|
||||
$rules = array_merge($rules, [
|
||||
"- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
|
||||
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
|
||||
"- When shop results are present and relevant, include current price and the actual URL if available.",
|
||||
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
|
||||
"- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
|
||||
"- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
|
||||
"- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.",
|
||||
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
|
||||
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
|
||||
]);
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
|
||||
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
|
||||
}
|
||||
$rules = array_merge($rules, $this->config->getFactGroundingWithShopRules());
|
||||
} else {
|
||||
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
|
||||
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
|
||||
$rules = array_merge($rules, $this->config->getFactGroundingWithoutShopRules());
|
||||
}
|
||||
|
||||
if ($isTechnicalProductQuestion) {
|
||||
$rules = array_merge($rules, [
|
||||
"- For technical product questions, answer primarily with explicitly stated facts.",
|
||||
"- Behave like a technical documentation assistant, not like a sales advisor.",
|
||||
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
|
||||
"- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
|
||||
"- Do not translate technical facts into business value unless the source explicitly does so.",
|
||||
"- Do not recommend process changes unless explicitly present in the source.",
|
||||
"- Do not use persuasive summaries or advisory conclusions.",
|
||||
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
|
||||
"- Use neutral engineering language.",
|
||||
"- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
|
||||
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
|
||||
"- If the source lists application areas, repeat only those areas and do not broaden them.",
|
||||
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
|
||||
"- If the source states only a threshold function, do not expand it into broader control logic.",
|
||||
"- If a detail is not explicitly stated in the provided sources, say so plainly.",
|
||||
"- Prefer short, source-close sentences over explanatory expansion.",
|
||||
"- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.",
|
||||
]);
|
||||
$rules = array_merge($rules, $this->config->getFactGroundingTechnicalRules());
|
||||
}
|
||||
|
||||
return $rules;
|
||||
}
|
||||
|
||||
private function buildShopProductEntry(ShopProductResult $product, int $index, bool $isDetailed): string
|
||||
{
|
||||
$entryParts = [
|
||||
"[{$index}] " . $this->normalizeBlockText($product->name),
|
||||
];
|
||||
|
||||
if ($product->productNumber) {
|
||||
$entryParts[] = $this->config->getShopProductNumberLabel() . ': '
|
||||
. $this->normalizeBlockText($product->productNumber);
|
||||
}
|
||||
|
||||
if ($product->manufacturer) {
|
||||
$entryParts[] = $this->config->getShopManufacturerLabel() . ': '
|
||||
. $this->normalizeBlockText($product->manufacturer);
|
||||
}
|
||||
|
||||
if ($product->price) {
|
||||
$entryParts[] = $this->config->getShopPriceLabel() . ': '
|
||||
. $this->normalizeBlockText($product->price);
|
||||
}
|
||||
|
||||
if ($product->available !== null) {
|
||||
$entryParts[] = $this->config->getShopAvailabilityLabel() . ': '
|
||||
. ($product->available
|
||||
? $this->config->getShopAvailabilityYesLabel()
|
||||
: $this->config->getShopAvailabilityNoLabel());
|
||||
}
|
||||
|
||||
foreach ($product->highlights as $highlight) {
|
||||
$highlight = $this->normalizeBlockText((string) $highlight);
|
||||
|
||||
if ($highlight !== '') {
|
||||
$entryParts[] = $this->config->getShopHighlightPrefix() . $highlight;
|
||||
}
|
||||
}
|
||||
|
||||
if ($product->url) {
|
||||
$entryParts[] = $this->config->getShopUrlLabel() . ': '
|
||||
. $this->normalizeBlockText($product->url);
|
||||
}
|
||||
|
||||
if ($product->productImage) {
|
||||
$entryParts[] = $this->config->getShopProductImageLabel() . ': '
|
||||
. $this->normalizeBlockText($product->productImage);
|
||||
}
|
||||
|
||||
if ($isDetailed && $product->description) {
|
||||
$entryParts[] = $this->config->getShopDescriptionLabel() . ': '
|
||||
. $this->normalizeBlockText($product->description);
|
||||
}
|
||||
|
||||
if ($product->customFields) {
|
||||
$entryParts[] = $this->config->getShopMetaInformationLabel() . ': '
|
||||
. $this->normalizeBlockText($product->customFields);
|
||||
}
|
||||
|
||||
return implode("\n", $entryParts);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $rules
|
||||
*/
|
||||
private function buildRuleBlock(string $sectionLabel, array $rules): string
|
||||
{
|
||||
$normalizedRules = array_values(array_filter(
|
||||
array_map(
|
||||
fn(string $rule): string => $this->normalizeBlockText($rule),
|
||||
$rules
|
||||
),
|
||||
static fn(string $rule): bool => $rule !== ''
|
||||
));
|
||||
|
||||
if ($normalizedRules === []) {
|
||||
return '';
|
||||
}
|
||||
|
||||
return $sectionLabel . ":\n" . implode("\n", $normalizedRules);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $lines
|
||||
*/
|
||||
private function implodeLines(array $lines): string
|
||||
{
|
||||
$normalizedLines = array_values(array_filter(
|
||||
array_map(
|
||||
fn(string $line): string => $this->normalizeBlockText($line),
|
||||
$lines
|
||||
),
|
||||
static fn(string $line): bool => $line !== ''
|
||||
));
|
||||
|
||||
return implode("\n", $normalizedLines);
|
||||
}
|
||||
|
||||
private function implodeBlocks(array $blocks): string
|
||||
{
|
||||
$filtered = array_values(array_filter(
|
||||
@@ -537,41 +508,26 @@ final readonly class PromptBuilder
|
||||
private function isLikelyTechnicalProductQuestion(string $prompt): bool
|
||||
{
|
||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||
|
||||
$matches = 0;
|
||||
|
||||
foreach (PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS as $keyword) {
|
||||
foreach ($this->config->getTechnicalProductKeywords() as $keyword) {
|
||||
if (str_contains($normalized, $keyword)) {
|
||||
$matches++;
|
||||
}
|
||||
}
|
||||
|
||||
if ($matches >= 2) {
|
||||
if ($matches >= $this->config->getTechnicalProductKeywordMatchThreshold()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
private function isLikelyPriceDrivenQuestion(string $prompt): bool
|
||||
{
|
||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||
|
||||
if (preg_match('/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*(?:euro|eur|€)\b/u', $normalized) === 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return str_contains($normalized, 'preis')
|
||||
|| str_contains($normalized, 'preise')
|
||||
|| str_contains($normalized, 'kosten')
|
||||
|| str_contains($normalized, 'kostet');
|
||||
return preg_match($this->config->getTechnicalProductModelPattern(), $prompt) === 1;
|
||||
}
|
||||
|
||||
private function asksForAccessoryOrBundle(string $prompt): bool
|
||||
{
|
||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||
|
||||
foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $keyword) {
|
||||
foreach ($this->config->getAccessoryRequestKeywords() as $keyword) {
|
||||
if (str_contains($normalized, $keyword)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\CommerceReferenceContext;
|
||||
use App\Commerce\Dto\CommerceSearchQuery;
|
||||
use App\Config\CommerceIntentConfig;
|
||||
use App\Config\CommerceQueryParserConfig;
|
||||
@@ -24,12 +23,10 @@ final readonly class CommerceQueryParser
|
||||
public function parse(
|
||||
string $originalPrompt,
|
||||
string $intent,
|
||||
string $historyContext = '',
|
||||
?CommerceReferenceContext $referenceContext = null
|
||||
string $historyContext = ''
|
||||
): CommerceSearchQuery {
|
||||
$normalizedPrompt = $this->normalize($originalPrompt);
|
||||
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
|
||||
|
||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
||||
$sizes = $this->extractSizes($normalizedPrompt);
|
||||
@@ -47,58 +44,23 @@ final readonly class CommerceQueryParser
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $historyContext !== ''
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt)
|
||||
) {
|
||||
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
||||
$historyParse = $this->parseHistoryContext($historyContext);
|
||||
|
||||
if ($latestHistoryQuestion !== '') {
|
||||
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
|
||||
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
|
||||
|
||||
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
|
||||
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
|
||||
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
|
||||
|
||||
$historySearchText = $this->buildSearchText(
|
||||
prompt: $normalizedHistoryPrompt,
|
||||
sizes: $historySizes,
|
||||
brand: $historyBrand,
|
||||
priceMin: $historyPriceMin,
|
||||
priceMax: $historyPriceMax,
|
||||
preserveDirectProductQuery: $isDirectHistoryProductQuery
|
||||
if ($historyParse !== null) {
|
||||
$searchText = $this->mergeSearchTexts(
|
||||
$historyParse['searchText'],
|
||||
$searchText
|
||||
);
|
||||
|
||||
$searchText = $this->mergeSearchTexts($historySearchText, $searchText);
|
||||
|
||||
if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') {
|
||||
$brand = $historyBrand;
|
||||
if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') {
|
||||
$brand = $historyParse['brand'];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $referenceContext !== null
|
||||
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
|
||||
) {
|
||||
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
|
||||
|
||||
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
|
||||
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
|
||||
} else {
|
||||
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
|
||||
}
|
||||
|
||||
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
|
||||
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
|
||||
|
||||
if ($normalizedManufacturer !== '') {
|
||||
$brand = $normalizedManufacturer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
|
||||
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
|
||||
|
||||
return new CommerceSearchQuery(
|
||||
originalPrompt: $originalPrompt,
|
||||
@@ -118,10 +80,14 @@ final readonly class CommerceQueryParser
|
||||
{
|
||||
$value = $this->textNormalizer->normalize($prompt);
|
||||
$value = $this->queryCleaner->clean($value);
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
$value = str_replace(['€'], ' euro ', $value);
|
||||
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||
$value = mb_strtolower(trim($value));
|
||||
$value = str_replace(
|
||||
$this->config->getNormalizationSearch(),
|
||||
$this->config->getNormalizationReplace(),
|
||||
$value
|
||||
);
|
||||
$value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
|
||||
$value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
|
||||
|
||||
return trim($value);
|
||||
}
|
||||
@@ -134,32 +100,21 @@ final readonly class CommerceQueryParser
|
||||
$priceMin = null;
|
||||
$priceMax = null;
|
||||
|
||||
if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$a = $this->toFloat($m[1]);
|
||||
$b = $this->toFloat($m[2]);
|
||||
if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) {
|
||||
$a = $this->toFloat($matches[1]);
|
||||
$b = $this->toFloat($matches[2]);
|
||||
|
||||
if ($a !== null && $b !== null) {
|
||||
return [min($a, $b), max($a, $b)];
|
||||
}
|
||||
}
|
||||
|
||||
if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMax = $this->toFloat($m[1]);
|
||||
if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) {
|
||||
$priceMax = $this->toFloat($matches[1]);
|
||||
}
|
||||
|
||||
if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
}
|
||||
|
||||
// NEW:
|
||||
// Recognize comparative lower-bound phrasing such as:
|
||||
// - mehr als 3000 euro
|
||||
// - über 3000 euro
|
||||
// - ueber 3000 euro
|
||||
// - größer als 3000 euro
|
||||
// - groesser als 3000 euro
|
||||
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
$priceMin = $this->toFloat($m[1]);
|
||||
if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) {
|
||||
$priceMin = $this->toFloat($matches[1]);
|
||||
}
|
||||
|
||||
return [$priceMin, $priceMax];
|
||||
@@ -172,8 +127,7 @@ final readonly class CommerceQueryParser
|
||||
{
|
||||
$sizes = [];
|
||||
|
||||
$sizePattern = $this->intentConfig->getSizePattern();
|
||||
if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) {
|
||||
if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -181,8 +135,7 @@ final readonly class CommerceQueryParser
|
||||
$sizes[] = trim($size);
|
||||
}
|
||||
|
||||
$sizeTokenPattern = $this->intentConfig->getSizeTokenPattern();
|
||||
if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) {
|
||||
if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) {
|
||||
foreach ($tokenMatches[1] as $sizeToken) {
|
||||
$sizes[] = trim($sizeToken);
|
||||
}
|
||||
@@ -207,6 +160,9 @@ final readonly class CommerceQueryParser
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $sizes
|
||||
*/
|
||||
private function buildSearchText(
|
||||
string $prompt,
|
||||
array $sizes,
|
||||
@@ -219,7 +175,7 @@ final readonly class CommerceQueryParser
|
||||
return $this->buildDirectProductSearchText($prompt);
|
||||
}
|
||||
|
||||
$text = ' ' . $prompt . ' ';
|
||||
$text = $this->wrapForPhraseReplacement($prompt);
|
||||
|
||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||
@@ -228,7 +184,11 @@ final readonly class CommerceQueryParser
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text);
|
||||
$text = str_replace(
|
||||
$this->wrapForPhraseReplacement($normalizedPhrase),
|
||||
' ',
|
||||
$text
|
||||
);
|
||||
}
|
||||
|
||||
foreach ($sizes as $size) {
|
||||
@@ -238,111 +198,69 @@ final readonly class CommerceQueryParser
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace(
|
||||
$this->config->buildExactTokenRemovalPattern($normalizedSize),
|
||||
' ',
|
||||
$text
|
||||
) ?? $text;
|
||||
}
|
||||
|
||||
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
|
||||
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace(
|
||||
$this->config->buildExactTokenRemovalPattern($brand),
|
||||
' ',
|
||||
$text
|
||||
) ?? $text;
|
||||
}
|
||||
|
||||
if ($priceMin !== null || $priceMax !== null) {
|
||||
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
|
||||
foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) {
|
||||
$text = preg_replace($pattern, ' ', $text) ?? $text;
|
||||
}
|
||||
}
|
||||
|
||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||
|
||||
$tokens = array_filter(
|
||||
explode(' ', $text),
|
||||
static fn(string $token): bool => mb_strlen($token) > 1
|
||||
fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
|
||||
);
|
||||
|
||||
$tokens = $this->filterSearchTokens($tokens);
|
||||
$tokens = $this->stripReferenceOnlyTokens($tokens);
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
private function buildDirectProductSearchText(string $prompt): string
|
||||
{
|
||||
$text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
|
||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt;
|
||||
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||
|
||||
$tokens = array_filter(
|
||||
explode(' ', $text),
|
||||
static fn(string $token): bool => mb_strlen($token) > 0
|
||||
fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
|
||||
);
|
||||
|
||||
return trim(implode(' ', array_values(array_unique($tokens))));
|
||||
$tokens = array_values(array_unique($tokens));
|
||||
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
/**
 * Decides whether the conversation history should be consulted for a prompt.
 *
 * The decision is delegated entirely to the configured history-context
 * pattern: the method returns true only when that pattern matches.
 *
 * @param string $prompt Prompt text to test.
 *
 * @return bool True when the configured pattern matches the prompt. A regex
 *              error (preg_match() returning false) is treated as "no match".
 */
private function shouldUseHistoryContext(string $prompt): bool
{
    return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
}
|
||||
|
||||
private function extractLatestQuestionFromHistory(string $historyContext): string
|
||||
{
|
||||
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
|
||||
$result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches);
|
||||
|
||||
if ($result === false) {
|
||||
return '';
|
||||
}
|
||||
|
||||
$questions = $matches[1] ?? [];
|
||||
|
||||
if ($questions === []) {
|
||||
return '';
|
||||
}
|
||||
@@ -352,11 +270,11 @@ final readonly class CommerceQueryParser
|
||||
return is_string($lastQuestion) ? trim($lastQuestion) : '';
|
||||
}
|
||||
|
||||
private function mergeSearchTexts(string $left, string $right): string
|
||||
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
|
||||
{
|
||||
$tokens = [];
|
||||
|
||||
foreach ([$left, $right] as $text) {
|
||||
foreach ([$historySearchText, $currentSearchText] as $text) {
|
||||
if ($text === '') {
|
||||
continue;
|
||||
}
|
||||
@@ -364,7 +282,7 @@ final readonly class CommerceQueryParser
|
||||
foreach (explode(' ', $text) as $token) {
|
||||
$token = trim($token);
|
||||
|
||||
if ($token === '' || mb_strlen($token) <= 1) {
|
||||
if ($token === '' || mb_strlen($token) <= $this->config->getMinSearchTokenLength()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -381,25 +299,11 @@ final readonly class CommerceQueryParser
|
||||
*/
|
||||
private function filterSearchTokens(array $tokens): array
{
    // Tokens listed by the config are removed from the search tokens.
    $stopWords = $this->config->getFilterSearchTokens();

    // Strict comparison so that numeric-looking tokens are never matched
    // loosely; array_values() restores a gap-free list after filtering.
    return array_values(array_filter(
        $tokens,
        static fn(string $token): bool => !in_array($token, $stopWords, true)
    ));
}
|
||||
|
||||
@@ -417,25 +321,25 @@ final readonly class CommerceQueryParser
|
||||
return true;
|
||||
}
|
||||
|
||||
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
|
||||
$tokens = preg_split(
|
||||
$this->config->getWhitespaceSplitPattern(),
|
||||
$prompt,
|
||||
-1,
|
||||
PREG_SPLIT_NO_EMPTY
|
||||
) ?: [];
|
||||
|
||||
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
|
||||
return count($tokens) <= $this->config->getDirectProductMaxTokens()
|
||||
&& preg_match($this->config->getDirectProductDigitPattern(), $prompt) === 1;
|
||||
}
|
||||
|
||||
/**
 * Tells whether the text matches the configured "model-like" pattern.
 *
 * @param string $text Text to inspect.
 *
 * @return bool True when the configured pattern matches; a regex error
 *              (preg_match() returning false) counts as no match.
 */
private function containsModelLikePhrase(string $text): bool
{
    return preg_match($this->config->getModelLikePattern(), $text) === 1;
}
|
||||
|
||||
/**
 * Tells whether the text matches the configured "accessory-like" pattern.
 *
 * @param string $text Text to inspect.
 *
 * @return bool True when the configured pattern matches; a regex error
 *              (preg_match() returning false) counts as no match.
 */
private function containsAccessoryLikePhrase(string $text): bool
{
    return preg_match($this->config->getAccessoryLikePattern(), $text) === 1;
}
|
||||
|
||||
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
|
||||
@@ -445,7 +349,7 @@ final readonly class CommerceQueryParser
|
||||
}
|
||||
|
||||
return preg_match(
|
||||
'/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u',
|
||||
$this->config->buildBrandPartOfModelPattern($brand),
|
||||
$prompt
|
||||
) === 1;
|
||||
}
|
||||
@@ -456,4 +360,42 @@ final readonly class CommerceQueryParser
|
||||
|
||||
return is_numeric($value) ? (float) $value : null;
|
||||
}
|
||||
|
||||
/**
 * Derives a search text and brand from the most recent question found in
 * the history context.
 *
 * The latest question is normalized and then run through the same
 * extraction helpers (price range, sizes, brand, search text) that this
 * class provides; the direct-product check decides the value passed as
 * `preserveDirectProductQuery`.
 *
 * @return array{searchText:string, brand:?string}|null Null when the history
 *         contains no question.
 */
private function parseHistoryContext(string $historyContext): ?array
{
    $question = $this->extractLatestQuestionFromHistory($historyContext);

    if ($question === '') {
        return null;
    }

    $normalized = $this->normalize($question);
    $preserveDirect = $this->isDirectProductQuery($normalized);

    [$minPrice, $maxPrice] = $this->extractPriceRange($normalized);
    $sizes = $this->extractSizes($normalized);
    $brand = $this->extractBrand($normalized);

    return [
        'searchText' => $this->buildSearchText(
            prompt: $normalized,
            sizes: $sizes,
            brand: $brand,
            priceMin: $minPrice,
            priceMax: $maxPrice,
            preserveDirectProductQuery: $preserveDirect
        ),
        'brand' => $brand,
    ];
}
|
||||
|
||||
/**
 * Pads the text with exactly one space on each side.
 *
 * @param string $text Text to pad.
 *
 * @return string The text with a leading and a trailing space added.
 */
private function wrapForPhraseReplacement(string $text): string
{
    return " {$text} ";
}
|
||||
}
|
||||
@@ -5,16 +5,15 @@ declare(strict_types=1);
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\ShopProductResult;
|
||||
use App\Config\SearchRepairConfig;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
final readonly class SearchRepairService
|
||||
{
|
||||
/**
 * All tunables (enabled flag, query limits, thresholds, patterns, weights)
 * are read from the injected config object instead of scalar constructor
 * arguments, so the service needs no explicit argument wiring.
 *
 * @param ShopSearchService  $shopSearchService Shop search service dependency.
 * @param SearchRepairConfig $config            Source of all repair settings.
 * @param LoggerInterface    $logger            Receives the repair log entries.
 */
public function __construct(
    private ShopSearchService $shopSearchService,
    private SearchRepairConfig $config,
    private LoggerInterface $logger,
) {
}
|
||||
|
||||
@@ -37,22 +36,22 @@ final readonly class SearchRepairService
|
||||
array $primaryShopResults,
|
||||
array $knowledgeChunks
|
||||
): array {
|
||||
if (!$this->enabled) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
if (!$this->config->isEnabled()) {
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
$repairQueries = $this->buildRepairQueries(
|
||||
@@ -63,12 +62,12 @@ final readonly class SearchRepairService
|
||||
);
|
||||
|
||||
if ($repairQueries === []) {
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => false,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => [],
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: false,
|
||||
usedRepair: false,
|
||||
repairQueries: []
|
||||
);
|
||||
}
|
||||
|
||||
$this->logger->info('Shop repair started', [
|
||||
@@ -99,12 +98,12 @@ final readonly class SearchRepairService
|
||||
'repairQueries' => $repairQueries,
|
||||
]);
|
||||
|
||||
return [
|
||||
'results' => $primaryShopResults,
|
||||
'attemptedRepair' => true,
|
||||
'usedRepair' => false,
|
||||
'repairQueries' => $repairQueries,
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $primaryShopResults,
|
||||
attemptedRepair: true,
|
||||
usedRepair: false,
|
||||
repairQueries: $repairQueries
|
||||
);
|
||||
}
|
||||
|
||||
$mergedResults = $this->rankMergedResults(
|
||||
@@ -129,16 +128,16 @@ final readonly class SearchRepairService
|
||||
'manufacturer' => $product->manufacturer,
|
||||
'available' => $product->available,
|
||||
],
|
||||
array_slice($mergedResults, 0, 3)
|
||||
array_slice($mergedResults, 0, $this->config->getTopProductLogLimit())
|
||||
),
|
||||
]);
|
||||
|
||||
return [
|
||||
'results' => $mergedResults,
|
||||
'attemptedRepair' => true,
|
||||
'usedRepair' => true,
|
||||
'repairQueries' => $repairQueries,
|
||||
];
|
||||
return $this->buildRepairResult(
|
||||
results: $mergedResults,
|
||||
attemptedRepair: true,
|
||||
usedRepair: true,
|
||||
repairQueries: $repairQueries
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -157,15 +156,11 @@ final readonly class SearchRepairService
|
||||
return true;
|
||||
}
|
||||
|
||||
// Always try repair for bundle/accessory prompts.
|
||||
// These prompts often need a second pass even when the first search
|
||||
// already returned some results, because the user is asking for a
|
||||
// combination of main device + matching accessory.
|
||||
if ($asksForBundle) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) {
|
||||
if ($primaryResultsCount >= $this->config->getMinPrimaryResultsWithoutRepair()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -173,7 +168,7 @@ final readonly class SearchRepairService
|
||||
return false;
|
||||
}
|
||||
|
||||
return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair;
|
||||
return $primaryResultsCount < $this->config->getMinPrimaryResultsWithoutRepair();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -230,7 +225,7 @@ final readonly class SearchRepairService
|
||||
fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery)
|
||||
));
|
||||
|
||||
return array_slice($queries, 0, max(1, $this->maxRepairQueries));
|
||||
return array_slice($queries, 0, max(1, $this->config->getMaxRepairQueries()));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -291,7 +286,7 @@ final readonly class SearchRepairService
|
||||
$candidates = [];
|
||||
|
||||
preg_match_all(
|
||||
'/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u',
|
||||
$this->config->getModelCandidatePattern(),
|
||||
$text,
|
||||
$matches
|
||||
);
|
||||
@@ -321,7 +316,7 @@ final readonly class SearchRepairService
|
||||
$candidates = [];
|
||||
|
||||
preg_match_all(
|
||||
'/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu',
|
||||
$this->config->getAccessoryCandidatePattern(),
|
||||
$text,
|
||||
$matches
|
||||
);
|
||||
@@ -368,15 +363,15 @@ final readonly class SearchRepairService
|
||||
{
|
||||
$score = 0;
|
||||
|
||||
if (preg_match('/\d/u', $candidate) === 1) {
|
||||
$score += 4;
|
||||
if (preg_match($this->config->getContainsDigitPattern(), $candidate) === 1) {
|
||||
$score += $this->config->getCandidateDigitScore();
|
||||
}
|
||||
|
||||
$wordCount = count($this->tokenize($candidate));
|
||||
$score += min($wordCount, 4);
|
||||
$score += min($wordCount, $this->config->getCandidateWordCountCap());
|
||||
|
||||
if (preg_match('/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu', $candidate) === 1) {
|
||||
$score += 3;
|
||||
if (preg_match($this->config->getSpecificityBoostPattern(), $candidate) === 1) {
|
||||
$score += $this->config->getSpecificityBoostScore();
|
||||
}
|
||||
|
||||
return $score;
|
||||
@@ -384,39 +379,19 @@ final readonly class SearchRepairService
|
||||
|
||||
/**
 * Tells whether the prompt matches the configured accessory/bundle pattern.
 *
 * @param string $prompt Prompt text to test.
 *
 * @return bool True when the configured pattern matches; a regex error
 *              (preg_match() returning false) counts as no match.
 */
private function asksForBundleOrAccessory(string $prompt): bool
{
    return preg_match($this->config->getAccessoryOrBundlePattern(), $prompt) === 1;
}
|
||||
|
||||
/**
 * Tells whether the text matches the configured "model-like" pattern.
 *
 * @param string $text Text to inspect.
 *
 * @return bool True when the configured pattern matches; a regex error
 *              (preg_match() returning false) counts as no match.
 */
private function containsModelLikePhrase(string $text): bool
{
    return preg_match($this->config->getModelLikePattern(), $text) === 1;
}
|
||||
|
||||
private function looksTooGeneric(string $candidate): bool
|
||||
{
|
||||
$normalized = mb_strtolower($candidate);
|
||||
|
||||
foreach ([
|
||||
'wasser',
|
||||
'messgerät',
|
||||
'messgeraet',
|
||||
'produkt',
|
||||
'geräte',
|
||||
'geraete',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'resthärte',
|
||||
'resthaerte',
|
||||
'preis',
|
||||
'infos',
|
||||
'wissen',
|
||||
] as $genericToken) {
|
||||
foreach ($this->config->getGenericCandidateTokens() as $genericToken) {
|
||||
if ($normalized === $genericToken) {
|
||||
return true;
|
||||
}
|
||||
@@ -428,8 +403,8 @@ final readonly class SearchRepairService
|
||||
/**
 * Cleans up a candidate query string.
 *
 * Steps: trim surrounding whitespace, collapse whitespace runs using the
 * configured pattern, strip the configured trim characters from both ends,
 * and trim once more because stripping those characters can expose new
 * leading or trailing whitespace.
 *
 * @param string $query Raw query candidate.
 *
 * @return string Sanitized query; may be an empty string.
 */
private function sanitizeQuery(string $query): string
{
    $query = trim($query);
    // Keep the input unchanged if preg_replace() reports an error (null).
    $query = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $query) ?? $query;
    $query = trim($query, $this->config->getSanitizeTrimCharacters());

    return trim($query);
}
|
||||
@@ -446,7 +421,7 @@ final readonly class SearchRepairService
|
||||
$intersection = array_intersect($candidateTokens, $primaryTokens);
|
||||
$overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens));
|
||||
|
||||
return $overlapRatio >= 0.9;
|
||||
return $overlapRatio >= $this->config->getPrimaryQueryOverlapThreshold();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -497,12 +472,12 @@ final readonly class SearchRepairService
|
||||
|
||||
foreach ($allResults as $index => $product) {
|
||||
$score = 0;
|
||||
$score += $this->scoreProductAgainstText($product, $prompt) * 3;
|
||||
$score += $this->scoreProductAgainstText($product, $primaryQuery) * 2;
|
||||
$score += $this->scoreProductAgainstText($product, $repairSignal) * 4;
|
||||
$score += $this->scoreProductAgainstText($product, $prompt) * $this->config->getPromptMatchWeight();
|
||||
$score += $this->scoreProductAgainstText($product, $primaryQuery) * $this->config->getPrimaryQueryMatchWeight();
|
||||
$score += $this->scoreProductAgainstText($product, $repairSignal) * $this->config->getRepairSignalMatchWeight();
|
||||
|
||||
if ($index < count($primaryResults)) {
|
||||
$score += 1;
|
||||
$score += $this->config->getPrimaryResultOrderBonus();
|
||||
}
|
||||
|
||||
$decorated[] = [
|
||||
@@ -549,11 +524,11 @@ final readonly class SearchRepairService
|
||||
|
||||
$score = 0;
|
||||
$intersection = array_intersect($queryTokens, $productTokens);
|
||||
$score += count($intersection) * 2;
|
||||
$score += count($intersection) * $this->config->getTokenIntersectionScore();
|
||||
|
||||
foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
|
||||
if (in_array($numberToken, $productTokens, true)) {
|
||||
$score += 4;
|
||||
$score += $this->config->getNumericTokenMatchScore();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -562,7 +537,7 @@ final readonly class SearchRepairService
|
||||
|
||||
private function buildProductKey(ShopProductResult $product): string
|
||||
{
|
||||
return mb_strtolower(trim(implode('|', [
|
||||
return mb_strtolower(trim(implode($this->config->getProductKeySeparator(), [
|
||||
$product->id,
|
||||
$product->productNumber ?? '',
|
||||
$product->name,
|
||||
@@ -576,8 +551,8 @@ final readonly class SearchRepairService
|
||||
private function tokenize(string $text): array
|
||||
{
|
||||
$text = mb_strtolower($text);
|
||||
$text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
||||
$text = preg_replace($this->config->getTokenizeCleanupPattern(), ' ', $text) ?? $text;
|
||||
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||
$text = trim($text);
|
||||
|
||||
if ($text === '') {
|
||||
@@ -595,7 +570,31 @@ final readonly class SearchRepairService
|
||||
{
|
||||
return array_values(array_filter(
|
||||
$tokens,
|
||||
static fn(string $token): bool => preg_match('/\d/u', $token) === 1
|
||||
fn(string $token): bool => preg_match($this->config->getContainsDigitPattern(), $token) === 1
|
||||
));
|
||||
}
|
||||
|
||||
/**
 * Packs the outcome of a repair run into the associative array returned to callers.
 *
 * @param ShopProductResult[] $results       Products to hand back.
 * @param bool                $attemptedRepair Value stored under 'attemptedRepair'.
 * @param bool                $usedRepair      Value stored under 'usedRepair'.
 * @param string[]            $repairQueries Queries stored under 'repairQueries'.
 *
 * @return array{
 *     results: ShopProductResult[],
 *     attemptedRepair: bool,
 *     usedRepair: bool,
 *     repairQueries: string[]
 * }
 */
private function buildRepairResult(
    array $results,
    bool $attemptedRepair,
    bool $usedRepair,
    array $repairQueries
): array {
    // The array keys are exactly the parameter names, in declaration order.
    return compact('results', 'attemptedRepair', 'usedRepair', 'repairQueries');
}
|
||||
}
|
||||
@@ -6,50 +6,249 @@ namespace App\Config;
|
||||
|
||||
/**
 * Static configuration for the agent runner: limits, status messages,
 * source labels, HTML templates and the prompt used to turn a user input
 * into a shop search query.
 *
 * Every value is returned from a getter so that callers do not hard-code
 * literals themselves.
 */
final class AgentRunnerConfig
{
    /** Character budget for the commerce history context. */
    public function getCommerceHistoryBudgetChars(): int
    {
        return 1000;
    }

    /** Knowledge chunk limit for product searches. */
    public function getProductSearchKnowledgeChunkLimit(): int
    {
        return 2;
    }

    /** Knowledge chunk limit for advisory product searches. */
    public function getAdvisoryProductSearchKnowledgeChunkLimit(): int
    {
        return 3;
    }

    /**
     * Regex matching a leading label such as "Keywords:", "Suchquery:",
     * "Search query:" or "Query:" (case-insensitive).
     */
    public function getOptimizedShopQueryPrefixPattern(): string
    {
        return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu';
    }

    /** Character list for trim(): whitespace, NUL, vertical tab, double/single quote, backtick. */
    public function getOptimizedShopQueryTrimCharacters(): string
    {
        return " \t\n\r\0\x0B\"'`";
    }

    // --- Status and error messages -------------------------------------

    public function getEmptyPromptMessage(): string
    {
        return '❌ Empty prompt.';
    }

    public function getAnalyzeRequestMessage(): string
    {
        return 'Ich analysiere deine Anfrage...';
    }

    public function getCheckInternetSourcesMessage(): string
    {
        return 'Ich prüfe auf Internetquellen...';
    }

    public function getRetrieveKnowledgeMessage(): string
    {
        return 'Ich hole relevante Daten aus meinem RAG-Wissen...';
    }

    public function getOptimizeSearchMessage(): string
    {
        return 'Ich optimiere die Recherche...';
    }

    /** sprintf() template; the single %s placeholder is the search type. */
    public function getFetchSearchDataMessageTemplate(): string
    {
        return 'Ich rufe Recherchedaten ab (type: %s)';
    }

    public function getAnalyzeAllInformationMessage(): string
    {
        return 'Ich analysiere alle Informationen...';
    }

    public function getThinkingWhileStreamingMessage(): string
    {
        return 'Denke nach...';
    }

    public function getNoLlmDataReceivedMessage(): string
    {
        return '❌ Es wurden keine Daten vom LLM empfangen.';
    }

    public function getGenericInternalErrorMessage(): string
    {
        return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
    }

    /** Prefix only; the caller appends the error detail. */
    public function getDebugInternalErrorPrefix(): string
    {
        return '❌ Interner Fehler: ';
    }

    // --- Source labels --------------------------------------------------

    public function getExternalUrlSourceLabel(): string
    {
        return 'Externe URL';
    }

    public function getRagKnowledgeSourceLabel(): string
    {
        return 'RAG Wissen';
    }

    public function getConversationHistorySourceLabel(): string
    {
        return 'Chatverlauf';
    }

    public function getShopSystemSourceLabel(): string
    {
        return 'Shopsystem';
    }

    public function getExtendedShopSearchSourceLabel(): string
    {
        return 'Erweiterte Shopsuche';
    }

    public function getUsedSourcesPrefix(): string
    {
        return 'Genutzte Quellen: ';
    }

    public function getSourcesPrefix(): string
    {
        return 'Quellen: ';
    }

    // --- HTML templates (each contains one %s placeholder) ---------------

    public function getSourceBadgeHtmlTemplate(): string
    {
        return '<span class="badge bg-info text-black">%s</span>';
    }

    public function getErrorHtmlTemplate(): string
    {
        return '<span class="text-danger">%s</span>' . "\n<hr>\n";
    }

    public function getThinkHtmlTemplate(): string
    {
        return '<span class="text-info think">%s</span>' . "\n";
    }

    public function getInfoHtmlTemplate(): string
    {
        return "\n\n" . '<span class="text-info fw-bolder">%s</span>' . "\n";
    }

    public function getDebugHtmlTemplate(): string
    {
        return "\n\nDEBUG: <code>%s</code>\n";
    }

    // --- Shop query prompt ----------------------------------------------

    /**
     * Assembles the prompt that asks for a short shop search query.
     *
     * Block order: intro, rules, output format, optional conversation
     * context, the current-input label, the trimmed user prompt. Blocks are
     * separated by one blank line and empty blocks are omitted, so the
     * history section disappears entirely when no context is given.
     *
     * @param string $prompt                 Current user input.
     * @param string $commerceHistoryContext Recent conversation context; blank
     *                                       or whitespace-only means none.
     */
    public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
    {
        $historyBlock = '';

        if (trim($commerceHistoryContext) !== '') {
            $historyBlock = $this->buildHistoryBlock($commerceHistoryContext);
        }

        return $this->implodePromptBlocks([
            $this->getShopPromptIntro(),
            $this->buildRulesBlock($this->getShopPromptRules()),
            $this->getShopPromptOutputFormatBlock(),
            $historyBlock,
            $this->getCurrentUserInputLabel() . ':',
            trim($prompt),
        ]);
    }

    /**
     * Builds the conversation-context section: label, the trimmed context
     * and the rules that govern how the context may be used.
     */
    private function buildHistoryBlock(string $commerceHistoryContext): string
    {
        return $this->implodePromptBlocks([
            $this->getRecentConversationContextLabel() . ':',
            trim($commerceHistoryContext),
            $this->buildRulesBlock($this->getConversationContextRules(), 'Additional rules for conversation context:'),
        ]);
    }

    /**
     * @return string[] Rule lines, each already prefixed with "- ".
     */
    public function getShopPromptRules(): array
    {
        return [
            '- Output only the final search query.',
            '- Always convert relevant search terms to their singular form.',
            '- No introduction, no explanation, no quotation marks.',
            '- Use only shop-relevant search terms from the user input for a shop search.',
            '- Maximum 6 search terms, preferably fewer.',
            '- Remove filler words, polite phrases, and irrelevant words.',
            '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.',
            '- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).',
            '- Separate terms using spaces only.',
            '- If a relevant product name is present, it must be placed at the beginning of the final search query.',
            '- Try to always identify all products mentioned in the user input text, even in long prompts.',
            '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.',
            '- If the current user input is vague or referential, use the recent conversation context only as support.',
            '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".',
        ];
    }

    /**
     * @return string[] Rule lines, each already prefixed with "- ".
     */
    public function getConversationContextRules(): array
    {
        return [
            '- The current user input has highest priority.',
            '- Use the recent conversation context only to resolve omitted references.',
            '- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.',
            '- Do not revive older products unless the current user input clearly refers to them.',
            '- If the current input starts a new topic, ignore older product context.',
            '- Prefer the most recent product reference over older ones.',
        ];
    }

    public function getShopPromptIntro(): string
    {
        return 'Generate a short search query for Shopware 6 from the following user input text.';
    }

    public function getShopPromptOutputFormatBlock(): string
    {
        return "Output format:\nKeyword1 Keyword2 Keyword3";
    }

    /** Label without the trailing colon; callers append it. */
    public function getRecentConversationContextLabel(): string
    {
        return 'RECENT CONVERSATION CONTEXT';
    }

    /** Label without the trailing colon; callers append it. */
    public function getCurrentUserInputLabel(): string
    {
        return 'CURRENT USER INPUT';
    }

    /**
     * Renders a headline followed by one rule per line.
     *
     * @param string[] $rules    Rule lines, used verbatim.
     * @param string   $headline First line of the block.
     */
    private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string
    {
        return $headline . "\n" . implode("\n", $rules);
    }

    /**
     * Trims every block, drops the ones that end up empty and joins the
     * rest with a blank line between them.
     *
     * @param string[] $blocks
     */
    private function implodePromptBlocks(array $blocks): string
    {
        $normalized = array_values(array_filter(
            array_map(
                static fn(string $block): string => trim($block),
                $blocks
            ),
            static fn(string $block): bool => $block !== ''
        ));

        return implode("\n\n", $normalized);
    }
}
|
||||
@@ -6,57 +6,38 @@ namespace App\Config;
|
||||
|
||||
final class CommerceIntentConfig
|
||||
{
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getStrongSignalsList(): array
|
||||
{
|
||||
return [
|
||||
'shop',
|
||||
'alle',
|
||||
'preis',
|
||||
'preise',
|
||||
'kunde',
|
||||
'online',
|
||||
'produkt',
|
||||
'produkte',
|
||||
'artikel',
|
||||
'sku',
|
||||
'kaufen',
|
||||
'kostet',
|
||||
'kosten',
|
||||
'verfügbarkeit',
|
||||
'verfuegbarkeit',
|
||||
|
||||
// Search / product discovery signals
|
||||
'suche',
|
||||
'such',
|
||||
'finde',
|
||||
'finden',
|
||||
'welche',
|
||||
'welcher',
|
||||
'welches',
|
||||
|
||||
// Device / system signals
|
||||
'analysegerät',
|
||||
'analysegeraet',
|
||||
'analysegeräte',
|
||||
'analysegeraete',
|
||||
'messgerät',
|
||||
'messgeraet',
|
||||
'messgeräte',
|
||||
'messgeraete',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'geräte',
|
||||
'geraete',
|
||||
'analysator',
|
||||
'analysatoren',
|
||||
'analyzer',
|
||||
'system',
|
||||
'systeme',
|
||||
'anlage',
|
||||
'anlagen',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getAdvisorySignals(): array
|
||||
{
|
||||
return [
|
||||
@@ -67,30 +48,36 @@ final class CommerceIntentConfig
|
||||
'geeignet',
|
||||
'empfiehl',
|
||||
'empfehl',
|
||||
'vergleich',
|
||||
'vergleichen',
|
||||
];
|
||||
}
|
||||
|
||||
public function getPricePattern(): string
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getPriceTerms(): array
|
||||
{
|
||||
$pattern = [
|
||||
return [
|
||||
'euro',
|
||||
'€',
|
||||
'eur',
|
||||
'teuer',
|
||||
'preis',
|
||||
'preise',
|
||||
'kosten',
|
||||
'kostet',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getColorPattern(): string
|
||||
/**
 * Returns the price terms joined with "|" so the result can be embedded
 * as an alternation inside a larger regular expression. The string has no
 * delimiters or grouping of its own.
 */
public function getPricePattern(): string
{
    return implode('|', $this->getPriceTerms());
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getColorTerms(): array
|
||||
{
|
||||
return [
|
||||
'schwarz',
|
||||
'weiß',
|
||||
'weis',
|
||||
@@ -103,13 +90,19 @@ final class CommerceIntentConfig
|
||||
'orange',
|
||||
'braun',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getSizeTokenPattern(): string
|
||||
/**
 * Returns the color terms joined with "|" so the result can be embedded
 * as an alternation inside a larger regular expression. The string has no
 * delimiters or grouping of its own.
 */
public function getColorPattern(): string
{
    return implode('|', $this->getColorTerms());
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSizeTokenTerms(): array
|
||||
{
|
||||
return [
|
||||
'xs',
|
||||
's',
|
||||
'm',
|
||||
@@ -118,18 +111,189 @@ final class CommerceIntentConfig
|
||||
'xxl',
|
||||
'xxxxl',
|
||||
];
|
||||
|
||||
return implode('|', $pattern);
|
||||
}
|
||||
|
||||
public function getSizePattern(): string
|
||||
/**
 * Returns the size tokens joined with "|" so the result can be embedded
 * as an alternation inside a larger regular expression. The string has no
 * delimiters or grouping of its own.
 */
public function getSizeTokenPattern(): string
{
    return implode('|', $this->getSizeTokenTerms());
}
|
||||
|
||||
/**
 * Spellings of the German word for "size" that introduce a size value.
 *
 * @return string[]
 */
public function getSizeTerms(): array
{
    $sizeWords = ['größe', 'groesse', 'grösse'];

    return $sizeWords;
}
|
||||
|
||||
return implode('|', $pattern);
|
||||
/**
 * Joins the size terms with "|" into an alternation fragment (no
 * delimiters, no grouping) for use inside a larger regular expression.
 */
public function getSizePattern(): string
{
    $terms = $this->getSizeTerms();

    return implode('|', $terms);
}
|
||||
|
||||
/**
 * Regex that matches a size term followed by its value and captures the
 * value (letters a-z, digits, "." and "-") in group 1.
 */
public function getSizeExtractionPattern(): string
{
    return sprintf('/\b(?:%s)\s*([a-z0-9.-]+)\b/u', $this->getSizePattern());
}
|
||||
|
||||
/**
 * Regular expressions for support/diagnostic wording. Each phrase below is
 * wrapped in word boundaries and given the "u" modifier; the last entry is
 * itself a regex fragment ("e" followed by one to three digits).
 *
 * @return string[]
 */
public function getSupportDiagnosticPatterns(): array
{
    $phrases = [
        'fehler',
        'fehlercode',
        'error',
        'störung',
        'stoerung',
        'alarm',
        'störungsmeldung',
        'stoerungsmeldung',
        'meldung',
        'warnung',
        'warncode',
        'code',
        'was bedeutet',
        'warum',
        'blinkt',
        'zeigt',
        'zeigt an',
        'ursache',
        'diagnose',
        'servicefall',
        'problem',
        'störung beheben',
        'stoerung beheben',
        'e\d{1,3}',
    ];

    return array_map(
        static fn(string $phrase): string => '/\b' . $phrase . '\b/u',
        $phrases
    );
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getExplicitCommerceIntentPatterns(): array
|
||||
{
|
||||
return [
|
||||
'/\bshop\b/u',
|
||||
'/\bpreis\b/u',
|
||||
'/\bkosten\b/u',
|
||||
'/\bkostet\b/u',
|
||||
'/\bkaufen\b/u',
|
||||
'/\bbestellen\b/u',
|
||||
'/\bprodukt\b/u',
|
||||
'/\bartikel\b/u',
|
||||
'/\bsku\b/u',
|
||||
'/\bonline\b/u',
|
||||
];
|
||||
}
|
||||
|
||||
public function getSkuLikePattern(): string
|
||||
{
|
||||
return '/\b\d{4,10}\b/u';
|
||||
}
|
||||
|
||||
public function getPriceValuePattern(): string
|
||||
{
|
||||
return '/\b\d+(?:[.,]\d+)?\s*(?:' . $this->getPricePattern() . ')\b/u';
|
||||
}
|
||||
|
||||
public function getSizeValuePattern(): string
|
||||
{
|
||||
return '/\b(?:' . $this->getSizePattern() . ')\s*[a-z0-9.-]+\b/u';
|
||||
}
|
||||
|
||||
public function getSizeTokenValuePattern(): string
|
||||
{
|
||||
return '/\b(?:' . $this->getSizeTokenPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
public function getColorValuePattern(): string
|
||||
{
|
||||
return '/\b(?:' . $this->getColorPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
public function getSupportOrDiagnosticSignalLabel(): string
|
||||
{
|
||||
return 'support_or_diagnostic';
|
||||
}
|
||||
|
||||
public function getSkuSignalLabel(): string
|
||||
{
|
||||
return 'sku';
|
||||
}
|
||||
|
||||
public function getPriceSignalLabel(): string
|
||||
{
|
||||
return 'price';
|
||||
}
|
||||
|
||||
public function getSizeSignalLabel(): string
|
||||
{
|
||||
return 'size';
|
||||
}
|
||||
|
||||
public function getSizeTokenSignalLabel(): string
|
||||
{
|
||||
return 'size_token';
|
||||
}
|
||||
|
||||
public function getColorSignalLabel(): string
|
||||
{
|
||||
return 'color';
|
||||
}
|
||||
|
||||
public function getAdvisorySignalPrefix(): string
|
||||
{
|
||||
return 'advisory:';
|
||||
}
|
||||
|
||||
public function getProductSearchMinScore(): int
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
public function getAdvisoryProductSearchMinScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getStrongSignalScore(): int
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
public function getSkuSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getPriceSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSizeSignalScore(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSizeTokenSignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getColorSignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getAdvisorySignalScore(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -4,28 +4,18 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
final readonly class CommerceQueryParserConfig
|
||||
final class CommerceQueryParserConfig
|
||||
{
|
||||
/**
|
||||
* @param string[] $knownBrands
|
||||
* @param string[] $phrasesToRemove
|
||||
* @param string[] $filterSearchTokensPattern
|
||||
* @param string[] $referenceOnlyTokens
|
||||
*/
|
||||
public function __construct(
|
||||
private array $knownBrands = [],
|
||||
private array $phrasesToRemove = [],
|
||||
private array $filterSearchTokensPattern = [],
|
||||
private array $referenceOnlyTokens = [],
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getKnownBrands(): array
|
||||
{
|
||||
return $this->knownBrands;
|
||||
return [
|
||||
'heyl',
|
||||
'horiba',
|
||||
'neomeris',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig
|
||||
*/
|
||||
public function getPhrasesToRemove(): array
|
||||
{
|
||||
return $this->phrasesToRemove;
|
||||
return [
|
||||
'ich suche',
|
||||
'suche',
|
||||
'habt ihr',
|
||||
'gibt es',
|
||||
'zeige mir',
|
||||
'welches gerät',
|
||||
'welche gerät',
|
||||
'welches modell',
|
||||
'welches ist besser',
|
||||
'welches ist am besten',
|
||||
'alternative',
|
||||
'alternativen',
|
||||
];
|
||||
}
|
||||
|
||||
public function getHistoryContextPattern(): string
|
||||
{
|
||||
return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
||||
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
|
||||
}
|
||||
|
||||
public function getReferenceFollowUpPattern(): string
|
||||
public function getHistoryContextValuePattern(): string
|
||||
{
|
||||
return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
||||
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFilterSearchTokens(): array
|
||||
{
|
||||
return [
|
||||
'auch',
|
||||
'noch',
|
||||
'nochmal',
|
||||
'zusätzlich',
|
||||
'dazu',
|
||||
'davon',
|
||||
'stattdessen',
|
||||
'bitte',
|
||||
'gern',
|
||||
'gerne',
|
||||
'zeige',
|
||||
'zeig',
|
||||
'such',
|
||||
'suche',
|
||||
'finde',
|
||||
'find',
|
||||
'mir',
|
||||
'mal',
|
||||
'von',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Backward-compatible alias for older callers.
|
||||
*
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFilterSearchTokensPattern(): array
|
||||
{
|
||||
return $this->filterSearchTokensPattern;
|
||||
return $this->getFilterSearchTokens();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getReferenceOnlyTokens(): array
|
||||
public function getNormalizationSearch(): array
|
||||
{
|
||||
if ($this->referenceOnlyTokens !== []) {
|
||||
return $this->referenceOnlyTokens;
|
||||
}
|
||||
return ['€'];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getNormalizationReplace(): array
|
||||
{
|
||||
return [' euro '];
|
||||
}
|
||||
|
||||
public function getPromptSanitizePattern(): string
|
||||
{
|
||||
return '/[^\p{L}\p{N}\s.,\-]/u';
|
||||
}
|
||||
|
||||
public function getWhitespaceCollapsePattern(): string
|
||||
{
|
||||
return '/\s+/u';
|
||||
}
|
||||
|
||||
public function getWhitespaceSplitPattern(): string
|
||||
{
|
||||
return '/\s+/u';
|
||||
}
|
||||
|
||||
public function getSearchTextTrimCharacters(): string
|
||||
{
|
||||
return " \t\n\r\0\x0B-.,";
|
||||
}
|
||||
|
||||
public function getMinSearchTokenLength(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getMinDirectProductTokenLength(): int
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public function getHistoryQuestionPattern(): string
|
||||
{
|
||||
return '/^Question:\s*(.+)$/m';
|
||||
}
|
||||
|
||||
public function getPriceBetweenPattern(): string
|
||||
{
|
||||
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
public function getPriceMaxPattern(): string
|
||||
{
|
||||
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
public function getPriceMinPattern(): string
|
||||
{
|
||||
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
|
||||
{
|
||||
return [
|
||||
'preis',
|
||||
'preise',
|
||||
'kosten',
|
||||
'kostet',
|
||||
'gerät',
|
||||
'geraet',
|
||||
'modell',
|
||||
'produkt',
|
||||
'artikel',
|
||||
'dafür',
|
||||
'dafuer',
|
||||
'dazu',
|
||||
'davon',
|
||||
'verfügbarkeit',
|
||||
'verfuegbarkeit',
|
||||
'shop',
|
||||
'link',
|
||||
'zum',
|
||||
'zur',
|
||||
'das',
|
||||
'dieses',
|
||||
'den',
|
||||
'dem',
|
||||
'bitte',
|
||||
'und',
|
||||
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
|
||||
];
|
||||
}
|
||||
|
||||
public function getDirectProductDigitPattern(): string
|
||||
{
|
||||
return '/\d/u';
|
||||
}
|
||||
|
||||
public function getDirectProductMaxTokens(): int
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
public function getModelLikePattern(): string
|
||||
{
|
||||
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
|
||||
public function getAccessoryLikePattern(): string
|
||||
{
|
||||
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
|
||||
public function buildExactTokenRemovalPattern(string $token): string
|
||||
{
|
||||
return '/\b' . preg_quote($token, '/') . '\b/u';
|
||||
}
|
||||
|
||||
public function buildBrandPartOfModelPattern(string $brand): string
|
||||
{
|
||||
return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||
}
|
||||
}
|
||||
@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
|
||||
* - the system now has more safeguards:
|
||||
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
|
||||
*/
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.82;
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.83;
|
||||
|
||||
/**
|
||||
* Lower safety boundary for dynamic threshold adjustments.
|
||||
|
||||
@@ -1,97 +1,459 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
class PromptBuilderConfig{
|
||||
/**
|
||||
* Approximate character-to-token ratio for conservative prompt budgeting.
|
||||
*/
|
||||
public const CHARS_PER_TOKEN = 4;
|
||||
final class PromptBuilderConfig
|
||||
{
|
||||
public function getCharsPerToken(): int
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
public function getHistoryPaddingChars(): int
|
||||
{
|
||||
return 400;
|
||||
}
|
||||
|
||||
public function getOutputReserveRatio(): float
|
||||
{
|
||||
return 0.25;
|
||||
}
|
||||
|
||||
public function getOutputReserveMinTokens(): int
|
||||
{
|
||||
return 768;
|
||||
}
|
||||
|
||||
public function getOutputReserveMaxTokens(): int
|
||||
{
|
||||
return 6000;
|
||||
}
|
||||
|
||||
public function getSafetyReserveRatio(): float
|
||||
{
|
||||
return 0.05;
|
||||
}
|
||||
|
||||
public function getSafetyReserveMinTokens(): int
|
||||
{
|
||||
return 256;
|
||||
}
|
||||
|
||||
public function getSafetyReserveMaxTokens(): int
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
public function getMinPromptBudgetTokens(): int
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
public function getMaxShopResultsInPrompt(): int
|
||||
{
|
||||
return 24;
|
||||
}
|
||||
|
||||
public function getDetailedShopResultsMaxCount(): int
|
||||
{
|
||||
return 5;
|
||||
}
|
||||
|
||||
public function getTechnicalProductKeywordMatchThreshold(): int
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
public function getSystemSectionLabel(): string
|
||||
{
|
||||
return 'SYSTEM';
|
||||
}
|
||||
|
||||
public function getUserQuestionSectionLabel(): string
|
||||
{
|
||||
return 'USER QUESTION';
|
||||
}
|
||||
|
||||
public function getConversationContextSectionLabel(): string
|
||||
{
|
||||
return 'CONVERSATION CONTEXT (contextual only)';
|
||||
}
|
||||
|
||||
/**
|
||||
* Keep a small gap so history does not consume the last available prompt space.
|
||||
* @return string[]
|
||||
*/
|
||||
public const HISTORY_PADDING_CHARS = 400;
|
||||
public function getConversationContextIntroLines(): array
|
||||
{
|
||||
return [
|
||||
'The following messages are previous turns of this conversation.',
|
||||
'Use them to resolve references, follow-up questions, and user intent.',
|
||||
'They must not override retrieved factual knowledge or live shop data.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getShopSearchQuerySectionLabel(): string
|
||||
{
|
||||
return 'SHOP SEARCH QUERY';
|
||||
}
|
||||
|
||||
public function getShopSearchQuerySourceLine(): string
|
||||
{
|
||||
return 'Source: Shop Search';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve some space for the model output.
|
||||
* @return string[]
|
||||
*/
|
||||
public const OUTPUT_RESERVE_RATIO = 0.25;
|
||||
public const OUTPUT_RESERVE_MIN_TOKENS = 768;
|
||||
public const OUTPUT_RESERVE_MAX_TOKENS = 6000;
|
||||
public function getLiveShopResultsHeaderLines(): array
|
||||
{
|
||||
return [
|
||||
'LIVE SHOP RESULTS (authoritative for current commercial details):',
|
||||
'Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.',
|
||||
'If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.',
|
||||
'Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.',
|
||||
'Do not infer undocumented technical specifications from shop data.',
|
||||
'Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.',
|
||||
'Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getLiveShopResultsOverflowNoticeTemplate(): string
|
||||
{
|
||||
return 'Only the top %d ranked shop results are shown here out of %d total results.';
|
||||
}
|
||||
|
||||
public function getOutputPrioritySectionLabel(): string
|
||||
{
|
||||
return 'OUTPUT PRIORITY';
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve a small safety buffer to avoid hitting the context limit too tightly.
|
||||
* @return string[]
|
||||
*/
|
||||
public const SAFETY_RESERVE_RATIO = 0.05;
|
||||
public const SAFETY_RESERVE_MIN_TOKENS = 256;
|
||||
public const SAFETY_RESERVE_MAX_TOKENS = 1024;
|
||||
public function getOutputPriorityRules(): array
|
||||
{
|
||||
return [
|
||||
'- Use retrieved knowledge first to determine the technically matching product or answer.',
|
||||
'- If shop results are present, use them afterwards to add current price, availability, and the actual URL.',
|
||||
'- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getResponseFormatSectionLabel(): string
|
||||
{
|
||||
return 'RESPONSE FORMAT RULES';
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the prompt budget never collapses completely on smaller models.
|
||||
* @return string[]
|
||||
*/
|
||||
public const MIN_PROMPT_BUDGET_TOKENS = 1024;
|
||||
public function getResponseFormatBaseRules(): array
|
||||
{
|
||||
return [
|
||||
'- Keep normal spacing between all words. Never fuse words together.',
|
||||
'- Use short, clean paragraphs or short labeled sections.',
|
||||
'- Do not use persuasive or promotional wording.',
|
||||
'- Do not repeat the same fact in slightly different wording.',
|
||||
'- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.',
|
||||
'- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.',
|
||||
'- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.',
|
||||
'- Do not combine technical identity from one source with commercial fields from a different product.',
|
||||
'- Product number, price, availability, and URL must belong to the same explicitly grounded product.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Limit how many ranked shop results are passed into the final prompt.
|
||||
* The shop search may return many candidates, but the LLM should only see
|
||||
* the most relevant top subset after local reranking.
|
||||
* @return string[]
|
||||
*/
|
||||
public const MAX_SHOP_RESULTS_IN_PROMPT = 24;
|
||||
public function getResponseFormatWithShopRules(): array
|
||||
{
|
||||
return [
|
||||
'- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.',
|
||||
'- Keep price, availability, and URL on separate lines when they are present.',
|
||||
'- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.',
|
||||
'- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.',
|
||||
'- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Technical product prompts should be answered like documentation,
|
||||
* not like sales copy.
|
||||
* @return string[]
|
||||
*/
|
||||
public const TECHNICAL_PRODUCT_KEYWORDS = [
|
||||
'technisch',
|
||||
'technical',
|
||||
'produkt',
|
||||
'product',
|
||||
'gerät',
|
||||
'device',
|
||||
'modell',
|
||||
'model',
|
||||
'messprinzip',
|
||||
'measurement principle',
|
||||
'schnittstelle',
|
||||
'interface',
|
||||
'relais',
|
||||
'relay',
|
||||
'indikator',
|
||||
'indicator',
|
||||
'spannung',
|
||||
'voltage',
|
||||
'strom',
|
||||
'current',
|
||||
'druck',
|
||||
'pressure',
|
||||
'temperatur',
|
||||
'temperature',
|
||||
'schutzart',
|
||||
'ip',
|
||||
'fehlercode',
|
||||
'error code',
|
||||
'wasserhärte',
|
||||
'hardness',
|
||||
'testomat',
|
||||
'chlor',
|
||||
'chlormessung',
|
||||
];
|
||||
public function getResponseFormatWithoutShopRules(): array
|
||||
{
|
||||
return [
|
||||
'- If no shop results are present, do not compensate by inventing external products or external manufacturers.',
|
||||
];
|
||||
}
|
||||
|
||||
public const ACCESSORY_REQUEST_KEYWORDS = [
|
||||
'passend',
|
||||
'passende',
|
||||
'passendes',
|
||||
'zubehör',
|
||||
'zubehor',
|
||||
'dazu',
|
||||
'indikator',
|
||||
'reagenz',
|
||||
'kit',
|
||||
'set',
|
||||
'zusatz',
|
||||
'ergänzung',
|
||||
'ergaenzung',
|
||||
];
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getResponseFormatTechnicalRules(): array
|
||||
{
|
||||
return [
|
||||
'- Write like technical documentation: precise, neutral, and source-close.',
|
||||
'- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.',
|
||||
'- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getResponseFormatAccessoryRules(): array
|
||||
{
|
||||
return [
|
||||
'- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.',
|
||||
'- The main device must come first. The accessory must not replace the main device.',
|
||||
'- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.',
|
||||
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getLanguageRulesSectionLabel(): string
|
||||
{
|
||||
return 'LANGUAGE RULES';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getLanguageRules(): array
|
||||
{
|
||||
return [
|
||||
'- Answer only in the same language as the user question.',
|
||||
'- All headings, labels, notes, and structural elements must be in the same language as the user question.',
|
||||
'- Do not switch languages unless the user does.',
|
||||
'- If headings are used, write them in the user\'s language.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getFactGroundingRulesSectionLabel(): string
|
||||
{
|
||||
return 'FACT GROUNDING RULES';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFactGroundingBaseRules(): array
|
||||
{
|
||||
return [
|
||||
'- State only facts that are explicitly present in the provided sources.',
|
||||
'- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.',
|
||||
'- Do not invent missing values.',
|
||||
'- Do not replace missing values with estimates, defaults, or typical industry assumptions.',
|
||||
'- Do not claim that information is missing if it appears in the provided sources.',
|
||||
'- Do not compare with other products unless those products are also present in the provided sources.',
|
||||
'- Prefer source-faithful wording over persuasive wording.',
|
||||
'- Avoid marketing language such as \'ideal\', \'perfect\', \'unverzichtbar\', \'entscheidend\', \'optimal\', \'kosteneffizient\', \'prozesssicher\', or \'state-of-the-art\'.',
|
||||
'- Clearly separate explicit facts from inferences.',
|
||||
'- If a conclusion goes beyond the source wording, label it exactly as \'Inference:\'.',
|
||||
'- If a sentence cannot be traced to the provided sources, do not write it.',
|
||||
'- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.',
|
||||
'- If the sources do not identify a suitable product, do not invent one.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFactGroundingWithShopRules(): array
|
||||
{
|
||||
return [
|
||||
'- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.',
|
||||
'- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.',
|
||||
'- When shop results are present and relevant, include current price and the actual URL if available.',
|
||||
'- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.',
|
||||
'- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
|
||||
'- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.',
|
||||
'- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.',
|
||||
'- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.',
|
||||
'- If the shop match is ambiguous, keep the technical identification and commercial details separate.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFactGroundingWithoutShopRules(): array
|
||||
{
|
||||
return [
|
||||
'- Use retrieved knowledge as authoritative for factual answers.',
|
||||
'- If no shop results are present, do not compensate with external recommendations or external product suggestions.',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getFactGroundingTechnicalRules(): array
|
||||
{
|
||||
return [
|
||||
'- For technical product questions, answer primarily with explicitly stated facts.',
|
||||
'- Behave like a technical documentation assistant, not like a sales advisor.',
|
||||
'- Keep interpretations minimal and do not generalize application areas beyond the provided sources.',
|
||||
'- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.',
|
||||
'- Do not translate technical facts into business value unless the source explicitly does so.',
|
||||
'- Do not recommend process changes unless explicitly present in the source.',
|
||||
'- Do not use persuasive summaries or advisory conclusions.',
|
||||
'- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.',
|
||||
'- Use neutral engineering language.',
|
||||
'- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.',
|
||||
'- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.',
|
||||
'- If the source lists application areas, repeat only those areas and do not broaden them.',
|
||||
'- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
|
||||
'- If the source states only a threshold function, do not expand it into broader control logic.',
|
||||
'- If a detail is not explicitly stated in the provided sources, say so plainly.',
|
||||
'- Prefer short, source-close sentences over explanatory expansion.',
|
||||
'- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.',
|
||||
];
|
||||
}
|
||||
|
||||
public function getRetrievedKnowledgeSectionLabel(): string
|
||||
{
|
||||
return 'RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)';
|
||||
}
|
||||
|
||||
public function getRetrievedKnowledgeSourceLine(): string
|
||||
{
|
||||
return 'Source: Documents';
|
||||
}
|
||||
|
||||
public function getUrlContentSectionLabel(): string
|
||||
{
|
||||
return 'CONTENT FROM URL (authoritative if user-provided)';
|
||||
}
|
||||
|
||||
public function getUrlContentSourceLine(): string
|
||||
{
|
||||
return 'Source: URL';
|
||||
}
|
||||
|
||||
public function getShopProductNumberLabel(): string
|
||||
{
|
||||
return 'Product number';
|
||||
}
|
||||
|
||||
public function getShopManufacturerLabel(): string
|
||||
{
|
||||
return 'Manufacturer';
|
||||
}
|
||||
|
||||
public function getShopPriceLabel(): string
|
||||
{
|
||||
return 'Price';
|
||||
}
|
||||
|
||||
public function getShopAvailabilityLabel(): string
|
||||
{
|
||||
return 'Available';
|
||||
}
|
||||
|
||||
public function getShopAvailabilityYesLabel(): string
|
||||
{
|
||||
return 'yes';
|
||||
}
|
||||
|
||||
public function getShopAvailabilityNoLabel(): string
|
||||
{
|
||||
return 'no';
|
||||
}
|
||||
|
||||
public function getShopHighlightPrefix(): string
|
||||
{
|
||||
return '- ';
|
||||
}
|
||||
|
||||
public function getShopUrlLabel(): string
|
||||
{
|
||||
return 'URL';
|
||||
}
|
||||
|
||||
public function getShopProductImageLabel(): string
|
||||
{
|
||||
return 'Product image';
|
||||
}
|
||||
|
||||
public function getShopDescriptionLabel(): string
|
||||
{
|
||||
return 'Description';
|
||||
}
|
||||
|
||||
public function getShopMetaInformationLabel(): string
|
||||
{
|
||||
return 'Meta information';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getTechnicalProductKeywords(): array
|
||||
{
|
||||
return [
|
||||
'technisch',
|
||||
'technical',
|
||||
'produkt',
|
||||
'product',
|
||||
'gerät',
|
||||
'device',
|
||||
'modell',
|
||||
'model',
|
||||
'messprinzip',
|
||||
'measurement principle',
|
||||
'schnittstelle',
|
||||
'interface',
|
||||
'relais',
|
||||
'relay',
|
||||
'indikator',
|
||||
'indicator',
|
||||
'spannung',
|
||||
'voltage',
|
||||
'strom',
|
||||
'current',
|
||||
'druck',
|
||||
'pressure',
|
||||
'temperatur',
|
||||
'temperature',
|
||||
'schutzart',
|
||||
'ip',
|
||||
'fehlercode',
|
||||
'error code',
|
||||
'wasserhärte',
|
||||
'hardness',
|
||||
'testomat',
|
||||
'chlor',
|
||||
'chlormessung',
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getAccessoryRequestKeywords(): array
|
||||
{
|
||||
return [
|
||||
'passend',
|
||||
'passende',
|
||||
'passendes',
|
||||
'zubehör',
|
||||
'zubehor',
|
||||
'dazu',
|
||||
'indikator',
|
||||
'reagenz',
|
||||
'kit',
|
||||
'set',
|
||||
'zusatz',
|
||||
'ergänzung',
|
||||
'ergaenzung',
|
||||
];
|
||||
}
|
||||
|
||||
public function getTechnicalProductModelPattern(): string
|
||||
{
|
||||
return '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';
|
||||
}
|
||||
}
|
||||
204
src/Config/SearchRepairConfig.php
Normal file
204
src/Config/SearchRepairConfig.php
Normal file
@@ -0,0 +1,204 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Fixed tuning values, regex patterns and term lists for search repair.
 *
 * Every getter returns a value defined in this class; nothing is read from
 * the container, the environment or any other runtime source.
 * NOTE(review): presumably consumed by App\Commerce\SearchRepairService — confirm.
 */
final class SearchRepairConfig
{
    // Switches and limits.
    private const ENABLED = true;
    private const MAX_REPAIR_QUERIES = 3;
    private const MIN_PRIMARY_RESULTS_WITHOUT_REPAIR = 2;
    private const TOP_PRODUCT_LOG_LIMIT = 3;

    // Static regular expressions (all compiled in UTF-8 mode via the "u" flag).
    private const MODEL_CANDIDATE_PATTERN = '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u';
    private const MODEL_LIKE_PATTERN = '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u';
    private const CONTAINS_DIGIT_PATTERN = '/\d/u';
    private const WHITESPACE_COLLAPSE_PATTERN = '/\s+/u';
    private const TOKENIZE_CLEANUP_PATTERN = '/[^\p{L}\p{N}\s\-]+/u';

    // Plain string settings.
    private const SANITIZE_TRIM_CHARACTERS = " \t\n\r\0\x0B\"'`.,;:-";
    private const PRODUCT_KEY_SEPARATOR = '|';

    // Scores, weights and thresholds.
    private const CANDIDATE_DIGIT_SCORE = 4;
    private const CANDIDATE_WORD_COUNT_CAP = 4;
    private const SPECIFICITY_BOOST_SCORE = 3;
    private const PRIMARY_QUERY_OVERLAP_THRESHOLD = 0.9;
    private const PROMPT_MATCH_WEIGHT = 3;
    private const PRIMARY_QUERY_MATCH_WEIGHT = 2;
    private const REPAIR_SIGNAL_MATCH_WEIGHT = 4;
    private const PRIMARY_RESULT_ORDER_BONUS = 1;
    private const TOKEN_INTERSECTION_SCORE = 2;
    private const NUMERIC_TOKEN_MATCH_SCORE = 4;

    // Term lists.
    private const GENERIC_CANDIDATE_TOKENS = [
        'wasser',
        'messgerät',
        'messgeraet',
        'produkt',
        'geräte',
        'geraete',
        'gerät',
        'geraet',
        'resthärte',
        'resthaerte',
        'preis',
        'infos',
        'wissen',
    ];

    private const ACCESSORY_CANDIDATE_TERMS = [
        'indikator',
        'indicator',
        'reagenz',
        'reagent',
        'kit',
        'set',
    ];

    // Entries are inserted into a regex verbatim, so they may contain regex syntax.
    private const ACCESSORY_OR_BUNDLE_TERMS = [
        'passend',
        'passende',
        'zubehor',
        'zubehör',
        'dazu',
        'zusatz',
        'erganzung',
        'ergänzung',
        'indikator',
        'reagenz',
        'kit',
        'set',
        'auch\s+das',
        'mit\s+preis\s+und\s+allen\s+infos',
    ];

    private const SPECIFICITY_BOOST_TERMS = [
        'indikator',
        'indicator',
        'testomat',
        'tritromat',
        'titromat',
        'reagenz',
        'reagent',
    ];

    /**
     * Returns the enabled flag (always true).
     */
    public function isEnabled(): bool
    {
        return self::ENABLED;
    }

    /**
     * Returns the configured maximum number of repair queries (3).
     */
    public function getMaxRepairQueries(): int
    {
        return self::MAX_REPAIR_QUERIES;
    }

    /**
     * Returns the configured minimum primary-result count without repair (2).
     */
    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return self::MIN_PRIMARY_RESULTS_WITHOUT_REPAIR;
    }

    /**
     * Returns the configured top-product log limit (3).
     */
    public function getTopProductLogLimit(): int
    {
        return self::TOP_PRODUCT_LOG_LIMIT;
    }

    /**
     * Returns a regex whose group 1 captures one to three words (ASCII or
     * German letters; later characters may also be "®" or "-") followed by
     * whitespace and a token that starts with 2-5 digits.
     */
    public function getModelCandidatePattern(): string
    {
        return self::MODEL_CANDIDATE_PATTERN;
    }

    /**
     * Returns a case-insensitive regex whose group 1 captures one of the
     * accessory candidate terms followed by whitespace and a token that
     * starts with 1-5 digits.
     */
    public function getAccessoryCandidatePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryCandidateTerms());

        return '/\b((?:' . $alternation . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
    }

    /**
     * Returns a case-insensitive regex whose group 1 captures any of the
     * accessory-or-bundle terms between word boundaries.
     *
     * The terms are joined without quoting, so regex fragments inside a term
     * (for example "\s+") stay active.
     */
    public function getAccessoryOrBundlePattern(): string
    {
        $alternation = implode('|', $this->getAccessoryOrBundleTerms());

        return '/\b(' . $alternation . ')\b/iu';
    }

    /**
     * Returns the same word-plus-number shape as the model candidate
     * pattern, but without a capturing group.
     */
    public function getModelLikePattern(): string
    {
        return self::MODEL_LIKE_PATTERN;
    }

    /**
     * Returns a case-insensitive regex matching any specificity-boost term
     * between word boundaries (non-capturing).
     */
    public function getSpecificityBoostPattern(): string
    {
        $alternation = implode('|', $this->getSpecificityBoostTerms());

        return '/\b(?:' . $alternation . ')\b/iu';
    }

    /**
     * Returns the lower-case tokens configured as generic candidate tokens.
     *
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return self::GENERIC_CANDIDATE_TOKENS;
    }

    /**
     * Returns the sanitize trim character set: space, tab, LF, CR, NUL,
     * vertical tab, both quote characters, backtick and ". , ; : -".
     */
    public function getSanitizeTrimCharacters(): string
    {
        return self::SANITIZE_TRIM_CHARACTERS;
    }

    /**
     * Returns a regex that matches a single digit.
     */
    public function getContainsDigitPattern(): string
    {
        return self::CONTAINS_DIGIT_PATTERN;
    }

    /**
     * Returns a regex that matches a run of one or more whitespace characters.
     */
    public function getWhitespaceCollapsePattern(): string
    {
        return self::WHITESPACE_COLLAPSE_PATTERN;
    }

    /**
     * Returns a regex that matches runs of characters that are neither
     * letters, digits, whitespace nor hyphens.
     */
    public function getTokenizeCleanupPattern(): string
    {
        return self::TOKENIZE_CLEANUP_PATTERN;
    }

    /**
     * Returns the product key separator ("|").
     */
    public function getProductKeySeparator(): string
    {
        return self::PRODUCT_KEY_SEPARATOR;
    }

    /**
     * Returns the configured candidate digit score (4).
     */
    public function getCandidateDigitScore(): int
    {
        return self::CANDIDATE_DIGIT_SCORE;
    }

    /**
     * Returns the configured candidate word-count cap (4).
     */
    public function getCandidateWordCountCap(): int
    {
        return self::CANDIDATE_WORD_COUNT_CAP;
    }

    /**
     * Returns the configured specificity boost score (3).
     */
    public function getSpecificityBoostScore(): int
    {
        return self::SPECIFICITY_BOOST_SCORE;
    }

    /**
     * Returns the configured primary-query overlap threshold (0.9).
     */
    public function getPrimaryQueryOverlapThreshold(): float
    {
        return self::PRIMARY_QUERY_OVERLAP_THRESHOLD;
    }

    /**
     * Returns the configured prompt match weight (3).
     */
    public function getPromptMatchWeight(): int
    {
        return self::PROMPT_MATCH_WEIGHT;
    }

    /**
     * Returns the configured primary-query match weight (2).
     */
    public function getPrimaryQueryMatchWeight(): int
    {
        return self::PRIMARY_QUERY_MATCH_WEIGHT;
    }

    /**
     * Returns the configured repair-signal match weight (4).
     */
    public function getRepairSignalMatchWeight(): int
    {
        return self::REPAIR_SIGNAL_MATCH_WEIGHT;
    }

    /**
     * Returns the configured primary-result order bonus (1).
     */
    public function getPrimaryResultOrderBonus(): int
    {
        return self::PRIMARY_RESULT_ORDER_BONUS;
    }

    /**
     * Returns the configured token intersection score (2).
     */
    public function getTokenIntersectionScore(): int
    {
        return self::TOKEN_INTERSECTION_SCORE;
    }

    /**
     * Returns the configured numeric-token match score (4).
     */
    public function getNumericTokenMatchScore(): int
    {
        return self::NUMERIC_TOKEN_MATCH_SCORE;
    }

    /**
     * Returns the terms used as the alternation of the accessory candidate pattern.
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return self::ACCESSORY_CANDIDATE_TERMS;
    }

    /**
     * Returns the terms (some of them regex fragments) used as the
     * alternation of the accessory-or-bundle pattern.
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return self::ACCESSORY_OR_BUNDLE_TERMS;
    }

    /**
     * Returns the terms used as the alternation of the specificity boost pattern.
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return self::SPECIFICITY_BOOST_TERMS;
    }
}
|
||||
40
src/Config/StopWordsConfig.php
Normal file
40
src/Config/StopWordsConfig.php
Normal file
@@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Holds the fixed German stop-word list used for retrieval.
 */
final class StopWordsConfig
{
    /**
     * Stop words in their canonical order; all entries are lower-case.
     */
    private const STOP_WORDS = [
        'mit',
        // Articles
        'der', 'die', 'das',
        'ein', 'eine', 'einer', 'eines',
        'den', 'dem', 'des',
        // Conjunctions
        'und', 'oder', 'aber', 'sowie',
        // Weak pronouns
        'ich', 'du', 'er', 'sie', 'es',
        'wir', 'ihr',
        // Filler words
        'halt', 'eben', 'auch', 'schon',
        'noch', 'mal', 'bitte', 'danke',
        // Structural words
        'also', 'nun', 'tja',
        'dann', 'danach', 'davor',
        'hier', 'dort',
        // Low-context time fillers
        'heute', 'gestern', 'morgen',
        // Politeness / modal forms
        'könnte', 'kannst', 'kann',
        'würde', 'würdest', 'würden',
    ];

    /**
     * Returns the retrieval-optimized stop-word list.
     *
     * Rules for maintaining the list:
     * - negations stay out of it
     * - question words stay out of it
     * - domain terms stay out of it
     * - only structural filler words belong in it
     *
     * @return string[]
     */
    public function getStopWords(): array
    {
        return self::STOP_WORDS;
    }
}
|
||||
@@ -22,157 +22,75 @@ final class CommerceIntentLite
|
||||
*/
|
||||
public function detect(string $originalPrompt): array
|
||||
{
|
||||
$p = mb_strtolower(trim($originalPrompt));
|
||||
$prompt = mb_strtolower(trim($originalPrompt));
|
||||
|
||||
if ($p === '') {
|
||||
return [
|
||||
'intent' => self::NONE,
|
||||
'score' => 0,
|
||||
'signals' => [],
|
||||
];
|
||||
if ($prompt === '') {
|
||||
return $this->buildDetectionResult(
|
||||
intent: self::NONE,
|
||||
score: 0,
|
||||
signals: []
|
||||
);
|
||||
}
|
||||
|
||||
// Block support / diagnostic questions from entering the commerce flow
|
||||
// unless the prompt also contains very explicit purchase / shop intent.
|
||||
if ($this->isSupportOrDiagnosticQuery($p) && !$this->hasExplicitCommerceIntent($p)) {
|
||||
return [
|
||||
'intent' => self::NONE,
|
||||
'score' => 0,
|
||||
'signals' => ['support_or_diagnostic'],
|
||||
];
|
||||
if ($this->isSupportOrDiagnosticQuery($prompt) && !$this->hasExplicitCommerceIntent($prompt)) {
|
||||
return $this->buildDetectionResult(
|
||||
intent: self::NONE,
|
||||
score: 0,
|
||||
signals: [$this->config->getSupportOrDiagnosticSignalLabel()]
|
||||
);
|
||||
}
|
||||
|
||||
$score = 0;
|
||||
$signals = [];
|
||||
|
||||
$strongSignals = $this->config->getStrongSignalsList();
|
||||
|
||||
foreach ($strongSignals as $signal) {
|
||||
if (str_contains($p, mb_strtolower($signal))) {
|
||||
$score += 3;
|
||||
$signals[] = $signal;
|
||||
}
|
||||
}
|
||||
|
||||
// Treat long numeric identifiers as stronger product-number-like signals.
|
||||
// This avoids over-triggering commerce purely because a model name contains
|
||||
// a short number such as "808" in support questions.
|
||||
if (preg_match('/\b\d{4,10}\b/u', $p) === 1) {
|
||||
$score += 2;
|
||||
$signals[] = 'sku';
|
||||
}
|
||||
|
||||
$pricePattern = $this->config->getPricePattern();
|
||||
if (preg_match('/\b\d+(?:[.,]\d+)?\s*(' . $pricePattern . ')\b/u', $p) === 1) {
|
||||
$score += 2;
|
||||
$signals[] = 'price';
|
||||
}
|
||||
|
||||
$sizePattern = $this->config->getSizePattern();
|
||||
if (preg_match('/\b(' . $sizePattern . ')\s*[a-z0-9.-]+\b/u', $p) === 1) {
|
||||
$score += 2;
|
||||
$signals[] = 'size';
|
||||
}
|
||||
|
||||
$sizeTokenPattern = $this->config->getSizeTokenPattern();
|
||||
if (preg_match('/\b(' . $sizeTokenPattern . ')\b/u', $p) === 1) {
|
||||
$score += 1;
|
||||
$signals[] = 'size_token';
|
||||
}
|
||||
|
||||
$colorPattern = $this->config->getColorPattern();
|
||||
if (preg_match('/\b(' . $colorPattern . ')\b/u', $p) === 1) {
|
||||
$score += 1;
|
||||
$signals[] = 'color';
|
||||
}
|
||||
|
||||
$advisorySignals = $this->config->getAdvisorySignals();
|
||||
|
||||
foreach ($advisorySignals as $signal) {
|
||||
if (str_contains($p, mb_strtolower($signal))) {
|
||||
$score += 1;
|
||||
$signals[] = 'advisory:' . $signal;
|
||||
}
|
||||
}
|
||||
[$score, $signals] = $this->applyStrongSignals($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applySkuSignal($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applyPriceSignal($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applySizeSignal($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applySizeTokenSignal($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applyColorSignal($prompt, $score, $signals);
|
||||
[$score, $signals] = $this->applyAdvisorySignals($prompt, $score, $signals);
|
||||
|
||||
$signals = array_values(array_unique($signals));
|
||||
|
||||
if ($score >= 3) {
|
||||
return [
|
||||
'intent' => self::PRODUCT_SEARCH,
|
||||
'score' => $score,
|
||||
'signals' => $signals,
|
||||
];
|
||||
if ($score >= $this->config->getProductSearchMinScore()) {
|
||||
return $this->buildDetectionResult(
|
||||
intent: self::PRODUCT_SEARCH,
|
||||
score: $score,
|
||||
signals: $signals
|
||||
);
|
||||
}
|
||||
|
||||
if ($score >= 2) {
|
||||
return [
|
||||
'intent' => self::ADVISORY_PRODUCT_SEARCH,
|
||||
'score' => $score,
|
||||
'signals' => $signals,
|
||||
];
|
||||
if ($score >= $this->config->getAdvisoryProductSearchMinScore()) {
|
||||
return $this->buildDetectionResult(
|
||||
intent: self::ADVISORY_PRODUCT_SEARCH,
|
||||
score: $score,
|
||||
signals: $signals
|
||||
);
|
||||
}
|
||||
|
||||
return [
|
||||
'intent' => self::NONE,
|
||||
'score' => $score,
|
||||
'signals' => $signals,
|
||||
];
|
||||
return $this->buildDetectionResult(
|
||||
intent: self::NONE,
|
||||
score: $score,
|
||||
signals: $signals
|
||||
);
|
||||
}
|
||||
|
||||
private function isSupportOrDiagnosticQuery(string $prompt): bool
|
||||
{
|
||||
$patterns = [
|
||||
'/\bfehler\b/u',
|
||||
'/\bfehlercode\b/u',
|
||||
'/\berror\b/u',
|
||||
'/\bstörung\b/u',
|
||||
'/\bstoerung\b/u',
|
||||
'/\balarm\b/u',
|
||||
'/\bstörungsmeldung\b/u',
|
||||
'/\bstoerungsmeldung\b/u',
|
||||
'/\bmeldung\b/u',
|
||||
'/\bwarnung\b/u',
|
||||
'/\bwarncode\b/u',
|
||||
'/\bcode\b/u',
|
||||
'/\bwas bedeutet\b/u',
|
||||
'/\bwarum\b/u',
|
||||
'/\bblinkt\b/u',
|
||||
'/\bzeigt\b/u',
|
||||
'/\bzeigt an\b/u',
|
||||
'/\bursache\b/u',
|
||||
'/\bdiagnose\b/u',
|
||||
'/\bservicefall\b/u',
|
||||
'/\bproblem\b/u',
|
||||
'/\bstörung beheben\b/u',
|
||||
'/\bstoerung beheben\b/u',
|
||||
'/\be\d{1,3}\b/u',
|
||||
];
|
||||
|
||||
foreach ($patterns as $pattern) {
|
||||
if (preg_match($pattern, $prompt) === 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return $this->matchesAnyPattern($prompt, $this->config->getSupportDiagnosticPatterns());
|
||||
}
|
||||
|
||||
private function hasExplicitCommerceIntent(string $prompt): bool
|
||||
{
|
||||
$patterns = [
|
||||
'/\bshop\b/u',
|
||||
'/\bpreis\b/u',
|
||||
'/\bkosten\b/u',
|
||||
'/\bkostet\b/u',
|
||||
'/\bkaufen\b/u',
|
||||
'/\bbestellen\b/u',
|
||||
'/\bprodukt\b/u',
|
||||
'/\bartikel\b/u',
|
||||
'/\bsku\b/u',
|
||||
'/\bonline\b/u',
|
||||
];
|
||||
return $this->matchesAnyPattern($prompt, $this->config->getExplicitCommerceIntentPatterns());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $patterns
|
||||
*/
|
||||
private function matchesAnyPattern(string $prompt, array $patterns): bool
|
||||
{
|
||||
foreach ($patterns as $pattern) {
|
||||
if (preg_match($pattern, $prompt) === 1) {
|
||||
return true;
|
||||
@@ -181,4 +99,119 @@ final class CommerceIntentLite
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Adds the strong-signal score once for every configured strong signal
 * that occurs in the prompt and records that signal.
 *
 * Each signal is lower-cased before the substring check; the prompt is
 * compared as given.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applyStrongSignals(string $prompt, int $score, array $signals): array
{
    foreach ($this->config->getStrongSignalsList() as $strongSignal) {
        $needle = mb_strtolower($strongSignal);

        if (!str_contains($prompt, $needle)) {
            continue;
        }

        $score += $this->config->getStrongSignalScore();
        // The signal is recorded exactly as configured, not lower-cased.
        $signals[] = $strongSignal;
    }

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the SKU score and label when the prompt matches the configured
 * SKU-like pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applySkuSignal(string $prompt, int $score, array $signals): array
{
    $matched = preg_match($this->config->getSkuLikePattern(), $prompt) === 1;

    if (!$matched) {
        return [$score, $signals];
    }

    $score += $this->config->getSkuSignalScore();
    $signals[] = $this->config->getSkuSignalLabel();

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the price score and label when the prompt matches the configured
 * price-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applyPriceSignal(string $prompt, int $score, array $signals): array
{
    $matched = preg_match($this->config->getPriceValuePattern(), $prompt) === 1;

    if (!$matched) {
        return [$score, $signals];
    }

    $score += $this->config->getPriceSignalScore();
    $signals[] = $this->config->getPriceSignalLabel();

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the size score and label when the prompt matches the configured
 * size-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applySizeSignal(string $prompt, int $score, array $signals): array
{
    $matched = preg_match($this->config->getSizeValuePattern(), $prompt) === 1;

    if (!$matched) {
        return [$score, $signals];
    }

    $score += $this->config->getSizeSignalScore();
    $signals[] = $this->config->getSizeSignalLabel();

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the size-token score and label when the prompt matches the
 * configured size-token pattern; otherwise returns score and signals
 * unchanged.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applySizeTokenSignal(string $prompt, int $score, array $signals): array
{
    $matched = preg_match($this->config->getSizeTokenValuePattern(), $prompt) === 1;

    if (!$matched) {
        return [$score, $signals];
    }

    $score += $this->config->getSizeTokenSignalScore();
    $signals[] = $this->config->getSizeTokenSignalLabel();

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the color score and label when the prompt matches the configured
 * color-value pattern; otherwise returns score and signals unchanged.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applyColorSignal(string $prompt, int $score, array $signals): array
{
    $matched = preg_match($this->config->getColorValuePattern(), $prompt) === 1;

    if (!$matched) {
        return [$score, $signals];
    }

    $score += $this->config->getColorSignalScore();
    $signals[] = $this->config->getColorSignalLabel();

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Adds the advisory score once for every configured advisory signal that
 * occurs in the prompt and records it with the configured prefix.
 *
 * Each signal is lower-cased before the substring check; the prompt is
 * compared as given.
 *
 * @param string[] $signals signals collected so far
 * @return array{0:int,1:string[]} updated score and signal list
 */
private function applyAdvisorySignals(string $prompt, int $score, array $signals): array
{
    foreach ($this->config->getAdvisorySignals() as $advisorySignal) {
        $needle = mb_strtolower($advisorySignal);

        if (!str_contains($prompt, $needle)) {
            continue;
        }

        $score += $this->config->getAdvisorySignalScore();
        // Recorded as prefix + signal exactly as configured (not lower-cased).
        $signals[] = $this->config->getAdvisorySignalPrefix() . $advisorySignal;
    }

    return [$score, $signals];
}
|
||||
|
||||
/**
 * Packs the three detection values into an array with the keys
 * "intent", "score" and "signals", in that order.
 *
 * @param string[] $signals
 * @return array{intent:string, score:int, signals:string[]}
 */
private function buildDetectionResult(string $intent, int $score, array $signals): array
{
    // compact() keys the result by variable name, in argument order.
    return compact('intent', 'score', 'signals');
}
|
||||
}
|
||||
@@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
public function __construct(
|
||||
private string $projectDir,
|
||||
private LoggerInterface $agentLogger,
|
||||
private StopWords $stopWords,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
return true;
|
||||
}
|
||||
|
||||
return StopWords::isStopWord($token);
|
||||
return $this->stopWords->isStopWord($token);
|
||||
}
|
||||
|
||||
private function normalizeText(string $value): string
|
||||
@@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
* token:string,
|
||||
* chunk_id:string,
|
||||
* document_id:string,
|
||||
* chunk_index:?int,
|
||||
* chunk_index:?int,
|
||||
* tf:int,
|
||||
* title_tf:int,
|
||||
* df:int
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<?php
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Retrieval;
|
||||
@@ -21,10 +20,10 @@ final readonly class NdjsonLexicalIndexBuilder
|
||||
private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
|
||||
|
||||
public function __construct(
|
||||
private string $projectDir,
|
||||
private string $projectDir,
|
||||
private LoggerInterface $agentLogger,
|
||||
)
|
||||
{
|
||||
private StopWords $stopWords,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -345,7 +344,7 @@ final readonly class NdjsonLexicalIndexBuilder
|
||||
return true;
|
||||
}
|
||||
|
||||
return StopWords::isStopWord($token);
|
||||
return $this->stopWords->isStopWord($token);
|
||||
}
|
||||
|
||||
private function normalizeText(string $value): string
|
||||
|
||||
@@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Knowledge\StopWords;
|
||||
|
||||
final class QueryCleaner
|
||||
final readonly class QueryCleaner
|
||||
{
|
||||
public function __construct(
|
||||
private StopWords $stopWords
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans a query strictly for retrieval purposes.
|
||||
*
|
||||
@@ -66,7 +71,7 @@ final class QueryCleaner
|
||||
}
|
||||
|
||||
// Remove stop words
|
||||
if (StopWords::isStopWord($token)) {
|
||||
if ($this->stopWords->isStopWord($token)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,62 +4,25 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge;
|
||||
|
||||
final class StopWords
|
||||
use App\Config\StopWordsConfig;
|
||||
|
||||
final readonly class StopWords
|
||||
{
|
||||
/**
|
||||
* Retrieval-optimierte Stopwortliste (Deutsch).
|
||||
*
|
||||
* WICHTIG:
|
||||
* - Keine Negationen entfernen
|
||||
* - Keine Fragewörter entfernen
|
||||
* - Keine fachlichen Begriffe entfernen
|
||||
* - Nur echte Füll- und Strukturwörter
|
||||
*/
|
||||
private const STOP_WORDS = [
|
||||
|
||||
'mit',
|
||||
// Artikel
|
||||
'der', 'die', 'das',
|
||||
'ein', 'eine', 'einer', 'eines',
|
||||
'den', 'dem', 'des',
|
||||
|
||||
// Konjunktionen
|
||||
'und', 'oder', 'aber', 'sowie',
|
||||
|
||||
// Schwache Pronomen
|
||||
'ich', 'du', 'er', 'sie', 'es',
|
||||
'wir', 'ihr',
|
||||
|
||||
// Füllwörter
|
||||
'halt', 'eben', 'auch', 'schon',
|
||||
'noch', 'mal', 'bitte', 'danke',
|
||||
|
||||
// Strukturwörter
|
||||
'also', 'nun', 'tja',
|
||||
'dann', 'danach', 'davor',
|
||||
'hier', 'dort',
|
||||
|
||||
// Zeit-Füller (kontextarm)
|
||||
'heute', 'gestern', 'morgen',
|
||||
|
||||
// Höflichkeits-/Modalformen
|
||||
'könnte', 'kannst', 'kann',
|
||||
'würde', 'würdest', 'würden',
|
||||
];
|
||||
|
||||
/**
|
||||
* Gibt die vollständige Stopwortliste zurück.
|
||||
*/
|
||||
public static function getStopWords(): array
|
||||
{
|
||||
return self::STOP_WORDS;
|
||||
public function __construct(
|
||||
private StopWordsConfig $config
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Prüft, ob ein Wort ein Stopwort ist.
|
||||
* @return string[]
|
||||
*/
|
||||
public static function isStopWord(string $word): bool
|
||||
public function getStopWords(): array
|
||||
{
|
||||
return in_array($word, self::STOP_WORDS, true);
|
||||
return $this->config->getStopWords();
|
||||
}
|
||||
|
||||
public function isStopWord(string $word): bool
|
||||
{
|
||||
return in_array($word, $this->config->getStopWords(), true);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user