move intent and config values into config files
This commit is contained in:
@@ -120,12 +120,7 @@ services:
|
|||||||
|
|
||||||
App\Commerce\CommerceQueryParser: ~
|
App\Commerce\CommerceQueryParser: ~
|
||||||
|
|
||||||
App\Commerce\SearchRepairService:
|
App\Commerce\SearchRepairService: ~
|
||||||
arguments:
|
|
||||||
$logger: '@monolog.logger.agent'
|
|
||||||
$enabled: '%mto.commerce.search_repair.enabled%'
|
|
||||||
$maxRepairQueries: '%mto.commerce.search_repair.max_queries%'
|
|
||||||
$minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%'
|
|
||||||
|
|
||||||
App\Shopware\ShopwareCriteriaBuilder: ~
|
App\Shopware\ShopwareCriteriaBuilder: ~
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,6 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Agent;
|
namespace App\Agent;
|
||||||
|
|
||||||
use App\Commerce\CommerceReferenceResolver;
|
|
||||||
use App\Commerce\CommerceReferenceStore;
|
|
||||||
use App\Commerce\Dto\CommerceReferenceContext;
|
|
||||||
use App\Commerce\SearchRepairService;
|
use App\Commerce\SearchRepairService;
|
||||||
use App\Commerce\ShopSearchService;
|
use App\Commerce\ShopSearchService;
|
||||||
use App\Config\AgentRunnerConfig;
|
use App\Config\AgentRunnerConfig;
|
||||||
@@ -21,8 +18,6 @@ use Throwable;
|
|||||||
|
|
||||||
final readonly class AgentRunner
|
final readonly class AgentRunner
|
||||||
{
|
{
|
||||||
private const COMMERCE_HISTORY_BUDGET_CHARS = 1000;
|
|
||||||
|
|
||||||
private bool $systemMsgOn;
|
private bool $systemMsgOn;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
@@ -33,8 +28,6 @@ final readonly class AgentRunner
|
|||||||
private RetrieverInterface $retriever,
|
private RetrieverInterface $retriever,
|
||||||
private ShopSearchService $shopSearchService,
|
private ShopSearchService $shopSearchService,
|
||||||
private SearchRepairService $searchRepairService,
|
private SearchRepairService $searchRepairService,
|
||||||
private CommerceReferenceStore $commerceReferenceStore,
|
|
||||||
private CommerceReferenceResolver $commerceReferenceResolver,
|
|
||||||
private CommerceIntentLite $commerceIntentLite,
|
private CommerceIntentLite $commerceIntentLite,
|
||||||
private OllamaClient $ollamaClient,
|
private OllamaClient $ollamaClient,
|
||||||
private LoggerInterface $agentLogger,
|
private LoggerInterface $agentLogger,
|
||||||
@@ -51,14 +44,13 @@ final readonly class AgentRunner
|
|||||||
$prompt = trim($prompt);
|
$prompt = trim($prompt);
|
||||||
|
|
||||||
if ($prompt === '') {
|
if ($prompt === '') {
|
||||||
yield $this->systemMsg('❌ Empty prompt.', 'err');
|
yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$shopResults = [];
|
$shopResults = [];
|
||||||
$primaryShopResults = [];
|
$primaryShopResults = [];
|
||||||
$factSources = [];
|
$sources = [];
|
||||||
$contextSignals = [];
|
|
||||||
$optimizedShopQuery = '';
|
$optimizedShopQuery = '';
|
||||||
$shopSearchQuery = '';
|
$shopSearchQuery = '';
|
||||||
$commerceIntent = CommerceIntentLite::NONE;
|
$commerceIntent = CommerceIntentLite::NONE;
|
||||||
@@ -66,8 +58,6 @@ final readonly class AgentRunner
|
|||||||
$attemptedShopRepair = false;
|
$attemptedShopRepair = false;
|
||||||
$usedShopRepair = false;
|
$usedShopRepair = false;
|
||||||
$shopRepairQueries = [];
|
$shopRepairQueries = [];
|
||||||
$activeCommerceReference = null;
|
|
||||||
$shopChecked = false;
|
|
||||||
|
|
||||||
$this->agentLogger->info('Agent run started', [
|
$this->agentLogger->info('Agent run started', [
|
||||||
'userId' => $userId,
|
'userId' => $userId,
|
||||||
@@ -79,74 +69,39 @@ final readonly class AgentRunner
|
|||||||
// Additional context strategies can be added here later.
|
// Additional context strategies can be added here later.
|
||||||
}
|
}
|
||||||
|
|
||||||
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
|
||||||
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');
|
||||||
|
|
||||||
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
|
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
|
||||||
if ($urlContent !== '') {
|
if ($urlContent !== '') {
|
||||||
$this->addBadge($factSources, 'Externe URL');
|
$this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
|
||||||
}
|
}
|
||||||
|
|
||||||
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
|
||||||
|
|
||||||
$knowledgeChunks = $this->retriever->retrieve($prompt);
|
$knowledgeChunks = $this->retriever->retrieve($prompt);
|
||||||
if ($knowledgeChunks !== []) {
|
if ($knowledgeChunks !== []) {
|
||||||
$this->addBadge($factSources, 'RAG Wissen');
|
$this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
|
||||||
}
|
}
|
||||||
|
|
||||||
$commerceIntent = $this->detectCommerceIntent($prompt);
|
$commerceIntent = $this->detectCommerceIntent($prompt);
|
||||||
|
|
||||||
if ($this->isCommerceIntent($commerceIntent)) {
|
if ($this->isCommerceIntent($commerceIntent)) {
|
||||||
yield $this->systemMsg('Ich optimiere die Recherche...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
||||||
|
|
||||||
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
|
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
|
||||||
$activeCommerceReference = $this->loadCommerceReference($userId);
|
|
||||||
|
|
||||||
if ($commerceHistoryContext !== '') {
|
if ($commerceHistoryContext !== '') {
|
||||||
$this->addBadge($contextSignals, 'Gesprächskontext');
|
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($activeCommerceReference !== null) {
|
$optimizedShopQuery = $this->buildOptimizedShopQuery(
|
||||||
$this->addBadge($contextSignals, 'Commerce-Referenz');
|
|
||||||
}
|
|
||||||
|
|
||||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp(
|
|
||||||
$prompt,
|
$prompt,
|
||||||
$activeCommerceReference
|
$userId,
|
||||||
|
$commerceHistoryContext
|
||||||
);
|
);
|
||||||
|
|
||||||
if ($isReferenceOnlyFollowUp) {
|
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
|
||||||
$shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference);
|
|
||||||
|
|
||||||
if ($shopSearchQuery !== '') {
|
|
||||||
$this->addBadge($contextSignals, 'Deterministische Referenzsuche');
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->agentLogger->info('Using deterministic reference shop query', [
|
|
||||||
'userId' => $userId,
|
|
||||||
'commerceIntent' => $commerceIntent,
|
|
||||||
'prompt' => $prompt,
|
|
||||||
'shopSearchQuery' => $shopSearchQuery,
|
|
||||||
'referenceProductName' => $activeCommerceReference?->productName,
|
|
||||||
'referenceFocusTerms' => $activeCommerceReference?->focusTerms,
|
|
||||||
]);
|
|
||||||
} else {
|
|
||||||
$optimizedShopQuery = $this->buildOptimizedShopQuery(
|
|
||||||
$prompt,
|
|
||||||
$userId,
|
|
||||||
$commerceHistoryContext
|
|
||||||
);
|
|
||||||
|
|
||||||
if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) {
|
|
||||||
$this->addBadge($contextSignals, 'Query-Optimierung');
|
|
||||||
}
|
|
||||||
|
|
||||||
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($shopSearchQuery === '') {
|
|
||||||
$shopSearchQuery = $prompt;
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->agentLogger->info('Commerce search prepared', [
|
$this->agentLogger->info('Commerce search prepared', [
|
||||||
'userId' => $userId,
|
'userId' => $userId,
|
||||||
@@ -154,26 +109,20 @@ final readonly class AgentRunner
|
|||||||
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
|
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
|
||||||
'optimizedShopQuery' => $optimizedShopQuery,
|
'optimizedShopQuery' => $optimizedShopQuery,
|
||||||
'shopSearchQuery' => $shopSearchQuery,
|
'shopSearchQuery' => $shopSearchQuery,
|
||||||
'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp,
|
|
||||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||||
'hasActiveCommerceReference' => $activeCommerceReference !== null,
|
|
||||||
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
yield $this->systemMsg(
|
yield $this->systemMsg(
|
||||||
'Ich rufe Recherchedaten ab (type: ' . $commerceIntent . ')',
|
sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent),
|
||||||
'think'
|
'think'
|
||||||
);
|
);
|
||||||
|
|
||||||
$shopChecked = true;
|
|
||||||
|
|
||||||
$primaryShopResults = $this->searchShop(
|
$primaryShopResults = $this->searchShop(
|
||||||
$shopSearchQuery,
|
$shopSearchQuery,
|
||||||
$commerceIntent,
|
$commerceIntent,
|
||||||
$userId,
|
$userId,
|
||||||
$commerceHistoryContext,
|
$commerceHistoryContext
|
||||||
$activeCommerceReference
|
|
||||||
);
|
);
|
||||||
|
|
||||||
$repairPayload = $this->repairShopResults(
|
$repairPayload = $this->repairShopResults(
|
||||||
@@ -192,13 +141,11 @@ final readonly class AgentRunner
|
|||||||
$shopRepairQueries = $repairPayload['repairQueries'];
|
$shopRepairQueries = $repairPayload['repairQueries'];
|
||||||
|
|
||||||
if ($shopResults !== []) {
|
if ($shopResults !== []) {
|
||||||
$this->addBadge($factSources, 'Shopsystem');
|
$this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel());
|
||||||
} elseif ($shopChecked) {
|
|
||||||
$this->addBadge($factSources, 'Shopsystem geprüft');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($attemptedShopRepair) {
|
if ($attemptedShopRepair) {
|
||||||
$this->addBadge($contextSignals, 'Erweiterte Shopsuche');
|
$this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -206,7 +153,7 @@ final readonly class AgentRunner
|
|||||||
$knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
|
$knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
|
||||||
}
|
}
|
||||||
|
|
||||||
yield $this->systemMsg('Ich analysiere alle Informationen...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think');
|
||||||
|
|
||||||
$finalPrompt = $this->promptBuilder->build(
|
$finalPrompt = $this->promptBuilder->build(
|
||||||
prompt: $prompt,
|
prompt: $prompt,
|
||||||
@@ -226,7 +173,6 @@ final readonly class AgentRunner
|
|||||||
'shopSearchQuery' => $shopSearchQuery,
|
'shopSearchQuery' => $shopSearchQuery,
|
||||||
'primaryShopResultsCount' => count($primaryShopResults),
|
'primaryShopResultsCount' => count($primaryShopResults),
|
||||||
'shopResultsCount' => count($shopResults),
|
'shopResultsCount' => count($shopResults),
|
||||||
'shopChecked' => $shopChecked,
|
|
||||||
'attemptedShopRepair' => $attemptedShopRepair,
|
'attemptedShopRepair' => $attemptedShopRepair,
|
||||||
'usedShopRepair' => $usedShopRepair,
|
'usedShopRepair' => $usedShopRepair,
|
||||||
'shopRepairQueries' => $shopRepairQueries,
|
'shopRepairQueries' => $shopRepairQueries,
|
||||||
@@ -243,21 +189,19 @@ final readonly class AgentRunner
|
|||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($factSources !== [] || $contextSignals !== []) {
|
if ($sources !== []) {
|
||||||
yield $this->emitSourceSummary(
|
yield $this->emitSources(
|
||||||
$factSources,
|
$sources,
|
||||||
$contextSignals,
|
$this->agentRunnerConfig->getUsedSourcesPrefix()
|
||||||
'Genutzte Datenpfade'
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
|
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
|
||||||
|
|
||||||
if ($factSources !== [] || $contextSignals !== []) {
|
if ($sources !== []) {
|
||||||
yield $this->emitSourceSummary(
|
yield $this->emitSources(
|
||||||
$factSources,
|
$sources,
|
||||||
$contextSignals,
|
$this->agentRunnerConfig->getSourcesPrefix()
|
||||||
'Quellen und Signale'
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,11 +210,10 @@ final readonly class AgentRunner
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ($fullOutput !== '') {
|
if ($fullOutput !== '') {
|
||||||
$this->persistConversationState(
|
$this->contextService->appendHistory(
|
||||||
userId: $userId,
|
$userId,
|
||||||
prompt: $prompt,
|
$prompt,
|
||||||
fullOutput: $fullOutput,
|
$fullOutput
|
||||||
shopResults: $shopResults
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -281,7 +224,6 @@ final readonly class AgentRunner
|
|||||||
'commerceIntent' => $commerceIntent,
|
'commerceIntent' => $commerceIntent,
|
||||||
'primaryShopResultsCount' => count($primaryShopResults),
|
'primaryShopResultsCount' => count($primaryShopResults),
|
||||||
'shopResultsCount' => count($shopResults),
|
'shopResultsCount' => count($shopResults),
|
||||||
'shopChecked' => $shopChecked,
|
|
||||||
'attemptedShopRepair' => $attemptedShopRepair,
|
'attemptedShopRepair' => $attemptedShopRepair,
|
||||||
'usedShopRepair' => $usedShopRepair,
|
'usedShopRepair' => $usedShopRepair,
|
||||||
'shopRepairQueries' => $shopRepairQueries,
|
'shopRepairQueries' => $shopRepairQueries,
|
||||||
@@ -292,8 +234,6 @@ final readonly class AgentRunner
|
|||||||
'shopSearchQuery' => $shopSearchQuery,
|
'shopSearchQuery' => $shopSearchQuery,
|
||||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||||
'hasActiveCommerceReference' => $activeCommerceReference !== null,
|
|
||||||
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
|
|
||||||
]);
|
]);
|
||||||
} catch (Throwable $e) {
|
} catch (Throwable $e) {
|
||||||
$this->agentLogger->error('Agent run failed', [
|
$this->agentLogger->error('Agent run failed', [
|
||||||
@@ -361,42 +301,6 @@ final readonly class AgentRunner
|
|||||||
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
|
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isReferenceOnlyCommerceFollowUp(
|
|
||||||
string $prompt,
|
|
||||||
?CommerceReferenceContext $referenceContext
|
|
||||||
): bool {
|
|
||||||
if ($referenceContext === null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
$normalizedPrompt = mb_strtolower(trim($prompt), 'UTF-8');
|
|
||||||
$normalizedPrompt = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
|
|
||||||
$normalizedPrompt = preg_replace('/\s+/u', ' ', $normalizedPrompt) ?? $normalizedPrompt;
|
|
||||||
$normalizedPrompt = trim($normalizedPrompt);
|
|
||||||
|
|
||||||
if ($normalizedPrompt === '') {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $normalizedPrompt) === 1) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return preg_match(
|
|
||||||
'/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u',
|
|
||||||
$normalizedPrompt
|
|
||||||
) === 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string
|
|
||||||
{
|
|
||||||
if ($referenceContext === null) {
|
|
||||||
return '';
|
|
||||||
}
|
|
||||||
|
|
||||||
return trim($referenceContext->buildReferenceSearchText());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array{
|
* @return array{
|
||||||
* results: array,
|
* results: array,
|
||||||
@@ -445,15 +349,13 @@ final readonly class AgentRunner
|
|||||||
string $query,
|
string $query,
|
||||||
string $commerceIntent,
|
string $commerceIntent,
|
||||||
string $userId,
|
string $userId,
|
||||||
string $commerceHistoryContext = '',
|
string $commerceHistoryContext = ''
|
||||||
?CommerceReferenceContext $referenceContext = null
|
|
||||||
): array {
|
): array {
|
||||||
try {
|
try {
|
||||||
return $this->shopSearchService->search(
|
return $this->shopSearchService->search(
|
||||||
$query,
|
$query,
|
||||||
$commerceIntent,
|
$commerceIntent,
|
||||||
$commerceHistoryContext,
|
$commerceHistoryContext
|
||||||
$referenceContext
|
|
||||||
);
|
);
|
||||||
} catch (Throwable $e) {
|
} catch (Throwable $e) {
|
||||||
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
|
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
|
||||||
@@ -462,8 +364,6 @@ final readonly class AgentRunner
|
|||||||
'query' => $query,
|
'query' => $query,
|
||||||
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
|
||||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||||
'hasReferenceContext' => $referenceContext !== null,
|
|
||||||
'referenceProductName' => $referenceContext?->productName,
|
|
||||||
'exception' => $e,
|
'exception' => $e,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@@ -475,73 +375,23 @@ final readonly class AgentRunner
|
|||||||
{
|
{
|
||||||
return $this->contextService->buildUserContextWithinBudget(
|
return $this->contextService->buildUserContextWithinBudget(
|
||||||
$userId,
|
$userId,
|
||||||
self::COMMERCE_HISTORY_BUDGET_CHARS
|
$this->agentRunnerConfig->getCommerceHistoryBudgetChars()
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function loadCommerceReference(string $userId): ?CommerceReferenceContext
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
return $this->commerceReferenceStore->load($userId);
|
|
||||||
} catch (Throwable $e) {
|
|
||||||
$this->agentLogger->warning('Failed to load commerce reference context', [
|
|
||||||
'userId' => $userId,
|
|
||||||
'exception' => $e,
|
|
||||||
]);
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array<int, mixed> $shopResults
|
|
||||||
*/
|
|
||||||
private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
$referenceContext = $this->commerceReferenceResolver->resolveFromCommerceTurn(
|
|
||||||
$prompt,
|
|
||||||
$answer,
|
|
||||||
$shopResults
|
|
||||||
);
|
|
||||||
|
|
||||||
if ($referenceContext === null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->commerceReferenceStore->save($userId, $referenceContext);
|
|
||||||
} catch (Throwable $e) {
|
|
||||||
$this->agentLogger->warning('Failed to persist commerce reference context', [
|
|
||||||
'userId' => $userId,
|
|
||||||
'exception' => $e,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array<int, mixed> $shopResults
|
|
||||||
*/
|
|
||||||
private function persistConversationState(
|
|
||||||
string $userId,
|
|
||||||
string $prompt,
|
|
||||||
string $fullOutput,
|
|
||||||
array $shopResults
|
|
||||||
): void {
|
|
||||||
$this->contextService->appendHistory($userId, $prompt, $fullOutput);
|
|
||||||
|
|
||||||
$this->storeCommerceReference(
|
|
||||||
userId: $userId,
|
|
||||||
prompt: $prompt,
|
|
||||||
answer: $fullOutput,
|
|
||||||
shopResults: $shopResults
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
|
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
|
||||||
{
|
{
|
||||||
return match ($commerceIntent) {
|
return match ($commerceIntent) {
|
||||||
CommerceIntentLite::PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 2),
|
CommerceIntentLite::PRODUCT_SEARCH => array_slice(
|
||||||
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice($knowledgeChunks, 0, 3),
|
$knowledgeChunks,
|
||||||
|
0,
|
||||||
|
$this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit()
|
||||||
|
),
|
||||||
|
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice(
|
||||||
|
$knowledgeChunks,
|
||||||
|
0,
|
||||||
|
$this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit()
|
||||||
|
),
|
||||||
default => $knowledgeChunks,
|
default => $knowledgeChunks,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -555,8 +405,8 @@ final readonly class AgentRunner
|
|||||||
}
|
}
|
||||||
|
|
||||||
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query;
|
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query;
|
||||||
$query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query;
|
$query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query;
|
||||||
$query = trim($query, " \t\n\r\0\x0B\"'`");
|
$query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters());
|
||||||
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
|
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
|
||||||
|
|
||||||
return trim($query);
|
return trim($query);
|
||||||
@@ -582,7 +432,7 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
if ($cleanToken === '') {
|
if ($cleanToken === '') {
|
||||||
if ($firstThinkLoop) {
|
if ($firstThinkLoop) {
|
||||||
yield $this->systemMsg('Denke nach...', 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think');
|
||||||
$firstThinkLoop = false;
|
$firstThinkLoop = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -601,60 +451,46 @@ final readonly class AgentRunner
|
|||||||
if ($finalChunk !== null) {
|
if ($finalChunk !== null) {
|
||||||
yield $this->systemMsg($finalChunk, 'answer');
|
yield $this->systemMsg($finalChunk, 'answer');
|
||||||
} elseif ($fullOutput === '') {
|
} elseif ($fullOutput === '') {
|
||||||
yield $this->systemMsg('❌ Es wurden keine Daten vom LLM empfangen.', 'err');
|
yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err');
|
||||||
}
|
}
|
||||||
|
|
||||||
return $fullOutput;
|
return $fullOutput;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param string[] $factSources
|
* @param string[] $sources
|
||||||
* @param string[] $contextSignals
|
|
||||||
*/
|
*/
|
||||||
private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string
|
private function emitSources(array $sources, string $prefix): string
|
||||||
{
|
{
|
||||||
$parts = [];
|
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
|
||||||
|
|
||||||
if ($factSources !== []) {
|
|
||||||
$parts[] = 'Fakten: ' . implode(' ', $factSources);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($contextSignals !== []) {
|
|
||||||
$parts[] = 'Kontext: ' . implode(' ', $contextSignals);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $this->systemMsg(
|
|
||||||
$label . ': ' . implode(' ', $parts),
|
|
||||||
'info'
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param string[] $target
|
* @param string[] $sources
|
||||||
*/
|
*/
|
||||||
private function addBadge(array &$target, string $label): void
|
private function addSource(array &$sources, string $label): void
|
||||||
{
|
{
|
||||||
$badge = $this->badge($label);
|
$badge = $this->badge($label);
|
||||||
|
|
||||||
if (!in_array($badge, $target, true)) {
|
if (!in_array($badge, $sources, true)) {
|
||||||
$target[] = $badge;
|
$sources[] = $badge;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildUserErrorMessage(Throwable $e): string
|
private function buildUserErrorMessage(Throwable $e): string
|
||||||
{
|
{
|
||||||
if (!$this->debug) {
|
if (!$this->debug) {
|
||||||
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
|
return $this->agentRunnerConfig->getGenericInternalErrorMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
return '❌ Interner Fehler: '
|
return $this->agentRunnerConfig->getDebugInternalErrorPrefix()
|
||||||
. htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
. htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
||||||
}
|
}
|
||||||
|
|
||||||
private function badge(string $label): string
|
private function badge(string $label): string
|
||||||
{
|
{
|
||||||
return sprintf(
|
return sprintf(
|
||||||
'<span class="badge bg-info text-black">%s</span>',
|
$this->agentRunnerConfig->getSourceBadgeHtmlTemplate(),
|
||||||
htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
|
htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -667,10 +503,13 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
return match ($type) {
|
return match ($type) {
|
||||||
'answer' => $msg,
|
'answer' => $msg,
|
||||||
'err' => '<span class="text-danger">' . $msg . "</span>\n<hr>\n",
|
'err' => sprintf($this->agentRunnerConfig->getErrorHtmlTemplate(), $msg),
|
||||||
'think' => '<span class="text-info think">' . $msg . "</span>\n",
|
'think' => sprintf($this->agentRunnerConfig->getThinkHtmlTemplate(), $msg),
|
||||||
'info' => "\n\n<span class=\"text-info fw-bolder\">" . $msg . "</span>\n",
|
'info' => sprintf($this->agentRunnerConfig->getInfoHtmlTemplate(), $msg),
|
||||||
'debug' => "\n\nDEBUG: <code>" . htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</code>\n",
|
'debug' => sprintf(
|
||||||
|
$this->agentRunnerConfig->getDebugHtmlTemplate(),
|
||||||
|
htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
|
||||||
|
),
|
||||||
default => $msg,
|
default => $msg,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ final readonly class PromptBuilder
|
|||||||
private ContextService $contextService,
|
private ContextService $contextService,
|
||||||
private SystemPromptRepository $systemPromptRepository,
|
private SystemPromptRepository $systemPromptRepository,
|
||||||
private ModelGenerationConfigProvider $modelGenerationConfigProvider,
|
private ModelGenerationConfigProvider $modelGenerationConfigProvider,
|
||||||
|
private PromptBuilderConfig $config,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -31,7 +32,6 @@ final readonly class PromptBuilder
|
|||||||
* @param ShopProductResult[] $shopResults
|
* @param ShopProductResult[] $shopResults
|
||||||
* @param bool|null $fullContext
|
* @param bool|null $fullContext
|
||||||
* @param string|null $swagFullOutPut
|
* @param string|null $swagFullOutPut
|
||||||
* @return string
|
|
||||||
*/
|
*/
|
||||||
public function build(
|
public function build(
|
||||||
string $prompt,
|
string $prompt,
|
||||||
@@ -48,23 +48,21 @@ final readonly class PromptBuilder
|
|||||||
|
|
||||||
$hasShopResults = $shopResults !== [];
|
$hasShopResults = $shopResults !== [];
|
||||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||||
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
|
$asksForAccessoryOrBundle = $this->asksForAccessoryOrBundle($prompt);
|
||||||
|
|
||||||
$systemBlock = $this->buildSystemBlock();
|
$systemBlock = $this->buildSystemBlock();
|
||||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
|
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
|
||||||
$responseFormatBlock = $this->buildResponseFormatBlock(
|
$responseFormatBlock = $this->buildResponseFormatBlock(
|
||||||
$prompt,
|
hasShopResults: $hasShopResults,
|
||||||
$hasShopResults,
|
isTechnicalProductQuestion: $isTechnicalProductQuestion,
|
||||||
$isTechnicalProductQuestion,
|
asksForAccessoryOrBundle: $asksForAccessoryOrBundle
|
||||||
$isPriceDrivenQuestion
|
|
||||||
);
|
);
|
||||||
$knowledgeBlock = $this->buildKnowledgeBlock(
|
$knowledgeBlock = $this->buildKnowledgeBlock(
|
||||||
$knowledgeChunks,
|
knowledgeChunks: $knowledgeChunks,
|
||||||
$urlContent,
|
urlContent: $urlContent,
|
||||||
$prompt,
|
hasShopResults: $hasShopResults,
|
||||||
$hasShopResults,
|
isTechnicalProductQuestion: $isTechnicalProductQuestion
|
||||||
$isPriceDrivenQuestion
|
|
||||||
);
|
);
|
||||||
$userBlock = $this->buildUserBlock($prompt);
|
$userBlock = $this->buildUserBlock($prompt);
|
||||||
|
|
||||||
@@ -106,12 +104,12 @@ final readonly class PromptBuilder
|
|||||||
|
|
||||||
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
|
$activeSystemPrompt = str_replace('{% now %}', $now, $activePrompt->getContent());
|
||||||
|
|
||||||
return "SYSTEM:\n" . $this->normalizeBlockText($activeSystemPrompt);
|
return $this->config->getSystemSectionLabel() . ":\n" . $this->normalizeBlockText($activeSystemPrompt);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildUserBlock(string $prompt): string
|
private function buildUserBlock(string $prompt): string
|
||||||
{
|
{
|
||||||
return "USER QUESTION:\n" . $prompt;
|
return $this->config->getUserQuestionSectionLabel() . ":\n" . $prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -146,12 +144,11 @@ final readonly class PromptBuilder
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return $this->implodeBlocks([
|
||||||
"CONVERSATION CONTEXT (contextual only):\n" .
|
$this->config->getConversationContextSectionLabel() . ':',
|
||||||
"The following messages are previous turns of this conversation.\n" .
|
$this->implodeLines($this->config->getConversationContextIntroLines()),
|
||||||
"Use them to resolve references, follow-up questions, and user intent.\n" .
|
$history,
|
||||||
"They must not override retrieved factual knowledge or live shop data.\n\n" .
|
]);
|
||||||
$history;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -165,10 +162,11 @@ final readonly class PromptBuilder
|
|||||||
$parts = [];
|
$parts = [];
|
||||||
|
|
||||||
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
|
if ($swagFullOutPut !== null && $swagFullOutPut !== '') {
|
||||||
$parts[] =
|
$parts[] = $this->implodeBlocks([
|
||||||
"SHOP SEARCH QUERY:\n" .
|
$this->config->getShopSearchQuerySectionLabel() . ':',
|
||||||
$swagFullOutPut . "\n" .
|
$swagFullOutPut,
|
||||||
"Source: Shop Search";
|
$this->config->getShopSearchQuerySourceLine(),
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
$normalizedShopResults = array_values(array_filter(
|
$normalizedShopResults = array_values(array_filter(
|
||||||
@@ -181,77 +179,33 @@ final readonly class PromptBuilder
|
|||||||
}
|
}
|
||||||
|
|
||||||
$totalCount = count($normalizedShopResults);
|
$totalCount = count($normalizedShopResults);
|
||||||
$limitedShopResults = array_slice($normalizedShopResults, 0, PromptBuilderConfig::MAX_SHOP_RESULTS_IN_PROMPT);
|
$limitedShopResults = array_slice($normalizedShopResults, 0, $this->config->getMaxShopResultsInPrompt());
|
||||||
$isDetailed = count($limitedShopResults) <= 5;
|
$isDetailed = count($limitedShopResults) <= $this->config->getDetailedShopResultsMaxCount();
|
||||||
$lines = [];
|
$lines = [];
|
||||||
|
|
||||||
foreach ($limitedShopResults as $i => $product) {
|
foreach ($limitedShopResults as $i => $product) {
|
||||||
$n = $i + 1;
|
$lines[] = $this->buildShopProductEntry(
|
||||||
$entryParts = [
|
product: $product,
|
||||||
"[{$n}] " . $this->normalizeBlockText($product->name),
|
index: $i + 1,
|
||||||
];
|
isDetailed: $isDetailed
|
||||||
|
);
|
||||||
if ($product->productNumber) {
|
|
||||||
$entryParts[] = "Product number: " . $this->normalizeBlockText($product->productNumber);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->manufacturer) {
|
|
||||||
$entryParts[] = "Manufacturer: " . $this->normalizeBlockText($product->manufacturer);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->price) {
|
|
||||||
$entryParts[] = "Price: " . $this->normalizeBlockText($product->price);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->available !== null) {
|
|
||||||
$entryParts[] = "Available: " . ($product->available ? 'yes' : 'no');
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($product->highlights as $highlight) {
|
|
||||||
$highlight = $this->normalizeBlockText((string) $highlight);
|
|
||||||
|
|
||||||
if ($highlight !== '') {
|
|
||||||
$entryParts[] = "- " . $highlight;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->url) {
|
|
||||||
$entryParts[] = "URL: " . $this->normalizeBlockText($product->url);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->productImage) {
|
|
||||||
$entryParts[] = "Product image: " . $this->normalizeBlockText($product->productImage);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($isDetailed && $product->description) {
|
|
||||||
$entryParts[] = "Description: " . $this->normalizeBlockText($product->description);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($product->customFields) {
|
|
||||||
$entryParts[] = "Meta information: " . $this->normalizeBlockText($product->customFields);
|
|
||||||
}
|
|
||||||
|
|
||||||
$lines[] = implode("\n", $entryParts);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($lines !== []) {
|
if ($lines !== []) {
|
||||||
$header =
|
$headerLines = $this->config->getLiveShopResultsHeaderLines();
|
||||||
"LIVE SHOP RESULTS (authoritative for current commercial details):\n" .
|
|
||||||
"Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.\n" .
|
|
||||||
"If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.\n" .
|
|
||||||
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
|
|
||||||
"Do not infer undocumented technical specifications from shop data.\n" .
|
|
||||||
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
|
|
||||||
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
|
|
||||||
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
|
|
||||||
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
|
|
||||||
|
|
||||||
if ($totalCount > count($limitedShopResults)) {
|
if ($totalCount > count($limitedShopResults)) {
|
||||||
$header .= "\n" .
|
$headerLines[] = sprintf(
|
||||||
"Only the top " . count($limitedShopResults) . " ranked shop results are shown here out of {$totalCount} total results.";
|
$this->config->getLiveShopResultsOverflowNoticeTemplate(),
|
||||||
|
count($limitedShopResults),
|
||||||
|
$totalCount
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$parts[] = $header . "\n\n" . implode("\n\n", $lines);
|
$parts[] = $this->implodeBlocks([
|
||||||
|
$this->implodeLines($headerLines),
|
||||||
|
implode("\n\n", $lines),
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->implodeBlocks($parts);
|
return $this->implodeBlocks($parts);
|
||||||
@@ -260,89 +214,60 @@ final readonly class PromptBuilder
|
|||||||
/**
|
/**
|
||||||
* Build a small priority block that tells the model what to surface first.
|
* Build a small priority block that tells the model what to surface first.
|
||||||
*/
|
*/
|
||||||
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
|
private function buildOutputPriorityBlock(bool $hasShopResults): string
|
||||||
{
|
{
|
||||||
if (!$hasShopResults) {
|
if (!$hasShopResults) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($isPriceDrivenQuestion) {
|
return $this->buildRuleBlock(
|
||||||
return
|
$this->config->getOutputPrioritySectionLabel(),
|
||||||
"OUTPUT PRIORITY:\n" .
|
$this->config->getOutputPriorityRules()
|
||||||
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
|
);
|
||||||
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
|
|
||||||
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
"OUTPUT PRIORITY:\n" .
|
|
||||||
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
|
|
||||||
"If shop results are present, use them afterwards to add current price, availability, and the actual URL.\n" .
|
|
||||||
"Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildResponseFormatBlock(
|
private function buildResponseFormatBlock(
|
||||||
string $prompt,
|
|
||||||
bool $hasShopResults,
|
bool $hasShopResults,
|
||||||
bool $isTechnicalProductQuestion,
|
bool $isTechnicalProductQuestion,
|
||||||
bool $isPriceDrivenQuestion
|
bool $asksForAccessoryOrBundle
|
||||||
): string {
|
): string {
|
||||||
$rules = [
|
$rules = $this->config->getResponseFormatBaseRules();
|
||||||
"RESPONSE FORMAT RULES:",
|
|
||||||
"- Keep normal spacing between all words. Never fuse words together.",
|
|
||||||
"- Use short, clean paragraphs or short labeled sections.",
|
|
||||||
"- Do not use persuasive or promotional wording.",
|
|
||||||
"- Do not repeat the same fact in slightly different wording.",
|
|
||||||
"- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.",
|
|
||||||
"- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.",
|
|
||||||
"- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.",
|
|
||||||
"- Do not combine technical identity from one source with commercial fields from a different product.",
|
|
||||||
"- Product number, price, availability, and URL must belong to the same explicitly grounded product.",
|
|
||||||
];
|
|
||||||
|
|
||||||
if ($hasShopResults) {
|
if ($hasShopResults) {
|
||||||
$rules[] = "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.";
|
$rules = array_merge($rules, $this->config->getResponseFormatWithShopRules());
|
||||||
$rules[] = "- Keep price, availability, and URL on separate lines when they are present.";
|
|
||||||
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
|
|
||||||
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
|
|
||||||
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
|
|
||||||
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
|
|
||||||
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
|
|
||||||
} else {
|
} else {
|
||||||
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
|
$rules = array_merge($rules, $this->config->getResponseFormatWithoutShopRules());
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($isTechnicalProductQuestion) {
|
if ($isTechnicalProductQuestion) {
|
||||||
$rules[] = "- Write like technical documentation: precise, neutral, and source-close.";
|
$rules = array_merge($rules, $this->config->getResponseFormatTechnicalRules());
|
||||||
$rules[] = "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.";
|
|
||||||
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($isPriceDrivenQuestion) {
|
if ($asksForAccessoryOrBundle) {
|
||||||
$rules[] = "- For price-driven questions, answer the threshold result first.";
|
$rules = array_merge($rules, $this->config->getResponseFormatAccessoryRules());
|
||||||
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
|
|
||||||
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($this->asksForAccessoryOrBundle($prompt)) {
|
return $this->buildRuleBlock(
|
||||||
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
|
$this->config->getResponseFormatSectionLabel(),
|
||||||
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
|
$rules
|
||||||
$rules[] = "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.";
|
);
|
||||||
$rules[] = "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.";
|
|
||||||
}
|
|
||||||
|
|
||||||
return implode("\n", $rules);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the knowledge block.
|
||||||
|
*
|
||||||
|
* Retrieved knowledge remains the main source for technical matching and explanation.
|
||||||
|
* Shop data is preferred for current commercial fields.
|
||||||
|
*
|
||||||
|
* @param string[] $knowledgeChunks
|
||||||
|
*/
|
||||||
private function buildKnowledgeBlock(
|
private function buildKnowledgeBlock(
|
||||||
array $knowledgeChunks,
|
array $knowledgeChunks,
|
||||||
string $urlContent,
|
string $urlContent,
|
||||||
string $prompt,
|
|
||||||
bool $hasShopResults,
|
bool $hasShopResults,
|
||||||
bool $isPriceDrivenQuestion
|
bool $isTechnicalProductQuestion
|
||||||
): string {
|
): string {
|
||||||
$knowledgeParts = [];
|
$knowledgeParts = [];
|
||||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
|
||||||
|
|
||||||
if ($knowledgeChunks !== []) {
|
if ($knowledgeChunks !== []) {
|
||||||
$lines = [];
|
$lines = [];
|
||||||
@@ -359,56 +284,71 @@ final readonly class PromptBuilder
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ($lines !== []) {
|
if ($lines !== []) {
|
||||||
$parts = [
|
$knowledgeParts[] = $this->implodeBlocks([
|
||||||
"LANGUAGE RULES:\n" .
|
$this->buildRuleBlock(
|
||||||
implode("\n", $this->buildLanguageRules()),
|
$this->config->getLanguageRulesSectionLabel(),
|
||||||
"FACT GROUNDING RULES:\n" .
|
$this->config->getLanguageRules()
|
||||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
|
),
|
||||||
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
|
$this->buildRuleBlock(
|
||||||
"Source: Documents\n" .
|
$this->config->getFactGroundingRulesSectionLabel(),
|
||||||
implode("\n\n", $lines),
|
$this->buildFactGroundingRules(
|
||||||
];
|
hasShopResults: $hasShopResults,
|
||||||
|
isTechnicalProductQuestion: $isTechnicalProductQuestion
|
||||||
$knowledgeParts[] = implode("\n\n", $parts);
|
)
|
||||||
|
),
|
||||||
|
$this->implodeBlocks([
|
||||||
|
$this->config->getRetrievedKnowledgeSectionLabel() . ':',
|
||||||
|
$this->config->getRetrievedKnowledgeSourceLine(),
|
||||||
|
implode("\n\n", $lines),
|
||||||
|
]),
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($urlContent !== '') {
|
if ($urlContent !== '') {
|
||||||
$knowledgeParts[] =
|
$knowledgeParts[] = $this->implodeBlocks([
|
||||||
"CONTENT FROM URL (authoritative if user-provided):\n" .
|
$this->config->getUrlContentSectionLabel() . ':',
|
||||||
"Source: URL\n" .
|
$this->config->getUrlContentSourceLine(),
|
||||||
$urlContent;
|
$urlContent,
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->implodeBlocks($knowledgeParts);
|
return $this->implodeBlocks($knowledgeParts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve how many characters may still be used by history.
|
||||||
|
*
|
||||||
|
* The active model num_ctx is converted into a conservative prompt budget.
|
||||||
|
* Shop, knowledge and user question are fixed priority blocks.
|
||||||
|
* History only receives the remaining space.
|
||||||
|
*/
|
||||||
private function resolveHistoryBudgetChars(string $fixedPrompt): int
|
private function resolveHistoryBudgetChars(string $fixedPrompt): int
|
||||||
{
|
{
|
||||||
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
|
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
|
||||||
|
|
||||||
$outputReserveTokens = $this->clamp(
|
$outputReserveTokens = $this->clamp(
|
||||||
(int) floor($numCtx * PromptBuilderConfig::OUTPUT_RESERVE_RATIO),
|
(int) floor($numCtx * $this->config->getOutputReserveRatio()),
|
||||||
PromptBuilderConfig::OUTPUT_RESERVE_MIN_TOKENS,
|
$this->config->getOutputReserveMinTokens(),
|
||||||
PromptBuilderConfig::OUTPUT_RESERVE_MAX_TOKENS
|
$this->config->getOutputReserveMaxTokens()
|
||||||
);
|
);
|
||||||
|
|
||||||
$safetyReserveTokens = $this->clamp(
|
$safetyReserveTokens = $this->clamp(
|
||||||
(int) floor($numCtx * PromptBuilderConfig::SAFETY_RESERVE_RATIO),
|
(int) floor($numCtx * $this->config->getSafetyReserveRatio()),
|
||||||
PromptBuilderConfig::SAFETY_RESERVE_MIN_TOKENS,
|
$this->config->getSafetyReserveMinTokens(),
|
||||||
PromptBuilderConfig::SAFETY_RESERVE_MAX_TOKENS
|
$this->config->getSafetyReserveMaxTokens()
|
||||||
);
|
);
|
||||||
|
|
||||||
$promptBudgetTokens = max(
|
$promptBudgetTokens = max(
|
||||||
PromptBuilderConfig::MIN_PROMPT_BUDGET_TOKENS,
|
$this->config->getMinPromptBudgetTokens(),
|
||||||
$numCtx - $outputReserveTokens - $safetyReserveTokens
|
$numCtx - $outputReserveTokens - $safetyReserveTokens
|
||||||
);
|
);
|
||||||
|
|
||||||
$promptBudgetChars = $promptBudgetTokens * PromptBuilderConfig::CHARS_PER_TOKEN;
|
$promptBudgetChars = $promptBudgetTokens * $this->config->getCharsPerToken();
|
||||||
|
|
||||||
$remaining = $promptBudgetChars
|
$remaining = $promptBudgetChars
|
||||||
- mb_strlen($fixedPrompt)
|
- mb_strlen($fixedPrompt)
|
||||||
- PromptBuilderConfig::HISTORY_PADDING_CHARS;
|
- $this->config->getHistoryPaddingChars();
|
||||||
|
|
||||||
return max(0, $remaining);
|
return max(0, $remaining);
|
||||||
}
|
}
|
||||||
@@ -416,87 +356,118 @@ final readonly class PromptBuilder
|
|||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
private function buildLanguageRules(): array
|
private function buildFactGroundingRules(bool $hasShopResults, bool $isTechnicalProductQuestion): array
|
||||||
{
|
{
|
||||||
return [
|
$rules = $this->config->getFactGroundingBaseRules();
|
||||||
"- Answer only in the same language as the user question.",
|
|
||||||
"- All headings, labels, notes, and structural elements must be in the same language as the user question.",
|
|
||||||
"- Do not switch languages unless the user does.",
|
|
||||||
"- If headings are used, write them in the user's language.",
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return string[]
|
|
||||||
*/
|
|
||||||
private function buildFactGroundingRules(
|
|
||||||
bool $isTechnicalProductQuestion,
|
|
||||||
bool $hasShopResults,
|
|
||||||
bool $isPriceDrivenQuestion
|
|
||||||
): array {
|
|
||||||
$rules = [
|
|
||||||
"- State only facts that are explicitly present in the provided sources.",
|
|
||||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
|
|
||||||
"- Do not invent missing values.",
|
|
||||||
"- Do not replace missing values with estimates, defaults, or typical industry assumptions.",
|
|
||||||
"- Do not claim that information is missing if it appears in the provided sources.",
|
|
||||||
"- Do not compare with other products unless those products are also present in the provided sources.",
|
|
||||||
"- Prefer source-faithful wording over persuasive wording.",
|
|
||||||
"- Avoid marketing language such as 'ideal', 'perfect', 'unverzichtbar', 'entscheidend', 'optimal', 'kosteneffizient', 'prozesssicher', or 'state-of-the-art'.",
|
|
||||||
"- Clearly separate explicit facts from inferences.",
|
|
||||||
"- If a conclusion goes beyond the source wording, label it exactly as 'Inference:'.",
|
|
||||||
"- If a sentence cannot be traced to the provided sources, do not write it.",
|
|
||||||
"- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.",
|
|
||||||
"- If the sources do not identify a suitable product, do not invent one.",
|
|
||||||
];
|
|
||||||
|
|
||||||
if ($hasShopResults) {
|
if ($hasShopResults) {
|
||||||
$rules = array_merge($rules, [
|
$rules = array_merge($rules, $this->config->getFactGroundingWithShopRules());
|
||||||
"- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.",
|
|
||||||
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
|
|
||||||
"- When shop results are present and relevant, include current price and the actual URL if available.",
|
|
||||||
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
|
|
||||||
"- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
|
|
||||||
"- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
|
|
||||||
"- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.",
|
|
||||||
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
|
|
||||||
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
|
|
||||||
]);
|
|
||||||
|
|
||||||
if ($isPriceDrivenQuestion) {
|
|
||||||
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
|
|
||||||
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
|
$rules = array_merge($rules, $this->config->getFactGroundingWithoutShopRules());
|
||||||
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($isTechnicalProductQuestion) {
|
if ($isTechnicalProductQuestion) {
|
||||||
$rules = array_merge($rules, [
|
$rules = array_merge($rules, $this->config->getFactGroundingTechnicalRules());
|
||||||
"- For technical product questions, answer primarily with explicitly stated facts.",
|
|
||||||
"- Behave like a technical documentation assistant, not like a sales advisor.",
|
|
||||||
"- Keep interpretations minimal and do not generalize application areas beyond the provided sources.",
|
|
||||||
"- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.",
|
|
||||||
"- Do not translate technical facts into business value unless the source explicitly does so.",
|
|
||||||
"- Do not recommend process changes unless explicitly present in the source.",
|
|
||||||
"- Do not use persuasive summaries or advisory conclusions.",
|
|
||||||
"- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.",
|
|
||||||
"- Use neutral engineering language.",
|
|
||||||
"- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.",
|
|
||||||
"- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.",
|
|
||||||
"- If the source lists application areas, repeat only those areas and do not broaden them.",
|
|
||||||
"- If the source names an indicator and threshold, reproduce that exactly without extrapolation.",
|
|
||||||
"- If the source states only a threshold function, do not expand it into broader control logic.",
|
|
||||||
"- If a detail is not explicitly stated in the provided sources, say so plainly.",
|
|
||||||
"- Prefer short, source-close sentences over explanatory expansion.",
|
|
||||||
"- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.",
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $rules;
|
return $rules;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function buildShopProductEntry(ShopProductResult $product, int $index, bool $isDetailed): string
|
||||||
|
{
|
||||||
|
$entryParts = [
|
||||||
|
"[{$index}] " . $this->normalizeBlockText($product->name),
|
||||||
|
];
|
||||||
|
|
||||||
|
if ($product->productNumber) {
|
||||||
|
$entryParts[] = $this->config->getShopProductNumberLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->productNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->manufacturer) {
|
||||||
|
$entryParts[] = $this->config->getShopManufacturerLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->manufacturer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->price) {
|
||||||
|
$entryParts[] = $this->config->getShopPriceLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->price);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->available !== null) {
|
||||||
|
$entryParts[] = $this->config->getShopAvailabilityLabel() . ': '
|
||||||
|
. ($product->available
|
||||||
|
? $this->config->getShopAvailabilityYesLabel()
|
||||||
|
: $this->config->getShopAvailabilityNoLabel());
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($product->highlights as $highlight) {
|
||||||
|
$highlight = $this->normalizeBlockText((string) $highlight);
|
||||||
|
|
||||||
|
if ($highlight !== '') {
|
||||||
|
$entryParts[] = $this->config->getShopHighlightPrefix() . $highlight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->url) {
|
||||||
|
$entryParts[] = $this->config->getShopUrlLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->url);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->productImage) {
|
||||||
|
$entryParts[] = $this->config->getShopProductImageLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->productImage);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($isDetailed && $product->description) {
|
||||||
|
$entryParts[] = $this->config->getShopDescriptionLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->description);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($product->customFields) {
|
||||||
|
$entryParts[] = $this->config->getShopMetaInformationLabel() . ': '
|
||||||
|
. $this->normalizeBlockText($product->customFields);
|
||||||
|
}
|
||||||
|
|
||||||
|
return implode("\n", $entryParts);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $rules
|
||||||
|
*/
|
||||||
|
private function buildRuleBlock(string $sectionLabel, array $rules): string
|
||||||
|
{
|
||||||
|
$normalizedRules = array_values(array_filter(
|
||||||
|
array_map(
|
||||||
|
fn(string $rule): string => $this->normalizeBlockText($rule),
|
||||||
|
$rules
|
||||||
|
),
|
||||||
|
static fn(string $rule): bool => $rule !== ''
|
||||||
|
));
|
||||||
|
|
||||||
|
if ($normalizedRules === []) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
return $sectionLabel . ":\n" . implode("\n", $normalizedRules);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $lines
|
||||||
|
*/
|
||||||
|
private function implodeLines(array $lines): string
|
||||||
|
{
|
||||||
|
$normalizedLines = array_values(array_filter(
|
||||||
|
array_map(
|
||||||
|
fn(string $line): string => $this->normalizeBlockText($line),
|
||||||
|
$lines
|
||||||
|
),
|
||||||
|
static fn(string $line): bool => $line !== ''
|
||||||
|
));
|
||||||
|
|
||||||
|
return implode("\n", $normalizedLines);
|
||||||
|
}
|
||||||
|
|
||||||
private function implodeBlocks(array $blocks): string
|
private function implodeBlocks(array $blocks): string
|
||||||
{
|
{
|
||||||
$filtered = array_values(array_filter(
|
$filtered = array_values(array_filter(
|
||||||
@@ -537,41 +508,26 @@ final readonly class PromptBuilder
|
|||||||
private function isLikelyTechnicalProductQuestion(string $prompt): bool
|
private function isLikelyTechnicalProductQuestion(string $prompt): bool
|
||||||
{
|
{
|
||||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||||
|
|
||||||
$matches = 0;
|
$matches = 0;
|
||||||
|
|
||||||
foreach (PromptBuilderConfig::TECHNICAL_PRODUCT_KEYWORDS as $keyword) {
|
foreach ($this->config->getTechnicalProductKeywords() as $keyword) {
|
||||||
if (str_contains($normalized, $keyword)) {
|
if (str_contains($normalized, $keyword)) {
|
||||||
$matches++;
|
$matches++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($matches >= 2) {
|
if ($matches >= $this->config->getTechnicalProductKeywordMatchThreshold()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
|
return preg_match($this->config->getTechnicalProductModelPattern(), $prompt) === 1;
|
||||||
}
|
|
||||||
|
|
||||||
private function isLikelyPriceDrivenQuestion(string $prompt): bool
|
|
||||||
{
|
|
||||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
|
||||||
|
|
||||||
if (preg_match('/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*(?:euro|eur|€)\b/u', $normalized) === 1) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return str_contains($normalized, 'preis')
|
|
||||||
|| str_contains($normalized, 'preise')
|
|
||||||
|| str_contains($normalized, 'kosten')
|
|
||||||
|| str_contains($normalized, 'kostet');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function asksForAccessoryOrBundle(string $prompt): bool
|
private function asksForAccessoryOrBundle(string $prompt): bool
|
||||||
{
|
{
|
||||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||||
|
|
||||||
foreach (PromptBuilderConfig::ACCESSORY_REQUEST_KEYWORDS as $keyword) {
|
foreach ($this->config->getAccessoryRequestKeywords() as $keyword) {
|
||||||
if (str_contains($normalized, $keyword)) {
|
if (str_contains($normalized, $keyword)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Commerce;
|
namespace App\Commerce;
|
||||||
|
|
||||||
use App\Commerce\Dto\CommerceReferenceContext;
|
|
||||||
use App\Commerce\Dto\CommerceSearchQuery;
|
use App\Commerce\Dto\CommerceSearchQuery;
|
||||||
use App\Config\CommerceIntentConfig;
|
use App\Config\CommerceIntentConfig;
|
||||||
use App\Config\CommerceQueryParserConfig;
|
use App\Config\CommerceQueryParserConfig;
|
||||||
@@ -24,12 +23,10 @@ final readonly class CommerceQueryParser
|
|||||||
public function parse(
|
public function parse(
|
||||||
string $originalPrompt,
|
string $originalPrompt,
|
||||||
string $intent,
|
string $intent,
|
||||||
string $historyContext = '',
|
string $historyContext = ''
|
||||||
?CommerceReferenceContext $referenceContext = null
|
|
||||||
): CommerceSearchQuery {
|
): CommerceSearchQuery {
|
||||||
$normalizedPrompt = $this->normalize($originalPrompt);
|
$normalizedPrompt = $this->normalize($originalPrompt);
|
||||||
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
||||||
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
|
|
||||||
|
|
||||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
||||||
$sizes = $this->extractSizes($normalizedPrompt);
|
$sizes = $this->extractSizes($normalizedPrompt);
|
||||||
@@ -47,58 +44,23 @@ final readonly class CommerceQueryParser
|
|||||||
if (
|
if (
|
||||||
!$isDirectProductQuery
|
!$isDirectProductQuery
|
||||||
&& $historyContext !== ''
|
&& $historyContext !== ''
|
||||||
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
|
&& $this->shouldUseHistoryContext($normalizedPrompt)
|
||||||
) {
|
) {
|
||||||
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
$historyParse = $this->parseHistoryContext($historyContext);
|
||||||
|
|
||||||
if ($latestHistoryQuestion !== '') {
|
if ($historyParse !== null) {
|
||||||
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
|
$searchText = $this->mergeSearchTexts(
|
||||||
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
|
$historyParse['searchText'],
|
||||||
|
$searchText
|
||||||
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
|
|
||||||
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
|
|
||||||
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
|
|
||||||
|
|
||||||
$historySearchText = $this->buildSearchText(
|
|
||||||
prompt: $normalizedHistoryPrompt,
|
|
||||||
sizes: $historySizes,
|
|
||||||
brand: $historyBrand,
|
|
||||||
priceMin: $historyPriceMin,
|
|
||||||
priceMax: $historyPriceMax,
|
|
||||||
preserveDirectProductQuery: $isDirectHistoryProductQuery
|
|
||||||
);
|
);
|
||||||
|
|
||||||
$searchText = $this->mergeSearchTexts($historySearchText, $searchText);
|
if (($brand === null || $brand === '') && $historyParse['brand'] !== null && $historyParse['brand'] !== '') {
|
||||||
|
$brand = $historyParse['brand'];
|
||||||
if (($brand === null || $brand === '') && $historyBrand !== null && $historyBrand !== '') {
|
|
||||||
$brand = $historyBrand;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
|
||||||
!$isDirectProductQuery
|
|
||||||
&& $referenceContext !== null
|
|
||||||
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
|
|
||||||
) {
|
|
||||||
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
|
|
||||||
|
|
||||||
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
|
|
||||||
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
|
|
||||||
} else {
|
|
||||||
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
|
|
||||||
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
|
|
||||||
|
|
||||||
if ($normalizedManufacturer !== '') {
|
|
||||||
$brand = $normalizedManufacturer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
|
|
||||||
|
|
||||||
return new CommerceSearchQuery(
|
return new CommerceSearchQuery(
|
||||||
originalPrompt: $originalPrompt,
|
originalPrompt: $originalPrompt,
|
||||||
@@ -118,10 +80,14 @@ final readonly class CommerceQueryParser
|
|||||||
{
|
{
|
||||||
$value = $this->textNormalizer->normalize($prompt);
|
$value = $this->textNormalizer->normalize($prompt);
|
||||||
$value = $this->queryCleaner->clean($value);
|
$value = $this->queryCleaner->clean($value);
|
||||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
$value = mb_strtolower(trim($value));
|
||||||
$value = str_replace(['€'], ' euro ', $value);
|
$value = str_replace(
|
||||||
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
|
$this->config->getNormalizationSearch(),
|
||||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
$this->config->getNormalizationReplace(),
|
||||||
|
$value
|
||||||
|
);
|
||||||
|
$value = preg_replace($this->config->getPromptSanitizePattern(), ' ', $value) ?? $value;
|
||||||
|
$value = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
|
||||||
|
|
||||||
return trim($value);
|
return trim($value);
|
||||||
}
|
}
|
||||||
@@ -134,32 +100,21 @@ final readonly class CommerceQueryParser
|
|||||||
$priceMin = null;
|
$priceMin = null;
|
||||||
$priceMax = null;
|
$priceMax = null;
|
||||||
|
|
||||||
if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
if (preg_match($this->config->getPriceBetweenPattern(), $prompt, $matches) === 1) {
|
||||||
$a = $this->toFloat($m[1]);
|
$a = $this->toFloat($matches[1]);
|
||||||
$b = $this->toFloat($m[2]);
|
$b = $this->toFloat($matches[2]);
|
||||||
|
|
||||||
if ($a !== null && $b !== null) {
|
if ($a !== null && $b !== null) {
|
||||||
return [min($a, $b), max($a, $b)];
|
return [min($a, $b), max($a, $b)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match('/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
if (preg_match($this->config->getPriceMaxPattern(), $prompt, $matches) === 1) {
|
||||||
$priceMax = $this->toFloat($m[1]);
|
$priceMax = $this->toFloat($matches[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match('/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
if (preg_match($this->config->getPriceMinPattern(), $prompt, $matches) === 1) {
|
||||||
$priceMin = $this->toFloat($m[1]);
|
$priceMin = $this->toFloat($matches[1]);
|
||||||
}
|
|
||||||
|
|
||||||
// NEW:
|
|
||||||
// Recognize comparative lower-bound phrasing such as:
|
|
||||||
// - mehr als 3000 euro
|
|
||||||
// - über 3000 euro
|
|
||||||
// - ueber 3000 euro
|
|
||||||
// - größer als 3000 euro
|
|
||||||
// - groesser als 3000 euro
|
|
||||||
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
|
||||||
$priceMin = $this->toFloat($m[1]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return [$priceMin, $priceMax];
|
return [$priceMin, $priceMax];
|
||||||
@@ -172,8 +127,7 @@ final readonly class CommerceQueryParser
|
|||||||
{
|
{
|
||||||
$sizes = [];
|
$sizes = [];
|
||||||
|
|
||||||
$sizePattern = $this->intentConfig->getSizePattern();
|
if (preg_match_all($this->intentConfig->getSizeExtractionPattern(), $prompt, $matches) === false) {
|
||||||
if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) {
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -181,8 +135,7 @@ final readonly class CommerceQueryParser
|
|||||||
$sizes[] = trim($size);
|
$sizes[] = trim($size);
|
||||||
}
|
}
|
||||||
|
|
||||||
$sizeTokenPattern = $this->intentConfig->getSizeTokenPattern();
|
if (preg_match_all($this->intentConfig->getSizeTokenValuePattern(), $prompt, $tokenMatches) !== false) {
|
||||||
if (preg_match_all('/\b(' . $sizeTokenPattern . ')\b/u', $prompt, $tokenMatches) !== false) {
|
|
||||||
foreach ($tokenMatches[1] as $sizeToken) {
|
foreach ($tokenMatches[1] as $sizeToken) {
|
||||||
$sizes[] = trim($sizeToken);
|
$sizes[] = trim($sizeToken);
|
||||||
}
|
}
|
||||||
@@ -207,6 +160,9 @@ final readonly class CommerceQueryParser
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $sizes
|
||||||
|
*/
|
||||||
private function buildSearchText(
|
private function buildSearchText(
|
||||||
string $prompt,
|
string $prompt,
|
||||||
array $sizes,
|
array $sizes,
|
||||||
@@ -219,7 +175,7 @@ final readonly class CommerceQueryParser
|
|||||||
return $this->buildDirectProductSearchText($prompt);
|
return $this->buildDirectProductSearchText($prompt);
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = ' ' . $prompt . ' ';
|
$text = $this->wrapForPhraseReplacement($prompt);
|
||||||
|
|
||||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
||||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||||
@@ -228,7 +184,11 @@ final readonly class CommerceQueryParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text);
|
$text = str_replace(
|
||||||
|
$this->wrapForPhraseReplacement($normalizedPhrase),
|
||||||
|
' ',
|
||||||
|
$text
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($sizes as $size) {
|
foreach ($sizes as $size) {
|
||||||
@@ -238,111 +198,69 @@ final readonly class CommerceQueryParser
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
|
$text = preg_replace(
|
||||||
|
$this->config->buildExactTokenRemovalPattern($normalizedSize),
|
||||||
|
' ',
|
||||||
|
$text
|
||||||
|
) ?? $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
|
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
|
||||||
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
|
$text = preg_replace(
|
||||||
|
$this->config->buildExactTokenRemovalPattern($brand),
|
||||||
|
' ',
|
||||||
|
$text
|
||||||
|
) ?? $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($priceMin !== null || $priceMax !== null) {
|
if ($priceMin !== null || $priceMax !== null) {
|
||||||
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
foreach ($this->config->getPriceRemovalPatterns($this->intentConfig) as $pattern) {
|
||||||
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
$text = preg_replace($pattern, ' ', $text) ?? $text;
|
||||||
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
|
}
|
||||||
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||||
|
|
||||||
$tokens = array_filter(
|
$tokens = array_filter(
|
||||||
explode(' ', $text),
|
explode(' ', $text),
|
||||||
static fn(string $token): bool => mb_strlen($token) > 1
|
fn(string $token): bool => mb_strlen($token) > $this->config->getMinSearchTokenLength()
|
||||||
);
|
);
|
||||||
|
|
||||||
$tokens = $this->filterSearchTokens($tokens);
|
$tokens = $this->filterSearchTokens($tokens);
|
||||||
$tokens = $this->stripReferenceOnlyTokens($tokens);
|
|
||||||
|
|
||||||
return trim(implode(' ', $tokens));
|
return trim(implode(' ', $tokens));
|
||||||
}
|
}
|
||||||
|
|
||||||
private function buildDirectProductSearchText(string $prompt): string
|
private function buildDirectProductSearchText(string $prompt): string
|
||||||
{
|
{
|
||||||
$text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
|
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $prompt) ?? $prompt;
|
||||||
$text = trim($text, " \t\n\r\0\x0B-.,");
|
$text = trim($text, $this->config->getSearchTextTrimCharacters());
|
||||||
|
|
||||||
$tokens = array_filter(
|
$tokens = array_filter(
|
||||||
explode(' ', $text),
|
explode(' ', $text),
|
||||||
static fn(string $token): bool => mb_strlen($token) > 0
|
fn(string $token): bool => mb_strlen($token) >= $this->config->getMinDirectProductTokenLength()
|
||||||
);
|
);
|
||||||
|
|
||||||
return trim(implode(' ', array_values(array_unique($tokens))));
|
$tokens = array_values(array_unique($tokens));
|
||||||
|
|
||||||
|
return trim(implode(' ', $tokens));
|
||||||
}
|
}
|
||||||
|
|
||||||
private function shouldUseHistoryContext(string $prompt, string $searchText): bool
|
private function shouldUseHistoryContext(string $prompt): bool
|
||||||
{
|
{
|
||||||
if ($this->isReferenceOnlyFollowUp($prompt)) {
|
return preg_match($this->config->getHistoryContextValuePattern(), $prompt) === 1;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($this->isTooGenericSearchText($searchText)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function shouldUseReferenceContext(string $prompt, string $searchText): bool
|
|
||||||
{
|
|
||||||
if ($this->isReferenceOnlyFollowUp($prompt)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $this->isTooGenericSearchText($searchText);
|
|
||||||
}
|
|
||||||
|
|
||||||
private function isReferenceOnlyFollowUp(string $prompt): bool
|
|
||||||
{
|
|
||||||
return preg_match('/\b(' . $this->config->getReferenceFollowUpPattern() . ')\b/u', $prompt) === 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function isTooGenericSearchText(string $searchText): bool
|
|
||||||
{
|
|
||||||
$tokens = array_values(array_filter(
|
|
||||||
preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [],
|
|
||||||
static fn(string $token): bool => $token !== ''
|
|
||||||
));
|
|
||||||
|
|
||||||
if ($tokens === []) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
$genericTokens = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
|
|
||||||
|
|
||||||
foreach ($tokens as $token) {
|
|
||||||
if (!isset($genericTokens[$token])) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string
|
|
||||||
{
|
|
||||||
return $this->normalize($referenceContext->buildReferenceSearchText());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function extractLatestQuestionFromHistory(string $historyContext): string
|
private function extractLatestQuestionFromHistory(string $historyContext): string
|
||||||
{
|
{
|
||||||
$result = preg_match_all('/^Question:\s*(.+)$/m', $historyContext, $matches);
|
$result = preg_match_all($this->config->getHistoryQuestionPattern(), $historyContext, $matches);
|
||||||
|
|
||||||
if ($result === false) {
|
if ($result === false) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$questions = $matches[1] ?? [];
|
$questions = $matches[1] ?? [];
|
||||||
|
|
||||||
if ($questions === []) {
|
if ($questions === []) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
@@ -352,11 +270,11 @@ final readonly class CommerceQueryParser
|
|||||||
return is_string($lastQuestion) ? trim($lastQuestion) : '';
|
return is_string($lastQuestion) ? trim($lastQuestion) : '';
|
||||||
}
|
}
|
||||||
|
|
||||||
private function mergeSearchTexts(string $left, string $right): string
|
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
|
||||||
{
|
{
|
||||||
$tokens = [];
|
$tokens = [];
|
||||||
|
|
||||||
foreach ([$left, $right] as $text) {
|
foreach ([$historySearchText, $currentSearchText] as $text) {
|
||||||
if ($text === '') {
|
if ($text === '') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -364,7 +282,7 @@ final readonly class CommerceQueryParser
|
|||||||
foreach (explode(' ', $text) as $token) {
|
foreach (explode(' ', $text) as $token) {
|
||||||
$token = trim($token);
|
$token = trim($token);
|
||||||
|
|
||||||
if ($token === '' || mb_strlen($token) <= 1) {
|
if ($token === '' || mb_strlen($token) <= $this->config->getMinSearchTokenLength()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -381,25 +299,11 @@ final readonly class CommerceQueryParser
|
|||||||
*/
|
*/
|
||||||
private function filterSearchTokens(array $tokens): array
|
private function filterSearchTokens(array $tokens): array
|
||||||
{
|
{
|
||||||
$stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true);
|
$stopWords = $this->config->getFilterSearchTokens();
|
||||||
|
|
||||||
return array_values(array_filter(
|
return array_values(array_filter(
|
||||||
$tokens,
|
$tokens,
|
||||||
static fn(string $token): bool => !isset($stopWords[$token])
|
static fn(string $token): bool => !in_array($token, $stopWords, true)
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param string[] $tokens
|
|
||||||
* @return string[]
|
|
||||||
*/
|
|
||||||
private function stripReferenceOnlyTokens(array $tokens): array
|
|
||||||
{
|
|
||||||
$referenceOnly = array_fill_keys($this->config->getReferenceOnlyTokens(), true);
|
|
||||||
|
|
||||||
return array_values(array_filter(
|
|
||||||
$tokens,
|
|
||||||
static fn(string $token): bool => !isset($referenceOnly[$token])
|
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -417,25 +321,25 @@ final readonly class CommerceQueryParser
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
|
$tokens = preg_split(
|
||||||
|
$this->config->getWhitespaceSplitPattern(),
|
||||||
|
$prompt,
|
||||||
|
-1,
|
||||||
|
PREG_SPLIT_NO_EMPTY
|
||||||
|
) ?: [];
|
||||||
|
|
||||||
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
|
return count($tokens) <= $this->config->getDirectProductMaxTokens()
|
||||||
|
&& preg_match($this->config->getDirectProductDigitPattern(), $prompt) === 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function containsModelLikePhrase(string $text): bool
|
private function containsModelLikePhrase(string $text): bool
|
||||||
{
|
{
|
||||||
return preg_match(
|
return preg_match($this->config->getModelLikePattern(), $text) === 1;
|
||||||
'/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u',
|
|
||||||
$text
|
|
||||||
) === 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function containsAccessoryLikePhrase(string $text): bool
|
private function containsAccessoryLikePhrase(string $text): bool
|
||||||
{
|
{
|
||||||
return preg_match(
|
return preg_match($this->config->getAccessoryLikePattern(), $text) === 1;
|
||||||
'/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u',
|
|
||||||
$text
|
|
||||||
) === 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
|
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
|
||||||
@@ -445,7 +349,7 @@ final readonly class CommerceQueryParser
|
|||||||
}
|
}
|
||||||
|
|
||||||
return preg_match(
|
return preg_match(
|
||||||
'/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u',
|
$this->config->buildBrandPartOfModelPattern($brand),
|
||||||
$prompt
|
$prompt
|
||||||
) === 1;
|
) === 1;
|
||||||
}
|
}
|
||||||
@@ -456,4 +360,42 @@ final readonly class CommerceQueryParser
|
|||||||
|
|
||||||
return is_numeric($value) ? (float) $value : null;
|
return is_numeric($value) ? (float) $value : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array{searchText:string, brand:?string}|null
|
||||||
|
*/
|
||||||
|
private function parseHistoryContext(string $historyContext): ?array
|
||||||
|
{
|
||||||
|
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
||||||
|
|
||||||
|
if ($latestHistoryQuestion === '') {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
|
||||||
|
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
|
||||||
|
|
||||||
|
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
|
||||||
|
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
|
||||||
|
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
|
||||||
|
|
||||||
|
$historySearchText = $this->buildSearchText(
|
||||||
|
prompt: $normalizedHistoryPrompt,
|
||||||
|
sizes: $historySizes,
|
||||||
|
brand: $historyBrand,
|
||||||
|
priceMin: $historyPriceMin,
|
||||||
|
priceMax: $historyPriceMax,
|
||||||
|
preserveDirectProductQuery: $isDirectHistoryProductQuery
|
||||||
|
);
|
||||||
|
|
||||||
|
return [
|
||||||
|
'searchText' => $historySearchText,
|
||||||
|
'brand' => $historyBrand,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
private function wrapForPhraseReplacement(string $text): string
|
||||||
|
{
|
||||||
|
return ' ' . $text . ' ';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -5,16 +5,15 @@ declare(strict_types=1);
|
|||||||
namespace App\Commerce;
|
namespace App\Commerce;
|
||||||
|
|
||||||
use App\Commerce\Dto\ShopProductResult;
|
use App\Commerce\Dto\ShopProductResult;
|
||||||
|
use App\Config\SearchRepairConfig;
|
||||||
use Psr\Log\LoggerInterface;
|
use Psr\Log\LoggerInterface;
|
||||||
|
|
||||||
final readonly class SearchRepairService
|
final readonly class SearchRepairService
|
||||||
{
|
{
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private ShopSearchService $shopSearchService,
|
private ShopSearchService $shopSearchService,
|
||||||
|
private SearchRepairConfig $config,
|
||||||
private LoggerInterface $logger,
|
private LoggerInterface $logger,
|
||||||
private bool $enabled = true,
|
|
||||||
private int $maxRepairQueries = 3,
|
|
||||||
private int $minPrimaryResultsWithoutRepair = 2,
|
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -37,22 +36,22 @@ final readonly class SearchRepairService
|
|||||||
array $primaryShopResults,
|
array $primaryShopResults,
|
||||||
array $knowledgeChunks
|
array $knowledgeChunks
|
||||||
): array {
|
): array {
|
||||||
if (!$this->enabled) {
|
if (!$this->config->isEnabled()) {
|
||||||
return [
|
return $this->buildRepairResult(
|
||||||
'results' => $primaryShopResults,
|
results: $primaryShopResults,
|
||||||
'attemptedRepair' => false,
|
attemptedRepair: false,
|
||||||
'usedRepair' => false,
|
usedRepair: false,
|
||||||
'repairQueries' => [],
|
repairQueries: []
|
||||||
];
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
|
if (!$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
|
||||||
return [
|
return $this->buildRepairResult(
|
||||||
'results' => $primaryShopResults,
|
results: $primaryShopResults,
|
||||||
'attemptedRepair' => false,
|
attemptedRepair: false,
|
||||||
'usedRepair' => false,
|
usedRepair: false,
|
||||||
'repairQueries' => [],
|
repairQueries: []
|
||||||
];
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$repairQueries = $this->buildRepairQueries(
|
$repairQueries = $this->buildRepairQueries(
|
||||||
@@ -63,12 +62,12 @@ final readonly class SearchRepairService
|
|||||||
);
|
);
|
||||||
|
|
||||||
if ($repairQueries === []) {
|
if ($repairQueries === []) {
|
||||||
return [
|
return $this->buildRepairResult(
|
||||||
'results' => $primaryShopResults,
|
results: $primaryShopResults,
|
||||||
'attemptedRepair' => false,
|
attemptedRepair: false,
|
||||||
'usedRepair' => false,
|
usedRepair: false,
|
||||||
'repairQueries' => [],
|
repairQueries: []
|
||||||
];
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->logger->info('Shop repair started', [
|
$this->logger->info('Shop repair started', [
|
||||||
@@ -99,12 +98,12 @@ final readonly class SearchRepairService
|
|||||||
'repairQueries' => $repairQueries,
|
'repairQueries' => $repairQueries,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return [
|
return $this->buildRepairResult(
|
||||||
'results' => $primaryShopResults,
|
results: $primaryShopResults,
|
||||||
'attemptedRepair' => true,
|
attemptedRepair: true,
|
||||||
'usedRepair' => false,
|
usedRepair: false,
|
||||||
'repairQueries' => $repairQueries,
|
repairQueries: $repairQueries
|
||||||
];
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
$mergedResults = $this->rankMergedResults(
|
$mergedResults = $this->rankMergedResults(
|
||||||
@@ -129,16 +128,16 @@ final readonly class SearchRepairService
|
|||||||
'manufacturer' => $product->manufacturer,
|
'manufacturer' => $product->manufacturer,
|
||||||
'available' => $product->available,
|
'available' => $product->available,
|
||||||
],
|
],
|
||||||
array_slice($mergedResults, 0, 3)
|
array_slice($mergedResults, 0, $this->config->getTopProductLogLimit())
|
||||||
),
|
),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return [
|
return $this->buildRepairResult(
|
||||||
'results' => $mergedResults,
|
results: $mergedResults,
|
||||||
'attemptedRepair' => true,
|
attemptedRepair: true,
|
||||||
'usedRepair' => true,
|
usedRepair: true,
|
||||||
'repairQueries' => $repairQueries,
|
repairQueries: $repairQueries
|
||||||
];
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -157,15 +156,11 @@ final readonly class SearchRepairService
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always try repair for bundle/accessory prompts.
|
|
||||||
// These prompts often need a second pass even when the first search
|
|
||||||
// already returned some results, because the user is asking for a
|
|
||||||
// combination of main device + matching accessory.
|
|
||||||
if ($asksForBundle) {
|
if ($asksForBundle) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($primaryResultsCount >= $this->minPrimaryResultsWithoutRepair) {
|
if ($primaryResultsCount >= $this->config->getMinPrimaryResultsWithoutRepair()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -173,7 +168,7 @@ final readonly class SearchRepairService
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return $primaryResultsCount < $this->minPrimaryResultsWithoutRepair;
|
return $primaryResultsCount < $this->config->getMinPrimaryResultsWithoutRepair();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -230,7 +225,7 @@ final readonly class SearchRepairService
|
|||||||
fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery)
|
fn(string $query): bool => $query !== '' && !$this->isTooCloseToPrimaryQuery($query, $primaryQuery)
|
||||||
));
|
));
|
||||||
|
|
||||||
return array_slice($queries, 0, max(1, $this->maxRepairQueries));
|
return array_slice($queries, 0, max(1, $this->config->getMaxRepairQueries()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -291,7 +286,7 @@ final readonly class SearchRepairService
|
|||||||
$candidates = [];
|
$candidates = [];
|
||||||
|
|
||||||
preg_match_all(
|
preg_match_all(
|
||||||
'/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u',
|
$this->config->getModelCandidatePattern(),
|
||||||
$text,
|
$text,
|
||||||
$matches
|
$matches
|
||||||
);
|
);
|
||||||
@@ -321,7 +316,7 @@ final readonly class SearchRepairService
|
|||||||
$candidates = [];
|
$candidates = [];
|
||||||
|
|
||||||
preg_match_all(
|
preg_match_all(
|
||||||
'/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu',
|
$this->config->getAccessoryCandidatePattern(),
|
||||||
$text,
|
$text,
|
||||||
$matches
|
$matches
|
||||||
);
|
);
|
||||||
@@ -368,15 +363,15 @@ final readonly class SearchRepairService
|
|||||||
{
|
{
|
||||||
$score = 0;
|
$score = 0;
|
||||||
|
|
||||||
if (preg_match('/\d/u', $candidate) === 1) {
|
if (preg_match($this->config->getContainsDigitPattern(), $candidate) === 1) {
|
||||||
$score += 4;
|
$score += $this->config->getCandidateDigitScore();
|
||||||
}
|
}
|
||||||
|
|
||||||
$wordCount = count($this->tokenize($candidate));
|
$wordCount = count($this->tokenize($candidate));
|
||||||
$score += min($wordCount, 4);
|
$score += min($wordCount, $this->config->getCandidateWordCountCap());
|
||||||
|
|
||||||
if (preg_match('/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu', $candidate) === 1) {
|
if (preg_match($this->config->getSpecificityBoostPattern(), $candidate) === 1) {
|
||||||
$score += 3;
|
$score += $this->config->getSpecificityBoostScore();
|
||||||
}
|
}
|
||||||
|
|
||||||
return $score;
|
return $score;
|
||||||
@@ -384,39 +379,19 @@ final readonly class SearchRepairService
|
|||||||
|
|
||||||
private function asksForBundleOrAccessory(string $prompt): bool
|
private function asksForBundleOrAccessory(string $prompt): bool
|
||||||
{
|
{
|
||||||
return preg_match(
|
return preg_match($this->config->getAccessoryOrBundlePattern(), $prompt) === 1;
|
||||||
'/\b(passend|passende|zubehor|zubehör|dazu|zusatz|erganzung|ergänzung|indikator|reagenz|kit|set|auch\s+das|mit\s+preis\s+und\s+allen\s+infos)\b/iu',
|
|
||||||
$prompt
|
|
||||||
) === 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function containsModelLikePhrase(string $text): bool
|
private function containsModelLikePhrase(string $text): bool
|
||||||
{
|
{
|
||||||
return preg_match(
|
return preg_match($this->config->getModelLikePattern(), $text) === 1;
|
||||||
'/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u',
|
|
||||||
$text
|
|
||||||
) === 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function looksTooGeneric(string $candidate): bool
|
private function looksTooGeneric(string $candidate): bool
|
||||||
{
|
{
|
||||||
$normalized = mb_strtolower($candidate);
|
$normalized = mb_strtolower($candidate);
|
||||||
|
|
||||||
foreach ([
|
foreach ($this->config->getGenericCandidateTokens() as $genericToken) {
|
||||||
'wasser',
|
|
||||||
'messgerät',
|
|
||||||
'messgeraet',
|
|
||||||
'produkt',
|
|
||||||
'geräte',
|
|
||||||
'geraete',
|
|
||||||
'gerät',
|
|
||||||
'geraet',
|
|
||||||
'resthärte',
|
|
||||||
'resthaerte',
|
|
||||||
'preis',
|
|
||||||
'infos',
|
|
||||||
'wissen',
|
|
||||||
] as $genericToken) {
|
|
||||||
if ($normalized === $genericToken) {
|
if ($normalized === $genericToken) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -428,8 +403,8 @@ final readonly class SearchRepairService
|
|||||||
private function sanitizeQuery(string $query): string
|
private function sanitizeQuery(string $query): string
|
||||||
{
|
{
|
||||||
$query = trim($query);
|
$query = trim($query);
|
||||||
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
|
$query = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $query) ?? $query;
|
||||||
$query = trim($query, " \t\n\r\0\x0B\"'`.,;:-");
|
$query = trim($query, $this->config->getSanitizeTrimCharacters());
|
||||||
|
|
||||||
return trim($query);
|
return trim($query);
|
||||||
}
|
}
|
||||||
@@ -446,7 +421,7 @@ final readonly class SearchRepairService
|
|||||||
$intersection = array_intersect($candidateTokens, $primaryTokens);
|
$intersection = array_intersect($candidateTokens, $primaryTokens);
|
||||||
$overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens));
|
$overlapRatio = count($intersection) / max(count($candidateTokens), count($primaryTokens));
|
||||||
|
|
||||||
return $overlapRatio >= 0.9;
|
return $overlapRatio >= $this->config->getPrimaryQueryOverlapThreshold();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -497,12 +472,12 @@ final readonly class SearchRepairService
|
|||||||
|
|
||||||
foreach ($allResults as $index => $product) {
|
foreach ($allResults as $index => $product) {
|
||||||
$score = 0;
|
$score = 0;
|
||||||
$score += $this->scoreProductAgainstText($product, $prompt) * 3;
|
$score += $this->scoreProductAgainstText($product, $prompt) * $this->config->getPromptMatchWeight();
|
||||||
$score += $this->scoreProductAgainstText($product, $primaryQuery) * 2;
|
$score += $this->scoreProductAgainstText($product, $primaryQuery) * $this->config->getPrimaryQueryMatchWeight();
|
||||||
$score += $this->scoreProductAgainstText($product, $repairSignal) * 4;
|
$score += $this->scoreProductAgainstText($product, $repairSignal) * $this->config->getRepairSignalMatchWeight();
|
||||||
|
|
||||||
if ($index < count($primaryResults)) {
|
if ($index < count($primaryResults)) {
|
||||||
$score += 1;
|
$score += $this->config->getPrimaryResultOrderBonus();
|
||||||
}
|
}
|
||||||
|
|
||||||
$decorated[] = [
|
$decorated[] = [
|
||||||
@@ -549,11 +524,11 @@ final readonly class SearchRepairService
|
|||||||
|
|
||||||
$score = 0;
|
$score = 0;
|
||||||
$intersection = array_intersect($queryTokens, $productTokens);
|
$intersection = array_intersect($queryTokens, $productTokens);
|
||||||
$score += count($intersection) * 2;
|
$score += count($intersection) * $this->config->getTokenIntersectionScore();
|
||||||
|
|
||||||
foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
|
foreach ($this->extractNumberTokens($queryTokens) as $numberToken) {
|
||||||
if (in_array($numberToken, $productTokens, true)) {
|
if (in_array($numberToken, $productTokens, true)) {
|
||||||
$score += 4;
|
$score += $this->config->getNumericTokenMatchScore();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -562,7 +537,7 @@ final readonly class SearchRepairService
|
|||||||
|
|
||||||
private function buildProductKey(ShopProductResult $product): string
|
private function buildProductKey(ShopProductResult $product): string
|
||||||
{
|
{
|
||||||
return mb_strtolower(trim(implode('|', [
|
return mb_strtolower(trim(implode($this->config->getProductKeySeparator(), [
|
||||||
$product->id,
|
$product->id,
|
||||||
$product->productNumber ?? '',
|
$product->productNumber ?? '',
|
||||||
$product->name,
|
$product->name,
|
||||||
@@ -576,8 +551,8 @@ final readonly class SearchRepairService
|
|||||||
private function tokenize(string $text): array
|
private function tokenize(string $text): array
|
||||||
{
|
{
|
||||||
$text = mb_strtolower($text);
|
$text = mb_strtolower($text);
|
||||||
$text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;
|
$text = preg_replace($this->config->getTokenizeCleanupPattern(), ' ', $text) ?? $text;
|
||||||
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
|
$text = preg_replace($this->config->getWhitespaceCollapsePattern(), ' ', $text) ?? $text;
|
||||||
$text = trim($text);
|
$text = trim($text);
|
||||||
|
|
||||||
if ($text === '') {
|
if ($text === '') {
|
||||||
@@ -595,7 +570,31 @@ final readonly class SearchRepairService
|
|||||||
{
|
{
|
||||||
return array_values(array_filter(
|
return array_values(array_filter(
|
||||||
$tokens,
|
$tokens,
|
||||||
static fn(string $token): bool => preg_match('/\d/u', $token) === 1
|
fn(string $token): bool => preg_match($this->config->getContainsDigitPattern(), $token) === 1
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param ShopProductResult[] $results
|
||||||
|
* @param string[] $repairQueries
|
||||||
|
* @return array{
|
||||||
|
* results: ShopProductResult[],
|
||||||
|
* attemptedRepair: bool,
|
||||||
|
* usedRepair: bool,
|
||||||
|
* repairQueries: string[]
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
private function buildRepairResult(
|
||||||
|
array $results,
|
||||||
|
bool $attemptedRepair,
|
||||||
|
bool $usedRepair,
|
||||||
|
array $repairQueries
|
||||||
|
): array {
|
||||||
|
return [
|
||||||
|
'results' => $results,
|
||||||
|
'attemptedRepair' => $attemptedRepair,
|
||||||
|
'usedRepair' => $usedRepair,
|
||||||
|
'repairQueries' => $repairQueries,
|
||||||
|
];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -6,50 +6,249 @@ namespace App\Config;
|
|||||||
|
|
||||||
final class AgentRunnerConfig
|
final class AgentRunnerConfig
|
||||||
{
|
{
|
||||||
|
public function getCommerceHistoryBudgetChars(): int
|
||||||
|
{
|
||||||
|
return 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getProductSearchKnowledgeChunkLimit(): int
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAdvisoryProductSearchKnowledgeChunkLimit(): int
|
||||||
|
{
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getOptimizedShopQueryPrefixPattern(): string
|
||||||
|
{
|
||||||
|
return '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getOptimizedShopQueryTrimCharacters(): string
|
||||||
|
{
|
||||||
|
return " \t\n\r\0\x0B\"'`";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getEmptyPromptMessage(): string
|
||||||
|
{
|
||||||
|
return '❌ Empty prompt.';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAnalyzeRequestMessage(): string
|
||||||
|
{
|
||||||
|
return 'Ich analysiere deine Anfrage...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getCheckInternetSourcesMessage(): string
|
||||||
|
{
|
||||||
|
return 'Ich prüfe auf Internetquellen...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getRetrieveKnowledgeMessage(): string
|
||||||
|
{
|
||||||
|
return 'Ich hole relevante Daten aus meinem RAG-Wissen...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getOptimizeSearchMessage(): string
|
||||||
|
{
|
||||||
|
return 'Ich optimiere die Recherche...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFetchSearchDataMessageTemplate(): string
|
||||||
|
{
|
||||||
|
return 'Ich rufe Recherchedaten ab (type: %s)';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAnalyzeAllInformationMessage(): string
|
||||||
|
{
|
||||||
|
return 'Ich analysiere alle Informationen...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getThinkingWhileStreamingMessage(): string
|
||||||
|
{
|
||||||
|
return 'Denke nach...';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getNoLlmDataReceivedMessage(): string
|
||||||
|
{
|
||||||
|
return '❌ Es wurden keine Daten vom LLM empfangen.';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getGenericInternalErrorMessage(): string
|
||||||
|
{
|
||||||
|
return '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getDebugInternalErrorPrefix(): string
|
||||||
|
{
|
||||||
|
return '❌ Interner Fehler: ';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getExternalUrlSourceLabel(): string
|
||||||
|
{
|
||||||
|
return 'Externe URL';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getRagKnowledgeSourceLabel(): string
|
||||||
|
{
|
||||||
|
return 'RAG Wissen';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getConversationHistorySourceLabel(): string
|
||||||
|
{
|
||||||
|
return 'Chatverlauf';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getShopSystemSourceLabel(): string
|
||||||
|
{
|
||||||
|
return 'Shopsystem';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getExtendedShopSearchSourceLabel(): string
|
||||||
|
{
|
||||||
|
return 'Erweiterte Shopsuche';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getUsedSourcesPrefix(): string
|
||||||
|
{
|
||||||
|
return 'Genutzte Quellen: ';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSourcesPrefix(): string
|
||||||
|
{
|
||||||
|
return 'Quellen: ';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSourceBadgeHtmlTemplate(): string
|
||||||
|
{
|
||||||
|
return '<span class="badge bg-info text-black">%s</span>';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getErrorHtmlTemplate(): string
|
||||||
|
{
|
||||||
|
return '<span class="text-danger">%s</span>' . "\n<hr>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getThinkHtmlTemplate(): string
|
||||||
|
{
|
||||||
|
return '<span class="text-info think">%s</span>' . "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getInfoHtmlTemplate(): string
|
||||||
|
{
|
||||||
|
return "\n\n" . '<span class="text-info fw-bolder">%s</span>' . "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getDebugHtmlTemplate(): string
|
||||||
|
{
|
||||||
|
return "\n\nDEBUG: <code>%s</code>\n";
|
||||||
|
}
|
||||||
|
|
||||||
public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
|
public function getShopPrompt(string $prompt, string $commerceHistoryContext = ''): string
|
||||||
{
|
{
|
||||||
$historyBlock = '';
|
$historyBlock = '';
|
||||||
|
|
||||||
if (trim($commerceHistoryContext) !== '') {
|
if (trim($commerceHistoryContext) !== '') {
|
||||||
$historyBlock = '
|
$historyBlock = $this->buildHistoryBlock($commerceHistoryContext);
|
||||||
RECENT CONVERSATION CONTEXT:
|
|
||||||
' . $commerceHistoryContext . '
|
|
||||||
|
|
||||||
Additional rules for conversation context:
|
|
||||||
- The current user input has highest priority.
|
|
||||||
- Use the recent conversation context only to resolve omitted references.
|
|
||||||
- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.
|
|
||||||
- Do not revive older products unless the current user input clearly refers to them.
|
|
||||||
- If the current input starts a new topic, ignore older product context.
|
|
||||||
- Prefer the most recent product reference over older ones.
|
|
||||||
';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return '
|
return $this->implodePromptBlocks([
|
||||||
Generate a short search query for Shopware 6 from the following user input text.
|
$this->getShopPromptIntro(),
|
||||||
|
$this->buildRulesBlock($this->getShopPromptRules()),
|
||||||
Rules:
|
$this->getShopPromptOutputFormatBlock(),
|
||||||
- Output only the final search query.
|
$historyBlock,
|
||||||
- Always convert relevant search terms to their singular form.
|
$this->getCurrentUserInputLabel() . ':',
|
||||||
- No introduction, no explanation, no quotation marks.
|
trim($prompt),
|
||||||
- Use only shop-relevant search terms from the user input for a shop search.
|
]);
|
||||||
- Maximum 6 search terms, preferably fewer.
|
}
|
||||||
- Remove filler words, polite phrases, and irrelevant words.
|
|
||||||
- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.
|
private function buildHistoryBlock(string $commerceHistoryContext): string
|
||||||
- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).
|
{
|
||||||
- Separate terms using spaces only.
|
return $this->implodePromptBlocks([
|
||||||
- If a relevant product name is present, it must be placed at the beginning of the final search query.
|
$this->getRecentConversationContextLabel() . ':',
|
||||||
- Try to always identify all products mentioned in the user input text, even in long prompts.
|
trim($commerceHistoryContext),
|
||||||
- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.
|
$this->buildRulesBlock($this->getConversationContextRules(), 'Additional rules for conversation context:'),
|
||||||
- If the current user input is vague or referential, use the recent conversation context only as support.
|
]);
|
||||||
- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".
|
}
|
||||||
|
|
||||||
Output format:
|
/**
|
||||||
Keyword1 Keyword2 Keyword3
|
* @return string[]
|
||||||
' . $historyBlock . '
|
*/
|
||||||
|
public function getShopPromptRules(): array
|
||||||
CURRENT USER INPUT:
|
{
|
||||||
' . $prompt . '
|
return [
|
||||||
';
|
'- Output only the final search query.',
|
||||||
|
'- Always convert relevant search terms to their singular form.',
|
||||||
|
'- No introduction, no explanation, no quotation marks.',
|
||||||
|
'- Use only shop-relevant search terms from the user input for a shop search.',
|
||||||
|
'- Maximum 6 search terms, preferably fewer.',
|
||||||
|
'- Remove filler words, polite phrases, and irrelevant words.',
|
||||||
|
'- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.',
|
||||||
|
'- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).',
|
||||||
|
'- Separate terms using spaces only.',
|
||||||
|
'- If a relevant product name is present, it must be placed at the beginning of the final search query.',
|
||||||
|
'- Try to always identify all products mentioned in the user input text, even in long prompts.',
|
||||||
|
'- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.',
|
||||||
|
'- If the current user input is vague or referential, use the recent conversation context only as support.',
|
||||||
|
'- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getConversationContextRules(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'- The current user input has highest priority.',
|
||||||
|
'- Use the recent conversation context only to resolve omitted references.',
|
||||||
|
'- Use it only for product carry-over, brand carry-over, model carry-over, or variant follow-ups.',
|
||||||
|
'- Do not revive older products unless the current user input clearly refers to them.',
|
||||||
|
'- If the current input starts a new topic, ignore older product context.',
|
||||||
|
'- Prefer the most recent product reference over older ones.',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getShopPromptIntro(): string
|
||||||
|
{
|
||||||
|
return 'Generate a short search query for Shopware 6 from the following user input text.';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getShopPromptOutputFormatBlock(): string
|
||||||
|
{
|
||||||
|
return "Output format:\nKeyword1 Keyword2 Keyword3";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getRecentConversationContextLabel(): string
|
||||||
|
{
|
||||||
|
return 'RECENT CONVERSATION CONTEXT';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getCurrentUserInputLabel(): string
|
||||||
|
{
|
||||||
|
return 'CURRENT USER INPUT';
|
||||||
|
}
|
||||||
|
|
||||||
|
private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string
|
||||||
|
{
|
||||||
|
return $headline . "\n" . implode("\n", $rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $blocks
|
||||||
|
*/
|
||||||
|
private function implodePromptBlocks(array $blocks): string
|
||||||
|
{
|
||||||
|
$normalized = array_values(array_filter(
|
||||||
|
array_map(
|
||||||
|
static fn(string $block): string => trim($block),
|
||||||
|
$blocks
|
||||||
|
),
|
||||||
|
static fn(string $block): bool => $block !== ''
|
||||||
|
));
|
||||||
|
|
||||||
|
return implode("\n\n", $normalized);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -6,57 +6,38 @@ namespace App\Config;
|
|||||||
|
|
||||||
final class CommerceIntentConfig
|
final class CommerceIntentConfig
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
public function getStrongSignalsList(): array
|
public function getStrongSignalsList(): array
|
||||||
{
|
{
|
||||||
return [
|
return [
|
||||||
'shop',
|
'shop',
|
||||||
'alle',
|
'alle',
|
||||||
'preis',
|
'preis',
|
||||||
'preise',
|
|
||||||
'kunde',
|
'kunde',
|
||||||
'online',
|
'online',
|
||||||
'produkt',
|
'produkt',
|
||||||
'produkte',
|
|
||||||
'artikel',
|
'artikel',
|
||||||
'sku',
|
'sku',
|
||||||
'kaufen',
|
'kaufen',
|
||||||
'kostet',
|
'kostet',
|
||||||
'kosten',
|
|
||||||
'verfügbarkeit',
|
|
||||||
'verfuegbarkeit',
|
|
||||||
|
|
||||||
// Search / product discovery signals
|
|
||||||
'suche',
|
'suche',
|
||||||
'such',
|
'such',
|
||||||
'finde',
|
'finde',
|
||||||
'finden',
|
'finden',
|
||||||
'welche',
|
|
||||||
'welcher',
|
|
||||||
'welches',
|
|
||||||
|
|
||||||
// Device / system signals
|
|
||||||
'analysegerät',
|
'analysegerät',
|
||||||
'analysegeraet',
|
'analysegeraet',
|
||||||
'analysegeräte',
|
|
||||||
'analysegeraete',
|
|
||||||
'messgerät',
|
'messgerät',
|
||||||
'messgeraet',
|
'messgeraet',
|
||||||
'messgeräte',
|
|
||||||
'messgeraete',
|
|
||||||
'gerät',
|
|
||||||
'geraet',
|
|
||||||
'geräte',
|
|
||||||
'geraete',
|
|
||||||
'analysator',
|
'analysator',
|
||||||
'analysatoren',
|
|
||||||
'analyzer',
|
'analyzer',
|
||||||
'system',
|
|
||||||
'systeme',
|
|
||||||
'anlage',
|
|
||||||
'anlagen',
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
public function getAdvisorySignals(): array
|
public function getAdvisorySignals(): array
|
||||||
{
|
{
|
||||||
return [
|
return [
|
||||||
@@ -67,30 +48,36 @@ final class CommerceIntentConfig
|
|||||||
'geeignet',
|
'geeignet',
|
||||||
'empfiehl',
|
'empfiehl',
|
||||||
'empfehl',
|
'empfehl',
|
||||||
'vergleich',
|
|
||||||
'vergleichen',
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getPricePattern(): string
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getPriceTerms(): array
|
||||||
{
|
{
|
||||||
$pattern = [
|
return [
|
||||||
'euro',
|
'euro',
|
||||||
'€',
|
'€',
|
||||||
'eur',
|
'eur',
|
||||||
'teuer',
|
'teuer',
|
||||||
'preis',
|
'preis',
|
||||||
'preise',
|
|
||||||
'kosten',
|
'kosten',
|
||||||
'kostet',
|
'kostet',
|
||||||
];
|
];
|
||||||
|
|
||||||
return implode('|', $pattern);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getColorPattern(): string
|
public function getPricePattern(): string
|
||||||
{
|
{
|
||||||
$pattern = [
|
return implode('|', $this->getPriceTerms());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getColorTerms(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
'schwarz',
|
'schwarz',
|
||||||
'weiß',
|
'weiß',
|
||||||
'weis',
|
'weis',
|
||||||
@@ -103,13 +90,19 @@ final class CommerceIntentConfig
|
|||||||
'orange',
|
'orange',
|
||||||
'braun',
|
'braun',
|
||||||
];
|
];
|
||||||
|
|
||||||
return implode('|', $pattern);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getSizeTokenPattern(): string
|
public function getColorPattern(): string
|
||||||
{
|
{
|
||||||
$pattern = [
|
return implode('|', $this->getColorTerms());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getSizeTokenTerms(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
'xs',
|
'xs',
|
||||||
's',
|
's',
|
||||||
'm',
|
'm',
|
||||||
@@ -118,18 +111,189 @@ final class CommerceIntentConfig
|
|||||||
'xxl',
|
'xxl',
|
||||||
'xxxxl',
|
'xxxxl',
|
||||||
];
|
];
|
||||||
|
|
||||||
return implode('|', $pattern);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getSizePattern(): string
|
public function getSizeTokenPattern(): string
|
||||||
{
|
{
|
||||||
$pattern = [
|
return implode('|', $this->getSizeTokenTerms());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getSizeTerms(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
'größe',
|
'größe',
|
||||||
'groesse',
|
'groesse',
|
||||||
'grösse',
|
'grösse',
|
||||||
];
|
];
|
||||||
|
}
|
||||||
|
|
||||||
return implode('|', $pattern);
|
public function getSizePattern(): string
|
||||||
|
{
|
||||||
|
return implode('|', $this->getSizeTerms());
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeExtractionPattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:' . $this->getSizePattern() . ')\s*([a-z0-9.-]+)\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getSupportDiagnosticPatterns(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'/\bfehler\b/u',
|
||||||
|
'/\bfehlercode\b/u',
|
||||||
|
'/\berror\b/u',
|
||||||
|
'/\bstörung\b/u',
|
||||||
|
'/\bstoerung\b/u',
|
||||||
|
'/\balarm\b/u',
|
||||||
|
'/\bstörungsmeldung\b/u',
|
||||||
|
'/\bstoerungsmeldung\b/u',
|
||||||
|
'/\bmeldung\b/u',
|
||||||
|
'/\bwarnung\b/u',
|
||||||
|
'/\bwarncode\b/u',
|
||||||
|
'/\bcode\b/u',
|
||||||
|
'/\bwas bedeutet\b/u',
|
||||||
|
'/\bwarum\b/u',
|
||||||
|
'/\bblinkt\b/u',
|
||||||
|
'/\bzeigt\b/u',
|
||||||
|
'/\bzeigt an\b/u',
|
||||||
|
'/\bursache\b/u',
|
||||||
|
'/\bdiagnose\b/u',
|
||||||
|
'/\bservicefall\b/u',
|
||||||
|
'/\bproblem\b/u',
|
||||||
|
'/\bstörung beheben\b/u',
|
||||||
|
'/\bstoerung beheben\b/u',
|
||||||
|
'/\be\d{1,3}\b/u',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getExplicitCommerceIntentPatterns(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'/\bshop\b/u',
|
||||||
|
'/\bpreis\b/u',
|
||||||
|
'/\bkosten\b/u',
|
||||||
|
'/\bkostet\b/u',
|
||||||
|
'/\bkaufen\b/u',
|
||||||
|
'/\bbestellen\b/u',
|
||||||
|
'/\bprodukt\b/u',
|
||||||
|
'/\bartikel\b/u',
|
||||||
|
'/\bsku\b/u',
|
||||||
|
'/\bonline\b/u',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSkuLikePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b\d{4,10}\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceValuePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b\d+(?:[.,]\d+)?\s*(?:' . $this->getPricePattern() . ')\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeValuePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:' . $this->getSizePattern() . ')\s*[a-z0-9.-]+\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeTokenValuePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:' . $this->getSizeTokenPattern() . ')\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getColorValuePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:' . $this->getColorPattern() . ')\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSupportOrDiagnosticSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'support_or_diagnostic';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSkuSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'sku';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'price';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'size';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeTokenSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'size_token';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getColorSignalLabel(): string
|
||||||
|
{
|
||||||
|
return 'color';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAdvisorySignalPrefix(): string
|
||||||
|
{
|
||||||
|
return 'advisory:';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getProductSearchMinScore(): int
|
||||||
|
{
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAdvisoryProductSearchMinScore(): int
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getStrongSignalScore(): int
|
||||||
|
{
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSkuSignalScore(): int
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceSignalScore(): int
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeSignalScore(): int
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSizeTokenSignalScore(): int
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getColorSignalScore(): int
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAdvisorySignalScore(): int
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4,28 +4,18 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Config;
|
namespace App\Config;
|
||||||
|
|
||||||
final readonly class CommerceQueryParserConfig
|
final class CommerceQueryParserConfig
|
||||||
{
|
{
|
||||||
/**
|
|
||||||
* @param string[] $knownBrands
|
|
||||||
* @param string[] $phrasesToRemove
|
|
||||||
* @param string[] $filterSearchTokensPattern
|
|
||||||
* @param string[] $referenceOnlyTokens
|
|
||||||
*/
|
|
||||||
public function __construct(
|
|
||||||
private array $knownBrands = [],
|
|
||||||
private array $phrasesToRemove = [],
|
|
||||||
private array $filterSearchTokensPattern = [],
|
|
||||||
private array $referenceOnlyTokens = [],
|
|
||||||
) {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
public function getKnownBrands(): array
|
public function getKnownBrands(): array
|
||||||
{
|
{
|
||||||
return $this->knownBrands;
|
return [
|
||||||
|
'heyl',
|
||||||
|
'horiba',
|
||||||
|
'neomeris',
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -33,62 +23,175 @@ final readonly class CommerceQueryParserConfig
|
|||||||
*/
|
*/
|
||||||
public function getPhrasesToRemove(): array
|
public function getPhrasesToRemove(): array
|
||||||
{
|
{
|
||||||
return $this->phrasesToRemove;
|
return [
|
||||||
|
'ich suche',
|
||||||
|
'suche',
|
||||||
|
'habt ihr',
|
||||||
|
'gibt es',
|
||||||
|
'zeige mir',
|
||||||
|
'welches gerät',
|
||||||
|
'welche gerät',
|
||||||
|
'welches modell',
|
||||||
|
'welches ist besser',
|
||||||
|
'welches ist am besten',
|
||||||
|
'alternative',
|
||||||
|
'alternativen',
|
||||||
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHistoryContextPattern(): string
|
public function getHistoryContextPattern(): string
|
||||||
{
|
{
|
||||||
return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getReferenceFollowUpPattern(): string
|
public function getHistoryContextValuePattern(): string
|
||||||
{
|
{
|
||||||
return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
|
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
|
public function getFilterSearchTokens(): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'auch',
|
||||||
|
'noch',
|
||||||
|
'nochmal',
|
||||||
|
'zusätzlich',
|
||||||
|
'dazu',
|
||||||
|
'davon',
|
||||||
|
'stattdessen',
|
||||||
|
'bitte',
|
||||||
|
'gern',
|
||||||
|
'gerne',
|
||||||
|
'zeige',
|
||||||
|
'zeig',
|
||||||
|
'such',
|
||||||
|
'suche',
|
||||||
|
'finde',
|
||||||
|
'find',
|
||||||
|
'mir',
|
||||||
|
'mal',
|
||||||
|
'von',
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Backward-compatible alias for older callers.
|
||||||
|
*
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
public function getFilterSearchTokensPattern(): array
|
public function getFilterSearchTokensPattern(): array
|
||||||
{
|
{
|
||||||
return $this->filterSearchTokensPattern;
|
return $this->getFilterSearchTokens();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
public function getReferenceOnlyTokens(): array
|
public function getNormalizationSearch(): array
|
||||||
{
|
{
|
||||||
if ($this->referenceOnlyTokens !== []) {
|
return ['€'];
|
||||||
return $this->referenceOnlyTokens;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getNormalizationReplace(): array
|
||||||
|
{
|
||||||
|
return [' euro '];
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPromptSanitizePattern(): string
|
||||||
|
{
|
||||||
|
return '/[^\p{L}\p{N}\s.,\-]/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getWhitespaceCollapsePattern(): string
|
||||||
|
{
|
||||||
|
return '/\s+/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getWhitespaceSplitPattern(): string
|
||||||
|
{
|
||||||
|
return '/\s+/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getSearchTextTrimCharacters(): string
|
||||||
|
{
|
||||||
|
return " \t\n\r\0\x0B-.,";
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getMinSearchTokenLength(): int
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getMinDirectProductTokenLength(): int
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getHistoryQuestionPattern(): string
|
||||||
|
{
|
||||||
|
return '/^Question:\s*(.+)$/m';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceBetweenPattern(): string
|
||||||
|
{
|
||||||
|
return '/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceMaxPattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:unter|bis|max(?:imal)?)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getPriceMinPattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:ab|mindestens|min)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getPriceRemovalPatterns(CommerceIntentConfig $intentConfig): array
|
||||||
|
{
|
||||||
return [
|
return [
|
||||||
'preis',
|
'/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||||
'preise',
|
'/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u',
|
||||||
'kosten',
|
'/\b(?:' . $intentConfig->getPricePattern() . ')\b/u',
|
||||||
'kostet',
|
|
||||||
'gerät',
|
|
||||||
'geraet',
|
|
||||||
'modell',
|
|
||||||
'produkt',
|
|
||||||
'artikel',
|
|
||||||
'dafür',
|
|
||||||
'dafuer',
|
|
||||||
'dazu',
|
|
||||||
'davon',
|
|
||||||
'verfügbarkeit',
|
|
||||||
'verfuegbarkeit',
|
|
||||||
'shop',
|
|
||||||
'link',
|
|
||||||
'zum',
|
|
||||||
'zur',
|
|
||||||
'das',
|
|
||||||
'dieses',
|
|
||||||
'den',
|
|
||||||
'dem',
|
|
||||||
'bitte',
|
|
||||||
'und',
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function getDirectProductDigitPattern(): string
|
||||||
|
{
|
||||||
|
return '/\d/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getDirectProductMaxTokens(): int
|
||||||
|
{
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getModelLikePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getAccessoryLikePattern(): string
|
||||||
|
{
|
||||||
|
return '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function buildExactTokenRemovalPattern(string $token): string
|
||||||
|
{
|
||||||
|
return '/\b' . preg_quote($token, '/') . '\b/u';
|
||||||
|
}
|
||||||
|
|
||||||
|
public function buildBrandPartOfModelPattern(string $brand): string
|
||||||
|
{
|
||||||
|
return '/\b' . preg_quote($brand, '/') . '\s+\d{2,5}[a-z0-9\-]*\b/u';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -34,7 +34,7 @@ final class NdjsonHybridRetrieverConfig
|
|||||||
* - the system now has more safeguards:
|
* - the system now has more safeguards:
|
||||||
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
|
* lexical cross-signals, scoped retrieval, title/meta boost, selection rules
|
||||||
*/
|
*/
|
||||||
public const VECTOR_SCORE_THRESHOLD = 0.82;
|
public const VECTOR_SCORE_THRESHOLD = 0.83;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lower safety boundary for dynamic threshold adjustments.
|
* Lower safety boundary for dynamic threshold adjustments.
|
||||||
|
|||||||
@@ -1,97 +1,459 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
namespace App\Config;
|
namespace App\Config;
|
||||||
|
|
||||||
/**
 * Static configuration for the prompt builder: token-budget numbers, section
 * labels, rule texts that are placed into the prompt, and keyword lists.
 *
 * All values are hard-coded; every getter returns the same value on each call.
 */
final class PromptBuilderConfig
{
    // ---------------------------------------------------------------------
    // Token budgeting
    // ---------------------------------------------------------------------

    /**
     * Approximate character-to-token ratio for conservative prompt budgeting.
     */
    public function getCharsPerToken(): int
    {
        return 4;
    }

    /**
     * Small gap (in characters) kept so history does not consume the last
     * available prompt space.
     */
    public function getHistoryPaddingChars(): int
    {
        return 400;
    }

    /**
     * Share of the budget reserved for the model output.
     */
    public function getOutputReserveRatio(): float
    {
        return 0.25;
    }

    /**
     * Lower bound (in tokens) of the space reserved for the model output.
     */
    public function getOutputReserveMinTokens(): int
    {
        return 768;
    }

    /**
     * Upper bound (in tokens) of the space reserved for the model output.
     */
    public function getOutputReserveMaxTokens(): int
    {
        return 6000;
    }

    /**
     * Share reserved as a small safety buffer to avoid hitting the context
     * limit too tightly.
     */
    public function getSafetyReserveRatio(): float
    {
        return 0.05;
    }

    /**
     * Lower bound (in tokens) of the safety buffer.
     */
    public function getSafetyReserveMinTokens(): int
    {
        return 256;
    }

    /**
     * Upper bound (in tokens) of the safety buffer.
     */
    public function getSafetyReserveMaxTokens(): int
    {
        return 1024;
    }

    /**
     * Floor that ensures the prompt budget never collapses completely on
     * smaller models.
     */
    public function getMinPromptBudgetTokens(): int
    {
        return 1024;
    }

    // ---------------------------------------------------------------------
    // Shop result limits and classification thresholds
    // ---------------------------------------------------------------------

    /**
     * Limit for how many ranked shop results are passed into the final prompt.
     *
     * The shop search may return many candidates, but the LLM should only see
     * the most relevant top subset after local reranking.
     */
    public function getMaxShopResultsInPrompt(): int
    {
        return 24;
    }

    public function getDetailedShopResultsMaxCount(): int
    {
        return 5;
    }

    public function getTechnicalProductKeywordMatchThreshold(): int
    {
        return 2;
    }

    // ---------------------------------------------------------------------
    // Section labels and prompt texts
    // ---------------------------------------------------------------------

    public function getSystemSectionLabel(): string
    {
        return 'SYSTEM';
    }

    public function getUserQuestionSectionLabel(): string
    {
        return 'USER QUESTION';
    }

    public function getConversationContextSectionLabel(): string
    {
        return 'CONVERSATION CONTEXT (contextual only)';
    }

    /**
     * @return string[]
     */
    public function getConversationContextIntroLines(): array
    {
        return [
            'The following messages are previous turns of this conversation.',
            'Use them to resolve references, follow-up questions, and user intent.',
            'They must not override retrieved factual knowledge or live shop data.',
        ];
    }

    public function getShopSearchQuerySectionLabel(): string
    {
        return 'SHOP SEARCH QUERY';
    }

    public function getShopSearchQuerySourceLine(): string
    {
        return 'Source: Shop Search';
    }

    /**
     * @return string[]
     */
    public function getLiveShopResultsHeaderLines(): array
    {
        return [
            'LIVE SHOP RESULTS (authoritative for current commercial details):',
            'Use these results as the primary source for current price, availability, URL, and current shop-visible product naming.',
            'If retrieved documents conflict with shop data on price, availability, URL, or current naming, prefer the shop data.',
            'Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.',
            'Do not infer undocumented technical specifications from shop data.',
            'Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.',
            'Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.',
        ];
    }

    /**
     * sprintf() template with two %d placeholders: shown count, then total count.
     */
    public function getLiveShopResultsOverflowNoticeTemplate(): string
    {
        return 'Only the top %d ranked shop results are shown here out of %d total results.';
    }

    public function getOutputPrioritySectionLabel(): string
    {
        return 'OUTPUT PRIORITY';
    }

    /**
     * @return string[]
     */
    public function getOutputPriorityRules(): array
    {
        return [
            '- Use retrieved knowledge first to determine the technically matching product or answer.',
            '- If shop results are present, use them afterwards to add current price, availability, and the actual URL.',
            '- Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.',
        ];
    }

    public function getResponseFormatSectionLabel(): string
    {
        return 'RESPONSE FORMAT RULES';
    }

    /**
     * @return string[]
     */
    public function getResponseFormatBaseRules(): array
    {
        return [
            '- Keep normal spacing between all words. Never fuse words together.',
            '- Use short, clean paragraphs or short labeled sections.',
            '- Do not use persuasive or promotional wording.',
            '- Do not repeat the same fact in slightly different wording.',
            '- Never mention brands, manufacturers, model names, or product families that do not appear in the provided shop results, retrieved knowledge, URL content, or conversation context.',
            '- If no suitable product is explicitly grounded in the provided sources, say that plainly instead of inventing alternatives.',
            '- Do not generate external alternative lists, vendor suggestions, or purchase recommendations unless they are explicitly present in the provided sources.',
            '- Do not combine technical identity from one source with commercial fields from a different product.',
            '- Product number, price, availability, and URL must belong to the same explicitly grounded product.',
        ];
    }

    /**
     * @return string[]
     */
    public function getResponseFormatWithShopRules(): array
    {
        return [
            '- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.',
            '- Keep price, availability, and URL on separate lines when they are present.',
            '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.',
            '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.',
            '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.',
        ];
    }

    /**
     * @return string[]
     */
    public function getResponseFormatWithoutShopRules(): array
    {
        return [
            '- If no shop results are present, do not compensate by inventing external products or external manufacturers.',
        ];
    }

    /**
     * @return string[]
     */
    public function getResponseFormatTechnicalRules(): array
    {
        return [
            '- Write like technical documentation: precise, neutral, and source-close.',
            '- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation.',
            '- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.',
        ];
    }

    /**
     * @return string[]
     */
    public function getResponseFormatAccessoryRules(): array
    {
        return [
            '- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.',
            '- The main device must come first. The accessory must not replace the main device.',
            '- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.',
            '- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
        ];
    }

    public function getLanguageRulesSectionLabel(): string
    {
        return 'LANGUAGE RULES';
    }

    /**
     * @return string[]
     */
    public function getLanguageRules(): array
    {
        return [
            '- Answer only in the same language as the user question.',
            '- All headings, labels, notes, and structural elements must be in the same language as the user question.',
            '- Do not switch languages unless the user does.',
            '- If headings are used, write them in the user\'s language.',
        ];
    }

    public function getFactGroundingRulesSectionLabel(): string
    {
        return 'FACT GROUNDING RULES';
    }

    /**
     * @return string[]
     */
    public function getFactGroundingBaseRules(): array
    {
        return [
            '- State only facts that are explicitly present in the provided sources.',
            '- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.',
            '- Do not invent missing values.',
            '- Do not replace missing values with estimates, defaults, or typical industry assumptions.',
            '- Do not claim that information is missing if it appears in the provided sources.',
            '- Do not compare with other products unless those products are also present in the provided sources.',
            '- Prefer source-faithful wording over persuasive wording.',
            '- Avoid marketing language such as \'ideal\', \'perfect\', \'unverzichtbar\', \'entscheidend\', \'optimal\', \'kosteneffizient\', \'prozesssicher\', or \'state-of-the-art\'.',
            '- Clearly separate explicit facts from inferences.',
            '- If a conclusion goes beyond the source wording, label it exactly as \'Inference:\'.',
            '- If a sentence cannot be traced to the provided sources, do not write it.',
            '- Never mention external manufacturers, external brands, or external products unless they are explicitly present in the provided sources.',
            '- If the sources do not identify a suitable product, do not invent one.',
        ];
    }

    /**
     * @return string[]
     */
    public function getFactGroundingWithShopRules(): array
    {
        return [
            '- Use shop data as highest priority only for current commercial fields: price, availability, URL, and current shop-visible naming.',
            '- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.',
            '- When shop results are present and relevant, include current price and the actual URL if available.',
            '- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.',
            '- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.',
            '- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.',
            '- Do not assign the product number, price, URL, or availability of a reagent, accessory, kit, set, consumable, or service item to a device identified in retrieved knowledge.',
            '- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.',
            '- If the shop match is ambiguous, keep the technical identification and commercial details separate.',
        ];
    }

    /**
     * @return string[]
     */
    public function getFactGroundingWithoutShopRules(): array
    {
        return [
            '- Use retrieved knowledge as authoritative for factual answers.',
            '- If no shop results are present, do not compensate with external recommendations or external product suggestions.',
        ];
    }

    /**
     * @return string[]
     */
    public function getFactGroundingTechnicalRules(): array
    {
        return [
            '- For technical product questions, answer primarily with explicitly stated facts.',
            '- Behave like a technical documentation assistant, not like a sales advisor.',
            '- Keep interpretations minimal and do not generalize application areas beyond the provided sources.',
            '- Do not describe benefits, consequences, risks, or operational outcomes unless they are explicitly stated in the sources.',
            '- Do not translate technical facts into business value unless the source explicitly does so.',
            '- Do not recommend process changes unless explicitly present in the source.',
            '- Do not use persuasive summaries or advisory conclusions.',
            '- If the retrieved knowledge describes one specific named product, stay within that product and do not merge related product families or variants.',
            '- Use neutral engineering language.',
            '- Do not name specific chemicals, indicator substances, standards, or mechanisms unless explicitly stated in the source.',
            '- If the source states signal logic such as green/red, output that signal logic only and do not expand it into operational recommendations or alarm semantics unless explicitly stated.',
            '- If the source lists application areas, repeat only those areas and do not broaden them.',
            '- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
            '- If the source states only a threshold function, do not expand it into broader control logic.',
            '- If a detail is not explicitly stated in the provided sources, say so plainly.',
            '- Prefer short, source-close sentences over explanatory expansion.',
            '- If the sources only support that a product family is not suitable, output only that unsuitability and stop there.',
        ];
    }

    public function getRetrievedKnowledgeSectionLabel(): string
    {
        return 'RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)';
    }

    public function getRetrievedKnowledgeSourceLine(): string
    {
        return 'Source: Documents';
    }

    public function getUrlContentSectionLabel(): string
    {
        return 'CONTENT FROM URL (authoritative if user-provided)';
    }

    public function getUrlContentSourceLine(): string
    {
        return 'Source: URL';
    }

    // ---------------------------------------------------------------------
    // Field labels used when rendering a single shop result
    // ---------------------------------------------------------------------

    public function getShopProductNumberLabel(): string
    {
        return 'Product number';
    }

    public function getShopManufacturerLabel(): string
    {
        return 'Manufacturer';
    }

    public function getShopPriceLabel(): string
    {
        return 'Price';
    }

    public function getShopAvailabilityLabel(): string
    {
        return 'Available';
    }

    public function getShopAvailabilityYesLabel(): string
    {
        return 'yes';
    }

    public function getShopAvailabilityNoLabel(): string
    {
        return 'no';
    }

    /**
     * Prefix for highlight lines; the trailing space is part of the value.
     */
    public function getShopHighlightPrefix(): string
    {
        return '- ';
    }

    public function getShopUrlLabel(): string
    {
        return 'URL';
    }

    public function getShopProductImageLabel(): string
    {
        return 'Product image';
    }

    public function getShopDescriptionLabel(): string
    {
        return 'Description';
    }

    public function getShopMetaInformationLabel(): string
    {
        return 'Meta information';
    }

    // ---------------------------------------------------------------------
    // Keyword lists and patterns
    // ---------------------------------------------------------------------

    /**
     * Keywords that mark a prompt as a technical product question.
     *
     * Technical product prompts should be answered like documentation,
     * not like sales copy.
     *
     * @return string[]
     */
    public function getTechnicalProductKeywords(): array
    {
        return [
            'technisch',
            'technical',
            'produkt',
            'product',
            'gerät',
            'device',
            'modell',
            'model',
            'messprinzip',
            'measurement principle',
            'schnittstelle',
            'interface',
            'relais',
            'relay',
            'indikator',
            'indicator',
            'spannung',
            'voltage',
            'strom',
            'current',
            'druck',
            'pressure',
            'temperatur',
            'temperature',
            'schutzart',
            'ip',
            'fehlercode',
            'error code',
            'wasserhärte',
            'hardness',
            'testomat',
            'chlor',
            'chlormessung',
        ];
    }

    /**
     * Keywords that indicate the user asks for an accessory; the list holds
     * umlaut spellings alongside ASCII variants.
     *
     * @return string[]
     */
    public function getAccessoryRequestKeywords(): array
    {
        return [
            'passend',
            'passende',
            'passendes',
            'zubehör',
            'zubehor',
            'dazu',
            'indikator',
            'reagenz',
            'kit',
            'set',
            'zusatz',
            'ergänzung',
            'ergaenzung',
        ];
    }

    /**
     * PCRE pattern for a model reference: at least two letters, an optional
     * single whitespace character, then 2-5 digits (e.g. "testomat 808").
     */
    public function getTechnicalProductModelPattern(): string
    {
        return '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';
    }
}
|
||||||
204
src/Config/SearchRepairConfig.php
Normal file
204
src/Config/SearchRepairConfig.php
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Config;
|
||||||
|
|
||||||
|
/**
 * Static configuration for the search repair step: feature switch, limits,
 * scoring weights, regex patterns and the term lists those patterns are
 * built from.
 *
 * All values are hard-coded; every getter returns the same value on each call.
 */
final class SearchRepairConfig
{
    /**
     * Shared regex core for a model reference: one to three words followed by
     * a 2-5 digit number with an optional alphanumeric/hyphen suffix.
     * Used with and without a capture group by the two model patterns below,
     * so both stay in sync.
     */
    private const MODEL_PATTERN_CORE = '[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*';

    public function isEnabled(): bool
    {
        return true;
    }

    public function getMaxRepairQueries(): int
    {
        return 3;
    }

    public function getMinPrimaryResultsWithoutRepair(): int
    {
        return 2;
    }

    public function getTopProductLogLimit(): int
    {
        return 3;
    }

    /**
     * Model pattern with the whole model phrase in capture group 1.
     */
    public function getModelCandidatePattern(): string
    {
        return '/\b(' . self::MODEL_PATTERN_CORE . ')\b/u';
    }

    /**
     * Accessory term followed by a 1-5 digit number; the whole phrase is in
     * capture group 1. Case-insensitive.
     */
    public function getAccessoryCandidatePattern(): string
    {
        return '/\b((?:' . implode('|', $this->getAccessoryCandidateTerms()) . ')\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu';
    }

    /**
     * Matches any accessory/bundle term as a whole word. Case-insensitive.
     */
    public function getAccessoryOrBundlePattern(): string
    {
        return '/\b(' . implode('|', $this->getAccessoryOrBundleTerms()) . ')\b/iu';
    }

    /**
     * Same model phrase as getModelCandidatePattern(), without a capture group.
     */
    public function getModelLikePattern(): string
    {
        return '/\b' . self::MODEL_PATTERN_CORE . '\b/u';
    }

    /**
     * Matches any specificity-boost term as a whole word. Case-insensitive.
     */
    public function getSpecificityBoostPattern(): string
    {
        return '/\b(?:' . implode('|', $this->getSpecificityBoostTerms()) . ')\b/iu';
    }

    /**
     * @return string[]
     */
    public function getGenericCandidateTokens(): array
    {
        return [
            'wasser',
            'messgerät',
            'messgeraet',
            'produkt',
            'geräte',
            'geraete',
            'gerät',
            'geraet',
            'resthärte',
            'resthaerte',
            'preis',
            'infos',
            'wissen',
        ];
    }

    /**
     * Character list (whitespace, NUL, quotes and punctuation) in the form
     * expected by trim()'s second argument.
     */
    public function getSanitizeTrimCharacters(): string
    {
        return " \t\n\r\0\x0B\"'`.,;:-";
    }

    public function getContainsDigitPattern(): string
    {
        return '/\d/u';
    }

    public function getWhitespaceCollapsePattern(): string
    {
        return '/\s+/u';
    }

    /**
     * Matches runs of characters that are neither letters, numbers,
     * whitespace nor hyphens.
     */
    public function getTokenizeCleanupPattern(): string
    {
        return '/[^\p{L}\p{N}\s\-]+/u';
    }

    public function getProductKeySeparator(): string
    {
        return '|';
    }

    public function getCandidateDigitScore(): int
    {
        return 4;
    }

    public function getCandidateWordCountCap(): int
    {
        return 4;
    }

    public function getSpecificityBoostScore(): int
    {
        return 3;
    }

    public function getPrimaryQueryOverlapThreshold(): float
    {
        return 0.9;
    }

    public function getPromptMatchWeight(): int
    {
        return 3;
    }

    public function getPrimaryQueryMatchWeight(): int
    {
        return 2;
    }

    public function getRepairSignalMatchWeight(): int
    {
        return 4;
    }

    public function getPrimaryResultOrderBonus(): int
    {
        return 1;
    }

    public function getTokenIntersectionScore(): int
    {
        return 2;
    }

    public function getNumericTokenMatchScore(): int
    {
        return 4;
    }

    /**
     * Alternatives for getAccessoryCandidatePattern(). Entries are inserted
     * into the regex verbatim (no preg_quote), so they must be valid regex
     * fragments.
     *
     * @return string[]
     */
    public function getAccessoryCandidateTerms(): array
    {
        return [
            'indikator',
            'indicator',
            'reagenz',
            'reagent',
            'kit',
            'set',
        ];
    }

    /**
     * Alternatives for getAccessoryOrBundlePattern(). Entries are regex
     * fragments, not literals: some contain "\s+" and are inserted verbatim.
     *
     * @return string[]
     */
    public function getAccessoryOrBundleTerms(): array
    {
        return [
            'passend',
            'passende',
            'zubehor',
            'zubehör',
            'dazu',
            'zusatz',
            'erganzung',
            'ergänzung',
            'indikator',
            'reagenz',
            'kit',
            'set',
            'auch\s+das',
            'mit\s+preis\s+und\s+allen\s+infos',
        ];
    }

    /**
     * Alternatives for getSpecificityBoostPattern(), inserted verbatim.
     *
     * @return string[]
     */
    public function getSpecificityBoostTerms(): array
    {
        return [
            'indikator',
            'indicator',
            'testomat',
            'tritromat',
            'titromat',
            'reagenz',
            'reagent',
        ];
    }
}
|
||||||
40
src/Config/StopWordsConfig.php
Normal file
40
src/Config/StopWordsConfig.php
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Config;
|
||||||
|
|
||||||
|
/**
 * Holds the stop-word list used for retrieval.
 */
final class StopWordsConfig
{
    /**
     * Retrieval-optimized stop-word list.
     *
     * Important:
     * - keep negations
     * - keep question words
     * - keep domain terms
     * - remove only structural filler words
     *
     * All entries are lowercase.
     *
     * @return string[]
     */
    public function getStopWords(): array
    {
        return [
            'mit',
            'der', 'die', 'das',
            'ein', 'eine', 'einer', 'eines',
            'den', 'dem', 'des',
            'und', 'oder', 'aber', 'sowie',
            'ich', 'du', 'er', 'sie', 'es',
            'wir', 'ihr',
            'halt', 'eben', 'auch', 'schon',
            'noch', 'mal', 'bitte', 'danke',
            'also', 'nun', 'tja',
            'dann', 'danach', 'davor',
            'hier', 'dort',
            'heute', 'gestern', 'morgen',
            'könnte', 'kannst', 'kann',
            'würde', 'würdest', 'würden',
        ];
    }
}
|
||||||
@@ -22,157 +22,75 @@ final class CommerceIntentLite
|
|||||||
*/
|
*/
|
||||||
public function detect(string $originalPrompt): array
{
    // All matching below works on the trimmed, lowercased prompt.
    $prompt = mb_strtolower(trim($originalPrompt));

    if ($prompt === '') {
        return $this->buildDetectionResult(
            intent: self::NONE,
            score: 0,
            signals: [],
        );
    }

    // Block support / diagnostic questions from entering the commerce flow
    // unless the prompt also contains very explicit purchase / shop intent.
    if ($this->isSupportOrDiagnosticQuery($prompt) && !$this->hasExplicitCommerceIntent($prompt)) {
        return $this->buildDetectionResult(
            intent: self::NONE,
            score: 0,
            signals: [$this->config->getSupportOrDiagnosticSignalLabel()],
        );
    }

    $score = 0;
    $signals = [];

    // Each step receives the running score and signal list and returns both.
    [$score, $signals] = $this->applyStrongSignals($prompt, $score, $signals);
    [$score, $signals] = $this->applySkuSignal($prompt, $score, $signals);
    [$score, $signals] = $this->applyPriceSignal($prompt, $score, $signals);
    [$score, $signals] = $this->applySizeSignal($prompt, $score, $signals);
    [$score, $signals] = $this->applySizeTokenSignal($prompt, $score, $signals);
    [$score, $signals] = $this->applyColorSignal($prompt, $score, $signals);
    [$score, $signals] = $this->applyAdvisorySignals($prompt, $score, $signals);

    // De-duplicate and reindex so the result holds a plain list of signals.
    $signals = array_values(array_unique($signals));

    // The higher threshold is checked first; arms are evaluated in order.
    $intent = match (true) {
        $score >= $this->config->getProductSearchMinScore() => self::PRODUCT_SEARCH,
        $score >= $this->config->getAdvisoryProductSearchMinScore() => self::ADVISORY_PRODUCT_SEARCH,
        default => self::NONE,
    };

    return $this->buildDetectionResult(
        intent: $intent,
        score: $score,
        signals: $signals,
    );
}
|
||||||
|
|
||||||
private function isSupportOrDiagnosticQuery(string $prompt): bool
|
private function isSupportOrDiagnosticQuery(string $prompt): bool
|
||||||
{
|
{
|
||||||
$patterns = [
|
return $this->matchesAnyPattern($prompt, $this->config->getSupportDiagnosticPatterns());
|
||||||
'/\bfehler\b/u',
|
|
||||||
'/\bfehlercode\b/u',
|
|
||||||
'/\berror\b/u',
|
|
||||||
'/\bstörung\b/u',
|
|
||||||
'/\bstoerung\b/u',
|
|
||||||
'/\balarm\b/u',
|
|
||||||
'/\bstörungsmeldung\b/u',
|
|
||||||
'/\bstoerungsmeldung\b/u',
|
|
||||||
'/\bmeldung\b/u',
|
|
||||||
'/\bwarnung\b/u',
|
|
||||||
'/\bwarncode\b/u',
|
|
||||||
'/\bcode\b/u',
|
|
||||||
'/\bwas bedeutet\b/u',
|
|
||||||
'/\bwarum\b/u',
|
|
||||||
'/\bblinkt\b/u',
|
|
||||||
'/\bzeigt\b/u',
|
|
||||||
'/\bzeigt an\b/u',
|
|
||||||
'/\bursache\b/u',
|
|
||||||
'/\bdiagnose\b/u',
|
|
||||||
'/\bservicefall\b/u',
|
|
||||||
'/\bproblem\b/u',
|
|
||||||
'/\bstörung beheben\b/u',
|
|
||||||
'/\bstoerung beheben\b/u',
|
|
||||||
'/\be\d{1,3}\b/u',
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($patterns as $pattern) {
|
|
||||||
if (preg_match($pattern, $prompt) === 1) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function hasExplicitCommerceIntent(string $prompt): bool
|
private function hasExplicitCommerceIntent(string $prompt): bool
|
||||||
{
|
{
|
||||||
$patterns = [
|
return $this->matchesAnyPattern($prompt, $this->config->getExplicitCommerceIntentPatterns());
|
||||||
'/\bshop\b/u',
|
}
|
||||||
'/\bpreis\b/u',
|
|
||||||
'/\bkosten\b/u',
|
|
||||||
'/\bkostet\b/u',
|
|
||||||
'/\bkaufen\b/u',
|
|
||||||
'/\bbestellen\b/u',
|
|
||||||
'/\bprodukt\b/u',
|
|
||||||
'/\bartikel\b/u',
|
|
||||||
'/\bsku\b/u',
|
|
||||||
'/\bonline\b/u',
|
|
||||||
];
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $patterns
|
||||||
|
*/
|
||||||
|
private function matchesAnyPattern(string $prompt, array $patterns): bool
|
||||||
|
{
|
||||||
foreach ($patterns as $pattern) {
|
foreach ($patterns as $pattern) {
|
||||||
if (preg_match($pattern, $prompt) === 1) {
|
if (preg_match($pattern, $prompt) === 1) {
|
||||||
return true;
|
return true;
|
||||||
@@ -181,4 +99,119 @@ final class CommerceIntentLite
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applyStrongSignals(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
foreach ($this->config->getStrongSignalsList() as $signal) {
|
||||||
|
if (str_contains($prompt, mb_strtolower($signal))) {
|
||||||
|
$score += $this->config->getStrongSignalScore();
|
||||||
|
$signals[] = $signal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applySkuSignal(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
if (preg_match($this->config->getSkuLikePattern(), $prompt) === 1) {
|
||||||
|
$score += $this->config->getSkuSignalScore();
|
||||||
|
$signals[] = $this->config->getSkuSignalLabel();
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applyPriceSignal(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
if (preg_match($this->config->getPriceValuePattern(), $prompt) === 1) {
|
||||||
|
$score += $this->config->getPriceSignalScore();
|
||||||
|
$signals[] = $this->config->getPriceSignalLabel();
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applySizeSignal(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
if (preg_match($this->config->getSizeValuePattern(), $prompt) === 1) {
|
||||||
|
$score += $this->config->getSizeSignalScore();
|
||||||
|
$signals[] = $this->config->getSizeSignalLabel();
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applySizeTokenSignal(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
if (preg_match($this->config->getSizeTokenValuePattern(), $prompt) === 1) {
|
||||||
|
$score += $this->config->getSizeTokenSignalScore();
|
||||||
|
$signals[] = $this->config->getSizeTokenSignalLabel();
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applyColorSignal(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
if (preg_match($this->config->getColorValuePattern(), $prompt) === 1) {
|
||||||
|
$score += $this->config->getColorSignalScore();
|
||||||
|
$signals[] = $this->config->getColorSignalLabel();
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{0:int,1:string[]}
|
||||||
|
*/
|
||||||
|
private function applyAdvisorySignals(string $prompt, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
foreach ($this->config->getAdvisorySignals() as $signal) {
|
||||||
|
if (str_contains($prompt, mb_strtolower($signal))) {
|
||||||
|
$score += $this->config->getAdvisorySignalScore();
|
||||||
|
$signals[] = $this->config->getAdvisorySignalPrefix() . $signal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [$score, $signals];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string[] $signals
|
||||||
|
* @return array{intent:string, score:int, signals:string[]}
|
||||||
|
*/
|
||||||
|
private function buildDetectionResult(string $intent, int $score, array $signals): array
|
||||||
|
{
|
||||||
|
return [
|
||||||
|
'intent' => $intent,
|
||||||
|
'score' => $score,
|
||||||
|
'signals' => $signals,
|
||||||
|
];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever
|
|||||||
public function __construct(
|
public function __construct(
|
||||||
private string $projectDir,
|
private string $projectDir,
|
||||||
private LoggerInterface $agentLogger,
|
private LoggerInterface $agentLogger,
|
||||||
|
private StopWords $stopWords,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return StopWords::isStopWord($token);
|
return $this->stopWords->isStopWord($token);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function normalizeText(string $value): string
|
private function normalizeText(string $value): string
|
||||||
@@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever
|
|||||||
* token:string,
|
* token:string,
|
||||||
* chunk_id:string,
|
* chunk_id:string,
|
||||||
* document_id:string,
|
* document_id:string,
|
||||||
* chunk_index:?int,
|
* chunk_index $rows :?int,
|
||||||
* tf:int,
|
* tf:int,
|
||||||
* title_tf:int,
|
* title_tf:int,
|
||||||
* df:int
|
* df:int
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
|
||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
namespace App\Knowledge\Retrieval;
|
namespace App\Knowledge\Retrieval;
|
||||||
@@ -21,10 +20,10 @@ final readonly class NdjsonLexicalIndexBuilder
|
|||||||
private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
|
private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private string $projectDir,
|
private string $projectDir,
|
||||||
private LoggerInterface $agentLogger,
|
private LoggerInterface $agentLogger,
|
||||||
)
|
private StopWords $stopWords,
|
||||||
{
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -345,7 +344,7 @@ final readonly class NdjsonLexicalIndexBuilder
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return StopWords::isStopWord($token);
|
return $this->stopWords->isStopWord($token);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function normalizeText(string $value): string
|
private function normalizeText(string $value): string
|
||||||
|
|||||||
@@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval;
|
|||||||
|
|
||||||
use App\Knowledge\StopWords;
|
use App\Knowledge\StopWords;
|
||||||
|
|
||||||
final class QueryCleaner
|
final readonly class QueryCleaner
|
||||||
{
|
{
|
||||||
|
public function __construct(
|
||||||
|
private StopWords $stopWords
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Cleans a query strictly for retrieval purposes.
|
* Cleans a query strictly for retrieval purposes.
|
||||||
*
|
*
|
||||||
@@ -66,7 +71,7 @@ final class QueryCleaner
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Remove stop words
|
// Remove stop words
|
||||||
if (StopWords::isStopWord($token)) {
|
if ($this->stopWords->isStopWord($token)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,62 +4,25 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Knowledge;
|
namespace App\Knowledge;
|
||||||
|
|
||||||
final class StopWords
|
use App\Config\StopWordsConfig;
|
||||||
|
|
||||||
|
final readonly class StopWords
|
||||||
{
|
{
|
||||||
/**
|
public function __construct(
|
||||||
* Retrieval-optimierte Stopwortliste (Deutsch).
|
private StopWordsConfig $config
|
||||||
*
|
) {
|
||||||
* WICHTIG:
|
|
||||||
* - Keine Negationen entfernen
|
|
||||||
* - Keine Fragewörter entfernen
|
|
||||||
* - Keine fachlichen Begriffe entfernen
|
|
||||||
* - Nur echte Füll- und Strukturwörter
|
|
||||||
*/
|
|
||||||
private const STOP_WORDS = [
|
|
||||||
|
|
||||||
'mit',
|
|
||||||
// Artikel
|
|
||||||
'der', 'die', 'das',
|
|
||||||
'ein', 'eine', 'einer', 'eines',
|
|
||||||
'den', 'dem', 'des',
|
|
||||||
|
|
||||||
// Konjunktionen
|
|
||||||
'und', 'oder', 'aber', 'sowie',
|
|
||||||
|
|
||||||
// Schwache Pronomen
|
|
||||||
'ich', 'du', 'er', 'sie', 'es',
|
|
||||||
'wir', 'ihr',
|
|
||||||
|
|
||||||
// Füllwörter
|
|
||||||
'halt', 'eben', 'auch', 'schon',
|
|
||||||
'noch', 'mal', 'bitte', 'danke',
|
|
||||||
|
|
||||||
// Strukturwörter
|
|
||||||
'also', 'nun', 'tja',
|
|
||||||
'dann', 'danach', 'davor',
|
|
||||||
'hier', 'dort',
|
|
||||||
|
|
||||||
// Zeit-Füller (kontextarm)
|
|
||||||
'heute', 'gestern', 'morgen',
|
|
||||||
|
|
||||||
// Höflichkeits-/Modalformen
|
|
||||||
'könnte', 'kannst', 'kann',
|
|
||||||
'würde', 'würdest', 'würden',
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gibt die vollständige Stopwortliste zurück.
|
|
||||||
*/
|
|
||||||
public static function getStopWords(): array
|
|
||||||
{
|
|
||||||
return self::STOP_WORDS;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prüft, ob ein Wort ein Stopwort ist.
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
public static function isStopWord(string $word): bool
|
public function getStopWords(): array
|
||||||
{
|
{
|
||||||
return in_array($word, self::STOP_WORDS, true);
|
return $this->config->getStopWords();
|
||||||
|
}
|
||||||
|
|
||||||
|
public function isStopWord(string $word): bool
|
||||||
|
{
|
||||||
|
return in_array($word, $this->config->getStopWords(), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user