optimize retrieval

This commit is contained in:
team 1
2026-04-23 15:47:53 +02:00
parent 8a31e99669
commit 87417febf4
13 changed files with 2093 additions and 287 deletions

View File

@@ -62,7 +62,7 @@ document.addEventListener('DOMContentLoaded', () => {
function hasMeaningfulChildContent(element) {
return element.querySelector(
'img, table, pre, code, ul, ol, h1, h2, h3, h4, h5, h6, a, hr'
'img, table, pre, code, ul, ol, h1, h2, h3, h4, h5, h6, a, hr, .badge'
) !== null;
}
@@ -86,68 +86,23 @@ document.addEventListener('DOMContentLoaded', () => {
});
}
function stripAllThinkContent(container) {
const blockSelector = 'p, div, li, blockquote';
const thinkSpans = Array.from(container.querySelectorAll('.think'));
if (thinkSpans.length === 0) {
return;
}
const handledBlocks = new Set();
thinkSpans.forEach((span) => {
const block = span.closest(blockSelector) || span.parentElement;
if (!block || handledBlocks.has(block)) {
return;
}
handledBlocks.add(block);
const thinksInBlock = Array.from(block.querySelectorAll('.think'));
const lastThinkInBlock = thinksInBlock[thinksInBlock.length - 1];
if (!lastThinkInBlock) {
return;
}
let node = block.firstChild;
while (node) {
const next = node.nextSibling;
const isLastThink = node === lastThinkInBlock;
node.remove();
if (isLastThink) {
break;
}
node = next;
}
while (
block.firstChild &&
(
(block.firstChild.nodeType === Node.TEXT_NODE &&
block.firstChild.textContent.trim() === '') ||
(block.firstChild.nodeType === Node.ELEMENT_NODE &&
block.firstChild.tagName === 'BR')
)
) {
block.firstChild.remove();
}
function removeThinkSpansOnly(container) {
container.querySelectorAll('.think').forEach((span) => {
span.remove();
});
cleanupEmptyBlocks(container);
}
function hasNonThinkContent(container) {
function cloneWithoutThinkContent(container) {
const clone = container.cloneNode(true);
stripAllThinkContent(clone);
clone.querySelectorAll('.think').forEach((span) => span.remove());
cleanupEmptyBlocks(clone);
return clone;
}
function hasNonThinkContent(container) {
const clone = cloneWithoutThinkContent(container);
if ((clone.textContent || '').trim() !== '') {
return true;
@@ -156,6 +111,49 @@ document.addEventListener('DOMContentLoaded', () => {
return hasMeaningfulChildContent(clone);
}
/**
 * Reduce the `.think` spans inside `container` to the final one.
 *
 * With zero or one `.think` span nothing is removed here. With several,
 * every span except the last (in `querySelectorAll` order) is removed.
 * If the surviving span is a direct child of its nearest
 * `p, div, li, blockquote` ancestor (or of its parent when no such
 * ancestor exists), whitespace-only text nodes and `<br>` elements that
 * are direct children of that block are removed as well.
 *
 * `cleanupEmptyBlocks(container)` is always called exactly once at the end.
 *
 * @param {Element} container Root element whose `.think` spans are pruned.
 * @returns {void}
 */
function keepOnlyLastThink(container) {
    const allThinks = Array.from(container.querySelectorAll('.think'));

    if (allThinks.length > 1) {
        const survivor = allThinks[allThinks.length - 1];

        for (const stale of allThinks.slice(0, -1)) {
            stale.remove();
        }

        const hostBlock = survivor.closest('p, div, li, blockquote') || survivor.parentElement;

        // Sibling cleanup only applies when the survivor sits directly in the block;
        // a survivor nested deeper (e.g. inside <em>) leaves the block untouched.
        if (hostBlock && survivor.parentElement === hostBlock) {
            // Iterate over a snapshot because nodes are removed while walking.
            for (const sibling of Array.from(hostBlock.childNodes)) {
                if (sibling === survivor) {
                    continue;
                }

                const isBlankText =
                    sibling.nodeType === Node.TEXT_NODE &&
                    sibling.textContent.trim() === '';
                const isLineBreak =
                    sibling.nodeType === Node.ELEMENT_NODE &&
                    sibling.tagName === 'BR';

                if (isBlankText || isLineBreak) {
                    sibling.remove();
                }
            }
        }
    }

    cleanupEmptyBlocks(container);
}
function cleanupThinkSpans(container) {
if (!container) {
return;
@@ -168,54 +166,11 @@ document.addEventListener('DOMContentLoaded', () => {
}
if (hasNonThinkContent(container)) {
stripAllThinkContent(container);
removeThinkSpansOnly(container);
return;
}
if (thinkSpans.length <= 1) {
return;
}
const blockSelector = 'p, div, li, blockquote';
const lastThink = thinkSpans[thinkSpans.length - 1];
const lastBlock = lastThink.closest(blockSelector) || lastThink.parentElement;
thinkSpans.slice(0, -1).forEach((span) => {
const block = span.closest(blockSelector) || span.parentElement;
if (block && block !== lastBlock) {
block.remove();
return;
}
if (block === lastBlock) {
span.remove();
}
});
if (lastBlock && lastBlock.contains(lastThink)) {
let node = lastBlock.firstChild;
while (node && node !== lastThink) {
const next = node.nextSibling;
node.remove();
node = next;
}
while (
lastThink.nextSibling &&
(
(lastThink.nextSibling.nodeType === Node.TEXT_NODE &&
lastThink.nextSibling.textContent.trim() === '') ||
(lastThink.nextSibling.nodeType === Node.ELEMENT_NODE &&
lastThink.nextSibling.tagName === 'BR')
)
) {
lastThink.nextSibling.remove();
}
}
cleanupEmptyBlocks(container);
keepOnlyLastThink(container);
}
function renderBubbleContent(bubble, raw) {

View File

@@ -4,6 +4,9 @@ declare(strict_types=1);
namespace App\Agent;
use App\Commerce\CommerceReferenceResolver;
use App\Commerce\CommerceReferenceStore;
use App\Commerce\Dto\CommerceReferenceContext;
use App\Commerce\SearchRepairService;
use App\Commerce\ShopSearchService;
use App\Config\AgentRunnerConfig;
@@ -30,6 +33,8 @@ final readonly class AgentRunner
private RetrieverInterface $retriever,
private ShopSearchService $shopSearchService,
private SearchRepairService $searchRepairService,
private CommerceReferenceStore $commerceReferenceStore,
private CommerceReferenceResolver $commerceReferenceResolver,
private CommerceIntentLite $commerceIntentLite,
private OllamaClient $ollamaClient,
private LoggerInterface $agentLogger,
@@ -52,7 +57,8 @@ final readonly class AgentRunner
$shopResults = [];
$primaryShopResults = [];
$sources = [];
$factSources = [];
$contextSignals = [];
$optimizedShopQuery = '';
$shopSearchQuery = '';
$commerceIntent = CommerceIntentLite::NONE;
@@ -60,6 +66,8 @@ final readonly class AgentRunner
$attemptedShopRepair = false;
$usedShopRepair = false;
$shopRepairQueries = [];
$activeCommerceReference = null;
$shopChecked = false;
$this->agentLogger->info('Agent run started', [
'userId' => $userId,
@@ -72,19 +80,18 @@ final readonly class AgentRunner
}
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
if ($urlContent !== '') {
$this->addSource($sources, 'Externe URL');
$this->addBadge($factSources, 'Externe URL');
}
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
$knowledgeChunks = $this->retriever->retrieve($prompt);
if ($knowledgeChunks !== []) {
$this->addSource($sources, 'RAG Wissen');
$this->addBadge($factSources, 'RAG Wissen');
}
$commerceIntent = $this->detectCommerceIntent($prompt);
@@ -93,18 +100,53 @@ final readonly class AgentRunner
yield $this->systemMsg('Ich optimiere die Recherche...', 'think');
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
$activeCommerceReference = $this->loadCommerceReference($userId);
if ($commerceHistoryContext !== '') {
$this->addSource($sources, 'Chatverlauf');
$this->addBadge($contextSignals, 'Gesprächskontext');
}
$optimizedShopQuery = $this->buildOptimizedShopQuery(
if ($activeCommerceReference !== null) {
$this->addBadge($contextSignals, 'Commerce-Referenz');
}
$isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp(
$prompt,
$userId,
$commerceHistoryContext
$activeCommerceReference
);
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
if ($isReferenceOnlyFollowUp) {
$shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference);
if ($shopSearchQuery !== '') {
$this->addBadge($contextSignals, 'Deterministische Referenzsuche');
}
$this->agentLogger->info('Using deterministic reference shop query', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'prompt' => $prompt,
'shopSearchQuery' => $shopSearchQuery,
'referenceProductName' => $activeCommerceReference?->productName,
'referenceFocusTerms' => $activeCommerceReference?->focusTerms,
]);
} else {
$optimizedShopQuery = $this->buildOptimizedShopQuery(
$prompt,
$userId,
$commerceHistoryContext
);
if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) {
$this->addBadge($contextSignals, 'Query-Optimierung');
}
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
}
if ($shopSearchQuery === '') {
$shopSearchQuery = $prompt;
}
$this->agentLogger->info('Commerce search prepared', [
'userId' => $userId,
@@ -112,8 +154,11 @@ final readonly class AgentRunner
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
yield $this->systemMsg(
@@ -121,11 +166,14 @@ final readonly class AgentRunner
'think'
);
$shopChecked = true;
$primaryShopResults = $this->searchShop(
$shopSearchQuery,
$commerceIntent,
$userId,
$commerceHistoryContext
$commerceHistoryContext,
$activeCommerceReference
);
$repairPayload = $this->repairShopResults(
@@ -144,11 +192,13 @@ final readonly class AgentRunner
$shopRepairQueries = $repairPayload['repairQueries'];
if ($shopResults !== []) {
$this->addSource($sources, 'Shopsystem');
$this->addBadge($factSources, 'Shopsystem');
} elseif ($shopChecked) {
$this->addBadge($factSources, 'Shopsystem geprüft');
}
if ($attemptedShopRepair) {
$this->addSource($sources, 'Erweiterte Shopsuche');
$this->addBadge($contextSignals, 'Erweiterte Shopsuche');
}
}
@@ -176,6 +226,7 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -192,14 +243,22 @@ final readonly class AgentRunner
]);
}
if ($sources !== []) {
yield $this->emitSources($sources, 'Genutzte Quellen: ');
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Genutzte Datenpfade'
);
}
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
if ($sources !== []) {
yield $this->emitSources($sources, 'Quellen: ');
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Quellen und Signale'
);
}
if ($this->debug) {
@@ -207,10 +266,11 @@ final readonly class AgentRunner
}
if ($fullOutput !== '') {
$this->contextService->appendHistory(
$userId,
$prompt,
$fullOutput
$this->persistConversationState(
userId: $userId,
prompt: $prompt,
fullOutput: $fullOutput,
shopResults: $shopResults
);
}
@@ -221,6 +281,7 @@ final readonly class AgentRunner
'commerceIntent' => $commerceIntent,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -231,6 +292,8 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
} catch (Throwable $e) {
$this->agentLogger->error('Agent run failed', [
@@ -298,6 +361,42 @@ final readonly class AgentRunner
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
}
/**
 * Decide whether a prompt is a follow-up that relies on a stored commerce reference.
 *
 * Returns false when no reference context exists, when the normalized prompt is empty,
 * or when the prompt contains one of the hard-coded product tokens. Otherwise returns
 * true if the prompt contains one of the hard-coded price / availability / link keywords.
 *
 * @param string                        $prompt           Raw user prompt.
 * @param CommerceReferenceContext|null $referenceContext Stored reference, if any.
 */
private function isReferenceOnlyCommerceFollowUp(
    string $prompt,
    ?CommerceReferenceContext $referenceContext
): bool {
    // Without a stored reference there is nothing a follow-up could point at.
    if ($referenceContext === null) {
        return false;
    }

    // Lowercase, replace everything that is not a letter, digit or whitespace
    // with a space, then collapse whitespace runs.
    $text = mb_strtolower(trim($prompt), 'UTF-8');
    foreach (['/[^\p{L}\p{N}\s]+/u', '/\s+/u'] as $cleanupPattern) {
        $text = preg_replace($cleanupPattern, ' ', $text) ?? $text;
    }
    $text = trim($text);

    if ($text === '') {
        return false;
    }

    // A prompt that names a product token itself is not treated as reference-only.
    $namesProductToken = preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $text) === 1;
    if ($namesProductToken) {
        return false;
    }

    $followUpMatch = preg_match(
        '/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u',
        $text
    );

    return $followUpMatch === 1;
}
/**
 * Build the shop query directly from the stored reference.
 *
 * @param CommerceReferenceContext|null $referenceContext Stored reference, if any.
 *
 * @return string Trimmed result of the context's buildReferenceSearchText(),
 *                or an empty string when no context is given.
 */
private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string
{
    return $referenceContext === null
        ? ''
        : trim($referenceContext->buildReferenceSearchText());
}
/**
* @return array{
* results: array,
@@ -346,13 +445,15 @@ final readonly class AgentRunner
string $query,
string $commerceIntent,
string $userId,
string $commerceHistoryContext = ''
string $commerceHistoryContext = '',
?CommerceReferenceContext $referenceContext = null
): array {
try {
return $this->shopSearchService->search(
$query,
$commerceIntent,
$commerceHistoryContext
$commerceHistoryContext,
$referenceContext
);
} catch (Throwable $e) {
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
@@ -361,6 +462,8 @@ final readonly class AgentRunner
'query' => $query,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasReferenceContext' => $referenceContext !== null,
'referenceProductName' => $referenceContext?->productName,
'exception' => $e,
]);
@@ -376,6 +479,64 @@ final readonly class AgentRunner
);
}
/**
 * Load the stored commerce reference context for a user without letting failures escape.
 *
 * Delegates to the commerce reference store. Any Throwable raised by the store is
 * logged as a warning and turned into a null result instead of being rethrown.
 *
 * @param string $userId Identifier passed to the store's load() call.
 *
 * @return CommerceReferenceContext|null Whatever the store returns (possibly null),
 *                                       or null when the store threw.
 */
private function loadCommerceReference(string $userId): ?CommerceReferenceContext
{
try {
return $this->commerceReferenceStore->load($userId);
} catch (Throwable $e) {
// Best effort: a broken reference store is logged and treated as "no reference".
$this->agentLogger->warning('Failed to load commerce reference context', [
'userId' => $userId,
'exception' => $e,
]);
return null;
}
}
/**
 * Resolve a commerce reference from the finished turn and save it for the user.
 *
 * Nothing is saved when the resolver returns null. Any Throwable from the resolver
 * or the store is logged as a warning and not rethrown.
 *
 * @param string            $userId      Identifier the reference is saved under.
 * @param string            $prompt      Prompt of the turn, passed to the resolver.
 * @param string            $answer      Answer text of the turn, passed to the resolver.
 * @param array<int, mixed> $shopResults Shop results of the turn, passed to the resolver.
 */
private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void
{
    try {
        $resolved = $this->commerceReferenceResolver->resolveFromCommerceTurn($prompt, $answer, $shopResults);

        if ($resolved !== null) {
            $this->commerceReferenceStore->save($userId, $resolved);
        }
    } catch (Throwable $failure) {
        $this->agentLogger->warning('Failed to persist commerce reference context', [
            'userId' => $userId,
            'exception' => $failure,
        ]);
    }
}
/**
 * Persist the outcome of a completed turn.
 *
 * First appends the prompt/answer pair to the conversation history through the
 * context service, then passes the same turn plus the shop results to
 * storeCommerceReference(). The appendHistory() call is not guarded here, so an
 * exception from it propagates and the commerce reference is not stored.
 *
 * @param string            $userId      Identifier used for both persistence calls.
 * @param string            $prompt      Prompt of the turn.
 * @param string            $fullOutput  Complete answer text of the turn.
 * @param array<int, mixed> $shopResults Shop results forwarded to storeCommerceReference().
 */
private function persistConversationState(
string $userId,
string $prompt,
string $fullOutput,
array $shopResults
): void {
// History is written first; the reference update follows with the same turn data.
$this->contextService->appendHistory($userId, $prompt, $fullOutput);
$this->storeCommerceReference(
userId: $userId,
prompt: $prompt,
answer: $fullOutput,
shopResults: $shopResults
);
}
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
{
return match ($commerceIntent) {
@@ -447,22 +608,36 @@ final readonly class AgentRunner
}
/**
* @param string[] $sources
* @param string[] $factSources
* @param string[] $contextSignals
*/
private function emitSources(array $sources, string $prefix): string
private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string
{
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
$parts = [];
if ($factSources !== []) {
$parts[] = 'Fakten: ' . implode(' ', $factSources);
}
if ($contextSignals !== []) {
$parts[] = 'Kontext: ' . implode(' ', $contextSignals);
}
return $this->systemMsg(
$label . ': ' . implode(' &nbsp;&nbsp; ', $parts),
'info'
);
}
/**
* @param string[] $sources
* @param string[] $target
*/
private function addSource(array &$sources, string $label): void
private function addBadge(array &$target, string $label): void
{
$badge = $this->badge($label);
if (!in_array($badge, $sources, true)) {
$sources[] = $badge;
if (!in_array($badge, $target, true)) {
$target[] = $badge;
}
}

View File

@@ -14,7 +14,6 @@ use RuntimeException;
final readonly class PromptBuilder
{
public function __construct(
private ContextService $contextService,
private SystemPromptRepository $systemPromptRepository,
@@ -49,12 +48,24 @@ final readonly class PromptBuilder
$hasShopResults = $shopResults !== [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
$systemBlock = $this->buildSystemBlock();
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
$responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
$responseFormatBlock = $this->buildResponseFormatBlock(
$prompt,
$hasShopResults,
$isTechnicalProductQuestion,
$isPriceDrivenQuestion
);
$knowledgeBlock = $this->buildKnowledgeBlock(
$knowledgeChunks,
$urlContent,
$prompt,
$hasShopResults,
$isPriceDrivenQuestion
);
$userBlock = $this->buildUserBlock($prompt);
$fixedPrompt = $this->implodeBlocks([
@@ -231,7 +242,9 @@ final readonly class PromptBuilder
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
"Do not infer undocumented technical specifications from shop data.\n" .
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.";
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
if ($totalCount > count($limitedShopResults)) {
$header .= "\n" .
@@ -247,12 +260,20 @@ final readonly class PromptBuilder
/**
* Build a small priority block that tells the model what to surface first.
*/
private function buildOutputPriorityBlock(bool $hasShopResults): string
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
{
if (!$hasShopResults) {
return '';
}
if ($isPriceDrivenQuestion) {
return
"OUTPUT PRIORITY:\n" .
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
}
return
"OUTPUT PRIORITY:\n" .
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
@@ -263,7 +284,8 @@ final readonly class PromptBuilder
private function buildResponseFormatBlock(
string $prompt,
bool $hasShopResults,
bool $isTechnicalProductQuestion
bool $isTechnicalProductQuestion,
bool $isPriceDrivenQuestion
): string {
$rules = [
"RESPONSE FORMAT RULES:",
@@ -284,6 +306,8 @@ final readonly class PromptBuilder
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
} else {
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
}
@@ -294,6 +318,12 @@ final readonly class PromptBuilder
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
}
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-driven questions, answer the threshold result first.";
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
}
if ($this->asksForAccessoryOrBundle($prompt)) {
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
@@ -304,14 +334,13 @@ final readonly class PromptBuilder
return implode("\n", $rules);
}
/**
* Build the knowledge block.
*
* Retrieved knowledge remains the main source for technical matching and explanation.
* Shop data is preferred for current commercial fields.
*/
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
{
private function buildKnowledgeBlock(
array $knowledgeChunks,
string $urlContent,
string $prompt,
bool $hasShopResults,
bool $isPriceDrivenQuestion
): string {
$knowledgeParts = [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
@@ -334,7 +363,7 @@ final readonly class PromptBuilder
"LANGUAGE RULES:\n" .
implode("\n", $this->buildLanguageRules()),
"FACT GROUNDING RULES:\n" .
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
"Source: Documents\n" .
implode("\n\n", $lines),
@@ -354,13 +383,6 @@ final readonly class PromptBuilder
return $this->implodeBlocks($knowledgeParts);
}
/**
* Resolve how many characters may still be used by history.
*
* The active model num_ctx is converted into a conservative prompt budget.
* Shop, knowledge and user question are fixed priority blocks.
* History only receives the remaining space.
*/
private function resolveHistoryBudgetChars(string $fixedPrompt): int
{
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
@@ -407,8 +429,11 @@ final readonly class PromptBuilder
/**
* @return string[]
*/
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
{
private function buildFactGroundingRules(
bool $isTechnicalProductQuestion,
bool $hasShopResults,
bool $isPriceDrivenQuestion
): array {
$rules = [
"- State only facts that are explicitly present in the provided sources.",
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
@@ -437,6 +462,11 @@ final readonly class PromptBuilder
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
]);
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
}
} else {
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
@@ -523,6 +553,20 @@ final readonly class PromptBuilder
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
}
/**
 * Heuristically detect whether a prompt is driven by price.
 *
 * A prompt counts as price-driven when it either contains a comparative amount
 * such as "unter 500 euro", "ab 100 eur" or "über 3000 €", or contains one of
 * the substrings "preis", "kosten" or "kostet" (case-insensitive).
 *
 * @param string $prompt Raw user prompt.
 *
 * @return bool True when one of the price signals above is present.
 */
private function isLikelyPriceDrivenQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // The trailing \b guards only the alphabetic currency words. "€" is not a word
    // character, so a \b placed after it succeeds only when a letter or digit follows
    // directly; with that, "unter 500 €" (end of text or followed by a space) never
    // matched.
    $thresholdPattern = '/\b(mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)'
        . '\s+\d+(?:[.,]\d+)?\s*(?:(?:euro|eur)\b|€)/u';

    if (preg_match($thresholdPattern, $normalized) === 1) {
        return true;
    }

    // "preis" already covers "preise", so no separate check is needed for the plural.
    foreach (['preis', 'kosten', 'kostet'] as $keyword) {
        if (str_contains($normalized, $keyword)) {
            return true;
        }
    }

    return false;
}
private function asksForAccessoryOrBundle(string $prompt): bool
{
$normalized = mb_strtolower($prompt, 'UTF-8');

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\CommerceReferenceContext;
use App\Commerce\Dto\CommerceSearchQuery;
use App\Config\CommerceIntentConfig;
use App\Config\CommerceQueryParserConfig;
@@ -23,10 +24,12 @@ final readonly class CommerceQueryParser
public function parse(
string $originalPrompt,
string $intent,
string $historyContext = ''
string $historyContext = '',
?CommerceReferenceContext $referenceContext = null
): CommerceSearchQuery {
$normalizedPrompt = $this->normalize($originalPrompt);
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
$isReferenceOnlyFollowUp = $this->isReferenceOnlyFollowUp($normalizedPrompt);
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
$sizes = $this->extractSizes($normalizedPrompt);
@@ -44,7 +47,7 @@ final readonly class CommerceQueryParser
if (
!$isDirectProductQuery
&& $historyContext !== ''
&& $this->shouldUseHistoryContext($normalizedPrompt)
&& $this->shouldUseHistoryContext($normalizedPrompt, $searchText)
) {
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
@@ -73,7 +76,29 @@ final readonly class CommerceQueryParser
}
}
$finalSearchText = $searchText !== '' ? $searchText : $normalizedPrompt;
if (
!$isDirectProductQuery
&& $referenceContext !== null
&& $this->shouldUseReferenceContext($normalizedPrompt, $searchText)
) {
$referenceSearchText = $this->buildReferenceSearchText($referenceContext);
if ($isReferenceOnlyFollowUp || $this->isTooGenericSearchText($searchText)) {
$searchText = $referenceSearchText !== '' ? $referenceSearchText : $searchText;
} else {
$searchText = $this->mergeSearchTexts($referenceSearchText, $searchText);
}
if (($brand === null || $brand === '') && $referenceContext->manufacturer !== null) {
$normalizedManufacturer = $this->normalize($referenceContext->manufacturer);
if ($normalizedManufacturer !== '') {
$brand = $normalizedManufacturer;
}
}
}
$finalSearchText = trim($searchText !== '' ? $searchText : $normalizedPrompt);
return new CommerceSearchQuery(
originalPrompt: $originalPrompt,
@@ -93,7 +118,7 @@ final readonly class CommerceQueryParser
{
$value = $this->textNormalizer->normalize($prompt);
$value = $this->queryCleaner->clean($value);
$value = mb_strtolower(trim($value));
$value = mb_strtolower(trim($value), 'UTF-8');
$value = str_replace(['€'], ' euro ', $value);
$value = preg_replace('/[^\p{L}\p{N}\s.,\-]/u', ' ', $value) ?? $value;
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
@@ -126,6 +151,17 @@ final readonly class CommerceQueryParser
$priceMin = $this->toFloat($m[1]);
}
// NEW:
// Recognize comparative lower-bound phrasing such as:
// - mehr als 3000 euro
// - über 3000 euro
// - ueber 3000 euro
// - größer als 3000 euro
// - groesser als 3000 euro
if (preg_match('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
$priceMin = $this->toFloat($m[1]);
}
return [$priceMin, $priceMax];
}
@@ -152,7 +188,10 @@ final readonly class CommerceQueryParser
}
}
return array_values(array_unique(array_filter($sizes, static fn($v) => $v !== '')));
return array_values(array_unique(array_filter(
$sizes,
static fn(string $value): bool => $value !== ''
)));
}
private function extractBrand(string $prompt): ?string
@@ -184,6 +223,7 @@ final readonly class CommerceQueryParser
foreach ($this->config->getPhrasesToRemove() as $phrase) {
$normalizedPhrase = $this->normalize((string) $phrase);
if ($normalizedPhrase === '') {
continue;
}
@@ -193,6 +233,7 @@ final readonly class CommerceQueryParser
foreach ($sizes as $size) {
$normalizedSize = $this->normalize((string) $size);
if ($normalizedSize === '') {
continue;
}
@@ -207,6 +248,7 @@ final readonly class CommerceQueryParser
if ($priceMin !== null || $priceMax !== null) {
$text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text;
$text = preg_replace('/\b' . $this->intentConfig->getPricePattern() . '\b/u', ' ', $text) ?? $text;
}
@@ -219,14 +261,14 @@ final readonly class CommerceQueryParser
);
$tokens = $this->filterSearchTokens($tokens);
$tokens = $this->stripReferenceOnlyTokens($tokens);
return trim(implode(' ', $tokens));
}
private function buildDirectProductSearchText(string $prompt): string
{
$text = $prompt;
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
$text = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
$text = trim($text, " \t\n\r\0\x0B-.,");
$tokens = array_filter(
@@ -234,17 +276,61 @@ final readonly class CommerceQueryParser
static fn(string $token): bool => mb_strlen($token) > 0
);
$tokens = array_values(array_unique($tokens));
return trim(implode(' ', $tokens));
return trim(implode(' ', array_values(array_unique($tokens))));
}
private function shouldUseHistoryContext(string $prompt): bool
private function shouldUseHistoryContext(string $prompt, string $searchText): bool
{
return preg_match(
'/\b(' . $this->config->getHistoryContextPattern() . ')\b/u',
$prompt
) === 1;
if ($this->isReferenceOnlyFollowUp($prompt)) {
return true;
}
if ($this->isTooGenericSearchText($searchText)) {
return true;
}
return preg_match('/\b(' . $this->config->getHistoryContextPattern() . ')\b/u', $prompt) === 1;
}
/**
 * Decide whether the stored reference context should influence the search text.
 *
 * True when the prompt matches the reference follow-up pattern, or, failing that,
 * when the search text is considered too generic.
 *
 * @param string $prompt     Normalized prompt.
 * @param string $searchText Search text derived from the prompt.
 */
private function shouldUseReferenceContext(string $prompt, string $searchText): bool
{
    return $this->isReferenceOnlyFollowUp($prompt)
        || $this->isTooGenericSearchText($searchText);
}
/**
 * Check the prompt against the configured reference follow-up pattern.
 *
 * The configured value is inserted unescaped between word boundaries inside a
 * capture group, so it is presumably a regex alternation (verify against the config).
 *
 * @param string $prompt Prompt to test.
 */
private function isReferenceOnlyFollowUp(string $prompt): bool
{
    $followUpAlternatives = $this->config->getReferenceFollowUpPattern();
    $matchCount = preg_match('/\b(' . $followUpAlternatives . ')\b/u', $prompt);

    return $matchCount === 1;
}
/**
 * Tell whether a search text carries no token beyond the configured reference-only tokens.
 *
 * An empty or whitespace-only search text is generic. Otherwise the text is generic
 * only if every whitespace-separated token is one of the configured reference-only tokens.
 *
 * @param string $searchText Search text to inspect.
 */
private function isTooGenericSearchText(string $searchText): bool
{
    $words = preg_split('/\s+/u', $searchText, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    $words = array_values(array_filter(
        $words,
        static fn(string $word): bool => $word !== ''
    ));

    if ($words === []) {
        return true;
    }

    $referenceOnlyLookup = array_fill_keys($this->config->getReferenceOnlyTokens(), true);

    foreach ($words as $word) {
        if (array_key_exists($word, $referenceOnlyLookup)) {
            continue;
        }

        // First token outside the reference-only set makes the text specific enough.
        return false;
    }

    return true;
}
/**
 * Produce the normalized search text for a stored reference.
 *
 * @param CommerceReferenceContext $referenceContext Stored reference.
 *
 * @return string The context's buildReferenceSearchText() output passed through normalize().
 */
private function buildReferenceSearchText(CommerceReferenceContext $referenceContext): string
{
    $rawReferenceText = $referenceContext->buildReferenceSearchText();

    return $this->normalize($rawReferenceText);
}
private function extractLatestQuestionFromHistory(string $historyContext): string
@@ -256,6 +342,7 @@ final readonly class CommerceQueryParser
}
$questions = $matches[1] ?? [];
if ($questions === []) {
return '';
}
@@ -265,11 +352,11 @@ final readonly class CommerceQueryParser
return is_string($lastQuestion) ? trim($lastQuestion) : '';
}
private function mergeSearchTexts(string $historySearchText, string $currentSearchText): string
private function mergeSearchTexts(string $left, string $right): string
{
$tokens = [];
foreach ([$historySearchText, $currentSearchText] as $text) {
foreach ([$left, $right] as $text) {
if ($text === '') {
continue;
}
@@ -294,11 +381,25 @@ final readonly class CommerceQueryParser
*/
private function filterSearchTokens(array $tokens): array
{
    // Turn the configured stop words into a lookup table once, so each token
    // costs a single isset() probe instead of a linear in_array() scan.
    // Only one assignment and one callback may exist here: a second callback
    // argument without a separating comma is a syntax error.
    $stopWords = array_fill_keys($this->config->getFilterSearchTokensPattern(), true);

    // array_values() re-indexes the surviving tokens into a plain list.
    return array_values(array_filter(
        $tokens,
        static fn(string $token): bool => !isset($stopWords[$token])
    ));
}
/**
 * Drops every token that appears in the configured reference-only token list.
 *
 * @param string[] $tokens
 * @return string[] Remaining tokens in their original order, re-indexed from zero.
 */
private function stripReferenceOnlyTokens(array $tokens): array
{
    $blocked = array_fill_keys($this->config->getReferenceOnlyTokens(), true);

    $kept = [];
    foreach ($tokens as $token) {
        if (isset($blocked[$token])) {
            continue;
        }
        $kept[] = $token;
    }

    return $kept;
}
@@ -318,11 +419,7 @@ final readonly class CommerceQueryParser
$tokens = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY) ?: [];
if (count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1) {
return true;
}
return false;
return count($tokens) <= 4 && preg_match('/\d/u', $prompt) === 1;
}
private function containsModelLikePhrase(string $text): bool

View File

@@ -0,0 +1,239 @@
<?php
declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\CommerceReferenceContext;
/**
 * Derives a CommerceReferenceContext from one conversation turn, using the
 * prompt/answer text and the shop search results.
 */
final readonly class CommerceReferenceResolver
{
    /**
     * Resolves the reference for a turn from both sources and combines them.
     *
     * When the text-based and shop-based candidates name compatible products,
     * the shop data is used for identity (name, number, URL) and the focus terms
     * of both are merged. Otherwise the text-based candidate is preferred and
     * the shop-based one is the fallback.
     *
     * @param array<int, array<string, mixed>> $shopResults
     * @return CommerceReferenceContext|null Null when neither source yields a product.
     */
    public function resolveFromCommerceTurn(
        string $prompt,
        string $answerText,
        array $shopResults
    ): ?CommerceReferenceContext {
        $fromText = $this->resolveFromText($prompt, $answerText);
        $fromShop = $this->resolveFromShopResults($prompt, $shopResults);

        if (
            $fromText !== null
            && $fromShop !== null
            && $this->areCompatibleProductNames($fromText->productName, $fromShop->productName)
        ) {
            return new CommerceReferenceContext(
                productName: $fromShop->productName,
                productNumber: $fromShop->productNumber,
                manufacturer: $fromShop->manufacturer ?? $fromText->manufacturer,
                url: $fromShop->url,
                sourceType: 'shop',
                confidence: 1.0,
                resolvedFromPrompt: $fromText->resolvedFromPrompt ?? $fromShop->resolvedFromPrompt,
                resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
                focusTerms: $this->mergeFocusTerms(
                    $fromText->focusTerms,
                    $fromShop->focusTerms
                ),
            );
        }

        if ($fromText !== null) {
            return $fromText;
        }

        return $fromShop;
    }

    /**
     * Builds a reference from the first shop result (index 0).
     *
     * @param array<int, array<string, mixed>> $shopResults
     * @return CommerceReferenceContext|null Null when there is no first row or it has no name.
     */
    private function resolveFromShopResults(string $prompt, array $shopResults): ?CommerceReferenceContext
    {
        $top = $shopResults[0] ?? null;
        if (!is_array($top)) {
            return null;
        }

        $name = $this->extractString($top, 'name');
        $productNumber = $this->extractString($top, 'productNumber');
        $manufacturer = $this->extractString($top, 'manufacturer');
        $url = $this->extractString($top, 'url');

        if ($name === '') {
            return null;
        }

        return new CommerceReferenceContext(
            productName: $name,
            // Empty strings are stored as null so "missing" has one representation.
            productNumber: $productNumber !== '' ? $productNumber : null,
            manufacturer: $manufacturer !== '' ? $manufacturer : null,
            url: $url !== '' ? $url : null,
            sourceType: 'shop',
            confidence: 1.0,
            resolvedFromPrompt: trim($prompt) !== '' ? trim($prompt) : null,
            resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
            focusTerms: $this->extractFocusTerms($prompt . "\n" . $name),
        );
    }

    /**
     * Looks for a known product name in the prompt and answer text.
     *
     * @return CommerceReferenceContext|null Null when no known name occurs.
     */
    private function resolveFromText(string $prompt, string $answerText): ?CommerceReferenceContext
    {
        $text = trim($prompt . "\n" . $answerText);
        if ($text === '') {
            return null;
        }

        // The first pattern in this list that matches wins, regardless of where
        // it occurs in the text. Longer model names are listed before the names
        // they start with ("2000 THCL" before "2000", "ECO PLUS"/"ECO C" before "ECO").
        $patterns = [
            '/\b(Testomat\s+2000\s+THCL)\b/ui',
            '/\b(Testomat\s+808)\b/ui',
            '/\b(Testomat\s+EVO\s+TH)\b/ui',
            '/\b(Testomat\s+EVO\s+CALC)\b/ui',
            '/\b(Testomat\s+ECO\s+PLUS)\b/ui',
            '/\b(Testomat\s+ECO\s+C)\b/ui',
            '/\b(Testomat\s+ECO)\b/ui',
            '/\b(Testomat\s+LAB\s+CL)\b/ui',
            '/\b(Testomat\s+LAB\s+MONO)\b/ui',
            '/\b(Testomat\s+2000)\b/ui',
        ];

        foreach ($patterns as $pattern) {
            if (!preg_match($pattern, $text, $matches)) {
                continue;
            }

            // \s+ also matches tabs, runs of spaces and line breaks, so the raw
            // capture can contain them; collapse to single spaces before storing.
            $productName = (string) ($matches[1] ?? '');
            $productName = trim(preg_replace('/\s+/u', ' ', $productName) ?? $productName);
            if ($productName === '') {
                continue;
            }

            return new CommerceReferenceContext(
                productName: $productName,
                productNumber: null,
                manufacturer: null,
                url: null,
                sourceType: 'conversation',
                confidence: 0.8,
                resolvedFromPrompt: trim($prompt) !== '' ? trim($prompt) : null,
                resolvedAt: (new \DateTimeImmutable())->format(\DateTimeInterface::ATOM),
                focusTerms: $this->extractFocusTerms($text),
            );
        }

        return null;
    }

    /**
     * Collects the canonical focus terms whose pattern occurs in the text.
     *
     * Singular and plural canonical forms share one pattern, so both are
     * returned as soon as either form occurs.
     *
     * @return string[]
     */
    private function extractFocusTerms(string $text): array
    {
        $normalized = $this->normalizeText($text);
        if ($normalized === '') {
            return [];
        }

        // canonical term => pattern tested against the lower-cased text
        $patterns = [
            'indikator' => '/\bindikator(?:en)?\b/u',
            'indikatoren' => '/\bindikator(?:en)?\b/u',
            'reagenz' => '/\breagenz(?:ien)?\b/u',
            'reagenzien' => '/\breagenz(?:ien)?\b/u',
            'zubehör' => '/\bzubeh[oö]r\b/u',
            'ersatzteil' => '/\bersatzteile?\b/u',
            'ersatzteile' => '/\bersatzteile?\b/u',
            'service-set' => '/\bservice(?:\s|-)?set\b/u',
            'filter' => '/\bfilter\b/u',
            'pumpenkopf' => '/\bpumpenkopf\b/u',
            'motorblock' => '/\bmotorblock\b/u',
            'mehrwertpaket' => '/\bmehrwertpaket\b/u',
            'neotecmaster' => '/\bneotecmaster\b/u',
        ];

        $terms = [];
        foreach ($patterns as $canonical => $pattern) {
            if (preg_match($pattern, $normalized) === 1) {
                $terms[] = $canonical;
            }
        }

        return array_values(array_unique($terms));
    }

    /**
     * Merges two focus-term lists, trimming entries and dropping blanks,
     * non-strings and duplicates while keeping first-seen order.
     *
     * @param string[] $left
     * @param string[] $right
     * @return string[]
     */
    private function mergeFocusTerms(array $left, array $right): array
    {
        $merged = [];
        foreach ([$left, $right] as $list) {
            foreach ($list as $item) {
                if (!is_string($item)) {
                    continue;
                }
                $item = trim($item);
                if ($item === '') {
                    continue;
                }
                // Keyed by the term itself so a repeated term is stored once.
                $merged[$item] = $item;
            }
        }

        return array_values($merged);
    }

    /**
     * Two names are compatible when, after normalization, they are equal or
     * one contains the other. Empty names are never compatible.
     */
    private function areCompatibleProductNames(string $left, string $right): bool
    {
        $left = $this->normalizeName($left);
        $right = $this->normalizeName($right);

        if ($left === '' || $right === '') {
            return false;
        }

        if ($left === $right) {
            return true;
        }

        return str_contains($left, $right) || str_contains($right, $left);
    }

    /**
     * Lower-cases a name and replaces every run of non-letter/non-digit
     * characters with a single space.
     */
    private function normalizeName(string $value): string
    {
        $value = mb_strtolower(trim($value), 'UTF-8');
        $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value;
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        return trim($value);
    }

    /**
     * Lower-cases text and collapses whitespace runs; punctuation is kept.
     */
    private function normalizeText(string $value): string
    {
        $value = mb_strtolower(trim($value), 'UTF-8');
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        return trim($value);
    }

    /**
     * Reads a trimmed string field from a result row.
     *
     * @param array<string, mixed> $row
     * @return string Empty string when the key is missing or not a string.
     */
    private function extractString(array $row, string $key): string
    {
        $value = $row[$key] ?? null;
        if (!is_string($value)) {
            return '';
        }

        return trim($value);
    }
}

View File

@@ -0,0 +1,99 @@
<?php
declare(strict_types=1);
namespace App\Commerce;
use App\Commerce\Dto\CommerceReferenceContext;
/**
 * Persists one CommerceReferenceContext per user as a JSON file below
 * <projectDir>/var/agent-commerce-context.
 */
final readonly class CommerceReferenceStore
{
    private string $directory;

    /**
     * @throws \RuntimeException When the storage directory cannot be created.
     */
    public function __construct(string $projectDir)
    {
        $this->directory = rtrim($projectDir, '/') . '/var/agent-commerce-context';

        // The trailing is_dir() re-check tolerates another process creating the
        // directory between the first check and mkdir().
        if (!is_dir($this->directory) && !mkdir($this->directory, 0775, true) && !is_dir($this->directory)) {
            throw new \RuntimeException(sprintf(
                'Failed to create commerce reference directory: %s',
                $this->directory
            ));
        }
    }

    /**
     * Loads the stored context for a user.
     *
     * @return CommerceReferenceContext|null Null when no file exists, or it is
     *                                       unreadable, blank or not a JSON array/object.
     * @throws \InvalidArgumentException When the user id sanitizes to an empty string.
     */
    public function load(string $userId): ?CommerceReferenceContext
    {
        $path = $this->getPath($userId);
        if (!is_file($path)) {
            return null;
        }

        $content = file_get_contents($path);
        if ($content === false || trim($content) === '') {
            return null;
        }

        $decoded = json_decode($content, true);
        if (!is_array($decoded)) {
            return null;
        }

        return CommerceReferenceContext::fromArray($decoded);
    }

    /**
     * Writes the context for a user by writing a temporary file and renaming
     * it over the target.
     *
     * @throws \RuntimeException         When encoding, writing or renaming fails.
     * @throws \InvalidArgumentException When the user id sanitizes to an empty string.
     */
    public function save(string $userId, CommerceReferenceContext $context): void
    {
        $path = $this->getPath($userId);

        // Each call gets its own temp file. With a shared "<path>.tmp", two
        // concurrent saves for the same user write to the same file and the
        // slower one fails in rename() because the file has already been moved.
        $tmpPath = $path . '.' . bin2hex(random_bytes(8)) . '.tmp';

        $json = json_encode(
            $context->toArray(),
            JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
        );

        if (!is_string($json)) {
            throw new \RuntimeException('Failed to encode commerce reference context.');
        }

        if (file_put_contents($tmpPath, $json) === false) {
            // Do not leave a partially written temp file behind.
            @unlink($tmpPath);

            throw new \RuntimeException(sprintf(
                'Failed to write commerce reference context: %s',
                $tmpPath
            ));
        }

        if (!rename($tmpPath, $path)) {
            @unlink($tmpPath);

            throw new \RuntimeException(sprintf(
                'Failed to move commerce reference context into place: %s',
                $path
            ));
        }
    }

    /**
     * Removes the stored context for a user, if any. Deletion errors are ignored.
     *
     * @throws \InvalidArgumentException When the user id sanitizes to an empty string.
     */
    public function clear(string $userId): void
    {
        $path = $this->getPath($userId);
        if (is_file($path)) {
            @unlink($path);
        }
    }

    /**
     * Maps a user id to its JSON file path.
     *
     * Characters outside [a-zA-Z0-9_-] are replaced with "_" and leading or
     * trailing underscores are stripped, so the id cannot leave the directory.
     * NOTE(review): ids that differ only in replaced characters map to the same
     * file — confirm the caller's ids are already within the safe alphabet.
     *
     * @throws \InvalidArgumentException When nothing remains after sanitizing.
     */
    private function getPath(string $userId): string
    {
        $safeUserId = preg_replace('/[^a-zA-Z0-9_-]/', '_', trim($userId));
        $safeUserId = is_string($safeUserId) ? trim($safeUserId, '_') : '';

        if ($safeUserId === '') {
            throw new \InvalidArgumentException('User id must not be empty.');
        }

        return $this->directory . '/' . $safeUserId . '.json';
    }
}

View File

@@ -0,0 +1,149 @@
<?php
declare(strict_types=1);
namespace App\Commerce\Dto;
/**
 * Immutable description of the product a conversation currently refers to,
 * with array (de)serialization and a helper that builds search text from it.
 */
final readonly class CommerceReferenceContext
{
    /**
     * @param string[] $focusTerms
     */
    public function __construct(
        public string $productName,
        public ?string $productNumber = null,
        public ?string $manufacturer = null,
        public ?string $url = null,
        public string $sourceType = 'conversation',
        public float $confidence = 0.0,
        public ?string $resolvedFromPrompt = null,
        public ?string $resolvedAt = null,
        public array $focusTerms = [],
    ) {
    }

    /**
     * Rebuilds a context from a decoded payload.
     *
     * String fields are trimmed; blank or non-string values become null
     * (sourceType falls back to 'conversation', confidence to 0.0).
     *
     * @param array<string, mixed> $payload
     * @return self|null Null when the payload has no usable productName.
     */
    public static function fromArray(array $payload): ?self
    {
        $productName = self::normalizeNullableString($payload['productName'] ?? null);
        if ($productName === null) {
            return null;
        }

        return new self(
            productName: $productName,
            productNumber: self::normalizeNullableString($payload['productNumber'] ?? null),
            manufacturer: self::normalizeNullableString($payload['manufacturer'] ?? null),
            url: self::normalizeNullableString($payload['url'] ?? null),
            sourceType: self::normalizeNullableString($payload['sourceType'] ?? null) ?? 'conversation',
            confidence: isset($payload['confidence']) && is_numeric($payload['confidence'])
                ? (float) $payload['confidence']
                : 0.0,
            resolvedFromPrompt: self::normalizeNullableString($payload['resolvedFromPrompt'] ?? null),
            resolvedAt: self::normalizeNullableString($payload['resolvedAt'] ?? null),
            focusTerms: self::normalizeStringList($payload['focusTerms'] ?? []),
        );
    }

    /**
     * @return array<string, mixed> All properties keyed by property name.
     */
    public function toArray(): array
    {
        return [
            'productName' => $this->productName,
            'productNumber' => $this->productNumber,
            'manufacturer' => $this->manufacturer,
            'url' => $this->url,
            'sourceType' => $this->sourceType,
            'confidence' => $this->confidence,
            'resolvedFromPrompt' => $this->resolvedFromPrompt,
            'resolvedAt' => $this->resolvedAt,
            'focusTerms' => $this->focusTerms,
        ];
    }

    /**
     * True when a product number is known or the confidence is at least 0.8.
     */
    public function hasStrongIdentity(): bool
    {
        return $this->productNumber !== null || $this->confidence >= 0.8;
    }

    /**
     * Joins product name, product number and focus terms into one
     * space-separated search string, skipping any piece that is already
     * contained (case-insensitively) in a piece added before it.
     */
    public function buildReferenceSearchText(): string
    {
        $parts = [];

        if ($this->productName !== '') {
            $parts[] = $this->productName;
        }

        // mb_stripos() folds case for the whole UTF-8 range; stripos() only
        // folds ASCII, so "ZUBEHÖR" and "zubehör" would not be recognised as
        // the same term and the term would be appended a second time.
        if (
            $this->productNumber !== null
            && $this->productNumber !== ''
            && mb_stripos($this->productName, $this->productNumber, 0, 'UTF-8') === false
        ) {
            $parts[] = $this->productNumber;
        }

        foreach ($this->focusTerms as $focusTerm) {
            if ($focusTerm === '') {
                continue;
            }

            $alreadyIncluded = false;
            foreach ($parts as $part) {
                if (mb_stripos($part, $focusTerm, 0, 'UTF-8') !== false) {
                    $alreadyIncluded = true;
                    break;
                }
            }

            if (!$alreadyIncluded) {
                $parts[] = $focusTerm;
            }
        }

        return trim(implode(' ', $parts));
    }

    /**
     * Returns the trimmed string, or null for non-strings and blank strings.
     */
    private static function normalizeNullableString(mixed $value): ?string
    {
        if (!is_string($value)) {
            return null;
        }

        $value = trim($value);

        return $value !== '' ? $value : null;
    }

    /**
     * Returns the trimmed, non-blank, unique string entries of the value.
     *
     * @param mixed $value
     * @return string[] Empty when the value is not an array.
     */
    private static function normalizeStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $out = [];
        foreach ($value as $item) {
            if (!is_string($item)) {
                continue;
            }
            $item = trim($item);
            if ($item === '') {
                continue;
            }
            $out[] = $item;
        }

        return array_values(array_unique($out));
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -12,27 +12,48 @@ final class CommerceIntentConfig
'shop',
'alle',
'preis',
'preise',
'kunde',
'online',
'produkt',
'produkte',
'artikel',
'sku',
'kaufen',
'kostet',
'kosten',
'verfügbarkeit',
'verfuegbarkeit',
// Search / product discovery signals
'suche',
'such',
'finde',
'finden',
'welche',
'welcher',
'welches',
// Product-type signals for technical/commercial requests
// Device / system signals
'analysegerät',
'analysegeraet',
'analysegeräte',
'analysegeraete',
'messgerät',
'messgeraet',
'messgeräte',
'messgeraete',
'gerät',
'geraet',
'geräte',
'geraete',
'analysator',
'analysatoren',
'analyzer',
'system',
'systeme',
'anlage',
'anlagen',
];
}
@@ -46,6 +67,8 @@ final class CommerceIntentConfig
'geeignet',
'empfiehl',
'empfehl',
'vergleich',
'vergleichen',
];
}
@@ -57,6 +80,7 @@ final class CommerceIntentConfig
'eur',
'teuer',
'preis',
'preise',
'kosten',
'kostet',
];

View File

@@ -1,63 +1,94 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Configuration values for the commerce query parser: injected word lists
 * plus the fixed phrase alternations used for history/reference detection.
 */
final readonly class CommerceQueryParserConfig
{
    /**
     * @param string[] $knownBrands
     * @param string[] $phrasesToRemove
     * @param string[] $filterSearchTokensPattern
     * @param string[] $referenceOnlyTokens
     */
    public function __construct(
        private array $knownBrands = [],
        private array $phrasesToRemove = [],
        private array $filterSearchTokensPattern = [],
        private array $referenceOnlyTokens = [],
    ) {
    }

    /**
     * @return string[] The injected brand list.
     */
    public function getKnownBrands(): array
    {
        return $this->knownBrands;
    }

    /**
     * @return string[] The injected list of phrases to remove.
     */
    public function getPhrasesToRemove(): array
    {
        return $this->phrasesToRemove;
    }

    /**
     * Returns a "|"-separated regex alternation of history-context phrases.
     * It contains no delimiters or anchors; the caller wraps it.
     */
    public function getHistoryContextPattern(): string
    {
        return 'auch|noch|nochmal|dazu|wie oben|wie zuvor|ähnlich|aehnlich|stattdessen|alternative|alternativ|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|und der preis|kosten|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
    }

    /**
     * Returns a "|"-separated regex alternation of reference follow-up phrases.
     * It contains no delimiters or anchors; the caller wraps it.
     */
    public function getReferenceFollowUpPattern(): string
    {
        return 'preis|preise|preis dazu|preis dafür|preis dafuer|preis zum gerät|preis zum geraet|was kostet das|was kostet das gerät|was kostet das geraet|gerät|geraet|das gerät|das geraet|dieses gerät|dieses geraet|das modell|dieses modell|dafür|dafuer|davon|verfügbarkeit|verfuegbarkeit|link dazu|shop|bitte';
    }

    /**
     * @return string[] The injected list of tokens to filter from search text.
     */
    public function getFilterSearchTokensPattern(): array
    {
        return $this->filterSearchTokensPattern;
    }

    /**
     * Returns the injected reference-only tokens, or the built-in default list
     * when none were injected.
     *
     * @return string[]
     */
    public function getReferenceOnlyTokens(): array
    {
        if ($this->referenceOnlyTokens !== []) {
            return $this->referenceOnlyTokens;
        }

        return [
            'auch',
            'noch',
            'nochmal',
            'zusätzlich',
            'preis',
            'preise',
            'kosten',
            'kostet',
            'gerät',
            'geraet',
            'modell',
            'produkt',
            'artikel',
            'dafür',
            'dafuer',
            'dazu',
            'davon',
            'stattdessen',
            'verfügbarkeit',
            'verfuegbarkeit',
            'shop',
            'link',
            'zum',
            'zur',
            'das',
            'dieses',
            'den',
            'dem',
            'bitte',
            'gern',
            'gerne',
            'zeige',
            'zeig',
            'such',
            'suche',
            'finde',
            'find',
            'mir',
            'mal',
            'von',
            'und',
        ];
    }
}

View File

@@ -1,46 +1,78 @@
<?php
declare(strict_types=1);
namespace App\Config;
class ShopServiceConfig
final class ShopServiceConfig
{
// Device-related keywords in German and English, each German term with both
// its umlaut and its ASCII-transliterated spelling.
// NOTE(review): presumably matched against the user's query — confirm at the call site.
// 'controller' is listed once; a repeated entry adds nothing to a keyword list.
public const DEVICE_QUERY_KEYWORDS = [
    'analysegerät',
    'analysegeraet',
    'analysegeräte',
    'analysegeraete',
    'messgerät',
    'messgeraet',
    'messgeräte',
    'messgeraete',
    'analysator',
    'analysatoren',
    'analyzer',
    'gerät',
    'geraet',
    'geräte',
    'geraete',
    'monitor',
    'monitore',
    'controller',
    'gerät für',
    'geraet fuer',
    'geräte für',
    'geraete fuer',
    'system',
    'systeme',
    'anlage',
    'anlagen',
];
// Accessory/consumable-related keywords in German and English, including
// umlaut and ASCII-transliterated spellings and three spellings of "service set".
// NOTE(review): presumably matched against the user's query — confirm at the call site.
public const ACCESSORY_QUERY_KEYWORDS = [
'zubehör',
'zubehor',
'reagenz',
'reagenzien',
'reagent',
'indikator',
'indikatoren',
'indicator',
'kit',
'set',
'ersatz',
'ersatzteil',
'ersatzteile',
'verbrauchsmaterial',
'consumable',
'dazu',
'passend',
'passende',
'passendes',
'nachfüll',
'nachfuell',
'refill',
'filter',
'pumpenkopf',
'motorblock',
'service set',
'serviceset',
'service-set',
];
public const ACCESSORY_PRODUCT_KEYWORDS = [
'reagenz',
'reagenzien',
'reagent',
'indikator',
'indikatoren',
'indicator',
'kit',
'set',
@@ -49,6 +81,8 @@ class ShopServiceConfig
'zubehör',
'zubehor',
'ersatz',
'ersatzteil',
'ersatzteile',
'nachfüll',
'nachfuell',
'refill',
@@ -57,22 +91,44 @@ class ShopServiceConfig
'solution',
'teststreifen',
'test strip',
'filter',
'pumpenkopf',
'motorblock',
'service set',
'serviceset',
'service-set',
];
// Device-related keywords in German and English, including "online ..."
// compounds in hyphenated and spaced spellings.
// NOTE(review): presumably matched against product data rather than the query — confirm at the call site.
public const DEVICE_PRODUCT_KEYWORDS = [
'analysegerät',
'analysegeraet',
'analysegeräte',
'analysegeraete',
'messgerät',
'messgeraet',
'messgeräte',
'messgeraete',
'analysator',
'analysatoren',
'analyzer',
'monitor',
'monitore',
'controller',
'online-analysator',
'online analysator',
'online-analysegerät',
'online analysegeraet',
'online-analysegeräte',
'online analysegeraete',
'online analyzer',
'online monitor',
'system',
'systeme',
'anlage',
'anlagen',
'gerät',
'geraet',
'geräte',
'geraete',
];
}

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Controller;
use App\Commerce\CommerceReferenceStore;
use App\Context\ContextService;
use App\Http\ClientIdResolver;
use Symfony\Component\HttpFoundation\JsonResponse;
@@ -27,9 +28,11 @@ use Symfony\Component\Routing\Annotation\Route;
final readonly class HistoryController
{
/**
 * Each dependency is promoted exactly once; promoting $contextService twice
 * or closing the parameter list twice does not compile.
 */
public function __construct(
    private ContextService $contextService,
    private ClientIdResolver $clientIdResolver,
    private CommerceReferenceStore $commerceReferenceStore,
) {
}
/**
* Returns the full conversation history for the current client
@@ -101,10 +104,11 @@ final readonly class HistoryController
$clientId = $this->clientIdResolver->resolve($request, $response);
$this->contextService->deleteHistory($clientId);
$this->commerceReferenceStore->clear($clientId);
return $this->jsonWithCookies(
[
'status' => 'ok',
'status' => 'ok',
'message' => 'History deleted',
],
$response
@@ -113,6 +117,8 @@ final readonly class HistoryController
/**
* Helper to return JSON responses while forwarding cookies.
*
* @param array<string, mixed>|array<int, array<string, string>> $data
*/
private function jsonWithCookies(array $data, Response $cookieResponse): JsonResponse
{

View File

@@ -6,7 +6,6 @@ namespace App\Shopware;
use RuntimeException;
use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\DecodingExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
@@ -31,6 +30,16 @@ final readonly class StoreApiClient
public function searchProducts(array $criteria): array
{
$url = rtrim($this->baseUrl, '/') . '/store-api/search';
$sanitizedCriteria = $this->sanitizeValue($criteria);
$body = json_encode(
$sanitizedCriteria,
JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
);
if (!is_string($body)) {
throw new RuntimeException('Failed to encode Store API criteria.');
}
$response = $this->httpClient->request('POST', $url, [
'headers' => [
@@ -38,12 +47,13 @@ final readonly class StoreApiClient
'Accept' => 'application/json',
'sw-access-key' => $this->salesChannelAccessKey,
],
'json' => $criteria,
'body' => $body,
'timeout' => $this->timeoutSeconds,
]);
$statusCode = $response->getStatusCode();
$content = $response->getContent(false);
$content = $this->sanitizeString($content);
if ($statusCode < 200 || $statusCode >= 300) {
throw new RuntimeException(sprintf(
@@ -61,4 +71,48 @@ final readonly class StoreApiClient
return $data;
}
/**
 * Recursively sanitizes every string inside a value.
 *
 * Arrays are walked with their keys preserved, strings go through
 * sanitizeString(), and any other type is returned untouched.
 */
private function sanitizeValue(mixed $value): mixed
{
    return match (true) {
        is_array($value) => array_map(
            fn(mixed $item): mixed => $this->sanitizeValue($item),
            $value
        ),
        is_string($value) => $this->sanitizeString($value),
        default => $value,
    };
}
/**
 * Returns the value as valid UTF-8.
 *
 * Valid input is returned unchanged. Otherwise the value is re-encoded with
 * mbstring, then with iconv (dropping invalid bytes) if that is not enough.
 *
 * @return string Empty string when no valid, non-empty result can be produced.
 */
private function sanitizeString(string $value): string
{
    // An empty pattern with /u only succeeds when the subject is valid UTF-8.
    if (preg_match('//u', $value) === 1) {
        return $value;
    }

    if (function_exists('mb_convert_encoding')) {
        $converted = mb_convert_encoding($value, 'UTF-8', 'UTF-8');

        // mb_convert_encoding() may return false; under strict_types, passing
        // that on to preg_match() would raise a TypeError, so only a string
        // result is used.
        if (is_string($converted)) {
            if (preg_match('//u', $converted) === 1) {
                return $converted;
            }
            $value = $converted;
        }
    }

    if (function_exists('iconv')) {
        // //IGNORE drops bytes that cannot be represented; warnings are suppressed.
        $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $value);
        if (is_string($converted) && $converted !== '') {
            return $converted;
        }
    }

    return '';
}
}