optimize retrieval

This commit is contained in:
team 1
2026-04-23 15:47:53 +02:00
parent 8a31e99669
commit 87417febf4
13 changed files with 2093 additions and 287 deletions

View File

@@ -4,6 +4,9 @@ declare(strict_types=1);
namespace App\Agent;
use App\Commerce\CommerceReferenceResolver;
use App\Commerce\CommerceReferenceStore;
use App\Commerce\Dto\CommerceReferenceContext;
use App\Commerce\SearchRepairService;
use App\Commerce\ShopSearchService;
use App\Config\AgentRunnerConfig;
@@ -30,6 +33,8 @@ final readonly class AgentRunner
private RetrieverInterface $retriever,
private ShopSearchService $shopSearchService,
private SearchRepairService $searchRepairService,
private CommerceReferenceStore $commerceReferenceStore,
private CommerceReferenceResolver $commerceReferenceResolver,
private CommerceIntentLite $commerceIntentLite,
private OllamaClient $ollamaClient,
private LoggerInterface $agentLogger,
@@ -52,7 +57,8 @@ final readonly class AgentRunner
$shopResults = [];
$primaryShopResults = [];
$sources = [];
$factSources = [];
$contextSignals = [];
$optimizedShopQuery = '';
$shopSearchQuery = '';
$commerceIntent = CommerceIntentLite::NONE;
@@ -60,6 +66,8 @@ final readonly class AgentRunner
$attemptedShopRepair = false;
$usedShopRepair = false;
$shopRepairQueries = [];
$activeCommerceReference = null;
$shopChecked = false;
$this->agentLogger->info('Agent run started', [
'userId' => $userId,
@@ -72,19 +80,18 @@ final readonly class AgentRunner
}
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
if ($urlContent !== '') {
$this->addSource($sources, 'Externe URL');
$this->addBadge($factSources, 'Externe URL');
}
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
$knowledgeChunks = $this->retriever->retrieve($prompt);
if ($knowledgeChunks !== []) {
$this->addSource($sources, 'RAG Wissen');
$this->addBadge($factSources, 'RAG Wissen');
}
$commerceIntent = $this->detectCommerceIntent($prompt);
@@ -93,18 +100,53 @@ final readonly class AgentRunner
yield $this->systemMsg('Ich optimiere die Recherche...', 'think');
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
$activeCommerceReference = $this->loadCommerceReference($userId);
if ($commerceHistoryContext !== '') {
$this->addSource($sources, 'Chatverlauf');
$this->addBadge($contextSignals, 'Gesprächskontext');
}
$optimizedShopQuery = $this->buildOptimizedShopQuery(
if ($activeCommerceReference !== null) {
$this->addBadge($contextSignals, 'Commerce-Referenz');
}
$isReferenceOnlyFollowUp = $this->isReferenceOnlyCommerceFollowUp(
$prompt,
$userId,
$commerceHistoryContext
$activeCommerceReference
);
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
if ($isReferenceOnlyFollowUp) {
$shopSearchQuery = $this->buildDeterministicReferenceShopQuery($activeCommerceReference);
if ($shopSearchQuery !== '') {
$this->addBadge($contextSignals, 'Deterministische Referenzsuche');
}
$this->agentLogger->info('Using deterministic reference shop query', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'prompt' => $prompt,
'shopSearchQuery' => $shopSearchQuery,
'referenceProductName' => $activeCommerceReference?->productName,
'referenceFocusTerms' => $activeCommerceReference?->focusTerms,
]);
} else {
$optimizedShopQuery = $this->buildOptimizedShopQuery(
$prompt,
$userId,
$commerceHistoryContext
);
if ($optimizedShopQuery !== '' && $optimizedShopQuery !== $prompt) {
$this->addBadge($contextSignals, 'Query-Optimierung');
}
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
}
if ($shopSearchQuery === '') {
$shopSearchQuery = $prompt;
}
$this->agentLogger->info('Commerce search prepared', [
'userId' => $userId,
@@ -112,8 +154,11 @@ final readonly class AgentRunner
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'usedDeterministicReferenceQuery' => $isReferenceOnlyFollowUp,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
yield $this->systemMsg(
@@ -121,11 +166,14 @@ final readonly class AgentRunner
'think'
);
$shopChecked = true;
$primaryShopResults = $this->searchShop(
$shopSearchQuery,
$commerceIntent,
$userId,
$commerceHistoryContext
$commerceHistoryContext,
$activeCommerceReference
);
$repairPayload = $this->repairShopResults(
@@ -144,11 +192,13 @@ final readonly class AgentRunner
$shopRepairQueries = $repairPayload['repairQueries'];
if ($shopResults !== []) {
$this->addSource($sources, 'Shopsystem');
$this->addBadge($factSources, 'Shopsystem');
} elseif ($shopChecked) {
$this->addBadge($factSources, 'Shopsystem geprüft');
}
if ($attemptedShopRepair) {
$this->addSource($sources, 'Erweiterte Shopsuche');
$this->addBadge($contextSignals, 'Erweiterte Shopsuche');
}
}
@@ -176,6 +226,7 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -192,14 +243,22 @@ final readonly class AgentRunner
]);
}
if ($sources !== []) {
yield $this->emitSources($sources, 'Genutzte Quellen: ');
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Genutzte Datenpfade'
);
}
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
if ($sources !== []) {
yield $this->emitSources($sources, 'Quellen: ');
if ($factSources !== [] || $contextSignals !== []) {
yield $this->emitSourceSummary(
$factSources,
$contextSignals,
'Quellen und Signale'
);
}
if ($this->debug) {
@@ -207,10 +266,11 @@ final readonly class AgentRunner
}
if ($fullOutput !== '') {
$this->contextService->appendHistory(
$userId,
$prompt,
$fullOutput
$this->persistConversationState(
userId: $userId,
prompt: $prompt,
fullOutput: $fullOutput,
shopResults: $shopResults
);
}
@@ -221,6 +281,7 @@ final readonly class AgentRunner
'commerceIntent' => $commerceIntent,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'shopChecked' => $shopChecked,
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
@@ -231,6 +292,8 @@ final readonly class AgentRunner
'shopSearchQuery' => $shopSearchQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasActiveCommerceReference' => $activeCommerceReference !== null,
'activeCommerceReferenceProduct' => $activeCommerceReference?->productName,
]);
} catch (Throwable $e) {
$this->agentLogger->error('Agent run failed', [
@@ -298,6 +361,42 @@ final readonly class AgentRunner
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
}
/**
 * Decide whether the prompt is a bare follow-up that should be answered
 * against the stored commerce reference.
 *
 * Returns false when no reference context is given, when the normalized
 * prompt is empty, or when the prompt contains one of the tokens listed in
 * the first pattern (presumably product/model names). Otherwise returns true
 * only if one of the follow-up keywords (price, cost, availability, shop,
 * link, ...) occurs as a whole word.
 */
private function isReferenceOnlyCommerceFollowUp(
    string $prompt,
    ?CommerceReferenceContext $referenceContext
): bool {
    if ($referenceContext === null) {
        return false;
    }

    // Normalize: lowercase, turn everything that is not a letter, digit or
    // whitespace into a space, then collapse whitespace runs. Each preg_replace
    // falls back to its own input if the regex engine reports an error.
    $lowered = mb_strtolower(trim($prompt), 'UTF-8');
    $withoutPunctuation = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $lowered) ?? $lowered;
    $collapsed = preg_replace('/\s+/u', ' ', $withoutPunctuation) ?? $withoutPunctuation;
    $text = trim($collapsed);

    // An empty prompt or a prompt that contains one of these tokens is not a
    // reference-only follow-up.
    if (
        $text === ''
        || preg_match('/\b(testomat|lab|evo|eco|calc|thcl|808|2000)\b/u', $text) === 1
    ) {
        return false;
    }

    $followUpPattern = '/\b(preis|preise|kosten|kostet|dazu|dafuer|dafür|davon|was kostet das|verfuegbarkeit|verfügbarkeit|shop|link)\b/u';

    return preg_match($followUpPattern, $text) === 1;
}
/**
 * Derive the shop query text from the stored commerce reference.
 *
 * Returns an empty string when no reference is given; otherwise the trimmed
 * result of the reference's buildReferenceSearchText().
 */
private function buildDeterministicReferenceShopQuery(?CommerceReferenceContext $referenceContext): string
{
    return $referenceContext === null
        ? ''
        : trim($referenceContext->buildReferenceSearchText());
}
/**
* @return array{
* results: array,
@@ -346,13 +445,15 @@ final readonly class AgentRunner
string $query,
string $commerceIntent,
string $userId,
string $commerceHistoryContext = ''
string $commerceHistoryContext = '',
?CommerceReferenceContext $referenceContext = null
): array {
try {
return $this->shopSearchService->search(
$query,
$commerceIntent,
$commerceHistoryContext
$commerceHistoryContext,
$referenceContext
);
} catch (Throwable $e) {
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
@@ -361,6 +462,8 @@ final readonly class AgentRunner
'query' => $query,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'hasReferenceContext' => $referenceContext !== null,
'referenceProductName' => $referenceContext?->productName,
'exception' => $e,
]);
@@ -376,6 +479,64 @@ final readonly class AgentRunner
);
}
/**
 * Load the stored commerce reference for a user on a best-effort basis.
 *
 * Any Throwable raised by the store is logged as a warning and not rethrown;
 * in that case null is returned.
 */
private function loadCommerceReference(string $userId): ?CommerceReferenceContext
{
    $reference = null;

    try {
        $reference = $this->commerceReferenceStore->load($userId);
    } catch (Throwable $failure) {
        $logContext = [
            'userId' => $userId,
            'exception' => $failure,
        ];
        $this->agentLogger->warning('Failed to load commerce reference context', $logContext);
    }

    return $reference;
}
/**
 * Resolve a commerce reference from the finished turn and save it for the user.
 *
 * Nothing is saved when the resolver returns null. Any Throwable raised while
 * resolving or saving is logged as a warning and not rethrown.
 *
 * @param array<int, mixed> $shopResults
 */
private function storeCommerceReference(string $userId, string $prompt, string $answer, array $shopResults): void
{
    try {
        $resolved = $this->commerceReferenceResolver->resolveFromCommerceTurn($prompt, $answer, $shopResults);

        if ($resolved !== null) {
            $this->commerceReferenceStore->save($userId, $resolved);
        }
    } catch (Throwable $failure) {
        $logContext = [
            'userId' => $userId,
            'exception' => $failure,
        ];
        $this->agentLogger->warning('Failed to persist commerce reference context', $logContext);
    }
}
/**
 * Persist the state of a finished turn: first append the exchange to the
 * chat history, then hand the same turn to storeCommerceReference().
 *
 * @param array<int, mixed> $shopResults
 */
private function persistConversationState(
    string $userId,
    string $prompt,
    string $fullOutput,
    array $shopResults
): void {
    $this->contextService->appendHistory($userId, $prompt, $fullOutput);

    // The generated answer is passed as the $answer argument.
    $this->storeCommerceReference($userId, $prompt, $fullOutput, $shopResults);
}
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
{
return match ($commerceIntent) {
@@ -447,22 +608,36 @@ final readonly class AgentRunner
}
/**
* @param string[] $sources
* @param string[] $factSources
* @param string[] $contextSignals
*/
private function emitSources(array $sources, string $prefix): string
private function emitSourceSummary(array $factSources, array $contextSignals, string $label): string
{
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
$parts = [];
if ($factSources !== []) {
$parts[] = 'Fakten: ' . implode(' ', $factSources);
}
if ($contextSignals !== []) {
$parts[] = 'Kontext: ' . implode(' ', $contextSignals);
}
return $this->systemMsg(
$label . ': ' . implode(' &nbsp;&nbsp; ', $parts),
'info'
);
}
/**
* @param string[] $sources
* @param string[] $target
*/
private function addSource(array &$sources, string $label): void
private function addBadge(array &$target, string $label): void
{
$badge = $this->badge($label);
if (!in_array($badge, $sources, true)) {
$sources[] = $badge;
if (!in_array($badge, $target, true)) {
$target[] = $badge;
}
}

View File

@@ -14,7 +14,6 @@ use RuntimeException;
final readonly class PromptBuilder
{
public function __construct(
private ContextService $contextService,
private SystemPromptRepository $systemPromptRepository,
@@ -49,12 +48,24 @@ final readonly class PromptBuilder
$hasShopResults = $shopResults !== [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
$systemBlock = $this->buildSystemBlock();
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
$responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion);
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
$responseFormatBlock = $this->buildResponseFormatBlock(
$prompt,
$hasShopResults,
$isTechnicalProductQuestion,
$isPriceDrivenQuestion
);
$knowledgeBlock = $this->buildKnowledgeBlock(
$knowledgeChunks,
$urlContent,
$prompt,
$hasShopResults,
$isPriceDrivenQuestion
);
$userBlock = $this->buildUserBlock($prompt);
$fixedPrompt = $this->implodeBlocks([
@@ -231,7 +242,9 @@ final readonly class PromptBuilder
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
"Do not infer undocumented technical specifications from shop data.\n" .
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.";
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
if ($totalCount > count($limitedShopResults)) {
$header .= "\n" .
@@ -247,12 +260,20 @@ final readonly class PromptBuilder
/**
* Build a small priority block that tells the model what to surface first.
*/
private function buildOutputPriorityBlock(bool $hasShopResults): string
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
{
if (!$hasShopResults) {
return '';
}
if ($isPriceDrivenQuestion) {
return
"OUTPUT PRIORITY:\n" .
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
}
return
"OUTPUT PRIORITY:\n" .
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
@@ -263,7 +284,8 @@ final readonly class PromptBuilder
private function buildResponseFormatBlock(
string $prompt,
bool $hasShopResults,
bool $isTechnicalProductQuestion
bool $isTechnicalProductQuestion,
bool $isPriceDrivenQuestion
): string {
$rules = [
"RESPONSE FORMAT RULES:",
@@ -284,6 +306,8 @@ final readonly class PromptBuilder
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
} else {
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
}
@@ -294,6 +318,12 @@ final readonly class PromptBuilder
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
}
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-driven questions, answer the threshold result first.";
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
}
if ($this->asksForAccessoryOrBundle($prompt)) {
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
@@ -304,14 +334,13 @@ final readonly class PromptBuilder
return implode("\n", $rules);
}
/**
* Build the knowledge block.
*
* Retrieved knowledge remains the main source for technical matching and explanation.
* Shop data is preferred for current commercial fields.
*/
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
{
private function buildKnowledgeBlock(
array $knowledgeChunks,
string $urlContent,
string $prompt,
bool $hasShopResults,
bool $isPriceDrivenQuestion
): string {
$knowledgeParts = [];
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
@@ -334,7 +363,7 @@ final readonly class PromptBuilder
"LANGUAGE RULES:\n" .
implode("\n", $this->buildLanguageRules()),
"FACT GROUNDING RULES:\n" .
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
"Source: Documents\n" .
implode("\n\n", $lines),
@@ -354,13 +383,6 @@ final readonly class PromptBuilder
return $this->implodeBlocks($knowledgeParts);
}
/**
* Resolve how many characters may still be used by history.
*
* The active model num_ctx is converted into a conservative prompt budget.
* Shop, knowledge and user question are fixed priority blocks.
* History only receives the remaining space.
*/
private function resolveHistoryBudgetChars(string $fixedPrompt): int
{
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
@@ -407,8 +429,11 @@ final readonly class PromptBuilder
/**
* @return string[]
*/
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
{
private function buildFactGroundingRules(
bool $isTechnicalProductQuestion,
bool $hasShopResults,
bool $isPriceDrivenQuestion
): array {
$rules = [
"- State only facts that are explicitly present in the provided sources.",
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
@@ -437,6 +462,11 @@ final readonly class PromptBuilder
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
]);
if ($isPriceDrivenQuestion) {
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
}
} else {
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
@@ -523,6 +553,20 @@ final readonly class PromptBuilder
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
}
/**
 * Heuristically detect whether the prompt is about prices.
 *
 * A prompt counts as price-driven when it contains a threshold expression
 * (comparison word + amount + currency marker, e.g. "unter 100 €" or
 * "mehr als 1.000,50 Euro") or one of the substrings "preis", "kosten"
 * or "kostet". Matching is case-insensitive via lowercasing.
 *
 * @param string $prompt Raw user prompt.
 *
 * @return bool True when the prompt looks price-driven.
 */
private function isLikelyPriceDrivenQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // "€" is not a word character, so a "\b" placed after it would only match
    // when a letter or digit follows; "über 500 €" would then never be detected.
    // The word boundary is therefore applied to "euro"/"eur" only.
    // The amount accepts repeated separator groups such as "1.000,50".
    $thresholdPattern =
        '/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)'
        . '\s+\d+(?:[.,]\d+)*\s*(?:(?:euro|eur)\b|€)/u';

    if (preg_match($thresholdPattern, $normalized) === 1) {
        return true;
    }

    // "preise" needs no separate check: it always contains "preis".
    foreach (['preis', 'kosten', 'kostet'] as $keyword) {
        if (str_contains($normalized, $keyword)) {
            return true;
        }
    }

    return false;
}
private function asksForAccessoryOrBundle(string $prompt): bool
{
$normalized = mb_strtolower($prompt, 'UTF-8');