Files
MtoRagSystem/src/Agent/AgentRunner.php
2026-04-27 17:15:38 +02:00

1400 lines
48 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Agent;
use App\Commerce\SearchRepairService;
use App\Commerce\ShopSearchService;
use App\Config\AgentRunnerConfig;
use App\Context\ContextService;
use App\Context\UrlAnalyzer;
use App\Infrastructure\OllamaClient;
use App\Intent\CommerceIntentLite;
use App\Knowledge\Retrieval\RetrieverInterface;
use Generator;
use Psr\Log\LoggerInterface;
use Throwable;
final readonly class AgentRunner
{
/**
 * Set to true by the constructor; no other assignment is visible in this part of the file.
 */
private bool $systemMsgOn;
/**
 * Receives all collaborators via constructor promotion and enables system messages.
 *
 * @param bool $debug      Together with $logPrompt / $logContext gates the debug logging in run();
 *                         on its own it also makes run() emit the final prompt as a 'debug' message.
 * @param bool $logPrompt  When $debug is set, run() logs the final prompt.
 * @param bool $logContext When $debug is set, run() logs a conversation context snapshot.
 */
public function __construct(
private PromptBuilder $promptBuilder,
private ThinkSuppressor $thinkSuppressor,
private ContextService $contextService,
private UrlAnalyzer $urlAnalyzer,
private RetrieverInterface $retriever,
private ShopSearchService $shopSearchService,
private SearchRepairService $searchRepairService,
private CommerceIntentLite $commerceIntentLite,
private OllamaClient $ollamaClient,
private LoggerInterface $agentLogger,
private AgentRunnerConfig $agentRunnerConfig,
private bool $debug,
private bool $logPrompt,
private bool $logContext,
) {
// The class is readonly, so this is the single write to the property.
$this->systemMsgOn = true;
}
/**
 * Runs one agent turn and streams status, meta and answer messages to the caller.
 *
 * Steps, in order: URL content extraction, commerce intent detection, knowledge
 * retrieval, (commerce intents only) shop query optimisation + shop search +
 * repair, final prompt assembly, streamed LLM answer, history update.
 *
 * Any Throwable raised inside the pipeline is logged, reported as an 'err'
 * message and recorded in the conversation history as a system notice.
 *
 * @param string $prompt             Raw user prompt; trimmed before use. An empty prompt ends the run early.
 * @param string $userId             Identifier used for history lookups and log context.
 * @param bool   $forceFullContext   Forwarded to the prompt builder and the context snapshot.
 * @param string $requestContextHint Optional extra context that is appended to the commerce history.
 *
 * @return Generator Yields the messages produced by systemMsg() / emitSources().
 */
public function run(string $prompt, string $userId, bool $forceFullContext = false, string $requestContextHint = ''): Generator
{
    $prompt = trim($prompt);
    if ($prompt === '') {
        yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
        return;
    }

    // Everything that is reported in the final log entry gets a default here,
    // so the non-commerce path never reads an undefined variable.
    $shopResults = [];
    $primaryShopResults = [];
    $sources = [];
    $optimizedShopQuery = '';
    $shopSearchQuery = '';
    $commerceIntent = CommerceIntentLite::NONE;
    $knowledgeRetrievalPrompt = $prompt;
    $usedFollowUpRetrievalContext = false;
    $commerceHistoryContext = '';
    $attemptedShopRepair = false;
    $usedShopRepair = false;
    $shopRepairQueries = [];
    $primaryShopSearchHadSystemFailure = false;
    $primaryShopSearchFailureReason = null;
    $usedResolvedOptimizedShopQuery = false;
    $historyNotices = [];

    $this->agentLogger->info('Agent run started', [
        'userId' => $userId,
    ]);

    try {
        // Full context mode is passed to PromptBuilder further down; no extra
        // context strategy is applied at this point.
        yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
        yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');

        $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
        if ($urlContent !== '') {
            $this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
        }

        $commerceIntent = $this->detectCommerceIntent($prompt);

        yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
        $knowledgeRetrievalPrompt = $this->buildKnowledgeRetrievalPrompt(
            prompt: $prompt,
            userId: $userId,
            commerceIntent: $commerceIntent
        );
        $usedFollowUpRetrievalContext = $knowledgeRetrievalPrompt !== $prompt;
        $knowledgeChunks = $this->retriever->retrieve($knowledgeRetrievalPrompt);
        if ($knowledgeChunks !== []) {
            $this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
        }
        if ($usedFollowUpRetrievalContext) {
            $this->agentLogger->info('Knowledge retrieval used follow-up context', [
                'userId' => $userId,
                'prompt' => $prompt,
                'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
                'commerceIntent' => $commerceIntent,
            ]);
        }

        if ($this->isCommerceIntent($commerceIntent)) {
            yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
            $commerceHistoryContext = $this->buildCommerceHistoryContext($userId, $requestContextHint);
            if ($commerceHistoryContext !== '') {
                $this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
            }

            $optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
                $prompt,
                $userId,
                $commerceHistoryContext
            );
            $shopSearchQuery = $this->resolveShopSearchQuery(
                prompt: $prompt,
                optimizedShopQuery: $optimizedShopQuery,
                commerceHistoryContext: $commerceHistoryContext,
                userId: $userId
            );
            $usedResolvedOptimizedShopQuery = $optimizedShopQuery !== '' && $shopSearchQuery === $optimizedShopQuery;

            if ($shopSearchQuery === '') {
                $this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [
                    'userId' => $userId,
                    'commerceIntent' => $commerceIntent,
                    'prompt' => $prompt,
                    'optimizedShopQuery' => $optimizedShopQuery,
                    'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
                    'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
                    'hasRequestContextHint' => trim($requestContextHint) !== '',
                ]);
                yield $this->systemMsg(
                    $this->agentRunnerConfig->getNoConcreteShopQueryMessage(),
                    'info'
                );
                // The run ends here: no answer is generated and nothing is
                // written to the history for this turn.
                return;
            }

            $shopQueryPreview = $this->shopSearchService->buildSearchQueryPreview(
                $shopSearchQuery,
                $commerceIntent,
                $commerceHistoryContext
            );
            yield $this->systemMsg(
                $this->buildShopSearchMetaMessage(
                    query: $shopQueryPreview->searchText !== '' ? $shopQueryPreview->searchText : $shopSearchQuery,
                    commerceIntent: $commerceIntent,
                    usedOptimizedQuery: $usedResolvedOptimizedShopQuery,
                    originalQuery: $shopSearchQuery
                ),
                'meta'
            );
            $this->agentLogger->info('Commerce search prepared', [
                'userId' => $userId,
                'commerceIntent' => $commerceIntent,
                'usedOptimizedShopQuery' => $usedResolvedOptimizedShopQuery,
                'optimizedShopQuery' => $optimizedShopQuery,
                'shopSearchQuery' => $shopSearchQuery,
                'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
                'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
            ]);
            yield $this->systemMsg(
                sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent),
                'think'
            );

            $primaryShopResults = $this->searchShop(
                $shopSearchQuery,
                $commerceIntent,
                $userId,
                $commerceHistoryContext
            );
            $primaryShopSearchHadSystemFailure = $this->shopSearchService->hadLastSearchSystemFailure();
            $primaryShopSearchFailureReason = $this->shopSearchService->getLastSearchFailureReason();

            if ($primaryShopSearchHadSystemFailure) {
                // A system failure is not a "no hits" situation, so the repair
                // search is skipped and the user is told the shop is unavailable.
                $this->agentLogger->warning('Shop repair skipped after Store API system failure', [
                    'userId' => $userId,
                    'commerceIntent' => $commerceIntent,
                    'shopSearchQuery' => $shopSearchQuery,
                    'failureReason' => $primaryShopSearchFailureReason,
                ]);
                $shopUnavailableMessage = $this->buildShopUnavailableMessage($primaryShopSearchFailureReason);
                yield $this->systemMsg(
                    $shopUnavailableMessage,
                    'err'
                );
                $historyNotices[] = $this->buildHistoryNotice(
                    'Shopdaten konnten nicht geladen werden',
                    $primaryShopSearchFailureReason
                );
                $repairPayload = [
                    'results' => $primaryShopResults,
                    'attemptedRepair' => false,
                    'usedRepair' => false,
                    'repairQueries' => [],
                ];
            } else {
                yield $this->systemMsg('Erweiterte Shopsuche wird geprüft…', 'think');
                $repairPayload = $this->repairShopResults(
                    prompt: $prompt,
                    userId: $userId,
                    commerceIntent: $commerceIntent,
                    commerceHistoryContext: $commerceHistoryContext,
                    primaryQuery: $shopSearchQuery,
                    primaryShopResults: $primaryShopResults,
                    knowledgeChunks: $knowledgeChunks
                );
            }

            $shopResults = $repairPayload['results'];
            $attemptedShopRepair = $repairPayload['attemptedRepair'];
            $usedShopRepair = $repairPayload['usedRepair'];
            $shopRepairQueries = $repairPayload['repairQueries'];
            if ($shopResults !== []) {
                $this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel());
            }
            if ($attemptedShopRepair) {
                $this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel());
            }
        }

        // Knowledge chunks are only trimmed when shop results are present.
        if ($shopResults !== []) {
            $knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
        }

        yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think');
        $finalPrompt = $this->promptBuilder->build(
            prompt: $prompt,
            userId: $userId,
            urlContent: $urlContent,
            knowledgeChunks: $knowledgeChunks,
            shopResults: $shopResults,
            fullContext: $forceFullContext,
            swagFullOutPut: $optimizedShopQuery
        );

        if ($this->debug && $this->logPrompt) {
            $this->agentLogger->debug('Final prompt', [
                'userId' => $userId,
                'finalPrompt' => $finalPrompt,
                'optimizedShopQuery' => $optimizedShopQuery,
                'shopSearchQuery' => $shopSearchQuery,
                'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
                'usedFollowUpRetrievalContext' => $usedFollowUpRetrievalContext,
                'primaryShopResultsCount' => count($primaryShopResults),
                'shopResultsCount' => count($shopResults),
                'attemptedShopRepair' => $attemptedShopRepair,
                'usedShopRepair' => $usedShopRepair,
                'shopRepairQueries' => $shopRepairQueries,
            ]);
        }
        if ($this->debug && $this->logContext) {
            $this->agentLogger->debug('Conversation context snapshot', [
                'userId' => $userId,
                'context' => $this->contextService->buildUserContext(
                    $userId,
                    $forceFullContext
                ),
            ]);
        }

        // Sources are announced before the answer and repeated after it.
        if ($sources !== []) {
            yield $this->emitSources(
                $sources,
                $this->agentRunnerConfig->getUsedSourcesPrefix()
            );
        }
        $fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
        if ($sources !== []) {
            yield $this->emitSources(
                $sources,
                $this->agentRunnerConfig->getSourcesPrefix()
            );
        }
        if ($this->debug) {
            yield $this->systemMsg($finalPrompt, 'debug');
        }

        $historyResponse = $this->buildHistoryResponse($fullOutput, $historyNotices);
        if ($historyResponse !== '') {
            $this->contextService->appendHistory(
                $userId,
                $prompt,
                $historyResponse
            );
        }

        $this->agentLogger->info('Agent run finished', [
            'userId' => $userId,
            'outputLength' => mb_strlen($fullOutput),
            'contextMode' => $forceFullContext ? 'full' : 'recent',
            'commerceIntent' => $commerceIntent,
            'primaryShopResultsCount' => count($primaryShopResults),
            'shopResultsCount' => count($shopResults),
            'attemptedShopRepair' => $attemptedShopRepair,
            'usedShopRepair' => $usedShopRepair,
            'shopRepairQueries' => $shopRepairQueries,
            'primaryShopSearchHadSystemFailure' => $primaryShopSearchHadSystemFailure,
            'primaryShopSearchFailureReason' => $primaryShopSearchFailureReason,
            'knowledgeChunkCount' => count($knowledgeChunks),
            'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
            'usedFollowUpRetrievalContext' => $usedFollowUpRetrievalContext,
            'hasUrlContent' => $urlContent !== '',
            // Same meaning as in the "Commerce search prepared" entry: the
            // optimised query was the one actually sent to the shop search.
            'usedOptimizedShopQuery' => $usedResolvedOptimizedShopQuery,
            'optimizedShopQuery' => $optimizedShopQuery,
            'shopSearchQuery' => $shopSearchQuery,
            'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
            'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
        ]);
    } catch (Throwable $e) {
        $this->agentLogger->error('Agent run failed', [
            'userId' => $userId,
            'exception' => $e,
        ]);
        $userErrorMessage = $this->buildUserErrorMessage($e);
        yield $this->systemMsg($userErrorMessage, 'err');

        // Keep the failed turn in the history so follow-up questions have context.
        $historyResponse = $this->buildHistoryResponse('', array_merge(
            $historyNotices,
            [$this->buildHistoryNotice('Antwort konnte nicht abgeschlossen werden', $e->getMessage())]
        ));
        if ($historyResponse !== '') {
            $this->contextService->appendHistory($userId, $prompt, $historyResponse);
        }
    }
}
/**
 * Asks the lightweight intent detector for the commerce intent of a prompt.
 *
 * @return string The detected 'intent' entry, or CommerceIntentLite::NONE when the detector reports none.
 */
private function detectCommerceIntent(string $prompt): string
{
    $detection = $this->commerceIntentLite->detect($prompt);
    $intent = $detection['intent'] ?? CommerceIntentLite::NONE;

    return (string) $intent;
}
/**
 * Tells whether the intent is one of the two product-search intents.
 */
private function isCommerceIntent(string $commerceIntent): bool
{
    return match ($commerceIntent) {
        CommerceIntentLite::PRODUCT_SEARCH,
        CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => true,
        default => false,
    };
}
/**
 * Builds the text that is sent to the knowledge retriever.
 *
 * For follow-up style prompts the last two user questions and the reference
 * anchors of the latest assistant answer are prepended; otherwise (or when the
 * history yields neither) the prompt is returned unchanged.
 */
private function buildKnowledgeRetrievalPrompt(
    string $prompt,
    string $userId,
    string $commerceIntent
): string {
    if (!$this->shouldUseFollowUpContextForKnowledgeRetrieval($prompt, $commerceIntent)) {
        return $prompt;
    }

    $recentHistory = $this->contextService->buildUserContextWithinBudget($userId, 3000);
    $earlierQuestions = $this->extractRecentUserQuestions($recentHistory, 2);
    $anchors = $this->extractLatestAssistantReferenceAnchors($recentHistory);

    $hasQuestions = $earlierQuestions !== [];
    $hasAnchors = $anchors !== [];
    if (!$hasQuestions && !$hasAnchors) {
        return $prompt;
    }

    $retrievalPrompt = '';
    foreach ($earlierQuestions as $earlierQuestion) {
        $retrievalPrompt .= 'Vorherige Nutzerfrage: ' . $earlierQuestion . "\n";
    }
    if ($hasAnchors) {
        $retrievalPrompt .= 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): '
            . implode(' ', $anchors) . "\n";
    }

    return $retrievalPrompt . 'Aktuelle Folgefrage: ' . $prompt;
}
/**
 * Decides whether knowledge retrieval should be enriched with conversation history.
 *
 * Follow-up context is used only when the prompt is not a commerce request,
 * carries no explicit commercial signal and contains a strong follow-up
 * reference. Prompt length plays no role: the earlier "longer than 180
 * characters" guard returned the same value as the final check and was
 * therefore removed together with the duplicated pattern scan.
 *
 * @param string $prompt         Raw user prompt.
 * @param string $commerceIntent Intent as returned by detectCommerceIntent().
 */
private function shouldUseFollowUpContextForKnowledgeRetrieval(string $prompt, string $commerceIntent): bool
{
    if ($this->isCommerceIntent($commerceIntent)) {
        return false;
    }

    $normalized = $this->normalizeFollowUpText($prompt);
    if ($normalized === '') {
        return false;
    }

    // Price / availability / link questions are answered from shop data, not from history.
    if ($this->containsExplicitCommercialFollowUpSignal($normalized)) {
        return false;
    }

    return $this->containsStrongFollowUpReference($normalized);
}
/**
 * Checks normalised prompt text for wording that refers back to an earlier turn
 * (for example "der wert", "damit", "welcher indikator").
 *
 * @param string $normalized Lower-cased, punctuation-free text.
 */
private function containsStrongFollowUpReference(string $normalized): bool
{
    $referencePatterns = [
        '/\bder\s+wert\b/u',
        '/\bdieser\s+wert\b/u',
        '/\bdiesen\s+wert\b/u',
        '/\bdem\s+wert\b/u',
        '/\bmit\s+welche(?:m|n|r)?\b/u',
        '/\bwomit\b/u',
        '/\bdamit\b/u',
        '/\bdafuer\b/u',
        '/\bdafür\b/u',
        '/\bdazu\b/u',
        '/\bdaraus\b/u',
        '/\bwelche(?:r|s|m|n)?\s+indikator\b/u',
        '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u',
        '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u',
        '/\bwelche(?:r|s|m|n)?\s+bereich\b/u',
        '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u',
        '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u',
    ];

    $hasReference = false;
    foreach ($referencePatterns as $referencePattern) {
        $hasReference = preg_match($referencePattern, $normalized) === 1;
        if ($hasReference) {
            break;
        }
    }

    return $hasReference;
}
/**
 * Checks normalised prompt text for shop-related vocabulary.
 *
 * Matching is by substring, so a signal also fires inside longer words.
 *
 * @param string $normalized Lower-cased, punctuation-free text.
 */
private function containsExplicitCommercialFollowUpSignal(string $normalized): bool
{
    $signalWords = [
        'shop', 'preis', 'preise', 'kostet', 'kosten', 'kaufen', 'bestellen',
        'warenkorb', 'lieferzeit', 'verfuegbar', 'verfügbar', 'lager', 'url',
        'link', 'artikelnummer', 'sku', 'produktnummer',
    ];

    $hasSignal = false;
    foreach ($signalWords as $signalWord) {
        if (str_contains($normalized, $signalWord)) {
            $hasSignal = true;
            break;
        }
    }

    return $hasSignal;
}
/**
 * Collects the most recent "Question:" lines from a history text.
 *
 * Each captured question is whitespace-collapsed and length-limited by
 * sanitizeHistoryQuestion(); empty results are dropped.
 *
 * @param string $history History text containing lines that start with "Question:".
 * @param int    $limit   Maximum number of questions; values <= 0 yield an empty list.
 *
 * @return string[] The last $limit questions in their original order.
 */
private function extractRecentUserQuestions(string $history, int $limit): array
{
    $history = trim($history);
    if ($limit <= 0 || $history === '') {
        return [];
    }

    $matchCount = preg_match_all('/^Question:\s*(.+)$/mi', $history, $matches);
    if ($matchCount < 1) {
        return [];
    }

    $questions = [];
    foreach ($matches[1] ?? [] as $rawQuestion) {
        $cleanQuestion = $this->sanitizeHistoryQuestion($rawQuestion);
        if ($cleanQuestion !== '') {
            $questions[] = $cleanQuestion;
        }
    }

    if ($questions === []) {
        return [];
    }

    return array_slice($questions, -$limit);
}
/**
 * Pulls reference anchors out of the answer part of the latest history turn.
 *
 * The anchors only help to resolve follow-up wording such as "der Wert" or
 * "welcher Indikator"; they are not evidence for the final answer. To limit the
 * risk of carrying a wrong earlier answer forward, only the first Testomat
 * model reference and the first °dH value are taken. Indicator names, reagent
 * codes, prices, URLs and product numbers are not extracted.
 *
 * @return string[] Zero to two distinct anchors (model first, then hardness value).
 */
private function extractLatestAssistantReferenceAnchors(string $history): array
{
    $latestTurn = $this->extractLatestHistoryTurn($history);
    if ($latestTurn === '') {
        return [];
    }

    // Drop the leading "Question: ..." line so only the answer text remains.
    $answerText = trim(preg_replace('/^Question:\s*.*(?:\R|$)/u', '', $latestTurn, 1) ?? '');
    if ($answerText === '') {
        return [];
    }

    $candidates = [
        $this->extractFirstTestomatModelAnchor($answerText),
        $this->extractFirstHardnessValueAnchor($answerText),
    ];

    $anchors = [];
    foreach ($candidates as $candidate) {
        if ($candidate !== '') {
            $anchors[] = $candidate;
        }
    }

    return array_values(array_unique($anchors));
}
/**
 * Returns the last turn of a history text, where every turn starts with a
 * line beginning with "Question:".
 *
 * @return string The trimmed last non-empty turn, or '' when there is none.
 */
private function extractLatestHistoryTurn(string $history): string
{
    $history = trim($history);
    if ($history === '') {
        return '';
    }

    // Zero-width split: each segment keeps its own "Question:" line.
    $segments = preg_split('/(?=^Question:\s)/m', $history);
    if ($segments === false || $segments === []) {
        return '';
    }

    $latestTurn = '';
    foreach ($segments as $segment) {
        $segment = trim($segment);
        if ($segment !== '') {
            $latestTurn = $segment;
        }
    }

    return $latestTurn;
}
/**
 * Finds the first "Testomat <model>" mention in a text.
 *
 * @return string The mention with collapsed whitespace and without the ® sign, or '' when none is found.
 */
private function extractFirstTestomatModelAnchor(string $text): string
{
    $modelPattern = '/\bTestomat(?:®)?\s+'
        . '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)'
        . '\b/iu';

    $hit = [];
    $found = preg_match($modelPattern, $text, $hit) === 1;
    if (!$found) {
        return '';
    }

    $anchor = $this->sanitizeHistoryQuestion((string) ($hit[0] ?? ''));
    $collapsed = preg_replace('/\s+/u', ' ', $anchor);
    if ($collapsed !== null) {
        $anchor = $collapsed;
    }

    return trim(str_replace('®', '', $anchor));
}
/**
 * Finds the first "<number> °dH" value in a text.
 *
 * @return string The value with collapsed whitespace, or '' when none is found.
 */
private function extractFirstHardnessValueAnchor(string $text): string
{
    $hit = [];
    if (preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $hit) === 1) {
        $collapsed = preg_replace('/\s+/u', ' ', (string) ($hit[0] ?? ''));

        return trim($collapsed ?? '');
    }

    return '';
}
/**
 * Collapses whitespace in a history question and caps it at 500 characters.
 *
 * Longer text is cut to 497 characters, right-trimmed and suffixed with "...".
 */
private function sanitizeHistoryQuestion(string $question): string
{
    $collapsed = trim((string) preg_replace('/\s+/u', ' ', $question));

    $fitsLimit = $collapsed === '' || mb_strlen($collapsed, 'UTF-8') <= 500;
    if ($fitsLimit) {
        return $collapsed;
    }

    $head = mb_substr($collapsed, 0, 497, 'UTF-8');

    return rtrim($head) . '...';
}
/**
 * Normalises text for follow-up detection: lower case, '-', '/' and '_' turned
 * into spaces, everything except letters, digits and whitespace removed, and
 * whitespace collapsed.
 */
private function normalizeFollowUpText(string $value): string
{
    $text = mb_strtolower(trim($value), 'UTF-8');
    $text = str_replace(['-', '/', '_'], ' ', $text);

    foreach (['/[^\p{L}\p{N}\s]+/u', '/\s+/u'] as $cleanupPattern) {
        $text = preg_replace($cleanupPattern, ' ', $text) ?? $text;
    }

    return trim($text);
}
/**
 * Lets the LLM turn the user prompt into a shop search query.
 *
 * While tokens arrive, a 'think' heartbeat message is yielded at most every
 * two seconds. A failing LLM stream is logged as a warning and results in an
 * empty query so the caller can fall back.
 *
 * @return Generator<int, string, mixed, string> Yields heartbeat messages; the generator's
 *                                               return value is the sanitised query or ''.
 */
private function buildOptimizedShopQuery(
    string $prompt,
    string $userId,
    string $commerceHistoryContext = ''
): Generator {
    $shopPrompt = trim($this->agentRunnerConfig->getShopPrompt(
        $prompt,
        $commerceHistoryContext
    ));
    if ($shopPrompt === '') {
        return '';
    }

    $collected = '';
    $lastHeartbeatAt = time();
    $this->thinkSuppressor->reset();

    try {
        foreach ($this->ollamaClient->stream($shopPrompt) as $token) {
            if (is_string($token)) {
                if (time() - $lastHeartbeatAt >= 2) {
                    yield $this->systemMsg('Shop-Suchanfrage wird optimiert…', 'think');
                    $lastHeartbeatAt = time();
                }

                $visiblePart = $this->thinkSuppressor->filter($token);
                if ($visiblePart !== '') {
                    $collected .= $visiblePart;
                }
            }
        }
    } catch (Throwable $e) {
        $this->agentLogger->warning('Shop query optimization failed, falling back to original prompt', [
            'userId' => $userId,
            'exception' => $e,
        ]);

        return '';
    }

    return $this->sanitizeOptimizedShopQuery($collected, $prompt, $commerceHistoryContext);
}
/**
 * Delegates to the search repair service and shields the run from its failures.
 *
 * When the service throws, the failure is logged as a warning and the primary
 * results are handed back marked as "no repair attempted".
 *
 * @return array{
 *     results: array,
 *     attemptedRepair: bool,
 *     usedRepair: bool,
 *     repairQueries: string[]
 * }
 */
private function repairShopResults(
    string $prompt,
    string $userId,
    string $commerceIntent,
    string $commerceHistoryContext,
    string $primaryQuery,
    array $primaryShopResults,
    array $knowledgeChunks
): array {
    try {
        $payload = $this->searchRepairService->repair(
            prompt: $prompt,
            commerceIntent: $commerceIntent,
            commerceHistoryContext: $commerceHistoryContext,
            primaryQuery: $primaryQuery,
            primaryShopResults: $primaryShopResults,
            knowledgeChunks: $knowledgeChunks
        );
    } catch (Throwable $e) {
        $logContext = [
            'userId' => $userId,
            'commerceIntent' => $commerceIntent,
            'primaryQuery' => $primaryQuery,
            'primaryShopResultsCount' => count($primaryShopResults),
            'exception' => $e,
        ];
        $this->agentLogger->warning('Shop repair failed, continuing with primary shop results', $logContext);

        $payload = [
            'results' => $primaryShopResults,
            'attemptedRepair' => false,
            'usedRepair' => false,
            'repairQueries' => [],
        ];
    }

    return $payload;
}
/**
 * Chooses the query that is actually sent to the shop search.
 *
 * - Prompt with own product semantics: the optimised query when it is
 *   non-empty and not meta-only, otherwise the prompt itself.
 * - Pure meta command such as "suche im shop": the LLM optimiser is not
 *   trusted, because it can copy instruction terms from its own prompt (for
 *   example "Shopware 6"). The query is resolved from the conversation
 *   instead: commerce history first, then a larger history budget, then
 *   (if configured) the full history.
 *
 * @return string The resolved query, or '' when nothing concrete could be derived.
 */
private function resolveShopSearchQuery(
    string $prompt,
    string $optimizedShopQuery,
    string $commerceHistoryContext,
    string $userId
): string {
    if (!$this->isMetaOnlyShopQuery($prompt)) {
        $optimizedIsUsable = $optimizedShopQuery !== ''
            && !$this->isMetaOnlyShopQuery($optimizedShopQuery);

        return $optimizedIsUsable ? $optimizedShopQuery : $prompt;
    }

    // Stage 1: the commerce history that was already built for this run.
    $fromContext = $this->extractContextualShopSearchQuery($commerceHistoryContext);
    if ($fromContext !== '' && !$this->isMetaOnlyShopQuery($fromContext)) {
        return $fromContext;
    }

    // Stage 2: a larger history window, only when it exceeds what stage 1 saw.
    $widerBudget = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars();
    if ($widerBudget > mb_strlen($commerceHistoryContext, 'UTF-8')) {
        $widerHistory = $this->contextService->buildUserContextWithinBudget($userId, $widerBudget);
        $fromWiderHistory = $this->extractContextualShopSearchQuery($widerHistory);
        if ($fromWiderHistory !== '' && !$this->isMetaOnlyShopQuery($fromWiderHistory)) {
            return $fromWiderHistory;
        }
    }

    // Stage 3: the complete history, if the configuration allows it.
    if ($this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback()) {
        $completeHistory = $this->contextService->buildUserContext($userId, true);
        $fromCompleteHistory = $this->extractContextualShopSearchQuery($completeHistory);
        if ($fromCompleteHistory !== '' && !$this->isMetaOnlyShopQuery($fromCompleteHistory)) {
            return $fromCompleteHistory;
        }
    }

    return '';
}
/**
 * Derives a shop query from the most recent usable user question in a history text.
 *
 * Questions are inspected newest first; meta-only questions and questions that
 * reduce to nothing (or to a meta-only query) are skipped.
 *
 * @return string The derived query, or '' when the fallback is disabled or nothing usable exists.
 */
private function extractContextualShopSearchQuery(string $commerceHistoryContext): string
{
    if (!$this->agentRunnerConfig->isShopQueryContextFallbackEnabled()) {
        return '';
    }

    $questions = $this->extractRecentUserQuestions(
        $commerceHistoryContext,
        $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit()
    );

    $index = count($questions);
    while ($index-- > 0) {
        $question = trim($questions[$index]);
        $isUsableQuestion = $question !== '' && !$this->isMetaOnlyShopQuery($question);
        if (!$isUsableQuestion) {
            continue;
        }

        $candidate = $this->buildContextFallbackShopQuery($question);
        if ($candidate !== '' && !$this->isMetaOnlyShopQuery($candidate)) {
            return $candidate;
        }
    }

    return '';
}
/**
 * Reduces a user question to a short keyword query.
 *
 * Tokens that belong to the configured meta-only or fallback filter terms are
 * dropped, duplicates are kept once, and the result is capped at the
 * configured number of terms (at least one).
 *
 * @return string Space-separated tokens, possibly ''.
 */
private function buildContextFallbackShopQuery(string $question): string
{
    $questionTokens = $this->tokenizeShopQueryCandidate($question);
    if ($questionTokens === []) {
        return '';
    }

    $blockedTerms = array_merge(
        $this->agentRunnerConfig->getShopQueryMetaOnlyTerms(),
        $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms()
    );
    $blockedTokens = [];
    foreach ($blockedTerms as $blockedTerm) {
        foreach ($this->tokenizeShopQueryCandidate($blockedTerm) as $blockedToken) {
            $blockedTokens[$blockedToken] = true;
        }
    }

    $termLimit = max(1, $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms());

    $kept = [];
    $alreadyKept = [];
    foreach ($questionTokens as $questionToken) {
        if (isset($blockedTokens[$questionToken]) || isset($alreadyKept[$questionToken])) {
            continue;
        }

        $alreadyKept[$questionToken] = true;
        $kept[] = $questionToken;

        if (count($kept) >= $termLimit) {
            break;
        }
    }

    return implode(' ', $kept);
}
/**
 * Splits text into lower-case search tokens.
 *
 * '-', '/' and '_' act as separators. A token is either a number with an
 * optional decimal part ("1,5" / "1.5") or a run of letters and digits.
 *
 * @return string[]
 */
private function tokenizeShopQueryCandidate(string $value): array
{
    $normalized = str_replace(['-', '/', '_'], ' ', mb_strtolower(trim($value), 'UTF-8'));

    $matchCount = preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $normalized, $matches);
    if ($matchCount < 1) {
        return [];
    }

    $tokens = [];
    foreach ($matches[0] ?? [] as $candidate) {
        $candidate = trim($candidate);
        if ($candidate !== '') {
            $tokens[] = $candidate;
        }
    }

    return $tokens;
}
/**
 * Tells whether a query consists solely of configured meta terms
 * (for example "suche im shop") and therefore names no product.
 *
 * Returns false when the guard is disabled or no meta terms are configured;
 * a query without any token counts as meta-only.
 */
private function isMetaOnlyShopQuery(string $query): bool
{
    if (!$this->agentRunnerConfig->isShopQueryMetaGuardEnabled()) {
        return false;
    }

    $queryTokens = $this->tokenizeMetaGuardText($query);
    if ($queryTokens === []) {
        return true;
    }

    $metaVocabulary = [];
    foreach ($this->agentRunnerConfig->getShopQueryMetaOnlyTerms() as $metaTerm) {
        foreach ($this->tokenizeMetaGuardText($metaTerm) as $metaToken) {
            $metaVocabulary[$metaToken] = true;
        }
    }
    if ($metaVocabulary === []) {
        return false;
    }

    $onlyMetaTokens = true;
    foreach ($queryTokens as $queryToken) {
        if (!isset($metaVocabulary[$queryToken])) {
            $onlyMetaTokens = false;
            break;
        }
    }

    return $onlyMetaTokens;
}
/**
 * Splits text into lower-case tokens for the meta-only guard.
 *
 * Every character that is neither a letter nor a digit acts as a separator.
 *
 * @return string[]
 */
private function tokenizeMetaGuardText(string $value): array
{
    $text = mb_strtolower(trim($value), 'UTF-8');
    $text = str_replace(['-', '/', '_'], ' ', $text);
    foreach (['/[^\p{L}\p{N}]+/u', '/\s+/u'] as $separatorPattern) {
        $text = preg_replace($separatorPattern, ' ', $text) ?? $text;
    }
    $text = trim($text);

    if ($text === '') {
        return [];
    }

    $tokens = [];
    foreach (explode(' ', $text) as $piece) {
        if ($piece !== '') {
            $tokens[] = $piece;
        }
    }

    return $tokens;
}
/**
 * Runs the shop search and shields the run from its exceptions.
 *
 * @return array The search results, or [] when the shop search service threw
 *               (the failure is logged as a warning).
 */
private function searchShop(
    string $query,
    string $commerceIntent,
    string $userId,
    string $commerceHistoryContext = ''
): array {
    try {
        $results = $this->shopSearchService->search(
            $query,
            $commerceIntent,
            $commerceHistoryContext
        );
    } catch (Throwable $e) {
        $logContext = [
            'userId' => $userId,
            'commerceIntent' => $commerceIntent,
            'query' => $query,
            'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
            'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
            'exception' => $e,
        ];
        $this->agentLogger->warning('Shop search failed, continuing without shop results', $logContext);

        $results = [];
    }

    return $results;
}
/**
 * Builds the history text used for commerce query optimisation and search.
 *
 * The budget-limited user history is combined with the sanitised request
 * context hint; when both exist they are separated by a blank line.
 */
private function buildCommerceHistoryContext(string $userId, string $requestContextHint = ''): string
{
    $history = $this->contextService->buildUserContextWithinBudget(
        $userId,
        $this->agentRunnerConfig->getCommerceHistoryBudgetChars()
    );
    $hint = $this->sanitizeRequestContextHintForCommerce($requestContextHint);

    return match (true) {
        $hint === '' => $history,
        $history === '' => $hint,
        default => trim($history) . "\n\n" . $hint,
    };
}
/**
 * Cleans a request context hint: unifies line endings to "\n", collapses runs
 * of tabs/spaces, limits blank lines to one, trims, and caps the text at
 * 4000 characters.
 */
private function sanitizeRequestContextHintForCommerce(string $requestContextHint): string
{
    $hint = str_replace(["\r\n", "\r"], "\n", $requestContextHint);
    $hint = preg_replace('/[\t ]+/u', ' ', $hint) ?? $hint;
    $hint = preg_replace('/\n{3,}/u', "\n\n", $hint) ?? $hint;
    $hint = trim($hint);

    if ($hint === '') {
        return '';
    }

    $tooLong = mb_strlen($hint, 'UTF-8') > 4000;

    return trim($tooLong ? mb_substr($hint, 0, 4000, 'UTF-8') : $hint);
}
/**
 * Caps the number of knowledge chunks for the two product-search intents,
 * using the per-intent limit from the configuration. Other intents keep all chunks.
 */
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
{
    if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH) {
        $chunkLimit = $this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit();

        return array_slice($knowledgeChunks, 0, $chunkLimit);
    }

    if ($commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) {
        $chunkLimit = $this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit();

        return array_slice($knowledgeChunks, 0, $chunkLimit);
    }

    return $knowledgeChunks;
}
/**
 * Cleans the raw LLM output into a single-line shop query.
 *
 * Only the first line is kept; the configured prefix pattern and trim
 * characters are removed, whitespace is collapsed, and the language
 * preservation and history anchor enrichment steps are applied.
 */
private function sanitizeOptimizedShopQuery(
    string $query,
    string $sourcePrompt = '',
    string $commerceHistoryContext = ''
): string {
    $candidate = trim($query);
    if ($candidate === '') {
        return '';
    }

    $whitespaceRun = '/\s+/u';

    $candidate = preg_split('/\R+/u', $candidate, 2)[0] ?? $candidate;
    $prefixPattern = $this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern();
    $candidate = preg_replace($prefixPattern, '', $candidate) ?? $candidate;
    $candidate = trim($candidate, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters());
    $candidate = preg_replace($whitespaceRun, ' ', $candidate) ?? $candidate;

    $candidate = $this->preserveOptimizedShopQueryLanguage($candidate, $sourcePrompt);
    $candidate = $this->enrichReferentialShopQueryFromHistory($candidate, $sourcePrompt, $commerceHistoryContext);

    // The enrichment template may introduce new whitespace runs.
    $candidate = preg_replace($whitespaceRun, ' ', $candidate) ?? $candidate;

    return trim($candidate);
}
/**
 * Adds the latest context anchor from the history to a short, referential query.
 *
 * The query is returned unchanged unless all of the following hold: the
 * feature is enabled, history exists, the query has between one and the
 * configured maximum number of tokens, query or source prompt contain a
 * configured trigger term, and an anchor is found that the query does not
 * already fully contain.
 */
private function enrichReferentialShopQueryFromHistory(
    string $query,
    string $sourcePrompt,
    string $commerceHistoryContext
): string {
    $featureOff = !$this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled();
    if ($featureOff || trim($commerceHistoryContext) === '') {
        return $query;
    }

    $tokenCount = count($this->tokenizeShopQueryCandidate($query));
    if ($tokenCount === 0) {
        return $query;
    }

    $tokenLimit = max(1, $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms());
    if ($tokenCount > $tokenLimit) {
        return $query;
    }

    $triggerText = trim($query . ' ' . $sourcePrompt);
    if (!$this->containsConfiguredShopQueryAnchorTrigger($triggerText)) {
        return $query;
    }

    $rawAnchor = $this->extractLatestConfiguredShopQueryContextAnchor($commerceHistoryContext);
    $anchor = $this->normalizeShopQueryAnchor($rawAnchor);
    if ($anchor === '' || $this->queryAlreadyContainsAllAnchorTokens($query, $anchor)) {
        return $query;
    }

    $template = $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate();
    $enriched = str_replace(['{anchor}', '{query}'], [$anchor, $query], $template);
    $enriched = trim(preg_replace('/\s+/u', ' ', $enriched) ?? $enriched);

    return $enriched !== '' ? $enriched : $query;
}
/**
 * Tells whether the text shares at least one token with the configured
 * anchor-enrichment trigger terms.
 */
private function containsConfiguredShopQueryAnchorTrigger(string $text): bool
{
    $textTokens = $this->tokenizeShopQueryCandidate($text);
    if ($textTokens === []) {
        return false;
    }

    $presentTokens = array_fill_keys($textTokens, true);
    $triggerTerms = $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms();

    $triggered = false;
    foreach ($triggerTerms as $triggerTerm) {
        foreach ($this->tokenizeShopQueryCandidate($triggerTerm) as $triggerToken) {
            if (isset($presentTokens[$triggerToken])) {
                $triggered = true;
                break 2;
            }
        }
    }

    return $triggered;
}
/**
 * Scans the history with every configured anchor pattern and returns the last
 * non-empty match that was seen.
 *
 * Patterns are processed in configuration order, so a match of a later
 * pattern replaces matches of earlier ones. Patterns for which
 * preg_match_all() reports failure are skipped silently.
 *
 * @return string The trimmed anchor text, or '' when nothing matched.
 */
private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string
{
    $anchor = '';
    $anchorPatterns = $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns();

    foreach ($anchorPatterns as $anchorPattern) {
        $hits = [];
        $outcome = @preg_match_all($anchorPattern, $commerceHistoryContext, $hits, PREG_SET_ORDER);
        if ($outcome === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $hitText = trim((string) ($hit[0] ?? ''));
            if ($hitText === '') {
                continue;
            }
            $anchor = $hitText;
        }
    }

    return $anchor;
}
/**
 * Normalises an anchor for use inside a shop query: removes the ® sign, lower
 * cases, replaces characters other than letters, digits, ",", ".", "%", "°",
 * "+", "-" and whitespace with spaces, and collapses whitespace.
 */
private function normalizeShopQueryAnchor(string $anchor): string
{
    $normalized = mb_strtolower(trim(str_replace('®', '', $anchor)), 'UTF-8');

    foreach (['/[^\p{L}\p{N},.%°+\-\s]+/u', '/\s+/u'] as $cleanupPattern) {
        $normalized = preg_replace($cleanupPattern, ' ', $normalized) ?? $normalized;
    }

    return trim($normalized);
}
/**
 * Tells whether every token of the anchor already occurs in the query.
 * An anchor without tokens counts as contained.
 */
private function queryAlreadyContainsAllAnchorTokens(string $query, string $anchor): bool
{
    $presentTokens = array_fill_keys($this->tokenizeShopQueryCandidate($query), true);

    $allPresent = true;
    foreach ($this->tokenizeShopQueryCandidate($anchor) as $anchorToken) {
        if (!isset($presentTokens[$anchorToken])) {
            $allPresent = false;
            break;
        }
    }

    return $allPresent;
}
/**
 * Replaces translated terms in the optimised query with the terms configured
 * for the language of the source prompt.
 *
 * Each configured source term is matched case-insensitively as a whole term
 * (not preceded or followed by a letter or digit); whitespace inside a term
 * matches any whitespace run.
 *
 * Entries with an empty source term are skipped: without that guard the
 * pattern would consist of the two lookarounds only and would insert the
 * target at every position between non-alphanumeric characters. Array keys are
 * cast to string because PHP turns numeric string keys into integers, which
 * preg_quote() rejects under strict_types.
 *
 * @param string $query        Query produced by the optimiser.
 * @param string $sourcePrompt Prompt whose language decides which replacements apply.
 *
 * @return string The query with replacements applied, or the unchanged query when
 *                the feature is off, no language is detected or nothing is configured.
 */
private function preserveOptimizedShopQueryLanguage(string $query, string $sourcePrompt): string
{
    if (!$this->agentRunnerConfig->isShopQueryLanguagePreservationEnabled()) {
        return $query;
    }

    $language = $this->detectConfiguredShopQueryLanguage($sourcePrompt);
    if ($language === null) {
        return $query;
    }

    $replacements = $this->agentRunnerConfig->getShopQueryTranslationReplacements($language);
    if ($replacements === []) {
        return $query;
    }

    foreach ($replacements as $source => $target) {
        $source = (string) $source;
        if ($source === '') {
            continue;
        }

        $termPattern = preg_replace('/\\s+/u', '\\s+', preg_quote($source, '/'));
        if ($termPattern === null || $termPattern === '') {
            // The term could not be turned into a pattern (e.g. invalid UTF-8); leave the query as is.
            continue;
        }

        $pattern = '/(?<![\\p{L}\\p{N}])' . $termPattern . '(?![\\p{L}\\p{N}])/iu';
        $query = preg_replace($pattern, (string) $target, $query) ?? $query;
    }

    return $query;
}
/**
 * Detects the prompt language by looking for configured marker strings.
 *
 * The prompt is lower-cased with mb_strtolower() so that non-ASCII letters
 * (Ä, Ö, Ü, ...) are folded as well, padded with one space on each side and
 * whitespace-collapsed; markers are then matched as plain substrings. The
 * first language with a matching marker wins.
 *
 * @param string $sourcePrompt Original user prompt.
 *
 * @return string|null The language key from the configuration, or null when no marker matches.
 */
private function detectConfiguredShopQueryLanguage(string $sourcePrompt): ?string
{
    // Padding lets markers such as " the " match at the start and end of the prompt.
    $normalized = ' ' . mb_strtolower($sourcePrompt, 'UTF-8') . ' ';
    $normalized = preg_replace('/[\\r\\n\\t]+/u', ' ', $normalized) ?? $normalized;
    $normalized = preg_replace('/\\s+/u', ' ', $normalized) ?? $normalized;

    foreach ($this->agentRunnerConfig->getShopQueryLanguageMarkers() as $language => $markers) {
        foreach ($markers as $marker) {
            $marker = (string) $marker;
            if ($marker !== '' && str_contains($normalized, $marker)) {
                // Array keys may arrive as int; the return type requires a string.
                return (string) $language;
            }
        }
    }

    return null;
}
/**
 * Streams the final LLM answer as chunked 'answer' messages.
 *
 * A single 'think' notice is yielded before the first token. Tokens that the
 * think suppressor filters down to '' are skipped. When the stream ends
 * without any visible output and without a pending chunk, an 'err' message is
 * yielded instead.
 *
 * The former $thinkingNoticeShown flag was set to true before the loop, so the
 * in-loop branch that re-sent the notice could never run; both were removed.
 *
 * @return Generator<int, string, mixed, string> Yields messages; the generator's return
 *                                               value is the complete visible answer text.
 */
private function streamFinalAnswer(string $finalPrompt): Generator
{
    $fullOutput = '';
    $chunker = new StreamChunker();
    $this->thinkSuppressor->reset();

    yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think');

    foreach ($this->ollamaClient->stream($finalPrompt) as $token) {
        if (!is_string($token)) {
            continue;
        }

        $cleanToken = $this->thinkSuppressor->filter($token);
        if ($cleanToken === '') {
            continue;
        }

        $fullOutput .= $cleanToken;
        $chunk = $chunker->push($cleanToken);
        if ($chunk !== null) {
            yield $this->systemMsg($chunk, 'answer');
        }
    }

    $finalChunk = $chunker->flush();
    if ($finalChunk !== null) {
        yield $this->systemMsg($finalChunk, 'answer');
    } elseif ($fullOutput === '') {
        yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err');
    }

    return $fullOutput;
}
/**
 * Formats the collected source badges as one 'info' message.
 *
 * @param string[] $sources Badges as collected by addSource().
 * @param string   $prefix  Text placed in front of the space-separated badges.
 */
private function emitSources(array $sources, string $prefix): string
{
    $badgeLine = implode(' ', $sources);

    return $this->systemMsg($prefix . $badgeLine, 'info');
}
/**
 * Appends the badge for a source label to the list unless it is already present.
 *
 * @param string[] $sources List of badges, modified in place.
 */
private function addSource(array &$sources, string $label): void
{
    $badge = $this->badge($label);
    if (in_array($badge, $sources, true)) {
        return;
    }

    $sources[] = $badge;
}
/**
 * Builds the text stored as the assistant response in the conversation history.
 *
 * Non-empty notices come first, followed by the answer. When the answer is
 * empty, a "Systemhinweis" line based on the configured "no LLM data"
 * message (or a built-in default) takes its place.
 *
 * @param string[] $notices System notices collected during the run.
 */
private function buildHistoryResponse(string $fullOutput, array $notices): string
{
    $sections = [];
    foreach ($notices as $notice) {
        $notice = trim($notice);
        if ($notice === '') {
            continue;
        }
        $sections[] = $notice;
    }

    $answer = trim($fullOutput);
    if ($answer === '') {
        $fallbackText = $this->plainTextFromHtml($this->agentRunnerConfig->getNoLlmDataReceivedMessage());
        if ($fallbackText === '') {
            $fallbackText = 'Es wurden keine Daten vom LLM empfangen.';
        }
        $answer = 'Systemhinweis: ' . $fallbackText;
    }
    $sections[] = $answer;

    return trim(implode("\n\n", $sections));
}
/**
 * Formats a one-line "Systemhinweis" entry from a title and an optional cause.
 *
 * Both inputs are reduced to plain text and a single line. An empty title is replaced by
 * "Systemhinweis"; a cause longer than 500 characters is cut to 497 characters
 * (right-trimmed) plus "...". Without a cause only the title sentence is returned.
 */
private function buildHistoryNotice(string $title, ?string $detail): string
{
    $heading = $this->normalizeOneLine($this->plainTextFromHtml($title));
    $cause = $this->normalizeOneLine($this->plainTextFromHtml((string) $detail));

    $heading = $heading === '' ? 'Systemhinweis' : $heading;
    $sentence = 'Systemhinweis: ' . $heading . '.';

    if ($cause === '') {
        return $sentence;
    }

    $tooLong = mb_strlen($cause, 'UTF-8') > 500;
    if ($tooLong) {
        $cause = rtrim(mb_substr($cause, 0, 497, 'UTF-8')) . '...';
    }

    return $sentence . ' Ursache: ' . $cause;
}
/**
 * Reduces an HTML fragment to plain text: tags are stripped, entities decoded,
 * whitespace runs collapsed to one blank and the edges trimmed.
 */
private function plainTextFromHtml(string $value): string
{
    $withoutTags = strip_tags($value);
    $decoded = html_entity_decode($withoutTags, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // preg_replace() returns null on failure; keep the decoded text in that case.
    $collapsed = preg_replace('/\s+/u', ' ', $decoded);

    return trim($collapsed ?? $decoded);
}
/**
 * Renders the HTML meta card announcing a live shop search.
 *
 * Shows whether the optimized or the direct query is used, the commerce intent
 * ("commerce" when empty) and the query that is sent. An empty query falls back to the
 * original query, and to a German placeholder when that is empty too. All dynamic values
 * are HTML-escaped.
 */
private function buildShopSearchMetaMessage(
    string $query,
    string $commerceIntent,
    bool $usedOptimizedQuery,
    string $originalQuery
): string {
    $escape = static fn (string $text): string => htmlspecialchars(
        $text,
        ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );

    $shownQuery = $this->normalizeOneLine($query);
    $fallbackQuery = $this->normalizeOneLine($originalQuery);
    if ($shownQuery === '') {
        $shownQuery = $fallbackQuery === '' ? 'keine Suchquery ermittelt' : $fallbackQuery;
    }

    $modeLabel = $usedOptimizedQuery ? 'optimiert' : 'direkt';
    $intentLabel = $commerceIntent === '' ? 'commerce' : $commerceIntent;

    return implode('', [
        '<div class="retriex-meta-card retriex-shop-meta">',
        '<div class="retriex-meta-card__eyebrow">Live-Shopdaten</div>',
        '<div class="retriex-meta-card__title">Shop-Suche wird ausgeführt</div>',
        '<div class="retriex-meta-card__body">',
        '<span class="retriex-meta-pill">' . $escape($modeLabel) . '</span>',
        '<span class="retriex-meta-pill">Intent: ' . $escape($intentLabel) . '</span>',
        '</div>',
        '<div class="retriex-meta-query"><span>Gesendete Suchquery</span><code>',
        $escape($shownQuery),
        '</code></div>',
        '</div>',
    ]);
}
/**
 * Renders the HTML warning shown when live shop data could not be loaded.
 *
 * The reason is reduced to one line, replaced by a German default text when empty, cut to
 * 317 characters (right-trimmed) plus "..." when longer than 320 characters, and
 * HTML-escaped before it is inserted.
 */
private function buildShopUnavailableMessage(?string $reason): string
{
    $cause = $this->normalizeOneLine((string) $reason);
    if ($cause === '') {
        $cause = 'Keine Detailmeldung vom Shopware-Server.';
    }

    $exceedsLimit = mb_strlen($cause, 'UTF-8') > 320;
    if ($exceedsLimit) {
        $cause = rtrim(mb_substr($cause, 0, 317, 'UTF-8')) . '...';
    }

    $escapedCause = htmlspecialchars($cause, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    return '<div class="retriex-alert retriex-alert--warning">'
        . '<div class="retriex-alert__icon">⚠️</div>'
        . '<div class="retriex-alert__content">'
        . '<div class="retriex-alert__title">Shopdaten konnten nicht geladen werden</div>'
        . '<div class="retriex-alert__text">RetrieX antwortet ohne Live-Shopdaten weiter. Ursache: '
        . $escapedCause
        . '</div></div></div>';
}
/**
 * Collapses every whitespace run in $value to a single blank and strips both edges.
 *
 * With the /u modifier \s also matches Unicode whitespace such as a no-break space, which
 * trim() does not remove. Collapsing first and trimming last therefore guarantees that
 * the result never starts or ends with a blank and that whitespace-only input yields ''.
 * If preg_replace() fails (it returns null), the input is only trimmed.
 */
private function normalizeOneLine(string $value): string
{
    $collapsed = preg_replace('/\s+/u', ' ', $value) ?? $value;

    return trim($collapsed);
}
/**
 * Builds the HTML error text shown to the user for an exception.
 *
 * The exception message (or the exception class name when the message is empty) is
 * HTML-escaped. In debug mode it is appended to the configured debug prefix; otherwise it
 * follows the configured generic message inside a small "Technischer Fehler" line.
 */
private function buildUserErrorMessage(Throwable $e): string
{
    $detail = trim($e->getMessage());
    $detail = $detail !== '' ? $detail : $e::class;

    $escapedDetail = htmlspecialchars($detail, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    if ($this->debug) {
        return $this->agentRunnerConfig->getDebugInternalErrorPrefix() . $escapedDetail;
    }

    return $this->agentRunnerConfig->getGenericInternalErrorMessage()
        . '<br><small>Technischer Fehler: ' . $escapedDetail . '</small>';
}
/**
 * Inserts the HTML-escaped $label into the configured source badge template.
 */
private function badge(string $label): string
{
    $template = $this->agentRunnerConfig->getSourceBadgeHtmlTemplate();
    $escapedLabel = htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    return sprintf($template, $escapedLabel);
}
/**
 * Wraps $msg in the configured HTML template for the given message type.
 *
 * 'err', 'think' and 'info' insert $msg unchanged into their template; 'debug' inserts
 * the HTML-escaped $msg. Every other type (including 'answer' and 'meta') returns $msg
 * as is. Returns '' when system messages are switched off.
 */
private function systemMsg(string $msg, string $type = ''): string
{
    if (!$this->systemMsgOn) {
        return '';
    }

    $config = $this->agentRunnerConfig;

    return match ($type) {
        'err' => sprintf($config->getErrorHtmlTemplate(), $msg),
        'think' => sprintf($config->getThinkHtmlTemplate(), $msg),
        'info' => sprintf($config->getInfoHtmlTemplate(), $msg),
        'debug' => sprintf(
            $config->getDebugHtmlTemplate(),
            htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
        ),
        // 'answer', 'meta' and unknown types pass through untouched.
        default => $msg,
    };
}
}