Files
MtoRagSystem/src/Agent/AgentRunner.php

792 lines
26 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Agent;
use App\Commerce\SearchRepairService;
use App\Commerce\ShopSearchService;
use App\Config\AgentRunnerConfig;
use App\Context\ContextService;
use App\Context\UrlAnalyzer;
use App\Infrastructure\OllamaClient;
use App\Intent\CommerceIntentLite;
use App\Knowledge\Retrieval\RetrieverInterface;
use Generator;
use Psr\Log\LoggerInterface;
use Throwable;
final readonly class AgentRunner
{
private bool $systemMsgOn;
public function __construct(
private PromptBuilder $promptBuilder,
private ThinkSuppressor $thinkSuppressor,
private ContextService $contextService,
private UrlAnalyzer $urlAnalyzer,
private RetrieverInterface $retriever,
private ShopSearchService $shopSearchService,
private SearchRepairService $searchRepairService,
private CommerceIntentLite $commerceIntentLite,
private OllamaClient $ollamaClient,
private LoggerInterface $agentLogger,
private AgentRunnerConfig $agentRunnerConfig,
private bool $debug,
private bool $logPrompt,
private bool $logContext,
) {
$this->systemMsgOn = true;
}
public function run(string $prompt, string $userId, bool $forceFullContext = false): Generator
{
$prompt = trim($prompt);
if ($prompt === '') {
yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
return;
}
$shopResults = [];
$primaryShopResults = [];
$sources = [];
$optimizedShopQuery = '';
$shopSearchQuery = '';
$commerceIntent = CommerceIntentLite::NONE;
$knowledgeRetrievalPrompt = $prompt;
$usedFollowUpRetrievalContext = false;
$commerceHistoryContext = '';
$attemptedShopRepair = false;
$usedShopRepair = false;
$shopRepairQueries = [];
$this->agentLogger->info('Agent run started', [
'userId' => $userId,
]);
try {
if ($forceFullContext) {
// Full context mode is already passed to PromptBuilder.
// Additional context strategies can be added here later.
}
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
if ($urlContent !== '') {
$this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
}
$commerceIntent = $this->detectCommerceIntent($prompt);
yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
$knowledgeRetrievalPrompt = $this->buildKnowledgeRetrievalPrompt(
prompt: $prompt,
userId: $userId,
commerceIntent: $commerceIntent
);
$usedFollowUpRetrievalContext = $knowledgeRetrievalPrompt !== $prompt;
$knowledgeChunks = $this->retriever->retrieve($knowledgeRetrievalPrompt);
if ($knowledgeChunks !== []) {
$this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
}
if ($usedFollowUpRetrievalContext) {
$this->agentLogger->info('Knowledge retrieval used follow-up context', [
'userId' => $userId,
'prompt' => $prompt,
'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
'commerceIntent' => $commerceIntent,
]);
}
if ($this->isCommerceIntent($commerceIntent)) {
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId);
if ($commerceHistoryContext !== '') {
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
}
$optimizedShopQuery = $this->buildOptimizedShopQuery(
$prompt,
$userId,
$commerceHistoryContext
);
$shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt;
$this->agentLogger->info('Commerce search prepared', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
]);
yield $this->systemMsg(
sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent),
'think'
);
$primaryShopResults = $this->searchShop(
$shopSearchQuery,
$commerceIntent,
$userId,
$commerceHistoryContext
);
$repairPayload = $this->repairShopResults(
prompt: $prompt,
userId: $userId,
commerceIntent: $commerceIntent,
commerceHistoryContext: $commerceHistoryContext,
primaryQuery: $shopSearchQuery,
primaryShopResults: $primaryShopResults,
knowledgeChunks: $knowledgeChunks
);
$shopResults = $repairPayload['results'];
$attemptedShopRepair = $repairPayload['attemptedRepair'];
$usedShopRepair = $repairPayload['usedRepair'];
$shopRepairQueries = $repairPayload['repairQueries'];
if ($shopResults !== []) {
$this->addSource($sources, $this->agentRunnerConfig->getShopSystemSourceLabel());
}
if ($attemptedShopRepair) {
$this->addSource($sources, $this->agentRunnerConfig->getExtendedShopSearchSourceLabel());
}
}
if ($shopResults !== []) {
$knowledgeChunks = $this->limitKnowledgeChunks($knowledgeChunks, $commerceIntent);
}
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'think');
$finalPrompt = $this->promptBuilder->build(
prompt: $prompt,
userId: $userId,
urlContent: $urlContent,
knowledgeChunks: $knowledgeChunks,
shopResults: $shopResults,
fullContext: $forceFullContext,
swagFullOutPut: $optimizedShopQuery
);
if ($this->debug && $this->logPrompt) {
$this->agentLogger->debug('Final prompt', [
'userId' => $userId,
'finalPrompt' => $finalPrompt,
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
'usedFollowUpRetrievalContext' => $usedFollowUpRetrievalContext,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
]);
}
if ($this->debug && $this->logContext) {
$this->agentLogger->debug('Conversation context snapshot', [
'userId' => $userId,
'context' => $this->contextService->buildUserContext(
$userId,
$forceFullContext
),
]);
}
if ($sources !== []) {
yield $this->emitSources(
$sources,
$this->agentRunnerConfig->getUsedSourcesPrefix()
);
}
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
if ($sources !== []) {
yield $this->emitSources(
$sources,
$this->agentRunnerConfig->getSourcesPrefix()
);
}
if ($this->debug) {
yield $this->systemMsg($finalPrompt, 'debug');
}
if ($fullOutput !== '') {
$this->contextService->appendHistory(
$userId,
$prompt,
$fullOutput
);
}
$this->agentLogger->info('Agent run finished', [
'userId' => $userId,
'outputLength' => mb_strlen($fullOutput),
'contextMode' => $forceFullContext ? 'full' : 'recent',
'commerceIntent' => $commerceIntent,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'attemptedShopRepair' => $attemptedShopRepair,
'usedShopRepair' => $usedShopRepair,
'shopRepairQueries' => $shopRepairQueries,
'knowledgeChunkCount' => count($knowledgeChunks),
'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
'usedFollowUpRetrievalContext' => $usedFollowUpRetrievalContext,
'hasUrlContent' => $urlContent !== '',
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
'optimizedShopQuery' => $optimizedShopQuery,
'shopSearchQuery' => $shopSearchQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
]);
} catch (Throwable $e) {
$this->agentLogger->error('Agent run failed', [
'userId' => $userId,
'exception' => $e,
]);
yield $this->systemMsg($this->buildUserErrorMessage($e), 'err');
}
}
private function detectCommerceIntent(string $prompt): string
{
$commerceMeta = $this->commerceIntentLite->detect($prompt);
return (string) ($commerceMeta['intent'] ?? CommerceIntentLite::NONE);
}
private function isCommerceIntent(string $commerceIntent): bool
{
return $commerceIntent === CommerceIntentLite::PRODUCT_SEARCH
|| $commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH;
}
private function buildKnowledgeRetrievalPrompt(
string $prompt,
string $userId,
string $commerceIntent
): string {
if (!$this->shouldUseFollowUpContextForKnowledgeRetrieval($prompt, $commerceIntent)) {
return $prompt;
}
$history = $this->contextService->buildUserContextWithinBudget($userId, 3000);
$previousQuestions = $this->extractRecentUserQuestions($history, 2);
$referenceAnchors = $this->extractLatestAssistantReferenceAnchors($history);
if ($previousQuestions === [] && $referenceAnchors === []) {
return $prompt;
}
$lines = [];
foreach ($previousQuestions as $question) {
$lines[] = 'Vorherige Nutzerfrage: ' . $question;
}
if ($referenceAnchors !== []) {
$lines[] = 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): '
. implode(' ', $referenceAnchors);
}
$lines[] = 'Aktuelle Folgefrage: ' . $prompt;
return implode("\n", $lines);
}
private function shouldUseFollowUpContextForKnowledgeRetrieval(string $prompt, string $commerceIntent): bool
{
if ($this->isCommerceIntent($commerceIntent)) {
return false;
}
$normalized = $this->normalizeFollowUpText($prompt);
if ($normalized === '') {
return false;
}
if ($this->containsExplicitCommercialFollowUpSignal($normalized)) {
return false;
}
if (mb_strlen($normalized, 'UTF-8') > 180 && !$this->containsStrongFollowUpReference($normalized)) {
return false;
}
return $this->containsStrongFollowUpReference($normalized);
}
private function containsStrongFollowUpReference(string $normalized): bool
{
$patterns = [
'/\bder\s+wert\b/u',
'/\bdieser\s+wert\b/u',
'/\bdiesen\s+wert\b/u',
'/\bdem\s+wert\b/u',
'/\bmit\s+welche(?:m|n|r)?\b/u',
'/\bwomit\b/u',
'/\bdamit\b/u',
'/\bdafuer\b/u',
'/\bdafür\b/u',
'/\bdazu\b/u',
'/\bdaraus\b/u',
'/\bwelche(?:r|s|m|n)?\s+indikator\b/u',
'/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u',
'/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u',
'/\bwelche(?:r|s|m|n)?\s+bereich\b/u',
'/\bwelche(?:r|s|m|n)?\s+messbereich\b/u',
'/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u',
];
foreach ($patterns as $pattern) {
if (preg_match($pattern, $normalized) === 1) {
return true;
}
}
return false;
}
private function containsExplicitCommercialFollowUpSignal(string $normalized): bool
{
$commercialSignals = [
'shop', 'preis', 'preise', 'kostet', 'kosten', 'kaufen', 'bestellen',
'warenkorb', 'lieferzeit', 'verfuegbar', 'verfügbar', 'lager', 'url',
'link', 'artikelnummer', 'sku', 'produktnummer',
];
foreach ($commercialSignals as $signal) {
if (str_contains($normalized, $signal)) {
return true;
}
}
return false;
}
/**
* @return string[]
*/
private function extractRecentUserQuestions(string $history, int $limit): array
{
$history = trim($history);
if ($history === '' || $limit <= 0) {
return [];
}
if (preg_match_all('/^Question:\s*(.+)$/mi', $history, $matches) !== 1) {
return [];
}
$questions = array_values(array_filter(
array_map(
fn(string $question): string => $this->sanitizeHistoryQuestion($question),
$matches[1] ?? []
),
static fn(string $question): bool => $question !== ''
));
if ($questions === []) {
return [];
}
return array_slice($questions, -$limit);
}
/**
* Extracts stable reference anchors from the latest assistant answer.
*
* These anchors are only used to resolve follow-up references such as
* "der Wert" or "welcher Indikator". They are not factual evidence for
* the final answer. To avoid propagating wrong earlier answers, only the
* first explicit Testomat model reference and the first explicit °dH value
* are kept. Indicator names, reagent codes, prices, URLs and product
* numbers are intentionally ignored here.
*
* @return string[]
*/
private function extractLatestAssistantReferenceAnchors(string $history): array
{
$turn = $this->extractLatestHistoryTurn($history);
if ($turn === '') {
return [];
}
$answer = preg_replace('/^Question:\s*.*(?:\R|$)/u', '', $turn, 1) ?? '';
$answer = trim($answer);
if ($answer === '') {
return [];
}
$anchors = [];
$model = $this->extractFirstTestomatModelAnchor($answer);
if ($model !== '') {
$anchors[] = $model;
}
$hardnessValue = $this->extractFirstHardnessValueAnchor($answer);
if ($hardnessValue !== '') {
$anchors[] = $hardnessValue;
}
return array_values(array_unique($anchors));
}
private function extractLatestHistoryTurn(string $history): string
{
$history = trim($history);
if ($history === '') {
return '';
}
$parts = preg_split('/(?=^Question:\s)/m', $history);
if ($parts === false || $parts === []) {
return '';
}
$turns = array_values(array_filter(
array_map(static fn(string $part): string => trim($part), $parts),
static fn(string $part): bool => $part !== ''
));
if ($turns === []) {
return '';
}
return (string) end($turns);
}
private function extractFirstTestomatModelAnchor(string $text): string
{
$pattern = '/\bTestomat(?:®)?\s+'
. '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)'
. '\b/iu';
if (preg_match($pattern, $text, $matches) !== 1) {
return '';
}
$value = $this->sanitizeHistoryQuestion((string) ($matches[0] ?? ''));
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
return trim(str_replace('®', '', $value));
}
private function extractFirstHardnessValueAnchor(string $text): string
{
if (preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $matches) !== 1) {
return '';
}
$value = preg_replace('/\s+/u', ' ', (string) ($matches[0] ?? '')) ?? '';
return trim($value);
}
private function sanitizeHistoryQuestion(string $question): string
{
$question = trim((string) preg_replace('/\s+/u', ' ', $question));
if ($question === '') {
return '';
}
if (mb_strlen($question, 'UTF-8') <= 500) {
return $question;
}
return rtrim(mb_substr($question, 0, 497, 'UTF-8')) . '...';
}
private function normalizeFollowUpText(string $value): string
{
$value = mb_strtolower(trim($value), 'UTF-8');
$value = str_replace(['-', '/', '_'], ' ', $value);
$value = preg_replace('/[^\p{L}\p{N}\s]+/u', ' ', $value) ?? $value;
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
return trim($value);
}
private function buildOptimizedShopQuery(
string $prompt,
string $userId,
string $commerceHistoryContext = ''
): string {
$shopPrompt = trim($this->agentRunnerConfig->getShopPrompt(
$prompt,
$commerceHistoryContext
));
if ($shopPrompt === '') {
return '';
}
$optimizedQuery = '';
$this->thinkSuppressor->reset();
try {
foreach ($this->ollamaClient->stream($shopPrompt) as $token) {
if (!is_string($token)) {
continue;
}
$cleanToken = $this->thinkSuppressor->filter($token);
if ($cleanToken === '') {
continue;
}
$optimizedQuery .= $cleanToken;
}
} catch (Throwable $e) {
$this->agentLogger->warning('Shop query optimization failed, falling back to original prompt', [
'userId' => $userId,
'exception' => $e,
]);
return '';
}
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
}
/**
* @return array{
* results: array,
* attemptedRepair: bool,
* usedRepair: bool,
* repairQueries: string[]
* }
*/
private function repairShopResults(
string $prompt,
string $userId,
string $commerceIntent,
string $commerceHistoryContext,
string $primaryQuery,
array $primaryShopResults,
array $knowledgeChunks
): array {
try {
return $this->searchRepairService->repair(
prompt: $prompt,
commerceIntent: $commerceIntent,
commerceHistoryContext: $commerceHistoryContext,
primaryQuery: $primaryQuery,
primaryShopResults: $primaryShopResults,
knowledgeChunks: $knowledgeChunks
);
} catch (Throwable $e) {
$this->agentLogger->warning('Shop repair failed, continuing with primary shop results', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'primaryQuery' => $primaryQuery,
'primaryShopResultsCount' => count($primaryShopResults),
'exception' => $e,
]);
return [
'results' => $primaryShopResults,
'attemptedRepair' => false,
'usedRepair' => false,
'repairQueries' => [],
];
}
}
private function searchShop(
string $query,
string $commerceIntent,
string $userId,
string $commerceHistoryContext = ''
): array {
try {
return $this->shopSearchService->search(
$query,
$commerceIntent,
$commerceHistoryContext
);
} catch (Throwable $e) {
$this->agentLogger->warning('Shop search failed, continuing without shop results', [
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'query' => $query,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
'exception' => $e,
]);
return [];
}
}
private function buildCommerceHistoryContext(string $userId): string
{
return $this->contextService->buildUserContextWithinBudget(
$userId,
$this->agentRunnerConfig->getCommerceHistoryBudgetChars()
);
}
private function limitKnowledgeChunks(array $knowledgeChunks, string $commerceIntent): array
{
return match ($commerceIntent) {
CommerceIntentLite::PRODUCT_SEARCH => array_slice(
$knowledgeChunks,
0,
$this->agentRunnerConfig->getProductSearchKnowledgeChunkLimit()
),
CommerceIntentLite::ADVISORY_PRODUCT_SEARCH => array_slice(
$knowledgeChunks,
0,
$this->agentRunnerConfig->getAdvisoryProductSearchKnowledgeChunkLimit()
),
default => $knowledgeChunks,
};
}
private function sanitizeOptimizedShopQuery(string $query): string
{
$query = trim($query);
if ($query === '') {
return '';
}
$query = preg_split('/\R+/u', $query, 2)[0] ?? $query;
$query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query;
$query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters());
$query = preg_replace('/\s+/u', ' ', $query) ?? $query;
return trim($query);
}
/**
* @return Generator<int, string, mixed, string>
*/
private function streamFinalAnswer(string $finalPrompt): Generator
{
$fullOutput = '';
$firstThinkLoop = true;
$chunker = new StreamChunker();
$this->thinkSuppressor->reset();
foreach ($this->ollamaClient->stream($finalPrompt) as $token) {
if (!is_string($token)) {
continue;
}
$cleanToken = $this->thinkSuppressor->filter($token);
if ($cleanToken === '') {
if ($firstThinkLoop) {
yield $this->systemMsg($this->agentRunnerConfig->getThinkingWhileStreamingMessage(), 'think');
$firstThinkLoop = false;
}
continue;
}
$fullOutput .= $cleanToken;
$chunk = $chunker->push($cleanToken);
if ($chunk !== null) {
yield $this->systemMsg($chunk, 'answer');
}
}
$finalChunk = $chunker->flush();
if ($finalChunk !== null) {
yield $this->systemMsg($finalChunk, 'answer');
} elseif ($fullOutput === '') {
yield $this->systemMsg($this->agentRunnerConfig->getNoLlmDataReceivedMessage(), 'err');
}
return $fullOutput;
}
/**
* @param string[] $sources
*/
private function emitSources(array $sources, string $prefix): string
{
return $this->systemMsg($prefix . implode(' ', $sources), 'info');
}
/**
* @param string[] $sources
*/
private function addSource(array &$sources, string $label): void
{
$badge = $this->badge($label);
if (!in_array($badge, $sources, true)) {
$sources[] = $badge;
}
}
private function buildUserErrorMessage(Throwable $e): string
{
if (!$this->debug) {
return $this->agentRunnerConfig->getGenericInternalErrorMessage();
}
return $this->agentRunnerConfig->getDebugInternalErrorPrefix()
. htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}
private function badge(string $label): string
{
return sprintf(
$this->agentRunnerConfig->getSourceBadgeHtmlTemplate(),
htmlspecialchars($label, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
);
}
private function systemMsg(string $msg, string $type = ''): string
{
if (!$this->systemMsgOn) {
return '';
}
return match ($type) {
'answer' => $msg,
'err' => sprintf($this->agentRunnerConfig->getErrorHtmlTemplate(), $msg),
'think' => sprintf($this->agentRunnerConfig->getThinkHtmlTemplate(), $msg),
'info' => sprintf($this->agentRunnerConfig->getInfoHtmlTemplate(), $msg),
'debug' => sprintf(
$this->agentRunnerConfig->getDebugHtmlTemplate(),
htmlspecialchars($msg, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
),
default => $msg,
};
}
}