This commit is contained in:
team 1
2026-05-02 19:17:59 +02:00
parent 72241c1d00
commit 0fc34f4bc0
8 changed files with 940 additions and 23 deletions

View File

@@ -42,7 +42,9 @@ final readonly class AgentRunner
public function run(string $prompt, string $userId, bool $forceFullContext = false, string $requestContextHint = ''): Generator
{
$prompt = trim($prompt);
$originalPrompt = trim($prompt);
$prompt = $originalPrompt;
$routingPrompt = $prompt;
if ($prompt === '') {
yield $this->systemMsg($this->agentRunnerConfig->getEmptyPromptMessage(), 'err');
@@ -87,26 +89,37 @@ final readonly class AgentRunner
);
yield $this->systemMsg($this->agentRunnerConfig->getAnalyzeRequestMessage(), 'think');
$normalizedPrompt = yield from $this->normalizePromptForRouting($prompt, $userId);
if ($normalizedPrompt !== $prompt) {
$this->agentLogger->info('Prompt normalized before routing', [
'userId' => $userId,
'originalPrompt' => $prompt,
'normalizedPrompt' => $normalizedPrompt,
]);
$routingPrompt = $normalizedPrompt;
}
yield $this->systemMsg($this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'think');
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($originalPrompt);
if ($urlContent !== '') {
$this->addSource($sources, $this->agentRunnerConfig->getExternalUrlSourceLabel());
}
$commerceIntent = $this->detectCommerceIntent($prompt);
$commerceIntent = $this->detectCommerceIntent($routingPrompt);
yield $this->systemMsg($this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'think');
$knowledgeRetrievalPrompt = $this->buildKnowledgeRetrievalPrompt(
prompt: $prompt,
prompt: $routingPrompt,
userId: $userId,
commerceIntent: $commerceIntent
);
$usedFollowUpRetrievalContext = $knowledgeRetrievalPrompt !== $prompt;
$usedFollowUpRetrievalContext = $knowledgeRetrievalPrompt !== $routingPrompt;
$knowledgeChunks = $this->retriever->retrieve($knowledgeRetrievalPrompt);
$knowledgeEvidenceState = $this->resolveKnowledgeEvidenceState($prompt, $knowledgeChunks, $urlContent);
$knowledgeEvidenceState = $this->resolveKnowledgeEvidenceState($routingPrompt, $knowledgeChunks, $urlContent);
if ($knowledgeChunks !== []) {
$this->addSource($sources, $this->agentRunnerConfig->getRagKnowledgeSourceLabel());
}
@@ -127,6 +140,7 @@ final readonly class AgentRunner
$this->agentLogger->info('Knowledge retrieval used follow-up context', [
'userId' => $userId,
'prompt' => $prompt,
'routingPrompt' => $routingPrompt,
'knowledgeRetrievalPrompt' => $knowledgeRetrievalPrompt,
'commerceIntent' => $commerceIntent,
]);
@@ -154,13 +168,13 @@ final readonly class AgentRunner
}
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
$prompt,
$routingPrompt,
$userId,
$commerceHistoryContext
);
$shopSearchQuery = $this->resolveShopSearchQuery(
prompt: $prompt,
prompt: $routingPrompt,
optimizedShopQuery: $optimizedShopQuery,
commerceHistoryContext: $commerceHistoryContext,
userId: $userId
@@ -171,6 +185,7 @@ final readonly class AgentRunner
'userId' => $userId,
'commerceIntent' => $commerceIntent,
'prompt' => $prompt,
'routingPrompt' => $routingPrompt,
'optimizedShopQuery' => $optimizedShopQuery,
'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
@@ -199,7 +214,7 @@ final readonly class AgentRunner
$this->contextService->appendHistory(
$userId,
$prompt,
$originalPrompt,
$this->plainTextFromHtml($noConcreteShopQueryMessage)
);
@@ -484,7 +499,7 @@ final readonly class AgentRunner
if ($historyResponse !== '') {
$this->contextService->appendHistory(
$userId,
$prompt,
$originalPrompt,
$historyResponse
);
}
@@ -494,6 +509,10 @@ final readonly class AgentRunner
'outputLength' => mb_strlen($fullOutput),
'contextMode' => $forceFullContext ? 'full' : 'recent',
'commerceIntent' => $commerceIntent,
'originalPrompt' => $originalPrompt,
'effectivePrompt' => $prompt,
'routingPrompt' => $routingPrompt,
'promptWasNormalized' => $routingPrompt !== $originalPrompt,
'primaryShopResultsCount' => count($primaryShopResults),
'shopResultsCount' => count($shopResults),
'attemptedShopRepair' => $attemptedShopRepair,
@@ -539,11 +558,374 @@ final readonly class AgentRunner
));
if ($historyResponse !== '') {
$this->contextService->appendHistory($userId, $prompt, $historyResponse);
$this->contextService->appendHistory($userId, $originalPrompt, $historyResponse);
}
}
}
/**
 * Derives the prompt variant that is used for routing decisions.
 *
 * Returns the prompt unchanged when input normalization is disabled, when the
 * prompt matches a skip rule, or when the configured normalization instruction
 * is empty. Otherwise the instruction is streamed through the Ollama client,
 * the collected output is sanitized against the original prompt and the fuzzy
 * routing-signal normalization is applied on top. If streaming throws, a
 * warning is logged and only the fuzzy normalization is applied to the
 * unmodified prompt.
 *
 * While streaming, a "think" status message is yielded whenever at least two
 * seconds have passed since the previous one; the check runs only when a
 * string token arrives.
 *
 * @return Generator<int, string, mixed, string>
 */
private function normalizePromptForRouting(string $prompt, string $userId): Generator
{
    $normalizationDisabled = !$this->agentRunnerConfig->isInputNormalizationEnabled();
    if ($normalizationDisabled || $this->shouldSkipInputNormalization($prompt)) {
        return $prompt;
    }

    $modelInstruction = trim($this->agentRunnerConfig->getInputNormalizationPrompt($prompt));
    if ($modelInstruction === '') {
        return $prompt;
    }

    $modelOutput = '';
    $heartbeatSentAt = time();
    $this->thinkSuppressor->reset();

    try {
        foreach ($this->ollamaClient->stream($modelInstruction) as $chunk) {
            if (is_string($chunk)) {
                $heartbeatDue = time() - $heartbeatSentAt >= 2;
                if ($heartbeatDue) {
                    yield $this->systemMsg(
                        $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
                        'think'
                    );
                    $heartbeatSentAt = time();
                }

                // Appending an empty filter result is a no-op, so no explicit skip is needed.
                $modelOutput .= $this->thinkSuppressor->filter($chunk);
            }
        }
    } catch (Throwable $e) {
        $this->agentLogger->warning('Prompt normalization failed, falling back to fuzzy routing-signal normalization', [
            'userId' => $userId,
            'exception' => $e,
        ]);

        return $this->applyFuzzyRoutingSignalNormalization($prompt, $prompt);
    }

    return $this->applyFuzzyRoutingSignalNormalization(
        $this->sanitizeNormalizedPromptForRouting($modelOutput, $prompt),
        $prompt
    );
}
/**
 * Decides whether the prompt must bypass input normalization.
 *
 * The prompt is skipped when its character count exceeds the configured
 * maximum or when any configured skip pattern matches it.
 */
private function shouldSkipInputNormalization(string $prompt): bool
{
    $promptChars = mb_strlen($prompt, 'UTF-8');
    if ($promptChars > $this->agentRunnerConfig->getInputNormalizationMaxInputChars()) {
        return true;
    }

    $skipPatterns = $this->agentRunnerConfig->getInputNormalizationSkipPatterns();
    foreach ($skipPatterns as $skipPattern) {
        // Warnings are suppressed on purpose: a pattern that fails to match or compile counts as "no match".
        $matchResult = @preg_match($skipPattern, $prompt);
        if ($matchResult === 1) {
            return true;
        }
    }

    return false;
}
/**
 * Cleans up a model-produced normalization candidate and validates it.
 *
 * The candidate is cut at the first run of two or more consecutive line
 * breaks, stripped of the configured output prefix and trim characters, and
 * has its whitespace collapsed. The original prompt is returned instead when
 * the result is empty, exceeds the configured maximum output length, equals
 * the original prompt after comparison normalization, or fails the safety
 * guard.
 */
private function sanitizeNormalizedPromptForRouting(string $candidate, string $originalPrompt): string
{
    $cleaned = trim($candidate);
    if ($cleaned === '') {
        return $originalPrompt;
    }

    $config = $this->agentRunnerConfig;

    // Keep only the text in front of the first multi-line-break separator.
    $cleaned = trim(preg_split('/\R{2,}/u', $cleaned, 2)[0] ?? $cleaned);
    $cleaned = preg_replace($config->getInputNormalizationOutputPrefixPattern(), '', $cleaned) ?? $cleaned;
    $cleaned = trim($cleaned, $config->getOptimizedShopQueryTrimCharacters());
    $cleaned = trim(preg_replace('/\s+/u', ' ', $cleaned) ?? $cleaned);

    // Short-circuit evaluation keeps the checks in their original order.
    $rejected = $cleaned === ''
        || mb_strlen($cleaned, 'UTF-8') > $config->getInputNormalizationMaxOutputChars()
        || $this->normalizeRoutingComparisonText($cleaned) === $this->normalizeRoutingComparisonText($originalPrompt)
        || !$this->isSafeNormalizedPromptCandidate($cleaned, $originalPrompt);

    return $rejected ? $originalPrompt : $cleaned;
}
/**
 * Replaces misspelled words in the candidate with configured routing terms.
 *
 * Every word that starts with a letter and reaches the configured minimum
 * token length is offered to the fuzzy resolver. The rewritten text is only
 * accepted when at least one word was replaced, the result is non-empty,
 * differs from the candidate after comparison normalization, and passes the
 * safety guard against the original prompt. In every other case the candidate
 * is returned as given.
 */
private function applyFuzzyRoutingSignalNormalization(string $candidate, string $originalPrompt): string
{
    if (!$this->agentRunnerConfig->isInputNormalizationFuzzyRoutingEnabled()) {
        return $candidate;
    }

    $terms = $this->buildFuzzyRoutingTermIndex();
    if ($terms === []) {
        return $candidate;
    }

    // The first character is matched separately, so the quantifier covers the remaining length.
    $remainingChars = max(0, $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMinTokenLength() - 1);
    $wordPattern = '/(?<![\p{L}\p{N}])[\p{L}][\p{L}\p{N}\-]{' . $remainingChars . ',}(?![\p{L}\p{N}])/u';

    $replacedAny = false;
    $swapWord = function (array $hit) use ($terms, &$replacedAny): string {
        $word = (string) ($hit[0] ?? '');
        $canonical = $this->resolveFuzzyRoutingTokenReplacement($word, $terms);
        if ($canonical !== null && $canonical !== $word) {
            $replacedAny = true;

            return $canonical;
        }

        return $word;
    };

    $rewritten = preg_replace_callback($wordPattern, $swapWord, $candidate);
    if (!$replacedAny || !is_string($rewritten)) {
        return $candidate;
    }

    $trimmed = trim($rewritten);
    $rewritten = preg_replace('/\s+/u', ' ', $trimmed) ?? $trimmed;
    if ($rewritten === '') {
        return $candidate;
    }

    $effectivelyUnchanged = $this->normalizeRoutingComparisonText($rewritten)
        === $this->normalizeRoutingComparisonText($candidate);
    if ($effectivelyUnchanged || !$this->isSafeNormalizedPromptCandidate($rewritten, $originalPrompt)) {
        return $candidate;
    }

    return $rewritten;
}
/**
 * Builds the lookup table of configured fuzzy routing terms.
 *
 * Keys are the terms after fuzzy-token normalization, values are the trimmed,
 * lower-cased terms. Terms that are empty before or after normalization are
 * dropped; when several terms share a key, the first one wins.
 *
 * @return array<string, string>
 */
private function buildFuzzyRoutingTermIndex(): array
{
    $index = [];

    foreach ($this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms() as $configuredTerm) {
        $label = trim($configuredTerm);
        $key = $label === '' ? '' : $this->normalizeFuzzyRoutingToken($label);

        if ($key !== '' && !isset($index[$key])) {
            $index[$key] = mb_strtolower($label, 'UTF-8');
        }
    }

    return $index;
}
/**
 * Finds the single configured routing term that a token most likely misspells.
 *
 * Returns null when the token normalizes to an empty string, already equals a
 * known term, has no term within the allowed edit distance and similarity, or
 * when two different terms tie for the smallest distance.
 *
 * @param array<string, string> $terms normalized term => replacement text
 */
private function resolveFuzzyRoutingTokenReplacement(string $token, array $terms): ?string
{
    $normalizedToken = $this->normalizeFuzzyRoutingToken($token);
    if ($normalizedToken === '' || isset($terms[$normalizedToken])) {
        return null;
    }

    $bestTerm = null;
    $bestDistance = PHP_INT_MAX;
    $ambiguous = false;
    $tokenLength = strlen($normalizedToken);

    foreach ($terms as $termKey => $term) {
        // PHP turns numeric-string array keys (e.g. "2024") into ints; cast back so the
        // string-only helpers below never receive an int.
        $normalizedTerm = (string) $termKey;
        $termLength = strlen($normalizedTerm);
        $maxLength = max($tokenLength, $termLength);
        $maxDistance = $this->resolveFuzzyRoutingMaxDistance($maxLength);

        // The length difference is a lower bound for the edit distance, so skip early.
        if (abs($tokenLength - $termLength) > $maxDistance) {
            continue;
        }

        $distance = $this->calculateFuzzyRoutingDistance($normalizedToken, $normalizedTerm);
        if ($distance > $maxDistance) {
            continue;
        }

        $similarityPercent = (int) round((1 - ($distance / max(1, $maxLength))) * 100);
        if ($similarityPercent < $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingMinSimilarityPercent()) {
            continue;
        }

        if ($distance < $bestDistance) {
            // A strictly better match clears any tie recorded at a larger distance.
            $bestDistance = $distance;
            $bestTerm = $term;
            $ambiguous = false;
            continue;
        }

        if ($distance === $bestDistance && $term !== $bestTerm) {
            $ambiguous = true;
        }
    }

    if ($bestTerm === null || $ambiguous) {
        return null;
    }

    return $bestTerm;
}
/**
 * Computes the byte-wise edit distance between two strings.
 *
 * Insertions, deletions, substitutions and swaps of two adjacent bytes each
 * cost one (restricted Damerau-Levenshtein / optimal string alignment).
 */
private function calculateFuzzyRoutingDistance(string $left, string $right): int
{
    $rows = strlen($left);
    $cols = strlen($right);
    if ($rows === 0 || $cols === 0) {
        // With one side empty the distance is the length of the other side.
        return $rows + $cols;
    }

    // Row 0: turning the empty prefix into each prefix of $right.
    $table = [range(0, $cols)];

    for ($r = 1; $r <= $rows; $r++) {
        // Column 0: deleting every byte of the current $left prefix.
        $table[$r] = [$r];

        for ($c = 1; $c <= $cols; $c++) {
            $substitution = $table[$r - 1][$c - 1] + ($left[$r - 1] === $right[$c - 1] ? 0 : 1);
            $best = min(
                $table[$r - 1][$c] + 1,
                $table[$r][$c - 1] + 1,
                $substitution
            );

            $isAdjacentSwap = $r > 1
                && $c > 1
                && $left[$r - 1] === $right[$c - 2]
                && $left[$r - 2] === $right[$c - 1];
            if ($isAdjacentSwap) {
                $best = min($best, $table[$r - 2][$c - 2] + 1);
            }

            $table[$r][$c] = $best;
        }
    }

    return $table[$rows][$cols];
}
/**
 * Picks the allowed edit distance for a token of the given length.
 *
 * Lengths at or above the configured "long" threshold use the long limit,
 * lengths at or above the "medium" threshold use the medium limit, and
 * everything shorter uses the short limit.
 */
private function resolveFuzzyRoutingMaxDistance(int $tokenLength): int
{
    $config = $this->agentRunnerConfig;

    // Arms are evaluated top to bottom, so the long threshold is checked first.
    return match (true) {
        $tokenLength >= $config->getInputNormalizationFuzzyRoutingLongTokenLength()
            => $config->getInputNormalizationFuzzyRoutingMaxDistanceLong(),
        $tokenLength >= $config->getInputNormalizationFuzzyRoutingMediumTokenLength()
            => $config->getInputNormalizationFuzzyRoutingMaxDistanceMedium(),
        default => $config->getInputNormalizationFuzzyRoutingMaxDistanceShort(),
    };
}
/**
 * Reduces a token to a lower-case ASCII key for fuzzy comparison.
 *
 * German umlauts and sharp s are transliterated (ä→ae, ö→oe, ü→ue, ß→ss);
 * every remaining character outside a-z and 0-9 is removed.
 */
private function normalizeFuzzyRoutingToken(string $token): string
{
    $transliteration = [
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',
    ];

    $lowered = mb_strtolower(trim($token), 'UTF-8');
    $ascii = strtr($lowered, $transliteration);
    $stripped = preg_replace('/[^a-z0-9]+/u', '', $ascii);

    // preg_replace yields null on failure; fall back to the transliterated text.
    return trim($stripped ?? $ascii);
}
/**
 * Guards against a normalized prompt drifting too far from the original.
 *
 * A candidate is rejected when it is longer than the configured percentage of
 * the original length, when it has more guard tokens than the original plus
 * the configured allowance, or when it contains a number that does not occur
 * in the original prompt.
 */
private function isSafeNormalizedPromptCandidate(string $candidate, string $originalPrompt): bool
{
    $config = $this->agentRunnerConfig;

    $baselineLength = max(1, mb_strlen($originalPrompt, 'UTF-8'));
    $lengthCeiling = (int) ceil($baselineLength * ($config->getInputNormalizationMaxLengthRatioPercent() / 100));
    if (mb_strlen($candidate, 'UTF-8') > $lengthCeiling) {
        return false;
    }

    $originalTokenCount = count($this->tokenizeInputNormalizationGuardText($originalPrompt));
    $candidateTokenCount = count($this->tokenizeInputNormalizationGuardText($candidate));
    if ($candidateTokenCount > $originalTokenCount + $config->getInputNormalizationMaxAddedTokens()) {
        return false;
    }

    // Any number present in the candidate but absent from the original makes it unsafe.
    $knownNumbers = $this->extractInputNormalizationNumbers($originalPrompt);
    $introducedNumbers = array_diff($this->extractInputNormalizationNumbers($candidate), $knownNumbers);

    return $introducedNumbers === [];
}
/**
 * Canonicalizes text for equality checks: trimmed, lower-cased, and with every
 * run of whitespace collapsed to a single space.
 */
private function normalizeRoutingComparisonText(string $value): string
{
    $lowered = mb_strtolower(trim($value), 'UTF-8');
    $collapsed = preg_replace('/\s+/u', ' ', $lowered);

    // preg_replace yields null on failure; fall back to the lower-cased text.
    return trim($collapsed ?? $lowered);
}
/**
 * Splits text into lower-cased guard tokens.
 *
 * A token is either a number (optionally with one "," or "." decimal part) or
 * a run of letters and digits. Returns an empty list when nothing matches or
 * when the regular expression fails.
 *
 * @return string[]
 */
private function tokenizeInputNormalizationGuardText(string $value): array
{
    $matchCount = preg_match_all(
        '/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u',
        mb_strtolower($value, 'UTF-8'),
        $matches
    );

    // preg_match_all returns the NUMBER of matches (or false on error), so comparing
    // against 1 would discard every text that has more than one token.
    if ($matchCount === false || $matchCount === 0) {
        return [];
    }

    // Matches are non-empty and contain no whitespace by construction of the pattern.
    return $matches[0];
}
/**
 * Extracts the distinct numbers that occur in the text.
 *
 * Numbers may carry one "," or "." decimal part; a decimal comma is rewritten
 * to a dot so "19,99" and "19.99" count as the same number. Returns an empty
 * list when the text contains no number or the regular expression fails.
 *
 * @return string[]
 */
private function extractInputNormalizationNumbers(string $value): array
{
    $matchCount = preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches);

    // preg_match_all returns the NUMBER of matches (or false on error), so comparing
    // against 1 would discard every text that has more than one number.
    if ($matchCount === false || $matchCount === 0) {
        return [];
    }

    $canonicalNumbers = array_map(
        static fn(string $number): string => str_replace(',', '.', $number),
        $matches[0]
    );

    return array_values(array_unique($canonicalNumbers));
}
private function detectCommerceIntent(string $prompt): string
{
$commerceMeta = $this->commerceIntentLite->detect($prompt);