diff --git a/RETRIEX_PATCH_44_RUNTIME_ANSWER_GUARD_README.md b/RETRIEX_PATCH_44_RUNTIME_ANSWER_GUARD_README.md new file mode 100644 index 0000000..38c76d3 --- /dev/null +++ b/RETRIEX_PATCH_44_RUNTIME_ANSWER_GUARD_README.md @@ -0,0 +1,134 @@ +# RetrieX Patch 44 - Runtime Answer Guard / Shop Follow-up Cleanup + +## Ziel + +Dieser Patch behandelt drei Fehler außerhalb der p43 YAML-/Listen-/Maps-Konsolidierung: + +1. Sporadische endlos lange Produktlisten, bei denen ein letztes Shop-Produkt wiederholt wird. +2. Falscher einleitender Satz bei direkten Zubehör-/Kabel-Shopfragen wie `zeige mir Anschlusskabel für pH/Redox`. +3. Falsche Shop-Follow-up-Query `schaue` bei `schaue im shop` nach einer vorherigen fachlichen Frage. + +## Änderungen + +### 1. Runtime-Schutz gegen Endlosantworten + +Dateien: + +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `config/retriex/agent.yaml` + +Änderung: + +- Neuer konfigurierbarer `final_answer_guard`. +- Stoppt die finale LLM-Ausgabe, wenn eine Antwort zu lang wird oder dieselbe relevante Antwortzeile zu oft wiederholt wird. +- Der Guard läuft im finalen Streaming-Pfad und ist generisch: keine Chlor-, Schwimmbad-, Testomat- oder Produkt-Sonderlogik. +- Bei Abbruch wird ein kurzer Hinweis ausgegeben statt endlos weiterzustreamen. + +### 2. Ollama Output-Cap + +Dateien: + +- `src/Infrastructure/OllamaClient.php` +- `config/retriex/model.yaml` +- `config/services.yaml` +- `src/Config/RetriexEffectiveConfigProvider.php` + +Änderung: + +- Neuer Parameter `retriex.llm.num_predict: 2048`. +- Wird als Ollama-Option `num_predict` gesendet. +- Wichtig für Installationen mit blockierender Ollama-Generierung (`stream=false`), damit Runaway-Ausgaben bereits auf Modellseite begrenzt werden. + +### 3. `schaue im shop` als Meta-Shop-Follow-up + +Datei: + +- `config/retriex/language.yaml` + +Änderung: + +- Ergänzt zentrale User-Instruction-Terme: `schaue`, `schau`, `schauen`, `nachschauen`. +- Dadurch wird `schaue im shop` wie `suche im shop` als Meta-Shop-Follow-up behandelt und kann den vorherigen Kontext verwenden, statt die Suchquery `schaue` zu senden. + +### 4. Zubehörantwort-Hardening + +Datei: + +- `config/retriex/prompt.yaml` + +Änderung: + +- Verstärkt Antwortregeln für direkte Zubehör-/Kabel-Shopfragen. +- Bei passenden Shop-Treffern soll nicht mit einer fehlenden Hauptgerät-/Messgeräte-Aussage begonnen werden. +- Das verhindert Sätze wie sinngemäß „kein sicher belegter Testomat für pH-Messung“, wenn der Nutzer Anschlusskabel für pH/Redox angefragt hat. + +## Geänderte Dateien + +- `config/retriex/agent.yaml` +- `config/retriex/language.yaml` +- `config/retriex/model.yaml` +- `config/retriex/prompt.yaml` +- `config/services.yaml` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` +- `src/Infrastructure/OllamaClient.php` + +## Lokale Prüfungen + +Ausgeführt: + +```bash +php -l src/Agent/AgentRunner.php +php -l src/Config/AgentRunnerConfig.php +php -l src/Infrastructure/OllamaClient.php +php -l src/Config/RetriexEffectiveConfigProvider.php +python3 -c 'YAML parse check for changed YAML files' +``` + +Ergebnis: + +- PHP-Syntax der geänderten PHP-Dateien: grün. +- YAML-Parsing der geänderten YAML-Dateien: grün. +- Konfigurations-Sanity für `schaue/schau` und `shop`: grün. + +Nicht vollständig lokal ausführbar: + +```bash +php bin/console mto:agent:config:validate +``` + +Grund: Im entpackten ZIP ist kein `vendor/` enthalten; `bin/console` bricht mit `Dependencies are missing. Try running "composer install".` ab. + +## Nach dem Einspielen bitte prüfen + +```bash +bin/console cache:clear +bin/console mto:agent:config:validate +bin/console mto:agent:regression:test +bin/console mto:agent:config:audit-source --details +bin/console mto:agent:config:audit-patterns --details +``` + +## Manuelle Regressionen + +1. `ich würde gern chlor im schwinnbad messen` + - Erwartung: keine endlose Wiederholung des letzten Produkts. + - Falls das Modell trotzdem wiederholt, muss die Antwort durch den Guard abbrechen. + +2. `zeige mir Anschlusskabel für pH/Redox` + - Erwartung: Zubehör-/Kabeltreffer direkt listen. + - Kein einleitender Satz über fehlenden Testomat oder fehlendes pH-Messgerät. + +3. Dialog: + - `welche indikatoren gibt es für die messung von freiem chlor` + - danach: `schaue im shop` + - Erwartung: Shop-Follow-up nutzt den vorherigen Kontext, nicht die Query `schaue`. + +## Hinweise + +- Keine Retrieval-, Ranking- oder Produktmatching-Logik wurde geändert. +- Keine neuen fachlichen Token-/Produktlisten im PHP-Core. +- Neue fachliche/sprachliche Begriffe liegen in YAML. +- Der Runtime-Guard ist bewusst generisch und konfigurierbar. diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index 2daca60..ea2a910 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -149,6 +149,24 @@ parameters: generic_internal_error: '❌ Bei der Verarbeitung der Anfrage ist ein interner Fehler aufgetreten.' debug_internal_error_prefix: '❌ Interner Fehler: ' + final_answer_guard: + enabled: true + # Prevents runaway final answers caused by repeated LLM output. This is a + # runtime safety guard and does not change retrieval, ranking or product matching. + max_output_chars: 12000 + truncation_message: |2- + + Hinweis: Die Antwort wurde gekürzt, weil eine wiederholte oder zu lange Ausgabe erkannt wurde. Bitte starten Sie bei Bedarf eine präzisere Nachfrage. + repeated_line: + enabled: true + min_output_chars: 1200 + min_line_chars: 16 + max_line_repetitions: 3 + trailing_window_lines: 220 + ignore_patterns: + - '/^\s*(?:produkt(?:-|\s)?nummer|artikel(?:-|\s)?nummer|preis|verfügbar|verfuegbar|url|quellen?)\b/iu' + - '/^\s*(?:status|query|intent|datenbasis|shop(?:-|\s)?suche)\b/iu' + rag_evidence_guard: cleanup_profile: rag_evidence # Legacy/domain override list. Generic German stopwords are provided by diff --git a/config/retriex/language.yaml b/config/retriex/language.yaml index 1ff347c..2ef8e3e 100644 --- a/config/retriex/language.yaml +++ b/config/retriex/language.yaml @@ -172,6 +172,10 @@ parameters: - suche - such - suchen + - schaue + - schau + - schauen + - nachschauen - finde - find - finden diff --git a/config/retriex/model.yaml b/config/retriex/model.yaml index a620129..18e610e 100644 --- a/config/retriex/model.yaml +++ b/config/retriex/model.yaml @@ -15,3 +15,6 @@ parameters: retriex.model.guardrail_max_vector_top_k: 200 retriex.llm.timeout_seconds: 300 + # Hard upper bound for generated output tokens. Prevents runaway repeated answers + # before they can become very long streamed or blocking responses. + retriex.llm.num_predict: 2048 diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index 502b8f6..5710886 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -215,6 +215,7 @@ parameters: - '- If RAG knowledge and shop data are both available but do not explicitly support the same technical suitability, start with the uncertainty instead of selecting a product confidently.' nur_shop_treffer_kein_belastbares_fachwissen: - '- Start the answer by making the fallback clear: "Aus den Shopdaten ergeben sich folgende Treffer; technische Eignung bitte prüfen."' + - '- If the user directly asks for accessories, cables, electrodes, buffers, kits, sets, indicators, reagents, or consumables and matching shop hits are present, do not start with a missing main-device or missing measuring-device sentence; start directly with the accessory shop hits.' - '- Do not present shop-only matches as verified technical suitability unless the shop text explicitly states that suitability.' - '- Do not say that RAG knowledge confirms the result. Say that no belastbares RAG-Fachwissen was available for this selection.' keine_belastbaren_daten: @@ -279,6 +280,7 @@ parameters: accessory_rules: - '- If the user directly asks for accessories, cables, electrodes, buffers, kits, sets, indicators, reagents, or consumables, answer the accessory request first instead of reframing it as a request for a measuring device.' - '- For direct accessory shop searches, do not introduce Testomat, measuring-device, or main-device caveats unless the user asks for a device or the provided sources explicitly require a device context.' + - '- For direct accessory shop searches with matching shop hits, never begin with a missing-device statement; begin with the accessory hits or a short shop-only fallback sentence.' - '- If the shop product name itself explicitly contains the requested accessory type and parameter, such as pH/Redox, treat it as a commercial accessory match and list the exact shop fields. Do not demand separate proof that the accessory itself measures the parameter.' - '- If the user asks for a matching accessory for a named main device, separate the answer into: main device and matching accessory.' - '- The main device must come first only when a main device is explicitly requested or named.' diff --git a/config/services.yaml b/config/services.yaml index 338aeac..9861948 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -167,6 +167,7 @@ services: arguments: $apiUrl: '%env(AI_LLM_API_URL)%' $timeoutSeconds: '%retriex.llm.timeout_seconds%' + $numPredict: '%retriex.llm.num_predict%' $configProvider: '@App\Service\ModelGenerationConfigProvider' diff --git a/public/assets/styles/base.css b/public/assets/styles/base.css index 81648c3..6ece282 100644 --- a/public/assets/styles/base.css +++ b/public/assets/styles/base.css @@ -31,6 +31,15 @@ h3, .h3 { font-size: 1.35rem; } +a { + color: #7a9ed1; + text-decoration: none; +} + +a:hover { + color: #FFF; +} + h4, h5, h6, .h4, .h5, .h6 { font-size: 0.95rem; } @@ -198,7 +207,7 @@ button:disabled { .bg-dark { background-color: var(--assistant) !important; color: #fff; - border: 1px solid var(--border); + border: 1px solid #324053 !important; } .btn-trans { @@ -254,7 +263,7 @@ button:disabled { ========================================================= */ body { - color: #babac3; + color: #cacad5; } .bg-black { @@ -476,6 +485,7 @@ span.think { color: rgba(248, 249, 250, 0.76); font-size: 0.84rem; border-radius: 0 0 6px 6px; + background-color: #212b39 !important; } .retriex-option-toggle { diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 697e043..d767222 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -2682,6 +2682,7 @@ final readonly class AgentRunner { $fullOutput = ''; $thinkingNoticeShown = false; + $stoppedByFinalAnswerGuard = false; $chunker = new StreamChunker(); $this->thinkSuppressor->reset(); @@ -2706,11 +2707,36 @@ final readonly class AgentRunner continue; } - $fullOutput .= $cleanToken; + $guardReason = null; + $cleanToken = $this->guardFinalAnswerToken($fullOutput, $cleanToken, $guardReason); - $chunk = $chunker->push($cleanToken); - if ($chunk !== null) { - yield $this->systemMsg($chunk, 'answer'); + if ($cleanToken !== '') { + $fullOutput .= $cleanToken; + + $chunk = $chunker->push($cleanToken); + if ($chunk !== null) { + yield $this->systemMsg($chunk, 'answer'); + } + } + + if ($guardReason !== null) { + $stoppedByFinalAnswerGuard = true; + + $finalChunk = $chunker->flush(); + if ($finalChunk !== null) { + yield $this->systemMsg($finalChunk, 'answer'); + } + + $guardMessage = $this->agentRunnerConfig->getFinalAnswerGuardTruncationMessage(); + $fullOutput .= $guardMessage; + yield $this->systemMsg($guardMessage, 'answer'); + + $this->agentLogger->warning('Final answer guard stopped LLM output', [ + 'reason' => $guardReason, + 'outputLength' => mb_strlen($fullOutput, 'UTF-8'), + ]); + + break; } } } catch (Throwable $e) { @@ -2730,6 +2756,10 @@ final readonly class AgentRunner return $fullOutput; } + if ($stoppedByFinalAnswerGuard) { + return $fullOutput; + } + $finalChunk = $chunker->flush(); if ($finalChunk !== null) { yield $this->systemMsg($finalChunk, 'answer'); @@ -2747,6 +2777,112 @@ final readonly class AgentRunner return $fullOutput; } + private function guardFinalAnswerToken(string $currentOutput, string $nextToken, ?string &$reason): string + { + $reason = null; + + if (!$this->agentRunnerConfig->isFinalAnswerGuardEnabled()) { + return $nextToken; + } + + $maxOutputChars = max(1000, $this->agentRunnerConfig->getFinalAnswerGuardMaxOutputChars()); + $currentChars = mb_strlen($currentOutput, 'UTF-8'); + $nextChars = mb_strlen($nextToken, 'UTF-8'); + + if (($currentChars + $nextChars) > $maxOutputChars) { + $reason = 'max_output_chars'; + $remainingChars = max(0, $maxOutputChars - $currentChars); + + return $remainingChars > 0 ? mb_substr($nextToken, 0, $remainingChars, 'UTF-8') : ''; + } + + $candidate = $currentOutput . $nextToken; + $cutoffBytes = $this->detectRepeatedFinalAnswerLineCutoff($candidate); + + if ($cutoffBytes === null) { + return $nextToken; + } + + $reason = 'repeated_line'; + $currentBytes = strlen($currentOutput); + + if ($cutoffBytes <= $currentBytes) { + return ''; + } + + return mb_strcut($nextToken, 0, $cutoffBytes - $currentBytes, 'UTF-8'); + } + + private function detectRepeatedFinalAnswerLineCutoff(string $text): ?int + { + if (!$this->agentRunnerConfig->isFinalAnswerRepeatedLineGuardEnabled()) { + return null; + } + + if (mb_strlen($text, 'UTF-8') < max(0, $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinOutputChars())) { + return null; + } + + if (preg_match_all('/[^\r\n]+/u', $text, $matches, PREG_OFFSET_CAPTURE) === false) { + return null; + } + + $lines = $matches[0] ?? []; + $window = max(10, $this->agentRunnerConfig->getFinalAnswerRepeatedLineTrailingWindowLines()); + if (count($lines) > $window) { + $lines = array_slice($lines, -$window); + } + + $counts = []; + $maxRepetitions = max(1, $this->agentRunnerConfig->getFinalAnswerRepeatedLineMaxRepetitions()); + + foreach ($lines as $lineMatch) { + $line = (string) ($lineMatch[0] ?? ''); + $offset = (int) ($lineMatch[1] ?? 0); + $normalizedLine = $this->normalizeFinalAnswerLineForRepetitionGuard($line); + + if ($normalizedLine === '') { + continue; + } + + $counts[$normalizedLine] = ($counts[$normalizedLine] ?? 0) + 1; + + if ($counts[$normalizedLine] > $maxRepetitions) { + return $offset; + } + } + + return null; + } + + private function normalizeFinalAnswerLineForRepetitionGuard(string $line): string + { + $line = html_entity_decode(strip_tags($line), ENT_QUOTES | ENT_HTML5, 'UTF-8'); + $line = preg_replace('/^\s*(?:[-*•]+|\d+[.)])\s*/u', '', $line) ?? $line; + $line = preg_replace('/\s+/u', ' ', $line) ?? $line; + $line = trim($line, " \t\n\r\0\x0B:;.-"); + + if ($line === '') { + return ''; + } + + foreach ($this->agentRunnerConfig->getFinalAnswerRepeatedLineIgnorePatterns() as $pattern) { + try { + if (@preg_match($pattern, $line) === 1) { + return ''; + } + } catch (Throwable) { + continue; + } + } + + if (mb_strlen($line, 'UTF-8') < max(1, $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinLineChars())) { + return ''; + } + + return mb_strtolower($line, 'UTF-8'); + } + /** * Build a deterministic safety answer for environments where the LLM returns no tokens. * diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index bcb9f34..272aa2d 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -684,6 +684,54 @@ final class AgentRunnerConfig return $this->getRequiredString('messages.no_llm_data_received'); } + public function isFinalAnswerGuardEnabled(): bool + { + return $this->getRequiredBool('final_answer_guard.enabled'); + } + + public function getFinalAnswerGuardMaxOutputChars(): int + { + return $this->getRequiredInt('final_answer_guard.max_output_chars'); + } + + public function getFinalAnswerGuardTruncationMessage(): string + { + return $this->getRequiredString('final_answer_guard.truncation_message'); + } + + public function isFinalAnswerRepeatedLineGuardEnabled(): bool + { + return $this->getRequiredBool('final_answer_guard.repeated_line.enabled'); + } + + public function getFinalAnswerRepeatedLineMinOutputChars(): int + { + return $this->getRequiredInt('final_answer_guard.repeated_line.min_output_chars'); + } + + public function getFinalAnswerRepeatedLineMinLineChars(): int + { + return $this->getRequiredInt('final_answer_guard.repeated_line.min_line_chars'); + } + + public function getFinalAnswerRepeatedLineMaxRepetitions(): int + { + return $this->getRequiredInt('final_answer_guard.repeated_line.max_line_repetitions'); + } + + public function getFinalAnswerRepeatedLineTrailingWindowLines(): int + { + return $this->getRequiredInt('final_answer_guard.repeated_line.trailing_window_lines'); + } + + /** + * @return string[] + */ + public function getFinalAnswerRepeatedLineIgnorePatterns(): array + { + return $this->getRequiredStringList('final_answer_guard.repeated_line.ignore_patterns'); + } + public function getNoLlmFallbackMaxShopResults(): int { return $this->getRequiredInt('no_llm_fallback.max_shop_results'); diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index bb8cde9..d44cb5c 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -42,7 +42,10 @@ final readonly class RetriexEffectiveConfigProvider 'runtime' => $this->runtimeConfig(), 'index' => $this->indexConfig(), 'model_generation' => $this->modelConfig(), - 'llm' => ['timeout_seconds' => $this->param('retriex.llm.timeout_seconds')], + 'llm' => [ + 'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'), + 'num_predict' => $this->param('retriex.llm.num_predict'), + ], 'retrieval' => $this->retrievalConfig(), 'prompt' => $this->promptConfig(), 'agent' => $this->agentConfig(), @@ -639,6 +642,19 @@ final readonly class RetriexEffectiveConfigProvider 'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(), 'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(), ], + 'final_answer_guard' => [ + 'enabled' => $this->agentRunnerConfig->isFinalAnswerGuardEnabled(), + 'max_output_chars' => $this->agentRunnerConfig->getFinalAnswerGuardMaxOutputChars(), + 'truncation_message' => $this->agentRunnerConfig->getFinalAnswerGuardTruncationMessage(), + 'repeated_line' => [ + 'enabled' => $this->agentRunnerConfig->isFinalAnswerRepeatedLineGuardEnabled(), + 'min_output_chars' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinOutputChars(), + 'min_line_chars' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMinLineChars(), + 'max_line_repetitions' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineMaxRepetitions(), + 'trailing_window_lines' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineTrailingWindowLines(), + 'ignore_patterns' => $this->agentRunnerConfig->getFinalAnswerRepeatedLineIgnorePatterns(), + ], + ], 'rag_evidence_guard' => [ 'cleanup_profile' => $this->agentRunnerConfig->getRagEvidenceCleanupProfile(), 'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(), diff --git a/src/Infrastructure/OllamaClient.php b/src/Infrastructure/OllamaClient.php index 185d994..20e4f5f 100644 --- a/src/Infrastructure/OllamaClient.php +++ b/src/Infrastructure/OllamaClient.php @@ -22,6 +22,7 @@ final class OllamaClient public function __construct( private string $apiUrl, private string $timeoutSeconds, + private int|string $numPredict, private ModelGenerationConfigProvider $configProvider ) {} @@ -188,13 +189,20 @@ final class OllamaClient private function buildOptions(): array { $this->config = $this->getConfig(); - return [ + $options = [ 'temperature' => $this->config->getTemperature(), 'top_k' => $this->config->getTopK(), 'top_p' => $this->config->getTopP(), 'repeat_penalty' => $this->config->getRepeatPenalty(), 'num_ctx' => $this->config->getNumCtx(), ]; + + $numPredict = (int) $this->numPredict; + if ($numPredict > 0) { + $options['num_predict'] = $numPredict; + } + + return $options; } private function requestTimeoutSeconds(): int