patch 20o
This commit is contained in:
3242
AgentRunner.php
3242
AgentRunner.php
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
||||
# RetrieX Patch p20o – Standalone Shop Query Isolation Fix
|
||||
|
||||
## Ziel
|
||||
|
||||
Dieser Patch korrigiert die Fehlleitung neuer Standalone-Shopfragen wie:
|
||||
|
||||
```text
|
||||
zeige mir Anschlusskabel für pH/Redox
|
||||
```
|
||||
|
||||
Diese Anfrage darf nicht durch alten Chatverlauf zu einer Shop-Suchquery wie `testomat 808 indikator` werden.
|
||||
|
||||
## Ursache
|
||||
|
||||
Der bisherige Schutz war zu indirekt. Sobald der Shop-Query-Pfad einen Verlaufskontext zuließ, konnte der LLM-Shop-Query-Optimizer oder der nachgelagerte Commerce-Parser ältere Kontextanker wieder in eine neue Suchfrage übernehmen.
|
||||
|
||||
Zusätzlich enthielt der Shop-Optimizer-Prompt konkrete Beispiele wie `Indikator 300`, `Testomat 808` und `Testomat 2000`. Diese Beispiele konnten den Optimizer in genau die falsche Richtung lenken.
|
||||
|
||||
Außerdem konnte der Input-Normalizer den Platzhaltertext `normalized user input` wörtlich zurückgeben. Dieser Patch schützt dagegen ebenfalls.
|
||||
|
||||
## Änderungen
|
||||
|
||||
### `src/Agent/AgentRunner.php`
|
||||
|
||||
- Neue Standalone-Shopfragen werden jetzt explizit von History/Optimizer-Kontext isoliert.
|
||||
- Die Entscheidung wird aus dem Originalprompt getroffen, nicht aus dem normalisierten Routing-Prompt.
|
||||
- Nicht-referenzielle Shopfragen verwenden deterministische Query-Erzeugung über den aktuellen Prompt.
|
||||
- Referenzielle Follow-ups wie `die tabelle mit preisen`, `suche im shop` oder `was kostet der indikator` dürfen weiterhin Verlauf nutzen.
|
||||
- Placeholder-Ausgaben des Normalizers wie `normalized user input` werden verworfen und auf den Originalprompt zurückgesetzt.
|
||||
|
||||
### `config/retriex/agent.yaml`
|
||||
|
||||
- Der Normalizer-Prompt enthält keinen `<normalized user input>`-Platzhalter mehr.
|
||||
- Die Normalizer-Regel verbietet, den Platzhaltertext wörtlich auszugeben.
|
||||
- Konkrete Shop-Optimizer-Beispiele `Indikator 300`, `Testomat 808`, `Testomat 2000` wurden aus dem Prompt entfernt.
|
||||
|
||||
## Erwartete manuelle Tests
|
||||
|
||||
```text
|
||||
zeige mir Anschlusskabel für pH/Redox
|
||||
```
|
||||
|
||||
Erwartung: Suchquery sinngemäß `anschlusskabel ph redox`, nicht `testomat 808 indikator`.
|
||||
|
||||
```text
|
||||
shop testomat 808
|
||||
```
|
||||
|
||||
Erwartung: Suchquery `testomat 808`, nicht `testomat 808 indikator`.
|
||||
|
||||
```text
|
||||
welche grenzwerte kann der testomat 808 messen
|
||||
|
||||
die tabelle mit preisen
|
||||
```
|
||||
|
||||
Erwartung: referenzieller Follow-up darf weiter Verlauf nutzen, sinngemäß `testomat 808 indikator`.
|
||||
|
||||
```text
|
||||
was kpstet der indikator
|
||||
```
|
||||
|
||||
Erwartung: Tippfehler-Normalisierung/Fuzzy-Routing bleibt aktiv und darf den vorherigen Indikator-Kontext nutzen.
|
||||
|
||||
## Pflichtchecks
|
||||
|
||||
```bash
|
||||
bin/console cache:clear
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
Bei OPcache/PHP-FPM bitte PHP-FPM bzw. Container neu laden.
|
||||
@@ -24,10 +24,10 @@ parameters:
|
||||
intro: 'Normalize the following user input for RetrieX routing before intent detection.'
|
||||
output_format_block: |-
|
||||
Output format:
|
||||
<normalized user input>
|
||||
Return exactly one line: the corrected user input.
|
||||
current_user_input_label: 'USER INPUT'
|
||||
rules:
|
||||
- '- Output only the normalized user input.'
|
||||
- '- Output only the corrected user input text itself, never the words "normalized user input".'
|
||||
- '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
|
||||
- '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
|
||||
- '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
|
||||
@@ -410,7 +410,7 @@ parameters:
|
||||
- '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
|
||||
- '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
|
||||
- '- Preserve domain terms from the current user input or resolved context in their original language.'
|
||||
- '- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).'
|
||||
- '- Numbers that belong to a product name or model must be preserved when they are present in the CURRENT USER INPUT or a clearly resolved reference.'
|
||||
- '- Separate terms using spaces only.'
|
||||
- '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
|
||||
- '- Try to always identify all products mentioned in the user input text, even in long prompts.'
|
||||
|
||||
@@ -213,10 +213,13 @@ final readonly class AgentRunner
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
||||
|
||||
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId, $requestContextHint);
|
||||
$shopQueryHistoryContext = $this->resolveShopQueryHistoryContext(
|
||||
prompt: $originalPrompt,
|
||||
commerceHistoryContext: $commerceHistoryContext
|
||||
);
|
||||
$isStandaloneShopQuery = $this->shouldIsolateStandaloneShopQueryFromHistory($originalPrompt);
|
||||
$shopQueryHistoryContext = $isStandaloneShopQuery
|
||||
? ''
|
||||
: $this->resolveShopQueryHistoryContext(
|
||||
prompt: $originalPrompt,
|
||||
commerceHistoryContext: $commerceHistoryContext
|
||||
);
|
||||
|
||||
if ($shopQueryHistoryContext !== '') {
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
|
||||
@@ -229,22 +232,43 @@ final readonly class AgentRunner
|
||||
'routingPrompt' => $routingPrompt,
|
||||
'originalPrompt' => $originalPrompt,
|
||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||
'standaloneShopQueryIsolated' => $isStandaloneShopQuery,
|
||||
]);
|
||||
}
|
||||
|
||||
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
|
||||
$routingPrompt,
|
||||
$userId,
|
||||
$shopQueryHistoryContext
|
||||
);
|
||||
if ($isStandaloneShopQuery) {
|
||||
$optimizedShopQuery = '';
|
||||
$shopSearchQuery = $this->guardFinalStandaloneShopSearchQuery(
|
||||
prompt: $originalPrompt,
|
||||
shopSearchQuery: $routingPrompt
|
||||
);
|
||||
|
||||
$shopSearchQuery = $this->resolveShopSearchQuery(
|
||||
prompt: $originalPrompt,
|
||||
optimizedShopQuery: $optimizedShopQuery,
|
||||
commerceHistoryContext: $shopQueryHistoryContext,
|
||||
userId: $userId,
|
||||
currentPromptFallback: $routingPrompt
|
||||
);
|
||||
if ($shopSearchQuery === '') {
|
||||
$shopSearchQuery = $originalPrompt;
|
||||
}
|
||||
|
||||
$this->agentLogger->info('Using deterministic standalone shop query without LLM optimizer history', [
|
||||
'userId' => $userId,
|
||||
'prompt' => $prompt,
|
||||
'routingPrompt' => $routingPrompt,
|
||||
'originalPrompt' => $originalPrompt,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
]);
|
||||
} else {
|
||||
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
|
||||
$routingPrompt,
|
||||
$userId,
|
||||
$shopQueryHistoryContext
|
||||
);
|
||||
|
||||
$shopSearchQuery = $this->resolveShopSearchQuery(
|
||||
prompt: $originalPrompt,
|
||||
optimizedShopQuery: $optimizedShopQuery,
|
||||
commerceHistoryContext: $shopQueryHistoryContext,
|
||||
userId: $userId,
|
||||
currentPromptFallback: $routingPrompt
|
||||
);
|
||||
}
|
||||
|
||||
$guardedShopSearchQuery = $this->guardFinalStandaloneShopSearchQuery(
|
||||
prompt: $originalPrompt,
|
||||
@@ -756,6 +780,10 @@ final readonly class AgentRunner
|
||||
return $originalPrompt;
|
||||
}
|
||||
|
||||
if ($this->isInputNormalizationPlaceholderOutput($candidate)) {
|
||||
return $originalPrompt;
|
||||
}
|
||||
|
||||
if (mb_strlen($candidate, 'UTF-8') > $this->agentRunnerConfig->getInputNormalizationMaxOutputChars()) {
|
||||
return $originalPrompt;
|
||||
}
|
||||
@@ -994,6 +1022,19 @@ final readonly class AgentRunner
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Tells whether the normalizer returned a prompt placeholder instead of real user text.
 *
 * The candidate is first passed through normalizeRoutingComparisonText() and then
 * compared strictly against a fixed list of English and German placeholder phrases.
 *
 * @param string $candidate Raw normalizer output to inspect.
 *
 * @return bool True when the candidate is nothing but a known placeholder phrase.
 */
private function isInputNormalizationPlaceholderOutput(string $candidate): bool
{
    $normalized = $this->normalizeRoutingComparisonText($candidate);

    // A model that echoes the placeholder often keeps its wrapper, e.g.
    // `<normalized user input>` or "normalized user input". Whether
    // normalizeRoutingComparisonText() already removes such characters is not
    // visible from this method, so the wrappers are stripped here explicitly.
    $unwrapped = trim($normalized, " \t\n\r\0\x0B<>\"'`");

    $placeholderPhrases = [
        'normalized user input',
        'corrected user input',
        'user input',
        'normalisierte nutzereingabe',
        'korrigierte nutzereingabe',
    ];

    return in_array($unwrapped, $placeholderPhrases, true);
}
|
||||
|
||||
private function normalizeRoutingComparisonText(string $value): string
|
||||
{
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
@@ -1484,6 +1525,74 @@ final readonly class AgentRunner
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a shop prompt must be handled without conversation history.
 *
 * Returns false for an empty prompt and for prompts accepted by
 * isCommercialTableFollowUpPrompt() or isMetaOnlyShopQuery(). Returns true when
 * the follow-up-normalized text contains neither a referential marker nor a
 * configured anchor trigger. Otherwise the result is whatever
 * hasStandaloneConcreteShopSubject() reports for the trimmed prompt.
 *
 * @param string $prompt Prompt to classify; surrounding whitespace is ignored.
 *
 * @return bool True when the prompt should be isolated from history.
 */
private function shouldIsolateStandaloneShopQueryFromHistory(string $prompt): bool
{
    $trimmedPrompt = trim($prompt);

    if ($trimmedPrompt === '') {
        return false;
    }

    if ($this->isCommercialTableFollowUpPrompt($trimmedPrompt)) {
        return false;
    }

    if ($this->isMetaOnlyShopQuery($trimmedPrompt)) {
        return false;
    }

    $followUpText = $this->normalizeFollowUpText($trimmedPrompt);

    // The anchor-trigger check only runs when no referential marker was found.
    $hasReferentialMarker = $this->containsReferentialShopQueryMarker($followUpText);
    if (!$hasReferentialMarker && !$this->containsConfiguredShopQueryAnchorTrigger($followUpText)) {
        return true;
    }

    // Reference language is present: isolate only if the prompt still names its own subject.
    return $this->hasStandaloneConcreteShopSubject($trimmedPrompt);
}
|
||||
|
||||
/**
 * Checks whether the prompt itself names a concrete shop subject.
 *
 * The prompt qualifies when extractFirstTestomatModelAnchor() yields a non-empty
 * anchor, when the tokenized context-fallback query has at least two tokens, or
 * when one of its tokens contains a digit.
 *
 * @param string $prompt Prompt to inspect.
 *
 * @return bool True when one of the conditions above holds.
 */
private function hasStandaloneConcreteShopSubject(string $prompt): bool
{
    $modelAnchor = $this->extractFirstTestomatModelAnchor($prompt);
    if ($modelAnchor !== '') {
        return true;
    }

    $queryTokens = $this->tokenizeShopQueryCandidate(
        $this->buildContextFallbackShopQuery($prompt)
    );

    if (count($queryTokens) >= 2) {
        return true;
    }

    // Fewer than two tokens remain; such a query only counts if a token carries a digit.
    $containsDigit = false;
    foreach ($queryTokens as $queryToken) {
        if (preg_match('/\d/u', $queryToken) === 1) {
            $containsDigit = true;
            break;
        }
    }

    return $containsDigit;
}
|
||||
|
||||
/**
 * Decides whether the shop query should be built deterministically from the prompt.
 *
 * Returns true only when the trimmed prompt is non-empty, the trimmed history
 * context is empty, and neither isCommercialTableFollowUpPrompt() nor
 * isMetaOnlyShopQuery() accepts the prompt.
 *
 * @param string $prompt                  Prompt to classify; surrounding whitespace is ignored.
 * @param string $shopQueryHistoryContext History context resolved for the shop query.
 *
 * @return bool True when all of the conditions above hold.
 */
private function shouldUseDeterministicStandaloneShopQuery(string $prompt, string $shopQueryHistoryContext): bool
{
    $candidatePrompt = trim($prompt);
    $hasHistoryContext = trim($shopQueryHistoryContext) !== '';

    if ($candidatePrompt === '' || $hasHistoryContext) {
        return false;
    }

    // Short-circuiting keeps the call order: table follow-up first, then meta-only.
    return !$this->isCommercialTableFollowUpPrompt($candidatePrompt)
        && !$this->isMetaOnlyShopQuery($candidatePrompt);
}
|
||||
|
||||
private function guardStandaloneOptimizedShopQuery(string $prompt, string $optimizedShopQuery): string
|
||||
{
|
||||
if ($this->shouldUseCommerceHistoryForShopQuery($prompt)) {
|
||||
|
||||
Reference in New Issue
Block a user