patch 20o
This commit is contained in:
3242
AgentRunner.php
3242
AgentRunner.php
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
|||||||
|
# RetrieX Patch p20o – Standalone Shop Query Isolation Fix
|
||||||
|
|
||||||
|
## Ziel
|
||||||
|
|
||||||
|
Dieser Patch korrigiert die Fehlleitung neuer Standalone-Shopfragen wie:
|
||||||
|
|
||||||
|
```text
|
||||||
|
zeige mir Anschlusskabel für pH/Redox
|
||||||
|
```
|
||||||
|
|
||||||
|
Diese Anfrage darf nicht durch alten Chatverlauf in eine Shop-Suchquery wie `testomat 808 indikator` umgewandelt werden.
|
||||||
|
|
||||||
|
## Ursache
|
||||||
|
|
||||||
|
Der bisherige Schutz war zu indirekt. Sobald der Shop-Query-Pfad einen Verlaufskontext zuließ, konnte der LLM-Shop-Query-Optimizer oder der nachgelagerte Commerce-Parser ältere Kontextanker wieder in eine neue Suchfrage übernehmen.
|
||||||
|
|
||||||
|
Zusätzlich enthielt der Shop-Optimizer-Prompt konkrete Beispiele wie `Indikator 300`, `Testomat 808` und `Testomat 2000`. Diese Beispiele konnten den Optimizer in genau die falsche Richtung lenken (Bias).
|
||||||
|
|
||||||
|
Außerdem konnte der Input-Normalizer den Platzhaltertext `normalized user input` wörtlich zurückgeben. Dieser Patch schützt dagegen ebenfalls.
|
||||||
|
|
||||||
|
## Änderungen
|
||||||
|
|
||||||
|
### `src/Agent/AgentRunner.php`
|
||||||
|
|
||||||
|
- Neue Standalone-Shopfragen werden jetzt explizit von History/Optimizer-Kontext isoliert.
|
||||||
|
- Die Entscheidung wird aus dem Originalprompt getroffen, nicht aus dem normalisierten Routing-Prompt.
|
||||||
|
- Nicht-referenzielle Shopfragen verwenden deterministische Query-Erzeugung über den aktuellen Prompt.
|
||||||
|
- Referenzielle Follow-ups wie `die tabelle mit preisen`, `suche im shop` oder `was kostet der indikator` dürfen weiterhin Verlauf nutzen.
|
||||||
|
- Placeholder-Ausgaben des Normalizers wie `normalized user input` werden verworfen und auf den Originalprompt zurückgesetzt.
|
||||||
|
|
||||||
|
### `config/retriex/agent.yaml`
|
||||||
|
|
||||||
|
- Der Normalizer-Prompt enthält keinen `<normalized user input>`-Platzhalter mehr.
|
||||||
|
- Die Normalizer-Regel verbietet, den Platzhaltertext wörtlich auszugeben.
|
||||||
|
- Konkrete Shop-Optimizer-Beispiele `Indikator 300`, `Testomat 808`, `Testomat 2000` wurden aus dem Prompt entfernt.
|
||||||
|
|
||||||
|
## Erwartete manuelle Tests
|
||||||
|
|
||||||
|
```text
|
||||||
|
zeige mir Anschlusskabel für pH/Redox
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung: Suchquery sinngemäß `anschlusskabel ph redox`, nicht `testomat 808 indikator`.
|
||||||
|
|
||||||
|
```text
|
||||||
|
shop testomat 808
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung: Suchquery `testomat 808`, nicht `testomat 808 indikator`.
|
||||||
|
|
||||||
|
```text
|
||||||
|
welche grenzwerte kann der testomat 808 messen
|
||||||
|
|
||||||
|
die tabelle mit preisen
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung: Der referenzielle Follow-up `die tabelle mit preisen` darf weiterhin den Verlauf nutzen; die Suchquery lautet sinngemäß `testomat 808 indikator`.
|
||||||
|
|
||||||
|
```text
|
||||||
|
was kpstet der indikator
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung: Tippfehler-Normalisierung/Fuzzy-Routing bleibt aktiv und darf den vorherigen Indikator-Kontext nutzen.
|
||||||
|
|
||||||
|
## Pflichtchecks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/console cache:clear
|
||||||
|
bin/console mto:agent:config:validate
|
||||||
|
bin/console mto:agent:regression:test
|
||||||
|
bin/console mto:agent:config:audit-source --details
|
||||||
|
bin/console mto:agent:config:audit-patterns --details
|
||||||
|
```
|
||||||
|
|
||||||
|
Bei OPcache/PHP-FPM bitte PHP-FPM bzw. Container neu laden.
|
||||||
@@ -24,10 +24,10 @@ parameters:
|
|||||||
intro: 'Normalize the following user input for RetrieX routing before intent detection.'
|
intro: 'Normalize the following user input for RetrieX routing before intent detection.'
|
||||||
output_format_block: |-
|
output_format_block: |-
|
||||||
Output format:
|
Output format:
|
||||||
<normalized user input>
|
Return exactly one line: the corrected user input.
|
||||||
current_user_input_label: 'USER INPUT'
|
current_user_input_label: 'USER INPUT'
|
||||||
rules:
|
rules:
|
||||||
- '- Output only the normalized user input.'
|
- '- Output only the corrected user input text itself, never the words "normalized user input".'
|
||||||
- '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
|
- '- Correct only obvious typing mistakes, transposed letters, missing umlauts, spacing, and punctuation that clearly preserve the same meaning.'
|
||||||
- '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
|
- '- Do not add product names, model numbers, article numbers, measurement values, parameters, brands, or application areas that are not already present in the input.'
|
||||||
- '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
|
- '- Preserve product names, model numbers, article numbers, chemical symbols, units, pH, Redox, ORP, and measurement values exactly unless only letter casing is corrected.'
|
||||||
@@ -410,7 +410,7 @@ parameters:
|
|||||||
- '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
|
- '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.'
|
||||||
- '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
|
- '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".'
|
||||||
- '- Preserve domain terms from the current user input or resolved context in their original language.'
|
- '- Preserve domain terms from the current user input or resolved context in their original language.'
|
||||||
- '- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).'
|
- '- Numbers that belong to a product name or model must be preserved when they are present in the CURRENT USER INPUT or a clearly resolved reference.'
|
||||||
- '- Separate terms using spaces only.'
|
- '- Separate terms using spaces only.'
|
||||||
- '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
|
- '- If a relevant product name is present, it must be placed at the beginning of the final search query.'
|
||||||
- '- Try to always identify all products mentioned in the user input text, even in long prompts.'
|
- '- Try to always identify all products mentioned in the user input text, even in long prompts.'
|
||||||
|
|||||||
@@ -213,7 +213,10 @@ final readonly class AgentRunner
|
|||||||
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
||||||
|
|
||||||
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId, $requestContextHint);
|
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId, $requestContextHint);
|
||||||
$shopQueryHistoryContext = $this->resolveShopQueryHistoryContext(
|
$isStandaloneShopQuery = $this->shouldIsolateStandaloneShopQueryFromHistory($originalPrompt);
|
||||||
|
$shopQueryHistoryContext = $isStandaloneShopQuery
|
||||||
|
? ''
|
||||||
|
: $this->resolveShopQueryHistoryContext(
|
||||||
prompt: $originalPrompt,
|
prompt: $originalPrompt,
|
||||||
commerceHistoryContext: $commerceHistoryContext
|
commerceHistoryContext: $commerceHistoryContext
|
||||||
);
|
);
|
||||||
@@ -229,9 +232,29 @@ final readonly class AgentRunner
|
|||||||
'routingPrompt' => $routingPrompt,
|
'routingPrompt' => $routingPrompt,
|
||||||
'originalPrompt' => $originalPrompt,
|
'originalPrompt' => $originalPrompt,
|
||||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||||
|
'standaloneShopQueryIsolated' => $isStandaloneShopQuery,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($isStandaloneShopQuery) {
|
||||||
|
$optimizedShopQuery = '';
|
||||||
|
$shopSearchQuery = $this->guardFinalStandaloneShopSearchQuery(
|
||||||
|
prompt: $originalPrompt,
|
||||||
|
shopSearchQuery: $routingPrompt
|
||||||
|
);
|
||||||
|
|
||||||
|
if ($shopSearchQuery === '') {
|
||||||
|
$shopSearchQuery = $originalPrompt;
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->agentLogger->info('Using deterministic standalone shop query without LLM optimizer history', [
|
||||||
|
'userId' => $userId,
|
||||||
|
'prompt' => $prompt,
|
||||||
|
'routingPrompt' => $routingPrompt,
|
||||||
|
'originalPrompt' => $originalPrompt,
|
||||||
|
'shopSearchQuery' => $shopSearchQuery,
|
||||||
|
]);
|
||||||
|
} else {
|
||||||
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
|
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
|
||||||
$routingPrompt,
|
$routingPrompt,
|
||||||
$userId,
|
$userId,
|
||||||
@@ -245,6 +268,7 @@ final readonly class AgentRunner
|
|||||||
userId: $userId,
|
userId: $userId,
|
||||||
currentPromptFallback: $routingPrompt
|
currentPromptFallback: $routingPrompt
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
$guardedShopSearchQuery = $this->guardFinalStandaloneShopSearchQuery(
|
$guardedShopSearchQuery = $this->guardFinalStandaloneShopSearchQuery(
|
||||||
prompt: $originalPrompt,
|
prompt: $originalPrompt,
|
||||||
@@ -756,6 +780,10 @@ final readonly class AgentRunner
|
|||||||
return $originalPrompt;
|
return $originalPrompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->isInputNormalizationPlaceholderOutput($candidate)) {
|
||||||
|
return $originalPrompt;
|
||||||
|
}
|
||||||
|
|
||||||
if (mb_strlen($candidate, 'UTF-8') > $this->agentRunnerConfig->getInputNormalizationMaxOutputChars()) {
|
if (mb_strlen($candidate, 'UTF-8') > $this->agentRunnerConfig->getInputNormalizationMaxOutputChars()) {
|
||||||
return $originalPrompt;
|
return $originalPrompt;
|
||||||
}
|
}
|
||||||
@@ -994,6 +1022,19 @@ final readonly class AgentRunner
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Detects normalizer output that is merely an echoed template placeholder
 * (for example "normalized user input") instead of a corrected user input.
 *
 * The candidate is first stripped of ASCII wrapper characters that models
 * commonly emit around an echoed placeholder (angle brackets, square/curly
 * brackets, parentheses, quotes, backticks, trailing colon/period), then
 * passed through normalizeRoutingComparisonText() and compared strictly
 * against the known placeholder labels.
 *
 * @param string $candidate Raw output returned by the input normalizer.
 *
 * @return bool True when the candidate equals one of the known placeholder labels.
 */
private function isInputNormalizationPlaceholderOutput(string $candidate): bool
{
    // The earlier prompt template contained the literal "<normalized user input>",
    // so an echoed placeholder may arrive wrapped. All listed characters are
    // single-byte ASCII, which keeps the byte-wise trim() safe for UTF-8 text.
    $unwrapped = trim($candidate, " \t\n\r\0\x0B<>[]{}()\"'`.:");

    $normalized = $this->normalizeRoutingComparisonText($unwrapped);

    return in_array($normalized, [
        'normalized user input',
        'corrected user input',
        'user input',
        'normalisierte nutzereingabe',
        'korrigierte nutzereingabe',
    ], true);
}
|
||||||
|
|
||||||
private function normalizeRoutingComparisonText(string $value): string
|
private function normalizeRoutingComparisonText(string $value): string
|
||||||
{
|
{
|
||||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||||
@@ -1484,6 +1525,74 @@ final readonly class AgentRunner
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a shop query should be built without any chat-history context.
 *
 * Returns false for an empty prompt and for prompts accepted by
 * isCommercialTableFollowUpPrompt() or isMetaOnlyShopQuery(). Otherwise the
 * prompt is isolated when its follow-up-normalized text contains neither a
 * referential shop-query marker nor a configured anchor trigger; if it does
 * contain one, isolation depends on hasStandaloneConcreteShopSubject().
 *
 * @param string $prompt Prompt to classify (surrounding whitespace is ignored).
 *
 * @return bool True when history must not be used for this shop query.
 */
private function shouldIsolateStandaloneShopQueryFromHistory(string $prompt): bool
{
    $trimmedPrompt = trim($prompt);

    if ($trimmedPrompt === '') {
        return false;
    }

    if ($this->isCommercialTableFollowUpPrompt($trimmedPrompt)) {
        return false;
    }

    if ($this->isMetaOnlyShopQuery($trimmedPrompt)) {
        return false;
    }

    $followUpText = $this->normalizeFollowUpText($trimmedPrompt);

    $isReferential = $this->containsReferentialShopQueryMarker($followUpText)
        || $this->containsConfiguredShopQueryAnchorTrigger($followUpText);

    // Non-referential prompts are always isolated; referential ones only when
    // they still name a concrete subject of their own.
    return !$isReferential || $this->hasStandaloneConcreteShopSubject($trimmedPrompt);
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the prompt itself names a concrete shop subject.
 *
 * A prompt qualifies when extractFirstTestomatModelAnchor() yields a
 * non-empty anchor, when the tokenized context-fallback query has at least
 * two tokens, or when any of its tokens contains a digit.
 *
 * @param string $prompt Prompt to inspect.
 *
 * @return bool True when a concrete subject was found in the prompt.
 */
private function hasStandaloneConcreteShopSubject(string $prompt): bool
{
    if ($this->extractFirstTestomatModelAnchor($prompt) !== '') {
        return true;
    }

    $candidateTokens = $this->tokenizeShopQueryCandidate(
        $this->buildContextFallbackShopQuery($prompt)
    );

    if (count($candidateTokens) >= 2) {
        return true;
    }

    // Fewer than two tokens: a digit-bearing token still counts as concrete.
    foreach ($candidateTokens as $candidateToken) {
        if (preg_match('/\d/u', $candidateToken) !== 1) {
            continue;
        }

        return true;
    }

    return false;
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the shop query may be derived deterministically from the
 * current prompt alone.
 *
 * Returns false when the trimmed prompt is empty, when a non-blank shop-query
 * history context is supplied, or when the prompt is accepted by
 * isCommercialTableFollowUpPrompt() or isMetaOnlyShopQuery(); true otherwise.
 *
 * @param string $prompt                  Prompt to classify (surrounding whitespace is ignored).
 * @param string $shopQueryHistoryContext History context resolved for the shop query; blank means none.
 *
 * @return bool True when deterministic standalone query generation applies.
 */
private function shouldUseDeterministicStandaloneShopQuery(string $prompt, string $shopQueryHistoryContext): bool
{
    $currentPrompt = trim($prompt);

    if ($currentPrompt === '' || trim($shopQueryHistoryContext) !== '') {
        return false;
    }

    return !$this->isCommercialTableFollowUpPrompt($currentPrompt)
        && !$this->isMetaOnlyShopQuery($currentPrompt);
}
|
||||||
|
|
||||||
private function guardStandaloneOptimizedShopQuery(string $prompt, string $optimizedShopQuery): string
|
private function guardStandaloneOptimizedShopQuery(string $prompt, string $optimizedShopQuery): string
|
||||||
{
|
{
|
||||||
if ($this->shouldUseCommerceHistoryForShopQuery($prompt)) {
|
if ($this->shouldUseCommerceHistoryForShopQuery($prompt)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user