patch 20i
This commit is contained in:
@@ -0,0 +1,124 @@
|
||||
# RetrieX Patch p20i – Standalone Shop Query History Isolation
|
||||
|
||||
## Ziel
|
||||
|
||||
Dieser Patch behebt die beobachtete Query-Kontamination bei expliziten Shop-Suchen wie:
|
||||
|
||||
```text
|
||||
shop testomat 808
|
||||
```
|
||||
|
||||
Vor p20i wurde der Shop-Query-Optimizer immer mit dem aktuellen Prompt **und** dem Commerce-Verlauf aufgerufen. Wenn im Verlauf zuvor eine Grenzwert-/Indikatortabelle vorkam, konnte das LLM die eigentlich neue, konkrete Shop-Suche fälschlich zu folgender Query erweitern:
|
||||
|
||||
```text
|
||||
testomat 808 indikator
|
||||
```
|
||||
|
||||
Erwartet ist bei einem expliziten neuen Shop-Prompt:
|
||||
|
||||
```text
|
||||
testomat 808
|
||||
```
|
||||
|
||||
## Ursache im Code
|
||||
|
||||
Die Ursache wurde im realen Codepfad verifiziert:
|
||||
|
||||
- `AgentRunner::buildCommerceHistoryContext()` baut immer den Commerce-Verlauf.
|
||||
- Dieser Verlauf wurde direkt an `buildOptimizedShopQuery()` übergeben.
|
||||
- `AgentRunnerConfig::getShopPrompt()` fügt den Verlauf als `RECENT CONVERSATION CONTEXT` in den LLM-Optimizer-Prompt ein.
|
||||
- Das LLM durfte dadurch alte Kontextanker wie `Indikator` übernehmen, obwohl der aktuelle Prompt mit `shop testomat 808` eine neue konkrete Suche startet.
|
||||
|
||||
## Lösung
|
||||
|
||||
p20i trennt den allgemeinen Commerce-Verlauf vom tatsächlich für die Shop-Query erlaubten Verlauf:
|
||||
|
||||
- Neuer interner Kontext: `$shopQueryHistoryContext`
|
||||
- Für referenzielle Follow-ups bleibt der Verlauf erlaubt, z. B.:
|
||||
- `die tabelle mit preisen`
|
||||
- `suche im shop`
|
||||
- `was kostet der indikator`
|
||||
- Für explizite Standalone-Produktqueries mit Modellanker wird der Verlauf aus Optimizer, Preview, Search und Repair entfernt, z. B.:
|
||||
- `shop testomat 808`
|
||||
- `testomat 808 preis`
|
||||
- Zusätzlich schützt `guardStandaloneOptimizedShopQuery()` gegen LLM-Optimizer-Ausgaben, die bei einer Standalone-Modellquery einen nicht im aktuellen Prompt enthaltenen Kontextanker wie `indikator` hinzufügen.
|
||||
|
||||
## Geänderte Dateien
|
||||
|
||||
- `src/Agent/AgentRunner.php`
|
||||
- `src/Config/AgentRunnerConfig.php`
|
||||
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||
- `config/retriex/agent.yaml`
|
||||
|
||||
## Erwartete Tests
|
||||
|
||||
### Neuer Regressionsfall
|
||||
|
||||
```text
|
||||
shop testomat 808
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Shop wird angefragt.
|
||||
- Gesendete Suchquery: `testomat 808`
|
||||
- Nicht mehr: `testomat 808 indikator`
|
||||
- Quelle: nur Shopsystem, kein Chatverlauf als aktive Shop-Query-Quelle.
|
||||
|
||||
### Bestehende Follow-up-Fälle müssen weiterhin funktionieren
|
||||
|
||||
```text
|
||||
welche grenzwerte kann der testomat 808 messen
|
||||
die tabelle mit preisen
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Shop wird weiterhin angefragt.
|
||||
- Verlauf darf verwendet werden.
|
||||
- Query sinngemäß: `Testomat 808 indikator`
|
||||
|
||||
```text
|
||||
Was ist der niedrigste Grenzwert für die Wasserhärte, welcher mit einem Testomaten überwacht werden kann?
|
||||
mit welchem indikator wird der wert gemessen
|
||||
was kpstet der indikator
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- LLM-/Routing-Normalisierung bleibt erhalten.
|
||||
- Referenzieller Indikator-Preis-Follow-up darf weiterhin Verlauf nutzen.
|
||||
|
||||
## Lokale Prüfungen in dieser Umgebung
|
||||
|
||||
Ausgeführt:
|
||||
|
||||
```bash
|
||||
php -l src/Agent/AgentRunner.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/RetriexEffectiveConfigProvider.php
|
||||
python3 -c 'import yaml; yaml.safe_load(open("config/retriex/agent.yaml"))'
|
||||
```
|
||||
|
||||
Nicht ausführbar in dieser Umgebung:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
Grund: Die hochgeladene ZIP-Datei enthält keine installierten Composer-Dependencies (`vendor/`).
|
||||
|
||||
## Pflichtchecks nach Einspielen
|
||||
|
||||
```bash
|
||||
bin/console cache:clear
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
Bei aktivem OPcache/PHP-FPM: PHP-FPM bzw. den Container neu starten, damit der geänderte Code geladen wird.
|
||||
@@ -424,6 +424,29 @@ parameters:
|
||||
- '- Do not revive older products unless the current user input clearly refers to them.'
|
||||
- '- If the current input starts a new topic, ignore older product context.'
|
||||
- '- Prefer the most recent product reference over older ones.'
|
||||
context_usage:
|
||||
referential_terms:
|
||||
- der
|
||||
- die
|
||||
- das
|
||||
- den
|
||||
- dem
|
||||
- dazu
|
||||
- davon
|
||||
- dafür
|
||||
- dafuer
|
||||
- dieser
|
||||
- diese
|
||||
- dieses
|
||||
- obige
|
||||
- obigen
|
||||
- oben
|
||||
- vorher
|
||||
- zuvor
|
||||
- gleiche
|
||||
- gleichen
|
||||
- selbe
|
||||
- selben
|
||||
context_anchor_enrichment:
|
||||
enabled: true
|
||||
max_query_terms: 2
|
||||
|
||||
@@ -59,6 +59,7 @@ final readonly class AgentRunner
|
||||
$optimizedShopQuery = '';
|
||||
$shopSearchQuery = '';
|
||||
$commerceHistoryContext = '';
|
||||
$shopQueryHistoryContext = '';
|
||||
$attemptedShopRepair = false;
|
||||
$usedShopRepair = false;
|
||||
$shopRepairQueries = [];
|
||||
@@ -212,21 +213,34 @@ final readonly class AgentRunner
|
||||
yield $this->systemMsg($this->agentRunnerConfig->getOptimizeSearchMessage(), 'think');
|
||||
|
||||
$commerceHistoryContext = $this->buildCommerceHistoryContext($userId, $requestContextHint);
|
||||
$shopQueryHistoryContext = $this->resolveShopQueryHistoryContext(
|
||||
prompt: $routingPrompt,
|
||||
commerceHistoryContext: $commerceHistoryContext
|
||||
);
|
||||
|
||||
if ($commerceHistoryContext !== '') {
|
||||
if ($shopQueryHistoryContext !== '') {
|
||||
$this->addSource($sources, $this->agentRunnerConfig->getConversationHistorySourceLabel());
|
||||
}
|
||||
|
||||
if ($commerceHistoryContext !== '' && $shopQueryHistoryContext === '') {
|
||||
$this->agentLogger->info('Ignored commerce history for standalone shop query', [
|
||||
'userId' => $userId,
|
||||
'prompt' => $prompt,
|
||||
'routingPrompt' => $routingPrompt,
|
||||
'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
|
||||
]);
|
||||
}
|
||||
|
||||
$optimizedShopQuery = yield from $this->buildOptimizedShopQuery(
|
||||
$routingPrompt,
|
||||
$userId,
|
||||
$commerceHistoryContext
|
||||
$shopQueryHistoryContext
|
||||
);
|
||||
|
||||
$shopSearchQuery = $this->resolveShopSearchQuery(
|
||||
prompt: $routingPrompt,
|
||||
optimizedShopQuery: $optimizedShopQuery,
|
||||
commerceHistoryContext: $commerceHistoryContext,
|
||||
commerceHistoryContext: $shopQueryHistoryContext,
|
||||
userId: $userId
|
||||
);
|
||||
|
||||
@@ -279,7 +293,7 @@ final readonly class AgentRunner
|
||||
$shopQueryPreview = $this->shopSearchService->buildSearchQueryPreview(
|
||||
$shopSearchQuery,
|
||||
$commerceIntent,
|
||||
$commerceHistoryContext
|
||||
$shopQueryHistoryContext
|
||||
);
|
||||
|
||||
$shopSearchDisplayQuery = $shopQueryPreview->searchText !== ''
|
||||
@@ -329,7 +343,7 @@ final readonly class AgentRunner
|
||||
$shopSearchQuery,
|
||||
$commerceIntent,
|
||||
$userId,
|
||||
$commerceHistoryContext
|
||||
$shopQueryHistoryContext
|
||||
);
|
||||
$primaryShopSearchHadSystemFailure = $this->shopSearchService->hadLastSearchSystemFailure();
|
||||
$primaryShopSearchFailureReason = $this->shopSearchService->getLastSearchFailureReason();
|
||||
@@ -376,7 +390,7 @@ final readonly class AgentRunner
|
||||
prompt: $prompt,
|
||||
userId: $userId,
|
||||
commerceIntent: $commerceIntent,
|
||||
commerceHistoryContext: $commerceHistoryContext,
|
||||
commerceHistoryContext: $shopQueryHistoryContext,
|
||||
primaryQuery: $shopSearchQuery,
|
||||
primaryShopResults: $primaryShopResults,
|
||||
knowledgeChunks: $knowledgeChunks
|
||||
@@ -1379,6 +1393,102 @@ final readonly class AgentRunner
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decides which part of the commerce history may influence the shop query.
 *
 * The history is trimmed first. It is handed back only when it is non-empty
 * and shouldUseCommerceHistoryForShopQuery() accepts the prompt; in every
 * other case the shop query runs without history.
 *
 * @param string $prompt                 Prompt the shop query is built from.
 * @param string $commerceHistoryContext Commerce history collected for this request.
 *
 * @return string Trimmed history, or an empty string when it must not be used.
 */
private function resolveShopQueryHistoryContext(string $prompt, string $commerceHistoryContext): string
{
    $trimmedHistory = trim($commerceHistoryContext);

    // Short-circuit keeps the prompt check from running when there is no history at all.
    $historyAllowed = $trimmedHistory !== ''
        && $this->shouldUseCommerceHistoryForShopQuery($prompt);

    return $historyAllowed ? $trimmedHistory : '';
}
|
||||
|
||||
/**
 * Tells whether the commerce history may be used when building the shop query.
 *
 * Decision order:
 *  1. An empty prompt never uses history.
 *  2. Commercial table follow-ups and meta-only shop queries always use history.
 *  3. A prompt that carries a Testomat model anchor is treated as standalone.
 *  4. If the normalized prompt contains a configured anchor trigger, history is
 *     used only when the prompt contains no digit.
 *  5. Otherwise history is used only when a referential marker is present.
 *
 * @param string $prompt Prompt to classify.
 *
 * @return bool True when the history may be passed on to the shop query steps.
 */
private function shouldUseCommerceHistoryForShopQuery(string $prompt): bool
{
    $candidate = trim($prompt);

    if ($candidate === '') {
        return false;
    }

    // Follow-up style prompts have no product of their own and rely on the history.
    if ($this->isCommercialTableFollowUpPrompt($candidate) || $this->isMetaOnlyShopQuery($candidate)) {
        return true;
    }

    // An explicit model anchor marks a new, self-contained product search.
    if ($this->extractFirstTestomatModelAnchor($candidate) !== '') {
        return false;
    }

    $normalizedCandidate = $this->normalizeFollowUpText($candidate);

    return $this->containsConfiguredShopQueryAnchorTrigger($normalizedCandidate)
        ? !$this->containsNumericShopQueryToken($normalizedCandidate)
        : $this->containsReferentialShopQueryMarker($normalizedCandidate);
}
|
||||
|
||||
/**
 * Reports whether the text contains at least one character matched by `\d`
 * (pattern compiled with the `u` modifier).
 *
 * @param string $text Text to inspect.
 *
 * @return bool True only when preg_match() reports a match; a regex failure
 *              (false) counts as "no digit".
 */
private function containsNumericShopQueryToken(string $text): bool
{
    $digitMatch = preg_match('/\d/u', $text);

    return $digitMatch === 1;
}
|
||||
|
||||
/**
 * Checks whether the text shares at least one token with the configured
 * referential terms.
 *
 * Both the text and every configured term are split with
 * tokenizeShopQueryCandidate(); the first common token ends the search.
 *
 * @param string $text Text to scan (callers in view pass the normalized prompt).
 *
 * @return bool True when any token of any referential term occurs in the text.
 */
private function containsReferentialShopQueryMarker(string $text): bool
{
    $textTokens = $this->tokenizeShopQueryCandidate($text);

    if ($textTokens === []) {
        return false;
    }

    // Key-based lookup table so each term token is checked in constant time.
    $knownTokens = array_fill_keys($textTokens, true);
    $referentialTerms = $this->agentRunnerConfig->getShopQueryContextUsageReferentialTerms();

    foreach ($referentialTerms as $referentialTerm) {
        foreach ($this->tokenizeShopQueryCandidate($referentialTerm) as $referentialToken) {
            if (!isset($knownTokens[$referentialToken])) {
                continue;
            }

            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Rejects an optimized shop query that gained a context anchor the prompt
 * never mentioned.
 *
 * The optimized query is returned unchanged when any of these holds, checked
 * in this order:
 *  - the prompt is allowed to use commerce history,
 *  - the prompt carries no Testomat model anchor,
 *  - the optimized query contains no configured anchor trigger,
 *  - the prompt itself already contains a configured anchor trigger.
 *
 * Otherwise the decision is logged and the prompt is returned instead.
 *
 * NOTE(review): the anchor-trigger checks receive the raw strings here, while
 * shouldUseCommerceHistoryForShopQuery() normalizes the prompt first — confirm
 * that the trigger helper does not depend on that normalization.
 *
 * @param string $prompt             Prompt the optimizer was run for.
 * @param string $optimizedShopQuery Query produced by the optimizer.
 *
 * @return string The optimized query, or the prompt when the query was rejected.
 */
private function guardStandaloneOptimizedShopQuery(string $prompt, string $optimizedShopQuery): string
{
    // Order matters: each later check only runs when all earlier ones failed.
    $optimizedQueryIsAcceptable = $this->shouldUseCommerceHistoryForShopQuery($prompt)
        || $this->extractFirstTestomatModelAnchor($prompt) === ''
        || !$this->containsConfiguredShopQueryAnchorTrigger($optimizedShopQuery)
        || $this->containsConfiguredShopQueryAnchorTrigger($prompt);

    if ($optimizedQueryIsAcceptable) {
        return $optimizedShopQuery;
    }

    $logContext = [
        'prompt' => $prompt,
        'optimizedShopQuery' => $optimizedShopQuery,
    ];
    $this->agentLogger->info(
        'Ignored optimized shop query because it added an unsupported context anchor',
        $logContext
    );

    return $prompt;
}
|
||||
|
||||
private function resolveShopSearchQuery(
|
||||
string $prompt,
|
||||
string $optimizedShopQuery,
|
||||
@@ -1396,7 +1506,7 @@ final readonly class AgentRunner
|
||||
}
|
||||
|
||||
if ($optimizedShopQuery !== '' && !$this->isMetaOnlyShopQuery($optimizedShopQuery)) {
|
||||
return $optimizedShopQuery;
|
||||
return $this->guardStandaloneOptimizedShopQuery($prompt, $optimizedShopQuery);
|
||||
}
|
||||
|
||||
if (!$this->isMetaOnlyShopQuery($prompt)) {
|
||||
|
||||
@@ -721,6 +721,14 @@ final class AgentRunnerConfig
|
||||
return $this->getRequiredStringList('shop_prompt.conversation_context_rules');
|
||||
}
|
||||
|
||||
/**
 * Returns the string list stored under the
 * `shop_prompt.context_usage.referential_terms` configuration key.
 *
 * NOTE(review): goes through getRequiredStringList(), so a missing key is
 * presumably treated as an error — confirm against that helper.
 *
 * @return string[]
 */
public function getShopQueryContextUsageReferentialTerms(): array
{
    $referentialTerms = $this->getRequiredStringList('shop_prompt.context_usage.referential_terms');

    return $referentialTerms;
}
|
||||
|
||||
public function getShopPromptIntro(): string
|
||||
{
|
||||
return $this->getRequiredString('shop_prompt.intro');
|
||||
|
||||
@@ -529,6 +529,9 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'language_markers' => $this->agentRunnerConfig->getShopQueryLanguageMarkers(),
|
||||
'translation_replacements_de' => $this->agentRunnerConfig->getShopQueryTranslationReplacements('de'),
|
||||
],
|
||||
'context_usage' => [
|
||||
'referential_terms' => $this->agentRunnerConfig->getShopQueryContextUsageReferentialTerms(),
|
||||
],
|
||||
'context_anchor_enrichment' => [
|
||||
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
|
||||
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
|
||||
@@ -1132,6 +1135,11 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$errors[] = 'agent.input_normalization.prompt.current_user_input_label must not be empty.';
|
||||
}
|
||||
|
||||
$contextUsage = $agent['shop_query_optimizer']['context_usage'] ?? [];
|
||||
if (is_array($contextUsage)) {
|
||||
$this->validateStringList($this->toList($contextUsage['referential_terms'] ?? []), 'agent.shop_query_optimizer.context_usage.referential_terms', $errors, $warnings);
|
||||
}
|
||||
|
||||
$anchorEnrichment = $agent['shop_query_optimizer']['context_anchor_enrichment'] ?? [];
|
||||
if (is_array($anchorEnrichment)) {
|
||||
$this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_query_optimizer.context_anchor_enrichment.trigger_terms', $errors, $warnings);
|
||||
|
||||
Reference in New Issue
Block a user