From 308e980b4abd11721593aec31e2b9f5a127c241e Mon Sep 17 00:00:00 2001 From: team 1 Date: Sun, 26 Apr 2026 19:59:31 +0200 Subject: [PATCH] harden history find tokens and shop search --- ...EX_SHOP_CONTEXT_SSE_GUARD_HOTFIX_README.md | 60 +++ ...EX_SHOP_QUERY_CONTEXT_ANCHOR_FIX_README.md | 63 +++ ...HOP_QUERY_CONTEXT_CLEANUP_HOTFIX_README.md | 32 ++ ...OP_QUERY_CONTEXT_FALLBACK_HOTFIX_README.md | 34 ++ RETRIEX_SHOP_QUERY_LANGUAGE_FIX_README.md | 68 +++ ...IEX_SHOP_QUERY_META_GUARD_HOTFIX_README.md | 36 ++ config/retriex/agent.yaml | 124 ++++- src/Agent/AgentRunner.php | 504 +++++++++++++++--- src/Config/AgentRunnerConfig.php | 285 +++++++++- src/Config/RetriexEffectiveConfigProvider.php | 73 +++ src/Controller/AskSseController.php | 26 +- 11 files changed, 1230 insertions(+), 75 deletions(-) create mode 100644 RETRIEX_SHOP_CONTEXT_SSE_GUARD_HOTFIX_README.md create mode 100644 RETRIEX_SHOP_QUERY_CONTEXT_ANCHOR_FIX_README.md create mode 100644 RETRIEX_SHOP_QUERY_CONTEXT_CLEANUP_HOTFIX_README.md create mode 100644 RETRIEX_SHOP_QUERY_CONTEXT_FALLBACK_HOTFIX_README.md create mode 100644 RETRIEX_SHOP_QUERY_LANGUAGE_FIX_README.md create mode 100644 RETRIEX_SHOP_QUERY_META_GUARD_HOTFIX_README.md diff --git a/RETRIEX_SHOP_CONTEXT_SSE_GUARD_HOTFIX_README.md b/RETRIEX_SHOP_CONTEXT_SSE_GUARD_HOTFIX_README.md new file mode 100644 index 0000000..f024ee0 --- /dev/null +++ b/RETRIEX_SHOP_CONTEXT_SSE_GUARD_HOTFIX_README.md @@ -0,0 +1,60 @@ +# RetrieX Shop Context + SSE Guard Hotfix + +Patch-only hotfix for the Shopware query meta-command flow and duplicate SSE reconnect handling. 
+ +## Problem + +After the user asked a domain question and then wrote a meta-command like `suche im shop`, the system could either: + +- fail to recover the previous domain question from history, +- ask for a concrete search query even though a relevant previous question existed, +- continue with a misleading RAG-only final answer after the shop search was skipped, +- append a misleading duplicate SSE reconnect message such as `Der Antwort-Stream läuft bereits...`. + +## Changes + +- The shop meta-command fallback now uses a larger history budget. +- If the budgeted history is still polluted by repeated meta turns, the fallback checks the full recent context as a last resort. +- The fallback still ignores meta-only turns such as `shop` / `suche im shop` and derives a compact shop query from the last non-meta domain question. +- If no concrete shop query can be resolved, the agent now stops after the clarification message instead of generating a misleading final answer from RAG context. +- Duplicate EventSource reconnects for already-running or completed jobs are silently closed with `done`, so the UI does not append a misleading stream error after the real stream already produced output. + +## Expected behavior + +Conversation: + +```text +mit welchem testomat kann ich freies chlor messen +suche im shop +``` + +Expected Shop query: + +```text +testomat freies chlor +``` + +Not expected: + +```text +shop +mit welchem testomat kann ich freies chlor messen +``` + +## Notes + +- No retrieval logic changed. +- No scoring logic changed. +- No product-family special case was added. +- No Testomat 808 / Testomat 2000 / Tritromat special handling was added. +- Existing config fallbacks remain intact. 
+ +## After applying + +Run: + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` diff --git a/RETRIEX_SHOP_QUERY_CONTEXT_ANCHOR_FIX_README.md b/RETRIEX_SHOP_QUERY_CONTEXT_ANCHOR_FIX_README.md new file mode 100644 index 0000000..8ca5833 --- /dev/null +++ b/RETRIEX_SHOP_QUERY_CONTEXT_ANCHOR_FIX_README.md @@ -0,0 +1,63 @@ +# RetrieX Shop Query Context Anchor Fix + +Patch-only fix for referential shop follow-up questions such as: + +- `was kostet der indikator` +- `was kostet das reagenz` +- `suche im shop` after a prior product/parameter question + +## Problem + +The shop query optimizer could reduce a referential price follow-up to a too generic query, for example: + +```text +indikator +``` + +Even when the previous RAG answer contained a precise anchor such as: + +```text +Indikatortyp 300 +``` + +The shop result could still be correct by chance, but the query was less robust than the previously confirmed stable behavior. + +## Fix + +The optimized query is now enriched generically from recent conversation context when all conditions match: + +- the optimized shop query is short/generic, +- the current prompt/query contains a configured accessory/reagent trigger term, +- the recent context contains a configured type/code anchor such as `Indikatortyp 300`. + +Expected example: + +```text +indikator + history anchor Indikatortyp 300 +=> indikatortyp 300 indikator +``` + +## Design constraints + +- No Retriever changes. +- No vector/scoring changes. +- No Testomat-808/Testomat-2000/Tritromat special case. +- Product/type/code detection is configurable in `config/retriex/agent.yaml`. +- PHP defaults remain as fallback. 
+ +## Files + +- `config/retriex/agent.yaml` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` + +## After applying + +Run: + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` diff --git a/RETRIEX_SHOP_QUERY_CONTEXT_CLEANUP_HOTFIX_README.md b/RETRIEX_SHOP_QUERY_CONTEXT_CLEANUP_HOTFIX_README.md new file mode 100644 index 0000000..0c2e678 --- /dev/null +++ b/RETRIEX_SHOP_QUERY_CONTEXT_CLEANUP_HOTFIX_README.md @@ -0,0 +1,32 @@ +# RetrieX Shop Query Context Cleanup Hotfix + +Purpose: +- Keeps the existing language preservation and meta-query guard behavior. +- Fixes the follow-up case where a meta command such as "suche im shop" reused the full previous user question as the Shopware search query. +- The fallback now extracts a compact, generic shop query from recent context, e.g.: + - "mit welchem testomat kann ich freies chlor messen" -> "testomat freies chlor" + +Design constraints: +- No retrieval changes. +- No product-family-specific special cases. +- No Testomat 808 / Testomat 2000 / Tritromat hardcoding. +- Filtering terms are configurable in config/retriex/agent.yaml. +- PHP defaults remain as safety fallback. + +Changed files: +- config/retriex/agent.yaml +- src/Agent/AgentRunner.php +- src/Config/AgentRunnerConfig.php +- src/Config/RetriexEffectiveConfigProvider.php + +Recommended after applying: +- php bin/console cache:clear +- php bin/console mto:agent:config:validate +- php bin/console mto:agent:regression:test + +Manual regression: +1. Ask: mit welchem testomat kann ich freies chlor messen +2. Then: suche im shop +3. Expected shop query should be a compact product/parameter query such as: + testomat freies chlor + It must not be the full previous prompt. 
diff --git a/RETRIEX_SHOP_QUERY_CONTEXT_FALLBACK_HOTFIX_README.md b/RETRIEX_SHOP_QUERY_CONTEXT_FALLBACK_HOTFIX_README.md new file mode 100644 index 0000000..4affd0a --- /dev/null +++ b/RETRIEX_SHOP_QUERY_CONTEXT_FALLBACK_HOTFIX_README.md @@ -0,0 +1,34 @@ +# RetrieX Shop Query Context Fallback Hotfix + +Patch-only hotfix for meta shop follow-ups such as `suche im shop`. + +## Problem + +After several shop meta follow-ups, the previous non-meta product/advisory question could fall outside the small context fallback window. The guard then correctly rejected `shop` as a query, but failed to derive a compact query from the last real topic. + +## Fix + +- Keep the language-preservation and meta-query guard behavior. +- Increase the configurable fallback question window. +- Add a separate configurable extended history budget for meta shop fallbacks. +- If the normal commerce history does not provide a concrete query, scan a larger recent history window for the last non-meta user question. +- Still compact the question into product/search tokens before sending it to Shopware. + +## Expected example + +`mit welchem testomat kann ich freies chlor messen` followed by `suche im shop` should resolve to a compact query such as: + +```text +testomat freies chlor +``` + +It must not send: + +```text +shop +mit welchem testomat kann ich freies chlor messen +``` + +## Scope + +No retrieval, vector, scoring, prompt-answering, product-family special case, Testomat-specific hardcoding or Shopware API changes. diff --git a/RETRIEX_SHOP_QUERY_LANGUAGE_FIX_README.md b/RETRIEX_SHOP_QUERY_LANGUAGE_FIX_README.md new file mode 100644 index 0000000..ee2be4d --- /dev/null +++ b/RETRIEX_SHOP_QUERY_LANGUAGE_FIX_README.md @@ -0,0 +1,68 @@ +# RetrieX Shop Query Language Fix + +Patch-only fix for v1.5.0. 
+ +## Problem + +The Shopware query optimizer could translate German user input into English, for example: + +```text +welchen testomat kann ich nutzen zur freien chlor messung +``` + +became: + +```text +testomat free chlorine measurement +``` + +This reduced Shopware hit quality because the shop catalog is primarily queried with German/domain terms. + +## Fix + +- Strengthens `config/retriex/agent.yaml` prompt rules so the optimized shop query must preserve the language of the current user input. +- Adds configurable language-preservation settings under `shop_prompt.language_preservation`. +- Adds a small post-optimizer repair step in `AgentRunner` that only applies configured replacements when the current user input matches configured language markers. +- Keeps all translation/repair terms in YAML/fallback config, not as hard-coded product-specific logic in the Shop/Retrieval core. +- Extends the effective config dump with the new language-preservation settings. + +## Example + +```text +testomat free chlorine measurement +``` + +is repaired to: + +```text +testomat freies chlor messung +``` + +for German input. + +## Safety + +- No Retrieval, Vector, Scoring, PromptBuilder, SearchRepair or Shopware API logic changed. +- No Testomat-808-specific logic. +- Product names, brands and model numbers remain preserved. +- All new language repair rules are configurable in `agent.yaml`. 
+ +## After installation + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` + +Then retest: + +```text +welchen testomat kann ich nutzen zur freien chlor messung +``` + +The displayed shop query should stay German, e.g.: + +```text +testomat freies chlor messung +``` diff --git a/RETRIEX_SHOP_QUERY_META_GUARD_HOTFIX_README.md b/RETRIEX_SHOP_QUERY_META_GUARD_HOTFIX_README.md new file mode 100644 index 0000000..6cceea9 --- /dev/null +++ b/RETRIEX_SHOP_QUERY_META_GUARD_HOTFIX_README.md @@ -0,0 +1,36 @@ +# RetrieX shop query meta guard hotfix + +This patch hardens the Shopware query optimizer after the language-preservation fix. + +## Problem + +A pure shop command such as `suche im shop` could be optimized to the meta word `shop` and then executed as an actual Store API search query. + +## Fix + +- Adds a configurable `shop_prompt.meta_query_guard` section in `config/retriex/agent.yaml`. +- Rejects optimized queries that consist only of generic command/meta terms such as `shop`, `suche`, `im`, `bitte`. +- If the current user input is only a shop command, RetrieX tries to use the latest meaningful user question from the recent conversation context as a generic fallback. +- If no meaningful context exists, the shop search is skipped and a clear message is shown instead of searching for `shop`. +- Keeps the previous language-preservation behavior for German search terms. + +## Scope + +Changed files only: + +- `config/retriex/agent.yaml` +- `src/Agent/AgentRunner.php` +- `src/Config/AgentRunnerConfig.php` +- `src/Config/RetriexEffectiveConfigProvider.php` + +No retrieval, vector, scoring, prompt-builder, vocabulary, Shopware API or product-family-specific logic was changed. 
+ +## Validation + +Run after installing: + +```bash +php bin/console cache:clear +php bin/console mto:agent:config:validate +php bin/console mto:agent:regression:test +``` diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index ee43f5d..59fecdd 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -13,6 +13,7 @@ parameters: check_internet_sources: 'Ich prüfe auf Internetquellen...' retrieve_knowledge: 'Ich hole relevante Daten aus meinem RAG-Wissen...' optimize_search: 'Ich optimiere die Recherche...' + no_concrete_shop_query: 'Ich habe keine konkrete Shop-Suchanfrage erkannt. Bitte nenne das Produkt, Zubehör oder die Artikelnummer.' fetch_search_data_template: 'Ich rufe Recherchedaten ab (type: %s)' analyze_all_information: 'Ich analysiere alle Informationen...' thinking_while_streaming: 'Denke nach...' @@ -53,11 +54,14 @@ parameters: - '- Maximum 6 search terms, preferably fewer.' - '- Remove filler words, polite phrases, and irrelevant words.' - '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.' + - '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.' + - '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".' + - '- Preserve domain terms from the current user input or resolved context in their original language.' - '- Numbers that belong to a product name or model must be preserved (e.g. Indikator 300, Testomat 808, Testomat 2000).' - '- Separate terms using spaces only.' - '- If a relevant product name is present, it must be placed at the beginning of the final search query.' - '- Try to always identify all products mentioned in the user input text, even in long prompts.' - - '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.' 
+ - '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator/Indikator.' - '- If the current user input is vague or referential, use the recent conversation context only as support.' - '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".' conversation_context_rules: @@ -67,3 +71,121 @@ parameters: - '- Do not revive older products unless the current user input clearly refers to them.' - '- If the current input starts a new topic, ignore older product context.' - '- Prefer the most recent product reference over older ones.' + context_anchor_enrichment: + enabled: true + max_query_terms: 2 + template: '{anchor} {query}' + trigger_terms: + - indikator + - indikatortyp + - indicator + - reagenz + - reagenzsatz + - reagent + - zubehör + - zubehor + - accessory + anchor_patterns: + - '/\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu' + meta_query_guard: + enabled: true + context_fallback_enabled: true + context_fallback_question_limit: 12 + context_fallback_history_budget_chars: 20000 + context_fallback_use_full_history: true + context_fallback_max_terms: 6 + context_fallback_filter_terms: + - mit + - welche + - welcher + - welches + - welchem + - welchen + - was + - wie + - wo + - kann + - koennen + - können + - konnte + - könnte + - ich + - wir + - man + - nutzen + - benutzen + - verwenden + - verwende + - nehmen + - zur + - zum + - für + - fuer + - messen + - gemessen + - messung + meta_only_terms: + - shop + - shopsuche + - shop-suche + - suche + - suchen + - such + - finde + - find + - zeige + - zeig + - bitte + - mal + - im + - in + - nach + - den + - die + - das + - der + - dem + language_preservation: + enabled: true + language_markers: + de: + - ' ä ' + - ' ö ' + - ' ü ' + - ' ß ' + - ' der ' + - ' die ' + - ' das ' + - ' ein ' + - ' eine ' + - ' einer ' + - ' 
einen ' + - ' welchem ' + - ' welchen ' + - ' welche ' + - ' welcher ' + - ' kann ' + - ' nutzen ' + - ' zur ' + - ' für ' + - ' fuer ' + - ' messung ' + - ' indikator ' + - ' reagenz ' + - ' chlor ' + translation_replacements: + de: + free chlorine: 'freies chlor' + free chlor: 'freies chlor' + total chlorine: 'gesamtchlor' + chlorine measurement: 'chlor messung' + water hardness: 'wasserhärte' + measurement: 'messung' + measuring: 'messung' + chlorine: 'chlor' + indicator: 'indikator' + indicators: 'indikatoren' + reagent: 'reagenz' + reagents: 'reagenzien' + accessory: 'zubehör' + accessories: 'zubehör' diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index c473208..4e08bf6 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -121,84 +121,108 @@ final readonly class AgentRunner $commerceHistoryContext ); - $shopSearchQuery = $optimizedShopQuery !== '' ? $optimizedShopQuery : $prompt; - - $shopQueryPreview = $this->shopSearchService->buildSearchQueryPreview( - $shopSearchQuery, - $commerceIntent, - $commerceHistoryContext + $shopSearchQuery = $this->resolveShopSearchQuery( + prompt: $prompt, + optimizedShopQuery: $optimizedShopQuery, + commerceHistoryContext: $commerceHistoryContext, + userId: $userId ); - yield $this->systemMsg( - $this->buildShopSearchMetaMessage( - query: $shopQueryPreview->searchText !== '' ? 
$shopQueryPreview->searchText : $shopSearchQuery, - commerceIntent: $commerceIntent, - usedOptimizedQuery: $optimizedShopQuery !== '', - originalQuery: $shopSearchQuery - ), - 'meta' - ); - - $this->agentLogger->info('Commerce search prepared', [ - 'userId' => $userId, - 'commerceIntent' => $commerceIntent, - 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', - 'optimizedShopQuery' => $optimizedShopQuery, - 'shopSearchQuery' => $shopSearchQuery, - 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', - 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), - ]); - - yield $this->systemMsg( - sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent), - 'think' - ); - - $primaryShopResults = $this->searchShop( - $shopSearchQuery, - $commerceIntent, - $userId, - $commerceHistoryContext - ); - $primaryShopSearchHadSystemFailure = $this->shopSearchService->hadLastSearchSystemFailure(); - $primaryShopSearchFailureReason = $this->shopSearchService->getLastSearchFailureReason(); - - if ($primaryShopSearchHadSystemFailure) { - $this->agentLogger->warning('Shop repair skipped after Store API system failure', [ + if ($shopSearchQuery === '') { + $this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [ 'userId' => $userId, 'commerceIntent' => $commerceIntent, - 'shopSearchQuery' => $shopSearchQuery, - 'failureReason' => $primaryShopSearchFailureReason, + 'prompt' => $prompt, + 'optimizedShopQuery' => $optimizedShopQuery, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), ]); - $shopUnavailableMessage = $this->buildShopUnavailableMessage($primaryShopSearchFailureReason); yield $this->systemMsg( - $shopUnavailableMessage, - 'err' - ); - $historyNotices[] = $this->buildHistoryNotice( - 'Shopdaten konnten nicht geladen werden', - $primaryShopSearchFailureReason + 
$this->agentRunnerConfig->getNoConcreteShopQueryMessage(), + 'info' ); - $repairPayload = [ - 'results' => $primaryShopResults, - 'attemptedRepair' => false, - 'usedRepair' => false, - 'repairQueries' => [], - ]; + + return; } else { - yield $this->systemMsg('Erweiterte Shopsuche wird geprüft…', 'think'); - - $repairPayload = $this->repairShopResults( - prompt: $prompt, - userId: $userId, - commerceIntent: $commerceIntent, - commerceHistoryContext: $commerceHistoryContext, - primaryQuery: $shopSearchQuery, - primaryShopResults: $primaryShopResults, - knowledgeChunks: $knowledgeChunks + $shopQueryPreview = $this->shopSearchService->buildSearchQueryPreview( + $shopSearchQuery, + $commerceIntent, + $commerceHistoryContext ); + + yield $this->systemMsg( + $this->buildShopSearchMetaMessage( + query: $shopQueryPreview->searchText !== '' ? $shopQueryPreview->searchText : $shopSearchQuery, + commerceIntent: $commerceIntent, + usedOptimizedQuery: $optimizedShopQuery !== '', + originalQuery: $shopSearchQuery + ), + 'meta' + ); + + $this->agentLogger->info('Commerce search prepared', [ + 'userId' => $userId, + 'commerceIntent' => $commerceIntent, + 'usedOptimizedShopQuery' => $optimizedShopQuery !== '', + 'optimizedShopQuery' => $optimizedShopQuery, + 'shopSearchQuery' => $shopSearchQuery, + 'hasCommerceHistoryContext' => $commerceHistoryContext !== '', + 'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext), + ]); + + yield $this->systemMsg( + sprintf($this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), $commerceIntent), + 'think' + ); + + $primaryShopResults = $this->searchShop( + $shopSearchQuery, + $commerceIntent, + $userId, + $commerceHistoryContext + ); + $primaryShopSearchHadSystemFailure = $this->shopSearchService->hadLastSearchSystemFailure(); + $primaryShopSearchFailureReason = $this->shopSearchService->getLastSearchFailureReason(); + + if ($primaryShopSearchHadSystemFailure) { + $this->agentLogger->warning('Shop repair skipped after Store 
API system failure', [ + 'userId' => $userId, + 'commerceIntent' => $commerceIntent, + 'shopSearchQuery' => $shopSearchQuery, + 'failureReason' => $primaryShopSearchFailureReason, + ]); + + $shopUnavailableMessage = $this->buildShopUnavailableMessage($primaryShopSearchFailureReason); + yield $this->systemMsg( + $shopUnavailableMessage, + 'err' + ); + $historyNotices[] = $this->buildHistoryNotice( + 'Shopdaten konnten nicht geladen werden', + $primaryShopSearchFailureReason + ); + + $repairPayload = [ + 'results' => $primaryShopResults, + 'attemptedRepair' => false, + 'usedRepair' => false, + 'repairQueries' => [], + ]; + } else { + yield $this->systemMsg('Erweiterte Shopsuche wird geprüft…', 'think'); + + $repairPayload = $this->repairShopResults( + prompt: $prompt, + userId: $userId, + commerceIntent: $commerceIntent, + commerceHistoryContext: $commerceHistoryContext, + primaryQuery: $shopSearchQuery, + primaryShopResults: $primaryShopResults, + knowledgeChunks: $knowledgeChunks + ); + } } $shopResults = $repairPayload['results']; @@ -645,7 +669,7 @@ final readonly class AgentRunner return ''; } - return $this->sanitizeOptimizedShopQuery($optimizedQuery); + return $this->sanitizeOptimizedShopQuery($optimizedQuery, $prompt, $commerceHistoryContext); } /** @@ -692,6 +716,189 @@ final readonly class AgentRunner } } + private function resolveShopSearchQuery( + string $prompt, + string $optimizedShopQuery, + string $commerceHistoryContext, + string $userId + ): string { + if ($optimizedShopQuery !== '' && !$this->isMetaOnlyShopQuery($optimizedShopQuery)) { + return $optimizedShopQuery; + } + + if (!$this->isMetaOnlyShopQuery($prompt)) { + return $prompt; + } + + $contextQuery = $this->extractContextualShopSearchQuery($commerceHistoryContext); + + if ($contextQuery !== '' && !$this->isMetaOnlyShopQuery($contextQuery)) { + return $contextQuery; + } + + $extendedHistoryBudget = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars(); + + if 
($extendedHistoryBudget > mb_strlen($commerceHistoryContext, 'UTF-8')) { + $extendedHistory = $this->contextService->buildUserContextWithinBudget($userId, $extendedHistoryBudget); + $extendedContextQuery = $this->extractContextualShopSearchQuery($extendedHistory); + + if ($extendedContextQuery !== '' && !$this->isMetaOnlyShopQuery($extendedContextQuery)) { + return $extendedContextQuery; + } + } + + if ($this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback()) { + $fullHistory = $this->contextService->buildUserContext($userId, true); + $fullHistoryContextQuery = $this->extractContextualShopSearchQuery($fullHistory); + + if ($fullHistoryContextQuery !== '' && !$this->isMetaOnlyShopQuery($fullHistoryContextQuery)) { + return $fullHistoryContextQuery; + } + } + + return ''; + } + + private function extractContextualShopSearchQuery(string $commerceHistoryContext): string + { + if (!$this->agentRunnerConfig->isShopQueryContextFallbackEnabled()) { + return ''; + } + + $questions = $this->extractRecentUserQuestions( + $commerceHistoryContext, + $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit() + ); + + for ($i = count($questions) - 1; $i >= 0; $i--) { + $question = trim($questions[$i]); + + if ($question === '' || $this->isMetaOnlyShopQuery($question)) { + continue; + } + + $contextQuery = $this->buildContextFallbackShopQuery($question); + + if ($contextQuery !== '' && !$this->isMetaOnlyShopQuery($contextQuery)) { + return $contextQuery; + } + } + + return ''; + } + + private function buildContextFallbackShopQuery(string $question): string + { + $tokens = $this->tokenizeShopQueryCandidate($question); + + if ($tokens === []) { + return ''; + } + + $filterTerms = []; + + foreach (array_merge( + $this->agentRunnerConfig->getShopQueryMetaOnlyTerms(), + $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms() + ) as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $token) { + $filterTerms[$token] = true; + } + } + 
+ $maxTerms = max(1, $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms()); + $out = []; + + foreach ($tokens as $token) { + if (isset($filterTerms[$token])) { + continue; + } + + if (in_array($token, $out, true)) { + continue; + } + + $out[] = $token; + + if (count($out) >= $maxTerms) { + break; + } + } + + return implode(' ', $out); + } + + /** + * @return string[] + */ + private function tokenizeShopQueryCandidate(string $value): array + { + $value = mb_strtolower(trim($value), 'UTF-8'); + $value = str_replace(['-', '/', '_'], ' ', $value); + + if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) === false) { + return []; + } + + return array_values(array_filter( + array_map(static fn(string $token): string => trim($token), $matches[0] ?? []), + static fn(string $token): bool => $token !== '' + )); + } + + private function isMetaOnlyShopQuery(string $query): bool + { + if (!$this->agentRunnerConfig->isShopQueryMetaGuardEnabled()) { + return false; + } + + $tokens = $this->tokenizeMetaGuardText($query); + + if ($tokens === []) { + return true; + } + + $metaTerms = []; + foreach ($this->agentRunnerConfig->getShopQueryMetaOnlyTerms() as $term) { + foreach ($this->tokenizeMetaGuardText($term) as $token) { + $metaTerms[$token] = true; + } + } + + if ($metaTerms === []) { + return false; + } + + foreach ($tokens as $token) { + if (!isset($metaTerms[$token])) { + return false; + } + } + + return true; + } + + /** + * @return string[] + */ + private function tokenizeMetaGuardText(string $value): array + { + $value = mb_strtolower(trim($value), 'UTF-8'); + $value = str_replace(['-', '/', '_'], ' ', $value); + $value = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $value) ?? $value; + $value = preg_replace('/\s+/u', ' ', $value) ?? 
$value; + $value = trim($value); + + if ($value === '') { + return []; + } + + return array_values(array_filter( + explode(' ', $value), + static fn(string $token): bool => $token !== '' + )); + } + private function searchShop( string $query, string $commerceIntent, @@ -743,8 +950,11 @@ final readonly class AgentRunner }; } - private function sanitizeOptimizedShopQuery(string $query): string - { + private function sanitizeOptimizedShopQuery( + string $query, + string $sourcePrompt = '', + string $commerceHistoryContext = '' + ): string { $query = trim($query); if ($query === '') { @@ -755,10 +965,162 @@ final readonly class AgentRunner $query = preg_replace($this->agentRunnerConfig->getOptimizedShopQueryPrefixPattern(), '', $query) ?? $query; $query = trim($query, $this->agentRunnerConfig->getOptimizedShopQueryTrimCharacters()); $query = preg_replace('/\s+/u', ' ', $query) ?? $query; + $query = $this->preserveOptimizedShopQueryLanguage($query, $sourcePrompt); + $query = $this->enrichReferentialShopQueryFromHistory($query, $sourcePrompt, $commerceHistoryContext); + $query = preg_replace('/\s+/u', ' ', $query) ?? $query; return trim($query); } + private function enrichReferentialShopQueryFromHistory( + string $query, + string $sourcePrompt, + string $commerceHistoryContext + ): string { + if (!$this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled()) { + return $query; + } + + if (trim($commerceHistoryContext) === '') { + return $query; + } + + $queryTokens = $this->tokenizeShopQueryCandidate($query); + + if ($queryTokens === []) { + return $query; + } + + $maxTerms = max(1, $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms()); + if (count($queryTokens) > $maxTerms) { + return $query; + } + + if (!$this->containsConfiguredShopQueryAnchorTrigger(trim($query . ' ' . 
$sourcePrompt))) { + return $query; + } + + $anchor = $this->normalizeShopQueryAnchor( + $this->extractLatestConfiguredShopQueryContextAnchor($commerceHistoryContext) + ); + + if ($anchor === '' || $this->queryAlreadyContainsAllAnchorTokens($query, $anchor)) { + return $query; + } + + $template = $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(); + $enriched = str_replace(['{anchor}', '{query}'], [$anchor, $query], $template); + $enriched = preg_replace('/\s+/u', ' ', $enriched) ?? $enriched; + + return trim($enriched) !== '' ? trim($enriched) : $query; + } + + private function containsConfiguredShopQueryAnchorTrigger(string $text): bool + { + $tokens = $this->tokenizeShopQueryCandidate($text); + + if ($tokens === []) { + return false; + } + + $tokenSet = array_fill_keys($tokens, true); + + foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { + foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { + if (isset($tokenSet[$termToken])) { + return true; + } + } + } + + return false; + } + + private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string + { + $latest = ''; + + foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { + if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { + continue; + } + + foreach ($matches as $match) { + $candidate = trim((string) ($match[0] ?? '')); + if ($candidate !== '') { + $latest = $candidate; + } + } + } + + return $latest; + } + + private function normalizeShopQueryAnchor(string $anchor): string + { + $anchor = str_replace('®', '', $anchor); + $anchor = mb_strtolower(trim($anchor), 'UTF-8'); + $anchor = preg_replace('/[^\p{L}\p{N},.%°+\-\s]+/u', ' ', $anchor) ?? $anchor; + $anchor = preg_replace('/\s+/u', ' ', $anchor) ?? 
$anchor; + + return trim($anchor); + } + + private function queryAlreadyContainsAllAnchorTokens(string $query, string $anchor): bool + { + $queryTokens = array_fill_keys($this->tokenizeShopQueryCandidate($query), true); + + foreach ($this->tokenizeShopQueryCandidate($anchor) as $token) { + if (!isset($queryTokens[$token])) { + return false; + } + } + + return true; + } + + private function preserveOptimizedShopQueryLanguage(string $query, string $sourcePrompt): string + { + if (!$this->agentRunnerConfig->isShopQueryLanguagePreservationEnabled()) { + return $query; + } + + $language = $this->detectConfiguredShopQueryLanguage($sourcePrompt); + + if ($language === null) { + return $query; + } + + $replacements = $this->agentRunnerConfig->getShopQueryTranslationReplacements($language); + + if ($replacements === []) { + return $query; + } + + foreach ($replacements as $source => $target) { + $pattern = '/(?agentRunnerConfig->getShopQueryLanguageMarkers() as $language => $markers) { + foreach ($markers as $marker) { + if ($marker !== '' && str_contains($normalized, $marker)) { + return $language; + } + } + } + + return null; + } /** * @return Generator */ @@ -993,4 +1355,4 @@ final readonly class AgentRunner default => $msg, }; } -} \ No newline at end of file +} diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index e8cfd40..50870aa 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -46,6 +46,29 @@ final class AgentRunnerConfig return is_numeric($value) ? 
(int) $value : $default; } + private function getBool(string $key, bool $default): bool + { + $value = $this->value($key, $default); + + if (is_bool($value)) { + return $value; + } + + if (is_scalar($value)) { + $normalized = strtolower(trim((string) $value)); + + if (in_array($normalized, ['1', 'true', 'yes', 'on'], true)) { + return true; + } + + if (in_array($normalized, ['0', 'false', 'no', 'off'], true)) { + return false; + } + } + + return $default; + } + private function getString(string $key, string $default): string { $value = $this->value($key, $default); @@ -122,6 +145,14 @@ final class AgentRunnerConfig return $this->getString('messages.optimize_search', 'Ich optimiere die Recherche...'); } + public function getNoConcreteShopQueryMessage(): string + { + return $this->getString( + 'messages.no_concrete_shop_query', + 'Ich habe keine konkrete Shop-Suchanfrage erkannt. Bitte nenne das Produkt, Zubehör oder die Artikelnummer.' + ); + } + public function getFetchSearchDataMessageTemplate(): string { return $this->getString('messages.fetch_search_data_template', 'Ich rufe Recherchedaten ab (type: %s)'); @@ -252,11 +283,14 @@ final class AgentRunnerConfig '- Maximum 6 search terms, preferably fewer.', '- Remove filler words, polite phrases, and irrelevant words.', '- Preserve product names, brands, model numbers, and compound terms exactly if they are relevant.', + '- Preserve the language of the CURRENT USER INPUT for generic product/search terms; do not translate German search terms into English.', + '- For German user input, output German shop terms, for example "freies Chlor Messung" instead of "free chlorine measurement".', + '- Preserve domain terms from the current user input or resolved context in their original language.', '- Numbers that belong to a product name or model must be preserved (e.g. 
Indikator 300, Testomat 808, Testomat 2000).', '- Separate terms using spaces only.', '- If a relevant product name is present, it must be placed at the beginning of the final search query.', '- Try to always identify all products mentioned in the user input text, even in long prompts.', - '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator.', + '- Look for terms such as Testomat, Horiba, Tritromat, or words like indicator/Indikator.', '- If the current user input is vague or referential, use the recent conversation context only as support.', '- Do not output words that only describe conversation flow, such as "same", "again", "also", or "like above".', ]); @@ -297,6 +331,253 @@ final class AgentRunnerConfig return $this->getString('shop_prompt.current_user_input_label', 'CURRENT USER INPUT'); } + public function isShopQueryLanguagePreservationEnabled(): bool + { + return $this->getBool('shop_prompt.language_preservation.enabled', true); + } + + /** + * @return array + */ + public function getShopQueryLanguageMarkers(): array + { + $default = [ + 'de' => [ + ' ä ', ' ö ', ' ü ', ' ß ', + ' der ', ' die ', ' das ', ' ein ', ' eine ', ' einer ', ' einen ', + ' welchem ', ' welchen ', ' welche ', ' welcher ', + ' kann ', ' nutzen ', ' zur ', ' für ', ' fuer ', + ' messung ', ' indikator ', ' reagenz ', ' chlor ', + ], + ]; + + $value = $this->value('shop_prompt.language_preservation.language_markers', $default); + + if (!is_array($value)) { + return $default; + } + + $out = []; + + foreach ($value as $language => $markers) { + if (!is_string($language) || !is_array($markers)) { + continue; + } + + $cleanMarkers = []; + + foreach ($markers as $marker) { + if (!is_scalar($marker)) { + continue; + } + + $marker = strtolower((string) $marker); + + if ($marker !== '') { + $cleanMarkers[] = $marker; + } + } + + if ($cleanMarkers !== []) { + $out[$language] = array_values(array_unique($cleanMarkers)); + } + } + + return $out !== [] ? 
$out : $default; + } + + /** + * @return array + */ + public function isShopQueryMetaGuardEnabled(): bool + { + return $this->getBool('shop_prompt.meta_query_guard.enabled', true); + } + + /** + * @return string[] + */ + public function getShopQueryMetaOnlyTerms(): array + { + return $this->getStringList('shop_prompt.meta_query_guard.meta_only_terms', [ + 'shop', + 'shopsuche', + 'shop-suche', + 'suche', + 'suchen', + 'such', + 'finde', + 'find', + 'zeige', + 'zeig', + 'bitte', + 'mal', + 'im', + 'in', + 'nach', + 'den', + 'die', + 'das', + 'der', + 'dem', + ]); + } + + public function isShopQueryContextFallbackEnabled(): bool + { + return $this->getBool('shop_prompt.meta_query_guard.context_fallback_enabled', true); + } + + public function getShopQueryContextFallbackQuestionLimit(): int + { + return $this->getInt('shop_prompt.meta_query_guard.context_fallback_question_limit', 12); + } + + public function getShopQueryContextFallbackHistoryBudgetChars(): int + { + return $this->getInt('shop_prompt.meta_query_guard.context_fallback_history_budget_chars', 20000); + } + + public function shouldUseFullHistoryForShopQueryContextFallback(): bool + { + return $this->getBool('shop_prompt.meta_query_guard.context_fallback_use_full_history', true); + } + + public function getShopQueryContextFallbackMaxTerms(): int + { + return $this->getInt('shop_prompt.meta_query_guard.context_fallback_max_terms', 6); + } + + /** + * @return string[] + */ + public function getShopQueryContextFallbackFilterTerms(): array + { + return $this->getStringList('shop_prompt.meta_query_guard.context_fallback_filter_terms', [ + 'mit', + 'welche', + 'welcher', + 'welches', + 'welchem', + 'welchen', + 'was', + 'wie', + 'wo', + 'kann', + 'koennen', + 'können', + 'konnte', + 'könnte', + 'ich', + 'wir', + 'man', + 'nutzen', + 'benutzen', + 'verwenden', + 'verwende', + 'nehmen', + 'zur', + 'zum', + 'für', + 'fuer', + 'messen', + 'gemessen', + 'messung', + ]); + } + + public function 
isShopQueryContextAnchorEnrichmentEnabled(): bool + { + return $this->getBool('shop_prompt.context_anchor_enrichment.enabled', true); + } + + public function getShopQueryContextAnchorEnrichmentMaxQueryTerms(): int + { + return $this->getInt('shop_prompt.context_anchor_enrichment.max_query_terms', 2); + } + + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentTriggerTerms(): array + { + return $this->getStringList('shop_prompt.context_anchor_enrichment.trigger_terms', [ + 'indikator', + 'indikatortyp', + 'indicator', + 'reagenz', + 'reagenzsatz', + 'reagent', + 'zubehör', + 'zubehor', + 'accessory', + ]); + } + + /** + * @return string[] + */ + public function getShopQueryContextAnchorEnrichmentPatterns(): array + { + return $this->getStringList('shop_prompt.context_anchor_enrichment.anchor_patterns', [ + '/\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu', + ]); + } + + public function getShopQueryContextAnchorEnrichmentTemplate(): string + { + return $this->getString('shop_prompt.context_anchor_enrichment.template', '{anchor} {query}'); + } + public function getShopQueryTranslationReplacements(string $language): array + { + $default = [ + 'de' => [ + 'free chlorine' => 'freies chlor', + 'free chlor' => 'freies chlor', + 'total chlorine' => 'gesamtchlor', + 'chlorine measurement' => 'chlor messung', + 'water hardness' => 'wasserhärte', + 'measurement' => 'messung', + 'measuring' => 'messung', + 'chlorine' => 'chlor', + 'indicator' => 'indikator', + 'indicators' => 'indikatoren', + 'reagent' => 'reagenz', + 'reagents' => 'reagenzien', + 'accessory' => 'zubehör', + 'accessories' => 'zubehör', + ], + ]; + + $value = $this->value( + 'shop_prompt.language_preservation.translation_replacements.' . $language, + $default[$language] ?? [] + ); + + if (!is_array($value)) { + return $default[$language] ?? 
[]; + } + + $out = []; + + foreach ($value as $source => $target) { + if (!is_scalar($source) || !is_scalar($target)) { + continue; + } + + $source = strtolower(trim((string) $source)); + $target = trim((string) $target); + + if ($source !== '' && $target !== '') { + $out[$source] = $target; + } + } + + uksort($out, static fn(string $a, string $b): int => strlen($b) <=> strlen($a)); + + return $out !== [] ? $out : ($default[$language] ?? []); + } + private function buildRulesBlock(array $rules, string $headline = 'Rules:'): string { return $headline . "\n" . implode("\n", $rules); @@ -317,4 +598,4 @@ final class AgentRunnerConfig return implode("\n\n", $normalized); } -} \ No newline at end of file +} diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 4fb297a..1ec3e18 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -192,6 +192,47 @@ final readonly class RetriexEffectiveConfigProvider $errors[] = 'Shop query optimizer prompt no longer contains the original query.'; } + $metaOnlyTerms = $this->agentRunnerConfig->getShopQueryMetaOnlyTerms(); + foreach (['shop', 'suche'] as $term) { + $key = 'shop_query_meta_guard_term_' . $term; + $checks[$key] = in_array($term, $metaOnlyTerms, true); + if (!$checks[$key]) { + $errors[] = 'Missing shop query meta guard term: ' . $term; + } + } + $checks['shop_query_context_fallback_enabled'] = $this->agentRunnerConfig->isShopQueryContextFallbackEnabled(); + if (!$checks['shop_query_context_fallback_enabled']) { + $errors[] = 'Shop query context fallback is disabled.'; + } + + $contextFallbackFilterTerms = $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms(); + foreach (['welchem', 'kann', 'messen'] as $term) { + $key = 'shop_query_context_fallback_filter_' . 
$term; + $checks[$key] = in_array($term, $contextFallbackFilterTerms, true); + if (!$checks[$key]) { + $errors[] = 'Missing shop query context fallback filter term: ' . $term; + } + } + $checks['shop_query_context_fallback_history_budget_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars() > 0; + if (!$checks['shop_query_context_fallback_history_budget_positive']) { + $errors[] = 'Shop query context fallback history budget must be greater than zero.'; + } + + $checks['shop_query_context_fallback_full_history_enabled'] = $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback(); + if (!$checks['shop_query_context_fallback_full_history_enabled']) { + $errors[] = 'Shop query context fallback full-history fallback is disabled.'; + } + + $checks['shop_query_context_fallback_question_limit_minimum'] = $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit() >= 6; + if (!$checks['shop_query_context_fallback_question_limit_minimum']) { + $errors[] = 'Shop query context fallback question limit is too low for repeated meta follow-ups.'; + } + + $checks['shop_query_context_fallback_max_terms_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms() > 0; + if (!$checks['shop_query_context_fallback_max_terms_positive']) { + $errors[] = 'Shop query context fallback max terms must be greater than zero.'; + } + $status = $errors === [] ? 
'OK' : 'ERROR'; return [ @@ -362,6 +403,7 @@ final readonly class RetriexEffectiveConfigProvider 'check_internet_sources' => $this->agentRunnerConfig->getCheckInternetSourcesMessage(), 'retrieve_knowledge' => $this->agentRunnerConfig->getRetrieveKnowledgeMessage(), 'optimize_search' => $this->agentRunnerConfig->getOptimizeSearchMessage(), + 'no_concrete_shop_query' => $this->agentRunnerConfig->getNoConcreteShopQueryMessage(), 'fetch_search_data_template' => $this->agentRunnerConfig->getFetchSearchDataMessageTemplate(), 'analyze_all_information' => $this->agentRunnerConfig->getAnalyzeAllInformationMessage(), 'thinking_while_streaming' => $this->agentRunnerConfig->getThinkingWhileStreamingMessage(), @@ -392,6 +434,28 @@ final readonly class RetriexEffectiveConfigProvider 'output_format_block' => $this->agentRunnerConfig->getShopPromptOutputFormatBlock(), 'recent_conversation_context_label' => $this->agentRunnerConfig->getRecentConversationContextLabel(), 'current_user_input_label' => $this->agentRunnerConfig->getCurrentUserInputLabel(), + 'language_preservation' => [ + 'enabled' => $this->agentRunnerConfig->isShopQueryLanguagePreservationEnabled(), + 'language_markers' => $this->agentRunnerConfig->getShopQueryLanguageMarkers(), + 'translation_replacements_de' => $this->agentRunnerConfig->getShopQueryTranslationReplacements('de'), + ], + 'context_anchor_enrichment' => [ + 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), + 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), + 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), + 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), + 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), + ], + 'meta_query_guard' => [ + 'enabled' => $this->agentRunnerConfig->isShopQueryMetaGuardEnabled(), + 
'context_fallback_use_full_history' => $this->agentRunnerConfig->shouldUseFullHistoryForShopQueryContextFallback(), + 'meta_only_terms' => $this->agentRunnerConfig->getShopQueryMetaOnlyTerms(), + 'context_fallback_enabled' => $this->agentRunnerConfig->isShopQueryContextFallbackEnabled(), + 'context_fallback_question_limit' => $this->agentRunnerConfig->getShopQueryContextFallbackQuestionLimit(), + 'context_fallback_history_budget_chars' => $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars(), + 'context_fallback_max_terms' => $this->agentRunnerConfig->getShopQueryContextFallbackMaxTerms(), + 'context_fallback_filter_terms' => $this->agentRunnerConfig->getShopQueryContextFallbackFilterTerms(), + ], ], ]; } @@ -795,6 +859,15 @@ final readonly class RetriexEffectiveConfigProvider $this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings); $this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings); $this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors); + + $anchorEnrichment = $agent['shop_query_optimizer']['context_anchor_enrichment'] ?? []; + if (is_array($anchorEnrichment)) { + $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_query_optimizer.context_anchor_enrichment.trigger_terms', $errors, $warnings); + $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_query_optimizer.context_anchor_enrichment.anchor_patterns', $errors); + if (trim((string) ($anchorEnrichment['template'] ?? 
'')) === '') { + $errors[] = 'agent.shop_query_optimizer.context_anchor_enrichment.template must not be empty.'; + } + } } /** diff --git a/src/Controller/AskSseController.php b/src/Controller/AskSseController.php index 84f17bd..924b002 100644 --- a/src/Controller/AskSseController.php +++ b/src/Controller/AskSseController.php @@ -92,6 +92,13 @@ final readonly class AskSseController if (($claimed['ok'] ?? false) !== true) { $this->prepareStreamRuntime(); echo "retry: 15000\n\n"; + + if ($this->shouldSilentlyCloseDuplicateJobStream($claimed)) { + $this->sendComment('duplicate-or-finished-stream'); + $this->sendEvent('done', '[DONE]'); + return; + } + $this->sendEvent('error', $this->jobClaimErrorMessage($claimed)); $this->sendEvent('done', '[DONE]'); return; @@ -504,6 +511,24 @@ final readonly class AskSseController } } } + /** + * EventSource may reconnect to an already running or already completed job. + * Those duplicate connections should be closed quietly so the UI does not + * append a misleading error after the real stream already produced output. + * + * @param array $claim + */ + private function shouldSilentlyCloseDuplicateJobStream(array $claim): bool + { + if (($claim['reason'] ?? null) !== 'not_pending') { + return false; + } + + $status = (string) ($claim['status'] ?? ''); + + return $status === self::JOB_STATUS_RUNNING + || $status === self::JOB_STATUS_COMPLETED; + } /** * @param array $claim @@ -548,7 +573,6 @@ final readonly class AskSseController return 'Der Antwort-Job wurde nicht gefunden. Falls deine Verbindung kurz unterbrochen war, sende die Anfrage bitte erneut.'; } - private function cleanupExpiredJobs(): void { $directory = $this->jobDirectory();