This commit is contained in:
team 1
2026-05-07 19:17:59 +02:00
parent 61f6841a5a
commit 476b664520
6 changed files with 298 additions and 24 deletions

View File

@@ -188,7 +188,7 @@ parameters:
positive_token_filter: positive_token_filter:
enabled: false enabled: false
min_query_tokens_after_filter: 2 min_query_tokens_after_filter: 1
include_current_input_preservation_terms: true include_current_input_preservation_terms: true
include_semantic_shop_search_tokens: true include_semantic_shop_search_tokens: true
include_product_role_terms: true include_product_role_terms: true
@@ -223,6 +223,8 @@ parameters:
template: '' template: ''
vocabulary_views: vocabulary_views:
trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
query_terms: []
query_noise_terms: []
anchor_patterns: [] anchor_patterns: []
meta_query_guard: meta_query_guard:
enabled: true enabled: true
@@ -376,10 +378,6 @@ parameters:
separator: ' | ' separator: ' | '
unavailable_reason_template: '{message} Ursache: {reason}' unavailable_reason_template: '{message} Ursache: {reason}'
product_roles:
vocabulary_views:
main_device_request_keywords: agent.no_llm_fallback.product_roles.main_device_request_keywords
accessory_product_keywords: agent.no_llm_fallback.product_roles.accessory_product_keywords
production_ui: production_ui:

View File

@@ -14,6 +14,8 @@
# the value surface stays native and maintenance-focused. # the value surface stays native and maintenance-focused.
# p59F adds machine-readable review-group metadata so the remaining # p59F adds machine-readable review-group metadata so the remaining
# compatibility paths can be removed or kept deliberately by category. # compatibility paths can be removed or kept deliberately by category.
# p59G rewires the no-LLM product-role fallback directly to genre values
# and removes its obsolete legacy compatibility view declarations.
parameters: parameters:
retriex.genre.config: retriex.genre.config:
id: water_analysis id: water_analysis
@@ -30,15 +32,6 @@ parameters:
- configuration_values.product_roles.shop_views - configuration_values.product_roles.shop_views
- configuration_values.product_roles.prompt_views - configuration_values.product_roles.prompt_views
- configuration_values.product_roles.no_llm_fallback_terms - configuration_values.product_roles.no_llm_fallback_terms
review_path_groups:
frozen_compatibility_views:
description: Legacy no-LLM product-role vocabulary views. Keep frozen until the fallback reads only from genre.configuration_values.product_roles.
classification: legacy_compatibility_view
source_state: legacy_frozen_non_empty
cleanup_action: rewire_to_genre_value_and_remove_legacy_view
paths:
- agent.no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords
- agent.no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords
product_attributes: product_attributes:
description: Genre-specific attributes and constraints, for example measurement values now or size/color/material later. description: Genre-specific attributes and constraints, for example measurement values now or size/color/material later.
value_paths: value_paths:
@@ -1090,6 +1083,24 @@ parameters:
- zubehör - zubehör
- zubehor - zubehor
- accessory - accessory
# Terms that should be sent to Shopware for referential accessory or
# consumable follow-ups. Broader RAG markers can remain in
# trigger_terms without becoming dominant shop query tokens.
query_terms:
- indikator
- indicator
- reagenz
- reagent
- zubehör
- zubehor
- accessory
# Terms that are useful for interpreting RAG/history phrasing but are
# too meta or type-oriented for the plain Shopware text query.
query_noise_terms:
- indikatortyp
- indicator type
- typ
- type
anchor_patterns: anchor_patterns:
- /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu - /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu
template: '{anchor} {query}' template: '{anchor} {query}'
@@ -1198,7 +1209,7 @@ parameters:
positive_token_filter: positive_token_filter:
origin: genre_native origin: genre_native
enabled: true enabled: true
min_query_tokens_after_filter: 2 min_query_tokens_after_filter: 1
include_current_input_preservation_terms: true include_current_input_preservation_terms: true
include_semantic_shop_search_tokens: true include_semantic_shop_search_tokens: true
include_product_role_terms: true include_product_role_terms: true

View File

@@ -0,0 +1,86 @@
# RetrieX Patch p61C - Positive Shop Query Token Filter on p60
p61C reapplies the positive Shopware query token filter on the confirmed p60 baseline.
## Why p61C exists
p61B was built on a stale base and reintroduced legacy `agent.no_llm_fallback.product_roles.vocabulary_views.*` paths that had already been removed by p59G. It also did not reliably preserve the p60 referential device anchor in the generated query.
p61C uses the confirmed p60 baseline and keeps the p59G/p60 cleanup intact.
## Goal
The final plain Shopware query should contain only product-relevant tokens:
- product/device/accessory names from the active genre vocabulary
- explicitly allowed product family/application terms
- protected short technical terms such as pH/RX/TH/TC/TP/TM when configured
- model/type/code tokens such as `808`, `300`, `TH2100`, `2x100ml` when they match configured regex patterns
Sentence, relation and RAG-only reference words such as `gemessen`, `beim` or `indikatortyp` must not dominate the shop query.
## Expected example
Input query after p60 referential/RAG anchoring:
```text
testomat 808 gemessen 300 beim indikator
```
Final shop query after p61C:
```text
testomat 808 300 indikator
```
## Configuration
Primary configuration lives in:
```yaml
# config/retriex/genre.yaml
parameters:
    retriex.genre.config:
        configuration_values:
            shop_query_runtime:
                positive_token_filter:
```
Important fields:
- `enabled`: activates the filter for the active genre.
- `min_query_tokens_after_filter`: set to `1` so a single valid product token can still replace a noisy query.
- `allowed_terms`: extra genre-specific product family/application terms.
- `blocked_terms`: terms that are useful for RAG/reference resolution but poor shop search tokens.
- `code_patterns`: regex patterns for model/type/article/size tokens.
- `include_current_input_preservation_terms`: includes configured protected short terms from the shop query preservation surface.
- `include_semantic_shop_search_tokens`: includes the genre's shop semantic product vocabulary.
- `include_product_role_terms`: includes the genre's device/accessory role vocabulary.
`agent.yaml` contains only an inactive compatibility fallback for this feature. Runtime values should be maintained in `genre.yaml`.
## Scope
No hard-coded product names or stopword lists were added to PHP. The PHP code only applies the configured positive token filter.
No changes to:
- retrieval ranking
- prompt rules
- shop result scoring
- SearchRepair
- intent routing
- product identity matching
## Validation
Run:
```bash
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```
The p59G no-LLM legacy paths must remain absent from `agent.yaml`, from the source paths in `genre.yaml`, and from the frozen hashes in `governance.yaml`.

View File

@@ -517,6 +517,7 @@ final readonly class AgentRunner
$shopResults = $directIdentityRepairPayload['results']; $shopResults = $directIdentityRepairPayload['results'];
} }
$shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults);
$shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults); $shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults);
$attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair']; $attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair'];
$usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair']; $usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair'];
@@ -2799,20 +2800,40 @@ final readonly class AgentRunner
return ''; return '';
} }
$triggerTokens = []; $triggerTokens = $this->buildShopQueryTokenSet(
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) { $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms()
foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) { );
$triggerTokens[$termToken] = true;
}
}
if ($triggerTokens === []) { if ($triggerTokens === []) {
return ''; return '';
} }
$hasTrigger = false;
foreach ($tokens as $token) {
if (isset($triggerTokens[$token])) {
$hasTrigger = true;
break;
}
}
if (!$hasTrigger) {
return '';
}
$queryTokens = $this->buildShopQueryTokenSet(
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
);
if ($queryTokens === []) {
$queryTokens = $triggerTokens;
}
$noiseTokens = $this->buildShopQueryTokenSet(
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
);
$out = []; $out = [];
foreach ($tokens as $token) { foreach ($tokens as $token) {
if (!isset($triggerTokens[$token]) || isset($out[$token])) { if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) {
continue; continue;
} }
@@ -2822,6 +2843,23 @@ final readonly class AgentRunner
return implode(' ', array_values($out)); return implode(' ', array_values($out));
} }
/**
 * Flattens a list of configured terms into a token lookup set.
 *
 * Every term is split with tokenizeShopQueryCandidate(); each resulting
 * token becomes a key mapped to true, so duplicates collapse and callers
 * can test membership with isset().
 *
 * @param string[] $terms
 * @return array<string, true>
 */
private function buildShopQueryTokenSet(array $terms): array
{
    $lookup = [];
    foreach ($terms as $configuredTerm) {
        // Array union keeps already-known tokens and appends the new ones in order.
        $lookup += array_fill_keys($this->tokenizeShopQueryCandidate($configuredTerm), true);
    }

    return $lookup;
}
private function enrichReferentialShopQueryFromHistory( private function enrichReferentialShopQueryFromHistory(
string $query, string $query,
string $sourcePrompt, string $sourcePrompt,
@@ -2890,11 +2928,33 @@ final readonly class AgentRunner
} }
/**
 * Picks the shop query anchor to reuse from the commerce history context.
 *
 * Walks the turns returned by extractHistoryTurnsNewestFirst() and considers
 * only turns for which containsConfiguredShopQueryAnchorTrigger() is true.
 * For the first such turn that yields anything, a product model anchor wins
 * (qualified with the turn's pattern anchor); otherwise the turn's pattern
 * anchor is returned. When no turn yields an anchor, the configured patterns
 * are applied to the whole history context instead.
 */
private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string
{
foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $turn) {
// Turns without a configured trigger are ignored.
if (!$this->containsConfiguredShopQueryAnchorTrigger($turn)) {
continue;
}
// Both candidates are extracted from the same turn.
$modelAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($turn);
$turnAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($turn);
// A model anchor takes precedence; the pattern anchor only contributes detail tokens.
if ($modelAnchor !== '') {
return $this->buildModelQualifiedShopQueryAnchor($modelAnchor, $turnAnchor);
}
if ($turnAnchor !== '') {
return $turnAnchor;
}
}
// Fallback: run the pattern match over the full history text.
return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext);
}
private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string
{ {
$latest = ''; $latest = '';
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) { foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) {
if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) { if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) {
continue; continue;
} }
@@ -2909,6 +2969,51 @@ final readonly class AgentRunner
return $latest; return $latest;
} }
/**
 * Combines a product model anchor with the extra detail tokens of a pattern anchor.
 *
 * Returns the trimmed detail anchor when the model anchor is blank, the bare
 * model anchor when the detail anchor adds no tokens, and otherwise the model
 * anchor followed by the space-separated detail tokens.
 */
private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string
{
    $model = trim($modelAnchor);
    if ($model === '') {
        return trim($detailAnchor);
    }

    $extras = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $model);

    return $extras === []
        ? $model
        : trim(implode(' ', [$model, ...$extras]));
}
/**
 * Returns the tokens of a detail anchor that add information beyond the model anchor.
 *
 * A token is dropped when it already occurs in the model anchor, in the
 * configured query terms, or in the configured query noise terms. Remaining
 * tokens are de-duplicated and keep their first-seen order.
 *
 * @return string[]
 */
private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array
{
    $detailTokens = $this->tokenizeShopQueryCandidate($detailAnchor);
    if ($detailTokens === []) {
        return [];
    }

    // One merged lookup of everything that must not be repeated in the anchor.
    $excluded = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true)
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
        )
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
        );

    $kept = [];
    foreach ($detailTokens as $candidate) {
        if (isset($excluded[$candidate]) || isset($kept[$candidate])) {
            continue;
        }

        // Keyed by token for de-duplication; the value keeps the original string.
        $kept[$candidate] = $candidate;
    }

    return array_values($kept);
}
private function normalizeShopQueryAnchor(string $anchor): string private function normalizeShopQueryAnchor(string $anchor): string
{ {
$anchor = str_replace('®', '', $anchor); $anchor = str_replace('®', '', $anchor);
@@ -3376,6 +3481,48 @@ final readonly class AgentRunner
return trim(implode(' ', $this->tokenizeShopQueryCandidate($query))); return trim(implode(' ', $this->tokenizeShopQueryCandidate($query)));
} }
/**
 * Restricts shop results to products that match the model anchor found in the query.
 *
 * The results are returned unchanged when the list is empty or when no
 * product model anchor can be extracted from the shop search query.
 * Otherwise only ShopProductResult entries that match the anchor are kept,
 * re-indexed as a list; this can leave the list empty.
 *
 * @param ShopProductResult[] $shopResults
 * @return ShopProductResult[]
 */
private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array
{
    if ($shopResults === []) {
        return $shopResults;
    }

    $referencedAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery);
    if ($referencedAnchor === '') {
        return $shopResults;
    }

    return array_values(array_filter(
        $shopResults,
        fn (mixed $candidate): bool => $candidate instanceof ShopProductResult
            && $this->shopProductMatchesReferencedProductAnchor($candidate, $referencedAnchor),
    ));
}
/**
 * Checks whether a product's searchable text contains every token of the anchor.
 *
 * The searchable text is built from name, description, highlights, custom
 * fields and URL; empty parts are dropped before joining with spaces.
 */
private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool
{
    $searchableParts = [
        $product->name,
        $product->description,
        implode(' ', $product->highlights),
        $product->customFields,
        $product->url,
    ];

    // array_filter() without a callback removes falsy parts before joining.
    $haystack = trim(implode(' ', array_filter($searchableParts)));

    return $this->containsAllShopQueryTokens($haystack, $anchor);
}
/** /**
* @param ShopProductResult[] $shopResults * @param ShopProductResult[] $shopResults
* @return ShopProductResult[] * @return ShopProductResult[]

View File

@@ -962,6 +962,11 @@ final class AgentRunnerConfig
*/ */
public function getNoLlmMainDeviceRequestRoleKeywords(): array public function getNoLlmMainDeviceRequestRoleKeywords(): array
{ {
$terms = $this->genreStringList('product_roles.no_llm_fallback_terms.main_device_request_keywords');
if ($terms !== []) {
return $terms;
}
return $this->getConfiguredStringListOrVocabularyView( return $this->getConfiguredStringListOrVocabularyView(
'no_llm_fallback.product_roles.main_device_request_keywords', 'no_llm_fallback.product_roles.main_device_request_keywords',
'no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords' 'no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords'
@@ -973,6 +978,11 @@ final class AgentRunnerConfig
*/ */
public function getNoLlmAccessoryProductRoleKeywords(): array public function getNoLlmAccessoryProductRoleKeywords(): array
{ {
$terms = $this->genreStringList('product_roles.no_llm_fallback_terms.accessory_product_keywords');
if ($terms !== []) {
return $terms;
}
return $this->getConfiguredStringListOrVocabularyView( return $this->getConfiguredStringListOrVocabularyView(
'no_llm_fallback.product_roles.accessory_product_keywords', 'no_llm_fallback.product_roles.accessory_product_keywords',
'no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords' 'no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords'
@@ -1195,7 +1205,7 @@ final class AgentRunnerConfig
public function getShopQueryPositiveTokenFilterMinTokens(): int public function getShopQueryPositiveTokenFilterMinTokens(): int
{ {
return $this->genreInt('shop_query_runtime.positive_token_filter.min_query_tokens_after_filter') return $this->genreInt('shop_query_runtime.positive_token_filter.min_query_tokens_after_filter')
?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 2); ?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 1);
} }
public function shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(): bool public function shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(): bool
@@ -1508,6 +1518,24 @@ final class AgentRunnerConfig
); );
} }
/**
 * Returns the query terms for history anchor enrichment.
 *
 * The genre value is preferred; when it is empty, the optional
 * shop_runtime list is used instead.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryTerms(): array
{
    $genreTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms');
    if ($genreTerms !== []) {
        return $genreTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms');
}
/**
 * Returns the query noise terms for history anchor enrichment.
 *
 * The genre value is preferred; when it is empty, the optional
 * shop_runtime list is used instead.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array
{
    $genreNoiseTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms');
    if ($genreNoiseTerms !== []) {
        return $genreNoiseTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms');
}
/** /**
* @return string[] * @return string[]
*/ */

View File

@@ -713,6 +713,8 @@ final readonly class RetriexEffectiveConfigProvider
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(), 'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(), 'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(), 'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(),
'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(),
'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(),
'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(), 'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(),
'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(), 'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(),
], ],
@@ -1871,6 +1873,8 @@ final readonly class RetriexEffectiveConfigProvider
$anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? []; $anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? [];
if (is_array($anchorEnrichment)) { if (is_array($anchorEnrichment)) {
$this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings); $this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings);
$this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings);
$this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings);
$this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors); $this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors);
if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') { if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') {
$errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.'; $errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';