p61c fix
This commit is contained in:
@@ -188,7 +188,7 @@ parameters:
|
||||
|
||||
positive_token_filter:
|
||||
enabled: false
|
||||
min_query_tokens_after_filter: 2
|
||||
min_query_tokens_after_filter: 1
|
||||
include_current_input_preservation_terms: true
|
||||
include_semantic_shop_search_tokens: true
|
||||
include_product_role_terms: true
|
||||
@@ -223,6 +223,8 @@ parameters:
|
||||
template: ''
|
||||
vocabulary_views:
|
||||
trigger_terms: agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms
|
||||
query_terms: []
|
||||
query_noise_terms: []
|
||||
anchor_patterns: []
|
||||
meta_query_guard:
|
||||
enabled: true
|
||||
@@ -376,10 +378,6 @@ parameters:
|
||||
separator: ' | '
|
||||
unavailable_reason_template: '{message} Ursache: {reason}'
|
||||
|
||||
product_roles:
|
||||
vocabulary_views:
|
||||
main_device_request_keywords: agent.no_llm_fallback.product_roles.main_device_request_keywords
|
||||
accessory_product_keywords: agent.no_llm_fallback.product_roles.accessory_product_keywords
|
||||
|
||||
|
||||
production_ui:
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
# the value surface stays native and maintenance-focused.
|
||||
# p59F adds machine-readable review-group metadata so the remaining
|
||||
# compatibility paths can be removed or kept deliberately by category.
|
||||
# p59G rewires the no-LLM product-role fallback directly to genre values
|
||||
# and removes its obsolete legacy compatibility view declarations.
|
||||
parameters:
|
||||
retriex.genre.config:
|
||||
id: water_analysis
|
||||
@@ -30,15 +32,6 @@ parameters:
|
||||
- configuration_values.product_roles.shop_views
|
||||
- configuration_values.product_roles.prompt_views
|
||||
- configuration_values.product_roles.no_llm_fallback_terms
|
||||
review_path_groups:
|
||||
frozen_compatibility_views:
|
||||
description: Legacy no-LLM product-role vocabulary views. Keep frozen until the fallback reads only from genre.configuration_values.product_roles.
|
||||
classification: legacy_compatibility_view
|
||||
source_state: legacy_frozen_non_empty
|
||||
cleanup_action: rewire_to_genre_value_and_remove_legacy_view
|
||||
paths:
|
||||
- agent.no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords
|
||||
- agent.no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords
|
||||
product_attributes:
|
||||
description: Genre-specific attributes and constraints, for example measurement values now or size/color/material later.
|
||||
value_paths:
|
||||
@@ -1090,6 +1083,24 @@ parameters:
|
||||
- zubehör
|
||||
- zubehor
|
||||
- accessory
|
||||
# Terms that should be sent to Shopware for referential accessory or
|
||||
# consumable follow-ups. Broader RAG markers can remain in
|
||||
# trigger_terms without becoming dominant shop query tokens.
|
||||
query_terms:
|
||||
- indikator
|
||||
- indicator
|
||||
- reagenz
|
||||
- reagent
|
||||
- zubehör
|
||||
- zubehor
|
||||
- accessory
|
||||
# Terms that are useful for interpreting RAG/history phrasing but are
|
||||
# too meta or type-oriented for the plain Shopware text query.
|
||||
query_noise_terms:
|
||||
- indikatortyp
|
||||
- indicator type
|
||||
- typ
|
||||
- type
|
||||
anchor_patterns:
|
||||
- /\b(?:indikator(?:typ)?|indicator(?:\s+type)?|reagenz(?:satz|typ)?|reagent(?:\s+set|\s+type)?|typ|type)\s+[A-Za-zÄÖÜäöüß]{0,8}\s*\d{1,5}(?:\s*[A-ZÄÖÜ]{1,4})?(?:\s*%)?\b/iu
|
||||
template: '{anchor} {query}'
|
||||
@@ -1198,7 +1209,7 @@ parameters:
|
||||
positive_token_filter:
|
||||
origin: genre_native
|
||||
enabled: true
|
||||
min_query_tokens_after_filter: 2
|
||||
min_query_tokens_after_filter: 1
|
||||
include_current_input_preservation_terms: true
|
||||
include_semantic_shop_search_tokens: true
|
||||
include_product_role_terms: true
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
# RetrieX Patch p61C - Positive Shop Query Token Filter on p60
|
||||
|
||||
p61C reapplies the positive Shopware query token filter on the confirmed p60 baseline.
|
||||
|
||||
## Why p61C exists
|
||||
|
||||
p61B was built on a stale base and reintroduced legacy `agent.no_llm_fallback.product_roles.vocabulary_views.*` paths that had already been removed by p59G. It also did not reliably preserve the p60 referential device anchor in the generated query.
|
||||
|
||||
p61C uses the confirmed p60 baseline and keeps the p59G/p60 cleanup intact.
|
||||
|
||||
## Goal
|
||||
|
||||
The final plain Shopware query should contain only product-relevant tokens:
|
||||
|
||||
- product/device/accessory names from the active genre vocabulary
|
||||
- explicitly allowed product family/application terms
|
||||
- protected short technical terms such as pH/RX/TH/TC/TP/TM when configured
|
||||
- model/type/code tokens such as `808`, `300`, `TH2100`, `2x100ml` when they match configured regex patterns
|
||||
|
||||
Sentence, relation and RAG-only reference words such as `gemessen`, `beim` or `indikatortyp` must not dominate the shop query.
|
||||
|
||||
## Expected example
|
||||
|
||||
Input query after p60 referential/RAG anchoring:
|
||||
|
||||
```text
|
||||
testomat 808 gemessen 300 beim indikator
|
||||
```
|
||||
|
||||
Final shop query after p61C:
|
||||
|
||||
```text
|
||||
testomat 808 300 indikator
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Primary configuration lives in:
|
||||
|
||||
```yaml
|
||||
# config/retriex/genre.yaml
|
||||
parameters:
|
||||
retriex.genre.config:
|
||||
configuration_values:
|
||||
shop_query_runtime:
|
||||
positive_token_filter:
|
||||
```
|
||||
|
||||
Important fields:
|
||||
|
||||
- `enabled`: activates the filter for the active genre.
|
||||
- `min_query_tokens_after_filter`: set to `1` so a single valid product token can still replace a noisy query.
|
||||
- `allowed_terms`: extra genre-specific product family/application terms.
|
||||
- `blocked_terms`: terms that are useful for RAG/reference resolution but poor shop search tokens.
|
||||
- `code_patterns`: regex patterns for model/type/article/size tokens.
|
||||
- `include_current_input_preservation_terms`: includes configured protected short terms from the shop query preservation surface.
|
||||
- `include_semantic_shop_search_tokens`: includes the genre's shop semantic product vocabulary.
|
||||
- `include_product_role_terms`: includes the genre's device/accessory role vocabulary.
|
||||
|
||||
`agent.yaml` contains only an inactive compatibility fallback for this feature. Runtime values should be maintained in `genre.yaml`.
|
||||
|
||||
## Scope
|
||||
|
||||
No hard-coded product names or stopword lists were added to PHP. The PHP code only applies the configured positive token filter.
|
||||
|
||||
No changes to:
|
||||
|
||||
- retrieval ranking
|
||||
- prompt rules
|
||||
- shop result scoring
|
||||
- SearchRepair
|
||||
- intent routing
|
||||
- product identity matching
|
||||
|
||||
## Validation
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
The p59G no-LLM legacy paths must remain absent from `agent.yaml`, from the `genre.yaml` source paths, and from the `governance.yaml` frozen hashes.
|
||||
@@ -517,6 +517,7 @@ final readonly class AgentRunner
|
||||
$shopResults = $directIdentityRepairPayload['results'];
|
||||
}
|
||||
|
||||
$shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults);
|
||||
$shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults);
|
||||
$attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair'];
|
||||
$usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair'];
|
||||
@@ -2799,20 +2800,40 @@ final readonly class AgentRunner
|
||||
return '';
|
||||
}
|
||||
|
||||
$triggerTokens = [];
|
||||
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms() as $term) {
|
||||
foreach ($this->tokenizeShopQueryCandidate($term) as $termToken) {
|
||||
$triggerTokens[$termToken] = true;
|
||||
}
|
||||
}
|
||||
$triggerTokens = $this->buildShopQueryTokenSet(
|
||||
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms()
|
||||
);
|
||||
|
||||
if ($triggerTokens === []) {
|
||||
return '';
|
||||
}
|
||||
|
||||
$hasTrigger = false;
|
||||
foreach ($tokens as $token) {
|
||||
if (isset($triggerTokens[$token])) {
|
||||
$hasTrigger = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$hasTrigger) {
|
||||
return '';
|
||||
}
|
||||
|
||||
$queryTokens = $this->buildShopQueryTokenSet(
|
||||
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
|
||||
);
|
||||
if ($queryTokens === []) {
|
||||
$queryTokens = $triggerTokens;
|
||||
}
|
||||
|
||||
$noiseTokens = $this->buildShopQueryTokenSet(
|
||||
$this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
|
||||
);
|
||||
|
||||
$out = [];
|
||||
foreach ($tokens as $token) {
|
||||
if (!isset($triggerTokens[$token]) || isset($out[$token])) {
|
||||
if (!isset($queryTokens[$token]) || isset($noiseTokens[$token]) || isset($out[$token])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2822,6 +2843,23 @@ final readonly class AgentRunner
|
||||
return implode(' ', array_values($out));
|
||||
}
|
||||
|
||||
/**
 * Tokenizes every given term and collects the resulting tokens as the keys
 * of a lookup set, so membership can be tested with isset().
 *
 * @param string[] $terms Configured terms; each one may yield several tokens.
 * @return array<string, true> Token => true, in order of first occurrence.
 */
private function buildShopQueryTokenSet(array $terms): array
{
    $tokenSet = [];

    foreach ($terms as $configuredTerm) {
        // Array union keeps keys that are already present and appends only new ones.
        $tokenSet += array_fill_keys($this->tokenizeShopQueryCandidate($configuredTerm), true);
    }

    return $tokenSet;
}
|
||||
|
||||
private function enrichReferentialShopQueryFromHistory(
|
||||
string $query,
|
||||
string $sourcePrompt,
|
||||
@@ -2890,11 +2928,33 @@ final readonly class AgentRunner
|
||||
}
|
||||
|
||||
/**
 * Resolves the anchor used to enrich a shop query from the commerce history.
 *
 * History turns are inspected newest first. Turns without a configured
 * anchor trigger are skipped. For the first triggering turn that yields a
 * product model anchor, the model anchor qualified by the turn's pattern
 * anchor is returned; otherwise a non-empty pattern anchor of that turn is
 * returned. If no turn produces a result, the configured patterns are
 * applied to the whole history context.
 */
private function extractLatestConfiguredShopQueryContextAnchor(string $commerceHistoryContext): string
{
    foreach ($this->extractHistoryTurnsNewestFirst($commerceHistoryContext) as $historyTurn) {
        if ($this->containsConfiguredShopQueryAnchorTrigger($historyTurn)) {
            $productModel = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($historyTurn);
            $patternAnchor = $this->extractLatestConfiguredShopQueryPatternAnchor($historyTurn);

            // A model anchor takes precedence; the pattern anchor only adds detail tokens to it.
            if ($productModel !== '') {
                return $this->buildModelQualifiedShopQueryAnchor($productModel, $patternAnchor);
            }

            if ($patternAnchor !== '') {
                return $patternAnchor;
            }
        }
    }

    // No triggering turn produced an anchor: fall back to matching the full history text.
    return $this->extractLatestConfiguredShopQueryPatternAnchor($commerceHistoryContext);
}
|
||||
|
||||
private function extractLatestConfiguredShopQueryPatternAnchor(string $text): string
|
||||
{
|
||||
$latest = '';
|
||||
|
||||
foreach ($this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns() as $pattern) {
|
||||
if (@preg_match_all($pattern, $commerceHistoryContext, $matches, PREG_SET_ORDER) === false) {
|
||||
if (@preg_match_all($pattern, $text, $matches, PREG_SET_ORDER) === false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2909,6 +2969,51 @@ final readonly class AgentRunner
|
||||
return $latest;
|
||||
}
|
||||
|
||||
/**
 * Combines a product model anchor with the extra tokens of a detail anchor.
 *
 * Returns the trimmed detail anchor when the model anchor is blank, the
 * trimmed model anchor alone when the detail anchor contributes no extra
 * tokens, and otherwise "<model> <detail tokens...>".
 */
private function buildModelQualifiedShopQueryAnchor(string $modelAnchor, string $detailAnchor): string
{
    $model = trim($modelAnchor);

    if ($model === '') {
        return trim($detailAnchor);
    }

    $extraTokens = $this->extractShopQueryDetailAnchorTokens($detailAnchor, $model);

    return $extraTokens === []
        ? $model
        : trim($model . ' ' . implode(' ', $extraTokens));
}
|
||||
|
||||
/**
 * Returns the tokens of the detail anchor that are neither part of the
 * model anchor nor configured as enrichment query terms or query noise
 * terms. Each token is returned once, in order of first occurrence.
 *
 * @return string[]
 */
private function extractShopQueryDetailAnchorTokens(string $detailAnchor, string $modelAnchor): array
{
    $candidateTokens = $this->tokenizeShopQueryCandidate($detailAnchor);
    if ($candidateTokens === []) {
        return [];
    }

    // One exclusion set: model tokens, configured query terms and configured noise terms.
    $excluded = array_fill_keys($this->tokenizeShopQueryCandidate($modelAnchor), true)
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms()
        )
        + $this->buildShopQueryTokenSet(
            $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms()
        );

    $detailTokens = [];
    foreach ($candidateTokens as $candidate) {
        if (isset($excluded[$candidate])) {
            continue;
        }

        // Marking the token as excluded also suppresses later duplicates.
        $excluded[$candidate] = true;
        $detailTokens[] = $candidate;
    }

    return $detailTokens;
}
|
||||
|
||||
private function normalizeShopQueryAnchor(string $anchor): string
|
||||
{
|
||||
$anchor = str_replace('®', '', $anchor);
|
||||
@@ -3376,6 +3481,48 @@ final readonly class AgentRunner
|
||||
return trim(implode(' ', $this->tokenizeShopQueryCandidate($query)));
|
||||
}
|
||||
|
||||
/**
 * Restricts shop results to products matching the product model anchor
 * found in the shop search query.
 *
 * The input is returned unchanged when it is empty or when no model anchor
 * can be extracted from the query. Otherwise entries that are not
 * ShopProductResult instances and products that do not match the anchor
 * are dropped; the result may therefore be empty.
 *
 * @param ShopProductResult[] $shopResults
 * @return ShopProductResult[]
 */
private function guardShopResultsByReferencedProductAnchor(string $shopSearchQuery, array $shopResults): array
{
    if ($shopResults === []) {
        return $shopResults;
    }

    $referencedAnchor = $this->referenceAnchorExtractor->extractFirstProductModelAnchor($shopSearchQuery);
    if ($referencedAnchor === '') {
        return $shopResults;
    }

    // array_values() re-indexes so the result is a plain list like the original accumulator.
    return array_values(array_filter(
        $shopResults,
        fn (mixed $candidate): bool => $candidate instanceof ShopProductResult
            && $this->shopProductMatchesReferencedProductAnchor($candidate, $referencedAnchor),
    ));
}
|
||||
|
||||
/**
 * Checks a product against a referenced product anchor.
 *
 * The product's name, description, highlights, custom fields and URL are
 * joined into one text (empty parts are skipped), and the decision is
 * delegated to containsAllShopQueryTokens().
 */
private function shopProductMatchesReferencedProductAnchor(ShopProductResult $product, string $anchor): bool
{
    $searchableParts = array_filter([
        $product->name,
        $product->description,
        implode(' ', $product->highlights),
        $product->customFields,
        $product->url,
    ]);

    $searchableText = trim(implode(' ', $searchableParts));

    return $this->containsAllShopQueryTokens($searchableText, $anchor);
}
|
||||
|
||||
/**
|
||||
* @param ShopProductResult[] $shopResults
|
||||
* @return ShopProductResult[]
|
||||
|
||||
@@ -962,6 +962,11 @@ final class AgentRunnerConfig
|
||||
*/
|
||||
public function getNoLlmMainDeviceRequestRoleKeywords(): array
|
||||
{
|
||||
$terms = $this->genreStringList('product_roles.no_llm_fallback_terms.main_device_request_keywords');
|
||||
if ($terms !== []) {
|
||||
return $terms;
|
||||
}
|
||||
|
||||
return $this->getConfiguredStringListOrVocabularyView(
|
||||
'no_llm_fallback.product_roles.main_device_request_keywords',
|
||||
'no_llm_fallback.product_roles.vocabulary_views.main_device_request_keywords'
|
||||
@@ -973,6 +978,11 @@ final class AgentRunnerConfig
|
||||
*/
|
||||
public function getNoLlmAccessoryProductRoleKeywords(): array
|
||||
{
|
||||
$terms = $this->genreStringList('product_roles.no_llm_fallback_terms.accessory_product_keywords');
|
||||
if ($terms !== []) {
|
||||
return $terms;
|
||||
}
|
||||
|
||||
return $this->getConfiguredStringListOrVocabularyView(
|
||||
'no_llm_fallback.product_roles.accessory_product_keywords',
|
||||
'no_llm_fallback.product_roles.vocabulary_views.accessory_product_keywords'
|
||||
@@ -1195,7 +1205,7 @@ final class AgentRunnerConfig
|
||||
public function getShopQueryPositiveTokenFilterMinTokens(): int
|
||||
{
|
||||
return $this->genreInt('shop_query_runtime.positive_token_filter.min_query_tokens_after_filter')
|
||||
?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 2);
|
||||
?? $this->getOptionalInt('shop_runtime.query_cleanup.positive_token_filter.min_query_tokens_after_filter', 1);
|
||||
}
|
||||
|
||||
public function shouldShopQueryPositiveTokenFilterIncludeCurrentInputPreservationTerms(): bool
|
||||
@@ -1508,6 +1518,24 @@ final class AgentRunnerConfig
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Query terms for the history anchor enrichment. The genre value is used
 * when it is non-empty; otherwise the optional agent-level
 * shop_runtime list is returned.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryTerms(): array
{
    $genreTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_terms');
    if ($genreTerms !== []) {
        return $genreTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_terms');
}
|
||||
|
||||
/**
 * Query noise terms for the history anchor enrichment. The genre value is
 * used when it is non-empty; otherwise the optional agent-level
 * shop_runtime list is returned.
 *
 * @return string[]
 */
public function getShopQueryContextAnchorEnrichmentQueryNoiseTerms(): array
{
    $genreNoiseTerms = $this->genreStringList('context_resolution.history_anchor_enrichment.query_noise_terms');
    if ($genreNoiseTerms !== []) {
        return $genreNoiseTerms;
    }

    return $this->getOptionalStringList('shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms');
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
|
||||
@@ -713,6 +713,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
|
||||
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
|
||||
'trigger_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTriggerTerms(),
|
||||
'query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryTerms(),
|
||||
'query_noise_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentQueryNoiseTerms(),
|
||||
'anchor_patterns' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentPatterns(),
|
||||
'template' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentTemplate(),
|
||||
],
|
||||
@@ -1871,6 +1873,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$anchorEnrichment = $contextResolution['history_anchor_enrichment'] ?? [];
|
||||
if (is_array($anchorEnrichment)) {
|
||||
$this->validateStringList($this->toList($anchorEnrichment['trigger_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.trigger_terms', $errors, $warnings);
|
||||
$this->validateStringList($this->toList($anchorEnrichment['query_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_terms', $errors, $warnings);
|
||||
$this->validateStringList($this->toList($anchorEnrichment['query_noise_terms'] ?? []), 'agent.shop_runtime.context_resolution.history_anchor_enrichment.query_noise_terms', $errors, $warnings);
|
||||
$this->validateRegexPatternList($anchorEnrichment['anchor_patterns'] ?? [], 'agent.shop_runtime.context_resolution.history_anchor_enrichment.anchor_patterns', $errors);
|
||||
if (trim((string) ($anchorEnrichment['template'] ?? '')) === '') {
|
||||
$errors[] = 'agent.shop_runtime.context_resolution.history_anchor_enrichment.template must not be empty.';
|
||||
|
||||
Reference in New Issue
Block a user