This commit is contained in:
team 1
2026-05-01 19:49:43 +02:00
parent a42f8d656d
commit 1897fdf0eb
9 changed files with 215 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
# RetrieX Patch 17 - Accuracy Hotfix
## Purpose
Patch 17 fixes the first set of concrete accuracy bugs identified after the YAML-only and governance cleanup.
It keeps the YAML-only policy intact and does not introduce hard domain lists in PHP.
## Covered issue classes
1. Product title anchoring for prompts that mention a product family plus variant suffix but omit an intermediate numeric family token.
This prevents variant-specific questions from falling back to broader semantic hits.
2. RAG evidence confidence for aggregate/count-style portfolio questions.
Semantic product hits are no longer enough to mark such answers as `fachlich belegt` unless aggregate evidence terms are present.
3. Commerce intent detection for product-seeking measurement prompts, i.e. prompts in which the user says they want to measure a parameter in a given application context (for example chlorine in a swimming pool).
4. Commerce intent/shop query support for cable/accessory searches such as pH/Redox connection cables.
5. Prompt guardrail reinforcement so the model does not transfer accessory roles between separate shop product records.
## Changed files
- `src/Knowledge/Retrieval/NdjsonChunkLookup.php`
- `src/Agent/AgentRunner.php`
- `src/Config/AgentRunnerConfig.php`
- `src/Config/RetriexEffectiveConfigProvider.php`
- `config/retriex/agent.yaml`
- `config/retriex/intent.yaml`
- `config/retriex/commerce.yaml`
- `config/retriex/prompt.yaml`
## Local checks
Run after applying:
```bash
bin/console mto:agent:config:validate
bin/console mto:agent:regression:test
bin/console mto:agent:config:audit-source --details
bin/console mto:agent:config:audit-patterns --details
```
## Manual accuracy retests
- `qwelche grenzwerte kann der testomat testomat cal messen`
- `wieviele testomat geräte haben wir`
- `ich würde gern chlor im schwinnbad messen`
- `zeige mir Anschlusskabel für pH/Redox` followed by `suche im shop`
- `suche Messgeräte zur wasseranalyse mit dem parameter Carbonhärte (KH)`

View File

@@ -105,6 +105,24 @@ parameters:
- produkte
- artikel
- shop
# Regex patterns tested against the normalized prompt; the first match marks
# the prompt as an aggregate/count-style query for the RAG evidence check.
aggregate_query_patterns:
- '/\bwie\s+viele\b/u'
- '/\bwieviele\b/u'
- '/\banzahl\b/u'
- '/\bcount\b/u'
- '/\bgesamtzahl\b/u'
# For aggregate/count-style prompts, the retrieved knowledge text must contain
# at least one of these terms; otherwise the RAG evidence check yields 'weak'.
aggregate_evidence_terms:
- anzahl
- gesamtzahl
- stückzahl
- stueckzahl
- count
- portfolio
- sortiment
- bestand
- bestände
- bestaende
- lieferprogramm
synonyms:
salinität:
- salinität
@@ -304,6 +322,7 @@ parameters:
- konnte
- könnte
- ich
- mir
- wir
- man
- nutzen

View File

@@ -140,6 +140,8 @@ parameters:
sienem: seinem
sienes: seines
indicatoren: indikatoren
schwinnbad: schwimmbad
schwimbad: schwimmbad
search_token_canonical_map:
indikatoren: indikator
@@ -158,6 +160,10 @@ parameters:
- zubehör
- zubehor
- ersatzteil
- anschlusskabel
- kabel
- sensorkabel
- elektrodenkabel
- verbrauchsmaterial
- chemie
- indikatorchemie

View File

@@ -40,6 +40,11 @@ parameters:
- zubehör
- zubehoer
- ersatzteil
- anschlusskabel
- kabel
- sensorkabel
- elektrode
- elektrodenkabel
non_product_commerce_signals:
- shop
- alle
@@ -69,6 +74,8 @@ parameters:
- '/\bmit\s+welche(?:m|n|r|s)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
- '/\bwelche(?:r|s|n|m)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:kann|können|koennen|misst|messen|überwacht|ueberwacht|eignet|geeignet|passt|gut|empfehl)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
- '/\b(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:für|fuer)\b.*\b(?:messung|messen|überwachung|ueberwachung)\b/u'
- '/\b(?:ich\s+)?(?:würde|wuerde|möchte|moechte|will|brauche|benötige|benoetige)\b.{0,80}\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b/u'
- '/\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b.{0,80}\b(?:schwimmbad|pool|becken|wasseranalyse)\b/u'
price_terms:
- euro
-
@@ -143,6 +150,10 @@ parameters:
- '/\bzubehör\b/u'
- '/\bzubehoer\b/u'
- '/\bersatzteil(?:e)?\b/u'
- '/\banschlusskabel\b/u'
- '/\bkabel\b/u'
- '/\bsensorkabel\b/u'
- '/\belektrodenkabel\b/u'
technical_factual_knowledge:
signal_label: technical_factual_knowledge_query
question_marker_patterns:

View File

@@ -434,6 +434,8 @@ parameters:
- '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.'
- '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the
main device.'
- '- Use the Requested role, Inferred role, and Role compatibility fields independently for each SHOP PRODUCT RECORD; never transfer the role of an accessory, indicator, reagent, kit, or set to a different shop record.'
- '- If a SHOP PRODUCT RECORD has Inferred role: main_device, do not describe that same product as accessory_or_consumable merely because other shown records are accessories or consumables.'
- '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.'
- '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.'
- '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.'

View File

@@ -1653,6 +1653,13 @@ final readonly class AgentRunner
$haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks)));
if (
$this->isAggregateRagEvidenceQuery($prompt)
&& !$this->containsAnyRagEvidenceTerm($haystack, $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms())
) {
return 'weak';
}
foreach ($needles as $needleGroup) {
foreach ($needleGroup as $needle) {
if ($this->containsRagEvidenceTerm($haystack, $needle)) {
@@ -1687,6 +1694,37 @@ final readonly class AgentRunner
};
}
/**
 * Tells whether the prompt is an aggregate/count-style question.
 *
 * The prompt is normalized first and then tested against each configured
 * aggregate query pattern; the first matching pattern decides the result.
 * A prompt that normalizes to an empty string is never an aggregate query.
 */
private function isAggregateRagEvidenceQuery(string $prompt): bool
{
    $subject = $this->normalizeRagEvidenceText($prompt);

    if ($subject !== '') {
        $patterns = $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns();

        foreach ($patterns as $regex) {
            // `@` silences the warning preg_match raises for a malformed
            // pattern; such a pattern returns false and counts as "no match".
            $outcome = @preg_match($regex, $subject);

            if ($outcome === 1) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Reports whether at least one of the given terms is found in the haystack,
 * as judged by containsRagEvidenceTerm(). An empty term list yields false.
 *
 * @param string[] $terms
 */
private function containsAnyRagEvidenceTerm(string $haystack, array $terms): bool
{
    foreach ($terms as $candidate) {
        if (!$this->containsRagEvidenceTerm($haystack, $candidate)) {
            continue;
        }

        // The first hit is sufficient; remaining terms are not checked.
        return true;
    }

    return false;
}
/**
* @return array<int, string[]>
*/

View File

@@ -339,6 +339,22 @@ final class AgentRunnerConfig
return $this->getRequiredStringListMap('rag_evidence_guard.synonyms');
}
/**
 * Returns the required string list stored under
 * `rag_evidence_guard.aggregate_query_patterns`.
 *
 * @return string[]
 */
public function getRagEvidenceAggregateQueryPatterns(): array
{
    $patterns = $this->getRequiredStringList('rag_evidence_guard.aggregate_query_patterns');

    return $patterns;
}
/**
 * Returns the required string list stored under
 * `rag_evidence_guard.aggregate_evidence_terms`.
 *
 * @return string[]
 */
public function getRagEvidenceAggregateEvidenceTerms(): array
{
    $terms = $this->getRequiredStringList('rag_evidence_guard.aggregate_evidence_terms');

    return $terms;
}
public function getNoLlmFallbackShopOnlyMessage(): string
{
return $this->getRequiredString('no_llm_fallback.messages.shop_only');

View File

@@ -455,6 +455,12 @@ final readonly class RetriexEffectiveConfigProvider
'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(),
'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(),
],
'rag_evidence_guard' => [
'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(),
'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(),
'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(),
'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(),
],
'source_labels' => [
'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(),
'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(),
@@ -1009,6 +1015,13 @@ final readonly class RetriexEffectiveConfigProvider
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
$this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings);
$this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings);
$this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors);
$this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings);
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);

View File

@@ -155,6 +155,27 @@ final readonly class NdjsonChunkLookup
}
}
if ($best === null) {
foreach ($documents as $document) {
$normalizedTitle = $document['normalized_title'];
if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) {
continue;
}
$score = 250 + mb_strlen($normalizedTitle, 'UTF-8');
if (preg_match('/\d/u', $normalizedTitle) === 1) {
$score += 500;
}
if ($best === null || $score > $bestScore) {
$best = $document;
$bestScore = $score;
}
}
}
if ($best === null) {
return null;
}
@@ -248,6 +269,49 @@ final readonly class NdjsonChunkLookup
return true;
}
/**
 * Decides whether a prompt names a title closely enough when only the
 * title's digit-free tokens are compared.
 *
 * A title qualifies only if it has at least two significant tokens without
 * digits and at least one significant token containing a digit. Every
 * digit-free token must then occur in the prompt through one of its token
 * variants; tokens containing digits are not checked against the prompt.
 *
 * Intended for prompts that give a product family plus variant suffix but
 * leave out the numeric family token, so they stay anchored to the matching
 * document instead of falling back to broader semantic hits.
 */
private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool
{
    if ($normalizedPrompt === '') {
        return false;
    }

    if ($normalizedTitle === '') {
        return false;
    }

    $significantTokens = $this->significantTitleTokens($normalizedTitle);

    // Keep only the tokens that contain no digit.
    $digitFreeTokens = array_values(array_filter(
        $significantTokens,
        static fn (string $token): bool => preg_match('/\d/u', $token) !== 1
    ));
    $digitFreeCount = count($digitFreeTokens);

    // A single digit-free token is too little to anchor a title.
    if ($digitFreeCount < 2) {
        return false;
    }

    // Nothing was filtered out, so the title has no digit-bearing token.
    if ($digitFreeCount === count($significantTokens)) {
        return false;
    }

    $promptLookup = $this->tokenVariantLookup($normalizedPrompt);

    foreach ($digitFreeTokens as $digitFreeToken) {
        foreach ($this->tokenVariants($digitFreeToken) as $candidate) {
            if (isset($promptLookup[$candidate])) {
                // This title token is covered; move on to the next one.
                continue 2;
            }
        }

        // No variant of this title token occurs in the prompt.
        return false;
    }

    return true;
}
/**
* @return string[]
*/