patch 17
This commit is contained in:
46
RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md
Normal file
46
RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# RetrieX Patch 17 - Accuracy Hotfix
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Patch 17 addresses the first concrete accuracy bug set after the YAML-only and governance cleanup.
|
||||||
|
It keeps the YAML-only policy intact and does not introduce hard domain lists in PHP.
|
||||||
|
|
||||||
|
## Covered issue classes
|
||||||
|
|
||||||
|
1. Product title anchoring for prompts that mention a product family plus variant suffix but omit an intermediate numeric family token.
|
||||||
|
This prevents variant-specific questions from falling back to broader semantic hits.
|
||||||
|
2. RAG evidence confidence for aggregate/count-style portfolio questions.
|
||||||
|
Semantic product hits alone no longer mark such answers as `fachlich belegt`; the retrieved knowledge chunks must also contain at least one aggregate evidence term.
|
||||||
|
3. Commerce intent detection for product-seeking measurement prompts, e.g. a user stating that they want to measure a parameter in a specific application context.
|
||||||
|
4. Commerce intent/shop query support for cable/accessory searches such as pH/Redox connection cables.
|
||||||
|
5. Prompt guardrail reinforcement so the model does not transfer accessory roles between separate shop product records.
|
||||||
|
|
||||||
|
## Changed files
|
||||||
|
|
||||||
|
- `src/Knowledge/Retrieval/NdjsonChunkLookup.php`
|
||||||
|
- `src/Agent/AgentRunner.php`
|
||||||
|
- `src/Config/AgentRunnerConfig.php`
|
||||||
|
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||||
|
- `config/retriex/agent.yaml`
|
||||||
|
- `config/retriex/intent.yaml`
|
||||||
|
- `config/retriex/commerce.yaml`
|
||||||
|
- `config/retriex/prompt.yaml`
|
||||||
|
|
||||||
|
## Local checks
|
||||||
|
|
||||||
|
Run after applying:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/console mto:agent:config:validate
|
||||||
|
bin/console mto:agent:regression:test
|
||||||
|
bin/console mto:agent:config:audit-source --details
|
||||||
|
bin/console mto:agent:config:audit-patterns --details
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manual accuracy retests
|
||||||
|
|
||||||
|
- `qwelche grenzwerte kann der testomat testomat cal messen`
|
||||||
|
- `wieviele testomat geräte haben wir`
|
||||||
|
- `ich würde gern chlor im schwinnbad messen`
|
||||||
|
- `zeige mir Anschlusskabel für pH/Redox` followed by `suche im shop`
|
||||||
|
- `suche Messgeräte zur wasseranalyse mit dem parameter Carbonhärte (KH)`
|
||||||
@@ -105,6 +105,24 @@ parameters:
|
|||||||
- produkte
|
- produkte
|
||||||
- artikel
|
- artikel
|
||||||
- shop
|
- shop
|
||||||
|
aggregate_query_patterns:
|
||||||
|
- '/\bwie\s+viele\b/u'
|
||||||
|
- '/\bwieviele\b/u'
|
||||||
|
- '/\banzahl\b/u'
|
||||||
|
- '/\bcount\b/u'
|
||||||
|
- '/\bgesamtzahl\b/u'
|
||||||
|
aggregate_evidence_terms:
|
||||||
|
- anzahl
|
||||||
|
- gesamtzahl
|
||||||
|
- stückzahl
|
||||||
|
- stueckzahl
|
||||||
|
- count
|
||||||
|
- portfolio
|
||||||
|
- sortiment
|
||||||
|
- bestand
|
||||||
|
- bestände
|
||||||
|
- bestaende
|
||||||
|
- lieferprogramm
|
||||||
synonyms:
|
synonyms:
|
||||||
salinität:
|
salinität:
|
||||||
- salinität
|
- salinität
|
||||||
@@ -304,6 +322,7 @@ parameters:
|
|||||||
- konnte
|
- konnte
|
||||||
- könnte
|
- könnte
|
||||||
- ich
|
- ich
|
||||||
|
- mir
|
||||||
- wir
|
- wir
|
||||||
- man
|
- man
|
||||||
- nutzen
|
- nutzen
|
||||||
|
|||||||
@@ -140,6 +140,8 @@ parameters:
|
|||||||
sienem: seinem
|
sienem: seinem
|
||||||
sienes: seines
|
sienes: seines
|
||||||
indicatoren: indikatoren
|
indicatoren: indikatoren
|
||||||
|
schwinnbad: schwimmbad
|
||||||
|
schwimbad: schwimmbad
|
||||||
|
|
||||||
search_token_canonical_map:
|
search_token_canonical_map:
|
||||||
indikatoren: indikator
|
indikatoren: indikator
|
||||||
@@ -158,6 +160,10 @@ parameters:
|
|||||||
- zubehör
|
- zubehör
|
||||||
- zubehor
|
- zubehor
|
||||||
- ersatzteil
|
- ersatzteil
|
||||||
|
- anschlusskabel
|
||||||
|
- kabel
|
||||||
|
- sensorkabel
|
||||||
|
- elektrodenkabel
|
||||||
- verbrauchsmaterial
|
- verbrauchsmaterial
|
||||||
- chemie
|
- chemie
|
||||||
- indikatorchemie
|
- indikatorchemie
|
||||||
|
|||||||
@@ -40,6 +40,11 @@ parameters:
|
|||||||
- zubehör
|
- zubehör
|
||||||
- zubehoer
|
- zubehoer
|
||||||
- ersatzteil
|
- ersatzteil
|
||||||
|
- anschlusskabel
|
||||||
|
- kabel
|
||||||
|
- sensorkabel
|
||||||
|
- elektrode
|
||||||
|
- elektrodenkabel
|
||||||
non_product_commerce_signals:
|
non_product_commerce_signals:
|
||||||
- shop
|
- shop
|
||||||
- alle
|
- alle
|
||||||
@@ -69,6 +74,8 @@ parameters:
|
|||||||
- '/\bmit\s+welche(?:m|n|r|s)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
- '/\bmit\s+welche(?:m|n|r|s)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
||||||
- '/\bwelche(?:r|s|n|m)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:kann|können|koennen|misst|messen|überwacht|ueberwacht|eignet|geeignet|passt|gut|empfehl)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
- '/\bwelche(?:r|s|n|m)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:kann|können|koennen|misst|messen|überwacht|ueberwacht|eignet|geeignet|passt|gut|empfehl)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
||||||
- '/\b(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:für|fuer)\b.*\b(?:messung|messen|überwachung|ueberwachung)\b/u'
|
- '/\b(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:für|fuer)\b.*\b(?:messung|messen|überwachung|ueberwachung)\b/u'
|
||||||
|
- '/\b(?:ich\s+)?(?:würde|wuerde|möchte|moechte|will|brauche|benötige|benoetige)\b.{0,80}\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b/u'
|
||||||
|
- '/\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b.{0,80}\b(?:schwimmbad|pool|becken|wasseranalyse)\b/u'
|
||||||
price_terms:
|
price_terms:
|
||||||
- euro
|
- euro
|
||||||
- €
|
- €
|
||||||
@@ -143,6 +150,10 @@ parameters:
|
|||||||
- '/\bzubehör\b/u'
|
- '/\bzubehör\b/u'
|
||||||
- '/\bzubehoer\b/u'
|
- '/\bzubehoer\b/u'
|
||||||
- '/\bersatzteil(?:e)?\b/u'
|
- '/\bersatzteil(?:e)?\b/u'
|
||||||
|
- '/\banschlusskabel\b/u'
|
||||||
|
- '/\bkabel\b/u'
|
||||||
|
- '/\bsensorkabel\b/u'
|
||||||
|
- '/\belektrodenkabel\b/u'
|
||||||
technical_factual_knowledge:
|
technical_factual_knowledge:
|
||||||
signal_label: technical_factual_knowledge_query
|
signal_label: technical_factual_knowledge_query
|
||||||
question_marker_patterns:
|
question_marker_patterns:
|
||||||
|
|||||||
@@ -434,6 +434,8 @@ parameters:
|
|||||||
- '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.'
|
- '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.'
|
||||||
- '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the
|
- '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the
|
||||||
main device.'
|
main device.'
|
||||||
|
- '- Use the Requested role, Inferred role, and Role compatibility fields independently for each SHOP PRODUCT RECORD; never transfer the role of an accessory, indicator, reagent, kit, or set to a different shop record.'
|
||||||
|
- '- If a SHOP PRODUCT RECORD has Inferred role: main_device, do not describe that same product as accessory_or_consumable merely because other shown records are accessories or consumables.'
|
||||||
- '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.'
|
- '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.'
|
||||||
- '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.'
|
- '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.'
|
||||||
- '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.'
|
- '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.'
|
||||||
|
|||||||
@@ -1653,6 +1653,13 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
$haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks)));
|
$haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks)));
|
||||||
|
|
||||||
|
if (
|
||||||
|
$this->isAggregateRagEvidenceQuery($prompt)
|
||||||
|
&& !$this->containsAnyRagEvidenceTerm($haystack, $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms())
|
||||||
|
) {
|
||||||
|
return 'weak';
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($needles as $needleGroup) {
|
foreach ($needles as $needleGroup) {
|
||||||
foreach ($needleGroup as $needle) {
|
foreach ($needleGroup as $needle) {
|
||||||
if ($this->containsRagEvidenceTerm($haystack, $needle)) {
|
if ($this->containsRagEvidenceTerm($haystack, $needle)) {
|
||||||
@@ -1687,6 +1694,37 @@ final readonly class AgentRunner
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the prompt matches one of the configured aggregate query patterns.
 *
 * The prompt is normalized first; an empty normalized prompt never counts as an
 * aggregate query and the pattern list is not consulted in that case.
 */
private function isAggregateRagEvidenceQuery(string $prompt): bool
{
    $text = $this->normalizeRagEvidenceText($prompt);

    // Only fetch the configured patterns when there is something to match against.
    $patterns = $text === ''
        ? []
        : $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns();

    foreach ($patterns as $regex) {
        // Suppression is intentional: a pattern that fails to compile is treated as "no match".
        $outcome = @preg_match($regex, $text);

        if ($outcome === 1) {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether at least one of the given terms is found in the haystack.
 *
 * Each term is checked via containsRagEvidenceTerm(); the scan stops at the
 * first hit. An empty term list yields false.
 *
 * @param string[] $terms
 */
private function containsAnyRagEvidenceTerm(string $haystack, array $terms): bool
{
    foreach ($terms as $candidate) {
        if (!$this->containsRagEvidenceTerm($haystack, $candidate)) {
            continue;
        }

        return true;
    }

    return false;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array<int, string[]>
|
* @return array<int, string[]>
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -339,6 +339,22 @@ final class AgentRunnerConfig
|
|||||||
return $this->getRequiredStringListMap('rag_evidence_guard.synonyms');
|
return $this->getRequiredStringListMap('rag_evidence_guard.synonyms');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under the config key
 * `rag_evidence_guard.aggregate_query_patterns`.
 *
 * @return string[]
 */
public function getRagEvidenceAggregateQueryPatterns(): array
{
    $configPath = 'rag_evidence_guard.aggregate_query_patterns';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under the config key
 * `rag_evidence_guard.aggregate_evidence_terms`.
 *
 * @return string[]
 */
public function getRagEvidenceAggregateEvidenceTerms(): array
{
    $configPath = 'rag_evidence_guard.aggregate_evidence_terms';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
public function getNoLlmFallbackShopOnlyMessage(): string
|
public function getNoLlmFallbackShopOnlyMessage(): string
|
||||||
{
|
{
|
||||||
return $this->getRequiredString('no_llm_fallback.messages.shop_only');
|
return $this->getRequiredString('no_llm_fallback.messages.shop_only');
|
||||||
|
|||||||
@@ -455,6 +455,12 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(),
|
'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(),
|
||||||
'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(),
|
'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(),
|
||||||
],
|
],
|
||||||
|
'rag_evidence_guard' => [
|
||||||
|
'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(),
|
||||||
|
'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(),
|
||||||
|
'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(),
|
||||||
|
'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(),
|
||||||
|
],
|
||||||
'source_labels' => [
|
'source_labels' => [
|
||||||
'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(),
|
'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(),
|
||||||
'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(),
|
'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(),
|
||||||
@@ -1009,6 +1015,13 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
||||||
|
|
||||||
|
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
||||||
|
$this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings);
|
||||||
|
$this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings);
|
||||||
|
$this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors);
|
||||||
|
$this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings);
|
||||||
|
|
||||||
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
|
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
|
||||||
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
|
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
|
||||||
|
|
||||||
|
|||||||
@@ -155,6 +155,27 @@ final readonly class NdjsonChunkLookup
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($best === null) {
|
||||||
|
foreach ($documents as $document) {
|
||||||
|
$normalizedTitle = $document['normalized_title'];
|
||||||
|
|
||||||
|
if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$score = 250 + mb_strlen($normalizedTitle, 'UTF-8');
|
||||||
|
|
||||||
|
if (preg_match('/\d/u', $normalizedTitle) === 1) {
|
||||||
|
$score += 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($best === null || $score > $bestScore) {
|
||||||
|
$best = $document;
|
||||||
|
$bestScore = $score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ($best === null) {
|
if ($best === null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -248,6 +269,49 @@ final readonly class NdjsonChunkLookup
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Title match fallback based on the digit-free tokens of a product title.
 *
 * A title qualifies only when its significant tokens include at least two
 * tokens without digits and at least one token with a digit. The match is
 * confident when every digit-free title token has some variant present in
 * the prompt's token variant lookup, so a prompt may omit the numeric
 * family token and still anchor to the title.
 *
 * Returns false when either input string is empty.
 */
private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool
{
    if ($normalizedPrompt === '' || $normalizedTitle === '') {
        return false;
    }

    $allTokens = $this->significantTitleTokens($normalizedTitle);
    $wordTokens = array_filter(
        $allTokens,
        static fn (string $candidate): bool => preg_match('/\d/u', $candidate) !== 1
    );

    $wordTokenCount = count($wordTokens);

    // Need two or more digit-free tokens, and the title must also carry a digit token.
    if ($wordTokenCount < 2 || $wordTokenCount === count($allTokens)) {
        return false;
    }

    $promptLookup = $this->tokenVariantLookup($normalizedPrompt);

    foreach ($wordTokens as $wordToken) {
        foreach ($this->tokenVariants($wordToken) as $variant) {
            if (isset($promptLookup[$variant])) {
                // This title token is covered by the prompt; move on to the next one.
                continue 2;
            }
        }

        // No variant of this title token occurs in the prompt.
        return false;
    }

    return true;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user