patch 17
This commit is contained in:
46
RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md
Normal file
46
RETRIEX_PATCH_17_ACCURACY_HOTFIX_README.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# RetrieX Patch 17 - Accuracy Hotfix
|
||||
|
||||
## Purpose
|
||||
|
||||
Patch 17 addresses the first set of concrete accuracy bugs found after the YAML-only and governance cleanup.
|
||||
It keeps the YAML-only policy intact and does not introduce hard domain lists in PHP.
|
||||
|
||||
## Covered issue classes
|
||||
|
||||
1. Product title anchoring for prompts that mention a product family plus variant suffix but omit an intermediate numeric family token.
|
||||
This prevents variant-specific questions from falling back to broader semantic hits.
|
||||
2. RAG evidence confidence for aggregate/count-style portfolio questions.
|
||||
Semantic product hits are no longer enough to mark such answers as `fachlich belegt` unless aggregate evidence terms are present.
|
||||
3. Commerce intent detection for product-seeking measurement prompts, for example a user stating that they want to measure a parameter in a given application context.
|
||||
4. Commerce intent/shop query support for cable/accessory searches such as pH/Redox connection cables.
|
||||
5. Prompt guardrail reinforcement so the model does not transfer accessory roles between separate shop product records.
|
||||
|
||||
## Changed files
|
||||
|
||||
- `src/Knowledge/Retrieval/NdjsonChunkLookup.php`
|
||||
- `src/Agent/AgentRunner.php`
|
||||
- `src/Config/AgentRunnerConfig.php`
|
||||
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||
- `config/retriex/agent.yaml`
|
||||
- `config/retriex/intent.yaml`
|
||||
- `config/retriex/commerce.yaml`
|
||||
- `config/retriex/prompt.yaml`
|
||||
|
||||
## Local checks
|
||||
|
||||
Run after applying:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
## Manual accuracy retests
|
||||
|
||||
- `qwelche grenzwerte kann der testomat testomat cal messen`
|
||||
- `wieviele testomat geräte haben wir`
|
||||
- `ich würde gern chlor im schwinnbad messen`
|
||||
- `zeige mir Anschlusskabel für pH/Redox` followed by `suche im shop`
|
||||
- `suche Messgeräte zur wasseranalyse mit dem parameter Carbonhärte (KH)`
|
||||
@@ -105,6 +105,24 @@ parameters:
|
||||
- produkte
|
||||
- artikel
|
||||
- shop
|
||||
aggregate_query_patterns:
|
||||
- '/\bwie\s+viele\b/u'
|
||||
- '/\bwieviele\b/u'
|
||||
- '/\banzahl\b/u'
|
||||
- '/\bcount\b/u'
|
||||
- '/\bgesamtzahl\b/u'
|
||||
aggregate_evidence_terms:
|
||||
- anzahl
|
||||
- gesamtzahl
|
||||
- stückzahl
|
||||
- stueckzahl
|
||||
- count
|
||||
- portfolio
|
||||
- sortiment
|
||||
- bestand
|
||||
- bestände
|
||||
- bestaende
|
||||
- lieferprogramm
|
||||
synonyms:
|
||||
salinität:
|
||||
- salinität
|
||||
@@ -304,6 +322,7 @@ parameters:
|
||||
- konnte
|
||||
- könnte
|
||||
- ich
|
||||
- mir
|
||||
- wir
|
||||
- man
|
||||
- nutzen
|
||||
|
||||
@@ -140,6 +140,8 @@ parameters:
|
||||
sienem: seinem
|
||||
sienes: seines
|
||||
indicatoren: indikatoren
|
||||
schwinnbad: schwimmbad
|
||||
schwimbad: schwimmbad
|
||||
|
||||
search_token_canonical_map:
|
||||
indikatoren: indikator
|
||||
@@ -158,6 +160,10 @@ parameters:
|
||||
- zubehör
|
||||
- zubehor
|
||||
- ersatzteil
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrodenkabel
|
||||
- verbrauchsmaterial
|
||||
- chemie
|
||||
- indikatorchemie
|
||||
|
||||
@@ -40,6 +40,11 @@ parameters:
|
||||
- zubehör
|
||||
- zubehoer
|
||||
- ersatzteil
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrode
|
||||
- elektrodenkabel
|
||||
non_product_commerce_signals:
|
||||
- shop
|
||||
- alle
|
||||
@@ -69,6 +74,8 @@ parameters:
|
||||
- '/\bmit\s+welche(?:m|n|r|s)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
||||
- '/\bwelche(?:r|s|n|m)?\s+(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:kann|können|koennen|misst|messen|überwacht|ueberwacht|eignet|geeignet|passt|gut|empfehl)\b.*\b(?:messen|messung|überwach(?:en|ung)?|ueberwach(?:en|ung)?)\b/u'
|
||||
- '/\b(?:testomat(?:en)?|pockettester|pocket\s+tester|analysegerät|analysegeraet|messgerät|messgeraet|analysator|analyzer)\b.*\b(?:für|fuer)\b.*\b(?:messung|messen|überwachung|ueberwachung)\b/u'
|
||||
- '/\b(?:ich\s+)?(?:würde|wuerde|möchte|moechte|will|brauche|benötige|benoetige)\b.{0,80}\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b/u'
|
||||
- '/\b(?:messen|messung|überwachen|ueberwachen|kontrollieren)\b.{0,80}\b(?:schwimmbad|pool|becken|wasseranalyse)\b/u'
|
||||
price_terms:
|
||||
- euro
|
||||
- €
|
||||
@@ -143,6 +150,10 @@ parameters:
|
||||
- '/\bzubehör\b/u'
|
||||
- '/\bzubehoer\b/u'
|
||||
- '/\bersatzteil(?:e)?\b/u'
|
||||
- '/\banschlusskabel\b/u'
|
||||
- '/\bkabel\b/u'
|
||||
- '/\bsensorkabel\b/u'
|
||||
- '/\belektrodenkabel\b/u'
|
||||
technical_factual_knowledge:
|
||||
signal_label: technical_factual_knowledge_query
|
||||
question_marker_patterns:
|
||||
|
||||
@@ -434,6 +434,8 @@ parameters:
|
||||
- '- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.'
|
||||
- '- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the
|
||||
main device.'
|
||||
- '- Use the Requested role, Inferred role, and Role compatibility fields independently for each SHOP PRODUCT RECORD; never transfer the role of an accessory, indicator, reagent, kit, or set to a different shop record.'
|
||||
- '- If a SHOP PRODUCT RECORD has Inferred role: main_device, do not describe that same product as accessory_or_consumable merely because other shown records are accessories or consumables.'
|
||||
- '- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.'
|
||||
- '- If no price is shown for a shop item, omit the price instead of writing 0,00 €, free, kostenlos, or a guessed price.'
|
||||
- '- For every shop hit shown in the answer, copy the exact shop product name verbatim from the same SHOP PRODUCT RECORD as the item heading.'
|
||||
|
||||
@@ -1653,6 +1653,13 @@ final readonly class AgentRunner
|
||||
|
||||
$haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks)));
|
||||
|
||||
if (
|
||||
$this->isAggregateRagEvidenceQuery($prompt)
|
||||
&& !$this->containsAnyRagEvidenceTerm($haystack, $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms())
|
||||
) {
|
||||
return 'weak';
|
||||
}
|
||||
|
||||
foreach ($needles as $needleGroup) {
|
||||
foreach ($needleGroup as $needle) {
|
||||
if ($this->containsRagEvidenceTerm($haystack, $needle)) {
|
||||
@@ -1687,6 +1694,37 @@ final readonly class AgentRunner
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Tells whether the prompt matches one of the configured aggregate query patterns.
 *
 * The prompt is first passed through normalizeRagEvidenceText(); a prompt that
 * normalizes to an empty string is never treated as an aggregate query.
 */
private function isAggregateRagEvidenceQuery(string $prompt): bool
{
    $candidate = $this->normalizeRagEvidenceText($prompt);

    if ($candidate !== '') {
        foreach ($this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns() as $regex) {
            // Only a definite match (1) counts; 0 and false (PCRE failure) fall through,
            // and warnings raised by preg_match are suppressed.
            if (@preg_match($regex, $candidate) === 1) {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Checks whether at least one of the given terms occurs in the haystack.
 *
 * Every term is tested through containsRagEvidenceTerm(); the scan stops at
 * the first hit. An empty term list yields false.
 *
 * @param string[] $terms
 */
private function containsAnyRagEvidenceTerm(string $haystack, array $terms): bool
{
    foreach ($terms as $candidate) {
        if (!$this->containsRagEvidenceTerm($haystack, $candidate)) {
            continue;
        }

        return true;
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* @return array<int, string[]>
|
||||
*/
|
||||
|
||||
@@ -339,6 +339,22 @@ final class AgentRunnerConfig
|
||||
return $this->getRequiredStringListMap('rag_evidence_guard.synonyms');
|
||||
}
|
||||
|
||||
/**
 * Returns the string list stored under the config key
 * `rag_evidence_guard.aggregate_query_patterns`, read via getRequiredStringList().
 *
 * @return string[]
 */
public function getRagEvidenceAggregateQueryPatterns(): array
{
    $patterns = $this->getRequiredStringList('rag_evidence_guard.aggregate_query_patterns');

    return $patterns;
}
|
||||
|
||||
/**
 * Returns the string list stored under the config key
 * `rag_evidence_guard.aggregate_evidence_terms`, read via getRequiredStringList().
 *
 * @return string[]
 */
public function getRagEvidenceAggregateEvidenceTerms(): array
{
    $terms = $this->getRequiredStringList('rag_evidence_guard.aggregate_evidence_terms');

    return $terms;
}
|
||||
|
||||
public function getNoLlmFallbackShopOnlyMessage(): string
|
||||
{
|
||||
return $this->getRequiredString('no_llm_fallback.messages.shop_only');
|
||||
|
||||
@@ -455,6 +455,12 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(),
|
||||
'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(),
|
||||
],
|
||||
'rag_evidence_guard' => [
|
||||
'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(),
|
||||
'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(),
|
||||
'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(),
|
||||
'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(),
|
||||
],
|
||||
'source_labels' => [
|
||||
'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(),
|
||||
'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(),
|
||||
@@ -1009,6 +1015,13 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
||||
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
||||
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
||||
|
||||
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
||||
$this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings);
|
||||
$this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings);
|
||||
$this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors);
|
||||
$this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings);
|
||||
|
||||
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
|
||||
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
|
||||
|
||||
|
||||
@@ -155,6 +155,27 @@ final readonly class NdjsonChunkLookup
|
||||
}
|
||||
}
|
||||
|
||||
if ($best === null) {
|
||||
foreach ($documents as $document) {
|
||||
$normalizedTitle = $document['normalized_title'];
|
||||
|
||||
if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$score = 250 + mb_strlen($normalizedTitle, 'UTF-8');
|
||||
|
||||
if (preg_match('/\d/u', $normalizedTitle) === 1) {
|
||||
$score += 500;
|
||||
}
|
||||
|
||||
if ($best === null || $score > $bestScore) {
|
||||
$best = $document;
|
||||
$bestScore = $score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($best === null) {
|
||||
return null;
|
||||
}
|
||||
@@ -248,6 +269,49 @@ final readonly class NdjsonChunkLookup
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Fallback title match for prompts that contain every digit-free significant
 * title token while leaving out a numeric one (such as a numeric family token).
 *
 * It lets a prompt made of a product family plus variant suffix stay anchored
 * to the matching document rather than dropping to broader semantic hits.
 *
 * Returns false when either input is empty, when the title has fewer than two
 * digit-free significant tokens, or when none of the title's significant
 * tokens contains a digit.
 */
private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool
{
    if ($normalizedPrompt === '' || $normalizedTitle === '') {
        return false;
    }

    $significantTokens = $this->significantTitleTokens($normalizedTitle);

    $hasNoDigit = static function (string $token): bool {
        return preg_match('/\d/u', $token) !== 1;
    };
    $digitFreeTokens = array_values(array_filter($significantTokens, $hasNoDigit));

    $digitFreeCount = count($digitFreeTokens);

    // Require at least two digit-free tokens AND at least one digit-bearing token in the title.
    if ($digitFreeCount < 2 || $digitFreeCount === count($significantTokens)) {
        return false;
    }

    $promptLookup = $this->tokenVariantLookup($normalizedPrompt);

    foreach ($digitFreeTokens as $titleToken) {
        foreach ($this->tokenVariants($titleToken) as $variant) {
            if (isset($promptLookup[$variant])) {
                // This title token is covered by the prompt; move on to the next one.
                continue 2;
            }
        }

        // No variant of this title token appears in the prompt.
        return false;
    }

    return true;
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user