This commit is contained in:
team 1
2026-05-09 20:28:43 +02:00
parent aae4935d69
commit 8827a5a13b
4 changed files with 382 additions and 0 deletions

View File

@@ -1435,6 +1435,7 @@ parameters:
- '- Never write shop-hit lines such as price, availability, URL, product number, or Shop-Treffer below a RAG device unless the same exact SHOP PRODUCT RECORD names that device as the exact shop product.'
- '- Never rename a role-incompatible accessory shop record into a main device in headings, summaries, or shop-hit lines.'
- '- If the user asks for the price or availability of a referenced accessory, indicator, reagent, kit, set, or consumable, use commercial fields only from a shop result that clearly matches that accessory identity and code.'
- '- If an accessory, indicator, reagent, kit, set, or consumable code is explicitly requested, do not merge shop variants whose code has an additional suffix, prefix, or variant token unless the user explicitly requested that full variant code.'
- '- For such accessory price follow-ups, do not answer with the price, URL, product number, or availability of the main device or of unrelated reagents; if no matching accessory shop item is present, say that the price is not available in the provided shop data.'
prompt_keyword_views:
origin: genre_native

View File

@@ -0,0 +1,74 @@
# RetrieX Patch p72 - Exact Accessory Code Precision
## Ziel
p72 verhindert, dass bei Preis-/Verfügbarkeits-Follow-ups zu einer konkret referenzierten Zubehör-, Indikator- oder Reagenz-Code-Identität nahe Varianten als gleichwertige Treffer in die Antwort gelangen.
Konkreter Regressionsfall:
1. `Was ist der niedrigste Grenzwert für die Wasserhärte, welcher mit einem Testomaten überwacht werden kann?`
2. `mit welchem indikator wird der wert gemessen`
3. `was kostet der indikator`
Wenn der Verlauf auf `Indikatortyp 300` zeigt, dürfen Produkte wie `300 S` nicht als Preisantwort für `300` mit ausgegeben werden, sofern der Nutzer nicht explizit die vollständige Variante `300 S` angefragt hat.
## Umsetzung
- `AgentRunner` filtert Shop-Ergebnisse nach einer exakt angefragten Zubehör-/Code-Identität, bevor die Ergebnisse an den PromptBuilder gehen.
- Die Erkennung ist generisch:
- Code-Begriffe kommen aus `genre.yaml` / bestehender Konfiguration (`search_repair.requested_accessory_code_terms`).
- Code-Token werden anhand bestehender Code-Patterns bzw. generischer alphanumerischer Code-Formen erkannt.
- Ein reiner Code wie `300` matcht nicht mehr automatisch Varianten mit zusätzlichem Ein-Buchstaben-Suffix wie `300 S`.
- Wird die Variante explizit angefragt, z. B. `300 S`, bleibt sie matchfähig.
- `AgentRunnerConfig` stellt die bestehenden requested-accessory-code terms für den Runner bereit.
- `genre.yaml` ergänzt eine generische Prompt-Regel, damit das LLM zusätzlich keine Code-Varianten zusammenführt, die nicht explizit angefragt wurden.
## Bewusst nicht geändert
- Kein Testomat-/Indikator-300-Sonderfall im PHP-Core.
- Keine neue harte fachliche Tokenliste im PHP-Core.
- Keine Änderung an Retrieval, Scoring, Shopware-Suche, Ranking oder Follow-up-Action-UI.
## Geänderte Dateien
- `src/Agent/AgentRunner.php`
- `src/Config/AgentRunnerConfig.php`
- `config/retriex/genre.yaml`
## Lokale Checks
Ausgeführt im Patch-Arbeitsverzeichnis:
```bash
php -l src/Agent/AgentRunner.php
php -l src/Config/AgentRunnerConfig.php
python3 - <<'PY'
import yaml
for f in ['config/retriex/genre.yaml','config/retriex/chat-messages.yaml','config/retriex/agent.yaml']:
    with open(f, 'r', encoding='utf-8') as fh:
        yaml.safe_load(fh)
print('YAML parse OK')
PY
```
`vendor/` ist im ZIP nicht enthalten; Symfony-Console-Checks bitte in der Zielumgebung ausführen.
## Empfohlene Regressionstests
```text
Was ist der niedrigste Grenzwert für die Wasserhärte, welcher mit einem Testomaten überwacht werden kann?
```
```text
mit welchem indikator wird der wert gemessen
```
```text
was kostet der indikator
```
Erwartung:
- Preisantwort für `Indikatortyp 300` bleibt bei exakt passenden `300`-Produkten.
- `300 S` wird nicht als Preisvariante für `300` ausgegeben, solange `300 S` nicht explizit angefragt wurde.
- Falls explizit nach `300 S` gefragt wird, darf `300 S` weiterhin gefunden werden.

View File

@@ -519,6 +519,7 @@ final readonly class AgentRunner
}
$shopResults = $this->guardShopResultsByReferencedProductAnchor($shopSearchQuery, $shopResults);
$shopResults = $this->guardShopResultsByExactRequestedAccessoryCode($prompt, $shopSearchQuery, $shopResults);
$shopResults = $this->sortShopResultsForLengthRequest($prompt, $shopSearchQuery, $shopResults);
$attemptedShopRepair = $repairPayload['attemptedRepair'] || $directIdentityRepairPayload['attemptedRepair'];
$usedShopRepair = $repairPayload['usedRepair'] || $directIdentityRepairPayload['usedRepair'];
@@ -3538,6 +3539,303 @@ final readonly class AgentRunner
return $this->containsAllShopQueryTokens($productText, $anchor);
}
/**
 * Narrows shop results to the products that carry the exactly requested accessory code.
 *
 * The incoming list is returned unchanged when it is empty, when no exact code can be
 * derived from the prompt/query, or when not a single product survives the filter
 * (the guard never turns a non-empty result list into an empty one).
 *
 * @param ShopProductResult[] $shopResults
 * @return ShopProductResult[]
 */
private function guardShopResultsByExactRequestedAccessoryCode(string $prompt, string $shopSearchQuery, array $shopResults): array
{
    if ($shopResults === []) {
        return $shopResults;
    }

    $requestedCodes = $this->extractExactRequestedAccessoryCodes($prompt, $shopSearchQuery);
    if ($requestedCodes === []) {
        return $shopResults;
    }

    // Entries that are not ShopProductResult instances never make it into the filtered list.
    $matching = array_values(array_filter(
        $shopResults,
        fn (mixed $candidate): bool => $candidate instanceof ShopProductResult
            && $this->shopProductMatchesExactRequestedAccessoryCode($candidate, $requestedCodes),
    ));

    return $matching === [] ? $shopResults : $matching;
}
/**
 * Collects the accessory codes that appear next to a configured code term
 * in the combined prompt and shop search query.
 *
 * For every occurrence of a configured term the code is looked up in the three
 * tokens after the term first, and only if nothing is found there in the three
 * tokens before it. Each distinct code is reported once, in order of discovery.
 *
 * @return string[]
 */
private function extractExactRequestedAccessoryCodes(string $prompt, string $shopSearchQuery): array
{
    $context = $this->normalizeOneLine(trim($prompt . ' ' . $shopSearchQuery));
    if ($context === '') {
        return [];
    }

    $configuredTerms = $this->agentRunnerConfig->getRequestedAccessoryCodeTerms();
    if ($configuredTerms === []) {
        return [];
    }

    $contextTokens = $this->tokenizeAccessoryCodeContext($context);
    if ($contextTokens === []) {
        return [];
    }

    // Tokenize every configured term once; terms that yield no tokens are dropped.
    $termSequences = [];
    foreach ($configuredTerms as $configuredTerm) {
        $sequence = $this->tokenizeAccessoryCodeContext($configuredTerm);
        if ($sequence === []) {
            continue;
        }
        $termSequences[] = $sequence;
    }
    if ($termSequences === []) {
        return [];
    }

    $found = [];
    $positions = array_keys($contextTokens);
    foreach ($termSequences as $sequence) {
        $sequenceLength = count($sequence);
        foreach ($positions as $position) {
            if (!$this->tokenSequenceMatchesAt($contextTokens, $sequence, $position)) {
                continue;
            }

            $code = $this->findNearestRequestedAccessoryCodeAfter($contextTokens, $position + $sequenceLength, 3, $termSequences);
            if ($code === '') {
                $code = $this->findNearestRequestedAccessoryCodeBefore($contextTokens, $position - 1, 3, $termSequences);
            }
            if ($code === '') {
                continue;
            }

            // Keyed by the code itself so repeated hits collapse into one entry.
            $found[$code] = $code;
        }
    }

    return array_values($found);
}
/**
 * Scans forward from $start and returns the first accessory code that can be
 * built from a token inside the window, or an empty string when there is none.
 *
 * The scan is clamped to the token list; a window below 1 still inspects the
 * token at $start.
 *
 * @param string[] $tokens
 * @param array<int, string[]> $termTokenSequences
 */
private function findNearestRequestedAccessoryCodeAfter(array $tokens, int $start, int $window, array $termTokenSequences): string
{
    $last = min(count($tokens) - 1, $start + max(0, $window - 1));
    $cursor = max(0, $start);

    while ($cursor <= $last) {
        $candidate = $this->buildRequestedAccessoryCodeFromTokenWindow($tokens, $cursor, $termTokenSequences);
        if ($candidate !== '') {
            return $candidate;
        }
        $cursor++;
    }

    return '';
}
/**
 * Scans backward from $start and returns the first accessory code that can be
 * built from a token inside the window, or an empty string when there is none.
 *
 * The scan is clamped to the token list, so a negative $start yields no candidates.
 *
 * @param string[] $tokens
 * @param array<int, string[]> $termTokenSequences
 */
private function findNearestRequestedAccessoryCodeBefore(array $tokens, int $start, int $window, array $termTokenSequences): string
{
    $lowest = max(0, $start - max(0, $window - 1));
    $cursor = min(count($tokens) - 1, $start);

    while ($cursor >= $lowest) {
        $candidate = $this->buildRequestedAccessoryCodeFromTokenWindow($tokens, $cursor, $termTokenSequences);
        if ($candidate !== '') {
            return $candidate;
        }
        $cursor--;
    }

    return '';
}
/**
 * Tells whether $needle occurs in $tokens starting exactly at $position.
 *
 * An empty needle, a negative position, or a needle that would run past the
 * end of $tokens never matches. Tokens are compared strictly.
 *
 * @param string[] $tokens
 * @param string[] $needle
 */
private function tokenSequenceMatchesAt(array $tokens, array $needle, int $position): bool
{
    $needleLength = count($needle);
    if ($needleLength === 0) {
        return false;
    }
    if ($position < 0) {
        return false;
    }
    if ($position + $needleLength > count($tokens)) {
        return false;
    }

    foreach ($needle as $offset => $expected) {
        $actual = $tokens[$position + $offset] ?? null;
        if ($actual !== $expected) {
            return false;
        }
    }

    return true;
}
/**
 * Builds the accessory code phrase anchored at the token at $index.
 *
 * Returns an empty string when that token is not a strict accessory code token.
 * Otherwise the code is extended by a directly following single-letter suffix
 * or, failing that, by a directly preceding short alphabetic prefix, provided
 * the neighbouring token is not the start of a configured code term. The suffix
 * takes precedence over the prefix.
 *
 * @param string[] $tokens
 * @param array<int, string[]> $termTokenSequences
 */
private function buildRequestedAccessoryCodeFromTokenWindow(array $tokens, int $index, array $termTokenSequences): string
{
    $current = $tokens[$index] ?? '';
    if (!$this->isStrictAccessoryCodeToken($current)) {
        return '';
    }

    $phrase = $current;

    $following = $tokens[$index + 1] ?? '';
    $hasVariantSuffix = $this->isSingleLetterVariantSuffix($following)
        && !$this->tokenStartsAnyConfiguredTerm($tokens, $termTokenSequences, $index + 1);

    if ($hasVariantSuffix) {
        $phrase = $current . ' ' . $following;
    } else {
        $preceding = $tokens[$index - 1] ?? '';
        $hasAlphaPrefix = $this->isShortAlphaCodePrefix($preceding)
            && !$this->tokenStartsAnyConfiguredTerm($tokens, $termTokenSequences, $index - 1);

        if ($hasAlphaPrefix) {
            $phrase = $preceding . ' ' . $current;
        }
    }

    return $this->normalizeAccessoryCodePhrase($phrase);
}
/**
 * Tells whether any of the configured term token sequences begins at $position in $tokens.
 *
 * @param string[] $tokens
 * @param array<int, string[]> $termTokenSequences
 */
private function tokenStartsAnyConfiguredTerm(array $tokens, array $termTokenSequences, int $position): bool
{
    $startsTerm = false;

    foreach ($termTokenSequences as $sequence) {
        if ($this->tokenSequenceMatchesAt($tokens, $sequence, $position)) {
            $startsTerm = true;
            break;
        }
    }

    return $startsTerm;
}
/**
 * Tells whether the product's name or URL carries one of the requested accessory codes.
 *
 * Name and URL are tokenized and checked independently. Joining them into one
 * token stream would let tokens bleed across the field boundary: a single-letter
 * token at the start of the URL (for example a "/p/..." path segment) would be
 * read as a variant suffix of a code that ends the name and wrongly reject the
 * product, and a multi-token code could match half in the name and half in the URL.
 *
 * @param string[] $requestedCodes
 */
private function shopProductMatchesExactRequestedAccessoryCode(ShopProductResult $product, array $requestedCodes): bool
{
    // array_filter() drops unset/empty identity fields.
    $identityFields = array_filter([
        $product->name,
        $product->url,
    ]);

    foreach ($identityFields as $identityField) {
        $identityText = $this->normalizeOneLine(trim((string) $identityField));
        if ($identityText === '') {
            continue;
        }

        $tokens = $this->tokenizeAccessoryCodeContext($identityText);
        if ($tokens === []) {
            continue;
        }

        foreach ($requestedCodes as $code) {
            if ($this->accessoryCodeTokensContainExactCode($tokens, $code)) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Tells whether the product tokens contain exactly the requested accessory code.
 *
 * Single-token code (e.g. "300"): a token matches when its compact form equals
 * the compact code and it is NOT directly followed by a single-letter variant
 * suffix, so "300 s" is not accepted for "300".
 *
 * Multi-token code (e.g. "300 s"): matches either the literal token sequence or
 * any run of 1..n consecutive tokens (n = number of code tokens) whose compact
 * concatenation equals the compact code. This accepts products that write the
 * same code in fewer tokens ("300-s", "300s") at any position. Comparing only a
 * fixed n-token slice found such spellings solely when the token happened to be
 * the last one, because array_slice() silently shortens at the end of the list.
 *
 * @param string[] $tokens
 */
private function accessoryCodeTokensContainExactCode(array $tokens, string $requestedCode): bool
{
    $codeTokens = $this->tokenizeAccessoryCodeContext($requestedCode);
    if ($codeTokens === []) {
        return false;
    }

    $compactCode = $this->normalizeAccessoryCodeForExactMatch($requestedCode);
    $codeLength = count($codeTokens);
    $tokens = array_values($tokens);
    $tokenCount = count($tokens);

    for ($index = 0; $index < $tokenCount; $index++) {
        if ($codeLength === 1) {
            if ($this->normalizeAccessoryCodeForExactMatch($tokens[$index]) !== $compactCode) {
                continue;
            }
            // A trailing single letter marks a different variant of the bare code.
            if (!$this->isSingleLetterVariantSuffix($tokens[$index + 1] ?? '')) {
                return true;
            }
            continue;
        }

        if ($this->tokenSequenceMatchesAt($tokens, $codeTokens, $index)) {
            return true;
        }

        // Compact comparison over every window size up to the code's token count.
        $maxWindow = min($codeLength, $tokenCount - $index);
        $joined = '';
        for ($size = 1; $size <= $maxWindow; $size++) {
            $joined .= $this->normalizeAccessoryCodeForExactMatch($tokens[$index + $size - 1]);
            if ($joined === $compactCode) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Splits text into lower-cased tokens for accessory code detection.
 *
 * A token is a letter run immediately followed by a digit (plus trailing
 * letters/digits/hyphens), a number (optionally with a decimal part and trailing
 * letters/digits/hyphens), or a plain run of letters.
 *
 * @return string[]
 */
private function tokenizeAccessoryCodeContext(string $text): array
{
    $lowered = mb_strtolower($this->normalizeOneLine($text), 'UTF-8');
    if ($lowered === '') {
        return [];
    }

    $found = [];
    preg_match_all('/[\p{L}]+\d[\p{L}\p{N}\-]*|\d+(?:[,.]\d+)?[\p{L}\p{N}\-]*|[\p{L}]+/u', $lowered, $found);

    $result = [];
    foreach ($found[0] ?? [] as $rawToken) {
        $cleanToken = trim($rawToken);
        if ($cleanToken !== '') {
            $result[] = $cleanToken;
        }
    }

    return $result;
}
/**
 * Tells whether a token qualifies as an accessory code.
 *
 * Empty tokens and tokens containing "," or "." are rejected. A purely numeric
 * token qualifies only with at least two digits. Any other token qualifies when
 * it matches one of the configured code patterns or the generic alphanumeric
 * code shape.
 */
private function isStrictAccessoryCodeToken(string $token): bool
{
    $candidate = trim($token);
    if ($candidate === '') {
        return false;
    }
    if (str_contains($candidate, ',') || str_contains($candidate, '.')) {
        return false;
    }

    $isPureNumber = preg_match('/^\d+$/u', $candidate) === 1;
    if ($isPureNumber) {
        return mb_strlen($candidate, 'UTF-8') >= 2;
    }

    $configuredPatterns = $this->agentRunnerConfig->getShopQueryPositiveTokenFilterCodePatterns();
    foreach ($configuredPatterns as $configuredPattern) {
        // Warnings are suppressed so a malformed configured pattern simply does not match.
        $outcome = @preg_match($configuredPattern, $candidate);
        if ($outcome === 1) {
            return true;
        }
    }

    $genericOutcome = preg_match('/^(?:[a-z]{1,4}\d{1,5}[a-z0-9-]*|\d{2,5}[a-z0-9-]*)$/iu', $candidate);

    return $genericOutcome === 1;
}
/**
 * Tells whether the trimmed token is exactly one letter a-z (case-insensitive).
 */
private function isSingleLetterVariantSuffix(string $token): bool
{
    $candidate = trim($token);
    $outcome = preg_match('/^[a-z]$/iu', $candidate);

    return $outcome === 1;
}
/**
 * Tells whether the trimmed token consists of one to four letters a-z (case-insensitive).
 */
private function isShortAlphaCodePrefix(string $token): bool
{
    $candidate = trim($token);
    $outcome = preg_match('/^[a-z]{1,4}$/iu', $candidate);

    return $outcome === 1;
}
/**
 * Lower-cases a code phrase and passes it through the one-line normalizer.
 */
private function normalizeAccessoryCodePhrase(string $code): string
{
    $lowered = mb_strtolower($code, 'UTF-8');

    return $this->normalizeOneLine($lowered);
}
/**
 * Reduces a code to its compact comparison form: lower-cased, with every run of
 * characters other than a-z and 0-9 removed. Yields an empty string if the
 * replacement fails.
 */
private function normalizeAccessoryCodeForExactMatch(string $code): string
{
    $lowered = mb_strtolower($code, 'UTF-8');
    $compact = preg_replace('/[^a-z0-9]+/iu', '', $lowered);

    return $compact === null ? '' : $compact;
}
/**
* @param ShopProductResult[] $shopResults
* @return ShopProductResult[]

View File

@@ -1379,6 +1379,15 @@ final class AgentRunnerConfig
)));
}
/**
 * Returns the configured terms that introduce a requested accessory code.
 *
 * The list under `search_repair` is preferred; the one under `product_roles`
 * is used only when the preferred list is empty.
 *
 * @return string[]
 */
public function getRequestedAccessoryCodeTerms(): array
{
    $preferredTerms = $this->genreStringList('search_repair.requested_accessory_code_terms.terms');
    if ($preferredTerms) {
        return $preferredTerms;
    }

    return $this->genreStringList('product_roles.requested_accessory_code_terms.terms');
}
public function isDirectShopResultGuardEnabled(): bool
{
return $this->getRequiredBool('shop_runtime.result_identity.enabled');