new tokens

This commit is contained in:
team 1
2026-05-01 12:10:32 +02:00
parent f28deb9eff
commit 724afd4a26
8 changed files with 154 additions and 81 deletions

View File

@@ -8,6 +8,50 @@ parameters:
optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu' optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`" optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"
# Follow-up detection settings, read by AgentRunnerConfig through the
# `follow_up_context.*` keys and consumed by AgentRunner.
follow_up_context:
# PCRE patterns. AgentRunner runs each one with preg_match() against the
# normalized prompt and reports a strong follow-up reference on the first hit.
# NOTE(review): none of the patterns carries the `i` flag; presumably the
# normalized prompt is already lowercased — confirm before adding mixed-case patterns.
strong_reference_patterns:
- '/\bder\s+wert\b/u'
- '/\bdieser\s+wert\b/u'
- '/\bdiesen\s+wert\b/u'
- '/\bdem\s+wert\b/u'
- '/\bmit\s+welche(?:m|n|r)?\b/u'
- '/\bwomit\b/u'
- '/\bdamit\b/u'
- '/\bdafuer\b/u'
- '/\bdafür\b/u'
- '/\bdazu\b/u'
- '/\bdaraus\b/u'
- '/\bwelche(?:r|s|m|n)?\s+indikator\b/u'
- '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u'
- '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u'
- '/\bwelche(?:r|s|m|n)?\s+bereich\b/u'
- '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u'
- '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u'
# Plain substrings, not regular expressions. Each term is lowercased with
# mb_strtolower() and tested with str_contains() against the normalized prompt,
# so a term also matches inside longer words (there is no word-boundary check).
explicit_commercial_signal_terms:
- shop
- preis
- preise
- kostet
- kosten
- kaufen
- bestellen
- warenkorb
- lieferzeit
- verfuegbar
- verfügbar
- lager
- url
- link
- artikelnummer
- sku
- produktnummer
# Passed to preg_match_all() on the conversation history; group 1 captures the question text.
# NOTE(review): the call site compares the preg_match_all() result with `!== 1`, which
# treats a history containing two or more questions like "no match" — verify this is intended.
history_question_pattern: '/^Question:\s*(.+)$/mi'
# Passed to preg_split() to cut the history into turns in front of each `Question:` line.
history_turn_split_pattern: '/(?=^Question:\s)/m'
# Passed to preg_replace() with limit 1 and an empty replacement to drop the question line of a turn.
history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u'
# Patterns used with preg_match() to pull the first reference anchor out of a text.
reference_anchor:
# Used by AgentRunner::extractFirstTestomatModelAnchor().
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
# Used by AgentRunner::extractFirstHardnessValueAnchor().
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
messages: messages:
empty_prompt: '❌ Empty prompt.' empty_prompt: '❌ Empty prompt.'
analyze_request: 'Ich analysiere deine Anfrage...' analyze_request: 'Ich analysiere deine Anfrage...'

View File

@@ -121,6 +121,18 @@ parameters:
- passen - passen
- passend - passend
# Tokens that CommerceQueryParser compares against a single token using a strict
# in_array() check (exact string equality, no substring or regex matching).
# NOTE(review): entries are all lowercase; presumably tokens are lowercased
# before the comparison — confirm before adding mixed-case entries.
search_control_tokens:
- shop
- store
- produkt
- produkte
- artikel
- kaufen
- kaufe
- bestellen
- bestelle
- online
search_token_corrections: search_token_corrections:
siene: seine siene: seine
sienen: seinen sienen: seinen
@@ -220,6 +232,36 @@ parameters:
exact_token_removal_template: '/\b{token}\b/u' exact_token_removal_template: '/\b{token}\b/u'
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u' brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
# Commerce reference resolver configuration.
# YAML is the only operative source of truth for conversation product and focus-term patterns.
retriex.commerce_reference_resolver.config:
# PCRE patterns tried in list order with preg_match() against the conversation text;
# capture group 1 holds the product name.
# NOTE(review): more specific models are listed before their shorter prefixes
# (e.g. `ECO PLUS` and `ECO C` before `ECO`); presumably the resolver stops at the
# first matching pattern — confirm before reordering or appending entries.
conversation_product_patterns:
- '/\b(Testomat\s+2000\s+THCL)\b/ui'
- '/\b(Testomat\s+808)\b/ui'
- '/\b(Testomat\s+EVO\s+TH)\b/ui'
- '/\b(Testomat\s+EVO\s+CALC)\b/ui'
- '/\b(Testomat\s+ECO\s+PLUS)\b/ui'
- '/\b(Testomat\s+ECO\s+C)\b/ui'
- '/\b(Testomat\s+ECO)\b/ui'
- '/\b(Testomat\s+LAB\s+CL)\b/ui'
- '/\b(Testomat\s+LAB\s+MONO)\b/ui'
- '/\b(Testomat\s+2000)\b/ui'
# Map of canonical focus term => PCRE pattern. The resolver collects the key of every
# entry whose pattern matches the normalized text, so singular/plural keys that share
# one pattern are both collected when that pattern matches.
# NOTE(review): patterns have no `i` flag; presumably the text is lowercased first — confirm.
focus_term_patterns:
indikator: '/\bindikator(?:en)?\b/u'
indikatoren: '/\bindikator(?:en)?\b/u'
reagenz: '/\breagenz(?:ien)?\b/u'
reagenzien: '/\breagenz(?:ien)?\b/u'
zubehör: '/\bzubeh[oö]r\b/u'
ersatzteil: '/\bersatzteile?\b/u'
ersatzteile: '/\bersatzteile?\b/u'
service-set: '/\bservice(?:\s|-)?set\b/u'
filter: '/\bfilter\b/u'
pumpenkopf: '/\bpumpenkopf\b/u'
motorblock: '/\bmotorblock\b/u'
mehrwertpaket: '/\bmehrwertpaket\b/u'
neotecmaster: '/\bneotecmaster\b/u'
# Shop matching and presentation configuration. # Shop matching and presentation configuration.
# YAML is the only operative source of truth; PHP must not contain shop matching defaults. # YAML is the only operative source of truth; PHP must not contain shop matching defaults.
retriex.shop_matching.config: retriex.shop_matching.config:

View File

@@ -198,6 +198,10 @@ services:
arguments: arguments:
$config: '%retriex.commerce_query.config%' $config: '%retriex.commerce_query.config%'
# Passes the `retriex.commerce_reference_resolver.config` parameter to the
# `$config` constructor argument of CommerceReferenceResolverConfig.
App\Config\CommerceReferenceResolverConfig:
arguments:
$config: '%retriex.commerce_reference_resolver.config%'
App\Commerce\CommerceQueryParser: ~ App\Commerce\CommerceQueryParser: ~
App\Config\SearchRepairConfig: App\Config\SearchRepairConfig:

View File

@@ -612,27 +612,7 @@ final readonly class AgentRunner
private function containsStrongFollowUpReference(string $normalized): bool private function containsStrongFollowUpReference(string $normalized): bool
{ {
$patterns = [ foreach ($this->agentRunnerConfig->getFollowUpStrongReferencePatterns() as $pattern) {
'/\bder\s+wert\b/u',
'/\bdieser\s+wert\b/u',
'/\bdiesen\s+wert\b/u',
'/\bdem\s+wert\b/u',
'/\bmit\s+welche(?:m|n|r)?\b/u',
'/\bwomit\b/u',
'/\bdamit\b/u',
'/\bdafuer\b/u',
'/\bdafür\b/u',
'/\bdazu\b/u',
'/\bdaraus\b/u',
'/\bwelche(?:r|s|m|n)?\s+indikator\b/u',
'/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u',
'/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u',
'/\bwelche(?:r|s|m|n)?\s+bereich\b/u',
'/\bwelche(?:r|s|m|n)?\s+messbereich\b/u',
'/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u',
];
foreach ($patterns as $pattern) {
if (preg_match($pattern, $normalized) === 1) { if (preg_match($pattern, $normalized) === 1) {
return true; return true;
} }
@@ -643,14 +623,8 @@ final readonly class AgentRunner
private function containsExplicitCommercialFollowUpSignal(string $normalized): bool private function containsExplicitCommercialFollowUpSignal(string $normalized): bool
{ {
$commercialSignals = [ foreach ($this->agentRunnerConfig->getFollowUpExplicitCommercialSignalTerms() as $signal) {
'shop', 'preis', 'preise', 'kostet', 'kosten', 'kaufen', 'bestellen', if (str_contains($normalized, mb_strtolower($signal, 'UTF-8'))) {
'warenkorb', 'lieferzeit', 'verfuegbar', 'verfügbar', 'lager', 'url',
'link', 'artikelnummer', 'sku', 'produktnummer',
];
foreach ($commercialSignals as $signal) {
if (str_contains($normalized, $signal)) {
return true; return true;
} }
} }
@@ -669,7 +643,7 @@ final readonly class AgentRunner
return []; return [];
} }
if (preg_match_all('/^Question:\s*(.+)$/mi', $history, $matches) !== 1) { if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
return []; return [];
} }
@@ -708,7 +682,7 @@ final readonly class AgentRunner
return []; return [];
} }
$answer = preg_replace('/^Question:\s*.*(?:\R|$)/u', '', $turn, 1) ?? ''; $answer = preg_replace($this->agentRunnerConfig->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? '';
$answer = trim($answer); $answer = trim($answer);
if ($answer === '') { if ($answer === '') {
@@ -738,7 +712,7 @@ final readonly class AgentRunner
return ''; return '';
} }
$parts = preg_split('/(?=^Question:\s)/m', $history); $parts = preg_split($this->agentRunnerConfig->getFollowUpHistoryTurnSplitPattern(), $history);
if ($parts === false || $parts === []) { if ($parts === false || $parts === []) {
return ''; return '';
@@ -758,11 +732,7 @@ final readonly class AgentRunner
private function extractFirstTestomatModelAnchor(string $text): string private function extractFirstTestomatModelAnchor(string $text): string
{ {
$pattern = '/\bTestomat(?:®)?\s+' if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) {
. '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)'
. '\b/iu';
if (preg_match($pattern, $text, $matches) !== 1) {
return ''; return '';
} }
@@ -774,7 +744,7 @@ final readonly class AgentRunner
private function extractFirstHardnessValueAnchor(string $text): string private function extractFirstHardnessValueAnchor(string $text): string
{ {
if (preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $matches) !== 1) { if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) {
return ''; return '';
} }

View File

@@ -512,18 +512,7 @@ final readonly class CommerceQueryParser
return true; return true;
} }
return in_array($token, [ return in_array($token, $this->config->getSearchControlTokens(), true);
'shop',
'store',
'produkt',
'produkte',
'artikel',
'kaufen',
'kaufe',
'bestellen',
'bestelle',
'online',
], true);
} }
private function isDirectProductQuery(string $prompt): bool private function isDirectProductQuery(string $prompt): bool

View File

@@ -5,9 +5,15 @@ declare(strict_types=1);
namespace App\Commerce; namespace App\Commerce;
use App\Commerce\Dto\CommerceReferenceContext; use App\Commerce\Dto\CommerceReferenceContext;
use App\Config\CommerceReferenceResolverConfig;
final readonly class CommerceReferenceResolver final readonly class CommerceReferenceResolver
{ {
/**
 * @param CommerceReferenceResolverConfig $config Supplies the conversation product
 *                                                patterns and focus-term patterns
 *                                                this resolver iterates over.
 */
public function __construct(
private CommerceReferenceResolverConfig $config,
) {
}
/** /**
* @param array<int, array<string, mixed>> $shopResults * @param array<int, array<string, mixed>> $shopResults
*/ */
@@ -84,20 +90,7 @@ final readonly class CommerceReferenceResolver
return null; return null;
} }
$patterns = [ foreach ($this->config->getConversationProductPatterns() as $pattern) {
'/\b(Testomat\s+2000\s+THCL)\b/ui',
'/\b(Testomat\s+808)\b/ui',
'/\b(Testomat\s+EVO\s+TH)\b/ui',
'/\b(Testomat\s+EVO\s+CALC)\b/ui',
'/\b(Testomat\s+ECO\s+PLUS)\b/ui',
'/\b(Testomat\s+ECO\s+C)\b/ui',
'/\b(Testomat\s+ECO)\b/ui',
'/\b(Testomat\s+LAB\s+CL)\b/ui',
'/\b(Testomat\s+LAB\s+MONO)\b/ui',
'/\b(Testomat\s+2000)\b/ui',
];
foreach ($patterns as $pattern) {
if (!preg_match($pattern, $text, $matches)) { if (!preg_match($pattern, $text, $matches)) {
continue; continue;
} }
@@ -135,25 +128,9 @@ final readonly class CommerceReferenceResolver
return []; return [];
} }
$patterns = [
'indikator' => '/\bindikator(?:en)?\b/u',
'indikatoren' => '/\bindikator(?:en)?\b/u',
'reagenz' => '/\breagenz(?:ien)?\b/u',
'reagenzien' => '/\breagenz(?:ien)?\b/u',
'zubehör' => '/\bzubeh[oö]r\b/u',
'ersatzteil' => '/\bersatzteile?\b/u',
'ersatzteile' => '/\bersatzteile?\b/u',
'service-set' => '/\bservice(?:\s|-)?set\b/u',
'filter' => '/\bfilter\b/u',
'pumpenkopf' => '/\bpumpenkopf\b/u',
'motorblock' => '/\bmotorblock\b/u',
'mehrwertpaket' => '/\bmehrwertpaket\b/u',
'neotecmaster' => '/\bneotecmaster\b/u',
];
$terms = []; $terms = [];
foreach ($patterns as $canonical => $pattern) { foreach ($this->config->getFocusTermPatterns() as $canonical => $pattern) {
if (preg_match($pattern, $normalized) === 1) { if (preg_match($pattern, $normalized) === 1) {
$terms[] = $canonical; $terms[] = $canonical;
} }

View File

@@ -39,6 +39,47 @@ final class AgentRunnerConfig
return $this->getRequiredString('optimized_shop_query_trim_characters'); return $this->getRequiredString('optimized_shop_query_trim_characters');
} }
/**
 * Patterns that mark a prompt as a strong follow-up reference.
 *
 * Reads the string list stored under `follow_up_context.strong_reference_patterns`
 * via getRequiredStringList().
 *
 * @return string[]
 */
public function getFollowUpStrongReferencePatterns(): array
{
    $patterns = $this->getRequiredStringList('follow_up_context.strong_reference_patterns');

    return $patterns;
}
/**
 * Terms that signal an explicit commercial intent in a follow-up prompt.
 *
 * Reads the string list stored under `follow_up_context.explicit_commercial_signal_terms`
 * via getRequiredStringList().
 *
 * @return string[]
 */
public function getFollowUpExplicitCommercialSignalTerms(): array
{
    $terms = $this->getRequiredStringList('follow_up_context.explicit_commercial_signal_terms');

    return $terms;
}
/**
 * Pattern string stored under `follow_up_context.history_question_pattern`,
 * read via getRequiredString().
 */
public function getFollowUpHistoryQuestionPattern(): string
{
    $pattern = $this->getRequiredString('follow_up_context.history_question_pattern');

    return $pattern;
}
/**
 * Pattern string stored under `follow_up_context.history_turn_split_pattern`,
 * read via getRequiredString().
 */
public function getFollowUpHistoryTurnSplitPattern(): string
{
    $pattern = $this->getRequiredString('follow_up_context.history_turn_split_pattern');

    return $pattern;
}
/**
 * Pattern string stored under `follow_up_context.history_question_strip_pattern`,
 * read via getRequiredString().
 */
public function getFollowUpHistoryQuestionStripPattern(): string
{
    $pattern = $this->getRequiredString('follow_up_context.history_question_strip_pattern');

    return $pattern;
}
/**
 * Pattern string stored under `follow_up_context.reference_anchor.testomat_model_pattern`,
 * read via getRequiredString().
 */
public function getFollowUpReferenceAnchorTestomatModelPattern(): string
{
    $pattern = $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern');

    return $pattern;
}
/**
 * Pattern string stored under `follow_up_context.reference_anchor.hardness_value_pattern`,
 * read via getRequiredString().
 */
public function getFollowUpReferenceAnchorHardnessValuePattern(): string
{
    $pattern = $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern');

    return $pattern;
}
private function getRequiredInt(string $key): int private function getRequiredInt(string $key): int
{ {
$value = $this->requiredValue($key); $value = $this->requiredValue($key);

View File

@@ -48,6 +48,12 @@ final class CommerceQueryParserConfig
return $this->stringList('filter_search_tokens'); return $this->stringList('filter_search_tokens');
} }
/**
 * Tokens stored under the `search_control_tokens` config key, read via stringList().
 *
 * @return string[]
 */
public function getSearchControlTokens(): array
{
    $tokens = $this->stringList('search_control_tokens');

    return $tokens;
}
/** @return string[] */ /** @return string[] */
private function whitespacePreservingStringList(string $path): array private function whitespacePreservingStringList(string $path): array
{ {