From 724afd4a263ea8eab250a78fcde172945eeab039 Mon Sep 17 00:00:00 2001 From: team 1 Date: Fri, 1 May 2026 12:10:32 +0200 Subject: [PATCH] new tokens --- config/retriex/agent.yaml | 44 +++++++++++++++++++++ config/retriex/commerce.yaml | 42 ++++++++++++++++++++ config/services.yaml | 4 ++ src/Agent/AgentRunner.php | 46 ++++------------------ src/Commerce/CommerceQueryParser.php | 13 +----- src/Commerce/CommerceReferenceResolver.php | 39 ++++-------------- src/Config/AgentRunnerConfig.php | 41 +++++++++++++++++++ src/Config/CommerceQueryParserConfig.php | 6 +++ 8 files changed, 154 insertions(+), 81 deletions(-) diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index 82bb7f5..ab7acf7 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -8,6 +8,50 @@ parameters: optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu' optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`" + follow_up_context: + strong_reference_patterns: + - '/\bder\s+wert\b/u' + - '/\bdieser\s+wert\b/u' + - '/\bdiesen\s+wert\b/u' + - '/\bdem\s+wert\b/u' + - '/\bmit\s+welche(?:m|n|r)?\b/u' + - '/\bwomit\b/u' + - '/\bdamit\b/u' + - '/\bdafuer\b/u' + - '/\bdafür\b/u' + - '/\bdazu\b/u' + - '/\bdaraus\b/u' + - '/\bwelche(?:r|s|m|n)?\s+indikator\b/u' + - '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u' + - '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u' + - '/\bwelche(?:r|s|m|n)?\s+bereich\b/u' + - '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u' + - '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u' + explicit_commercial_signal_terms: + - shop + - preis + - preise + - kostet + - kosten + - kaufen + - bestellen + - warenkorb + - lieferzeit + - verfuegbar + - verfügbar + - lager + - url + - link + - artikelnummer + - sku + - produktnummer + history_question_pattern: '/^Question:\s*(.+)$/mi' + history_turn_split_pattern: '/(?=^Question:\s)/m' + history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u' + reference_anchor: + testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu' + hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu' + messages: empty_prompt: '❌ Empty prompt.' analyze_request: 'Ich analysiere deine Anfrage...' diff --git a/config/retriex/commerce.yaml b/config/retriex/commerce.yaml index 558678c..2cd6a83 100644 --- a/config/retriex/commerce.yaml +++ b/config/retriex/commerce.yaml @@ -121,6 +121,18 @@ parameters: - passen - passend + search_control_tokens: + - shop + - store + - produkt + - produkte + - artikel + - kaufen + - kaufe + - bestellen + - bestelle + - online + search_token_corrections: siene: seine sienen: seinen @@ -220,6 +232,36 @@ parameters: exact_token_removal_template: '/\b{token}\b/u' brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u' + # Commerce reference resolver configuration. + # YAML is the only operative source of truth for conversation product and focus-term patterns. + retriex.commerce_reference_resolver.config: + conversation_product_patterns: + - '/\b(Testomat\s+2000\s+THCL)\b/ui' + - '/\b(Testomat\s+808)\b/ui' + - '/\b(Testomat\s+EVO\s+TH)\b/ui' + - '/\b(Testomat\s+EVO\s+CALC)\b/ui' + - '/\b(Testomat\s+ECO\s+PLUS)\b/ui' + - '/\b(Testomat\s+ECO\s+C)\b/ui' + - '/\b(Testomat\s+ECO)\b/ui' + - '/\b(Testomat\s+LAB\s+CL)\b/ui' + - '/\b(Testomat\s+LAB\s+MONO)\b/ui' + - '/\b(Testomat\s+2000)\b/ui' + + focus_term_patterns: + indikator: '/\bindikator(?:en)?\b/u' + indikatoren: '/\bindikator(?:en)?\b/u' + reagenz: '/\breagenz(?:ien)?\b/u' + reagenzien: '/\breagenz(?:ien)?\b/u' + zubehör: '/\bzubeh[oö]r\b/u' + ersatzteil: '/\bersatzteile?\b/u' + ersatzteile: '/\bersatzteile?\b/u' + service-set: '/\bservice(?:\s|-)?set\b/u' + filter: '/\bfilter\b/u' + pumpenkopf: '/\bpumpenkopf\b/u' + motorblock: '/\bmotorblock\b/u' + mehrwertpaket: '/\bmehrwertpaket\b/u' + neotecmaster: '/\bneotecmaster\b/u' + # Shop matching and presentation configuration. # YAML is the only operative source of truth; PHP must not contain shop matching defaults. retriex.shop_matching.config: diff --git a/config/services.yaml b/config/services.yaml index 71b0ab8..ef8fc4e 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -198,6 +198,10 @@ services: arguments: $config: '%retriex.commerce_query.config%' + App\Config\CommerceReferenceResolverConfig: + arguments: + $config: '%retriex.commerce_reference_resolver.config%' + App\Commerce\CommerceQueryParser: ~ App\Config\SearchRepairConfig: diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index ee1db3f..0724db3 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -612,27 +612,7 @@ final readonly class AgentRunner private function containsStrongFollowUpReference(string $normalized): bool { - $patterns = [ - '/\bder\s+wert\b/u', - '/\bdieser\s+wert\b/u', - '/\bdiesen\s+wert\b/u', - '/\bdem\s+wert\b/u', - '/\bmit\s+welche(?:m|n|r)?\b/u', - '/\bwomit\b/u', - '/\bdamit\b/u', - '/\bdafuer\b/u', - '/\bdafür\b/u', - '/\bdazu\b/u', - '/\bdaraus\b/u', - '/\bwelche(?:r|s|m|n)?\s+indikator\b/u', - '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u', - '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u', - '/\bwelche(?:r|s|m|n)?\s+bereich\b/u', - '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u', - '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u', - ]; - - foreach ($patterns as $pattern) { + foreach ($this->agentRunnerConfig->getFollowUpStrongReferencePatterns() as $pattern) { if (preg_match($pattern, $normalized) === 1) { return true; } @@ -643,14 +623,8 @@ final readonly class AgentRunner private function containsExplicitCommercialFollowUpSignal(string $normalized): bool { - $commercialSignals = [ - 'shop', 'preis', 'preise', 'kostet', 'kosten', 'kaufen', 'bestellen', - 'warenkorb', 'lieferzeit', 'verfuegbar', 'verfügbar', 'lager', 'url', - 'link', 'artikelnummer', 'sku', 'produktnummer', - ]; - - foreach ($commercialSignals as $signal) { - if (str_contains($normalized, $signal)) { + foreach ($this->agentRunnerConfig->getFollowUpExplicitCommercialSignalTerms() as $signal) { + if (str_contains($normalized, mb_strtolower($signal, 'UTF-8'))) { return true; } } @@ -669,7 +643,7 @@ final readonly class AgentRunner return []; } - if (preg_match_all('/^Question:\s*(.+)$/mi', $history, $matches) !== 1) { + if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) { return []; } @@ -708,7 +682,7 @@ final readonly class AgentRunner return []; } - $answer = preg_replace('/^Question:\s*.*(?:\R|$)/u', '', $turn, 1) ?? ''; + $answer = preg_replace($this->agentRunnerConfig->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? ''; $answer = trim($answer); if ($answer === '') { @@ -738,7 +712,7 @@ final readonly class AgentRunner return ''; } - $parts = preg_split('/(?=^Question:\s)/m', $history); + $parts = preg_split($this->agentRunnerConfig->getFollowUpHistoryTurnSplitPattern(), $history); if ($parts === false || $parts === []) { return ''; @@ -758,11 +732,7 @@ final readonly class AgentRunner private function extractFirstTestomatModelAnchor(string $text): string { - $pattern = '/\bTestomat(?:®)?\s+' - . '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)' - . '\b/iu'; - - if (preg_match($pattern, $text, $matches) !== 1) { + if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) { return ''; } @@ -774,7 +744,7 @@ final readonly class AgentRunner private function extractFirstHardnessValueAnchor(string $text): string { - if (preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $matches) !== 1) { + if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) { return ''; } diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index d620afe..42da1a8 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -512,18 +512,7 @@ final readonly class CommerceQueryParser return true; } - return in_array($token, [ - 'shop', - 'store', - 'produkt', - 'produkte', - 'artikel', - 'kaufen', - 'kaufe', - 'bestellen', - 'bestelle', - 'online', - ], true); + return in_array($token, $this->config->getSearchControlTokens(), true); } private function isDirectProductQuery(string $prompt): bool diff --git a/src/Commerce/CommerceReferenceResolver.php b/src/Commerce/CommerceReferenceResolver.php index db163c9..d01d6f5 100644 --- a/src/Commerce/CommerceReferenceResolver.php +++ b/src/Commerce/CommerceReferenceResolver.php @@ -5,9 +5,15 @@ declare(strict_types=1); namespace App\Commerce; use App\Commerce\Dto\CommerceReferenceContext; +use App\Config\CommerceReferenceResolverConfig; final readonly class CommerceReferenceResolver { + public function __construct( + private CommerceReferenceResolverConfig $config, + ) { + } + /** * @param array> $shopResults */ @@ -84,20 +90,7 @@ final readonly class CommerceReferenceResolver return null; } - $patterns = [ - '/\b(Testomat\s+2000\s+THCL)\b/ui', - '/\b(Testomat\s+808)\b/ui', - '/\b(Testomat\s+EVO\s+TH)\b/ui', - '/\b(Testomat\s+EVO\s+CALC)\b/ui', - '/\b(Testomat\s+ECO\s+PLUS)\b/ui', - '/\b(Testomat\s+ECO\s+C)\b/ui', - '/\b(Testomat\s+ECO)\b/ui', - '/\b(Testomat\s+LAB\s+CL)\b/ui', - '/\b(Testomat\s+LAB\s+MONO)\b/ui', - '/\b(Testomat\s+2000)\b/ui', - ]; - - foreach ($patterns as $pattern) { + foreach ($this->config->getConversationProductPatterns() as $pattern) { if (!preg_match($pattern, $text, $matches)) { continue; } @@ -135,25 +128,9 @@ final readonly class CommerceReferenceResolver return []; } - $patterns = [ - 'indikator' => '/\bindikator(?:en)?\b/u', - 'indikatoren' => '/\bindikator(?:en)?\b/u', - 'reagenz' => '/\breagenz(?:ien)?\b/u', - 'reagenzien' => '/\breagenz(?:ien)?\b/u', - 'zubehör' => '/\bzubeh[oö]r\b/u', - 'ersatzteil' => '/\bersatzteile?\b/u', - 'ersatzteile' => '/\bersatzteile?\b/u', - 'service-set' => '/\bservice(?:\s|-)?set\b/u', - 'filter' => '/\bfilter\b/u', - 'pumpenkopf' => '/\bpumpenkopf\b/u', - 'motorblock' => '/\bmotorblock\b/u', - 'mehrwertpaket' => '/\bmehrwertpaket\b/u', - 'neotecmaster' => '/\bneotecmaster\b/u', - ]; - $terms = []; - foreach ($patterns as $canonical => $pattern) { + foreach ($this->config->getFocusTermPatterns() as $canonical => $pattern) { if (preg_match($pattern, $normalized) === 1) { $terms[] = $canonical; } diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php index 9626cb6..4cba91e 100644 --- a/src/Config/AgentRunnerConfig.php +++ b/src/Config/AgentRunnerConfig.php @@ -39,6 +39,47 @@ final class AgentRunnerConfig return $this->getRequiredString('optimized_shop_query_trim_characters'); } + /** + * @return string[] + */ + public function getFollowUpStrongReferencePatterns(): array + { + return $this->getRequiredStringList('follow_up_context.strong_reference_patterns'); + } + + /** + * @return string[] + */ + public function getFollowUpExplicitCommercialSignalTerms(): array + { + return $this->getRequiredStringList('follow_up_context.explicit_commercial_signal_terms'); + } + + public function getFollowUpHistoryQuestionPattern(): string + { + return $this->getRequiredString('follow_up_context.history_question_pattern'); + } + + public function getFollowUpHistoryTurnSplitPattern(): string + { + return $this->getRequiredString('follow_up_context.history_turn_split_pattern'); + } + + public function getFollowUpHistoryQuestionStripPattern(): string + { + return $this->getRequiredString('follow_up_context.history_question_strip_pattern'); + } + + public function getFollowUpReferenceAnchorTestomatModelPattern(): string + { + return $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern'); + } + + public function getFollowUpReferenceAnchorHardnessValuePattern(): string + { + return $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern'); + } + private function getRequiredInt(string $key): int { $value = $this->requiredValue($key); diff --git a/src/Config/CommerceQueryParserConfig.php b/src/Config/CommerceQueryParserConfig.php index 6e17bb6..e3d03dc 100644 --- a/src/Config/CommerceQueryParserConfig.php +++ b/src/Config/CommerceQueryParserConfig.php @@ -48,6 +48,12 @@ final class CommerceQueryParserConfig return $this->stringList('filter_search_tokens'); } + /** @return string[] */ + public function getSearchControlTokens(): array + { + return $this->stringList('search_control_tokens'); + } + /** @return string[] */ private function whitespacePreservingStringList(string $path): array {