new tokens
This commit is contained in:
@@ -8,6 +8,50 @@ parameters:
|
|||||||
optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
|
optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
|
||||||
optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"
|
optimized_shop_query_trim_characters: " \t\n\r\0\x0B\"'`"
|
||||||
|
|
||||||
|
follow_up_context:
|
||||||
|
strong_reference_patterns:
|
||||||
|
- '/\bder\s+wert\b/u'
|
||||||
|
- '/\bdieser\s+wert\b/u'
|
||||||
|
- '/\bdiesen\s+wert\b/u'
|
||||||
|
- '/\bdem\s+wert\b/u'
|
||||||
|
- '/\bmit\s+welche(?:m|n|r)?\b/u'
|
||||||
|
- '/\bwomit\b/u'
|
||||||
|
- '/\bdamit\b/u'
|
||||||
|
- '/\bdafuer\b/u'
|
||||||
|
- '/\bdafür\b/u'
|
||||||
|
- '/\bdazu\b/u'
|
||||||
|
- '/\bdaraus\b/u'
|
||||||
|
- '/\bwelche(?:r|s|m|n)?\s+indikator\b/u'
|
||||||
|
- '/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u'
|
||||||
|
- '/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u'
|
||||||
|
- '/\bwelche(?:r|s|m|n)?\s+bereich\b/u'
|
||||||
|
- '/\bwelche(?:r|s|m|n)?\s+messbereich\b/u'
|
||||||
|
- '/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u'
|
||||||
|
explicit_commercial_signal_terms:
|
||||||
|
- shop
|
||||||
|
- preis
|
||||||
|
- preise
|
||||||
|
- kostet
|
||||||
|
- kosten
|
||||||
|
- kaufen
|
||||||
|
- bestellen
|
||||||
|
- warenkorb
|
||||||
|
- lieferzeit
|
||||||
|
- verfuegbar
|
||||||
|
- verfügbar
|
||||||
|
- lager
|
||||||
|
- url
|
||||||
|
- link
|
||||||
|
- artikelnummer
|
||||||
|
- sku
|
||||||
|
- produktnummer
|
||||||
|
history_question_pattern: '/^Question:\s*(.+)$/mi'
|
||||||
|
history_turn_split_pattern: '/(?=^Question:\s)/m'
|
||||||
|
history_question_strip_pattern: '/^Question:\s*.*(?:\R|$)/u'
|
||||||
|
reference_anchor:
|
||||||
|
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
||||||
|
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
||||||
|
|
||||||
messages:
|
messages:
|
||||||
empty_prompt: '❌ Empty prompt.'
|
empty_prompt: '❌ Empty prompt.'
|
||||||
analyze_request: 'Ich analysiere deine Anfrage...'
|
analyze_request: 'Ich analysiere deine Anfrage...'
|
||||||
|
|||||||
@@ -121,6 +121,18 @@ parameters:
|
|||||||
- passen
|
- passen
|
||||||
- passend
|
- passend
|
||||||
|
|
||||||
|
search_control_tokens:
|
||||||
|
- shop
|
||||||
|
- store
|
||||||
|
- produkt
|
||||||
|
- produkte
|
||||||
|
- artikel
|
||||||
|
- kaufen
|
||||||
|
- kaufe
|
||||||
|
- bestellen
|
||||||
|
- bestelle
|
||||||
|
- online
|
||||||
|
|
||||||
search_token_corrections:
|
search_token_corrections:
|
||||||
siene: seine
|
siene: seine
|
||||||
sienen: seinen
|
sienen: seinen
|
||||||
@@ -220,6 +232,36 @@ parameters:
|
|||||||
exact_token_removal_template: '/\b{token}\b/u'
|
exact_token_removal_template: '/\b{token}\b/u'
|
||||||
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
|
brand_part_of_model_template: '/\b{brand}\s+\d{2,5}[a-z0-9\-]*\b/u'
|
||||||
|
|
||||||
|
# Commerce reference resolver configuration.
|
||||||
|
# YAML is the only operative source of truth for conversation product and focus-term patterns.
|
||||||
|
retriex.commerce_reference_resolver.config:
|
||||||
|
conversation_product_patterns:
|
||||||
|
- '/\b(Testomat\s+2000\s+THCL)\b/ui'
|
||||||
|
- '/\b(Testomat\s+808)\b/ui'
|
||||||
|
- '/\b(Testomat\s+EVO\s+TH)\b/ui'
|
||||||
|
- '/\b(Testomat\s+EVO\s+CALC)\b/ui'
|
||||||
|
- '/\b(Testomat\s+ECO\s+PLUS)\b/ui'
|
||||||
|
- '/\b(Testomat\s+ECO\s+C)\b/ui'
|
||||||
|
- '/\b(Testomat\s+ECO)\b/ui'
|
||||||
|
- '/\b(Testomat\s+LAB\s+CL)\b/ui'
|
||||||
|
- '/\b(Testomat\s+LAB\s+MONO)\b/ui'
|
||||||
|
- '/\b(Testomat\s+2000)\b/ui'
|
||||||
|
|
||||||
|
focus_term_patterns:
|
||||||
|
indikator: '/\bindikator(?:en)?\b/u'
|
||||||
|
indikatoren: '/\bindikator(?:en)?\b/u'
|
||||||
|
reagenz: '/\breagenz(?:ien)?\b/u'
|
||||||
|
reagenzien: '/\breagenz(?:ien)?\b/u'
|
||||||
|
zubehör: '/\bzubeh[oö]r\b/u'
|
||||||
|
ersatzteil: '/\bersatzteile?\b/u'
|
||||||
|
ersatzteile: '/\bersatzteile?\b/u'
|
||||||
|
service-set: '/\bservice(?:\s|-)?set\b/u'
|
||||||
|
filter: '/\bfilter\b/u'
|
||||||
|
pumpenkopf: '/\bpumpenkopf\b/u'
|
||||||
|
motorblock: '/\bmotorblock\b/u'
|
||||||
|
mehrwertpaket: '/\bmehrwertpaket\b/u'
|
||||||
|
neotecmaster: '/\bneotecmaster\b/u'
|
||||||
|
|
||||||
# Shop matching and presentation configuration.
|
# Shop matching and presentation configuration.
|
||||||
# YAML is the only operative source of truth; PHP must not contain shop matching defaults.
|
# YAML is the only operative source of truth; PHP must not contain shop matching defaults.
|
||||||
retriex.shop_matching.config:
|
retriex.shop_matching.config:
|
||||||
|
|||||||
@@ -198,6 +198,10 @@ services:
|
|||||||
arguments:
|
arguments:
|
||||||
$config: '%retriex.commerce_query.config%'
|
$config: '%retriex.commerce_query.config%'
|
||||||
|
|
||||||
|
App\Config\CommerceReferenceResolverConfig:
|
||||||
|
arguments:
|
||||||
|
$config: '%retriex.commerce_reference_resolver.config%'
|
||||||
|
|
||||||
App\Commerce\CommerceQueryParser: ~
|
App\Commerce\CommerceQueryParser: ~
|
||||||
|
|
||||||
App\Config\SearchRepairConfig:
|
App\Config\SearchRepairConfig:
|
||||||
|
|||||||
@@ -612,27 +612,7 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
private function containsStrongFollowUpReference(string $normalized): bool
|
private function containsStrongFollowUpReference(string $normalized): bool
|
||||||
{
|
{
|
||||||
$patterns = [
|
foreach ($this->agentRunnerConfig->getFollowUpStrongReferencePatterns() as $pattern) {
|
||||||
'/\bder\s+wert\b/u',
|
|
||||||
'/\bdieser\s+wert\b/u',
|
|
||||||
'/\bdiesen\s+wert\b/u',
|
|
||||||
'/\bdem\s+wert\b/u',
|
|
||||||
'/\bmit\s+welche(?:m|n|r)?\b/u',
|
|
||||||
'/\bwomit\b/u',
|
|
||||||
'/\bdamit\b/u',
|
|
||||||
'/\bdafuer\b/u',
|
|
||||||
'/\bdafür\b/u',
|
|
||||||
'/\bdazu\b/u',
|
|
||||||
'/\bdaraus\b/u',
|
|
||||||
'/\bwelche(?:r|s|m|n)?\s+indikator\b/u',
|
|
||||||
'/\bwelche(?:r|s|m|n)?\s+indikatortyp\b/u',
|
|
||||||
'/\bindikator\s+(?:dafuer|dafür|dazu|hierfuer|hierfür)\b/u',
|
|
||||||
'/\bwelche(?:r|s|m|n)?\s+bereich\b/u',
|
|
||||||
'/\bwelche(?:r|s|m|n)?\s+messbereich\b/u',
|
|
||||||
'/\bwelche(?:r|s|m|n)?\s+grenzwert\b/u',
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($patterns as $pattern) {
|
|
||||||
if (preg_match($pattern, $normalized) === 1) {
|
if (preg_match($pattern, $normalized) === 1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -643,14 +623,8 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
private function containsExplicitCommercialFollowUpSignal(string $normalized): bool
|
private function containsExplicitCommercialFollowUpSignal(string $normalized): bool
|
||||||
{
|
{
|
||||||
$commercialSignals = [
|
foreach ($this->agentRunnerConfig->getFollowUpExplicitCommercialSignalTerms() as $signal) {
|
||||||
'shop', 'preis', 'preise', 'kostet', 'kosten', 'kaufen', 'bestellen',
|
if (str_contains($normalized, mb_strtolower($signal, 'UTF-8'))) {
|
||||||
'warenkorb', 'lieferzeit', 'verfuegbar', 'verfügbar', 'lager', 'url',
|
|
||||||
'link', 'artikelnummer', 'sku', 'produktnummer',
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($commercialSignals as $signal) {
|
|
||||||
if (str_contains($normalized, $signal)) {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -669,7 +643,7 @@ final readonly class AgentRunner
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preg_match_all('/^Question:\s*(.+)$/mi', $history, $matches) !== 1) {
|
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -708,7 +682,7 @@ final readonly class AgentRunner
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$answer = preg_replace('/^Question:\s*.*(?:\R|$)/u', '', $turn, 1) ?? '';
|
$answer = preg_replace($this->agentRunnerConfig->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? '';
|
||||||
$answer = trim($answer);
|
$answer = trim($answer);
|
||||||
|
|
||||||
if ($answer === '') {
|
if ($answer === '') {
|
||||||
@@ -738,7 +712,7 @@ final readonly class AgentRunner
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$parts = preg_split('/(?=^Question:\s)/m', $history);
|
$parts = preg_split($this->agentRunnerConfig->getFollowUpHistoryTurnSplitPattern(), $history);
|
||||||
|
|
||||||
if ($parts === false || $parts === []) {
|
if ($parts === false || $parts === []) {
|
||||||
return '';
|
return '';
|
||||||
@@ -758,11 +732,7 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
private function extractFirstTestomatModelAnchor(string $text): string
|
private function extractFirstTestomatModelAnchor(string $text): string
|
||||||
{
|
{
|
||||||
$pattern = '/\bTestomat(?:®)?\s+'
|
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorTestomatModelPattern(), $text, $matches) !== 1) {
|
||||||
. '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)'
|
|
||||||
. '\b/iu';
|
|
||||||
|
|
||||||
if (preg_match($pattern, $text, $matches) !== 1) {
|
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -774,7 +744,7 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
private function extractFirstHardnessValueAnchor(string $text): string
|
private function extractFirstHardnessValueAnchor(string $text): string
|
||||||
{
|
{
|
||||||
if (preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $matches) !== 1) {
|
if (preg_match($this->agentRunnerConfig->getFollowUpReferenceAnchorHardnessValuePattern(), $text, $matches) !== 1) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -512,18 +512,7 @@ final readonly class CommerceQueryParser
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return in_array($token, [
|
return in_array($token, $this->config->getSearchControlTokens(), true);
|
||||||
'shop',
|
|
||||||
'store',
|
|
||||||
'produkt',
|
|
||||||
'produkte',
|
|
||||||
'artikel',
|
|
||||||
'kaufen',
|
|
||||||
'kaufe',
|
|
||||||
'bestellen',
|
|
||||||
'bestelle',
|
|
||||||
'online',
|
|
||||||
], true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function isDirectProductQuery(string $prompt): bool
|
private function isDirectProductQuery(string $prompt): bool
|
||||||
|
|||||||
@@ -5,9 +5,15 @@ declare(strict_types=1);
|
|||||||
namespace App\Commerce;
|
namespace App\Commerce;
|
||||||
|
|
||||||
use App\Commerce\Dto\CommerceReferenceContext;
|
use App\Commerce\Dto\CommerceReferenceContext;
|
||||||
|
use App\Config\CommerceReferenceResolverConfig;
|
||||||
|
|
||||||
final readonly class CommerceReferenceResolver
|
final readonly class CommerceReferenceResolver
|
||||||
{
|
{
|
||||||
|
public function __construct(
|
||||||
|
private CommerceReferenceResolverConfig $config,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param array<int, array<string, mixed>> $shopResults
|
* @param array<int, array<string, mixed>> $shopResults
|
||||||
*/
|
*/
|
||||||
@@ -84,20 +90,7 @@ final readonly class CommerceReferenceResolver
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
$patterns = [
|
foreach ($this->config->getConversationProductPatterns() as $pattern) {
|
||||||
'/\b(Testomat\s+2000\s+THCL)\b/ui',
|
|
||||||
'/\b(Testomat\s+808)\b/ui',
|
|
||||||
'/\b(Testomat\s+EVO\s+TH)\b/ui',
|
|
||||||
'/\b(Testomat\s+EVO\s+CALC)\b/ui',
|
|
||||||
'/\b(Testomat\s+ECO\s+PLUS)\b/ui',
|
|
||||||
'/\b(Testomat\s+ECO\s+C)\b/ui',
|
|
||||||
'/\b(Testomat\s+ECO)\b/ui',
|
|
||||||
'/\b(Testomat\s+LAB\s+CL)\b/ui',
|
|
||||||
'/\b(Testomat\s+LAB\s+MONO)\b/ui',
|
|
||||||
'/\b(Testomat\s+2000)\b/ui',
|
|
||||||
];
|
|
||||||
|
|
||||||
foreach ($patterns as $pattern) {
|
|
||||||
if (!preg_match($pattern, $text, $matches)) {
|
if (!preg_match($pattern, $text, $matches)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -135,25 +128,9 @@ final readonly class CommerceReferenceResolver
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$patterns = [
|
|
||||||
'indikator' => '/\bindikator(?:en)?\b/u',
|
|
||||||
'indikatoren' => '/\bindikator(?:en)?\b/u',
|
|
||||||
'reagenz' => '/\breagenz(?:ien)?\b/u',
|
|
||||||
'reagenzien' => '/\breagenz(?:ien)?\b/u',
|
|
||||||
'zubehör' => '/\bzubeh[oö]r\b/u',
|
|
||||||
'ersatzteil' => '/\bersatzteile?\b/u',
|
|
||||||
'ersatzteile' => '/\bersatzteile?\b/u',
|
|
||||||
'service-set' => '/\bservice(?:\s|-)?set\b/u',
|
|
||||||
'filter' => '/\bfilter\b/u',
|
|
||||||
'pumpenkopf' => '/\bpumpenkopf\b/u',
|
|
||||||
'motorblock' => '/\bmotorblock\b/u',
|
|
||||||
'mehrwertpaket' => '/\bmehrwertpaket\b/u',
|
|
||||||
'neotecmaster' => '/\bneotecmaster\b/u',
|
|
||||||
];
|
|
||||||
|
|
||||||
$terms = [];
|
$terms = [];
|
||||||
|
|
||||||
foreach ($patterns as $canonical => $pattern) {
|
foreach ($this->config->getFocusTermPatterns() as $canonical => $pattern) {
|
||||||
if (preg_match($pattern, $normalized) === 1) {
|
if (preg_match($pattern, $normalized) === 1) {
|
||||||
$terms[] = $canonical;
|
$terms[] = $canonical;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,47 @@ final class AgentRunnerConfig
|
|||||||
return $this->getRequiredString('optimized_shop_query_trim_characters');
|
return $this->getRequiredString('optimized_shop_query_trim_characters');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getFollowUpStrongReferencePatterns(): array
|
||||||
|
{
|
||||||
|
return $this->getRequiredStringList('follow_up_context.strong_reference_patterns');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getFollowUpExplicitCommercialSignalTerms(): array
|
||||||
|
{
|
||||||
|
return $this->getRequiredStringList('follow_up_context.explicit_commercial_signal_terms');
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFollowUpHistoryQuestionPattern(): string
|
||||||
|
{
|
||||||
|
return $this->getRequiredString('follow_up_context.history_question_pattern');
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFollowUpHistoryTurnSplitPattern(): string
|
||||||
|
{
|
||||||
|
return $this->getRequiredString('follow_up_context.history_turn_split_pattern');
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFollowUpHistoryQuestionStripPattern(): string
|
||||||
|
{
|
||||||
|
return $this->getRequiredString('follow_up_context.history_question_strip_pattern');
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFollowUpReferenceAnchorTestomatModelPattern(): string
|
||||||
|
{
|
||||||
|
return $this->getRequiredString('follow_up_context.reference_anchor.testomat_model_pattern');
|
||||||
|
}
|
||||||
|
|
||||||
|
public function getFollowUpReferenceAnchorHardnessValuePattern(): string
|
||||||
|
{
|
||||||
|
return $this->getRequiredString('follow_up_context.reference_anchor.hardness_value_pattern');
|
||||||
|
}
|
||||||
|
|
||||||
private function getRequiredInt(string $key): int
|
private function getRequiredInt(string $key): int
|
||||||
{
|
{
|
||||||
$value = $this->requiredValue($key);
|
$value = $this->requiredValue($key);
|
||||||
|
|||||||
@@ -48,6 +48,12 @@ final class CommerceQueryParserConfig
|
|||||||
return $this->stringList('filter_search_tokens');
|
return $this->stringList('filter_search_tokens');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return string[] */
|
||||||
|
public function getSearchControlTokens(): array
|
||||||
|
{
|
||||||
|
return $this->stringList('search_control_tokens');
|
||||||
|
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
private function whitespacePreservingStringList(string $path): array
|
private function whitespacePreservingStringList(string $path): array
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user