From 1815a42035969dad7320b1ad2d680698487dd727 Mon Sep 17 00:00:00 2001 From: team 1 Date: Wed, 15 Apr 2026 08:46:26 +0200 Subject: [PATCH] add new configs --- src/Agent/AgentRunner.php | 26 +---- src/Commerce/CommerceQueryParser.php | 54 +++------- src/Commerce/ShopSearchService.php | 67 ++++++------ src/Config/AgentRunnerConfig.php | 29 +++++ src/Config/CatalogIntentConfig.php | 12 +++ src/Config/CommerceIntentConfig.php | 101 ++++++++++++++++++ src/Config/CommerceQueryParserConfig.php | 32 ++++++ src/Config/ContextServiceConfig.php | 12 +++ src/Config/IntentLightConfig.php | 50 +++++++++ src/Config/NdjsonHybridRetrieverConfig.php | 21 ++++ src/Config/SalesIntentConfig.php | 65 +++++++++++ src/Context/ContextService.php | 21 ++-- src/Intent/CatalogIntentLite.php | 22 ++-- src/Intent/CommerceIntentLite.php | 56 ++++------ src/Intent/IntentLite.php | 62 ++++------- src/Intent/SalesIntentLite.php | 96 ++++++----------- .../Retrieval/NdjsonHybridRetriever.php | 64 +++++------ src/Shopware/StoreApiClient.php | 27 +++-- 18 files changed, 508 insertions(+), 309 deletions(-) create mode 100644 src/Config/AgentRunnerConfig.php create mode 100644 src/Config/CatalogIntentConfig.php create mode 100644 src/Config/CommerceIntentConfig.php create mode 100644 src/Config/CommerceQueryParserConfig.php create mode 100644 src/Config/ContextServiceConfig.php create mode 100644 src/Config/IntentLightConfig.php create mode 100644 src/Config/NdjsonHybridRetrieverConfig.php create mode 100644 src/Config/SalesIntentConfig.php diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index e9bd71c..999d169 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -5,6 +5,7 @@ declare(strict_types=1); namespace App\Agent; use App\Commerce\ShopSearchService; +use App\Config\AgentRunnerConfig; use App\Context\ContextService; use App\Context\UrlAnalyzer; use App\Infrastructure\OllamaClient; @@ -28,6 +29,7 @@ final readonly class AgentRunner private CommerceIntentLite 
$commerceIntentLite, private OllamaClient $ollamaClient, private LoggerInterface $agentLogger, + private AgentRunnerConfig $agentRunnerConfig, private bool $debug, private bool $logPrompt, private bool $logContext, @@ -84,30 +86,12 @@ final readonly class AgentRunner if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH || $commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) { //PreOptimize swag search query - $promptSwagSearch = ' - Erzeuge aus dem folgenden Nutzereingabetext einen kurzen Suchtext für die Shopware-6-Suche. - - Regeln: - - Gib nur den finalen Suchtext aus. - - erstelle immer die singular form von den relevanten Suchbegriffen - - Keine Einleitung, keine Erklärung, keine Anführungszeichen. - - Verwende nur die shop relevanten Suchbegriffe für eine Shopsuche aus dem Nutzereingabetext. - - Maximal 6 Suchbegriffe, besser weniger. - - Entferne Füllwörter, Höflichkeitsformen und irrelevante Wörter. - - Erhalte Produktnamen, Marken, Modellnummern und zusammengesetzte Begriffe exakt, wenn sie relevant sind. - - Zahlen, die zu einem Produktnamen oder Modell gehören (zb Indikator 300 oder Testomat 808), müssen erhalten bleiben. - - Trenne die Begriffe nur durch Leerzeichen. - - Ausgabeformat: - Keyword1 Keyword2 Keyword3 - - Nutzereingabetext: ' . $prompt . ' - '; + $promptSwagSearch = $this->agentRunnerConfig->getShopPrompt($prompt); //Reset thinkSuppressor $this->thinkSuppressor->reset(); - yield $this->systemMsg("Ich optimere die Shopanfrage...", "think"); + yield $this->systemMsg("Ich optimiere die Recherche...", "think"); //Call ai for optimized swag query foreach ($this->ollamaClient->stream($promptSwagSearch) as $swagToken) { @@ -125,7 +109,7 @@ final readonly class AgentRunner $swagFullOutPut .= $swagCleanToken; } - yield $this->systemMsg("Ich rufe Shopdaten ab (type: " . $commerceIntent . ")", "think"); + yield $this->systemMsg("Ich rufe Recherchedaten ab (type: " . $commerceIntent . 
")", "think"); //Search in swag by ai optimized query try { diff --git a/src/Commerce/CommerceQueryParser.php b/src/Commerce/CommerceQueryParser.php index e91f3f5..440fcfe 100644 --- a/src/Commerce/CommerceQueryParser.php +++ b/src/Commerce/CommerceQueryParser.php @@ -5,27 +5,22 @@ declare(strict_types=1); namespace App\Commerce; use App\Commerce\Dto\CommerceSearchQuery; +use App\Config\CommerceIntentConfig; +use App\Config\CommerceQueryParserConfig; use App\Knowledge\Retrieval\QueryCleaner; use App\Knowledge\Text\TextNormalizer; -final class CommerceQueryParser +final readonly class CommerceQueryParser { public function __construct( - private readonly TextNormalizer $textNormalizer, - private readonly QueryCleaner $queryCleaner, + private TextNormalizer $textNormalizer, + private QueryCleaner $queryCleaner, + private CommerceQueryParserConfig $config, + private CommerceIntentConfig $intentConfig, ) { - } - /** - * @var string[] - */ - private array $knownBrands = [ - 'heyl', - 'horiba' - ]; - public function parse(string $originalPrompt, string $intent): CommerceSearchQuery { $normalized = $this->normalize($originalPrompt); @@ -103,7 +98,8 @@ final class CommerceQueryParser { $sizes = []; - if (preg_match_all('/\b(?:größe|groesse|grösse)\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) { + $sizePattern = $this->intentConfig->getSizePattern(); + if (preg_match_all('/\b(?:' . $sizePattern . ')\s*([a-z0-9.-]+)\b/u', $prompt, $matches) === false) { return []; } @@ -111,7 +107,8 @@ final class CommerceQueryParser $sizes[] = trim($size); } - if (preg_match_all('/\b(xs|s|m|l|xl|xxl|xxxl)\b/u', $prompt, $tokenMatches) !== false) { + $sizeTokenPattern = $this->intentConfig->getSizeTokenPattern(); + if (preg_match_all('/\b(' . $sizeTokenPattern . 
')\b/u', $prompt, $tokenMatches) !== false) { foreach ($tokenMatches[1] as $sizeToken) { $sizes[] = trim($sizeToken); } @@ -122,16 +119,12 @@ final class CommerceQueryParser private function extractBrand(string $prompt): ?string { - foreach ($this->knownBrands as $brand) { + foreach ($this->config->getKnownBrands() as $brand) { if (str_contains($prompt, $brand)) { return $brand; } } - if (preg_match('/\bheyl\s+([a-z0-9][a-z0-9\s\-]+)/u', $prompt, $m) === 1) { - return trim($m[1]); - } - return null; } @@ -145,20 +138,7 @@ final class CommerceQueryParser { $text = ' ' . $prompt . ' '; - $phrasesToRemove = [ - 'ich suche', - 'suche', - 'habt ihr', - 'gibt es', - 'zeige mir', - 'welches gerät', - 'welche gerät', - 'welches modell', - 'welches ist besser', - 'welches ist am besten', - 'alternative', - 'alternativen', - ]; + $phrasesToRemove = $this->config->getPhrasesToRemove(); foreach ($phrasesToRemove as $phrase) { $text = str_replace($phrase, ' ', $text); @@ -173,11 +153,9 @@ final class CommerceQueryParser } if ($priceMin !== null || $priceMax !== null) { - if ($priceMin !== null || $priceMax !== null) { - $text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; - $text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; - $text = preg_replace('/\beuro\b/u', ' ', $text) ?? $text; - } + $text = preg_replace('/\bzwischen\s+\d+(?:[.,]\d+)?\s+und\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; + $text = preg_replace('/\b(?:unter|bis|max(?:imal)?|ab|mindestens|min)\s+\d+(?:[.,]\d+)?\s*euro\b/u', ' ', $text) ?? $text; + $text = preg_replace('/\b(?:' . $this->intentConfig->getPricePattern() . ')\b/u', ' ', $text) ?? $text; } $text = preg_replace('/\s+/u', ' ', $text) ?? 
$text; diff --git a/src/Commerce/ShopSearchService.php b/src/Commerce/ShopSearchService.php index 04d9fc7..49cce7b 100644 --- a/src/Commerce/ShopSearchService.php +++ b/src/Commerce/ShopSearchService.php @@ -8,7 +8,6 @@ use App\Commerce\Dto\ShopProductResult; use App\Shopware\ShopwareCriteriaBuilder; use App\Shopware\StoreApiClient; use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface; -use Symfony\Contracts\HttpClient\Exception\DecodingExceptionInterface; use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface; use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface; use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface; @@ -16,14 +15,13 @@ use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface; final readonly class ShopSearchService { public function __construct( - private CommerceQueryParser $queryParser, + private CommerceQueryParser $queryParser, private ShopwareCriteriaBuilder $criteriaBuilder, - private StoreApiClient $storeApiClient, - private bool $enabled = true, - private int $maxResults = 25, - private string $baseUrl - ) - { + private StoreApiClient $storeApiClient, + private bool $enabled = true, + private int $maxResults = 25, + private string $baseUrl + ) { } /** @@ -36,18 +34,17 @@ final readonly class ShopSearchService } $response = []; + $query = $this->queryParser->parse($originalPrompt, $commerceIntent); $criteria = $this->criteriaBuilder->build($query, $this->maxResults); try { $response = $this->storeApiClient->searchProducts($criteria); - } catch (ClientExceptionInterface|DecodingExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) { + } catch (ClientExceptionInterface|RedirectionExceptionInterface|ServerExceptionInterface|TransportExceptionInterface $e) { } - $result = $this->mapProducts($response);; - - return $result; + return $this->mapProducts($response); } /** @@ -68,42 +65,44 @@ final readonly class 
ShopSearchService } $results[] = new ShopProductResult( - id: (string)($row['id'] ?? ''), - name: trim((string)($row['translated']['name'] ?? '')), - productNumber: isset($row['productNumber']) ? (string)$row['productNumber'] : null, + id: (string) ($row['id'] ?? ''), + name: trim((string) ($row['translated']['name'] ?? '')), + productNumber: isset($row['productNumber']) ? (string) $row['productNumber'] : null, + manufacturer: $this->extractManufacturer($row), price: $this->extractPrice($row), - available: isset($row['available']) ? (bool)$row['available'] : null, + available: isset($row['available']) ? (bool) $row['available'] : null, url: $this->baseUrl . $this->extractUrl($row), highlights: $this->extractHighlights($row), description: $this->cleanUpDescription($row), productImage: $row['cover']['media']['thumbnails'][0]['url'] ?? 'no-image', - customFields: $this->getRelevantCustomFields($row['customFields']) + customFields: $this->getRelevantCustomFields($row['customFields'] ?? []) ); } return array_values(array_filter( $results, - static fn(ShopProductResult $product): bool => $product->name !== '' + static fn (ShopProductResult $product): bool => $product->name !== '' )); } - private function getRelevantCustomFields($customField): string + private function getRelevantCustomFields(array $customField): string { $result = ($customField['migration_Backup_product_attr1'] ?? '') . ': ' . ($customField['migration_Backup_product_attr2'] ?? ''); $result .= ' | Einsatzgebiete: ' . ($customField['migration_Backup_product_attr4'] ?? ''); $result .= ' | Sprachen: ' . ($customField['migration_Backup_product_attr5'] ?? 
''); - return $result; + return trim($result); } - private function cleanUpDescription($description): string + private function cleanUpDescription(array $description): string { if (isset($description['translated']['description'])) { - $newDesc = strip_tags((string)$description['translated']['description']); - $newDesc = preg_replace('/^[ \t]*\R/m', '', $newDesc); // leere Zeilen weg - $newDesc = preg_replace('/[ \t]{2,}/', ' ', $newDesc); // mehrere Spaces zu einem - $result = trim($newDesc); - return substr($result, 0, 500); + $newDesc = strip_tags((string) $description['translated']['description']); + $newDesc = preg_replace('/^[ \t]*\R/m', '', $newDesc); + $newDesc = preg_replace('/[ \t]{2,}/', ' ', $newDesc); + $result = trim((string) $newDesc); + + return mb_substr($result, 0, 500); } return ''; @@ -114,7 +113,9 @@ final readonly class ShopSearchService $manufacturer = $row['manufacturer'] ?? null; if (is_array($manufacturer) && isset($manufacturer['name']) && is_string($manufacturer['name'])) { - return trim($manufacturer['name']) !== '' ? trim($manufacturer['name']) : null; + $name = trim($manufacturer['name']); + + return $name !== '' ? $name : null; } return null; @@ -128,12 +129,18 @@ final readonly class ShopSearchService return null; } - $unitPrice = $calculatedPrice['unitPrice'] ?? $calculatedPrice['totalPrice'] ?? $calculatedPrice['referencePrice'] ?? $calculatedPrice['listPrice'] ?? $calculatedPrice['regulationPrice'] ?? 0; + $unitPrice = $calculatedPrice['unitPrice'] + ?? $calculatedPrice['totalPrice'] + ?? $calculatedPrice['referencePrice'] + ?? $calculatedPrice['listPrice'] + ?? $calculatedPrice['regulationPrice'] + ?? null; + if (!is_numeric($unitPrice)) { return null; } - return number_format((float)$unitPrice, 2, ',', '.') . ' €'; + return number_format((float) $unitPrice, 2, ',', '.') . 
' €'; } private function extractUrl(array $row): ?string @@ -166,7 +173,7 @@ final readonly class ShopSearchService $highlights = []; if (isset($row['available'])) { - $highlights[] = ((bool)$row['available']) ? 'Verfügbar' : 'Nicht verfügbar'; + $highlights[] = (bool) $row['available'] ? 'Verfügbar' : 'Nicht verfügbar'; } if (isset($row['productNumber']) && is_string($row['productNumber']) && trim($row['productNumber']) !== '') { diff --git a/src/Config/AgentRunnerConfig.php b/src/Config/AgentRunnerConfig.php new file mode 100644 index 0000000..5ca4789 --- /dev/null +++ b/src/Config/AgentRunnerConfig.php @@ -0,0 +1,29 @@ +maxFullLines : $this->maxRegularLines; + $maxLines = $full ? ContextServiceConfig::MAX_FULL_LINES : ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES; $selected = array_slice($lines, -$maxLines); return implode("\n", $selected); diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index c85f144..bd5e73f 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace App\Intent; +use App\Config\CatalogIntentConfig; use App\Knowledge\Retrieval\QueryCleaner; use App\Tag\TagVectorSearchClient; use App\Tag\TagTypes; @@ -24,23 +25,12 @@ use App\Tag\TagTypes; * - SalesIntent * - Routing */ -final class CatalogIntentLite +final readonly class CatalogIntentLite { - /** - * Minimaler Similarity-Score. - * Verhindert Rauschen. - */ - private const MIN_SCORE = 0.72; - - /** - * Differenz zwischen Top1 und Top2, - * damit kein unsicherer Treffer akzeptiert wird. - */ - private const AMBIGUITY_DELTA = 0.02; public function __construct( - private readonly TagVectorSearchClient $tagVectorClient, - private readonly QueryCleaner $queryCleaner, + private TagVectorSearchClient $tagVectorClient, + private QueryCleaner $queryCleaner ) {} /** @@ -67,7 +57,7 @@ final class CatalogIntentLite $bestScore = (float)($best['score'] ?? 
0.0); // 2) Score-Tags - if ($bestScore < self::MIN_SCORE) { + if ($bestScore < CatalogIntentConfig::MIN_SCORE) { return null; } @@ -75,7 +65,7 @@ final class CatalogIntentLite if (isset($hits[1])) { $secondScore = (float)($hits[1]['score'] ?? 0.0); - if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { + if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { return null; } } diff --git a/src/Intent/CommerceIntentLite.php b/src/Intent/CommerceIntentLite.php index 605e65f..cdc476b 100644 --- a/src/Intent/CommerceIntentLite.php +++ b/src/Intent/CommerceIntentLite.php @@ -4,12 +4,21 @@ declare(strict_types=1); namespace App\Intent; +use App\Config\CommerceIntentConfig; + final class CommerceIntentLite { public const NONE = 'none'; public const PRODUCT_SEARCH = 'product_search'; public const ADVISORY_PRODUCT_SEARCH = 'advisory_product_search'; + public function __construct( + private readonly CommerceIntentConfig $config + ) + { + + } + /** * @return array{intent:string, score:int, signals:string[]} */ @@ -28,28 +37,7 @@ final class CommerceIntentLite $score = 0; $signals = []; - $strongSignals = [ - 'suche', - 'habt', - 'gibt', - 'zeig', - 'welche', - 'vergleich', - 'alternativ', - 'find', - 'shop', - 'store', - 'sku', - 'Artikel', - 'Gerät', - 'testomat', - 'indikator', - 'Titromat', - 'Seminar', - 'Schulung', - 'Sensor', - 'liste' - ]; + $strongSignals = $this->config->getStrongSignalsList(); foreach ($strongSignals as $signal) { if (str_contains($p, strtolower($signal))) { @@ -58,40 +46,36 @@ final class CommerceIntentLite } } - if(preg_match('#\d{3,10}#', $p)){ + if (preg_match('#\d{3,10}#', $p)) { $score += 2; $signals[] = 'sku'; } - if (preg_match('/\b\d+(?:[.,]\d+)?\s*(euro|€|eur|teuer|preis|kosten)\b/u', $p) === 1) { + $pricePattern = $this->config->getPricePattern(); + if (preg_match('/\b\d+(?:[.,]\d+)?\s*(' . $pricePattern . 
')\b/u', $p) === 1) { $score += 2; $signals[] = 'price'; } - if (preg_match('/\b(größe|groesse|grösse)\s*[a-z0-9.-]+\b/u', $p) === 1) { + $sizePattern = $this->config->getSizePattern(); + if (preg_match('/\b(' . $sizePattern . ')\s*[a-z0-9.-]+\b/u', $p) === 1) { $score += 2; $signals[] = 'size'; } - if (preg_match('/\b(xs|s|m|l|xl|xxl|xxxl)\b/u', $p) === 1) { + $sizeTokenPattern = $this->config->getSizeTokenPattern(); + if (preg_match('/\b(' . $sizeTokenPattern . ')\b/u', $p) === 1) { $score += 1; $signals[] = 'size_token'; } - if (preg_match('/\b(schwarz|weiß|weiss|rot|blau|grün|gruen|gelb|grau|beige|rosa|pink|orange|braun)\b/u', $p) === 1) { + $colorPattern = $this->config->getColorPattern(); + if (preg_match('/\b(' . $colorPattern . ')\b/u', $p) === 1) { $score += 1; $signals[] = 'color'; } - $advisorySignals = [ - 'passt', - 'eignet', - 'besser', - 'besten', - 'geeignet', - 'empfiehl', - 'empfehl', - ]; + $advisorySignals = $this->config->getAdvisorySignals(); foreach ($advisorySignals as $signal) { if (str_contains($p, $signal)) { diff --git a/src/Intent/IntentLite.php b/src/Intent/IntentLite.php index 4eea2c1..a696979 100644 --- a/src/Intent/IntentLite.php +++ b/src/Intent/IntentLite.php @@ -4,19 +4,27 @@ declare(strict_types=1); namespace App\Intent; +use App\Config\IntentLightConfig; + /** * IntentLite * - * Deterministische, LLM-agnostische Intent-Erkennung. - * Fokus: LIST-Intent für Retrieval-Steuerung. + * Deterministic, LLM-agnostic intent detection. + * Focus: LIST intent for retrieval control. * - * WICHTIG: - * - Immer mit dem ORIGINAL-Prompt aufrufen. - * - Nicht mit dem QueryCleaner-Ergebnis. + * IMPORTANT: + * - Always call it with the ORIGINAL prompt. + * - Not with the QueryCleaner result. 
*/ -final class IntentLite +final readonly class IntentLite { - private const LIST_THRESHOLD = 4; + + public function __construct( + private IntentLightConfig $config + ) + { + + } public function detectList(string $originalPrompt): array { @@ -28,19 +36,7 @@ final class IntentLite // -------------------------------------------------------- // 1. Starke explizite Listen-Trigger (hohes Gewicht) // -------------------------------------------------------- - $strongPatterns = [ - '/\bliste(n)?\b/u', - '/\bauflisten\b/u', - '/\baufz(a|ä)hl(en)?\b/u', - '/\bnenn(e)?\b/u', - '/\bzeig(e)?\b/u', - '/\bwelche\s+sind\b/u', - '/\bwelche\s+gibt\s+es\b/u', - '/\bwas\s+sind\b/u', - '/\bwie\s+viele\b/u', - '/\branking\b/u', - '/\btop\s*\d+\b/u', - ]; + $strongPatterns = $this->config->getStrongPatterns(); foreach ($strongPatterns as $pattern) { if (preg_match($pattern, $p) === 1) { @@ -52,27 +48,7 @@ final class IntentLite // -------------------------------------------------------- // 2. Mengen- / Mehrzahl-Indikatoren // -------------------------------------------------------- - $quantityWords = [ - 'alle', - 'sämtliche', - 'saemtliche', - 'mehrere', - 'verschiedene', - 'einige', - 'viele', - 'optionen', - 'möglichkeiten', - 'moeglichkeiten', - 'varianten', - 'arten', - 'modelle', - 'funktionen', - 'punkte', - 'schritte', - 'kategorien', - 'übersicht', - 'uebersicht', - ]; + $quantityWords = $this->config->getQuantityWords(); foreach ($quantityWords as $word) { if (preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $p) === 1) { @@ -102,11 +78,11 @@ final class IntentLite // -------------------------------------------------------- // Entscheidung // -------------------------------------------------------- - $isList = $score >= self::LIST_THRESHOLD; + $isList = $score >= IntentLightConfig::LIST_THRESHOLD; return [ 'is_list' => $isList, - 'score' => $score, + 'score' => $score, 'signals' => $signals, ]; } diff --git a/src/Intent/SalesIntentLite.php b/src/Intent/SalesIntentLite.php index b623f94..06c74df 100644 --- a/src/Intent/SalesIntentLite.php +++ b/src/Intent/SalesIntentLite.php @@ -4,26 +4,23 @@ declare(strict_types=1); namespace App\Intent; +use App\Config\SalesIntentConfig; + final class SalesIntentLite { - public const DISCOVERY = 'discovery'; - public const PRICING = 'pricing'; - public const COMPARISON = 'comparison'; - public const OBJECTION = 'objection'; + public const DISCOVERY = 'discovery'; + public const PRICING = 'pricing'; + public const COMPARISON = 'comparison'; + public const OBJECTION = 'objection'; public const IMPLEMENTATION = 'implementation'; - public const ROI = 'roi'; + public const ROI = 'roi'; - /** - * Mindestabstand zwischen Top1 und Top2, - * damit ein Intent wirklich dominant ist. - */ - private const DOMINANCE_DELTA = 2; + public function __construct( + private readonly SalesIntentConfig $config + ) + { - /** - * Mindestscore, damit überhaupt ein Nicht-Discovery-Intent - * akzeptiert wird. 
- */ - private const MIN_SCORE_THRESHOLD = 3; + } public function detect(string $originalPrompt): array { @@ -32,27 +29,23 @@ final class SalesIntentLite if ($p === '') { return [ 'intent' => self::DISCOVERY, - 'score' => 0, + 'score' => 0, ]; } $scores = [ - self::PRICING => 0, - self::COMPARISON => 0, - self::OBJECTION => 0, + self::PRICING => 0, + self::COMPARISON => 0, + self::OBJECTION => 0, self::IMPLEMENTATION => 0, - self::ROI => 0, + self::ROI => 0, ]; // ------------------------------------------------------------ // PRICING // ------------------------------------------------------------ - foreach ([ - 'preis','preise','kosten','lizenz','lizenzmodell', - 'tarif','tarife','gebuehr','gebühr', - 'monatlich','jaehrlich','jährlich','abo','subscription' - ] as $word) { - if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + foreach ($this->config->getSalesSignals() as $word) { + if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { $scores[self::PRICING] += 3; } } @@ -60,14 +53,7 @@ final class SalesIntentLite // ------------------------------------------------------------ // COMPARISON // ------------------------------------------------------------ - foreach ([ - '/\bvergleich(en)?\b/u', - '/\bvs\b/u', - '/\bgegenueber\b/u', - '/\balternative(n)?\b/u', - '/\bunterschied(e)?\b/u', - '/\bbesser\b/u' - ] as $pattern) { + foreach ($this->config->getComparisonSignals() as $pattern) { if (preg_match($pattern, $p)) { $scores[self::COMPARISON] += 3; } @@ -76,12 +62,8 @@ final class SalesIntentLite // ------------------------------------------------------------ // OBJECTION // ------------------------------------------------------------ - foreach ([ - 'problem','risiko','nachteil','datenschutz', - 'dsgvo','sicherheit','compliance', - 'kritik','zweifel','unsicher' - ] as $word) { - if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + foreach ($this->config->getComparisonSignals() as $word) { + if (preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $p)) { $scores[self::OBJECTION] += 3; } } @@ -89,15 +71,8 @@ final class SalesIntentLite // ------------------------------------------------------------ // IMPLEMENTATION // ------------------------------------------------------------ - foreach ([ - 'implementierung','implementieren', - 'integration','integrieren', - 'einführung','einfuehrung', - 'aufwand','setup','rollout', - 'migration','installation', - 'api','schnittstelle' - ] as $word) { - if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + foreach ($this->config->getImplementationSignals() as $word) { + if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { $scores[self::IMPLEMENTATION] += 3; } } @@ -105,13 +80,8 @@ final class SalesIntentLite // ------------------------------------------------------------ // ROI // ------------------------------------------------------------ - foreach ([ - 'roi','rentabilitaet','rentabilität', - 'business case','einsparung', - 'kosten senken','umsatz steigern', - 'effizienz steigern' - ] as $word) { - if (preg_match('/\b'.preg_quote($word,'/').'\b/u', $p)) { + foreach ($this->config->getRoiSignals() as $word) { + if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p)) { $scores[self::ROI] += 3; } } @@ -123,31 +93,31 @@ final class SalesIntentLite arsort($scores); $intents = array_keys($scores); - $values = array_values($scores); + $values = array_values($scores); $topIntent = $intents[0] ?? self::DISCOVERY; - $topScore = $values[0] ?? 0; + $topScore = $values[0] ?? 0; $secondScore = $values[1] ?? 
0; // Kein relevanter Score → Discovery - if ($topScore < self::MIN_SCORE_THRESHOLD) { + if ($topScore < SalesIntentConfig::MIN_SCORE_THRESHOLD) { return [ 'intent' => self::DISCOVERY, - 'score' => 0, + 'score' => 0, ]; } // Keine klare Dominanz → Discovery - if (($topScore - $secondScore) < self::DOMINANCE_DELTA) { + if (($topScore - $secondScore) < SalesIntentConfig::DOMINANCE_DELTA) { return [ 'intent' => self::DISCOVERY, - 'score' => $topScore, + 'score' => $topScore, ]; } return [ 'intent' => $topIntent, - 'score' => $topScore, + 'score' => $topScore, ]; } diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index b12fd03..ce215e0 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -5,6 +5,7 @@ declare(strict_types=1); namespace App\Knowledge\Retrieval; use App\Catalog\EntityCatalogService; +use App\Config\NdjsonHybridRetrieverConfig; use App\Entity\ModelGenerationConfig; use App\Intent\CatalogIntentLite; use App\Intent\IntentLite; @@ -14,35 +15,20 @@ use App\Routing\IntentRouteResolver; use App\Tag\TagRoutingService; use App\Vector\VectorSearchClient; -final class NdjsonHybridRetriever implements RetrieverInterface +final readonly class NdjsonHybridRetriever implements RetrieverInterface { - private const VECTOR_SCORE_THRESHOLD = 0.75; - - private const HARD_MAX_CHUNKS = 90; - private const HARD_MAX_VECTORK = 250; - - private const LIST_BONUS = 1.25; - - private const MAX_CHUNKS_PER_DOC = 2; - private const MIN_CHUNK_DISTANCE = 2.5; - private const RRF_K = 60; - - private const THRESHOLD_FLOOR = 0.83; - private const THRESHOLD_CEIL = 0.92; - private const EMPTY_RRF_FALLBACK_TOPN = 1; - public function __construct( - private readonly NdjsonChunkLookup $lookup, - private readonly VectorSearchClient $vectorClient, - private readonly TagRoutingService $tagRouting, - private readonly ModelGenerationConfigRepository $configRepository, 
- private readonly QueryCleaner $queryCleaner, - private readonly IntentLite $intentLite, - private readonly SalesIntentLite $salesIntentLite, - private readonly CatalogIntentLite $catalogIntent, - private readonly IntentRouteResolver $routeResolver, - private readonly EntityCatalogService $entityCatalogService, - private readonly QueryEnricher $queryEnricher, + private NdjsonChunkLookup $lookup, + private VectorSearchClient $vectorClient, + private TagRoutingService $tagRouting, + private ModelGenerationConfigRepository $configRepository, + private QueryCleaner $queryCleaner, + private IntentLite $intentLite, + private SalesIntentLite $salesIntentLite, + private CatalogIntentLite $catalogIntent, + private IntentRouteResolver $routeResolver, + private EntityCatalogService $entityCatalogService, + private QueryEnricher $queryEnricher, ) { } @@ -206,8 +192,8 @@ final class NdjsonHybridRetriever implements RetrieverInterface ): array { - $limit = max(1, min($config->getRetrievalMaxChunks(), self::HARD_MAX_CHUNKS)); - $vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), self::HARD_MAX_VECTORK)); + $limit = max(1, min($config->getRetrievalMaxChunks(), NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS)); + $vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK)); $isListQuery = $this->intentLite->isListQuery($prompt); @@ -218,7 +204,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface return [ 'limit' => $limit, 'is_list_query' => $isListQuery, - 'threshold' => self::VECTOR_SCORE_THRESHOLD, + 'threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD, 'ranked_chunk_ids' => [], 'rows' => [], 'rrf_scores' => [], @@ -270,7 +256,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface if ($rrfScores === [] && $globalHits !== []) { $rrfScores = $this->fallbackRrfFromHits( $globalHits, - self::EMPTY_RRF_FALLBACK_TOPN + NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN ); } @@ 
-327,7 +313,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface ): array { - $threshold = self::VECTOR_SCORE_THRESHOLD; + $threshold = NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD; $topK = $vectorTopKBase; if ( @@ -338,11 +324,11 @@ final class NdjsonHybridRetriever implements RetrieverInterface } if ($isListQuery) { - $topK = (int)round($topK * self::LIST_BONUS); + $topK = (int)round($topK * NdjsonHybridRetrieverConfig::LIST_BONUS); } - $topK = max(1, min($topK, self::HARD_MAX_VECTORK)); - $threshold = max(self::THRESHOLD_FLOOR, min(self::THRESHOLD_CEIL, $threshold)); + $topK = max(1, min($topK, NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK)); + $threshold = max(NdjsonHybridRetrieverConfig::THRESHOLD_FLOOR, min(NdjsonHybridRetrieverConfig::THRESHOLD_CEIL, $threshold)); return [$threshold, $topK]; } @@ -382,7 +368,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface } $rank++; - $rrf = 1.0 / (self::RRF_K + $rank); + $rrf = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank); if ($boost) { $rrf *= 1.2; @@ -413,7 +399,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface } $rank++; - $rrf[(string)$hit['chunk_id']] = 1.0 / (self::RRF_K + $rank); + $rrf[(string)$hit['chunk_id']] = 1.0 / (NdjsonHybridRetrieverConfig::RRF_K + $rank); if ($rank >= $topN) { break; @@ -475,13 +461,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface continue; } - if (($docCounter[$docId] ?? 0) >= self::MAX_CHUNKS_PER_DOC) { + if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) { continue; } if (is_int($chunkIndex)) { foreach ($docChunkPositions[$docId] ?? 
[] as $prevIdx) { - if (abs($prevIdx - $chunkIndex) < self::MIN_CHUNK_DISTANCE) { + if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) { continue 2; } } diff --git a/src/Shopware/StoreApiClient.php b/src/Shopware/StoreApiClient.php index 3245962..e7a2da1 100644 --- a/src/Shopware/StoreApiClient.php +++ b/src/Shopware/StoreApiClient.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace App\Shopware; +use RuntimeException; use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface; use Symfony\Contracts\HttpClient\Exception\DecodingExceptionInterface; use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface; @@ -15,18 +16,16 @@ final readonly class StoreApiClient { public function __construct( private HttpClientInterface $httpClient, - private string $baseUrl, - private string $salesChannelAccessKey, - private int $timeoutSeconds = 5, - ) - { + private string $baseUrl, + private string $salesChannelAccessKey, + private int $timeoutSeconds = 5, + ) { } /** * @throws TransportExceptionInterface * @throws ServerExceptionInterface * @throws RedirectionExceptionInterface - * @throws DecodingExceptionInterface * @throws ClientExceptionInterface */ public function searchProducts(array $criteria): array @@ -44,10 +43,22 @@ final readonly class StoreApiClient ]); $statusCode = $response->getStatusCode(); + $content = $response->getContent(false); + if ($statusCode < 200 || $statusCode >= 300) { - return []; + throw new RuntimeException(sprintf( + 'Shopware Store API request failed with status %d. Response: %s', + $statusCode, + mb_substr(trim($content), 0, 1000) + )); } - return $response->toArray(false); + $data = json_decode($content, true); + + if (!is_array($data)) { + throw new RuntimeException('Shopware Store API returned invalid JSON.'); + } + + return $data; } } \ No newline at end of file