rm CachedRetriever.php
add second shopsearch
This commit is contained in:
@@ -47,6 +47,10 @@ parameters:
|
||||
mto.commerce.store_api_base_url: '%env(SHOPWARE_STORE_API_BASE_URL)%'
|
||||
mto.commerce.sales_channel_access_key: '%env(SHOPWARE_SALES_CHANNEL_ACCESS_KEY)%'
|
||||
|
||||
mto.commerce.search_repair.enabled: true
|
||||
mto.commerce.search_repair.max_queries: 3
|
||||
mto.commerce.search_repair.min_primary_results_without_repair: 2
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Services
|
||||
@@ -116,6 +120,13 @@ services:
|
||||
|
||||
App\Commerce\CommerceQueryParser: ~
|
||||
|
||||
App\Commerce\SearchRepairService:
|
||||
arguments:
|
||||
$logger: '@monolog.logger.agent'
|
||||
$enabled: '%mto.commerce.search_repair.enabled%'
|
||||
$maxRepairQueries: '%mto.commerce.search_repair.max_queries%'
|
||||
$minPrimaryResultsWithoutRepair: '%mto.commerce.search_repair.min_primary_results_without_repair%'
|
||||
|
||||
App\Shopware\ShopwareCriteriaBuilder: ~
|
||||
|
||||
App\Shopware\StoreApiClient:
|
||||
|
||||
@@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Agent;
|
||||
|
||||
use App\Commerce\SearchRepairService;
|
||||
use App\Commerce\ShopSearchService;
|
||||
use App\Config\AgentRunnerConfig;
|
||||
use App\Context\ContextService;
|
||||
@@ -28,6 +29,7 @@ final readonly class AgentRunner
|
||||
private UrlAnalyzer $urlAnalyzer,
|
||||
private RetrieverInterface $retriever,
|
||||
private ShopSearchService $shopSearchService,
|
||||
private SearchRepairService $searchRepairService,
|
||||
private CommerceIntentLite $commerceIntentLite,
|
||||
private OllamaClient $ollamaClient,
|
||||
private LoggerInterface $agentLogger,
|
||||
@@ -49,20 +51,21 @@ final readonly class AgentRunner
|
||||
}
|
||||
|
||||
$shopResults = [];
|
||||
$primaryShopResults = [];
|
||||
$sources = [];
|
||||
$optimizedShopQuery = '';
|
||||
$shopSearchQuery = '';
|
||||
$commerceIntent = CommerceIntentLite::NONE;
|
||||
$commerceHistoryContext = '';
|
||||
$attemptedShopRepair = false;
|
||||
$usedShopRepair = false;
|
||||
$shopRepairQueries = [];
|
||||
|
||||
$this->agentLogger->info('Agent run started', [
|
||||
'userId' => $userId,
|
||||
]);
|
||||
|
||||
try {
|
||||
// ---------------------------------------------------------
|
||||
// 1) Context strategy
|
||||
// ---------------------------------------------------------
|
||||
if ($includeFullContext) {
|
||||
// Full context mode is already passed to PromptBuilder.
|
||||
// Additional context strategies can be added here later.
|
||||
@@ -70,9 +73,6 @@ final readonly class AgentRunner
|
||||
|
||||
yield $this->systemMsg('Ich analysiere deine Anfrage...', 'think');
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 2) Extract URL content
|
||||
// ---------------------------------------------------------
|
||||
yield $this->systemMsg('Ich prüfe auf Internetquellen...', 'think');
|
||||
|
||||
$urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
|
||||
@@ -80,9 +80,6 @@ final readonly class AgentRunner
|
||||
$this->addSource($sources, 'Externe URL');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 3) Retrieve RAG knowledge
|
||||
// ---------------------------------------------------------
|
||||
yield $this->systemMsg('Ich hole relevante Daten aus meinem RAG-Wissen...', 'think');
|
||||
|
||||
$knowledgeChunks = $this->retriever->retrieve($prompt);
|
||||
@@ -90,9 +87,6 @@ final readonly class AgentRunner
|
||||
$this->addSource($sources, 'RAG Wissen');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 4) Optional commerce/shop search
|
||||
// ---------------------------------------------------------
|
||||
$commerceIntent = $this->detectCommerceIntent($prompt);
|
||||
|
||||
if ($this->isCommerceIntent($commerceIntent)) {
|
||||
@@ -127,16 +121,35 @@ final readonly class AgentRunner
|
||||
'think'
|
||||
);
|
||||
|
||||
$shopResults = $this->searchShop(
|
||||
$primaryShopResults = $this->searchShop(
|
||||
$shopSearchQuery,
|
||||
$commerceIntent,
|
||||
$userId,
|
||||
$commerceHistoryContext
|
||||
);
|
||||
|
||||
$repairPayload = $this->repairShopResults(
|
||||
prompt: $prompt,
|
||||
userId: $userId,
|
||||
commerceIntent: $commerceIntent,
|
||||
commerceHistoryContext: $commerceHistoryContext,
|
||||
primaryQuery: $shopSearchQuery,
|
||||
primaryShopResults: $primaryShopResults,
|
||||
knowledgeChunks: $knowledgeChunks
|
||||
);
|
||||
|
||||
$shopResults = $repairPayload['results'];
|
||||
$attemptedShopRepair = $repairPayload['attemptedRepair'];
|
||||
$usedShopRepair = $repairPayload['usedRepair'];
|
||||
$shopRepairQueries = $repairPayload['repairQueries'];
|
||||
|
||||
if ($shopResults !== []) {
|
||||
$this->addSource($sources, 'Shopsystem');
|
||||
}
|
||||
|
||||
if ($attemptedShopRepair) {
|
||||
$this->addSource($sources, 'Erweiterte Shopsuche');
|
||||
}
|
||||
}
|
||||
|
||||
if ($shopResults !== []) {
|
||||
@@ -145,9 +158,6 @@ final readonly class AgentRunner
|
||||
|
||||
yield $this->systemMsg('Ich analysiere alle Informationen...', 'think');
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 5) Build final prompt
|
||||
// ---------------------------------------------------------
|
||||
$finalPrompt = $this->promptBuilder->build(
|
||||
prompt: $prompt,
|
||||
userId: $userId,
|
||||
@@ -164,6 +174,11 @@ final readonly class AgentRunner
|
||||
'finalPrompt' => $finalPrompt,
|
||||
'optimizedShopQuery' => $optimizedShopQuery,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'primaryShopResultsCount' => count($primaryShopResults),
|
||||
'shopResultsCount' => count($shopResults),
|
||||
'attemptedShopRepair' => $attemptedShopRepair,
|
||||
'usedShopRepair' => $usedShopRepair,
|
||||
'shopRepairQueries' => $shopRepairQueries,
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -181,9 +196,6 @@ final readonly class AgentRunner
|
||||
yield $this->emitSources($sources, 'Genutzte Quellen: ');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 6) Stream final LLM answer
|
||||
// ---------------------------------------------------------
|
||||
$fullOutput = yield from $this->streamFinalAnswer($finalPrompt);
|
||||
|
||||
if ($sources !== []) {
|
||||
@@ -194,9 +206,6 @@ final readonly class AgentRunner
|
||||
yield $this->systemMsg($finalPrompt, 'debug');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 7) Persist conversation history
|
||||
// ---------------------------------------------------------
|
||||
if ($fullOutput !== '') {
|
||||
$this->contextService->appendHistory(
|
||||
$userId,
|
||||
@@ -210,7 +219,11 @@ final readonly class AgentRunner
|
||||
'outputLength' => mb_strlen($fullOutput),
|
||||
'contextMode' => $includeFullContext ? 'full' : 'recent',
|
||||
'commerceIntent' => $commerceIntent,
|
||||
'primaryShopResultsCount' => count($primaryShopResults),
|
||||
'shopResultsCount' => count($shopResults),
|
||||
'attemptedShopRepair' => $attemptedShopRepair,
|
||||
'usedShopRepair' => $usedShopRepair,
|
||||
'shopRepairQueries' => $shopRepairQueries,
|
||||
'knowledgeChunkCount' => count($knowledgeChunks),
|
||||
'hasUrlContent' => $urlContent !== '',
|
||||
'usedOptimizedShopQuery' => $optimizedShopQuery !== '',
|
||||
@@ -282,7 +295,51 @@ final readonly class AgentRunner
|
||||
return '';
|
||||
}
|
||||
|
||||
return trim($optimizedQuery);
|
||||
return $this->sanitizeOptimizedShopQuery($optimizedQuery);
|
||||
}
|
||||
|
||||
/**
 * Run the search-repair pass on top of the primary shop results without ever
 * letting a repair failure abort the agent run.
 *
 * The work is delegated to SearchRepairService::repair(). If that call throws,
 * a warning is logged and the primary results are returned unchanged, flagged
 * as "repair neither attempted nor used".
 *
 * @param array $primaryShopResults Results of the primary shop search.
 * @param array $knowledgeChunks    Retrieved knowledge chunks, forwarded to the repair service.
 *
 * @return array{
 *     results: array,
 *     attemptedRepair: bool,
 *     usedRepair: bool,
 *     repairQueries: string[]
 * }
 */
private function repairShopResults(
    string $prompt,
    string $userId,
    string $commerceIntent,
    string $commerceHistoryContext,
    string $primaryQuery,
    array $primaryShopResults,
    array $knowledgeChunks
): array {
    try {
        $payload = $this->searchRepairService->repair(
            prompt: $prompt,
            commerceIntent: $commerceIntent,
            commerceHistoryContext: $commerceHistoryContext,
            primaryQuery: $primaryQuery,
            primaryShopResults: $primaryShopResults,
            knowledgeChunks: $knowledgeChunks
        );
    } catch (Throwable $failure) {
        // Repair is best-effort: record the failure and fall back to the primary results.
        $logContext = [
            'userId' => $userId,
            'commerceIntent' => $commerceIntent,
            'primaryQuery' => $primaryQuery,
            'primaryShopResultsCount' => count($primaryShopResults),
            'exception' => $failure,
        ];

        $this->agentLogger->warning('Shop repair failed, continuing with primary shop results', $logContext);

        $payload = [
            'results' => $primaryShopResults,
            'attemptedRepair' => false,
            'usedRepair' => false,
            'repairQueries' => [],
        ];
    }

    return $payload;
}
|
||||
|
||||
private function searchShop(
|
||||
@@ -328,6 +385,22 @@ final readonly class AgentRunner
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reduce a raw "optimized query" answer to one clean, single-line search string.
 *
 * Steps: trim, keep only the first line, strip wrapping quotes/backticks,
 * drop a leading label such as "Keywords:" / "Suchquery:" / "Search query:" /
 * "Query:", strip quotes again, and collapse whitespace runs to single spaces.
 *
 * @param string $query Raw query text.
 *
 * @return string The cleaned query, or '' when nothing usable remains.
 */
private function sanitizeOptimizedShopQuery(string $query): string
{
    $query = trim($query);

    if ($query === '') {
        return '';
    }

    // Characters treated as wrapping noise: whitespace plus quotes and backticks.
    $wrapperChars = " \t\n\r\0\x0B\"'`";

    // Only the first line counts as the query; anything after a line break is dropped.
    $query = preg_split('/\R+/u', $query, 2)[0] ?? $query;

    // Unwrap first: the label pattern below is anchored at the start of the string,
    // so a quoted answer like "Keywords: foo" would otherwise keep its label.
    $query = trim($query, $wrapperChars);
    $query = preg_replace('/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu', '', $query) ?? $query;

    // Second pass: the value behind the label may itself be quoted (Query: "foo").
    $query = trim($query, $wrapperChars);
    $query = preg_replace('/\s+/u', ' ', $query) ?? $query;

    return trim($query);
}
|
||||
|
||||
/**
|
||||
* @return Generator<int, string, mixed, string>
|
||||
*/
|
||||
|
||||
@@ -87,6 +87,22 @@ final readonly class PromptBuilder
|
||||
'testomat',
|
||||
];
|
||||
|
||||
private const ACCESSORY_REQUEST_KEYWORDS = [
|
||||
'passend',
|
||||
'passende',
|
||||
'passendes',
|
||||
'zubehör',
|
||||
'zubehor',
|
||||
'dazu',
|
||||
'indikator',
|
||||
'reagenz',
|
||||
'kit',
|
||||
'set',
|
||||
'zusatz',
|
||||
'ergänzung',
|
||||
'ergaenzung',
|
||||
];
|
||||
|
||||
public function __construct(
|
||||
private ContextService $contextService,
|
||||
private SystemPromptRepository $systemPromptRepository,
|
||||
@@ -119,18 +135,20 @@ final readonly class PromptBuilder
|
||||
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
|
||||
|
||||
$hasShopResults = $shopResults !== [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
|
||||
$systemBlock = $this->buildSystemBlock();
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
|
||||
$responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
|
||||
$userBlock = $this->buildUserBlock($prompt);
|
||||
|
||||
// Build fixed blocks first so history only receives the remaining budget.
|
||||
$fixedPrompt = $this->implodeBlocks([
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$outputPriorityBlock,
|
||||
$responseFormatBlock,
|
||||
$knowledgeBlock,
|
||||
$userBlock,
|
||||
]);
|
||||
@@ -145,6 +163,7 @@ final readonly class PromptBuilder
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$outputPriorityBlock,
|
||||
$responseFormatBlock,
|
||||
$knowledgeBlock,
|
||||
$contextBlock,
|
||||
$userBlock,
|
||||
@@ -326,6 +345,39 @@ final readonly class PromptBuilder
|
||||
"Do not let bundles, accessories, or service items override a better technical match unless the user explicitly asks for them.\n";
|
||||
}
|
||||
|
||||
/**
 * Assemble the "RESPONSE FORMAT RULES" prompt block.
 *
 * The block always contains the general formatting rules. Further rule groups
 * are appended when shop results exist, when the prompt is a technical product
 * question, and when the prompt asks for an accessory or bundle.
 *
 * @return string The rules joined by newlines.
 */
private function buildResponseFormatBlock(
    string $prompt,
    bool $hasShopResults,
    bool $isTechnicalProductQuestion
): string {
    $lines = [
        "RESPONSE FORMAT RULES:",
        "- Keep normal spacing between all words. Never fuse words together.",
        "- Use short, clean paragraphs or short labeled sections.",
        "- Do not use persuasive or promotional wording.",
        "- Do not repeat the same fact in slightly different wording.",
    ];

    if ($hasShopResults) {
        array_push(
            $lines,
            "- If a product is identified, prefer this structure per product: product name, product number, price, availability, URL, then only the most relevant technical facts.",
            "- Keep price, availability, and URL on separate lines when they are present."
        );
    }

    if ($isTechnicalProductQuestion) {
        array_push(
            $lines,
            "- Write like technical documentation: precise, neutral, and source-close.",
            "- Prefer exact values, ranges, thresholds, compatibility notes, and application areas over general explanation."
        );
    }

    $wantsAccessory = $this->asksForAccessoryOrBundle($prompt);

    if ($wantsAccessory) {
        array_push(
            $lines,
            "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.",
            "- The main device must come first. The accessory must not replace the main device.",
            "- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.",
            "- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so."
        );
    }

    return implode("\n", $lines);
}
|
||||
|
||||
/**
|
||||
* Build the knowledge block.
|
||||
*
|
||||
@@ -451,6 +503,8 @@ final readonly class PromptBuilder
|
||||
"- Use retrieved knowledge as highest priority for technical matching, thresholds, measurement principles, and technical explanation.",
|
||||
"- When shop results are present and relevant, include current price and the actual URL if available.",
|
||||
"- Do not let accessories, bundles, or service items override a technically better product match unless the user explicitly asks for them.",
|
||||
"- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.",
|
||||
"- Do not claim that an accessory is required, necessary, used for calibration, or sets the measurement range unless this is explicitly stated in the provided sources.",
|
||||
]);
|
||||
} else {
|
||||
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
|
||||
@@ -484,10 +538,10 @@ final readonly class PromptBuilder
|
||||
{
|
||||
$filtered = array_values(array_filter(
|
||||
array_map(
|
||||
fn ($block): string => is_string($block) ? $this->normalizeBlockText($block) : '',
|
||||
fn($block): string => is_string($block) ? $this->normalizeBlockText($block) : '',
|
||||
$blocks
|
||||
),
|
||||
static fn (string $block): bool => $block !== ''
|
||||
static fn(string $block): bool => $block !== ''
|
||||
));
|
||||
|
||||
return implode("\n\n", $filtered);
|
||||
@@ -536,6 +590,19 @@ final readonly class PromptBuilder
|
||||
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
/**
 * Tell whether the prompt contains any of the accessory/bundle keywords.
 *
 * The prompt is lower-cased (UTF-8) and checked by plain substring search
 * against self::ACCESSORY_REQUEST_KEYWORDS, so a keyword also matches inside
 * longer words.
 */
private function asksForAccessoryOrBundle(string $prompt): bool
{
    $haystack = mb_strtolower($prompt, 'UTF-8');

    $hits = array_filter(
        self::ACCESSORY_REQUEST_KEYWORDS,
        static fn (string $needle): bool => str_contains($haystack, $needle)
    );

    return $hits !== [];
}
|
||||
|
||||
private function clamp(int $value, int $min, int $max): int
|
||||
{
|
||||
return max($min, min($max, $value));
|
||||
|
||||
@@ -4,11 +4,15 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Commerce\SearchRepairService;
|
||||
use App\Commerce\ShopSearchService;
|
||||
use App\Intent\CommerceIntentLite;
|
||||
use App\Knowledge\Retrieval\RetrieverInterface;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
|
||||
#[AsCommand(name: 'mto:agent:test:shop-search')]
|
||||
@@ -16,33 +20,105 @@ final class TestShopSearchCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly ShopSearchService $shopSearchService,
|
||||
private readonly SearchRepairService $searchRepairService,
|
||||
private readonly RetrieverInterface $retriever,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
/**
 * Declare the command's CLI interface.
 *
 * - query     (argument, optional): the search prompt to test.
 * - --intent  (optional value): commerce intent passed to the shop search.
 * - --history (optional value): commerce history context passed to the shop search.
 * - --repair  (flag): additionally run the search-repair pass.
 *
 * Each name is registered exactly once; Symfony rejects a second argument
 * with the same name.
 */
protected function configure(): void
{
    $this
        ->addArgument(
            'query',
            InputArgument::OPTIONAL,
            'Die zu testende Suchanfrage',
            'zeige mir testomat modelle wasserhärte unter 5000 euro'
        )
        ->addOption(
            'intent',
            null,
            InputOption::VALUE_OPTIONAL,
            'Commerce intent',
            CommerceIntentLite::ADVISORY_PRODUCT_SEARCH
        )
        ->addOption(
            'history',
            null,
            InputOption::VALUE_OPTIONAL,
            'Optionaler Commerce-History-Kontext',
            ''
        )
        ->addOption(
            'repair',
            null,
            InputOption::VALUE_NONE,
            'Aktiviert zusätzlich den Search-Repair-Test'
        );
}
|
||||
|
||||
/**
 * Run the primary shop search for the given query and print the results;
 * with --repair, also run the search-repair pass and print its outcome.
 *
 * The shop search is executed exactly once for the primary pass, using the
 * query together with the selected intent and history context.
 *
 * @return int Always Command::SUCCESS.
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
    $query = trim((string) $input->getArgument('query'));
    $intent = trim((string) $input->getOption('intent'));
    $history = trim((string) $input->getOption('history'));
    $useRepair = (bool) $input->getOption('repair');

    $output->writeln('<info>Test query:</info> ' . $query);
    $output->writeln('<info>Intent:</info> ' . $intent);
    $output->writeln('<info>Repair:</info> ' . ($useRepair ? 'ja' : 'nein'));

    if ($history !== '') {
        $output->writeln('<info>History:</info> ' . $history);
    }

    $output->writeln('');

    $primaryResults = $this->shopSearchService->search($query, $intent, $history);

    $output->writeln('<comment>Primärsuche</comment>');
    $this->renderResults($output, $primaryResults);

    if (!$useRepair) {
        return Command::SUCCESS;
    }

    // The repair service also receives retrieved knowledge chunks for the query.
    $knowledgeChunks = $this->retriever->retrieve($query);

    // In this test command the raw query doubles as prompt and primary query.
    $repairPayload = $this->searchRepairService->repair(
        prompt: $query,
        commerceIntent: $intent,
        commerceHistoryContext: $history,
        primaryQuery: $query,
        primaryShopResults: $primaryResults,
        knowledgeChunks: $knowledgeChunks
    );

    $output->writeln('');
    $output->writeln('<comment>Repair-Auswertung</comment>');
    $output->writeln('  Used repair: ' . ($repairPayload['usedRepair'] ? 'ja' : 'nein'));
    $output->writeln('  Repair queries: ' . (
        $repairPayload['repairQueries'] !== []
            ? implode(' | ', $repairPayload['repairQueries'])
            : '-'
    ));
    $output->writeln('');

    $output->writeln('<comment>Finale Ergebnisse nach Repair/Merge</comment>');
    $this->renderResults($output, $repairPayload['results']);

    return Command::SUCCESS;
}
|
||||
|
||||
/**
|
||||
* @param array<int, object> $results
|
||||
*/
|
||||
private function renderResults(OutputInterface $output, array $results): void
|
||||
{
|
||||
if ($results === []) {
|
||||
$output->writeln('Keine Shop-Ergebnisse gefunden.');
|
||||
|
||||
return Command::SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
foreach ($results as $index => $result) {
|
||||
@@ -57,7 +133,19 @@ final class TestShopSearchCommand extends Command
|
||||
$output->writeln(' URL: ' . ($result->url ?? '-'));
|
||||
$output->writeln(' Description: ' . ($result->description ?? '-'));
|
||||
|
||||
if ($result->highlights !== []) {
|
||||
if (property_exists($result, 'matchScore')) {
|
||||
$output->writeln(' MatchScore: ' . (($result->matchScore ?? null) !== null ? (string) $result->matchScore : '-'));
|
||||
}
|
||||
|
||||
if (property_exists($result, 'matchSource')) {
|
||||
$output->writeln(' MatchSource: ' . ($result->matchSource ?? '-'));
|
||||
}
|
||||
|
||||
if (property_exists($result, 'matchedQueries') && is_array($result->matchedQueries) && $result->matchedQueries !== []) {
|
||||
$output->writeln(' MatchedQueries: ' . implode(' | ', $result->matchedQueries));
|
||||
}
|
||||
|
||||
if (is_array($result->highlights) && $result->highlights !== []) {
|
||||
$output->writeln(' Highlights:');
|
||||
foreach ($result->highlights as $highlight) {
|
||||
$output->writeln(' - ' . $highlight);
|
||||
@@ -66,7 +154,5 @@ final class TestShopSearchCommand extends Command
|
||||
|
||||
$output->writeln('');
|
||||
}
|
||||
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
}
|
||||
@@ -26,35 +26,43 @@ final readonly class CommerceQueryParser
|
||||
string $historyContext = ''
|
||||
): CommerceSearchQuery {
|
||||
$normalizedPrompt = $this->normalize($originalPrompt);
|
||||
$isDirectProductQuery = $this->isDirectProductQuery($normalizedPrompt);
|
||||
|
||||
[$priceMin, $priceMax] = $this->extractPriceRange($normalizedPrompt);
|
||||
$sizes = $this->extractSizes($normalizedPrompt);
|
||||
$brand = $this->extractBrand($normalizedPrompt);
|
||||
|
||||
$searchText = $this->buildSearchText(
|
||||
$normalizedPrompt,
|
||||
$sizes,
|
||||
$brand,
|
||||
$priceMin,
|
||||
$priceMax
|
||||
prompt: $normalizedPrompt,
|
||||
sizes: $sizes,
|
||||
brand: $brand,
|
||||
priceMin: $priceMin,
|
||||
priceMax: $priceMax,
|
||||
preserveDirectProductQuery: $isDirectProductQuery
|
||||
);
|
||||
|
||||
if ($historyContext !== '' && $this->shouldUseHistoryContext($normalizedPrompt)) {
|
||||
if (
|
||||
!$isDirectProductQuery
|
||||
&& $historyContext !== ''
|
||||
&& $this->shouldUseHistoryContext($normalizedPrompt)
|
||||
) {
|
||||
$latestHistoryQuestion = $this->extractLatestQuestionFromHistory($historyContext);
|
||||
|
||||
if ($latestHistoryQuestion !== '') {
|
||||
$normalizedHistoryPrompt = $this->normalize($latestHistoryQuestion);
|
||||
$isDirectHistoryProductQuery = $this->isDirectProductQuery($normalizedHistoryPrompt);
|
||||
|
||||
[$historyPriceMin, $historyPriceMax] = $this->extractPriceRange($normalizedHistoryPrompt);
|
||||
$historySizes = $this->extractSizes($normalizedHistoryPrompt);
|
||||
$historyBrand = $this->extractBrand($normalizedHistoryPrompt);
|
||||
|
||||
$historySearchText = $this->buildSearchText(
|
||||
$normalizedHistoryPrompt,
|
||||
$historySizes,
|
||||
$historyBrand,
|
||||
$historyPriceMin,
|
||||
$historyPriceMax
|
||||
prompt: $normalizedHistoryPrompt,
|
||||
sizes: $historySizes,
|
||||
brand: $historyBrand,
|
||||
priceMin: $historyPriceMin,
|
||||
priceMax: $historyPriceMax,
|
||||
preserveDirectProductQuery: $isDirectHistoryProductQuery
|
||||
);
|
||||
|
||||
$searchText = $this->mergeSearchTexts($historySearchText, $searchText);
|
||||
@@ -98,7 +106,7 @@ final readonly class CommerceQueryParser
|
||||
*/
|
||||
private function extractPriceRange(string $prompt): array
|
||||
{
|
||||
$priceMin = 0;
|
||||
$priceMin = null;
|
||||
$priceMax = null;
|
||||
|
||||
if (preg_match('/\bzwischen\s+(\d+(?:[.,]\d+)?)\s+und\s+(\d+(?:[.,]\d+)?)\s+euro\b/u', $prompt, $m) === 1) {
|
||||
@@ -165,19 +173,34 @@ final readonly class CommerceQueryParser
|
||||
array $sizes,
|
||||
?string $brand,
|
||||
?float $priceMin,
|
||||
?float $priceMax
|
||||
?float $priceMax,
|
||||
bool $preserveDirectProductQuery = false
|
||||
): string {
|
||||
if ($preserveDirectProductQuery) {
|
||||
return $this->buildDirectProductSearchText($prompt);
|
||||
}
|
||||
|
||||
$text = ' ' . $prompt . ' ';
|
||||
|
||||
foreach ($this->config->getPhrasesToRemove() as $phrase) {
|
||||
$text = str_replace($phrase, ' ', $text);
|
||||
$normalizedPhrase = $this->normalize((string) $phrase);
|
||||
if ($normalizedPhrase === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = str_replace(' ' . $normalizedPhrase . ' ', ' ', $text);
|
||||
}
|
||||
|
||||
foreach ($sizes as $size) {
|
||||
$text = preg_replace('/\b' . preg_quote($size, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
$normalizedSize = $this->normalize((string) $size);
|
||||
if ($normalizedSize === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$text = preg_replace('/\b' . preg_quote($normalizedSize, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
}
|
||||
|
||||
if ($brand !== null && $brand !== '') {
|
||||
if ($brand !== null && $brand !== '' && !$this->isBrandPartOfModelPhrase($prompt, $brand)) {
|
||||
$text = preg_replace('/\b' . preg_quote($brand, '/') . '\b/u', ' ', $text) ?? $text;
|
||||
}
|
||||
|
||||
@@ -200,6 +223,22 @@ final readonly class CommerceQueryParser
|
||||
return trim(implode(' ', $tokens));
|
||||
}
|
||||
|
||||
/**
 * Build the search text for a direct product query.
 *
 * Whitespace runs are collapsed to single spaces, surrounding whitespace and
 * "-", ".", "," are trimmed, and repeated tokens are dropped while the first
 * occurrence keeps its position.
 */
private function buildDirectProductSearchText(string $prompt): string
{
    $collapsed = preg_replace('/\s+/u', ' ', $prompt) ?? $prompt;
    $stripped = trim($collapsed, " \t\n\r\0\x0B-.,");

    $unique = [];

    foreach (explode(' ', $stripped) as $word) {
        // Skip empty pieces and tokens that were already collected.
        if ($word === '' || in_array($word, $unique, true)) {
            continue;
        }

        $unique[] = $word;
    }

    return trim(implode(' ', $unique));
}
|
||||
|
||||
private function shouldUseHistoryContext(string $prompt): bool
|
||||
{
|
||||
return preg_match(
|
||||
@@ -263,6 +302,57 @@ final readonly class CommerceQueryParser
|
||||
));
|
||||
}
|
||||
|
||||
/**
 * Decide whether the prompt should be treated as a direct product query.
 *
 * True when the prompt contains a model-like phrase or an accessory-like
 * phrase, or when it has at most four whitespace-separated tokens and at
 * least one digit. An empty prompt is never a direct product query.
 */
private function isDirectProductQuery(string $prompt): bool
{
    if ($prompt === '') {
        return false;
    }

    if ($this->containsModelLikePhrase($prompt) || $this->containsAccessoryLikePhrase($prompt)) {
        return true;
    }

    $words = preg_split('/\s+/u', $prompt, -1, PREG_SPLIT_NO_EMPTY);
    $wordCount = is_array($words) ? count($words) : 0;

    // Short prompts that carry a number are taken as product lookups.
    return $wordCount <= 4 && preg_match('/\d/u', $prompt) === 1;
}
|
||||
|
||||
/**
 * Check whether the text contains a model-like phrase: one to three
 * lower-case words followed by a 2-5 digit number with an optional
 * alphanumeric/hyphen suffix (e.g. "testomat 808").
 *
 * A number directly followed by a currency marker ("euro", "eur", "€") is
 * not counted, so price phrases such as "unter 5000 euro" do not qualify.
 *
 * NOTE(review): the pattern only has lower-case letter classes, so it
 * presumably expects already normalized (lower-cased) text — confirm against
 * normalize().
 */
private function containsModelLikePhrase(string $text): bool
{
    return preg_match(
        '/\b[a-zäöüß][a-zäöüß®\-]*(?:\s+[a-zäöüß][a-zäöüß®\-]*){0,2}\s+\d{2,5}[a-z0-9\-]*\b(?!\s*(?:€|euro?\b))/u',
        $text
    ) === 1;
}
|
||||
|
||||
/**
 * Check whether the text contains an accessory word (indikator, indicator,
 * reagenz, reagent, kit, set) followed by a 1-5 digit number with an optional
 * alphanumeric/hyphen suffix.
 */
private function containsAccessoryLikePhrase(string $text): bool
{
    $pattern = '/\b(?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[a-z0-9\-]*\b/u';
    $matchCount = preg_match($pattern, $text);

    return $matchCount === 1;
}
|
||||
|
||||
/**
 * Check whether the brand is directly followed by a model number in the
 * prompt (brand, whitespace, 2-5 digits with an optional alphanumeric/hyphen
 * suffix). An empty brand never matches.
 */
private function isBrandPartOfModelPhrase(string $prompt, string $brand): bool
{
    if ($brand === '') {
        return false;
    }

    // The brand is quoted so that it is matched literally inside the pattern.
    $pattern = sprintf('/\b%s\s+\d{2,5}[a-z0-9\-]*\b/u', preg_quote($brand, '/'));

    return preg_match($pattern, $prompt) === 1;
}
|
||||
|
||||
private function toFloat(string $value): ?float
|
||||
{
|
||||
$value = str_replace(',', '.', trim($value));
|
||||
|
||||
@@ -10,18 +10,22 @@ final readonly class ShopProductResult
|
||||
* @param string[] $highlights
|
||||
*/
|
||||
public function __construct(
    // Product data. Only id and name are mandatory; everything else is optional.
    public string $id,
    public string $name,
    public ?string $productNumber = null,
    public ?string $manufacturer = null,
    public ?string $price = null,
    public ?bool $available = null,
    public ?string $url = null,
    public array $highlights = [],
    public ?string $description = null,
    public ?string $productImage = null,
    public ?string $customFields = null,

    // Internal ranking/merge metadata
    public ?int $matchScore = null,
    public ?string $matchSource = null,
    public array $matchedQueries = [],
) {
}
|
||||
}
|
||||
601
src/Commerce/SearchRepairService.php
Normal file
601
src/Commerce/SearchRepairService.php
Normal file
@@ -0,0 +1,601 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Commerce;
|
||||
|
||||
use App\Commerce\Dto\ShopProductResult;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
final readonly class SearchRepairService
|
||||
{
|
||||
public function __construct(
|
||||
private ShopSearchService $shopSearchService,
|
||||
private LoggerInterface $logger,
|
||||
private bool $enabled = true,
|
||||
private int $maxRepairQueries = 3,
|
||||
private int $minPrimaryResultsWithoutRepair = 2,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
 * Decide whether the primary shop search needs a second pass and, if so, run it.
 *
 * The primary results are returned untouched (attemptedRepair = false) when
 * the service is disabled, when shouldAttemptRepair() declines, or when no
 * repair queries could be built. Otherwise every repair query is sent to the
 * shop search with an empty history context and the hits are accumulated via
 * mergeUniqueProducts(). If nothing was found the primary results are
 * returned with attemptedRepair = true; otherwise the output of
 * rankMergedResults() is returned with usedRepair = true.
 *
 * @param ShopProductResult[] $primaryShopResults
 * @param array<mixed> $knowledgeChunks
 *
 * @return array{
 *     results: ShopProductResult[],
 *     attemptedRepair: bool,
 *     usedRepair: bool,
 *     repairQueries: string[]
 * }
 */
public function repair(
    string $prompt,
    string $commerceIntent,
    string $commerceHistoryContext,
    string $primaryQuery,
    array $primaryShopResults,
    array $knowledgeChunks
): array {
    // Payload used whenever no repair pass is started at all.
    $untouched = [
        'results' => $primaryShopResults,
        'attemptedRepair' => false,
        'usedRepair' => false,
        'repairQueries' => [],
    ];

    if (!$this->enabled || !$this->shouldAttemptRepair($prompt, $primaryQuery, $primaryShopResults)) {
        return $untouched;
    }

    $queries = $this->buildRepairQueries(
        prompt: $prompt,
        primaryQuery: $primaryQuery,
        primaryShopResults: $primaryShopResults,
        knowledgeChunks: $knowledgeChunks
    );

    if ($queries === []) {
        return $untouched;
    }

    $this->logger->info('Shop repair started', [
        'commerceIntent' => $commerceIntent,
        'primaryQuery' => $primaryQuery,
        'primaryResultsCount' => count($primaryShopResults),
        'repairQueries' => $queries,
        'hasCommerceHistoryContext' => $commerceHistoryContext !== '',
        'commerceHistoryContextLength' => mb_strlen($commerceHistoryContext),
    ]);

    $found = [];

    foreach ($queries as $query) {
        // Repair searches run without history context.
        $hits = $this->shopSearchService->search($query, $commerceIntent, '');

        if ($hits !== []) {
            $found = $this->mergeUniqueProducts($found, $hits);
        }
    }

    if ($found === []) {
        $this->logger->info('Shop repair finished without additional products', [
            'commerceIntent' => $commerceIntent,
            'primaryQuery' => $primaryQuery,
            'repairQueries' => $queries,
        ]);

        return [
            'results' => $primaryShopResults,
            'attemptedRepair' => true,
            'usedRepair' => false,
            'repairQueries' => $queries,
        ];
    }

    $merged = $this->rankMergedResults(
        primaryResults: $primaryShopResults,
        repairResults: $found,
        prompt: $prompt,
        primaryQuery: $primaryQuery,
        repairQueries: $queries
    );

    // Log a compact summary of the three best-ranked products.
    $topProducts = array_map(
        static fn(ShopProductResult $product): array => [
            'name' => $product->name,
            'productNumber' => $product->productNumber,
            'manufacturer' => $product->manufacturer,
            'available' => $product->available,
        ],
        array_slice($merged, 0, 3)
    );

    $this->logger->info('Shop repair finished', [
        'commerceIntent' => $commerceIntent,
        'primaryQuery' => $primaryQuery,
        'primaryResultsCount' => count($primaryShopResults),
        'repairResultsCount' => count($found),
        'mergedResultsCount' => count($merged),
        'repairQueries' => $queries,
        'topProducts' => $topProducts,
    ]);

    return [
        'results' => $merged,
        'attemptedRepair' => true,
        'usedRepair' => true,
        'repairQueries' => $queries,
    ];
}
|
||||
|
||||
/**
 * Decides whether a second ("repair") search pass should be run.
 *
 * A repair pass is requested when:
 *  - the primary search returned no products at all, or
 *  - the prompt asks for a bundle/accessory, or
 *  - the primary search returned fewer products than the configured minimum
 *    and the primary query does not already look like a concrete model phrase.
 *
 * @param ShopProductResult[] $primaryShopResults
 */
private function shouldAttemptRepair(
    string $prompt,
    string $primaryQuery,
    array $primaryShopResults
): bool {
    $hitCount = count($primaryShopResults);

    if ($hitCount === 0) {
        return true;
    }

    // Bundle/accessory prompts always get a second pass: even when the first
    // search returned something, the user is asking for a combination of a
    // main device plus a matching accessory.
    if ($this->asksForBundleOrAccessory($prompt)) {
        return true;
    }

    if ($hitCount >= $this->minPrimaryResultsWithoutRepair) {
        return false;
    }

    // From here on the result set is non-empty but below the threshold.
    // A model-like primary query that produced at least one hit is trusted;
    // anything else is repaired.
    return !$this->containsModelLikePhrase($primaryQuery);
}
|
||||
|
||||
/**
 * Builds the list of alternative search queries for the repair pass.
 *
 * Candidates are collected in priority order from: the top primary product
 * (name plus product number, if it looks like a model phrase), model-like
 * phrases found in the prompt/knowledge/primary results, and - for
 * bundle/accessory prompts - accessory phrases (alone and combined with the
 * top product name). Candidates are then sanitized, de-duplicated, stripped
 * of near-copies of the primary query and capped at the configured maximum
 * (at least one query is always allowed through the cap).
 *
 * @param ShopProductResult[] $primaryShopResults
 * @param array<mixed> $knowledgeChunks
 * @return string[]
 */
private function buildRepairQueries(
    string $prompt,
    string $primaryQuery,
    array $primaryShopResults,
    array $knowledgeChunks
): array {
    $searchableText = trim(
        $prompt
        . "\n" . $this->flattenToText($knowledgeChunks)
        . "\n" . $this->flattenPrimaryResults($primaryShopResults)
    );

    $modelPhrases = $this->extractModelCandidates($searchableText);
    $accessoryPhrases = $this->extractAccessoryCandidates($searchableText);

    $bestName = $primaryShopResults[0]->name ?? '';
    $bestNumber = $primaryShopResults[0]->productNumber ?? null;
    $bestPhrase = trim($bestName . ' ' . ($bestNumber ?? ''));

    $rawQueries = [];

    if ($bestPhrase !== '' && $this->containsModelLikePhrase($bestPhrase)) {
        $rawQueries[] = $bestPhrase;
    } elseif ($bestName !== '' && $this->containsModelLikePhrase($bestName)) {
        $rawQueries[] = $bestName;
    }

    foreach ($modelPhrases as $modelPhrase) {
        $rawQueries[] = $modelPhrase;
    }

    if ($this->asksForBundleOrAccessory($prompt)) {
        foreach ($accessoryPhrases as $accessoryPhrase) {
            if ($bestName !== '') {
                $rawQueries[] = trim($bestName . ' ' . $accessoryPhrase);
            }

            $rawQueries[] = $accessoryPhrase;
        }
    }

    // Sanitize, keep the first occurrence of each query, and drop queries
    // that are empty or nearly identical to the primary query.
    $accepted = [];
    $alreadySeen = [];

    foreach ($rawQueries as $rawQuery) {
        $cleanQuery = $this->sanitizeQuery($rawQuery);

        if (isset($alreadySeen[$cleanQuery])) {
            continue;
        }

        $alreadySeen[$cleanQuery] = true;

        if ($cleanQuery === '' || $this->isTooCloseToPrimaryQuery($cleanQuery, $primaryQuery)) {
            continue;
        }

        $accepted[] = $cleanQuery;
    }

    $limit = max(1, $this->maxRepairQueries);

    return array_slice($accepted, 0, $limit);
}
|
||||
|
||||
/**
 * Flattens an arbitrarily nested array into newline-separated text.
 *
 * Nested arrays are visited depth-first in their original order. Scalar
 * leaves are cast to string and trimmed; empty strings and non-scalar
 * leaves (null, objects, ...) are ignored.
 *
 * @param array<mixed> $value
 */
private function flattenToText(array $value): string
{
    $lines = [];

    // Explicit stack instead of recursion; children are pushed in reverse so
    // that popping yields them in their original order.
    $pending = [$value];

    while ($pending !== []) {
        $current = array_pop($pending);

        if (is_array($current)) {
            foreach (array_reverse($current) as $child) {
                $pending[] = $child;
            }

            continue;
        }

        if (!is_scalar($current)) {
            continue;
        }

        $trimmed = trim((string) $current);

        if ($trimmed !== '') {
            $lines[] = $trimmed;
        }
    }

    return trim(implode("\n", $lines));
}
|
||||
|
||||
/**
 * Renders the primary products as plain text, one product per line.
 *
 * Each line joins the product's name, product number, manufacturer,
 * description, highlights and custom fields with single spaces, skipping
 * empty values. Products that render to an empty line are omitted.
 *
 * @param ShopProductResult[] $products
 */
private function flattenPrimaryResults(array $products): string
{
    $lines = array_map(
        static function ($product): string {
            $fields = [
                $product->name,
                $product->productNumber,
                $product->manufacturer,
                $product->description,
                implode(' ', $product->highlights),
                $product->customFields,
            ];

            return trim(implode(' ', array_filter($fields)));
        },
        $products
    );

    return trim(implode("\n", array_filter($lines)));
}
|
||||
|
||||
/**
 * Extracts model-like phrases (one to three words followed by a number of
 * two to five digits, optionally with an alphanumeric suffix) from free text.
 *
 * Matches are sanitized; empty and too-generic phrases are discarded and the
 * remainder is returned ordered by specificity.
 *
 * @return string[]
 */
private function extractModelCandidates(string $text): array
{
    preg_match_all(
        '/\b([A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*)\b/u',
        $text,
        $matches
    );

    $kept = [];

    foreach ($matches[1] ?? [] as $rawMatch) {
        $phrase = $this->sanitizeQuery($rawMatch);

        if ($phrase !== '' && !$this->looksTooGeneric($phrase)) {
            $kept[] = $phrase;
        }
    }

    return $this->sortCandidatesBySpecificity($kept);
}
|
||||
|
||||
/**
 * Extracts accessory phrases - an accessory keyword (indikator, indicator,
 * reagenz, reagent, kit, set) followed by a number - from free text.
 *
 * Matches are sanitized, empty ones are discarded, and the remainder is
 * returned ordered by specificity.
 *
 * @return string[]
 */
private function extractAccessoryCandidates(string $text): array
{
    preg_match_all(
        '/\b((?:indikator|indicator|reagenz|reagent|kit|set)\s+\d{1,5}[A-Za-z0-9\-]*)\b/iu',
        $text,
        $matches
    );

    $kept = [];

    foreach ($matches[1] ?? [] as $rawMatch) {
        $phrase = $this->sanitizeQuery($rawMatch);

        if ($phrase !== '') {
            $kept[] = $phrase;
        }
    }

    return $this->sortCandidatesBySpecificity($kept);
}
|
||||
|
||||
/**
 * De-duplicates the candidates and orders them by descending specificity
 * score; candidates with equal scores keep their original relative order.
 *
 * @param string[] $candidates
 * @return string[]
 */
private function sortCandidatesBySpecificity(array $candidates): array
{
    $unique = array_values(array_unique($candidates));

    $scores = [];
    foreach ($unique as $position => $candidate) {
        $scores[$position] = $this->scoreCandidate($candidate);
    }

    // Sort positions rather than the strings themselves so each score is
    // computed exactly once.
    $order = array_keys($unique);

    usort(
        $order,
        static fn(int $left, int $right): int => $scores[$right] <=> $scores[$left]
    );

    return array_map(
        static fn(int $position): string => $unique[$position],
        $order
    );
}
|
||||
|
||||
/**
 * Scores how specific a candidate query is.
 *
 * +4 if it contains a digit, +1 per token (capped at 4), and +3 if it
 * contains one of the known device/consumable keywords.
 */
private function scoreCandidate(string $candidate): int
{
    $digitBonus = preg_match('/\d/u', $candidate) === 1 ? 4 : 0;

    $lengthBonus = min(count($this->tokenize($candidate)), 4);

    $keywordBonus = preg_match(
        '/\b(?:indikator|indicator|testomat|tritromat|titromat|reagenz|reagent)\b/iu',
        $candidate
    ) === 1 ? 3 : 0;

    return $digitBonus + $lengthBonus + $keywordBonus;
}
|
||||
|
||||
/**
 * Tells whether the prompt contains one of the (mostly German) keywords or
 * phrases that signal a request for a matching accessory or bundle.
 */
private function asksForBundleOrAccessory(string $prompt): bool
{
    $bundlePattern = '/\b(passend|passende|zubehor|zubehör|dazu|zusatz|erganzung|ergänzung|indikator|reagenz|kit|set|auch\s+das|mit\s+preis\s+und\s+allen\s+infos)\b/iu';

    return 1 === preg_match($bundlePattern, $prompt);
}
|
||||
|
||||
/**
 * Tells whether the text contains a model-like phrase: one to three words
 * followed by a number of two to five digits (optionally with an
 * alphanumeric/hyphen suffix).
 */
private function containsModelLikePhrase(string $text): bool
{
    $modelPattern = '/\b[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*(?:\s+[A-Za-zÄÖÜäöüß][A-Za-zÄÖÜäöüß®\-]*){0,2}\s+\d{2,5}[A-Za-z0-9\-]*\b/u';

    return 1 === preg_match($modelPattern, $text);
}
|
||||
|
||||
/**
 * Tells whether a candidate query consists only of generic words.
 *
 * A candidate is too generic when it equals one of the generic words, or
 * when every non-numeric word in it is generic (e.g. "Preis 100",
 * "Wasser 2000"). Comparing only the whole candidate against a single word
 * is not enough: model candidates always carry a trailing number and would
 * therefore never be filtered.
 *
 * Candidates without any non-numeric word (e.g. "808") are not treated as
 * generic.
 */
private function looksTooGeneric(string $candidate): bool
{
    $genericWords = [
        'wasser',
        'messgerät',
        'messgeraet',
        'produkt',
        'geräte',
        'geraete',
        'gerät',
        'geraet',
        'resthärte',
        'resthaerte',
        'preis',
        'infos',
        'wissen',
    ];

    $normalized = mb_strtolower(trim($candidate));

    if ($normalized === '') {
        return false;
    }

    if (in_array($normalized, $genericWords, true)) {
        return true;
    }

    $tokens = preg_split('/\s+/u', $normalized, -1, PREG_SPLIT_NO_EMPTY);

    if ($tokens === false) {
        // Splitting failed (e.g. invalid UTF-8); fall back to the exact-match verdict above.
        return false;
    }

    // Only words without digits describe *what* is searched; number-bearing
    // tokens (model numbers, sizes) are ignored for the generic check.
    $words = array_filter(
        $tokens,
        static fn(string $token): bool => preg_match('/\d/u', $token) !== 1
    );

    if ($words === []) {
        return false;
    }

    foreach ($words as $word) {
        if (!in_array($word, $genericWords, true)) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/**
 * Normalizes a raw query: trims it, collapses whitespace runs into single
 * spaces and strips surrounding quotes, backticks and punctuation
 * (. , ; : -).
 */
private function sanitizeQuery(string $query): string
{
    $trimmed = trim($query);

    // preg_replace() yields null on failure; keep the trimmed input then.
    $collapsed = preg_replace('/\s+/u', ' ', $trimmed);
    if ($collapsed === null) {
        $collapsed = $trimmed;
    }

    $stripped = trim($collapsed, " \t\n\r\0\x0B\"'`.,;:-");

    return trim($stripped);
}
|
||||
|
||||
/**
 * Tells whether a candidate query is essentially the primary query again.
 *
 * The overlap ratio is the number of candidate tokens that also occur in
 * the primary query, divided by the token count of the longer query. A
 * ratio of 0.9 or more counts as "too close". If either query has no
 * tokens the candidate is never considered too close.
 */
private function isTooCloseToPrimaryQuery(string $candidateQuery, string $primaryQuery): bool
{
    $candidateTokens = $this->tokenize($candidateQuery);
    $primaryTokens = $this->tokenize($primaryQuery);

    $candidateCount = count($candidateTokens);
    $primaryCount = count($primaryTokens);

    if ($candidateCount === 0 || $primaryCount === 0) {
        return false;
    }

    $sharedCount = count(array_intersect($candidateTokens, $primaryTokens));
    $longerCount = max($candidateCount, $primaryCount);

    return ($sharedCount / $longerCount) >= 0.9;
}
|
||||
|
||||
/**
 * Appends to $existing every product from $incoming whose product key is
 * not present yet. Existing products are kept as they are and in order.
 *
 * @param ShopProductResult[] $existing
 * @param ShopProductResult[] $incoming
 * @return ShopProductResult[]
 */
private function mergeUniqueProducts(array $existing, array $incoming): array
{
    $knownKeys = [];

    foreach ($existing as $knownProduct) {
        $knownKeys[$this->buildProductKey($knownProduct)] = true;
    }

    $result = $existing;

    foreach ($incoming as $candidate) {
        $candidateKey = $this->buildProductKey($candidate);

        if (!isset($knownKeys[$candidateKey])) {
            $knownKeys[$candidateKey] = true;
            $result[] = $candidate;
        }
    }

    return $result;
}
|
||||
|
||||
/**
 * Merges primary and repair results and ranks them by textual relevance.
 *
 * Each product is scored against the prompt (weight 3), the primary query
 * (weight 2) and the concatenated repair queries (weight 4); products that
 * came from the primary search get one extra point. Ties keep the merged
 * order.
 *
 * @param ShopProductResult[] $primaryResults
 * @param ShopProductResult[] $repairResults
 * @param string[] $repairQueries
 * @return ShopProductResult[]
 */
private function rankMergedResults(
    array $primaryResults,
    array $repairResults,
    string $prompt,
    string $primaryQuery,
    array $repairQueries
): array {
    $candidates = $this->mergeUniqueProducts($primaryResults, $repairResults);
    $repairSignal = trim(implode(' ', $repairQueries));
    $primaryCount = count($primaryResults);

    $rows = [];

    foreach ($candidates as $position => $product) {
        $relevance = 3 * $this->scoreProductAgainstText($product, $prompt)
            + 2 * $this->scoreProductAgainstText($product, $primaryQuery)
            + 4 * $this->scoreProductAgainstText($product, $repairSignal);

        // The merged list starts with the primary results, so a position
        // below their count identifies a primary hit.
        $primaryBonus = $position < $primaryCount ? 1 : 0;

        $rows[] = [
            'index' => $position,
            'score' => $relevance + $primaryBonus,
            'product' => $product,
        ];
    }

    // Highest score first; equal scores fall back to the merged position.
    usort(
        $rows,
        static fn(array $left, array $right): int
            => ($right['score'] <=> $left['score']) ?: ($left['index'] <=> $right['index'])
    );

    return array_column($rows, 'product');
}
|
||||
|
||||
/**
 * Scores how well a product matches a piece of text.
 *
 * Two points per text token that also occurs in the product's searchable
 * text, plus four extra points per digit-bearing text token found there.
 * Returns 0 when either side has no tokens.
 */
private function scoreProductAgainstText(ShopProductResult $product, string $text): int
{
    $textTokens = $this->tokenize($text);

    if ([] === $textTokens) {
        return 0;
    }

    $searchableFields = array_filter([
        $product->name,
        $product->productNumber,
        $product->manufacturer,
        implode(' ', $product->highlights),
        $product->description,
        $product->customFields,
    ]);

    $productTokens = $this->tokenize(trim(implode(' ', $searchableFields)));

    if ([] === $productTokens) {
        return 0;
    }

    $sharedTokenPoints = 2 * count(array_intersect($textTokens, $productTokens));

    $matchedNumberTokens = array_filter(
        $this->extractNumberTokens($textTokens),
        static fn(string $numberToken): bool => in_array($numberToken, $productTokens, true)
    );

    return $sharedTokenPoints + 4 * count($matchedNumberTokens);
}
|
||||
|
||||
/**
 * Builds the lower-cased identity key used for product de-duplication from
 * id, product number, name and URL (missing number/URL count as empty).
 */
private function buildProductKey(ShopProductResult $product): string
{
    $identityParts = [
        $product->id,
        $product->productNumber ?? '',
        $product->name,
        $product->url ?? '',
    ];

    $joined = implode('|', $identityParts);

    return mb_strtolower(trim($joined));
}
|
||||
|
||||
/**
 * Splits text into lower-cased tokens.
 *
 * Everything except letters, digits, whitespace and hyphens is replaced by
 * a space; whitespace runs are collapsed and the result is split on single
 * spaces.
 *
 * @return string[]
 */
private function tokenize(string $text): array
{
    $text = mb_strtolower($text);
    $text = preg_replace('/[^\p{L}\p{N}\s\-]+/u', ' ', $text) ?? $text;
    $text = preg_replace('/\s+/u', ' ', $text) ?? $text;
    $text = trim($text);

    if ($text === '') {
        return [];
    }

    // Filter explicitly on the empty string: array_filter() without a
    // callback would also discard the legitimate token "0".
    return array_values(array_filter(
        explode(' ', $text),
        static fn(string $token): bool => $token !== ''
    ));
}
|
||||
|
||||
/**
 * Returns, in order, the tokens that contain at least one digit.
 *
 * @param string[] $tokens
 * @return string[]
 */
private function extractNumberTokens(array $tokens): array
{
    $digitBearing = [];

    foreach ($tokens as $token) {
        if (preg_match('/\d/u', $token) === 1) {
            $digitBearing[] = $token;
        }
    }

    return $digitBearing;
}
|
||||
}
|
||||
@@ -207,10 +207,12 @@ final readonly class ShopSearchService
|
||||
);
|
||||
}
|
||||
|
||||
return array_values(array_filter(
|
||||
$results = array_values(array_filter(
|
||||
$results,
|
||||
static fn(ShopProductResult $product): bool => $product->name !== ''
|
||||
));
|
||||
|
||||
return $this->deduplicateProducts($results);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -251,26 +253,46 @@ final readonly class ShopSearchService
|
||||
{
|
||||
$score = 0;
|
||||
|
||||
$normalizedPrompt = $this->normalizeForMatching($query->normalizedPrompt ?: $query->originalPrompt);
|
||||
$normalizedPrompt = $this->normalizeForMatching($query->normalizedPrompt !== ''
|
||||
? $query->normalizedPrompt
|
||||
: $query->originalPrompt);
|
||||
|
||||
$normalizedSearchText = $this->normalizeForMatching($query->searchText);
|
||||
$normalizedQuery = trim($normalizedPrompt . ' ' . $normalizedSearchText);
|
||||
$normalizedBrand = $this->normalizeForMatching((string) ($query->brand ?? ''));
|
||||
$normalizedSizes = array_values(array_filter(array_map(
|
||||
fn(mixed $size): string => $this->normalizeForMatching((string) $size),
|
||||
$query->sizes
|
||||
)));
|
||||
|
||||
$normalizedQuery = trim(implode(' ', array_filter([
|
||||
$normalizedPrompt,
|
||||
$normalizedSearchText,
|
||||
$normalizedBrand,
|
||||
implode(' ', $normalizedSizes),
|
||||
])));
|
||||
|
||||
$queryTokens = $this->tokenize($normalizedQuery);
|
||||
$queryNumberTokens = $this->extractNumberTokens($queryTokens);
|
||||
|
||||
$normalizedProductName = $this->normalizeForMatching($product->name);
|
||||
$productNameTokens = $this->tokenize($normalizedProductName);
|
||||
$productNameNumberTokens = $this->extractNumberTokens($productNameTokens);
|
||||
|
||||
$normalizedProductNumber = $this->normalizeForMatching((string) ($product->productNumber ?? ''));
|
||||
$productNumberTokens = $this->tokenize($normalizedProductNumber);
|
||||
$productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens);
|
||||
|
||||
$normalizedManufacturer = $this->normalizeForMatching((string) ($product->manufacturer ?? ''));
|
||||
$normalizedBrand = $this->normalizeForMatching((string) ($query->brand ?? ''));
|
||||
$normalizedProductCorpus = $this->buildNormalizedProductCorpus($product);
|
||||
|
||||
$productNameTokens = $this->tokenize($normalizedProductName);
|
||||
$productNumberTokens = $this->tokenize($normalizedProductNumber);
|
||||
$productCorpusTokens = $this->tokenize($normalizedProductCorpus);
|
||||
|
||||
$productNameNumberTokens = $this->extractNumberTokens($productNameTokens);
|
||||
$productNumberNumberTokens = $this->extractNumberTokens($productNumberTokens);
|
||||
$productCorpusNumberTokens = $this->extractNumberTokens($productCorpusTokens);
|
||||
|
||||
if ($normalizedProductNumber !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductNumber)) {
|
||||
$score += 120;
|
||||
$score += 140;
|
||||
}
|
||||
|
||||
if ($normalizedProductName !== '' && $this->containsWholePhrase($normalizedQuery, $normalizedProductName)) {
|
||||
$score += 80;
|
||||
}
|
||||
|
||||
if ($normalizedBrand !== '') {
|
||||
@@ -281,20 +303,22 @@ final readonly class ShopSearchService
|
||||
}
|
||||
}
|
||||
|
||||
$score += $this->countOverlap($queryTokens, $productNameTokens) * 4;
|
||||
$score += $this->countOverlap($queryTokens, $productNumberTokens) * 8;
|
||||
$score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 16;
|
||||
$score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 24;
|
||||
$score += $this->countOverlap($queryTokens, $productNameTokens) * 6;
|
||||
$score += $this->countOverlap($queryTokens, $productNumberTokens) * 10;
|
||||
$score += $this->countOverlap($queryTokens, $productCorpusTokens) * 2;
|
||||
|
||||
foreach ($query->sizes as $size) {
|
||||
$normalizedSize = $this->normalizeForMatching((string) $size);
|
||||
$score += $this->countOverlap($queryNumberTokens, $productNameNumberTokens) * 18;
|
||||
$score += $this->countOverlap($queryNumberTokens, $productNumberNumberTokens) * 28;
|
||||
$score += $this->countOverlap($queryNumberTokens, $productCorpusNumberTokens) * 8;
|
||||
|
||||
foreach ($normalizedSizes as $normalizedSize) {
|
||||
if ($normalizedSize === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($this->containsWholePhrase($normalizedProductName, $normalizedSize)
|
||||
|| $this->containsWholePhrase($normalizedProductNumber, $normalizedSize)) {
|
||||
|| $this->containsWholePhrase($normalizedProductNumber, $normalizedSize)
|
||||
|| $this->containsWholePhrase($normalizedProductCorpus, $normalizedSize)) {
|
||||
$score += 12;
|
||||
}
|
||||
}
|
||||
@@ -306,6 +330,18 @@ final readonly class ShopSearchService
|
||||
return $score;
|
||||
}
|
||||
|
||||
/**
 * Builds the normalized full-text corpus of a product from its name,
 * product number, manufacturer, highlights, description and custom fields
 * (empty values are skipped).
 */
private function buildNormalizedProductCorpus(ShopProductResult $product): string
{
    $corpusParts = array_filter([
        $product->name,
        $product->productNumber,
        $product->manufacturer,
        implode(' ', $product->highlights),
        $product->description,
        $product->customFields,
    ]);

    $rawCorpus = implode(' ', $corpusParts);

    return $this->normalizeForMatching($rawCorpus);
}
|
||||
|
||||
/**
|
||||
* @param string[] $left
|
||||
* @param string[] $right
|
||||
@@ -480,4 +516,32 @@ final readonly class ShopSearchService
|
||||
|
||||
return array_values(array_unique($highlights));
|
||||
}
|
||||
|
||||
/**
 * Removes duplicate products while preserving order; the first occurrence
 * wins. Two products are duplicates when id, product number, name and URL
 * match case-insensitively.
 *
 * @param ShopProductResult[] $products
 * @return ShopProductResult[]
 */
private function deduplicateProducts(array $products): array
{
    $firstByKey = [];

    foreach ($products as $product) {
        $identity = implode('|', [
            $product->id,
            $product->productNumber ?? '',
            $product->name,
            $product->url ?? '',
        ]);

        $key = mb_strtolower(trim($identity));

        if (!isset($firstByKey[$key])) {
            $firstByKey[$key] = $product;
        }
    }

    return array_values($firstByKey);
}
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Retrieval;
|
||||
|
||||
use Psr\Cache\CacheItemPoolInterface;
|
||||
use Psr\Cache\InvalidArgumentException;
|
||||
|
||||
/**
 * Retriever decorator that caches the inner retriever's results in a PSR-6
 * cache pool, keyed by the normalized prompt.
 */
final readonly class CachedRetriever implements RetrieverInterface
{
    public function __construct(
        private RetrieverInterface $inner,
        private CacheItemPoolInterface $cache,
        private int $ttlSeconds = 300,
    ) {
    }

    /**
     * Returns the cached result for the prompt, or asks the inner retriever
     * and stores its result for $ttlSeconds seconds.
     *
     * A cache hit whose payload is not an array yields an empty result.
     *
     * @return string[]
     * @throws InvalidArgumentException
     */
    public function retrieve(string $prompt): array
    {
        $cacheItem = $this->cache->getItem($this->buildCacheKey($prompt));

        if (!$cacheItem->isHit()) {
            $fresh = $this->inner->retrieve($prompt);

            $cacheItem->set($fresh);
            $cacheItem->expiresAfter($this->ttlSeconds);
            $this->cache->save($cacheItem);

            return $fresh;
        }

        $payload = $cacheItem->get();

        if (!is_array($payload)) {
            return [];
        }

        return $payload;
    }

    /**
     * Derives the cache key from the prompt: trimmed, lower-cased,
     * whitespace-collapsed and hashed with SHA-1.
     */
    private function buildCacheKey(string $prompt): string
    {
        $lowered = mb_strtolower(trim($prompt));
        $collapsed = preg_replace('/\s+/u', ' ', $lowered);

        return 'rag_retrieval_' . sha1($collapsed ?? $lowered);
    }
}
|
||||
Reference in New Issue
Block a user