optimize retrieval
This commit is contained in:
@@ -14,7 +14,6 @@ use RuntimeException;
|
||||
|
||||
final readonly class PromptBuilder
|
||||
{
|
||||
|
||||
public function __construct(
|
||||
private ContextService $contextService,
|
||||
private SystemPromptRepository $systemPromptRepository,
|
||||
@@ -49,12 +48,24 @@ final readonly class PromptBuilder
|
||||
|
||||
$hasShopResults = $shopResults !== [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
$isPriceDrivenQuestion = $this->isLikelyPriceDrivenQuestion($prompt);
|
||||
|
||||
$systemBlock = $this->buildSystemBlock();
|
||||
$shopBlock = $this->buildShopBlock($shopResults, $swagFullOutPut);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults);
|
||||
$responseFormatBlock = $this->buildResponseFormatBlock($prompt, $hasShopResults, $isTechnicalProductQuestion);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock($knowledgeChunks, $urlContent, $prompt, $hasShopResults);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock($hasShopResults, $isPriceDrivenQuestion);
|
||||
$responseFormatBlock = $this->buildResponseFormatBlock(
|
||||
$prompt,
|
||||
$hasShopResults,
|
||||
$isTechnicalProductQuestion,
|
||||
$isPriceDrivenQuestion
|
||||
);
|
||||
$knowledgeBlock = $this->buildKnowledgeBlock(
|
||||
$knowledgeChunks,
|
||||
$urlContent,
|
||||
$prompt,
|
||||
$hasShopResults,
|
||||
$isPriceDrivenQuestion
|
||||
);
|
||||
$userBlock = $this->buildUserBlock($prompt);
|
||||
|
||||
$fixedPrompt = $this->implodeBlocks([
|
||||
@@ -231,7 +242,9 @@ final readonly class PromptBuilder
|
||||
"Output real URL values exactly as provided in the shop results. Do not replace them with placeholders, link labels, or product names.\n" .
|
||||
"Do not infer undocumented technical specifications from shop data.\n" .
|
||||
"Commercial fields from shop data may only be assigned to a product if the shop item clearly matches the same product identity.\n" .
|
||||
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.";
|
||||
"Do not merge a device identified in retrieved knowledge with price, URL, product number, or availability from a different shop item such as a reagent, accessory, kit, consumable, or service item.\n" .
|
||||
"If shop results only contain accessories, reagents, indicators, or consumables, do not conclude that no matching main device exists unless the sources explicitly support that conclusion.\n" .
|
||||
"If the user asks for price filtering, use the numeric prices in these live shop results as the decisive source for filtering.";
|
||||
|
||||
if ($totalCount > count($limitedShopResults)) {
|
||||
$header .= "\n" .
|
||||
@@ -247,12 +260,20 @@ final readonly class PromptBuilder
|
||||
/**
|
||||
* Build a small priority block that tells the model what to surface first.
|
||||
*/
|
||||
private function buildOutputPriorityBlock(bool $hasShopResults): string
|
||||
private function buildOutputPriorityBlock(bool $hasShopResults, bool $isPriceDrivenQuestion): string
|
||||
{
|
||||
if (!$hasShopResults) {
|
||||
return '';
|
||||
}
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
return
|
||||
"OUTPUT PRIORITY:\n" .
|
||||
"For price-driven questions, evaluate shop results first for numeric price filtering.\n" .
|
||||
"Use retrieved knowledge afterwards only to add technical context or explain missing commercial coverage.\n" .
|
||||
"Do not let accessory-only shop results prove that no matching device exists unless the sources explicitly support that conclusion.\n";
|
||||
}
|
||||
|
||||
return
|
||||
"OUTPUT PRIORITY:\n" .
|
||||
"Use retrieved knowledge first to determine the technically matching product or answer.\n" .
|
||||
@@ -263,7 +284,8 @@ final readonly class PromptBuilder
|
||||
private function buildResponseFormatBlock(
|
||||
string $prompt,
|
||||
bool $hasShopResults,
|
||||
bool $isTechnicalProductQuestion
|
||||
bool $isTechnicalProductQuestion,
|
||||
bool $isPriceDrivenQuestion
|
||||
): string {
|
||||
$rules = [
|
||||
"RESPONSE FORMAT RULES:",
|
||||
@@ -284,6 +306,8 @@ final readonly class PromptBuilder
|
||||
$rules[] = "- Only use shop price, URL, product number, or availability for the main product when the shop result clearly matches that same main product.";
|
||||
$rules[] = "- If the matching shop item appears to be an accessory, reagent, consumable, set, or kit, keep it separate and do not present its commercial fields as the main device.";
|
||||
$rules[] = "- If the commercial match is uncertain, say that commercial details for the main product are not clearly available in the provided shop results.";
|
||||
$rules[] = "- If the question includes a price threshold, filter using only explicit numeric shop prices.";
|
||||
$rules[] = "- Do not say that no device exists above a threshold merely because only cheaper accessories were found in the shop results.";
|
||||
} else {
|
||||
$rules[] = "- If no shop results are present, do not compensate by inventing external products or external manufacturers.";
|
||||
}
|
||||
@@ -294,6 +318,12 @@ final readonly class PromptBuilder
|
||||
$rules[] = "- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.";
|
||||
}
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
$rules[] = "- For price-driven questions, answer the threshold result first.";
|
||||
$rules[] = "- If no grounded shop product fulfills the threshold, say that clearly.";
|
||||
$rules[] = "- Then optionally explain whether retrieved knowledge mentions relevant devices that are not commercially listed in the current shop results.";
|
||||
}
|
||||
|
||||
if ($this->asksForAccessoryOrBundle($prompt)) {
|
||||
$rules[] = "- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.";
|
||||
$rules[] = "- The main device must come first. The accessory must not replace the main device.";
|
||||
@@ -304,14 +334,13 @@ final readonly class PromptBuilder
|
||||
return implode("\n", $rules);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the knowledge block.
|
||||
*
|
||||
* Retrieved knowledge remains the main source for technical matching and explanation.
|
||||
* Shop data is preferred for current commercial fields.
|
||||
*/
|
||||
private function buildKnowledgeBlock(array $knowledgeChunks, string $urlContent, string $prompt, bool $hasShopResults): string
|
||||
{
|
||||
private function buildKnowledgeBlock(
|
||||
array $knowledgeChunks,
|
||||
string $urlContent,
|
||||
string $prompt,
|
||||
bool $hasShopResults,
|
||||
bool $isPriceDrivenQuestion
|
||||
): string {
|
||||
$knowledgeParts = [];
|
||||
$isTechnicalProductQuestion = $this->isLikelyTechnicalProductQuestion($prompt);
|
||||
|
||||
@@ -334,7 +363,7 @@ final readonly class PromptBuilder
|
||||
"LANGUAGE RULES:\n" .
|
||||
implode("\n", $this->buildLanguageRules()),
|
||||
"FACT GROUNDING RULES:\n" .
|
||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults)),
|
||||
implode("\n", $this->buildFactGroundingRules($isTechnicalProductQuestion, $hasShopResults, $isPriceDrivenQuestion)),
|
||||
"RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation):\n" .
|
||||
"Source: Documents\n" .
|
||||
implode("\n\n", $lines),
|
||||
@@ -354,13 +383,6 @@ final readonly class PromptBuilder
|
||||
return $this->implodeBlocks($knowledgeParts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve how many characters may still be used by history.
|
||||
*
|
||||
* The active model num_ctx is converted into a conservative prompt budget.
|
||||
* Shop, knowledge and user question are fixed priority blocks.
|
||||
* History only receives the remaining space.
|
||||
*/
|
||||
private function resolveHistoryBudgetChars(string $fixedPrompt): int
|
||||
{
|
||||
$numCtx = $this->modelGenerationConfigProvider->getActiveNumCtx();
|
||||
@@ -407,8 +429,11 @@ final readonly class PromptBuilder
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
private function buildFactGroundingRules(bool $isTechnicalProductQuestion, bool $hasShopResults): array
|
||||
{
|
||||
private function buildFactGroundingRules(
|
||||
bool $isTechnicalProductQuestion,
|
||||
bool $hasShopResults,
|
||||
bool $isPriceDrivenQuestion
|
||||
): array {
|
||||
$rules = [
|
||||
"- State only facts that are explicitly present in the provided sources.",
|
||||
"- Extract concrete values exactly when they are present, including units, ranges, model names, indicator names, IP classes, temperatures, pressures, dimensions, counts, relay outputs, current outputs, and error codes.",
|
||||
@@ -437,6 +462,11 @@ final readonly class PromptBuilder
|
||||
"- Only use commercial fields for the main product when the shop item and the technically identified product clearly refer to the same product identity.",
|
||||
"- If the shop match is ambiguous, keep the technical identification and commercial details separate.",
|
||||
]);
|
||||
|
||||
if ($isPriceDrivenQuestion) {
|
||||
$rules[] = "- For price-threshold questions, shop prices are authoritative for the threshold check.";
|
||||
$rules[] = "- Accessory-only shop hits do not prove that no qualifying device exists.";
|
||||
}
|
||||
} else {
|
||||
$rules[] = "- Use retrieved knowledge as authoritative for factual answers.";
|
||||
$rules[] = "- If no shop results are present, do not compensate with external recommendations or external product suggestions.";
|
||||
@@ -523,6 +553,20 @@ final readonly class PromptBuilder
|
||||
return preg_match('/\b[\p{L}]{2,}\s?\d{2,5}\b/u', $prompt) === 1;
|
||||
}
|
||||
|
||||
/**
 * Heuristically decide whether the prompt is driven by a price constraint.
 *
 * The prompt is lower-cased (UTF-8 aware) and then checked in two steps:
 *  1. an explicit German threshold phrase followed by an amount and a
 *     currency marker, e.g. "über 100 €", "unter 50 euro", "ab 20,50 eur";
 *  2. a generic price keyword appearing anywhere in the prompt.
 *
 * @param string $prompt Raw user prompt.
 *
 * @return bool True when the prompt looks price-driven, false otherwise.
 */
private function isLikelyPriceDrivenQuestion(string $prompt): bool
{
    $normalized = mb_strtolower($prompt, 'UTF-8');

    // The word boundary is applied to the alphabetic currency tokens only.
    // "€" is not a word character, so a trailing \b after it would require a
    // following letter or digit and would reject "100 €" at the end of a
    // sentence or before punctuation.
    $thresholdPattern = '/\b(?:mehr\s+als|über|ueber|größer\s+als|groesser\s+als|unter|bis|ab|mindestens|min)'
        . '\s+\d+(?:[.,]\d+)?\s*(?:(?:euro|eur)\b|€)/u';

    if (preg_match($thresholdPattern, $normalized) === 1) {
        return true;
    }

    // Substring match on purpose: "preis" also covers "preise", "preisliste", etc.
    foreach (['preis', 'kosten', 'kostet'] as $keyword) {
        if (str_contains($normalized, $keyword)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
private function asksForAccessoryOrBundle(string $prompt): bool
|
||||
{
|
||||
$normalized = mb_strtolower($prompt, 'UTF-8');
|
||||
|
||||
Reference in New Issue
Block a user