fix history anchor
This commit is contained in:
@@ -638,6 +638,39 @@ parameters:
|
||||
- kostet
|
||||
- shopsuche
|
||||
- shop-suche
|
||||
rag_anchor_enrichment:
|
||||
enabled: true
|
||||
# Enriches overly narrow numeric shop queries with a product anchor from
|
||||
# retrieved RAG records when the same record explicitly contains the
|
||||
# requested numeric value/unit. This prevents bare shop queries such as "0,02"
|
||||
# from being sent when RAG has already identified a concrete product such as a device model.
|
||||
min_score: 120
|
||||
max_query_terms: 6
|
||||
early_chunk_bonus_max: 10
|
||||
template: '{anchor} {query} {subject}'
|
||||
scores:
|
||||
exact_value_with_unit: 120
|
||||
exact_value_only: 40
|
||||
anchor_bonus: 50
|
||||
numeric_focus_patterns:
|
||||
- '/(?P<value>\d+(?:[,.]\d+)?)\s*(?P<unit>°?\s*d\s*h|dh|dH)/iu'
|
||||
product_title_patterns:
|
||||
- '/#\s*Produkt\s+Titel:\s*`(?P<title>[^`]+)`/iu'
|
||||
- '/\*\*Produktname:\*\*\s*(?P<title>[^\r\n]+)/iu'
|
||||
anchor_bonus_patterns:
|
||||
- '/\b[\p{L}][\p{L}\p{N}®+._-]*(?:\s+|[-_])\d{2,5}\b/u'
|
||||
subject_terms:
|
||||
- resthärte
|
||||
- resthaerte
|
||||
- wasserhärte
|
||||
- wasserhaerte
|
||||
- gesamthärte
|
||||
- gesamthaerte
|
||||
- härte
|
||||
- haerte
|
||||
- grenzwert
|
||||
- messung
|
||||
|
||||
language_preservation:
|
||||
enabled: true
|
||||
language_markers:
|
||||
|
||||
@@ -129,6 +129,7 @@ parameters:
|
||||
language_rules_label: LANGUAGE RULES
|
||||
fact_grounding_rules_label: FACT GROUNDING RULES
|
||||
measurement_evidence_label: MEASUREMENT PARAMETER EVIDENCE CHECK
|
||||
numeric_value_focus_label: EXACT NUMERIC VALUE FOCUS
|
||||
retrieved_knowledge_label: RETRIEVED KNOWLEDGE (primary for technical matching and factual explanation)
|
||||
url_content_label: CONTENT FROM URL (authoritative if user-provided)
|
||||
conversation_context:
|
||||
@@ -220,14 +221,18 @@ parameters:
|
||||
- '- This block is generated from the current user question and is stricter than broad product-selection wording.'
|
||||
- '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.'
|
||||
- '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.'
|
||||
- '- For exact numeric measurement requests, records that do not contain the requested numeric value/unit must not provide indicators, ranges, variants, accessories, or suitability details for the selected product.'
|
||||
product_specific_rules:
|
||||
- '- Verify every recommended product independently against the requested measurement parameter.'
|
||||
- '- If a retrieved RAG record mentions several products, only use a product for the requested parameter when that product is named in the same sentence, bullet, table row, or clearly bounded product section as the parameter evidence.'
|
||||
- '- Do not transfer measurement suitability from one product, variant, indicator, category, tag, heading, or nearby paragraph to another product.'
|
||||
- '- Generic category, umbrella-topic, device-class, product-family, document-title, tag, or application-area terms are not enough to prove a specific measurement parameter for a specific product.'
|
||||
- '- Indicator names and indicator ranges are product-specific evidence. Use them only when the same source record explicitly connects the exact product, requested value or range, and indicator code.'
|
||||
- '- If a record proves the device/value but the visible excerpt does not connect an indicator code to that device/value, omit the indicator instead of borrowing one from a different product record.'
|
||||
generic_request_patterns:
|
||||
- '/\b(?:mit|für|fuer|zur|zum)\s+(?:dem\s+)?(?:messparameter|parameter|messwert|messgröße|messgroesse)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
|
||||
- '/\b(?:messparameter|parameter|messwert|messgröße|messgroesse)\s*(?:für|fuer|von|zur|zum|:)\s*(?<parameter>[^?.!,;\n]{2,80})/iu'
|
||||
- '/\b(?:messung|messen|überwachung|ueberwachung|bestimmung|analyse)\s+(?:von|der|des|für|fuer|zur|zum)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
|
||||
generic_positive_context_terms:
|
||||
- Messung
|
||||
- messen
|
||||
@@ -436,6 +441,18 @@ parameters:
|
||||
- '- If one source chunk contains both the best matching value and nearby comparison values, use the nearby values only as context and do not include them unless the user asks for comparison or alternatives.'
|
||||
- '- For lowest/highest/minimum/maximum questions, answer only the requested extreme value and the product/device explicitly connected to it.'
|
||||
- '- Do not add runner-up products, second-lowest values, adjacent ranges, broader tables, or explanatory comparisons unless explicitly requested.'
|
||||
- '- For a product recommendation tied to an exact numeric value, keep the recommendation anchored to records that contain that exact value/unit. Do not pull indicator codes or ranges from records for other products.'
|
||||
- '- If the user asks for a suitable device/product and not for an indicator, do not add indicator names unless a same-record device-value-indicator mapping is visible.'
|
||||
numeric_value_focus:
|
||||
enabled: true
|
||||
max_values: 3
|
||||
value_patterns:
|
||||
- '/(?<value>\d+(?:[,.]\d+)?)\s*(?<unit>°?\s*dH|dh|°dh|°\s*dH)\b/iu'
|
||||
rules:
|
||||
- '- Exact numeric focus from the current user question: {values}.'
|
||||
- '- Prefer retrieved records that explicitly contain this exact value/unit when selecting or recommending a product.'
|
||||
- '- Records without the exact value/unit may provide background only; they must not provide indicator codes, indicator ranges, variants, or suitability details for the selected product.'
|
||||
- '- If the selected product record proves the exact value but does not visibly connect an indicator code to that product and value, omit the indicator field rather than borrowing an indicator from another record.'
|
||||
fallback_escalation:
|
||||
state_line_template: '- Internal confidence state: {state}.'
|
||||
base_rules:
|
||||
@@ -518,6 +535,7 @@ parameters:
|
||||
- '- For direct follow-up questions about an indicator, value, threshold, or device, answer the resolved mapping first before any table or explanation.'
|
||||
- '- If the sources only support a negative finding, output only that negative finding and do not add speculative alternatives.'
|
||||
- '- For product-selection answers, keep the answer minimal: suitable product if explicitly supported, exact evidence, current shop fields if same product identity is clear. Do not add sections for Vorteile, Einsatzbereiche, Messprinzip, or Hinweise unless directly asked and explicitly sourced.'
|
||||
- '- For product-selection answers tied to a numeric value/range, do not include an indicator field unless the same retrieved record explicitly connects the selected product, numeric value/range, and indicator code.'
|
||||
accessory_rules:
|
||||
- '- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.'
|
||||
- '- The main device must come first. The accessory must not replace the main device.'
|
||||
@@ -606,6 +624,7 @@ parameters:
|
||||
- '- For lowest/highest/minimum/maximum questions, answer only the requested extreme unless the user explicitly asks for a comparison or alternatives.'
|
||||
- '- For direct numeric lookup questions such as which device measures a given threshold, answer with the exact matching device/value pair first and avoid advisory
|
||||
caveats.'
|
||||
- '- For product recommendations based on an exact numeric value, use only same-record evidence for the recommended product. Do not import indicator names, ranges, or variants from higher-ranked records that describe different products.'
|
||||
- '- Do not add the runner-up product, second-lowest value, or adjacent range unless the user asks for it.'
|
||||
- '- Do not add calibration, accuracy, pretreatment, temperature, or application notes unless those exact notes are requested and explicitly present in the
|
||||
retrieved source.'
|
||||
@@ -615,6 +634,7 @@ parameters:
|
||||
- '- Do not output the full indicator table, measurement principle, application areas, or advisory notes unless the user explicitly asks for all indicators, details, a table, or device information.'
|
||||
- '- For numeric extreme questions, do not combine a value, device name, indicator name, range or product variant from different chunks unless the same retrieved
|
||||
entry explicitly connects them.'
|
||||
- '- For exact-value product recommendations, if the retrieved record only supports product plus value/range, answer product plus value/range only; indicator details require an explicit same-record product-value-indicator mapping.'
|
||||
- '- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to
|
||||
another.'
|
||||
- '- For Testomat CAL or Testomat 2000 CAL threshold/range questions, use only source entries that explicitly name CAL or Testomat 2000 CAL in the same product record. Do not answer with Testomat 808 indicator ranges or the generic 0,02 °dH to 5 °dH range unless a CAL source record explicitly contains that exact assignment.'
|
||||
|
||||
@@ -291,6 +291,26 @@ final readonly class AgentRunner
|
||||
$optimizedShopQuery = '';
|
||||
}
|
||||
|
||||
$ragAnchoredShopSearchQuery = $this->enrichShopSearchQueryWithRagAnchor(
|
||||
prompt: $originalPrompt,
|
||||
shopSearchQuery: $shopSearchQuery,
|
||||
knowledgeChunks: $knowledgeChunks
|
||||
);
|
||||
|
||||
if ($ragAnchoredShopSearchQuery !== $shopSearchQuery) {
|
||||
$this->agentLogger->info('Enriched shop search query with RAG product anchor', [
|
||||
'userId' => $userId,
|
||||
'prompt' => $prompt,
|
||||
'routingPrompt' => $routingPrompt,
|
||||
'optimizedShopQuery' => $optimizedShopQuery,
|
||||
'shopSearchQuery' => $shopSearchQuery,
|
||||
'ragAnchoredShopSearchQuery' => $ragAnchoredShopSearchQuery,
|
||||
]);
|
||||
|
||||
$shopSearchQuery = $ragAnchoredShopSearchQuery;
|
||||
$optimizedShopQuery = '';
|
||||
}
|
||||
|
||||
if ($shopSearchQuery === '') {
|
||||
$this->agentLogger->info('Commerce search skipped because no concrete shop query could be resolved', [
|
||||
'userId' => $userId,
|
||||
@@ -1042,7 +1062,7 @@ final readonly class AgentRunner
|
||||
*/
|
||||
private function tokenizeInputNormalizationGuardText(string $value): array
|
||||
{
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', mb_strtolower($value, 'UTF-8'), $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1057,7 +1077,7 @@ final readonly class AgentRunner
|
||||
*/
|
||||
private function extractInputNormalizationNumbers(string $value): array
|
||||
{
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?/u', $value, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1193,7 +1213,7 @@ final readonly class AgentRunner
|
||||
return [];
|
||||
}
|
||||
|
||||
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) !== 1) {
|
||||
if (preg_match_all($this->agentRunnerConfig->getFollowUpHistoryQuestionPattern(), $history, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -1685,6 +1705,302 @@ final readonly class AgentRunner
|
||||
return trim($shopSearchQuery . ' ' . implode(' ', array_values($appendTokens)));
|
||||
}
|
||||
|
||||
/**
 * Put a RAG-derived product anchor in front of a shop search query.
 *
 * The trimmed query is returned as-is when it is empty, when there are no
 * knowledge chunks, when the feature is disabled, when the prompt carries no
 * numeric focus, when no anchor can be resolved, or when every anchor token
 * already occurs in the query. Otherwise the configured template is rendered
 * with the anchor, the query and the subject terms, and the result is cut to
 * the configured maximum number of terms.
 *
 * @param string[] $knowledgeChunks
 */
private function enrichShopSearchQueryWithRagAnchor(
    string $prompt,
    string $shopSearchQuery,
    array $knowledgeChunks
): string {
    $baseQuery = trim($shopSearchQuery);

    if ($baseQuery === '' || $knowledgeChunks === []) {
        return $baseQuery;
    }

    if (!$this->agentRunnerConfig->isShopQueryRagAnchorEnrichmentEnabled()) {
        return $baseQuery;
    }

    $numericFocuses = $this->extractShopQueryNumericFocuses($prompt);
    if ($numericFocuses === []) {
        return $baseQuery;
    }

    $productAnchor = $this->resolveBestRagShopQueryAnchor($knowledgeChunks, $numericFocuses);
    if ($productAnchor === '') {
        return $baseQuery;
    }

    $knownTokens = array_fill_keys($this->tokenizeShopQueryCandidate($baseQuery), true);
    $anchorTokens = $this->tokenizeShopQueryCandidate($productAnchor);

    if ($anchorTokens === []) {
        return $baseQuery;
    }

    // Nothing to add when the query already contains every token of the anchor.
    $anchorTokensNotInQuery = array_diff_key(array_fill_keys($anchorTokens, true), $knownTokens);
    if ($anchorTokensNotInQuery === []) {
        return $baseQuery;
    }

    $placeholders = [
        '{anchor}' => $productAnchor,
        '{query}' => $baseQuery,
        '{subject}' => $this->extractRagAnchorSubjectTerms($prompt, $baseQuery),
    ];

    $limitedQuery = $this->limitShopQueryTerms(
        strtr($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentTemplate(), $placeholders),
        $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMaxQueryTerms()
    );

    return $limitedQuery === '' ? $baseQuery : $limitedQuery;
}
|
||||
|
||||
/**
 * Collect the distinct numeric value/unit pairs found in the prompt.
 *
 * Each configured pattern is applied in turn; a pattern for which
 * preg_match_all() reports failure is skipped. Value and unit are read from
 * the named groups "value"/"unit", falling back to groups 1 and 2, and are
 * normalized before de-duplication.
 *
 * @return array<int, array{value:string, unit:string}>
 */
private function extractShopQueryNumericFocuses(string $prompt): array
{
    $collected = [];

    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentNumericFocusPatterns() as $regex) {
        if (@preg_match_all($regex, $prompt, $hits, PREG_SET_ORDER) === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $valueText = $hit['value'] ?? ($hit[1] ?? '');
            $unitText = $hit['unit'] ?? ($hit[2] ?? '');

            if (!is_scalar($valueText) || !is_scalar($unitText)) {
                continue;
            }

            $value = $this->normalizeShopQueryNumericFocusValue((string) $valueText);
            $unit = $this->normalizeShopQueryNumericFocusUnit((string) $unitText);

            if ($value === '' || $unit === '') {
                continue;
            }

            // Keyed by "value|unit" so that the first occurrence of a pair wins.
            $dedupeKey = $value . '|' . $unit;
            if (!isset($collected[$dedupeKey])) {
                $collected[$dedupeKey] = [
                    'value' => $value,
                    'unit' => $unit,
                ];
            }
        }
    }

    return array_values($collected);
}
|
||||
|
||||
/**
 * Pick the product title of the best-scoring knowledge chunk.
 *
 * A chunk is considered only if a product title can be extracted from it and
 * its numeric-focus score is positive. The score is then raised by the anchor
 * bonus (when the title matches a bonus pattern) and by a position bonus that
 * shrinks with the chunk index. The first chunk with the highest score that
 * reaches the configured minimum wins; an empty string means no chunk qualified.
 *
 * @param string[] $knowledgeChunks
 * @param array<int, array{value:string, unit:string}> $focuses
 */
private function resolveBestRagShopQueryAnchor(array $knowledgeChunks, array $focuses): string
{
    $threshold = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentMinScore();
    $positionBonusCap = max(0, $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax());

    $winner = '';
    $winnerScore = 0;

    foreach (array_values($knowledgeChunks) as $position => $rawChunk) {
        $text = (string) $rawChunk;

        $candidate = $this->extractRagProductTitleAnchor($text);
        if ($candidate === '') {
            continue;
        }

        $candidateScore = $this->scoreRagChunkForShopQueryNumericFocus($text, $focuses);
        if ($candidateScore <= 0) {
            continue;
        }

        if ($this->ragAnchorMatchesAnyBonusPattern($candidate)) {
            $candidateScore += $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusScore();
        }

        // Earlier chunks receive a larger bonus; it is zero once the index reaches the cap.
        $candidateScore += max(0, $positionBonusCap - $position);

        if ($candidateScore >= $threshold && $candidateScore > $winnerScore) {
            $winnerScore = $candidateScore;
            $winner = $candidate;
        }
    }

    return $winner;
}
|
||||
|
||||
/**
 * Return the first non-empty product title found in a chunk.
 *
 * The configured title patterns are tried in order. The captured title
 * (named group "title", else group 1) has "®" removed, whitespace runs
 * collapsed to single spaces and is trimmed. Returns '' when no pattern
 * yields a usable title.
 */
private function extractRagProductTitleAnchor(string $chunk): string
{
    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentProductTitlePatterns() as $regex) {
        $found = @preg_match($regex, $chunk, $groups);
        if ($found !== 1) {
            continue;
        }

        $captured = $groups['title'] ?? ($groups[1] ?? '');
        if (!is_scalar($captured)) {
            continue;
        }

        $withoutTrademark = str_replace('®', '', (string) $captured);
        $collapsed = preg_replace('/\s+/u', ' ', $withoutTrademark) ?? '';
        $cleanTitle = trim($collapsed);

        if ($cleanTitle === '') {
            continue;
        }

        return $cleanTitle;
    }

    return '';
}
|
||||
|
||||
/**
 * Score a chunk by how many requested numeric focuses it contains.
 *
 * The chunk is normalized first. For every focus whose value occurs in the
 * normalized text, the "exact value with unit" score is added when the unit
 * occurs as well (or is empty), otherwise the "exact value only" score.
 * Value and unit are each searched anywhere in the normalized text.
 *
 * @param array<int, array{value:string, unit:string}> $focuses
 */
private function scoreRagChunkForShopQueryNumericFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeShopQueryNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;

    foreach ($focuses as $focus) {
        $valueFound = $focus['value'] !== '' && str_contains($haystack, $focus['value']);
        if (!$valueFound) {
            continue;
        }

        $unitFound = $focus['unit'] === '' || str_contains($haystack, $focus['unit']);

        $total += $unitFound
            ? $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueUnitScore()
            : $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentExactValueScore();
    }

    return $total;
}
|
||||
|
||||
/**
 * Tell whether the anchor matches at least one configured bonus pattern.
 *
 * Patterns for which preg_match() does not return 1 (no match or failure)
 * are simply passed over.
 */
private function ragAnchorMatchesAnyBonusPattern(string $anchor): bool
{
    $bonusPatterns = $this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentAnchorBonusPatterns();

    foreach ($bonusPatterns as $regex) {
        $outcome = @preg_match($regex, $anchor);

        if ($outcome === 1) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Build the "{subject}" part of the enriched query.
 *
 * A configured subject term is kept when all of its tokens occur in the
 * prompt and at least one of them is missing from the current shop query.
 * The kept terms are de-duplicated and joined with single spaces.
 */
private function extractRagAnchorSubjectTerms(string $prompt, string $shopSearchQuery): string
{
    $promptTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($prompt), true);
    $queryTokenSet = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);

    $selected = [];

    foreach ($this->agentRunnerConfig->getShopQueryRagAnchorEnrichmentSubjectTerms() as $candidateTerm) {
        $candidateTokens = $this->tokenizeShopQueryCandidate($candidateTerm);
        if ($candidateTokens === []) {
            continue;
        }

        $candidateTokenSet = array_fill_keys($candidateTokens, true);

        // The term must be fully present in the prompt ...
        if (array_diff_key($candidateTokenSet, $promptTokenSet) !== []) {
            continue;
        }

        // ... and must not already be fully covered by the query.
        if (array_diff_key($candidateTokenSet, $queryTokenSet) !== []) {
            $selected[] = $candidateTerm;
        }
    }

    return implode(' ', array_values(array_unique($selected)));
}
|
||||
|
||||
/**
 * Re-tokenize a query, drop repeated tokens and keep at most $maxTerms of them.
 *
 * A limit below 1 is treated as 1. Tokens keep their first-occurrence order
 * and are joined with single spaces.
 */
private function limitShopQueryTerms(string $query, int $maxTerms): string
{
    $limit = max(1, $maxTerms);

    $distinctTokens = array_unique($this->tokenizeShopQueryCandidate($query));

    return implode(' ', array_slice($distinctTokens, 0, $limit));
}
|
||||
|
||||
/**
 * Reduce a raw numeric value to digits and commas.
 *
 * The text goes through the shared search-text normalization first; then
 * everything except 0-9 and "," is removed and leading/trailing commas are
 * stripped.
 */
private function normalizeShopQueryNumericFocusValue(string $value): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($value);
    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $searchText) ?? $searchText;

    return trim($digitsAndCommas, ',');
}
|
||||
|
||||
/**
 * Reduce a raw unit to its letters.
 *
 * The text goes through the shared search-text normalization first; then
 * every character that is not a Unicode letter is removed.
 */
private function normalizeShopQueryNumericFocusUnit(string $unit): string
{
    $searchText = $this->normalizeShopQueryNumericFocusSearchText($unit);

    return preg_replace('/[^\p{L}]+/u', '', $searchText) ?? $searchText;
}
|
||||
|
||||
/**
 * Normalize text for substring comparison of numeric values and units.
 *
 * Steps, in order: trim and lower-case (UTF-8), normalize dash equivalents
 * via the language cleanup config, turn "." into ",", remove all whitespace,
 * then remove everything that is not a letter, a number or a comma.
 */
private function normalizeShopQueryNumericFocusSearchText(string $value): string
{
    $lowered = mb_strtolower(trim($value), 'UTF-8');
    $dashNormalized = $this->languageCleanupConfig->normalizeDashEquivalents($lowered);
    $commaDecimals = str_replace('.', ',', $dashNormalized);

    $withoutWhitespace = preg_replace('/\s+/u', '', $commaDecimals) ?? $commaDecimals;
    $alphanumeric = preg_replace('/[^\p{L}\p{N},]+/u', '', $withoutWhitespace) ?? $withoutWhitespace;

    return trim($alphanumeric);
}
|
||||
|
||||
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
|
||||
string $prompt,
|
||||
string $optimizedShopQuery
|
||||
@@ -2021,7 +2337,7 @@ final readonly class AgentRunner
|
||||
$value = mb_strtolower(trim($value), 'UTF-8');
|
||||
$value = $this->languageCleanupConfig->replaceWordSeparatorsWithSpace($value);
|
||||
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) !== 1) {
|
||||
if (preg_match_all('/\d+(?:[,.]\d+)?|[\p{L}\p{N}]+/u', $value, $matches) === false) {
|
||||
return [];
|
||||
}
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ final readonly class PromptBuilder
|
||||
$prompt = $this->normalizeBlockText($prompt);
|
||||
$urlContent = $this->normalizeBlockText($urlContent);
|
||||
$swagFullOutPut = $this->normalizeNullableBlockText($swagFullOutPut);
|
||||
$knowledgeChunks = $this->prioritizeKnowledgeChunksForNumericValueFocus($prompt, $knowledgeChunks);
|
||||
|
||||
$hasShopResults = $shopResults !== [];
|
||||
$hasKnowledge = $knowledgeChunks !== [] || $urlContent !== '';
|
||||
@@ -73,6 +74,7 @@ final readonly class PromptBuilder
|
||||
shopResults: $shopResults,
|
||||
requestedRole: $requestedProductRole
|
||||
);
|
||||
$numericValueFocusBlock = $this->buildNumericValueFocusBlock($prompt);
|
||||
$outputPriorityBlock = $this->buildOutputPriorityBlock(
|
||||
hasShopResults: $hasShopResults,
|
||||
isTechnicalProductQuestion: $isTechnicalProductQuestion
|
||||
@@ -101,6 +103,7 @@ final readonly class PromptBuilder
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$measurementEvidenceBlock,
|
||||
$numericValueFocusBlock,
|
||||
$outputPriorityBlock,
|
||||
$fallbackEscalationBlock,
|
||||
$responseFormatBlock,
|
||||
@@ -118,6 +121,7 @@ final readonly class PromptBuilder
|
||||
$systemBlock,
|
||||
$shopBlock,
|
||||
$measurementEvidenceBlock,
|
||||
$numericValueFocusBlock,
|
||||
$outputPriorityBlock,
|
||||
$fallbackEscalationBlock,
|
||||
$responseFormatBlock,
|
||||
@@ -250,6 +254,174 @@ final readonly class PromptBuilder
|
||||
return $this->implodeBlocks($parts);
|
||||
}
|
||||
|
||||
/**
 * Build the prompt block that lists the exact numeric values the user asked for.
 *
 * Returns '' when the feature is disabled or the prompt contains no numeric
 * focus. Otherwise every configured rule is rendered with the distinct focus
 * labels (comma separated) as "values" and wrapped in a rule block under the
 * configured section label.
 */
private function buildNumericValueFocusBlock(string $prompt): string
{
    if (!$this->config->isNumericValueFocusEnabled()) {
        return '';
    }

    $numericFocuses = $this->extractNumericValueFocuses($prompt);
    if ($numericFocuses === []) {
        return '';
    }

    $distinctLabels = array_values(array_unique(array_column($numericFocuses, 'label')));
    $placeholders = ['values' => implode(', ', $distinctLabels)];

    $renderedRules = array_values(array_map(
        fn ($ruleTemplate) => $this->renderPromptTemplate($ruleTemplate, $placeholders),
        $this->config->getNumericValueFocusRules()
    ));

    return $this->buildRuleBlock(
        $this->config->getNumericValueFocusSectionLabel(),
        $renderedRules
    );
}
|
||||
|
||||
/**
 * Move chunks that contain the requested numeric values to the front.
 *
 * The input is returned untouched when the feature is disabled, when there
 * are fewer than two chunks, or when the prompt has no numeric focus.
 * Otherwise chunks are ordered by descending focus score; chunks with equal
 * scores keep their original relative order.
 *
 * @param string[] $knowledgeChunks
 * @return string[]
 */
private function prioritizeKnowledgeChunksForNumericValueFocus(string $prompt, array $knowledgeChunks): array
{
    if (!$this->config->isNumericValueFocusEnabled() || count($knowledgeChunks) < 2) {
        return $knowledgeChunks;
    }

    $numericFocuses = $this->extractNumericValueFocuses($prompt);
    if ($numericFocuses === []) {
        return $knowledgeChunks;
    }

    $ranked = [];
    foreach (array_values($knowledgeChunks) as $position => $rawChunk) {
        $text = (string) $rawChunk;
        $ranked[] = [
            'index' => $position,
            'score' => $this->scoreKnowledgeChunkForNumericValueFocus($text, $numericFocuses),
            'chunk' => $text,
        ];
    }

    // Higher score first; the original position breaks ties.
    usort(
        $ranked,
        static fn (array $left, array $right): int =>
            ($right['score'] <=> $left['score']) ?: ($left['index'] <=> $right['index'])
    );

    return array_column($ranked, 'chunk');
}
|
||||
|
||||
/**
 * Score a chunk by the requested numeric focuses it contains.
 *
 * Per focus: 100 points when the normalized chunk contains the value and the
 * unit (or the unit is empty), 20 points when it contains only the value.
 * Value and unit are each searched anywhere in the normalized text.
 *
 * @param array<int, array{value: string, unit: string, label: string}> $focuses
 */
private function scoreKnowledgeChunkForNumericValueFocus(string $chunk, array $focuses): int
{
    $haystack = $this->normalizeNumericFocusSearchText($chunk);
    if ($haystack === '') {
        return 0;
    }

    $total = 0;

    foreach ($focuses as $focus) {
        $valueFound = $focus['value'] !== '' && str_contains($haystack, $focus['value']);
        if (!$valueFound) {
            continue;
        }

        $unitFound = $focus['unit'] === '' || str_contains($haystack, $focus['unit']);
        $total += $unitFound ? 100 : 20;
    }

    return $total;
}
|
||||
|
||||
/**
 * Collect the distinct numeric value/unit pairs found in the prompt.
 *
 * Configured patterns are applied in order; a pattern for which
 * preg_match_all() reports failure is skipped. Collection stops as soon as
 * the configured maximum (at least 1) is reached. "label" is the raw value
 * and raw unit joined by a space and passed through normalizeBlockText().
 *
 * @return array<int, array{value: string, unit: string, label: string}>
 */
private function extractNumericValueFocuses(string $prompt): array
{
    $limit = max(1, $this->config->getNumericValueFocusMaxValues());
    $collected = [];

    foreach ($this->config->getNumericValueFocusPatterns() as $regex) {
        if (@preg_match_all($regex, $prompt, $hits, PREG_SET_ORDER) === false) {
            continue;
        }

        foreach ($hits as $hit) {
            $valueText = $hit['value'] ?? ($hit[1] ?? '');
            $unitText = $hit['unit'] ?? ($hit[2] ?? '');

            if (!is_scalar($valueText) || !is_scalar($unitText)) {
                continue;
            }

            $value = $this->normalizeNumericFocusValue((string) $valueText);
            $unit = $this->normalizeNumericFocusUnit((string) $unitText);

            if ($value === '' || $unit === '') {
                continue;
            }

            // Keyed by "value|unit" so that the first occurrence of a pair wins.
            $dedupeKey = $value . '|' . $unit;
            if (isset($collected[$dedupeKey])) {
                continue;
            }

            $collected[$dedupeKey] = [
                'value' => $value,
                'unit' => $unit,
                'label' => $this->normalizeBlockText(trim((string) $valueText . ' ' . (string) $unitText)),
            ];

            if (count($collected) >= $limit) {
                return array_values($collected);
            }
        }
    }

    return array_values($collected);
}
|
||||
|
||||
/**
 * Reduce a raw numeric value to digits and commas.
 *
 * After the shared search-text normalization, everything except 0-9 and ","
 * is removed and leading/trailing commas are stripped.
 */
private function normalizeNumericFocusValue(string $value): string
{
    $searchText = $this->normalizeNumericFocusSearchText($value);
    $digitsAndCommas = preg_replace('/[^0-9,]+/u', '', $searchText) ?? $searchText;

    return trim($digitsAndCommas, ',');
}
|
||||
|
||||
/**
 * Reduce a raw unit to its letters.
 *
 * After the shared search-text normalization, every character that is not a
 * Unicode letter is removed.
 */
private function normalizeNumericFocusUnit(string $unit): string
{
    $searchText = $this->normalizeNumericFocusSearchText($unit);

    return preg_replace('/[^\p{L}]+/u', '', $searchText) ?? $searchText;
}
|
||||
|
||||
/**
 * Normalize text for substring comparison of numeric values and units.
 *
 * Steps, in order: normalizeBlockText(), lower-case (UTF-8), normalize dash
 * equivalents via the language cleanup config, turn "." into ",", remove all
 * whitespace, then remove everything that is not a letter, a number or a comma.
 */
private function normalizeNumericFocusSearchText(string $value): string
{
    $lowered = mb_strtolower($this->normalizeBlockText($value), 'UTF-8');
    $dashNormalized = $this->languageCleanupConfig->normalizeDashEquivalents($lowered);
    $commaDecimals = str_replace('.', ',', $dashNormalized);

    $withoutWhitespace = preg_replace('/\s+/u', '', $commaDecimals) ?? $commaDecimals;
    $alphanumeric = preg_replace('/[^\p{L}\p{N},]+/u', '', $withoutWhitespace) ?? $withoutWhitespace;

    return trim($alphanumeric);
}
|
||||
|
||||
/**
|
||||
* Build a small priority block that tells the model what to surface first.
|
||||
*/
|
||||
@@ -810,7 +982,7 @@ final readonly class PromptBuilder
|
||||
$withoutParentheses = preg_replace('/\([^)]*\)/u', ' ', $rawParameter) ?? $rawParameter;
|
||||
$this->appendGenericMeasurementParameterParts($terms, $withoutParentheses);
|
||||
|
||||
if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) === 1) {
|
||||
if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) !== false) {
|
||||
foreach ($matches[1] as $parenthetical) {
|
||||
$this->appendGenericMeasurementParameterParts($terms, (string) $parenthetical);
|
||||
}
|
||||
|
||||
@@ -1074,6 +1074,78 @@ final class AgentRunnerConfig
|
||||
{
|
||||
return $this->getRequiredString('shop_prompt.context_anchor_enrichment.template');
|
||||
}
|
||||
/**
 * Required boolean at "shop_prompt.rag_anchor_enrichment.enabled".
 */
public function isShopQueryRagAnchorEnrichmentEnabled(): bool
{
    $enabled = $this->getRequiredBool('shop_prompt.rag_anchor_enrichment.enabled');

    return $enabled;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.min_score".
 */
public function getShopQueryRagAnchorEnrichmentMinScore(): int
{
    $minScore = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.min_score');

    return $minScore;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.max_query_terms".
 */
public function getShopQueryRagAnchorEnrichmentMaxQueryTerms(): int
{
    $maxQueryTerms = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.max_query_terms');

    return $maxQueryTerms;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.early_chunk_bonus_max".
 */
public function getShopQueryRagAnchorEnrichmentEarlyChunkBonusMax(): int
{
    $earlyChunkBonusMax = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.early_chunk_bonus_max');

    return $earlyChunkBonusMax;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.scores.exact_value_with_unit".
 */
public function getShopQueryRagAnchorEnrichmentExactValueUnitScore(): int
{
    $exactValueWithUnit = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.exact_value_with_unit');

    return $exactValueWithUnit;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.scores.exact_value_only".
 */
public function getShopQueryRagAnchorEnrichmentExactValueScore(): int
{
    $exactValueOnly = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.exact_value_only');

    return $exactValueOnly;
}
|
||||
|
||||
/**
 * Required integer at "shop_prompt.rag_anchor_enrichment.scores.anchor_bonus".
 */
public function getShopQueryRagAnchorEnrichmentAnchorBonusScore(): int
{
    $anchorBonus = $this->getRequiredInt('shop_prompt.rag_anchor_enrichment.scores.anchor_bonus');

    return $anchorBonus;
}
|
||||
|
||||
/**
 * Required string at "shop_prompt.rag_anchor_enrichment.template".
 */
public function getShopQueryRagAnchorEnrichmentTemplate(): string
{
    $template = $this->getRequiredString('shop_prompt.rag_anchor_enrichment.template');

    return $template;
}
|
||||
|
||||
/**
 * Required string list at "shop_prompt.rag_anchor_enrichment.numeric_focus_patterns".
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentNumericFocusPatterns(): array
{
    $patterns = $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.numeric_focus_patterns');

    return $patterns;
}
|
||||
|
||||
/**
 * Required string list at "shop_prompt.rag_anchor_enrichment.product_title_patterns".
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentProductTitlePatterns(): array
{
    $patterns = $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.product_title_patterns');

    return $patterns;
}
|
||||
|
||||
/**
 * Required string list at "shop_prompt.rag_anchor_enrichment.anchor_bonus_patterns".
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentAnchorBonusPatterns(): array
{
    $patterns = $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.anchor_bonus_patterns');

    return $patterns;
}
|
||||
|
||||
/**
 * Required string list at "shop_prompt.rag_anchor_enrichment.subject_terms".
 *
 * @return string[]
 */
public function getShopQueryRagAnchorEnrichmentSubjectTerms(): array
{
    $subjectTerms = $this->getRequiredStringList('shop_prompt.rag_anchor_enrichment.subject_terms');

    return $subjectTerms;
}
|
||||
|
||||
public function getShopQueryTranslationReplacements(string $language): array
|
||||
{
|
||||
$value = $this->getOptionalStringMap('shop_prompt.language_preservation.translation_replacements.' . $language);
|
||||
|
||||
@@ -98,6 +98,17 @@ final class PromptBuilderConfig
|
||||
return (float) $value;
|
||||
}
|
||||
|
||||
/**
 * Fetch a required config value and insist that it is a boolean.
 *
 * @throws \InvalidArgumentException when the value at $path is not a boolean
 */
private function getRequiredBool(string $path): bool
{
    $candidate = $this->getRequiredValue($path);

    if (is_bool($candidate)) {
        return $candidate;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be boolean.', $path));
}
|
||||
|
||||
|
||||
private function getRequiredString(string $path): string
|
||||
{
|
||||
@@ -578,6 +589,37 @@ final class PromptBuilderConfig
|
||||
return $this->getRequiredString('sections.measurement_evidence_label');
|
||||
}
|
||||
|
||||
/**
 * Required boolean at "numeric_value_focus.enabled".
 */
public function isNumericValueFocusEnabled(): bool
{
    $enabled = $this->getRequiredBool('numeric_value_focus.enabled');

    return $enabled;
}
|
||||
|
||||
/**
 * Required string at "sections.numeric_value_focus_label".
 */
public function getNumericValueFocusSectionLabel(): string
{
    $label = $this->getRequiredString('sections.numeric_value_focus_label');

    return $label;
}
|
||||
|
||||
/**
 * Required integer at "numeric_value_focus.max_values".
 */
public function getNumericValueFocusMaxValues(): int
{
    $maxValues = $this->getRequiredInt('numeric_value_focus.max_values');

    return $maxValues;
}
|
||||
|
||||
/**
 * Required string list at "numeric_value_focus.value_patterns".
 *
 * @return string[]
 */
public function getNumericValueFocusPatterns(): array
{
    $patterns = $this->getRequiredStringList('numeric_value_focus.value_patterns');

    return $patterns;
}
|
||||
|
||||
/**
 * Required string list at "numeric_value_focus.rules".
 *
 * @return string[]
 */
public function getNumericValueFocusRules(): array
{
    $rules = $this->getRequiredStringList('numeric_value_focus.rules');

    return $rules;
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user