This commit is contained in:
team 1
2026-05-01 19:49:43 +02:00
parent a42f8d656d
commit 1897fdf0eb
9 changed files with 215 additions and 0 deletions

View File

@@ -1653,6 +1653,13 @@ final readonly class AgentRunner
$haystack = $this->normalizeRagEvidenceText(implode("\n\n", array_map('strval', $knowledgeChunks)));
if (
$this->isAggregateRagEvidenceQuery($prompt)
&& !$this->containsAnyRagEvidenceTerm($haystack, $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms())
) {
return 'weak';
}
foreach ($needles as $needleGroup) {
foreach ($needleGroup as $needle) {
if ($this->containsRagEvidenceTerm($haystack, $needle)) {
@@ -1687,6 +1694,37 @@ final readonly class AgentRunner
};
}
/**
 * Tells whether the prompt matches at least one of the configured
 * aggregate-query regex patterns.
 *
 * The prompt is normalized via normalizeRagEvidenceText() first; a prompt
 * that normalizes to an empty string is never treated as an aggregate query.
 */
private function isAggregateRagEvidenceQuery(string $prompt): bool
{
    $subject = $this->normalizeRagEvidenceText($prompt);

    if ($subject !== '') {
        $patterns = $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns();

        foreach ($patterns as $regex) {
            // Warnings are suppressed on purpose: any result other than 1
            // (no match, or preg_match() failing) simply counts as "no match".
            if (@preg_match($regex, $subject) !== 1) {
                continue;
            }

            return true;
        }
    }

    return false;
}
/**
 * Reports whether containsRagEvidenceTerm() succeeds for at least one of the
 * given terms against the haystack. Stops at the first hit; an empty term
 * list yields false.
 *
 * @param string[] $terms
 */
private function containsAnyRagEvidenceTerm(string $haystack, array $terms): bool
{
    foreach ($terms as $candidate) {
        if (!$this->containsRagEvidenceTerm($haystack, $candidate)) {
            continue;
        }

        return true;
    }

    return false;
}
/**
* @return array<int, string[]>
*/

View File

@@ -339,6 +339,22 @@ final class AgentRunnerConfig
return $this->getRequiredStringListMap('rag_evidence_guard.synonyms');
}
/**
 * Reads the list stored under the
 * `rag_evidence_guard.aggregate_query_patterns` config key via
 * getRequiredStringList().
 *
 * @return string[]
 */
public function getRagEvidenceAggregateQueryPatterns(): array
{
    $patterns = $this->getRequiredStringList('rag_evidence_guard.aggregate_query_patterns');

    return $patterns;
}
/**
 * Reads the list stored under the
 * `rag_evidence_guard.aggregate_evidence_terms` config key via
 * getRequiredStringList().
 *
 * @return string[]
 */
public function getRagEvidenceAggregateEvidenceTerms(): array
{
    $terms = $this->getRequiredStringList('rag_evidence_guard.aggregate_evidence_terms');

    return $terms;
}
public function getNoLlmFallbackShopOnlyMessage(): string
{
return $this->getRequiredString('no_llm_fallback.messages.shop_only');

View File

@@ -455,6 +455,12 @@ final readonly class RetriexEffectiveConfigProvider
'generic_internal_error' => $this->agentRunnerConfig->getGenericInternalErrorMessage(),
'debug_internal_error_prefix' => $this->agentRunnerConfig->getDebugInternalErrorPrefix(),
],
'rag_evidence_guard' => [
'stop_terms' => $this->agentRunnerConfig->getRagEvidenceStopTerms(),
'synonyms' => $this->agentRunnerConfig->getRagEvidenceSynonyms(),
'aggregate_query_patterns' => $this->agentRunnerConfig->getRagEvidenceAggregateQueryPatterns(),
'aggregate_evidence_terms' => $this->agentRunnerConfig->getRagEvidenceAggregateEvidenceTerms(),
],
'source_labels' => [
'external_url' => $this->agentRunnerConfig->getExternalUrlSourceLabel(),
'rag_knowledge' => $this->agentRunnerConfig->getRagKnowledgeSourceLabel(),
@@ -1009,6 +1015,13 @@ final readonly class RetriexEffectiveConfigProvider
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
$this->validateStringList($this->toList($ragEvidence['stop_terms'] ?? []), 'agent.rag_evidence_guard.stop_terms', $errors, $warnings);
$this->validateStringListMap($ragEvidence['synonyms'] ?? [], 'agent.rag_evidence_guard.synonyms', $errors, $warnings);
$this->validateRegexPatternList($ragEvidence['aggregate_query_patterns'] ?? [], 'agent.rag_evidence_guard.aggregate_query_patterns', $errors);
$this->validateStringList($this->toList($ragEvidence['aggregate_evidence_terms'] ?? []), 'agent.rag_evidence_guard.aggregate_evidence_terms', $errors, $warnings);
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);

View File

@@ -155,6 +155,27 @@ final readonly class NdjsonChunkLookup
}
}
if ($best === null) {
foreach ($documents as $document) {
$normalizedTitle = $document['normalized_title'];
if (!$this->isConfidentTitleAlphaTokenMatch($normalizedPrompt, $normalizedTitle)) {
continue;
}
$score = 250 + mb_strlen($normalizedTitle, 'UTF-8');
if (preg_match('/\d/u', $normalizedTitle) === 1) {
$score += 500;
}
if ($best === null || $score > $bestScore) {
$best = $document;
$bestScore = $score;
}
}
}
if ($best === null) {
return null;
}
@@ -248,6 +269,49 @@ final readonly class NdjsonChunkLookup
return true;
}
/**
 * Fallback title match for product titles whose significant tokens mix
 * digit-free ("alpha") tokens with at least one token containing a digit.
 *
 * The match is considered confident when the title has two or more alpha
 * tokens and every one of them has a variant present in the prompt, even if
 * the prompt leaves out the numeric token(s). This keeps prompts such as a
 * product family plus variant suffix anchored to the right document instead
 * of falling back to broader semantic hits.
 *
 * Returns false when either input is empty, when the title has fewer than
 * two alpha tokens, or when none of its significant tokens contains a digit.
 */
private function isConfidentTitleAlphaTokenMatch(string $normalizedPrompt, string $normalizedTitle): bool
{
    if ($normalizedTitle === '' || $normalizedPrompt === '') {
        return false;
    }

    $significantTokens = $this->significantTitleTokens($normalizedTitle);

    // Collect the tokens that contain no digit at all.
    $digitFreeTokens = [];
    foreach ($significantTokens as $candidate) {
        if (preg_match('/\d/u', $candidate) !== 1) {
            $digitFreeTokens[] = $candidate;
        }
    }

    $digitFreeCount = count($digitFreeTokens);

    // Need at least two alpha tokens, and the title must also carry at
    // least one token with a digit (otherwise this fallback does not apply).
    if ($digitFreeCount < 2) {
        return false;
    }
    if ($digitFreeCount === count($significantTokens)) {
        return false;
    }

    $promptVariants = $this->tokenVariantLookup($normalizedPrompt);

    foreach ($digitFreeTokens as $alphaToken) {
        foreach ($this->tokenVariants($alphaToken) as $form) {
            if (isset($promptVariants[$form])) {
                // This alpha token is covered by the prompt; check the next one.
                continue 2;
            }
        }

        // No variant of this alpha token appears in the prompt.
        return false;
    }

    return true;
}
/**
* @return string[]
*/