fix retrieve final
This commit is contained in:
@@ -291,8 +291,9 @@ final readonly class AgentRunner
|
|||||||
|
|
||||||
$history = $this->contextService->buildUserContextWithinBudget($userId, 3000);
|
$history = $this->contextService->buildUserContextWithinBudget($userId, 3000);
|
||||||
$previousQuestions = $this->extractRecentUserQuestions($history, 2);
|
$previousQuestions = $this->extractRecentUserQuestions($history, 2);
|
||||||
|
$referenceAnchors = $this->extractLatestAssistantReferenceAnchors($history);
|
||||||
|
|
||||||
if ($previousQuestions === []) {
|
if ($previousQuestions === [] && $referenceAnchors === []) {
|
||||||
return $prompt;
|
return $prompt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -302,6 +303,11 @@ final readonly class AgentRunner
|
|||||||
$lines[] = 'Vorherige Nutzerfrage: ' . $question;
|
$lines[] = 'Vorherige Nutzerfrage: ' . $question;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($referenceAnchors !== []) {
|
||||||
|
$lines[] = 'Vorherige technische Referenzanker (nur zur Referenzauflösung, keine Faktenquelle): '
|
||||||
|
. implode(' ', $referenceAnchors);
|
||||||
|
}
|
||||||
|
|
||||||
$lines[] = 'Aktuelle Folgefrage: ' . $prompt;
|
$lines[] = 'Aktuelle Folgefrage: ' . $prompt;
|
||||||
|
|
||||||
return implode("\n", $lines);
|
return implode("\n", $lines);
|
||||||
@@ -408,6 +414,101 @@ final readonly class AgentRunner
|
|||||||
return array_slice($questions, -$limit);
|
return array_slice($questions, -$limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects reference anchors from the most recent history turn.
 *
 * The anchors exist only to resolve follow-up references such as
 * "der Wert" or "welcher Indikator"; they are not factual evidence for
 * the final answer. To limit propagation of a wrong earlier answer, at
 * most two anchors are returned: the first explicit Testomat model
 * reference and the first explicit °dH value. Indicator names, reagent
 * codes, prices, URLs and product numbers are intentionally not extracted.
 *
 * @param string $history Conversation history; turns are expected to start
 *                        with a "Question:" line.
 *
 * @return string[] Zero, one or two anchors (model first, then hardness value).
 */
private function extractLatestAssistantReferenceAnchors(string $history): array
{
    $turn = $this->extractLatestHistoryTurn($history);

    if ($turn === '') {
        return [];
    }

    // Drop only the leading "Question: ..." line. \h* (horizontal whitespace)
    // is used instead of \s* so that an empty question line cannot swallow the
    // line break and, with it, the first line of the assistant answer.
    $answer = trim(preg_replace('/^Question:\h*.*(?:\R|$)/u', '', $turn, 1) ?? '');

    if ($answer === '') {
        return [];
    }

    $candidates = [
        $this->extractFirstTestomatModelAnchor($answer),
        $this->extractFirstHardnessValueAnchor($answer),
    ];

    // Both extractors return '' when nothing was found; keep real anchors only.
    return array_values(array_unique(array_filter(
        $candidates,
        static fn(string $anchor): bool => $anchor !== ''
    )));
}
|
||||||
|
|
||||||
|
/**
 * Returns the last non-empty segment of the history, where segments are
 * delimited by lines starting with "Question:" followed by whitespace.
 *
 * The returned segment is trimmed. An empty string is returned when the
 * history is blank, when splitting fails, or when no non-empty segment exists.
 */
private function extractLatestHistoryTurn(string $history): string
{
    $trimmedHistory = trim($history);

    if ($trimmedHistory === '') {
        return '';
    }

    // Zero-width split: each segment keeps its own "Question:" prefix.
    $segments = preg_split('/(?=^Question:\s)/m', $trimmedHistory);

    if (!is_array($segments)) {
        return '';
    }

    // Walk backwards and return the first segment that has visible content.
    for ($index = count($segments) - 1; $index >= 0; $index--) {
        $candidate = trim($segments[$index]);

        if ($candidate !== '') {
            return $candidate;
        }
    }

    return '';
}
|
||||||
|
|
||||||
|
/**
 * Finds the first "Testomat <model>" mention in the text and returns it in a
 * normalised form: passed through sanitizeHistoryQuestion(), whitespace runs
 * collapsed to single spaces, the "®" sign removed and the result trimmed.
 *
 * Accepted model parts are a 3-4 digit number or one of the series names
 * EVO, ECO, DUO or LAB with their optional suffixes (case-insensitive).
 *
 * @return string The normalised model reference, or '' when none is found.
 */
private function extractFirstTestomatModelAnchor(string $text): string
{
    $modelRegex = '/\bTestomat(?:®)?\s+'
        . '(?:\d{3,4}|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)'
        . '\b/iu';

    $hit = [];
    $found = preg_match($modelRegex, $text, $hit);

    if ($found !== 1) {
        return '';
    }

    $anchor = $this->sanitizeHistoryQuestion((string) ($hit[0] ?? ''));

    // Keep the sanitised value if the whitespace collapse fails (null result).
    $collapsed = preg_replace('/\s+/u', ' ', $anchor);
    if ($collapsed !== null) {
        $anchor = $collapsed;
    }

    $withoutTrademark = str_replace('®', '', $anchor);

    return trim($withoutTrademark);
}
|
||||||
|
|
||||||
|
/**
 * Finds the first numeric value followed by the unit "°dH" (optional decimal
 * part with "," or ".", case-insensitive unit, optional whitespace around
 * the degree sign) and returns it with whitespace runs collapsed.
 *
 * @return string The matched value including its unit, or '' when none is found.
 */
private function extractFirstHardnessValueAnchor(string $text): string
{
    $hit = [];
    $found = preg_match('/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu', $text, $hit) === 1;

    if (!$found) {
        return '';
    }

    $rawValue = (string) ($hit[0] ?? '');
    $collapsed = preg_replace('/\s+/u', ' ', $rawValue);

    // A failed replacement yields null, which is reported as "no anchor".
    return trim($collapsed ?? '');
}
|
||||||
|
|
||||||
private function sanitizeHistoryQuestion(string $question): string
|
private function sanitizeHistoryQuestion(string $question): string
|
||||||
{
|
{
|
||||||
$question = trim((string) preg_replace('/\s+/u', ' ', $question));
|
$question = trim((string) preg_replace('/\s+/u', ' ', $question));
|
||||||
|
|||||||
@@ -310,6 +310,9 @@ final class PromptBuilderConfig
|
|||||||
'- If the source lists application areas, repeat only those areas and do not broaden them.',
|
'- If the source lists application areas, repeat only those areas and do not broaden them.',
|
||||||
'- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
|
'- If the source names an indicator and threshold, reproduce that exactly without extrapolation.',
|
||||||
'- For lowest, highest, smallest, largest, minimum, maximum, Grenzwert, Messbereich or Aufloesung questions, first identify the exact numeric extreme from the retrieved knowledge and answer that value directly.',
|
'- For lowest, highest, smallest, largest, minimum, maximum, Grenzwert, Messbereich or Aufloesung questions, first identify the exact numeric extreme from the retrieved knowledge and answer that value directly.',
|
||||||
|
'- For lowest/highest/minimum/maximum questions, answer only the requested extreme unless the user explicitly asks for a comparison or alternatives.',
|
||||||
|
'- Do not add the runner-up product, second-lowest value, or adjacent range unless the user asks for it.',
|
||||||
|
'- For follow-up questions such as "which indicator measures that value", first resolve the referenced value/device, then use the retrieved source entry that explicitly connects value, device and indicator.',
|
||||||
'- For numeric extreme questions, do not combine a value, device name, indicator name, range or product variant from different chunks unless the same retrieved entry explicitly connects them.',
|
'- For numeric extreme questions, do not combine a value, device name, indicator name, range or product variant from different chunks unless the same retrieved entry explicitly connects them.',
|
||||||
'- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to another.',
|
'- If several devices or indicators are present, keep each device-indicator-range assignment separate and do not transfer an indicator from one product to another.',
|
||||||
'- If the source states only a threshold function, do not expand it into broader control logic.',
|
'- If the source states only a threshold function, do not expand it into broader control logic.',
|
||||||
|
|||||||
@@ -655,12 +655,15 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
$queryTokens = $this->buildExactDocumentSelectionTokens($prompt);
|
$queryTokens = $this->expandExactSelectionTokenVariants(
|
||||||
|
$this->buildExactDocumentSelectionTokens($prompt)
|
||||||
|
);
|
||||||
|
|
||||||
if ($queryTokens === []) {
|
if ($queryTokens === []) {
|
||||||
return $this->firstChunkIdsFromRows($orderedRows, $max);
|
return $this->firstChunkIdsFromRows($orderedRows, $max);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$detailFocus = $this->buildExactDocumentDetailFocus($prompt);
|
||||||
$scored = [];
|
$scored = [];
|
||||||
|
|
||||||
foreach ($orderedRows as $order => $row) {
|
foreach ($orderedRows as $order => $row) {
|
||||||
@@ -672,7 +675,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
$haystack = $this->normalizeText($this->extractDocumentTitle($row) . ' ' . $text);
|
$haystack = $this->normalizeText($this->extractDocumentTitle($row) . ' ' . $text);
|
||||||
$haystackTokens = array_fill_keys($this->tokenizeText($haystack), true);
|
$haystackTokens = array_fill_keys(
|
||||||
|
$this->expandExactSelectionTokenVariants($this->tokenizeText($haystack)),
|
||||||
|
true
|
||||||
|
);
|
||||||
$score = 0.0;
|
$score = 0.0;
|
||||||
|
|
||||||
foreach ($queryTokens as $token) {
|
foreach ($queryTokens as $token) {
|
||||||
@@ -693,6 +699,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
$score += 2.0;
|
$score += 2.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$score += $this->scoreExactDocumentDetailFocus($detailFocus, $haystack, $text);
|
||||||
|
|
||||||
// Keep early chunks slightly competitive for overview facts,
|
// Keep early chunks slightly competitive for overview facts,
|
||||||
// without letting them hide strongly matching detail chunks.
|
// without letting them hide strongly matching detail chunks.
|
||||||
$score += max(0.0, 1.0 - ($order * 0.05));
|
$score += max(0.0, 1.0 - ($order * 0.05));
|
||||||
@@ -815,10 +823,143 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
return array_values(array_unique($out));
|
return array_values(array_unique($out));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Expands every token into its variants (see exactSelectionTokenVariants())
 * and returns the combined, de-duplicated list in first-seen order.
 *
 * @param string[] $tokens
 * @return string[] Re-indexed list without empty strings or duplicates.
 */
private function expandExactSelectionTokenVariants(array $tokens): array
{
    $expanded = [];

    foreach ($tokens as $token) {
        $variants = $this->exactSelectionTokenVariants($token);

        foreach ($variants as $variant) {
            // Skip empty variants right away instead of filtering afterwards.
            if ($variant === '') {
                continue;
            }

            $expanded[] = $variant;
        }
    }

    $distinct = array_unique($expanded);

    return array_values($distinct);
}
|
||||||
|
|
||||||
|
/**
 * Produces matching variants for a single token.
 *
 * The trimmed token itself always comes first. For tokens of at least five
 * characters, every listed suffix that the token ends with is stripped and
 * the remaining stem is added when it is at least three characters long.
 * Tokens starting with one of the known domain words additionally receive
 * the canonical form(s) of that word.
 *
 * @return string[] De-duplicated variants in insertion order; [] for a blank token.
 */
private function exactSelectionTokenVariants(string $token): array
{
    $candidate = trim($token);

    if ($candidate === '') {
        return [];
    }

    $forms = [$candidate];
    $charCount = mb_strlen($candidate, 'UTF-8');

    // Suffix stripping is only attempted for sufficiently long tokens.
    $suffixes = $charCount >= 5
        ? ['typen', 'innen', 'enen', 'ern', 'en', 'er', 'es', 'e', 's', 'n']
        : [];

    foreach ($suffixes as $ending) {
        if (str_ends_with($candidate, $ending)) {
            $stemLength = $charCount - mb_strlen($ending, 'UTF-8');
            $stem = mb_substr($candidate, 0, $stemLength, 'UTF-8');

            if (mb_strlen($stem, 'UTF-8') >= 3) {
                $forms[] = $stem;
            }
        }
    }

    // Prefix => canonical forms appended when the token starts with the prefix.
    $canonicalByPrefix = [
        'indikator' => ['indikator', 'indikatortyp'],
        'grenzwert' => ['grenzwert'],
        'messbereich' => ['messbereich'],
        'testomat' => ['testomat'],
    ];

    foreach ($canonicalByPrefix as $prefix => $canonicalForms) {
        if (!str_starts_with($candidate, $prefix)) {
            continue;
        }

        foreach ($canonicalForms as $canonicalForm) {
            $forms[] = $canonicalForm;
        }
    }

    return array_values(array_unique($forms));
}
|
||||||
|
|
||||||
|
/**
 * Derives detail-focus flags from the prompt.
 *
 * "asks_indicator" is true when the expanded prompt tokens contain one of
 * the indicator/reagent keywords, or when the normalised prompt contains
 * the phrase "mit welchem" or "womit".
 *
 * @return array{asks_indicator:bool}
 */
private function buildExactDocumentDetailFocus(string $prompt): array
{
    $normalizedPrompt = $this->normalizeText($prompt);
    $tokenVariants = $this->expandExactSelectionTokenVariants(
        $this->tokenizeText($normalizedPrompt)
    );

    $indicatorKeywords = ['indikator', 'indikatortyp', 'reagenz', 'reagens'];
    $mentionsIndicator = array_intersect($indicatorKeywords, $tokenVariants) !== [];

    // Question phrases that ask "with what" without naming an indicator.
    $usesIndicatorPhrase = str_contains($normalizedPrompt, 'mit welchem')
        || str_contains($normalizedPrompt, 'womit');

    return [
        'asks_indicator' => $mentionsIndicator || $usesIndicatorPhrase,
    ];
}
|
||||||
|
|
||||||
|
/**
 * Computes a score bonus for a chunk when the prompt asks for an indicator.
 *
 * Returns 0.0 unless $detailFocus['asks_indicator'] is set. Otherwise the
 * bonuses below are added up independently:
 *  - 14.0 when the raw text mentions indicator types/variants,
 *  - 10.0 when it contains a table header pairing type/indicator with a
 *    threshold or range column,
 *  -  8.0 when it contains a table row with a short code followed by a
 *    numeric cell,
 *  -  5.0 when the normalised haystack mentions "indikator" together with
 *    "grenzwert", "messbereich" or "bereich".
 *
 * Per the original note, this is meant to favour detail chunks of an already
 * matched exact document for follow-ups such as "which indicator measures
 * that value", without affecting shop searches or broad product discovery.
 *
 * @param array{asks_indicator:bool} $detailFocus
 */
private function scoreExactDocumentDetailFocus(array $detailFocus, string $normalizedHaystack, string $rawText): float
{
    if (!$detailFocus['asks_indicator']) {
        return 0.0;
    }

    $bonusByPattern = [
        '/verf(?:ü|ue)gbare\s+indikatortypen|indikatortypen|indikatorvarianten/iu' => 14.0,
        '/\|\s*(?:typ|indikator)\s*\|\s*(?:grenzwert|messbereich|bereich)/iu' => 10.0,
        '/\|\s*[A-Z]{0,4}\s*\d{2,4}\s*[A-Z]?\s*\|\s*\d/iu' => 8.0,
    ];

    $total = 0.0;

    foreach ($bonusByPattern as $pattern => $bonus) {
        if (preg_match($pattern, $rawText) === 1) {
            $total += $bonus;
        }
    }

    if (str_contains($normalizedHaystack, 'indikator')) {
        foreach (['grenzwert', 'messbereich', 'bereich'] as $contextWord) {
            if (str_contains($normalizedHaystack, $contextWord)) {
                // The context bonus is granted once, regardless of how many words match.
                $total += 5.0;
                break;
            }
        }
    }

    return $total;
}
|
||||||
|
|
||||||
private function isExactDetailToken(string $token): bool
|
private function isExactDetailToken(string $token): bool
|
||||||
{
|
{
|
||||||
return in_array($token, [
|
return in_array($token, [
|
||||||
'indikator', 'indikatortyp', 'reagenz', 'reagens', 'grenzwert',
|
'indikator', 'indikatoren', 'indikatortyp', 'indikatortypen', 'reagenz', 'reagens', 'grenzwert',
|
||||||
'messbereich', 'bereich', 'wasserhaerte', 'wasserhärte',
|
'messbereich', 'bereich', 'wasserhaerte', 'wasserhärte',
|
||||||
'resthaerte', 'resthärte', 'haerte', 'härte', 'aufloesung',
|
'resthaerte', 'resthärte', 'haerte', 'härte', 'aufloesung',
|
||||||
'auflösung', 'schnittstelle', 'relais', 'fehlercode', 'code',
|
'auflösung', 'schnittstelle', 'relais', 'fehlercode', 'code',
|
||||||
@@ -830,7 +971,9 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
{
|
{
|
||||||
return in_array($token, [
|
return in_array($token, [
|
||||||
'vorherige', 'vorheriger', 'nutzerfrage', 'aktuelle',
|
'vorherige', 'vorheriger', 'nutzerfrage', 'aktuelle',
|
||||||
'folgefrage', 'frage', 'antwort', 'welche', 'welcher', 'welches',
|
'folgefrage', 'frage', 'antwort', 'technische', 'referenzanker',
|
||||||
|
'referenzaufloesung', 'referenzauflösung', 'faktenquelle', 'keine',
|
||||||
|
'welche', 'welcher', 'welches',
|
||||||
'welchem', 'welchen', 'wird', 'werden', 'wurde', 'kann', 'koennen',
|
'welchem', 'welchen', 'wird', 'werden', 'wurde', 'kann', 'koennen',
|
||||||
'können', 'mit', 'der', 'die', 'das', 'den', 'dem', 'ein', 'eine',
|
'können', 'mit', 'der', 'die', 'das', 'den', 'dem', 'ein', 'eine',
|
||||||
'einer', 'eines', 'ist', 'sind', 'was', 'wie', 'wo', 'zum', 'zur',
|
'einer', 'eines', 'ist', 'sind', 'was', 'wie', 'wo', 'zum', 'zur',
|
||||||
|
|||||||
Reference in New Issue
Block a user