Files
MtoRagSystem/src/Agent/ReferenceAnchorExtractor.php
team 1 10a3a09a63 p43C
2026-05-05 14:17:54 +02:00

130 lines
3.7 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Agent;
use App\Config\AgentRunnerConfig;
/**
 * Extracts generic reference anchors used to resolve follow-up questions.
 *
 * The extractor is deliberately domain-neutral: every pattern it applies
 * (history turn splitting, question stripping, product model and measurement
 * value matching) is read from AgentRunnerConfig. Current water-analysis
 * patterns remain supported by configuration, but the calling code no longer
 * needs Testomat- or hardness-specific concepts.
 */
final readonly class ReferenceAnchorExtractor
{
    /** Maximum number of characters (not bytes) an anchor may have. */
    private const MAX_ANCHOR_LENGTH = 500;

    /** Suffix appended to an anchor that had to be shortened. */
    private const TRUNCATION_MARKER = '...';

    public function __construct(
        private AgentRunnerConfig $config,
    ) {
    }

    /**
     * Extract stable reference anchors from the latest assistant answer.
     *
     * These anchors are only used to resolve follow-up references such as
     * "der Wert" or "welcher Indikator". They are not factual evidence for
     * the final answer. To avoid propagating wrong earlier answers, only the
     * first explicit product-model reference and the first explicit
     * measurement value are kept. Indicator names, reagent codes, prices,
     * URLs and product numbers are intentionally ignored here.
     *
     * @param string $history Conversation history; it is split into turns
     *                        with the configured turn-split pattern and only
     *                        the last non-blank turn is inspected.
     *
     * @return string[] Zero to two distinct anchors: product model first,
     *                  measurement value second.
     */
    public function extractLatestAssistantReferenceAnchors(string $history): array
    {
        $turn = $this->extractLatestHistoryTurn($history);
        if ($turn === '') {
            return [];
        }

        // Remove the (first) question part so only the answer text is searched.
        // A PCRE failure yields null and is treated as "no answer available".
        $answer = preg_replace($this->config->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? '';
        $answer = trim($answer);
        if ($answer === '') {
            return [];
        }

        $anchors = [];
        foreach (
            [
                $this->extractFirstProductModelAnchor($answer),
                $this->extractFirstMeasurementValueAnchor($answer),
            ] as $anchor
        ) {
            // Skip missing anchors and avoid listing the same text twice.
            if ($anchor !== '' && !in_array($anchor, $anchors, true)) {
                $anchors[] = $anchor;
            }
        }

        return $anchors;
    }

    /**
     * Return the first match of the configured product-model pattern.
     *
     * The registered-trademark sign is removed BEFORE whitespace is
     * normalised, so that input such as "Name ® 2000" becomes "Name 2000"
     * instead of keeping a double space where the sign used to be.
     *
     * @return string The normalised anchor, or '' when the pattern does not
     *                match (or the match cannot be processed).
     */
    public function extractFirstProductModelAnchor(string $text): string
    {
        $pattern = $this->config->getFollowUpReferenceAnchorProductModelPattern();
        if (preg_match($pattern, $text, $matches) !== 1) {
            return '';
        }

        return $this->sanitizeAnchor(str_replace('®', '', $matches[0]));
    }

    /**
     * Return the first match of the configured measurement-value pattern.
     *
     * The match goes through the same normalisation as product-model
     * anchors (whitespace collapsed, trimmed, length-capped).
     *
     * @return string The normalised anchor, or '' when the pattern does not
     *                match (or the match cannot be processed).
     */
    public function extractFirstMeasurementValueAnchor(string $text): string
    {
        $pattern = $this->config->getFollowUpReferenceAnchorMeasurementValuePattern();
        if (preg_match($pattern, $text, $matches) !== 1) {
            return '';
        }

        return $this->sanitizeAnchor($matches[0]);
    }

    /**
     * Return the last non-blank turn of the history, trimmed.
     *
     * @return string The turn text, or '' when the history is blank, the
     *                split fails, or every part is blank.
     */
    private function extractLatestHistoryTurn(string $history): string
    {
        $history = trim($history);
        if ($history === '') {
            return '';
        }

        $parts = preg_split($this->config->getFollowUpHistoryTurnSplitPattern(), $history);
        if ($parts === false) {
            return '';
        }

        // Walk backwards: the first non-blank part from the end is the latest turn.
        foreach (array_reverse($parts) as $part) {
            $part = trim($part);
            if ($part !== '') {
                return $part;
            }
        }

        return '';
    }

    /**
     * Normalise an anchor: collapse whitespace runs to one space, trim, and
     * cap the length at MAX_ANCHOR_LENGTH characters.
     *
     * Over-long values are cut so that the result, including the trailing
     * TRUNCATION_MARKER, never exceeds the cap.
     *
     * @return string The normalised value; '' for blank input or when the
     *                whitespace regex fails (preg_replace returns null).
     */
    private function sanitizeAnchor(string $value): string
    {
        $collapsed = preg_replace('/\s+/u', ' ', $value);
        if ($collapsed === null) {
            // The regex engine rejected the subject; drop the anchor rather
            // than pass on text that could not be normalised.
            return '';
        }

        $collapsed = trim($collapsed);
        if (mb_strlen($collapsed, 'UTF-8') <= self::MAX_ANCHOR_LENGTH) {
            return $collapsed;
        }

        $keep = self::MAX_ANCHOR_LENGTH - mb_strlen(self::TRUNCATION_MARKER, 'UTF-8');

        // rtrim so the marker is not preceded by a dangling space.
        return rtrim(mb_substr($collapsed, 0, $keep, 'UTF-8')) . self::TRUNCATION_MARKER;
    }
}