130 lines
3.7 KiB
PHP
130 lines
3.7 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Agent;
|
|
|
|
use App\Config\AgentRunnerConfig;
|
|
|
|
/**
 * Extracts generic reference anchors used to resolve follow-up questions.
 *
 * The extractor is deliberately domain-neutral: product model anchors and
 * measurement value anchors are configured through AgentRunnerConfig. Current
 * water-analysis patterns remain supported by configuration, but the calling
 * code no longer needs Testomat- or hardness-specific concepts.
 */
final readonly class ReferenceAnchorExtractor
{
    /** Upper bound (in characters) for a single anchor, including the ellipsis. */
    private const MAX_ANCHOR_LENGTH = 500;

    /** Marker appended to an anchor that had to be shortened. */
    private const ELLIPSIS = '...';

    public function __construct(
        private AgentRunnerConfig $config,
    ) {
    }

    /**
     * Extract stable reference anchors from the latest assistant answer.
     *
     * These anchors are only used to resolve follow-up references such as
     * "der Wert" or "welcher Indikator". They are not factual evidence for
     * the final answer. To avoid propagating wrong earlier answers, only the
     * first explicit product-model reference and the first explicit measurement
     * value are kept. Indicator names, reagent codes, prices, URLs and product
     * numbers are intentionally ignored here.
     *
     * @param string $history Conversation history; only its last non-blank turn is inspected.
     *
     * @return list<string> Zero, one or two distinct anchors (model first, then measurement value).
     */
    public function extractLatestAssistantReferenceAnchors(string $history): array
    {
        $turn = $this->extractLatestHistoryTurn($history);

        if ($turn === '') {
            return [];
        }

        // Drop the (first) question part of the turn so that only the answer text is scanned.
        // A PCRE failure (null) is treated like an empty answer.
        $answer = trim(
            preg_replace($this->config->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? '',
        );

        if ($answer === '') {
            return [];
        }

        $anchors = array_filter(
            [
                $this->extractFirstProductModelAnchor($answer),
                $this->extractFirstMeasurementValueAnchor($answer),
            ],
            static fn (string $anchor): bool => $anchor !== '',
        );

        return array_values(array_unique($anchors));
    }

    /**
     * Return the first product-model reference found in the text.
     *
     * The registered-trademark sign is removed before whitespace is
     * normalised, so "Testomat ® 2000" becomes "Testomat 2000" rather than
     * keeping a doubled space where the sign used to be.
     *
     * @param string $text Text to scan with the configured product-model pattern.
     *
     * @return string The normalised anchor, or '' when the pattern does not match.
     */
    public function extractFirstProductModelAnchor(string $text): string
    {
        $pattern = $this->config->getFollowUpReferenceAnchorProductModelPattern();

        if (preg_match($pattern, $text, $matches) !== 1) {
            return '';
        }

        return $this->sanitizeAnchor(str_replace('®', '', $matches[0]));
    }

    /**
     * Return the first measurement value found in the text.
     *
     * @param string $text Text to scan with the configured measurement-value pattern.
     *
     * @return string The normalised anchor, or '' when the pattern does not match.
     */
    public function extractFirstMeasurementValueAnchor(string $text): string
    {
        $pattern = $this->config->getFollowUpReferenceAnchorMeasurementValuePattern();

        if (preg_match($pattern, $text, $matches) !== 1) {
            return '';
        }

        // Same normalisation (and length guard) as product-model anchors.
        return $this->sanitizeAnchor($matches[0]);
    }

    /**
     * Return the last non-blank turn of the history, trimmed.
     *
     * The history is split with the configured turn-split pattern; '' is
     * returned for blank input, when splitting fails, or when every part is
     * blank.
     */
    private function extractLatestHistoryTurn(string $history): string
    {
        $history = trim($history);

        if ($history === '') {
            return '';
        }

        $parts = preg_split($this->config->getFollowUpHistoryTurnSplitPattern(), $history);

        if ($parts === false) {
            return '';
        }

        // Walk backwards: the first non-blank part from the end is the latest turn.
        for ($index = count($parts) - 1; $index >= 0; $index--) {
            $candidate = trim($parts[$index]);

            if ($candidate !== '') {
                return $candidate;
            }
        }

        return '';
    }

    /**
     * Collapse whitespace runs to single spaces, trim, and cap the length.
     *
     * Values longer than MAX_ANCHOR_LENGTH characters are cut so that the
     * result, including the trailing ellipsis, does not exceed that limit.
     */
    private function sanitizeAnchor(string $value): string
    {
        // preg_replace() returns null on a PCRE error; the cast turns that into ''.
        $normalized = trim((string) preg_replace('/\s+/u', ' ', $value));

        if (mb_strlen($normalized, 'UTF-8') <= self::MAX_ANCHOR_LENGTH) {
            return $normalized;
        }

        $keep = self::MAX_ANCHOR_LENGTH - mb_strlen(self::ELLIPSIS, 'UTF-8');

        return rtrim(mb_substr($normalized, 0, $keep, 'UTF-8')) . self::ELLIPSIS;
    }
}
|