p43C
This commit is contained in:
129
src/Agent/ReferenceAnchorExtractor.php
Normal file
129
src/Agent/ReferenceAnchorExtractor.php
Normal file
@@ -0,0 +1,129 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Agent;
|
||||
|
||||
use App\Config\AgentRunnerConfig;
|
||||
|
||||
/**
 * Extracts generic reference anchors used to resolve follow-up questions.
 *
 * The extractor is deliberately domain-neutral: every regular expression it
 * applies (turn splitting, question stripping, product-model matching and
 * measurement-value matching) is read from AgentRunnerConfig, so no
 * product- or measurement-specific concept is hard-coded in this class.
 */
final readonly class ReferenceAnchorExtractor
{
    /** Upper bound, in characters, for a sanitized anchor (ellipsis included). */
    private const MAX_ANCHOR_LENGTH = 500;

    /** Marker appended to an anchor that had to be shortened. */
    private const TRUNCATION_MARKER = '...';

    public function __construct(
        private AgentRunnerConfig $config,
    ) {
    }

    /**
     * Extract stable reference anchors from the latest turn of the history.
     *
     * The anchors are only meant to resolve follow-up references such as
     * "der Wert" ("the value") or "welcher Indikator" ("which indicator");
     * they are not factual evidence for the final answer. To avoid
     * propagating wrong earlier answers, at most two anchors are returned:
     * the first product-model match and the first measurement-value match,
     * in that order. Anything the two configured patterns do not match is
     * ignored.
     *
     * @param string $history Conversation history; it is split into turns
     *                        with the configured turn-split pattern.
     *
     * @return list<string> Zero, one or two distinct, non-empty anchors.
     */
    public function extractLatestAssistantReferenceAnchors(string $history): array
    {
        $turn = $this->extractLatestHistoryTurn($history);

        if ($turn === '') {
            return [];
        }

        // Drop the (first) question part of the turn so only the answer text
        // is searched. A PCRE failure (null) is treated like an empty answer.
        $answer = trim(
            preg_replace($this->config->getFollowUpHistoryQuestionStripPattern(), '', $turn, 1) ?? ''
        );

        if ($answer === '') {
            return [];
        }

        $anchors = array_filter(
            [
                $this->extractFirstProductModelAnchor($answer),
                $this->extractFirstMeasurementValueAnchor($answer),
            ],
            static fn (string $anchor): bool => $anchor !== '',
        );

        // Both patterns may hit the very same text; report it only once.
        return array_values(array_unique($anchors));
    }

    /**
     * Return the first product-model reference found in $text.
     *
     * The match is stripped of the registered-trademark sign, has its
     * whitespace collapsed and is capped at MAX_ANCHOR_LENGTH characters.
     *
     * @param string $text Text to search.
     *
     * @return string The normalized match, or '' when the configured pattern
     *                does not match (a PCRE error is treated as "no match").
     */
    public function extractFirstProductModelAnchor(string $text): string
    {
        if (preg_match($this->config->getFollowUpReferenceAnchorProductModelPattern(), $text, $matches) !== 1) {
            return '';
        }

        // Remove the trademark sign BEFORE normalising whitespace. Doing it
        // afterwards turns "Testomat ® 2000" into "Testomat  2000" (two
        // spaces), which no longer compares equal to "Testomat 2000".
        return $this->sanitizeAnchor(str_replace('®', '', $matches[0]));
    }

    /**
     * Return the first measurement value found in $text.
     *
     * Only whitespace is normalized; unlike product-model anchors the value
     * is not length-capped.
     *
     * @param string $text Text to search.
     *
     * @return string The whitespace-normalized match, or '' when the
     *                configured pattern does not match (a PCRE error is
     *                treated as "no match").
     */
    public function extractFirstMeasurementValueAnchor(string $text): string
    {
        if (preg_match($this->config->getFollowUpReferenceAnchorMeasurementValuePattern(), $text, $matches) !== 1) {
            return '';
        }

        return $this->collapseWhitespace($matches[0]);
    }

    /**
     * Return the last non-blank turn of the history.
     *
     * @param string $history Conversation history.
     *
     * @return string The trimmed last turn, or '' when the history is blank,
     *                splitting fails, or every part is blank.
     */
    private function extractLatestHistoryTurn(string $history): string
    {
        $history = trim($history);

        if ($history === '') {
            return '';
        }

        $parts = preg_split($this->config->getFollowUpHistoryTurnSplitPattern(), $history);

        // preg_split() yields false when the pattern cannot be applied.
        if ($parts === false) {
            return '';
        }

        // Trim every part and discard the blank ones; array_filter() keeps
        // the original keys, hence the array_values() re-index.
        $turns = array_values(array_filter(
            array_map(trim(...), $parts),
            static fn (string $part): bool => $part !== '',
        ));

        if ($turns === []) {
            return '';
        }

        return $turns[array_key_last($turns)];
    }

    /**
     * Collapse whitespace and cap the anchor at MAX_ANCHOR_LENGTH characters.
     *
     * Longer values are cut so that the result, including the trailing
     * TRUNCATION_MARKER, is at most MAX_ANCHOR_LENGTH characters long.
     *
     * @param string $value Raw anchor text.
     *
     * @return string The cleaned anchor; '' when nothing but whitespace remains.
     */
    private function sanitizeAnchor(string $value): string
    {
        $value = $this->collapseWhitespace($value);

        if (mb_strlen($value, 'UTF-8') <= self::MAX_ANCHOR_LENGTH) {
            return $value;
        }

        $keep = self::MAX_ANCHOR_LENGTH - mb_strlen(self::TRUNCATION_MARKER, 'UTF-8');

        // rtrim() so the marker never follows a dangling space.
        return rtrim(mb_substr($value, 0, $keep, 'UTF-8')) . self::TRUNCATION_MARKER;
    }

    /**
     * Replace every run of whitespace with a single space and trim the ends.
     *
     * @param string $value Text to normalize.
     *
     * @return string The normalized text; '' when preg_replace() fails
     *                (for example on malformed UTF-8, because of the `u` modifier).
     */
    private function collapseWhitespace(string $value): string
    {
        return trim(preg_replace('/\s+/u', ' ', $value) ?? '');
    }
}
|
||||
Reference in New Issue
Block a user