Files
MtoRagSystem/src/Intent/IntentLite.php
2026-04-15 08:46:26 +02:00

120 lines
3.4 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Intent;
use App\Config\IntentLightConfig;
/**
 * IntentLite
 *
 * Deterministic, LLM-agnostic intent detection.
 * Focus: LIST intent for retrieval control.
 *
 * The detector adds up weighted signals found in the prompt and compares the
 * total with IntentLightConfig::LIST_THRESHOLD.
 *
 * IMPORTANT:
 * - Always call it with the ORIGINAL prompt.
 * - Not with the QueryCleaner result.
 */
final readonly class IntentLite
{
    /** Score added for every configured strong pattern that matches. */
    private const STRONG_PATTERN_WEIGHT = 3;

    /** Score added for every configured quantity word that occurs. */
    private const QUANTITY_WORD_WEIGHT = 2;

    /** Score added once when the prompt contains a standalone number. */
    private const NUMBER_WEIGHT = 2;

    /** Score added once when the prompt contains an enumeration marker. */
    private const ENUMERATION_WEIGHT = 1;

    /** German special characters and their ASCII transliterations. */
    private const UMLAUT_MAP = [
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',
    ];

    public function __construct(
        private IntentLightConfig $config,
    ) {
    }

    /**
     * Scores the prompt for list intent and reports which signals fired.
     *
     * @param string $originalPrompt The unmodified user prompt.
     *
     * @return array{is_list: bool, score: int, signals: list<string>}
     */
    public function detectList(string $originalPrompt): array
    {
        $normalized = $this->normalize($originalPrompt);
        $score = 0;
        $signals = [];

        // 1. Strong explicit list triggers (high weight).
        // Each configured entry is handed to preg_match() as a complete
        // pattern; an entry that does not yield exactly one match result
        // (including a failing pattern) is not counted.
        foreach ($this->config->getStrongPatterns() as $pattern) {
            if (preg_match($pattern, $normalized) === 1) {
                $score += self::STRONG_PATTERN_WEIGHT;
                $signals[] = "strong:$pattern";
            }
        }

        // 2. Quantity / plural indicators, matched as whole words.
        // NOTE(review): matching runs against the lower-cased prompt, so the
        // configured words are presumably lower-case - confirm in the config.
        foreach ($this->config->getQuantityWords() as $word) {
            $wordPattern = '/\b' . preg_quote($word, '/') . '\b/u';
            if (preg_match($wordPattern, $normalized) === 1) {
                $score += self::QUANTITY_WORD_WEIGHT;
                $signals[] = "quantity:$word";
            }
        }

        // 3. Explicit numbers (e.g. "5 Vorteile").
        if (preg_match('/\b\d+\b/u', $normalized) === 1) {
            $score += self::NUMBER_WEIGHT;
            $signals[] = 'number';
        }

        // 4. Enumeration hints ("1.", "1)", "- ", "* ") at the start of the
        // prompt or after whitespace. Checked on the untouched prompt.
        if (preg_match('/(^|\s)(\d+\)|\d+\.|-\s|\*\s)/u', $originalPrompt) === 1) {
            $score += self::ENUMERATION_WEIGHT;
            $signals[] = 'enumeration_hint';
        }

        // Decision.
        return [
            'is_list' => $score >= IntentLightConfig::LIST_THRESHOLD,
            'score' => $score,
            'signals' => $signals,
        ];
    }

    /**
     * Convenience wrapper returning only the list decision of detectList().
     */
    public function isListQuery(string $originalPrompt): bool
    {
        return $this->detectList($originalPrompt)['is_list'];
    }

    /**
     * Internal normalisation (deliberately without stop-word removal).
     *
     * Lower-cases the text. If it contains ä, ö, ü or ß, a fully
     * transliterated copy (ae, oe, ue, ss) is appended after a space, so that
     * both spellings can be matched. The original spelling is kept, never
     * replaced.
     */
    private function normalize(string $s): string
    {
        $lower = mb_strtolower($s, 'UTF-8');

        // Transliterate every special character in a single pass, so that the
        // appended variant is complete even when several different ones occur.
        $ascii = strtr($lower, self::UMLAUT_MAP);

        return $ascii === $lower ? $lower : $lower . ' ' . $ascii;
    }
}