120 lines
3.4 KiB
PHP
120 lines
3.4 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Intent;
|
|
|
|
use App\Config\IntentLightConfig;
|
|
|
|
/**
 * IntentLite
 *
 * Deterministic, LLM-agnostic intent detection.
 * Focus: LIST intent for retrieval control.
 *
 * IMPORTANT:
 * - Always call it with the ORIGINAL prompt.
 * - Not with the QueryCleaner result.
 */
final readonly class IntentLite
{
    /** Score added for every strong list pattern that matches. */
    private const WEIGHT_STRONG = 3;

    /** Score added for every quantity / plural word that matches. */
    private const WEIGHT_QUANTITY = 2;

    /** Score added once when the prompt contains a standalone number. */
    private const WEIGHT_NUMBER = 2;

    /** Score added once when the raw prompt contains an enumeration marker. */
    private const WEIGHT_ENUMERATION = 1;

    /** Replacements used to build the ASCII variant of a lowercased prompt. */
    private const UMLAUT_TRANSLITERATIONS = [
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',
    ];

    public function __construct(
        private IntentLightConfig $config,
    ) {
    }

    /**
     * Scores a prompt for list intent and reports which signals fired.
     *
     * The score is the sum of the weights of all matching signals; the prompt
     * counts as a list query once the score reaches
     * IntentLightConfig::LIST_THRESHOLD.
     *
     * @param string $originalPrompt The prompt to inspect.
     *
     * @return array{is_list: bool, score: int, signals: list<string>}
     */
    public function detectList(string $originalPrompt): array
    {
        $normalized = $this->normalize($originalPrompt);

        $score = 0;
        $signals = [];

        // 1. Strong explicit list triggers (high weight). Each configured
        //    entry is handed to preg_match() as a complete pattern.
        foreach ($this->config->getStrongPatterns() as $pattern) {
            if (preg_match($pattern, $normalized) === 1) {
                $score += self::WEIGHT_STRONG;
                $signals[] = "strong:$pattern";
            }
        }

        // 2. Quantity / plural indicators, matched as whole words.
        foreach ($this->config->getQuantityWords() as $word) {
            if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $normalized) === 1) {
                $score += self::WEIGHT_QUANTITY;
                $signals[] = "quantity:$word";
            }
        }

        // 3. Explicit numbers (e.g. "5 Vorteile"). Counted once, no matter
        //    how many numbers the prompt contains.
        if (preg_match('/\b\d+\b/u', $normalized) === 1) {
            $score += self::WEIGHT_NUMBER;
            $signals[] = 'number';
        }

        // 4. Enumeration hints ("1.", "1)", "- ", "* "). Checked against the
        //    raw prompt rather than the normalized text.
        if (preg_match('/(^|\s)(\d+\)|\d+\.|-\s|\*\s)/u', $originalPrompt) === 1) {
            $score += self::WEIGHT_ENUMERATION;
            $signals[] = 'enumeration_hint';
        }

        // Decision.
        return [
            'is_list' => $score >= IntentLightConfig::LIST_THRESHOLD,
            'score' => $score,
            'signals' => $signals,
        ];
    }

    /**
     * Convenience wrapper returning only the list decision of detectList().
     */
    public function isListQuery(string $originalPrompt): bool
    {
        return $this->detectList($originalPrompt)['is_list'];
    }

    /**
     * Internal normalization (no stop-word removal!).
     *
     * Lowercases the input. If it contains ä, ö, ü or ß, a transliterated
     * copy (ae, oe, ue, ss) is appended after a single space, so patterns can
     * match either spelling. The umlaut form is kept, never replaced.
     */
    private function normalize(string $s): string
    {
        $s = mb_strtolower($s);

        // strtr() with a map replaces every umlaut kind in one pass, so the
        // appended variant is fully transliterated even for mixed input such
        // as "größte städte".
        $transliterated = strtr($s, self::UMLAUT_TRANSLITERATIONS);

        if ($transliterated === $s) {
            return $s;
        }

        return $s . ' ' . $transliterated;
    }
}