optimize intents
This commit is contained in:
144
src/Intent/IntentLite.php
Normal file
144
src/Intent/IntentLite.php
Normal file
@@ -0,0 +1,144 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Intent;
|
||||
|
||||
/**
 * IntentLite
 *
 * Deterministic, LLM-agnostic intent detection.
 * Focus: LIST intent, used to steer retrieval.
 *
 * IMPORTANT:
 * - Always call it with the ORIGINAL prompt.
 * - Do not call it with the QueryCleaner result.
 */
final class IntentLite
{
    /** Minimum accumulated score at which a prompt is classified as a list query. */
    private const LIST_THRESHOLD = 4;

    /** ASCII spellings used for German umlauts and sharp s. */
    private const TRANSLITERATIONS = [
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',
    ];

    /**
     * Separator between the lowercased prompt and its transliterated copy.
     * It contains a non-whitespace, non-word character so that multi-word
     * patterns (e.g. "welche\s+sind") cannot match across the seam.
     */
    private const VARIANT_SEPARATOR = ' | ';

    /** Strong explicit list triggers (weight 3 each). */
    private const STRONG_PATTERNS = [
        '/\bliste(n)?\b/u',
        '/\bauflisten\b/u',
        '/\baufz(a|ae|ä)hl(en)?\b/u',
        '/\bnenn(e)?\b/u',
        '/\bzeig(e)?\b/u',
        '/\bwelche\s+sind\b/u',
        '/\bwelche\s+gibt\s+es\b/u',
        '/\bwas\s+sind\b/u',
        '/\bwie\s+viele\b/u',
        '/\branking\b/u',
        '/\btop\s*\d+\b/u',
    ];

    /**
     * Quantity / plural indicators (weight 2 each). Umlaut words are listed
     * in both spellings; the two spellings count as one indicator.
     */
    private const QUANTITY_WORDS = [
        'alle',
        'sämtliche',
        'saemtliche',
        'mehrere',
        'verschiedene',
        'einige',
        'viele',
        'optionen',
        'möglichkeiten',
        'moeglichkeiten',
        'varianten',
        'arten',
        'modelle',
        'funktionen',
        'punkte',
        'schritte',
        'kategorien',
        'übersicht',
        'uebersicht',
    ];

    /**
     * Scores a prompt for list intent and reports which signals fired.
     *
     * Scoring: +3 per strong trigger pattern, +2 per quantity word,
     * +2 if the prompt contains any standalone number, +1 if the raw prompt
     * contains an enumeration marker ("1.", "1)", "- ", "* ").
     *
     * @param string $originalPrompt The unmodified user prompt.
     *
     * @return array{is_list: bool, score: int, signals: list<string>}
     */
    public function detectList(string $originalPrompt): array
    {
        $p = $this->normalize($originalPrompt);

        $score = 0;
        $signals = [];

        // 1. Strong explicit list triggers (high weight).
        foreach (self::STRONG_PATTERNS as $pattern) {
            if (preg_match($pattern, $p) === 1) {
                $score += 3;
                $signals[] = "strong:$pattern";
            }
        }

        // 2. Quantity / plural indicators.
        // The normalized prompt carries both the umlaut and the ASCII
        // spelling, and the word list contains both as well. Track the
        // canonical (ASCII) spelling so one word in the prompt is scored once.
        $countedWords = [];
        foreach (self::QUANTITY_WORDS as $word) {
            $canonical = strtr($word, self::TRANSLITERATIONS);
            if (isset($countedWords[$canonical])) {
                continue;
            }

            if (preg_match('/\b' . preg_quote($word, '/') . '\b/u', $p) === 1) {
                $countedWords[$canonical] = true;
                $score += 2;
                $signals[] = "quantity:$word";
            }
        }

        // 3. Explicit numbers (e.g. "5 Vorteile").
        if (preg_match('/\b\d+\b/u', $p) === 1) {
            $score += 2;
            $signals[] = 'number';
        }

        // 4. Enumeration hints (1., 1), -, *). Checked against the raw prompt
        // so the original layout is inspected rather than the normalized text.
        if (preg_match('/(^|\s)(\d+\)|\d+\.|-\s|\*\s)/u', $originalPrompt) === 1) {
            $score += 1;
            $signals[] = 'enumeration_hint';
        }

        // Decision.
        return [
            'is_list' => $score >= self::LIST_THRESHOLD,
            'score' => $score,
            'signals' => $signals,
        ];
    }

    /**
     * Convenience wrapper returning only the list decision of detectList().
     */
    public function isListQuery(string $originalPrompt): bool
    {
        return $this->detectList($originalPrompt)['is_list'];
    }

    /**
     * Internal normalization (no stop-word removal!).
     *
     * Lowercases the prompt. If it contains umlauts or sharp s, a fully
     * transliterated copy (ä→ae, ö→oe, ü→ue, ß→ss) is appended as an
     * additional form; the original spelling is kept, not replaced.
     */
    private function normalize(string $s): string
    {
        $lower = mb_strtolower($s, 'UTF-8');

        // strtr with an array replaces every listed character in one pass, so
        // the copy is transliterated completely, not just for one umlaut kind.
        $ascii = strtr($lower, self::TRANSLITERATIONS);

        if ($ascii === $lower) {
            return $lower;
        }

        return $lower . self::VARIANT_SEPARATOR . $ascii;
    }
}
|
||||
Reference in New Issue
Block a user