diff --git a/config/services.yaml b/config/services.yaml index d4eefba..74ffb5d 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -101,12 +101,7 @@ services: App\Knowledge\Retrieval\NdjsonChunkLookup: ~ - App\Knowledge\Retrieval\NdjsonKeywordSearch: ~ - - App\Knowledge\Retrieval\NdjsonHybridRetriever: - arguments: - $maxChunks: 3 - $vectorTopK: 5 + App\Knowledge\Retrieval\NdjsonHybridRetriever: ~ App\Knowledge\Retrieval\CachedRetriever: arguments: diff --git a/src/Knowledge/Retrieval/CachedRetriever.php b/src/Knowledge/Retrieval/CachedRetriever.php index 85c3e38..301de3c 100644 --- a/src/Knowledge/Retrieval/CachedRetriever.php +++ b/src/Knowledge/Retrieval/CachedRetriever.php @@ -5,16 +5,22 @@ declare(strict_types=1); namespace App\Knowledge\Retrieval; use Psr\Cache\CacheItemPoolInterface; +use Psr\Cache\InvalidArgumentException; -final class CachedRetriever implements RetrieverInterface +final readonly class CachedRetriever implements RetrieverInterface { public function __construct( - private RetrieverInterface $inner, + private RetrieverInterface $inner, private CacheItemPoolInterface $cache, - private int $ttlSeconds = 600 // 10 Minuten - ) {} + private int $ttlSeconds + ) + { + } - public function retrieve(string $prompt, int $limit = 3): array + /** + * @throws InvalidArgumentException + */ + public function retrieve(string $prompt, int $limit = 10): array { $key = $this->buildCacheKey($prompt, $limit); diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index 83e15c5..6ebc492 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -1,6 +1,5 @@ maxChunks; + $limit = $this->maxChunks; + $keywordChunks = []; - $terms = $this->extractTerms($prompt); - - // 1) Keyword first - $keywordChunks = $this->keywordSearch->search($terms, $limit); - if (\count($keywordChunks) >= $limit) { - return array_slice($keywordChunks, 0, $limit); - } - - // 2) Vector fallback / enrichment + // Vector / enrichment $hits = $this->vectorClient->search($prompt, $this->vectorTopK); if ($hits === []) { - return $keywordChunks; + return $this->diversifyByDevice($keywordChunks, $limit, 1); } $chunkIds = []; @@ -51,7 +42,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface } if ($chunkIds === []) { - return $keywordChunks; + return $this->diversifyByDevice($keywordChunks, $limit, 1); } $rows = $this->lookup->findByChunkIds($chunkIds); @@ -63,9 +54,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface $keywordChunks[] = trim($rows[$id]['text']); } - // dedupe + limit + // dedupe $seen = []; - $out = []; + $deduped = []; foreach ($keywordChunks as $chunk) { $key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk)); @@ -73,20 +64,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface continue; } $seen[$key] = true; - $out[] = $chunk; - if (\count($out) >= $limit) { - break; - } + $deduped[] = $chunk; } - return $out; + // diversify + return $this->diversifyByDevice($deduped, $limit, 1); } - /** - * minimal term extraction (we keep your old behavior) - * - * @return string[] - */ private function extractTerms(string $text): array { $text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text)); @@ -96,4 +80,41 @@ final class NdjsonHybridRetriever implements RetrieverInterface static fn(string $w) => mb_strlen($w) > 2 )); } + + private function extractDevice(string $chunk): string + { + $firstLine = explode("\n", $chunk, 2)[0] ?? ''; + return trim($firstLine); + } + + private function diversifyByDevice(array $chunks, int $limit, int $maxPerDevice = 1): array + { + $seenDevices = []; + $out = []; + + foreach ($chunks as $chunk) { + $device = $this->extractDevice($chunk); + + if ($device === '') { + continue; + } + + if (!isset($seenDevices[$device])) { + $seenDevices[$device] = 0; + } + + if ($seenDevices[$device] >= $maxPerDevice) { + continue; + } + + $out[] = $chunk; + $seenDevices[$device]++; + + if (\count($out) >= $limit) { + break; + } + } + + return $out; + } } diff --git a/src/Knowledge/Retrieval/NdjsonKeywordSearch.php b/src/Knowledge/Retrieval/NdjsonKeywordSearch.php deleted file mode 100644 index e15c91d..0000000 --- a/src/Knowledge/Retrieval/NdjsonKeywordSearch.php +++ /dev/null @@ -1,101 +0,0 @@ -stopWords->getStopWords(), true); - })); - - if ($terms === []) { - return []; - } - - // bounded min-heap (score => chunkText) - $best = []; - - foreach ($this->chunkManager->streamAll() as $row) { - $text = $row['text'] ?? null; - if (!is_string($text) || $text === '') { - continue; - } - - $score = $this->scoreText($text, $terms); - if ($score <= 0) { - continue; - } - - $best[] = ['score' => $score, 'text' => trim($text)]; - - // keep array bounded to avoid memory spikes - if (\count($best) > $candidateCap) { - usort($best, fn($a, $b) => $b['score'] <=> $a['score']); - $best = array_slice($best, 0, $candidateCap); - } - } - - if ($best === []) { - return []; - } - - usort($best, fn($a, $b) => $b['score'] <=> $a['score']); - - $out = []; - $seen = []; - - foreach ($best as $row) { - $key = mb_strtolower(preg_replace('/\s+/u', ' ', $row['text'])); - if (isset($seen[$key])) { - continue; - } - $seen[$key] = true; - $out[] = $row['text']; - - if (\count($out) >= $limit) { - break; - } - } - - return $out; - } - - /** - * Simple scoring: count matches, weight long terms slightly. - */ - private function scoreText(string $text, array $terms): int - { - $content = mb_strtolower($text); - $score = 0; - - foreach ($terms as $term) { - if ($term === '') { - continue; - } - if (str_contains($content, $term)) { - $score += (mb_strlen($term) >= 10) ? 2 : 1; - } - } - - return $score; - } -} diff --git a/src/Knowledge/Retrieval/RetrieverInterface.php b/src/Knowledge/Retrieval/RetrieverInterface.php index dc9f0dc..13ea899 100644 --- a/src/Knowledge/Retrieval/RetrieverInterface.php +++ b/src/Knowledge/Retrieval/RetrieverInterface.php @@ -7,5 +7,5 @@ interface RetrieverInterface /** * @return string[] Plain text knowledge chunks */ - public function retrieve(string $prompt, int $limit = 3): array; + public function retrieve(string $prompt, int $limit = 10): array; } diff --git a/templates/admin/model_config/create.html.twig b/templates/admin/model_config/create.html.twig index 8734115..4c22ee5 100644 --- a/templates/admin/model_config/create.html.twig +++ b/templates/admin/model_config/create.html.twig @@ -31,7 +31,7 @@ placeholder="z. B. qwen3:latest" required>
- Exakter Modellname wie im Endpunkt konfiguriert. + Exakter Modellname wie im KI-Endpunkt konfiguriert (z. B. Ollama oder API).
@@ -42,26 +42,31 @@ type="checkbox" name="stream" value="1" - id="streamSwitch"> + id="streamSwitch" checked> +
+ Aktiviert Token-Streaming im Chat (empfohlen für bessere UX). +
- Niedrige Werte = deterministisch (empfohlen für RAG). + Steuert die Kreativität der Antworten. + Niedrige Werte (0.2–0.4) erzeugen stabile, sachliche Ergebnisse – empfohlen für RAG-Systeme. + Höhere Werte führen zu freieren, weniger deterministischen Antworten.
@@ -71,9 +76,14 @@ +
+ Begrenzt die Anzahl der wahrscheinlichsten Token, aus denen das Modell auswählt. + Niedrigere Werte = konservativer, höhere Werte = flexibler. + 20–50 ist für Wissenssysteme üblich. +
@@ -84,9 +94,14 @@ min="0" max="1" name="top_p" - value="0.8" + value="0.9" class="form-control bg-dark text-light border-secondary" required> +
+ Nucleus Sampling: Das Modell berücksichtigt nur Token, + deren kumulative Wahrscheinlichkeit innerhalb dieses Werts liegt. + 0.8–0.95 bietet eine gute Balance zwischen Stabilität und Natürlichkeit. +
@@ -97,9 +112,13 @@ min="0" max="5" name="repeat_penalty" - value="1.05" + value="1.1" class="form-control bg-dark text-light border-secondary" required> +
+ Bestraft Wortwiederholungen. Werte leicht über 1.0 (z. B. 1.1–1.15) + verhindern Schleifen und redundante Antworten. +
@@ -109,11 +128,13 @@ min="512" max="32768" name="num_ctx" - value="4096" + value="8192" class="form-control bg-dark text-light border-secondary" required>
- Muss zum Modell passen. Zu hohe Werte können Performance beeinflussen. + Maximale Kontextlänge in Tokens (Systemprompt + Benutzerfrage + Retrieval-Chunks). + Muss vom Modell unterstützt werden. + Höhere Werte ermöglichen größere Wissenskontexte, erhöhen jedoch Speicher- und Rechenbedarf.