diff --git a/config/services.yaml b/config/services.yaml
index d4eefba..74ffb5d 100644
--- a/config/services.yaml
+++ b/config/services.yaml
@@ -101,12 +101,7 @@ services:
 
   App\Knowledge\Retrieval\NdjsonChunkLookup: ~
 
-  App\Knowledge\Retrieval\NdjsonKeywordSearch: ~
-
-  App\Knowledge\Retrieval\NdjsonHybridRetriever:
-    arguments:
-      $maxChunks: 3
-      $vectorTopK: 5
+  App\Knowledge\Retrieval\NdjsonHybridRetriever: ~
 
   App\Knowledge\Retrieval\CachedRetriever:
     arguments:
diff --git a/src/Knowledge/Retrieval/CachedRetriever.php b/src/Knowledge/Retrieval/CachedRetriever.php
index 85c3e38..301de3c 100644
--- a/src/Knowledge/Retrieval/CachedRetriever.php
+++ b/src/Knowledge/Retrieval/CachedRetriever.php
@@ -5,16 +5,22 @@ declare(strict_types=1);
 namespace App\Knowledge\Retrieval;
 
 use Psr\Cache\CacheItemPoolInterface;
+use Psr\Cache\InvalidArgumentException;
 
-final class CachedRetriever implements RetrieverInterface
+final readonly class CachedRetriever implements RetrieverInterface
 {
     public function __construct(
-        private RetrieverInterface    $inner,
+        private RetrieverInterface     $inner,
         private CacheItemPoolInterface $cache,
-        private int                   $ttlSeconds = 600 // 10 Minuten
-    ) {}
+        private int                    $ttlSeconds
+    )
+    {
+    }
 
-    public function retrieve(string $prompt, int $limit = 3): array
+    /**
+     * @throws InvalidArgumentException
+     */
+    public function retrieve(string $prompt, int $limit = 10): array
     {
         $key = $this->buildCacheKey($prompt, $limit);
 
diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
index 83e15c5..6ebc492 100644
--- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
+++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
@@ -1,6 +1,5 @@
 <?php
 
-
 declare(strict_types=1);
 
 namespace App\Knowledge\Retrieval;
@@ -9,34 +8,26 @@ use App\Vector\VectorSearchClient;
 
 final class NdjsonHybridRetriever implements RetrieverInterface
 {
-    private const VECTOR_SCORE_THRESHOLD = 0.65;
+    private const VECTOR_SCORE_THRESHOLD = 0.25;
 
     public function __construct(
-        private readonly NdjsonKeywordSearch $keywordSearch,
         private readonly NdjsonChunkLookup   $lookup,
         private readonly VectorSearchClient  $vectorClient,
-        private readonly int                 $maxChunks = 3,
-        private readonly int                 $vectorTopK = 5,
+        private readonly int                 $maxChunks = 10,
+        private readonly int                 $vectorTopK = 10,
     )
     {
     }
 
-    public function retrieve(string $prompt, int $limit = null): array
+    public function retrieve(string $prompt, ?int $limit = null): array
     {
-        $limit ??= $this->maxChunks;
+        $limit ??= $this->maxChunks; // honour a caller-supplied limit; fall back to the configured maximum
+        $keywordChunks = [];
 
-        $terms = $this->extractTerms($prompt);
-
-        // 1) Keyword first
-        $keywordChunks = $this->keywordSearch->search($terms, $limit);
-        if (\count($keywordChunks) >= $limit) {
-            return array_slice($keywordChunks, 0, $limit);
-        }
-
-        // 2) Vector fallback / enrichment
+        // Vector / enrichment
         $hits = $this->vectorClient->search($prompt, $this->vectorTopK);
         if ($hits === []) {
-            return $keywordChunks;
+            return $this->diversifyByDevice($keywordChunks, $limit, 1);
         }
 
         $chunkIds = [];
@@ -51,7 +42,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
         }
 
         if ($chunkIds === []) {
-            return $keywordChunks;
+            return $this->diversifyByDevice($keywordChunks, $limit, 1);
         }
 
         $rows = $this->lookup->findByChunkIds($chunkIds);
@@ -63,9 +54,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
             $keywordChunks[] = trim($rows[$id]['text']);
         }
 
-        // dedupe + limit
+        // dedupe
         $seen = [];
-        $out = [];
+        $deduped = [];
 
         foreach ($keywordChunks as $chunk) {
             $key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk));
@@ -73,20 +64,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
                 continue;
             }
             $seen[$key] = true;
-            $out[] = $chunk;
-            if (\count($out) >= $limit) {
-                break;
-            }
+            $deduped[] = $chunk;
         }
 
-        return $out;
+        // diversify
+        return $this->diversifyByDevice($deduped, $limit, 1);
     }
 
-    /**
-     * minimal term extraction (we keep your old behavior)
-     *
-     * @return string[]
-     */
     private function extractTerms(string $text): array
     {
         $text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text));
@@ -96,4 +80,62 @@ final class NdjsonHybridRetriever implements RetrieverInterface
             static fn(string $w) => mb_strlen($w) > 2
         ));
     }
+
+    /**
+     * Returns the grouping key of a chunk: its first line with surrounding whitespace removed.
+     *
+     * NOTE(review): presumably the first line of every chunk names the device;
+     * confirm against the code that writes the chunks.
+     */
+    private function extractDevice(string $chunk): string
+    {
+        // explode() always yields at least one element, so index 0 needs no fallback.
+        return trim(explode("\n", $chunk, 2)[0]);
+    }
+
+    /**
+     * Keeps at most $maxPerDevice chunks per grouping key (see extractDevice()),
+     * preserving input order, and stops as soon as $limit chunks were collected.
+     *
+     * Chunks whose first line is blank are dropped.
+     *
+     * @param string[] $chunks       candidate chunks in ranking order
+     * @param int      $limit        maximum number of chunks to return; values <= 0 yield an empty list
+     * @param int      $maxPerDevice maximum number of chunks sharing the same first line
+     *
+     * @return string[]
+     */
+    private function diversifyByDevice(array $chunks, int $limit, int $maxPerDevice = 1): array
+    {
+        // The limit is only checked after appending, so without this guard a limit of 0 would still emit one chunk.
+        if ($limit <= 0) {
+            return [];
+        }
+
+        $seenDevices = [];
+        $out = [];
+
+        foreach ($chunks as $chunk) {
+            $device = $this->extractDevice($chunk);
+
+            // A blank first line gives no grouping key; such chunks are skipped.
+            if ($device === '') {
+                continue;
+            }
+
+            $count = $seenDevices[$device] ?? 0;
+            if ($count >= $maxPerDevice) {
+                continue;
+            }
+
+            $out[] = $chunk;
+            $seenDevices[$device] = $count + 1;
+
+            if (\count($out) >= $limit) {
+                break;
+            }
+        }
+
+        return $out;
+    }
 }
diff --git a/src/Knowledge/Retrieval/NdjsonKeywordSearch.php b/src/Knowledge/Retrieval/NdjsonKeywordSearch.php
deleted file mode 100644
index e15c91d..0000000
--- a/src/Knowledge/Retrieval/NdjsonKeywordSearch.php
+++ /dev/null
@@ -1,101 +0,0 @@
-<?php
-
-declare(strict_types=1);
-
-namespace App\Knowledge\Retrieval;
-
-use App\Knowledge\ChunkManager;
-use App\Knowledge\StopWords;
-
-final class NdjsonKeywordSearch
-{
-    public function __construct(
-        private readonly ChunkManager $chunkManager,
-        private readonly StopWords $stopWords,
-    ) {
-    }
-
-    /**
-     * Streaming Keyword-Search über index.ndjson.
-     *
-     * @param string[] $terms (already lowercased)
-     * @return string[] best chunks
-     */
-    public function search(array $terms, int $limit = 3, int $candidateCap = 200): array
-    {
-        $terms = array_values(array_filter($terms, function (string $t): bool {
-            return $t !== '' && !\in_array($t, $this->stopWords->getStopWords(), true);
-        }));
-
-        if ($terms === []) {
-            return [];
-        }
-
-        // bounded min-heap (score => chunkText)
-        $best = [];
-
-        foreach ($this->chunkManager->streamAll() as $row) {
-            $text = $row['text'] ?? null;
-            if (!is_string($text) || $text === '') {
-                continue;
-            }
-
-            $score = $this->scoreText($text, $terms);
-            if ($score <= 0) {
-                continue;
-            }
-
-            $best[] = ['score' => $score, 'text' => trim($text)];
-
-            // keep array bounded to avoid memory spikes
-            if (\count($best) > $candidateCap) {
-                usort($best, fn($a, $b) => $b['score'] <=> $a['score']);
-                $best = array_slice($best, 0, $candidateCap);
-            }
-        }
-
-        if ($best === []) {
-            return [];
-        }
-
-        usort($best, fn($a, $b) => $b['score'] <=> $a['score']);
-
-        $out = [];
-        $seen = [];
-
-        foreach ($best as $row) {
-            $key = mb_strtolower(preg_replace('/\s+/u', ' ', $row['text']));
-            if (isset($seen[$key])) {
-                continue;
-            }
-            $seen[$key] = true;
-            $out[] = $row['text'];
-
-            if (\count($out) >= $limit) {
-                break;
-            }
-        }
-
-        return $out;
-    }
-
-    /**
-     * Simple scoring: count matches, weight long terms slightly.
-     */
-    private function scoreText(string $text, array $terms): int
-    {
-        $content = mb_strtolower($text);
-        $score = 0;
-
-        foreach ($terms as $term) {
-            if ($term === '') {
-                continue;
-            }
-            if (str_contains($content, $term)) {
-                $score += (mb_strlen($term) >= 10) ? 2 : 1;
-            }
-        }
-
-        return $score;
-    }
-}
diff --git a/src/Knowledge/Retrieval/RetrieverInterface.php b/src/Knowledge/Retrieval/RetrieverInterface.php
index dc9f0dc..13ea899 100644
--- a/src/Knowledge/Retrieval/RetrieverInterface.php
+++ b/src/Knowledge/Retrieval/RetrieverInterface.php
@@ -7,5 +7,5 @@ interface RetrieverInterface
     /**
      * @return string[]  Plain text knowledge chunks
      */
-    public function retrieve(string $prompt, int $limit = 3): array;
+    public function retrieve(string $prompt, int $limit = 10): array;
 }
diff --git a/templates/admin/model_config/create.html.twig b/templates/admin/model_config/create.html.twig
index 8734115..4c22ee5 100644
--- a/templates/admin/model_config/create.html.twig
+++ b/templates/admin/model_config/create.html.twig
@@ -31,7 +31,7 @@
                                placeholder="z. B. qwen3:latest"
                                required>
                         <div class="form-text text-secondary">
-                            Exakter Modellname wie im Endpunkt konfiguriert.
+                            Exakter Modellname wie im KI-Endpunkt konfiguriert (z. B. Ollama oder API).
                         </div>
                     </div>
 
@@ -42,26 +42,31 @@
                                    type="checkbox"
                                    name="stream"
                                    value="1"
-                                   id="streamSwitch">
+                                   id="streamSwitch" checked>
                             <label class="form-check-label" for="streamSwitch">
                                 Streaming aktivieren
                             </label>
                         </div>
+                        <div class="form-text text-secondary ms-3">
+                            Aktiviert Token-Streaming im Chat (empfohlen für bessere UX).
+                        </div>
                     </div>
 
                     <!-- Temperature -->
                     <div class="col-md-4">
                         <label class="form-label">Temperature</label>
                         <input type="number"
-                               step="0.1"
+                               step="0.05"
                                min="0"
                                max="2"
                                name="temperature"
-                               value="0.1"
+                               value="0.35"
                                class="form-control bg-dark text-light border-secondary"
                                required>
                         <div class="form-text text-secondary">
-                            Niedrige Werte = deterministisch (empfohlen für RAG).
+                            Steuert die Kreativität der Antworten.
+                            Niedrige Werte (0.2–0.4) erzeugen stabile, sachliche Ergebnisse – empfohlen für RAG-Systeme.
+                            Höhere Werte führen zu freieren, weniger deterministischen Antworten.
                         </div>
                     </div>
 
@@ -71,9 +76,14 @@
                         <input type="number"
                                min="1"
                                name="top_k"
-                               value="20"
+                               value="40"
                                class="form-control bg-dark text-light border-secondary"
                                required>
+                        <div class="form-text text-secondary">
+                            Begrenzt die Anzahl der wahrscheinlichsten Token, aus denen das Modell auswählt.
+                            Niedrigere Werte = konservativer, höhere Werte = flexibler.
+                            20–50 ist für Wissenssysteme üblich.
+                        </div>
                     </div>
 
                     <!-- Top P -->
@@ -84,9 +94,14 @@
                                min="0"
                                max="1"
                                name="top_p"
-                               value="0.8"
+                               value="0.9"
                                class="form-control bg-dark text-light border-secondary"
                                required>
+                        <div class="form-text text-secondary">
+                            Nucleus Sampling: Das Modell berücksichtigt nur Token,
+                            deren kumulative Wahrscheinlichkeit innerhalb dieses Werts liegt.
+                            0.8–0.95 bietet eine gute Balance zwischen Stabilität und Natürlichkeit.
+                        </div>
                     </div>
 
                     <!-- Repeat Penalty -->
@@ -97,9 +112,13 @@
                                min="0"
                                max="5"
                                name="repeat_penalty"
-                               value="1.05"
+                               value="1.1"
                                class="form-control bg-dark text-light border-secondary"
                                required>
+                        <div class="form-text text-secondary">
+                            Bestraft Wortwiederholungen. Werte leicht über 1.0 (z. B. 1.1–1.15)
+                            verhindern Schleifen und redundante Antworten.
+                        </div>
                     </div>
 
                     <!-- Num Ctx -->
@@ -109,11 +128,13 @@
                                min="512"
                                max="32768"
                                name="num_ctx"
-                               value="4096"
+                               value="8192"
                                class="form-control bg-dark text-light border-secondary"
                                required>
                         <div class="form-text text-secondary">
-                            Muss zum Modell passen. Zu hohe Werte können Performance beeinflussen.
+                            Maximale Kontextlänge in Tokens (Systemprompt + Benutzerfrage + Retrieval-Chunks).
+                            Muss vom Modell unterstützt werden.
+                            Höhere Werte ermöglichen größere Wissenskontexte, erhöhen jedoch Speicher- und Rechenbedarf.
                         </div>
                     </div>