Remove direct chunk search; use only vector search
This commit is contained in:
@@ -101,12 +101,7 @@ services:
|
|||||||
|
|
||||||
App\Knowledge\Retrieval\NdjsonChunkLookup: ~
|
App\Knowledge\Retrieval\NdjsonChunkLookup: ~
|
||||||
|
|
||||||
App\Knowledge\Retrieval\NdjsonKeywordSearch: ~
|
App\Knowledge\Retrieval\NdjsonHybridRetriever: ~
|
||||||
|
|
||||||
App\Knowledge\Retrieval\NdjsonHybridRetriever:
|
|
||||||
arguments:
|
|
||||||
$maxChunks: 3
|
|
||||||
$vectorTopK: 5
|
|
||||||
|
|
||||||
App\Knowledge\Retrieval\CachedRetriever:
|
App\Knowledge\Retrieval\CachedRetriever:
|
||||||
arguments:
|
arguments:
|
||||||
|
|||||||
@@ -5,16 +5,22 @@ declare(strict_types=1);
|
|||||||
namespace App\Knowledge\Retrieval;
|
namespace App\Knowledge\Retrieval;
|
||||||
|
|
||||||
use Psr\Cache\CacheItemPoolInterface;
|
use Psr\Cache\CacheItemPoolInterface;
|
||||||
|
use Psr\Cache\InvalidArgumentException;
|
||||||
|
|
||||||
final class CachedRetriever implements RetrieverInterface
|
final readonly class CachedRetriever implements RetrieverInterface
|
||||||
{
|
{
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private RetrieverInterface $inner,
|
private RetrieverInterface $inner,
|
||||||
private CacheItemPoolInterface $cache,
|
private CacheItemPoolInterface $cache,
|
||||||
private int $ttlSeconds = 600 // 10 Minuten
|
private int $ttlSeconds
|
||||||
) {}
|
)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
public function retrieve(string $prompt, int $limit = 3): array
|
/**
|
||||||
|
* @throws InvalidArgumentException
|
||||||
|
*/
|
||||||
|
public function retrieve(string $prompt, int $limit = 10): array
|
||||||
{
|
{
|
||||||
$key = $this->buildCacheKey($prompt, $limit);
|
$key = $this->buildCacheKey($prompt, $limit);
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
|
||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
namespace App\Knowledge\Retrieval;
|
namespace App\Knowledge\Retrieval;
|
||||||
@@ -9,34 +8,26 @@ use App\Vector\VectorSearchClient;
|
|||||||
|
|
||||||
final class NdjsonHybridRetriever implements RetrieverInterface
|
final class NdjsonHybridRetriever implements RetrieverInterface
|
||||||
{
|
{
|
||||||
private const VECTOR_SCORE_THRESHOLD = 0.65;
|
private const VECTOR_SCORE_THRESHOLD = 0.25;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private readonly NdjsonKeywordSearch $keywordSearch,
|
|
||||||
private readonly NdjsonChunkLookup $lookup,
|
private readonly NdjsonChunkLookup $lookup,
|
||||||
private readonly VectorSearchClient $vectorClient,
|
private readonly VectorSearchClient $vectorClient,
|
||||||
private readonly int $maxChunks = 3,
|
private readonly int $maxChunks = 10,
|
||||||
private readonly int $vectorTopK = 5,
|
private readonly int $vectorTopK = 10,
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function retrieve(string $prompt, int $limit = null): array
|
public function retrieve(string $prompt, int $limit = null): array
|
||||||
{
|
{
|
||||||
$limit ??= $this->maxChunks;
|
$limit = $this->maxChunks;
|
||||||
|
$keywordChunks = [];
|
||||||
|
|
||||||
$terms = $this->extractTerms($prompt);
|
// Vector / enrichment
|
||||||
|
|
||||||
// 1) Keyword first
|
|
||||||
$keywordChunks = $this->keywordSearch->search($terms, $limit);
|
|
||||||
if (\count($keywordChunks) >= $limit) {
|
|
||||||
return array_slice($keywordChunks, 0, $limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2) Vector fallback / enrichment
|
|
||||||
$hits = $this->vectorClient->search($prompt, $this->vectorTopK);
|
$hits = $this->vectorClient->search($prompt, $this->vectorTopK);
|
||||||
if ($hits === []) {
|
if ($hits === []) {
|
||||||
return $keywordChunks;
|
return $this->diversifyByDevice($keywordChunks, $limit, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
$chunkIds = [];
|
$chunkIds = [];
|
||||||
@@ -51,7 +42,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ($chunkIds === []) {
|
if ($chunkIds === []) {
|
||||||
return $keywordChunks;
|
return $this->diversifyByDevice($keywordChunks, $limit, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
$rows = $this->lookup->findByChunkIds($chunkIds);
|
$rows = $this->lookup->findByChunkIds($chunkIds);
|
||||||
@@ -63,9 +54,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
$keywordChunks[] = trim($rows[$id]['text']);
|
$keywordChunks[] = trim($rows[$id]['text']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// dedupe + limit
|
// dedupe
|
||||||
$seen = [];
|
$seen = [];
|
||||||
$out = [];
|
$deduped = [];
|
||||||
|
|
||||||
foreach ($keywordChunks as $chunk) {
|
foreach ($keywordChunks as $chunk) {
|
||||||
$key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk));
|
$key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk));
|
||||||
@@ -73,20 +64,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$seen[$key] = true;
|
$seen[$key] = true;
|
||||||
$out[] = $chunk;
|
$deduped[] = $chunk;
|
||||||
if (\count($out) >= $limit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return $out;
|
// diversify
|
||||||
|
return $this->diversifyByDevice($deduped, $limit, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* minimal term extraction (we keep your old behavior)
|
|
||||||
*
|
|
||||||
* @return string[]
|
|
||||||
*/
|
|
||||||
private function extractTerms(string $text): array
|
private function extractTerms(string $text): array
|
||||||
{
|
{
|
||||||
$text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text));
|
$text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text));
|
||||||
@@ -96,4 +80,41 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
static fn(string $w) => mb_strlen($w) > 2
|
static fn(string $w) => mb_strlen($w) > 2
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Derives the grouping label (the "device") of a chunk from its first line.
 *
 * @param string $chunk Chunk text; only the part before the first "\n" is used.
 *
 * @return string The first line with surrounding whitespace removed; the whole
 *                trimmed chunk when it contains no newline.
 */
private function extractDevice(string $chunk): string
{
    $newlineAt = strpos($chunk, "\n");
    $heading = $newlineAt === false ? $chunk : substr($chunk, 0, $newlineAt);

    return trim($heading);
}
|
||||||
|
|
||||||
|
/**
 * Selects chunks in their given order while capping how many of them may
 * share the same device label (the trimmed first line of the chunk, as
 * returned by extractDevice()).
 *
 * Chunks whose device label is empty are skipped.
 *
 * @param string[] $chunks       Candidate chunks, already in priority order.
 * @param int      $limit        Maximum number of chunks to return; values <= 0 yield [].
 * @param int      $maxPerDevice Maximum number of chunks accepted per device label.
 *
 * @return string[] At most $limit chunks, in input order.
 */
private function diversifyByDevice(array $chunks, int $limit, int $maxPerDevice = 1): array
{
    // Without this guard the first accepted chunk would be appended before
    // the limit check runs, so a limit of 0 would still return one chunk.
    if ($limit <= 0) {
        return [];
    }

    $acceptedPerDevice = [];
    $selected = [];

    foreach ($chunks as $chunk) {
        $device = $this->extractDevice($chunk);

        if ($device === '') {
            continue;
        }

        $alreadyAccepted = $acceptedPerDevice[$device] ?? 0;

        if ($alreadyAccepted >= $maxPerDevice) {
            continue;
        }

        $selected[] = $chunk;
        $acceptedPerDevice[$device] = $alreadyAccepted + 1;

        if (\count($selected) >= $limit) {
            break;
        }
    }

    return $selected;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,101 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
declare(strict_types=1);
|
|
||||||
|
|
||||||
namespace App\Knowledge\Retrieval;
|
|
||||||
|
|
||||||
use App\Knowledge\ChunkManager;
|
|
||||||
use App\Knowledge\StopWords;
|
|
||||||
|
|
||||||
/**
 * Keyword search over the chunk rows streamed by ChunkManager::streamAll().
 *
 * Each row's text is scored by how many search terms it contains; the
 * best-scoring distinct texts are returned.
 */
final class NdjsonKeywordSearch
{
    public function __construct(
        private readonly ChunkManager $chunkManager,
        private readonly StopWords $stopWords,
    ) {
    }

    /**
     * Streams all chunk rows and returns the texts that match the most terms.
     *
     * @param string[] $terms        Search terms (already lowercased).
     * @param int      $limit        Maximum number of chunks to return; values <= 0 yield [].
     * @param int      $candidateCap Upper bound on scored candidates kept in memory while streaming.
     *
     * @return string[] Best chunks, highest score first, deduplicated by
     *                  whitespace-normalised, lowercased text.
     */
    public function search(array $terms, int $limit = 3, int $candidateCap = 200): array
    {
        // Previously the limit was only checked after the first append, so a
        // limit of 0 still produced one result.
        if ($limit <= 0) {
            return [];
        }

        // Fetch the stop-word list once instead of once per term.
        // NOTE(review): assumes getStopWords() returns the same list on every call — confirm.
        $stopWords = $this->stopWords->getStopWords();

        $terms = array_values(array_filter(
            $terms,
            static fn (string $term): bool => $term !== '' && !\in_array($term, $stopWords, true),
        ));

        if ($terms === []) {
            return [];
        }

        $byScoreDesc = static fn (array $a, array $b): int => $b['score'] <=> $a['score'];

        // Candidate list, re-sorted and truncated whenever it outgrows the cap.
        $best = [];

        foreach ($this->chunkManager->streamAll() as $row) {
            $text = $row['text'] ?? null;
            if (!is_string($text) || $text === '') {
                continue;
            }

            $score = $this->scoreText($text, $terms);
            if ($score <= 0) {
                continue;
            }

            $best[] = ['score' => $score, 'text' => trim($text)];

            // Keep the array bounded to avoid memory spikes.
            if (\count($best) > $candidateCap) {
                usort($best, $byScoreDesc);
                $best = array_slice($best, 0, $candidateCap);
            }
        }

        if ($best === []) {
            return [];
        }

        usort($best, $byScoreDesc);

        $out = [];
        $seen = [];

        foreach ($best as $candidate) {
            // preg_replace() returns null on failure (e.g. invalid UTF-8 with
            // the /u modifier); fall back to the raw text so such chunks are
            // not all collapsed onto one dedupe key.
            $normalised = preg_replace('/\s+/u', ' ', $candidate['text']) ?? $candidate['text'];
            $key = mb_strtolower($normalised);

            if (isset($seen[$key])) {
                continue;
            }

            $seen[$key] = true;
            $out[] = $candidate['text'];

            if (\count($out) >= $limit) {
                break;
            }
        }

        return $out;
    }

    /**
     * Simple scoring: one point per term contained in the lowercased text,
     * two points when the term is at least 10 characters long.
     *
     * @param string[] $terms
     */
    private function scoreText(string $text, array $terms): int
    {
        $content = mb_strtolower($text);
        $score = 0;

        foreach ($terms as $term) {
            if ($term === '') {
                continue;
            }

            if (str_contains($content, $term)) {
                $score += (mb_strlen($term) >= 10) ? 2 : 1;
            }
        }

        return $score;
    }
}
|
|
||||||
@@ -7,5 +7,5 @@ interface RetrieverInterface
|
|||||||
/**
|
/**
|
||||||
* @return string[] Plain text knowledge chunks
|
* @return string[] Plain text knowledge chunks
|
||||||
*/
|
*/
|
||||||
public function retrieve(string $prompt, int $limit = 3): array;
|
public function retrieve(string $prompt, int $limit = 10): array;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
placeholder="z. B. qwen3:latest"
|
placeholder="z. B. qwen3:latest"
|
||||||
required>
|
required>
|
||||||
<div class="form-text text-secondary">
|
<div class="form-text text-secondary">
|
||||||
Exakter Modellname wie im Endpunkt konfiguriert.
|
Exakter Modellname wie im KI-Endpunkt konfiguriert (z. B. Ollama oder API).
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -42,26 +42,31 @@
|
|||||||
type="checkbox"
|
type="checkbox"
|
||||||
name="stream"
|
name="stream"
|
||||||
value="1"
|
value="1"
|
||||||
id="streamSwitch">
|
id="streamSwitch" checked>
|
||||||
<label class="form-check-label" for="streamSwitch">
|
<label class="form-check-label" for="streamSwitch">
|
||||||
Streaming aktivieren
|
Streaming aktivieren
|
||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-text text-secondary ms-3">
|
||||||
|
Aktiviert Token-Streaming im Chat (empfohlen für bessere UX).
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Temperature -->
|
<!-- Temperature -->
|
||||||
<div class="col-md-4">
|
<div class="col-md-4">
|
||||||
<label class="form-label">Temperature</label>
|
<label class="form-label">Temperature</label>
|
||||||
<input type="number"
|
<input type="number"
|
||||||
step="0.1"
|
step="0.05"
|
||||||
min="0"
|
min="0"
|
||||||
max="2"
|
max="2"
|
||||||
name="temperature"
|
name="temperature"
|
||||||
value="0.1"
|
value="0.35"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
<div class="form-text text-secondary">
|
<div class="form-text text-secondary">
|
||||||
Niedrige Werte = deterministisch (empfohlen für RAG).
|
Steuert die Kreativität der Antworten.
|
||||||
|
Niedrige Werte (0.2–0.4) erzeugen stabile, sachliche Ergebnisse – empfohlen für RAG-Systeme.
|
||||||
|
Höhere Werte führen zu freieren, weniger deterministischen Antworten.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -71,9 +76,14 @@
|
|||||||
<input type="number"
|
<input type="number"
|
||||||
min="1"
|
min="1"
|
||||||
name="top_k"
|
name="top_k"
|
||||||
value="20"
|
value="40"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
|
<div class="form-text text-secondary">
|
||||||
|
Begrenzt die Anzahl der wahrscheinlichsten Token, aus denen das Modell auswählt.
|
||||||
|
Niedrigere Werte = konservativer, höhere Werte = flexibler.
|
||||||
|
20–50 ist für Wissenssysteme üblich.
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Top P -->
|
<!-- Top P -->
|
||||||
@@ -84,9 +94,14 @@
|
|||||||
min="0"
|
min="0"
|
||||||
max="1"
|
max="1"
|
||||||
name="top_p"
|
name="top_p"
|
||||||
value="0.8"
|
value="0.9"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
|
<div class="form-text text-secondary">
|
||||||
|
Nucleus Sampling: Das Modell berücksichtigt nur Token,
|
||||||
|
deren kumulative Wahrscheinlichkeit innerhalb dieses Werts liegt.
|
||||||
|
0.8–0.95 bietet eine gute Balance zwischen Stabilität und Natürlichkeit.
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Repeat Penalty -->
|
<!-- Repeat Penalty -->
|
||||||
@@ -97,9 +112,13 @@
|
|||||||
min="0"
|
min="0"
|
||||||
max="5"
|
max="5"
|
||||||
name="repeat_penalty"
|
name="repeat_penalty"
|
||||||
value="1.05"
|
value="1.1"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
|
<div class="form-text text-secondary">
|
||||||
|
Bestraft Wortwiederholungen. Werte leicht über 1.0 (z. B. 1.1–1.15)
|
||||||
|
verhindern Schleifen und redundante Antworten.
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Num Ctx -->
|
<!-- Num Ctx -->
|
||||||
@@ -109,11 +128,13 @@
|
|||||||
min="512"
|
min="512"
|
||||||
max="32768"
|
max="32768"
|
||||||
name="num_ctx"
|
name="num_ctx"
|
||||||
value="4096"
|
value="8192"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
<div class="form-text text-secondary">
|
<div class="form-text text-secondary">
|
||||||
Muss zum Modell passen. Zu hohe Werte können Performance beeinflussen.
|
Maximale Kontextlänge in Tokens (Systemprompt + Benutzerfrage + Retrieval-Chunks).
|
||||||
|
Muss vom Modell unterstützt werden.
|
||||||
|
Höhere Werte ermöglichen größere Wissenskontexte, erhöhen jedoch Speicher- und Rechenbedarf.
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user