Remove direct chunk search; use only vector search

This commit is contained in:
team 1
2026-02-18 14:46:55 +01:00
parent 12a5ab0bb3
commit ce03c65aca
6 changed files with 94 additions and 152 deletions

View File

@@ -101,12 +101,7 @@ services:
App\Knowledge\Retrieval\NdjsonChunkLookup: ~ App\Knowledge\Retrieval\NdjsonChunkLookup: ~
App\Knowledge\Retrieval\NdjsonKeywordSearch: ~ App\Knowledge\Retrieval\NdjsonHybridRetriever: ~
App\Knowledge\Retrieval\NdjsonHybridRetriever:
arguments:
$maxChunks: 3
$vectorTopK: 5
App\Knowledge\Retrieval\CachedRetriever: App\Knowledge\Retrieval\CachedRetriever:
arguments: arguments:

View File

@@ -5,16 +5,22 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval; namespace App\Knowledge\Retrieval;
use Psr\Cache\CacheItemPoolInterface; use Psr\Cache\CacheItemPoolInterface;
use Psr\Cache\InvalidArgumentException;
final class CachedRetriever implements RetrieverInterface final readonly class CachedRetriever implements RetrieverInterface
{ {
public function __construct( public function __construct(
private RetrieverInterface $inner, private RetrieverInterface $inner,
private CacheItemPoolInterface $cache, private CacheItemPoolInterface $cache,
private int $ttlSeconds = 600 // 10 Minuten private int $ttlSeconds
) {} )
{
}
public function retrieve(string $prompt, int $limit = 3): array /**
* @throws InvalidArgumentException
*/
public function retrieve(string $prompt, int $limit = 10): array
{ {
$key = $this->buildCacheKey($prompt, $limit); $key = $this->buildCacheKey($prompt, $limit);

View File

@@ -1,6 +1,5 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace App\Knowledge\Retrieval; namespace App\Knowledge\Retrieval;
@@ -9,34 +8,26 @@ use App\Vector\VectorSearchClient;
final class NdjsonHybridRetriever implements RetrieverInterface final class NdjsonHybridRetriever implements RetrieverInterface
{ {
private const VECTOR_SCORE_THRESHOLD = 0.65; private const VECTOR_SCORE_THRESHOLD = 0.25;
public function __construct( public function __construct(
private readonly NdjsonKeywordSearch $keywordSearch,
private readonly NdjsonChunkLookup $lookup, private readonly NdjsonChunkLookup $lookup,
private readonly VectorSearchClient $vectorClient, private readonly VectorSearchClient $vectorClient,
private readonly int $maxChunks = 3, private readonly int $maxChunks = 10,
private readonly int $vectorTopK = 5, private readonly int $vectorTopK = 10,
) )
{ {
} }
public function retrieve(string $prompt, int $limit = null): array public function retrieve(string $prompt, int $limit = null): array
{ {
$limit ??= $this->maxChunks; $limit = $this->maxChunks;
$keywordChunks = [];
$terms = $this->extractTerms($prompt); // Vector / enrichment
// 1) Keyword first
$keywordChunks = $this->keywordSearch->search($terms, $limit);
if (\count($keywordChunks) >= $limit) {
return array_slice($keywordChunks, 0, $limit);
}
// 2) Vector fallback / enrichment
$hits = $this->vectorClient->search($prompt, $this->vectorTopK); $hits = $this->vectorClient->search($prompt, $this->vectorTopK);
if ($hits === []) { if ($hits === []) {
return $keywordChunks; return $this->diversifyByDevice($keywordChunks, $limit, 1);
} }
$chunkIds = []; $chunkIds = [];
@@ -51,7 +42,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
} }
if ($chunkIds === []) { if ($chunkIds === []) {
return $keywordChunks; return $this->diversifyByDevice($keywordChunks, $limit, 1);
} }
$rows = $this->lookup->findByChunkIds($chunkIds); $rows = $this->lookup->findByChunkIds($chunkIds);
@@ -63,9 +54,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$keywordChunks[] = trim($rows[$id]['text']); $keywordChunks[] = trim($rows[$id]['text']);
} }
// dedupe + limit // dedupe
$seen = []; $seen = [];
$out = []; $deduped = [];
foreach ($keywordChunks as $chunk) { foreach ($keywordChunks as $chunk) {
$key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk)); $key = mb_strtolower(preg_replace('/\s+/u', ' ', $chunk));
@@ -73,20 +64,13 @@ final class NdjsonHybridRetriever implements RetrieverInterface
continue; continue;
} }
$seen[$key] = true; $seen[$key] = true;
$out[] = $chunk; $deduped[] = $chunk;
if (\count($out) >= $limit) {
break;
}
} }
return $out; // diversify
return $this->diversifyByDevice($deduped, $limit, 1);
} }
/**
* minimal term extraction (we keep your old behavior)
*
* @return string[]
*/
private function extractTerms(string $text): array private function extractTerms(string $text): array
{ {
$text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text)); $text = mb_strtolower((string)preg_replace('/[^\p{L}\p{N}\s]/u', '', $text));
@@ -96,4 +80,41 @@ final class NdjsonHybridRetriever implements RetrieverInterface
static fn(string $w) => mb_strlen($w) > 2 static fn(string $w) => mb_strlen($w) > 2
)); ));
} }
/**
 * Returns the trimmed first line of a chunk.
 *
 * Everything before the first "\n" (or the whole chunk when it has no
 * line break) is used as the grouping key by diversifyByDevice().
 * NOTE(review): presumably the first line of a chunk names the device —
 * confirm against the chunk format.
 */
private function extractDevice(string $chunk): string
{
    $newlineAt = strpos($chunk, "\n");
    $header = $newlineAt === false ? $chunk : substr($chunk, 0, $newlineAt);

    return trim($header);
}
/**
 * Picks chunks in their given order while capping how many share the same
 * first line (the "device" key produced by extractDevice()).
 *
 * Chunks whose first line is empty after trimming are skipped entirely.
 *
 * @param string[] $chunks       Candidate chunks, best first.
 * @param int      $limit        Maximum number of chunks to return; values <= 0 yield an empty list.
 * @param int      $maxPerDevice Maximum number of chunks kept per device key.
 *
 * @return string[] At most $limit chunks, in their original relative order.
 */
private function diversifyByDevice(array $chunks, int $limit, int $maxPerDevice = 1): array
{
    // The limit was previously only checked after appending, so a limit of 0
    // (or less) still returned one chunk. Bail out up front instead.
    if ($limit <= 0 || $maxPerDevice <= 0) {
        return [];
    }

    $perDevice = [];
    $selected = [];

    foreach ($chunks as $chunk) {
        $device = $this->extractDevice($chunk);
        if ($device === '') {
            continue;
        }

        $alreadyTaken = $perDevice[$device] ?? 0;
        if ($alreadyTaken >= $maxPerDevice) {
            continue;
        }

        $selected[] = $chunk;
        $perDevice[$device] = $alreadyTaken + 1;

        if (\count($selected) >= $limit) {
            break;
        }
    }

    return $selected;
}
} }

View File

@@ -1,101 +0,0 @@
<?php
declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Knowledge\ChunkManager;
use App\Knowledge\StopWords;
/**
 * Keyword-based chunk search that scores every row streamed by the
 * ChunkManager and returns the best-matching chunk texts.
 */
final class NdjsonKeywordSearch
{
    public function __construct(
        private readonly ChunkManager $chunkManager,
        private readonly StopWords $stopWords,
    ) {
    }

    /**
     * Streaming keyword search over the rows yielded by ChunkManager::streamAll()
     * (index.ndjson according to the original note).
     *
     * @param string[] $terms        Search terms, already lowercased.
     * @param int      $limit        Maximum number of chunks to return; values <= 0 yield an empty list.
     * @param int      $candidateCap Maximum number of scored candidates kept in memory while streaming.
     *
     * @return string[] Best-scoring chunk texts, highest score first, de-duplicated.
     */
    public function search(array $terms, int $limit = 3, int $candidateCap = 200): array
    {
        // Nothing to search for / nothing requested: skip the stop-word lookup
        // and the full scan. (A limit of 0 used to return one chunk because the
        // limit was only checked after appending.)
        if ($terms === [] || $limit <= 0) {
            return [];
        }

        // Fetch the stop-word list once instead of once per term.
        $stopWords = $this->stopWords->getStopWords();
        $terms = array_values(array_filter(
            $terms,
            static fn (string $t): bool => $t !== '' && !\in_array($t, $stopWords, true),
        ));

        if ($terms === []) {
            return [];
        }

        $byScoreDesc = static fn (array $a, array $b): int => $b['score'] <=> $a['score'];

        // Bounded candidate list of ['score' => int, 'text' => string] entries.
        $best = [];

        foreach ($this->chunkManager->streamAll() as $row) {
            $text = $row['text'] ?? null;
            if (!is_string($text) || $text === '') {
                continue;
            }

            $score = $this->scoreText($text, $terms);
            if ($score <= 0) {
                continue;
            }

            $best[] = ['score' => $score, 'text' => trim($text)];

            // Keep the array bounded to avoid memory spikes: once it overflows,
            // sort by score and drop everything beyond the cap.
            if (\count($best) > $candidateCap) {
                usort($best, $byScoreDesc);
                $best = array_slice($best, 0, $candidateCap);
            }
        }

        if ($best === []) {
            return [];
        }

        usort($best, $byScoreDesc);

        $out = [];
        $seen = [];

        foreach ($best as $candidate) {
            // Whitespace- and case-insensitive de-duplication key. preg_replace()
            // returns null on failure (e.g. invalid UTF-8); fall back to the raw
            // text so mb_strtolower() never receives null under strict types.
            $normalized = preg_replace('/\s+/u', ' ', $candidate['text']) ?? $candidate['text'];
            $key = mb_strtolower($normalized);

            if (isset($seen[$key])) {
                continue;
            }

            $seen[$key] = true;
            $out[] = $candidate['text'];

            if (\count($out) >= $limit) {
                break;
            }
        }

        return $out;
    }

    /**
     * Simple scoring: one point per term contained in the text, two points
     * for terms of at least 10 characters.
     *
     * @param string[] $terms
     */
    private function scoreText(string $text, array $terms): int
    {
        $content = mb_strtolower($text);
        $score = 0;

        foreach ($terms as $term) {
            // str_contains() matches the empty string everywhere; never score it.
            if ($term === '') {
                continue;
            }

            if (str_contains($content, $term)) {
                $score += (mb_strlen($term) >= 10) ? 2 : 1;
            }
        }

        return $score;
    }
}

View File

@@ -7,5 +7,5 @@ interface RetrieverInterface
/** /**
* @return string[] Plain text knowledge chunks * @return string[] Plain text knowledge chunks
*/ */
public function retrieve(string $prompt, int $limit = 3): array; public function retrieve(string $prompt, int $limit = 10): array;
} }

View File

@@ -31,7 +31,7 @@
placeholder="z. B. qwen3:latest" placeholder="z. B. qwen3:latest"
required> required>
<div class="form-text text-secondary"> <div class="form-text text-secondary">
Exakter Modellname wie im Endpunkt konfiguriert. Exakter Modellname wie im KI-Endpunkt konfiguriert (z. B. Ollama oder API).
</div> </div>
</div> </div>
@@ -42,26 +42,31 @@
type="checkbox" type="checkbox"
name="stream" name="stream"
value="1" value="1"
id="streamSwitch"> id="streamSwitch" checked>
<label class="form-check-label" for="streamSwitch"> <label class="form-check-label" for="streamSwitch">
Streaming aktivieren Streaming aktivieren
</label> </label>
</div> </div>
<div class="form-text text-secondary ms-3">
Aktiviert Token-Streaming im Chat (empfohlen für bessere UX).
</div>
</div> </div>
<!-- Temperature --> <!-- Temperature -->
<div class="col-md-4"> <div class="col-md-4">
<label class="form-label">Temperature</label> <label class="form-label">Temperature</label>
<input type="number" <input type="number"
step="0.1" step="0.05"
min="0" min="0"
max="2" max="2"
name="temperature" name="temperature"
value="0.1" value="0.35"
class="form-control bg-dark text-light border-secondary" class="form-control bg-dark text-light border-secondary"
required> required>
<div class="form-text text-secondary"> <div class="form-text text-secondary">
Niedrige Werte = deterministisch (empfohlen für RAG). Steuert die Kreativität der Antworten.
Niedrige Werte (0.2–0.4) erzeugen stabile, sachliche Ergebnisse – empfohlen für RAG-Systeme.
Höhere Werte führen zu freieren, weniger deterministischen Antworten.
</div> </div>
</div> </div>
@@ -71,9 +76,14 @@
<input type="number" <input type="number"
min="1" min="1"
name="top_k" name="top_k"
value="20" value="40"
class="form-control bg-dark text-light border-secondary" class="form-control bg-dark text-light border-secondary"
required> required>
<div class="form-text text-secondary">
Begrenzt die Anzahl der wahrscheinlichsten Token, aus denen das Modell auswählt.
Niedrigere Werte = konservativer, höhere Werte = flexibler.
20–50 ist für Wissenssysteme üblich.
</div>
</div> </div>
<!-- Top P --> <!-- Top P -->
@@ -84,9 +94,14 @@
min="0" min="0"
max="1" max="1"
name="top_p" name="top_p"
value="0.8" value="0.9"
class="form-control bg-dark text-light border-secondary" class="form-control bg-dark text-light border-secondary"
required> required>
<div class="form-text text-secondary">
Nucleus Sampling: Das Modell berücksichtigt nur Token,
deren kumulative Wahrscheinlichkeit innerhalb dieses Werts liegt.
0.8–0.95 bietet eine gute Balance zwischen Stabilität und Natürlichkeit.
</div>
</div> </div>
<!-- Repeat Penalty --> <!-- Repeat Penalty -->
@@ -97,9 +112,13 @@
min="0" min="0"
max="5" max="5"
name="repeat_penalty" name="repeat_penalty"
value="1.05" value="1.1"
class="form-control bg-dark text-light border-secondary" class="form-control bg-dark text-light border-secondary"
required> required>
<div class="form-text text-secondary">
Bestraft Wortwiederholungen. Werte leicht über 1.0 (z. B. 1.1–1.15)
verhindern Schleifen und redundante Antworten.
</div>
</div> </div>
<!-- Num Ctx --> <!-- Num Ctx -->
@@ -109,11 +128,13 @@
min="512" min="512"
max="32768" max="32768"
name="num_ctx" name="num_ctx"
value="4096" value="8192"
class="form-control bg-dark text-light border-secondary" class="form-control bg-dark text-light border-secondary"
required> required>
<div class="form-text text-secondary"> <div class="form-text text-secondary">
Muss zum Modell passen. Zu hohe Werte können Performance beeinflussen. Maximale Kontextlänge in Tokens (Systemprompt + Benutzerfrage + Retrieval-Chunks).
Muss vom Modell unterstützt werden.
Höhere Werte ermöglichen größere Wissenskontexte, erhöhen jedoch Speicher- und Rechenbedarf.
</div> </div>
</div> </div>