Remove direct chunk search; use vector search only

This commit is contained in:
team 1
2026-02-18 15:52:06 +01:00
parent ce03c65aca
commit b6e7c7cbab
2 changed files with 47 additions and 5 deletions

View File

@@ -0,0 +1,39 @@
<?php
declare(strict_types=1);
namespace App\Knowledge;
/**
 * Reduces a free-text query to a space-separated list of significant tokens.
 *
 * The query is lower-cased, every character that is not a letter, a digit or
 * whitespace is replaced by a space, whitespace runs are collapsed, and the
 * remaining tokens are filtered by minimum length and against the stop-word
 * list supplied by the injected StopWords instance.
 */
final readonly class QueryCleaner
{
    /** Tokens shorter than this many characters are discarded. */
    private const MIN_TOKEN_LENGTH = 3;

    public function __construct(
        private StopWords $stopWords,
    ) {
    }

    /**
     * Normalises the query and removes short tokens and stop words.
     *
     * @param string $query Raw query text.
     *
     * @return string Surviving tokens in their original order, joined by single
     *                spaces; an empty string when no token survives.
     *
     * @throws \RuntimeException When a PCRE replacement fails during normalisation.
     */
    public function clean(string $query): string
    {
        $normalized = mb_strtolower($query);
        // Anything that is not a letter, a digit or whitespace becomes a separator.
        $normalized = self::replaceWithSpace('/[^\p{L}\p{N}\s]/u', $normalized);
        // Collapse whitespace runs so that explode() below yields clean tokens.
        $normalized = trim(self::replaceWithSpace('/\s+/u', $normalized));

        if ($normalized === '') {
            return '';
        }

        // The stop-word list comes from the injected StopWords instance.
        $stopWords = $this->stopWords->getStopWords();

        $kept = array_filter(
            explode(' ', $normalized),
            static fn (string $token): bool => $token !== ''
                && mb_strlen($token) >= self::MIN_TOKEN_LENGTH
                && !in_array($token, $stopWords, true),
        );

        return implode(' ', $kept);
    }

    /**
     * Replaces every match of $pattern in $subject with a single space.
     *
     * preg_replace() returns null on failure; that case is turned into an
     * exception here instead of letting null flow into later string calls.
     *
     * @throws \RuntimeException When preg_replace() reports an error.
     */
    private static function replaceWithSpace(string $pattern, string $subject): string
    {
        $result = preg_replace($pattern, ' ', $subject);

        if ($result === null) {
            throw new \RuntimeException('Query normalisation failed: ' . preg_last_error_msg());
        }

        return $result;
    }
}

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval; namespace App\Knowledge\Retrieval;
use App\Knowledge\QueryCleaner;
use App\Vector\VectorSearchClient; use App\Vector\VectorSearchClient;
final class NdjsonHybridRetriever implements RetrieverInterface final class NdjsonHybridRetriever implements RetrieverInterface
@@ -11,10 +12,11 @@ final class NdjsonHybridRetriever implements RetrieverInterface
private const VECTOR_SCORE_THRESHOLD = 0.25; private const VECTOR_SCORE_THRESHOLD = 0.25;
public function __construct( public function __construct(
private readonly NdjsonChunkLookup $lookup, private readonly NdjsonChunkLookup $lookup,
private readonly VectorSearchClient $vectorClient, private readonly VectorSearchClient $vectorClient,
private readonly int $maxChunks = 10, private readonly QueryCleaner $queryCleaner,
private readonly int $vectorTopK = 10, private readonly int $maxChunks = 25,
private readonly int $vectorTopK = 10,
) )
{ {
} }
@@ -23,9 +25,10 @@ final class NdjsonHybridRetriever implements RetrieverInterface
{ {
$limit = $this->maxChunks; $limit = $this->maxChunks;
$keywordChunks = []; $keywordChunks = [];
$query = $this->queryCleaner->clean($prompt);
// Vector / enrichment // Vector / enrichment
$hits = $this->vectorClient->search($prompt, $this->vectorTopK); $hits = $this->vectorClient->search($query, $this->vectorTopK);
if ($hits === []) { if ($hits === []) {
return $this->diversifyByDevice($keywordChunks, $limit, 1); return $this->diversifyByDevice($keywordChunks, $limit, 1);
} }