optimize py autoload
This commit is contained in:
@@ -6,6 +6,7 @@ namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Entity\ModelGenerationConfig;
|
||||
use App\Knowledge\ChunkManager;
|
||||
use App\Knowledge\QueryCleaner;
|
||||
use App\Repository\ModelGenerationConfigRepository;
|
||||
use App\Tag\TagRoutingService;
|
||||
use App\Vector\VectorSearchClient;
|
||||
@@ -24,6 +25,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private readonly VectorSearchClient $vectorClient,
|
||||
private readonly TagRoutingService $tagRouting,
|
||||
private readonly ModelGenerationConfigRepository $configRepository,
|
||||
private readonly QueryCleaner $queryCleaner,
|
||||
) {}
|
||||
|
||||
public function retrieve(string $prompt): array
|
||||
@@ -47,12 +49,21 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$limit = max(1, min($config->getRetrievalMaxChunks(), self::HARD_MAX_CHUNKS));
|
||||
$vectorTopKBase = max(1, min($config->getRetrievalVectorTopK(), self::HARD_MAX_VECTORK));
|
||||
|
||||
// Important: list detection runs on the original prompt (cleaning would otherwise strip trigger words such as "zeige"/"liste").
|
||||
$isListQuery = $this->isListQuery($prompt);
|
||||
|
||||
// -------------------------------------------------
|
||||
// 1) Tag Routing
|
||||
// Clean query (used for retrieval only: tag routing + vector search)
|
||||
// -------------------------------------------------
|
||||
$candidateDocIds = $this->tagRouting->route($prompt);
|
||||
$cleanQuery = $this->queryCleaner->clean($prompt);
|
||||
if ($cleanQuery === '') {
|
||||
$cleanQuery = $prompt;
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// 1) Tag routing (cleaned query)
|
||||
// -------------------------------------------------
|
||||
$candidateDocIds = $this->tagRouting->route($cleanQuery);
|
||||
$candidateSet = null;
|
||||
|
||||
if (is_array($candidateDocIds) && $candidateDocIds !== []) {
|
||||
@@ -76,22 +87,22 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// 3) Vector Search (Scoped wenn möglich)
|
||||
// 3) Vector search (cleaned query; scoped when possible)
|
||||
// -------------------------------------------------
|
||||
if ($candidateSet !== null) {
|
||||
$hits = $this->vectorClient->searchScoped(
|
||||
$prompt,
|
||||
$cleanQuery,
|
||||
$topK,
|
||||
array_keys($candidateSet)
|
||||
);
|
||||
|
||||
// If the scoped search returns no hits, fall back to a global search
|
||||
if ($hits === []) {
|
||||
$hits = $this->vectorClient->search($prompt, $vectorTopKBase);
|
||||
$hits = $this->vectorClient->search($cleanQuery, $vectorTopKBase);
|
||||
}
|
||||
|
||||
} else {
|
||||
$hits = $this->vectorClient->search($prompt, $topK);
|
||||
$hits = $this->vectorClient->search($cleanQuery, $topK);
|
||||
}
|
||||
|
||||
if ($hits === []) {
|
||||
@@ -294,7 +305,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
|
||||
$chunk = trim($rows[$id]['text']);
|
||||
|
||||
$key = mb_strtolower((string)preg_replace('/\s+/u', ' ', $chunk));
|
||||
|
||||
if (isset($seen[$key])) {
|
||||
|
||||
Reference in New Issue
Block a user