lexical logic
This commit is contained in:
@@ -6,36 +6,50 @@ namespace App\Ingest;
|
||||
|
||||
use App\Index\IndexMetaManager;
|
||||
use App\Knowledge\ChunkManager;
|
||||
use App\Knowledge\Retrieval\NdjsonLexicalIndexBuilder;
|
||||
use App\Vector\VectorIndexBuilder;
|
||||
|
||||
/**
 * Orchestrates a full rebuild of the retrieval artifacts derived from index.ndjson.
 *
 * The service holds no state of its own; it only sequences calls on the injected
 * collaborators (vector index builder, lexical index builder, index metadata
 * manager and chunk manager).
 */
final readonly class VectorRebuildService
{
    public function __construct(
        private VectorIndexBuilder $vectorBuilder,
        private NdjsonLexicalIndexBuilder $lexicalIndexBuilder,
        private IndexMetaManager $metaManager,
        private ChunkManager $chunkManager,
    ) {}

    /**
     * Executes a full deterministic rebuild of all derived retrieval artifacts.
     *
     * Flow:
     * 1. Ensure index_meta.json exists
     * 2. Rebuild vector index from index.ndjson
     * 3. Rebuild lexical index from index.ndjson
     * 4. Count chunks streaming-safe
     * 5. Update runtime stats atomically
     *
     * Important:
     * - Vector and lexical index are both derived from the same NDJSON source
     * - rebuilding both here prevents drift between semantic and lexical retrieval layers
     * - failures in either derived artifact should fail the rebuild as a whole;
     *   nothing is caught here, so any exception aborts the remaining steps and
     *   the runtime stats are not updated
     *
     * @param string|null $logPath Optional log path; it is forwarded to the vector
     *                             index builder only (the lexical builder is called
     *                             without arguments).
     *
     * @throws \Throwable
     */
    public function rebuild(?string $logPath = null): void
    {
        // 1) Ensure metadata exists before derived index work starts.
        $this->metaManager->ensureExists();

        // 2) Rebuild semantic vector index.
        $this->vectorBuilder->rebuildFromNdjson($logPath);

        // 3) Rebuild generic lexical index from the same NDJSON source.
        $this->lexicalIndexBuilder->build();

        // 4) Count chunks streaming-safe.
        $chunkCount = $this->chunkManager->countAllChunks();

        // 5) Update runtime stats atomically, only after both rebuilds succeeded.
        $this->metaManager->updateRuntimeStats($chunkCount);
    }
}
|
||||
Reference in New Issue
Block a user