lexical logic

This commit is contained in:
team2
2026-04-20 21:46:42 +02:00
parent 2587ac8b4b
commit 065f59c090
9 changed files with 2576 additions and 326 deletions

View File

@@ -6,36 +6,50 @@ namespace App\Ingest;
use App\Index\IndexMetaManager;
use App\Knowledge\ChunkManager;
use App\Knowledge\Retrieval\NdjsonLexicalIndexBuilder;
use App\Vector\VectorIndexBuilder;
/**
 * Rebuilds the retrieval artifacts derived from the NDJSON index.
 *
 * The service only orchestrates its injected collaborators; it holds no
 * state of its own and performs no error handling, so any exception raised
 * by a collaborator aborts the rebuild and propagates to the caller.
 */
final readonly class VectorRebuildService
{
    public function __construct(
        private VectorIndexBuilder $vectorBuilder,
        private NdjsonLexicalIndexBuilder $lexicalIndexBuilder,
        private IndexMetaManager $metaManager,
        private ChunkManager $chunkManager,
    ) {
    }

    /**
     * Executes a full deterministic rebuild of all derived retrieval artifacts.
     *
     * Flow:
     * 1. Ensure index_meta.json exists
     * 2. Rebuild vector index from index.ndjson
     * 3. Rebuild lexical index from index.ndjson
     * 4. Count chunks streaming-safe
     * 5. Update runtime stats atomically
     *
     * Important:
     * - Vector and lexical index are both derived from the same NDJSON source
     * - rebuilding both here prevents drift between semantic and lexical retrieval layers
     * - failures in either derived artifact should fail the rebuild as a whole
     *
     * @param string|null $logPath Optional log path; it is forwarded to the
     *                             vector index builder only. The lexical
     *                             builder is invoked without arguments.
     *
     * @throws \Throwable Nothing is caught here, so a failure in any step
     *                    stops the remaining steps and reaches the caller.
     */
    public function rebuild(?string $logPath = null): void
    {
        // 1) Ensure metadata exists before derived index work starts.
        $this->metaManager->ensureExists();

        // 2) Rebuild semantic vector index.
        $this->vectorBuilder->rebuildFromNdjson($logPath);

        // 3) Rebuild generic lexical index from the same NDJSON source.
        $this->lexicalIndexBuilder->build();

        // 4) Count chunks (streaming-safe per ChunkManager's contract — not visible here).
        $chunkCount = $this->chunkManager->countAllChunks();

        // 5) Update runtime stats (atomicity is IndexMetaManager's responsibility).
        $this->metaManager->updateRuntimeStats($chunkCount);
    }
}