add document delete and rebuild faiss index

This commit is contained in:
team 1
2026-02-17 14:49:35 +01:00
parent 1bb753e071
commit fcd9488a18
6 changed files with 189 additions and 50 deletions

View File

@@ -10,18 +10,11 @@ use App\Knowledge\ChunkManager;
use App\Knowledge\Ingest\KnowledgeIngestService;
use App\Vector\VectorIndexBuilder;
use Psr\Log\LoggerInterface;
use Symfony\Component\Uid\Uuid;
final readonly class IngestFlow
{
/**
* Realistische Betriebsgrenze für dieses Systemdesign (CPU Embedding + FlatIP + Full Rebuild).
* Wird beim lokalen Ingest (Dokumentversion) enforced.
*/
public const CHUNK_LIMIT_HARD = 120000;
/**
* Ab hier nur Warnung (keine Blockade) damit man frühzeitig reagieren kann.
*/
private const CHUNK_LIMIT_WARN = 100000;
public function __construct(
@@ -37,16 +30,10 @@ final readonly class IngestFlow
{
$this->metaManager->validateAgainstCurrent();
// Entfernt alte Chunks dieses Dokuments -> danach ist "existing" der Basis-Index ohne dieses Dokument.
$this->chunkManager->compactByDocument($version->getDocument()->getId());
// ------------------------------
// Chunk-Limit Guardrail (Hard Cap)
// ------------------------------
$existing = $this->chunkManager->countAllChunks();
// buildChunkRecords() ist generatorbasiert; für einen sauberen Hard-Cap materialisieren wir lokal,
// damit wir vor dem Append abbrechen können (keine Partial Writes).
$recordsIterable = $this->knowledgeIngestService->buildChunkRecords($version);
$records = is_array($recordsIterable)
? $recordsIterable
@@ -83,6 +70,30 @@ final readonly class IngestFlow
$this->updateChuckCount();
}
/**
 * Hard-delete flow for a single document.
 *
 * Runs the following steps in order:
 *  1. validates the current metadata through the meta manager,
 *  2. logs the deletion together with the document id (RFC 4122 form),
 *  3. asks the chunk manager to compact away this document's chunks
 *     (the chunk store is index.ndjson according to the original notes),
 *  4. rebuilds the vector index from the NDJSON data,
 *  5. refreshes the runtime statistics (chunk count).
 *
 * @param Uuid $documentId Identifier of the document whose chunks are removed.
 */
public function deleteDocument(Uuid $documentId): void
{
    $this->metaManager->validateAgainstCurrent();

    // Build the log context up front so the log call itself stays short.
    $logContext = ['document_id' => $documentId->toRfc4122()];
    $this->logger->info('Deleting document from RAG index.', $logContext);

    // Drop every chunk that belongs to the given document.
    $this->chunkManager->compactByDocument($documentId);

    // Regenerate the vector index from the now-updated NDJSON data.
    $this->vectorBuilder->rebuildFromNdjson();

    // Keep the runtime stats in sync with the new chunk total.
    $this->updateChuckCount();
}
public function globalReindex(): void
{
$allRecords = $this->knowledgeIngestService->buildAllActiveChunkRecords();
@@ -101,4 +112,4 @@ final readonly class IngestFlow
$chunkCount = $this->chunkManager->countAllChunks();
$this->metaManager->updateRuntimeStats($chunkCount);
}
}
}