Add document delete and rebuild FAISS index
This commit is contained in:
@@ -10,18 +10,11 @@ use App\Knowledge\ChunkManager;
|
||||
use App\Knowledge\Ingest\KnowledgeIngestService;
|
||||
use App\Vector\VectorIndexBuilder;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\Uid\Uuid;
|
||||
|
||||
final readonly class IngestFlow
|
||||
{
|
||||
/**
|
||||
* Realistische Betriebsgrenze für dieses Systemdesign (CPU Embedding + FlatIP + Full Rebuild).
|
||||
* Wird beim lokalen Ingest (Dokumentversion) enforced.
|
||||
*/
|
||||
public const CHUNK_LIMIT_HARD = 120000;
|
||||
|
||||
/**
|
||||
* Ab hier nur Warnung (keine Blockade) – damit man frühzeitig reagieren kann.
|
||||
*/
|
||||
private const CHUNK_LIMIT_WARN = 100000;
|
||||
|
||||
public function __construct(
|
||||
@@ -37,16 +30,10 @@ final readonly class IngestFlow
|
||||
{
|
||||
$this->metaManager->validateAgainstCurrent();
|
||||
|
||||
// Entfernt alte Chunks dieses Dokuments -> danach ist "existing" der Basis-Index ohne dieses Dokument.
|
||||
$this->chunkManager->compactByDocument($version->getDocument()->getId());
|
||||
|
||||
// ------------------------------
|
||||
// Chunk-Limit Guardrail (Hard Cap)
|
||||
// ------------------------------
|
||||
$existing = $this->chunkManager->countAllChunks();
|
||||
|
||||
// buildChunkRecords() ist generatorbasiert; für einen sauberen Hard-Cap materialisieren wir lokal,
|
||||
// damit wir vor dem Append abbrechen können (keine Partial Writes).
|
||||
$recordsIterable = $this->knowledgeIngestService->buildChunkRecords($version);
|
||||
$records = is_array($recordsIterable)
|
||||
? $recordsIterable
|
||||
@@ -83,6 +70,30 @@ final readonly class IngestFlow
|
||||
$this->updateChuckCount();
|
||||
}
|
||||
|
||||
/**
 * Hard-delete flow for a single document.
 *
 * Removes all chunks belonging to the given document from index.ndjson
 * and then rebuilds the vector index deterministically from what is left.
 * Finally, the runtime statistics are refreshed.
 *
 * @param Uuid $documentId Identifier of the document whose chunks are dropped.
 */
public function deleteDocument(Uuid $documentId): void
{
    // Validate the metadata before anything is modified.
    $this->metaManager->validateAgainstCurrent();

    $logContext = ['document_id' => $documentId->toRfc4122()];
    $this->logger->info('Deleting document from RAG index.', $logContext);

    // Step 1: compact the chunk store so it no longer contains this document.
    $this->chunkManager->compactByDocument($documentId);

    // Step 2: regenerate the vector index from the compacted NDJSON.
    $this->vectorBuilder->rebuildFromNdjson();

    // Step 3: refresh the runtime stats after the rebuild.
    $this->updateChuckCount();
}
|
||||
|
||||
public function globalReindex(): void
|
||||
{
|
||||
$allRecords = $this->knowledgeIngestService->buildAllActiveChunkRecords();
|
||||
@@ -101,4 +112,4 @@ final readonly class IngestFlow
|
||||
$chunkCount = $this->chunkManager->countAllChunks();
|
||||
$this->metaManager->updateRuntimeStats($chunkCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user