harden code and ingester

This commit is contained in:
team 1
2026-02-12 14:31:29 +01:00
parent 5a52e07edc
commit 994f582f35
8 changed files with 77 additions and 496 deletions

View File

@@ -5,178 +5,52 @@ declare(strict_types=1);
namespace App\Ingest;
use App\Entity\DocumentVersion;
use App\Entity\IngestJob;
use App\Entity\User;
use App\Index\IndexMetaManager;
use App\Index\IndexStructureChangedException;
use App\Knowledge\ChunkManager;
use App\Service\IngestJobService;
use App\Service\LockService;
use App\Knowledge\Ingest\KnowledgeIngestService;
use App\Vector\VectorIndexBuilder;
use Doctrine\ORM\EntityManagerInterface;
final class IngestFlow
final readonly class IngestFlow
{
/**
 * Receives the collaborators of the ingest flow as promoted constructor
 * properties.
 *
 * NOTE(review): this listing appears to be a flattened diff (markers and
 * indentation stripped), so two parameter lists are interleaved here:
 * one with seven `private readonly` parameters and one with four
 * `private` parameters. Presumably the shorter list is the newer revision,
 * matching the `final readonly class` header - confirm against the
 * actual file.
 */
public function __construct(
private readonly LockService $lockService,
private readonly IngestJobService $jobService,
private readonly KnowledgeIngestService $knowledgeIngestService,
private readonly ChunkManager $chunkManager,
private readonly VectorIndexBuilder $vectorBuilder,
private readonly IndexMetaManager $metaManager,
private readonly EntityManagerInterface $em,
) {
private KnowledgeIngestService $knowledgeIngestService,
private ChunkManager $chunkManager,
private VectorIndexBuilder $vectorBuilder,
private IndexMetaManager $metaManager,
)
{
}
// ============================================================
// LOCAL DOCUMENT INGEST
// ============================================================
/**
 * Ingests a single document version.
 *
 * Steps visible in this span: validate the index meta via
 * validateAgainstCurrent(), compact the chunks of the version's document
 * via compactByDocument(), build chunk records for the version, append
 * them via appendChunks(), and call rebuildFromNdjson() on the vector
 * builder.
 *
 * NOTE(review): this listing appears to be a flattened diff (markers and
 * indentation stripped). Two signatures are interleaved: one taking
 * (DocumentVersion, User) and returning IngestJob, and one taking only a
 * DocumentVersion and returning void. The lock/job/try-catch lines
 * presumably belong to the older revision - confirm against the actual
 * file before relying on either variant.
 */
public function ingestDocumentVersion(
DocumentVersion $version,
User $user
): IngestJob {
DocumentVersion $version
): void
{
$this->metaManager->validateAgainstCurrent();
if (!$this->lockService->acquire()) {
throw new \RuntimeException('Another ingest job is already running.');
}
$this->chunkManager->compactByDocument(
$version->getDocument()->getId()
);
$job = null;
$records = $this->knowledgeIngestService
->buildChunkRecords($version);
try {
$this->chunkManager->appendChunks($records);
$job = $this->jobService->startJob(
IngestJob::TYPE_DOCUMENT,
$user,
$version->getDocument()->getId(),
$version->getId(),
);
$version->setIngestStatus(DocumentVersion::INGEST_RUNNING);
$this->em->flush();
// --------------------------------------------------
// Guardrail: validate the index structure
// --------------------------------------------------
$this->metaManager->validateAgainstCurrent();
// --------------------------------------------------
// Remove this document's old chunks (streaming)
// --------------------------------------------------
$this->chunkManager->compactByDocument(
$version->getDocument()->getId()
);
// --------------------------------------------------
// Generate new chunks
// --------------------------------------------------
$records = $this->knowledgeIngestService
->buildChunkRecords($version);
// --------------------------------------------------
// Append to the NDJSON file
// --------------------------------------------------
$this->chunkManager->appendChunks($records);
// --------------------------------------------------
// Rebuild the FAISS index from scratch (deterministic)
// --------------------------------------------------
$logPath = $job->getLogPath();
$this->vectorBuilder->rebuildFromNdjson($logPath);
// --------------------------------------------------
// Success
// --------------------------------------------------
$version->setIngestStatus(DocumentVersion::INGEST_INDEXED);
$this->jobService->markCompleted($job);
$this->em->flush();
} catch (IndexStructureChangedException $e) {
// Record the failure on the job (if one was started) and on the version, then rethrow.
if ($job) {
$this->jobService->markFailed($job, $e->getMessage());
}
$version->setIngestStatus(DocumentVersion::INGEST_FAILED);
$this->em->flush();
throw $e;
} catch (\Throwable $e) {
// Same failure bookkeeping as above for any other error; the exception is rethrown unchanged.
if ($job) {
$this->jobService->markFailed($job, $e->getMessage());
}
$version->setIngestStatus(DocumentVersion::INGEST_FAILED);
$this->em->flush();
throw $e;
} finally {
// The lock is released whether the ingest succeeded or failed.
$this->lockService->release();
}
return $job;
$this->vectorBuilder->rebuildFromNdjson();
}
// ============================================================
// GLOBAL REINDEX
// ============================================================
/**
 * Rebuilds the whole index from all active documents.
 *
 * Steps visible in this span: build chunk records via
 * buildAllActiveChunkRecords(), rewrite the chunk store via rewriteAll(),
 * call rebuildFromNdjson() on the vector builder, and write the meta via
 * writeMetaForGlobalReindex().
 *
 * NOTE(review): this listing appears to be a flattened diff (markers and
 * indentation stripped). Two signatures are interleaved: one taking a User
 * and returning IngestJob, and one without parameters returning void. The
 * lock/job/try-catch lines presumably belong to the older revision -
 * confirm against the actual file before relying on either variant.
 */
public function globalReindex(User $user): IngestJob
public function globalReindex(): void
{
if (!$this->lockService->acquire()) {
throw new \RuntimeException('Another ingest job is already running.');
}
$allRecords = $this->knowledgeIngestService
->buildAllActiveChunkRecords();
$job = null;
$this->chunkManager->rewriteAll($allRecords);
try {
$this->vectorBuilder->rebuildFromNdjson();
$job = $this->jobService->startJob(
IngestJob::TYPE_GLOBAL_REINDEX,
$user
);
// --------------------------------------------------
// Re-ingest all active documents
// --------------------------------------------------
$allRecords = $this->knowledgeIngestService
->buildAllActiveChunkRecords();
// --------------------------------------------------
// Rewrite the entire NDJSON file
// --------------------------------------------------
$this->chunkManager->rewriteAll($allRecords);
// --------------------------------------------------
// Rebuild the FAISS index from scratch
// --------------------------------------------------
$logPath = $job->getLogPath();
$this->vectorBuilder->rebuildFromNdjson($logPath);
// --------------------------------------------------
// Update meta + index_version++
// --------------------------------------------------
$this->metaManager->writeMetaForGlobalReindex();
$this->jobService->markCompleted($job);
} catch (\Throwable $e) {
// Mark the job as failed (if one was started) and rethrow the original exception.
if ($job) {
$this->jobService->markFailed($job, $e->getMessage());
}
throw $e;
} finally {
// The lock is released whether the reindex succeeded or failed.
$this->lockService->release();
}
return $job;
$this->metaManager->writeMetaForGlobalReindex();
}
}