harden code and ingester
This commit is contained in:
@@ -5,178 +5,52 @@ declare(strict_types=1);
|
||||
namespace App\Ingest;
|
||||
|
||||
use App\Entity\DocumentVersion;
|
||||
use App\Entity\IngestJob;
|
||||
use App\Entity\User;
|
||||
use App\Index\IndexMetaManager;
|
||||
use App\Index\IndexStructureChangedException;
|
||||
use App\Knowledge\ChunkManager;
|
||||
use App\Service\IngestJobService;
|
||||
use App\Service\LockService;
|
||||
use App\Knowledge\Ingest\KnowledgeIngestService;
|
||||
use App\Vector\VectorIndexBuilder;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
|
||||
final class IngestFlow
|
||||
final readonly class IngestFlow
|
||||
{
|
||||
public function __construct(
|
||||
private readonly LockService $lockService,
|
||||
private readonly IngestJobService $jobService,
|
||||
private readonly KnowledgeIngestService $knowledgeIngestService,
|
||||
private readonly ChunkManager $chunkManager,
|
||||
private readonly VectorIndexBuilder $vectorBuilder,
|
||||
private readonly IndexMetaManager $metaManager,
|
||||
private readonly EntityManagerInterface $em,
|
||||
) {
|
||||
private KnowledgeIngestService $knowledgeIngestService,
|
||||
private ChunkManager $chunkManager,
|
||||
private VectorIndexBuilder $vectorBuilder,
|
||||
private IndexMetaManager $metaManager,
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// LOCAL DOCUMENT INGEST
|
||||
// ============================================================
|
||||
|
||||
public function ingestDocumentVersion(
|
||||
DocumentVersion $version,
|
||||
User $user
|
||||
): IngestJob {
|
||||
DocumentVersion $version
|
||||
): void
|
||||
{
|
||||
$this->metaManager->validateAgainstCurrent();
|
||||
|
||||
if (!$this->lockService->acquire()) {
|
||||
throw new \RuntimeException('Another ingest job is already running.');
|
||||
}
|
||||
$this->chunkManager->compactByDocument(
|
||||
$version->getDocument()->getId()
|
||||
);
|
||||
|
||||
$job = null;
|
||||
$records = $this->knowledgeIngestService
|
||||
->buildChunkRecords($version);
|
||||
|
||||
try {
|
||||
$this->chunkManager->appendChunks($records);
|
||||
|
||||
$job = $this->jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT,
|
||||
$user,
|
||||
$version->getDocument()->getId(),
|
||||
$version->getId(),
|
||||
);
|
||||
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_RUNNING);
|
||||
$this->em->flush();
|
||||
|
||||
// --------------------------------------------------
|
||||
// Guardrail: verify the index structure
|
||||
// --------------------------------------------------
|
||||
$this->metaManager->validateAgainstCurrent();
|
||||
|
||||
// --------------------------------------------------
|
||||
// Remove this document's old chunks (streaming)
|
||||
// --------------------------------------------------
|
||||
$this->chunkManager->compactByDocument(
|
||||
$version->getDocument()->getId()
|
||||
);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Generate new chunks
|
||||
// --------------------------------------------------
|
||||
$records = $this->knowledgeIngestService
|
||||
->buildChunkRecords($version);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Append to NDJSON
|
||||
// --------------------------------------------------
|
||||
$this->chunkManager->appendChunks($records);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Rebuild FAISS completely from scratch (deterministic)
|
||||
// --------------------------------------------------
|
||||
$logPath = $job->getLogPath();
|
||||
$this->vectorBuilder->rebuildFromNdjson($logPath);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Success
|
||||
// --------------------------------------------------
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_INDEXED);
|
||||
$this->jobService->markCompleted($job);
|
||||
|
||||
$this->em->flush();
|
||||
|
||||
} catch (IndexStructureChangedException $e) {
|
||||
|
||||
if ($job) {
|
||||
$this->jobService->markFailed($job, $e->getMessage());
|
||||
}
|
||||
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_FAILED);
|
||||
$this->em->flush();
|
||||
|
||||
throw $e;
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
|
||||
if ($job) {
|
||||
$this->jobService->markFailed($job, $e->getMessage());
|
||||
}
|
||||
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_FAILED);
|
||||
$this->em->flush();
|
||||
|
||||
throw $e;
|
||||
|
||||
} finally {
|
||||
$this->lockService->release();
|
||||
}
|
||||
|
||||
return $job;
|
||||
$this->vectorBuilder->rebuildFromNdjson();
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// GLOBAL REINDEX
|
||||
// ============================================================
|
||||
|
||||
public function globalReindex(User $user): IngestJob
|
||||
public function globalReindex(): void
|
||||
{
|
||||
if (!$this->lockService->acquire()) {
|
||||
throw new \RuntimeException('Another ingest job is already running.');
|
||||
}
|
||||
$allRecords = $this->knowledgeIngestService
|
||||
->buildAllActiveChunkRecords();
|
||||
|
||||
$job = null;
|
||||
$this->chunkManager->rewriteAll($allRecords);
|
||||
|
||||
try {
|
||||
$this->vectorBuilder->rebuildFromNdjson();
|
||||
|
||||
$job = $this->jobService->startJob(
|
||||
IngestJob::TYPE_GLOBAL_REINDEX,
|
||||
$user
|
||||
);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Re-ingest all active documents
|
||||
// --------------------------------------------------
|
||||
$allRecords = $this->knowledgeIngestService
|
||||
->buildAllActiveChunkRecords();
|
||||
|
||||
// --------------------------------------------------
|
||||
// Rewrite the entire NDJSON
|
||||
// --------------------------------------------------
|
||||
$this->chunkManager->rewriteAll($allRecords);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Rebuild FAISS completely from scratch
|
||||
// --------------------------------------------------
|
||||
$logPath = $job->getLogPath();
|
||||
$this->vectorBuilder->rebuildFromNdjson($logPath);
|
||||
|
||||
// --------------------------------------------------
|
||||
// Update meta and increment index_version
|
||||
// --------------------------------------------------
|
||||
$this->metaManager->writeMetaForGlobalReindex();
|
||||
|
||||
$this->jobService->markCompleted($job);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
|
||||
if ($job) {
|
||||
$this->jobService->markFailed($job, $e->getMessage());
|
||||
}
|
||||
|
||||
throw $e;
|
||||
|
||||
} finally {
|
||||
$this->lockService->release();
|
||||
}
|
||||
|
||||
return $job;
|
||||
$this->metaManager->writeMetaForGlobalReindex();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user