From 6cc58c7d0de8b3544ba48c472268d89215e5c7eb Mon Sep 17 00:00:00 2001 From: team 1 Date: Mon, 16 Feb 2026 08:48:35 +0100 Subject: [PATCH] harden document versions --- src/Controller/Admin/DocumentController.php | 50 +++++++++++++++++++-- src/Entity/IngestJob.php | 10 ++++- src/Service/DocumentService.php | 45 +++++-------------- src/Service/IngestOrchestrator.php | 34 ++++++++------ 4 files changed, 88 insertions(+), 51 deletions(-) diff --git a/src/Controller/Admin/DocumentController.php b/src/Controller/Admin/DocumentController.php index c1b8775..cffd794 100644 --- a/src/Controller/Admin/DocumentController.php +++ b/src/Controller/Admin/DocumentController.php @@ -173,7 +173,8 @@ class DocumentController extends AbstractController string $versionId, Request $request, EntityManagerInterface $em, - DocumentService $documentService + DocumentService $documentService, + IngestJobService $jobService, ): RedirectResponse { @@ -189,7 +190,50 @@ class DocumentController extends AbstractController try { $documentService->activateVersion($version); - $this->addFlash('success', 'Version aktiviert und Index aktualisiert.'); + // --------------------------------------------------------- + // Saubere IngestJob-Integration: + // 1) Job als QUEUED anlegen (spezieller Typ für Aktivierung) + // 2) Symfony-Command im Hintergrund starten + // 3) Direkt auf Job-Detailseite redirecten (Loader + Polling) + // --------------------------------------------------------- + + $job = $jobService->startJob( + IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE, + $this->getUser(), + $version->getDocument()->getId(), + $version->getId(), + null, + IngestJob::STATUS_QUEUED + ); + + // Hintergrundprozess starten (Provider-kompatibel, kein Worker/Daemon) + $projectDir = (string)$this->getParameter('kernel.project_dir'); + $console = $projectDir . '/bin/console'; + + $cmd = sprintf( + '%s %s %s %s > /dev/null 2>&1 &', + escapeshellarg($console), + escapeshellarg('mto:agent:ingest:run'), + escapeshellarg((string)$job->getId()), + escapeshellarg('--no-interaction'), + ); + + // Best effort: wenn exec deaktiviert ist, sauber abbrechen. + if (!function_exists('exec')) { + $jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).'); + $this->addFlash('danger', 'Aktivierung ok, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).'); + return $this->redirectToRoute('admin_document_show', [ + 'id' => $version->getDocument()->getId() + ]); + } + + exec($cmd); + + $this->addFlash('success', 'Version aktiviert. Ingest-Job wurde erstellt und gestartet.'); + + return $this->redirectToRoute('admin_job_show', [ + 'id' => (string)$job->getId(), + ]); } catch (\Throwable $e) { $this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $e->getMessage()); } @@ -310,4 +354,4 @@ class DocumentController extends AbstractController $this->addFlash('success', 'Das System wurde erfolgreich zurückgesetzt.'); return $this->redirectToRoute('admin_dashboard'); } -} \ No newline at end of file +} diff --git a/src/Entity/IngestJob.php b/src/Entity/IngestJob.php index b0bd125..a7e6481 100644 --- a/src/Entity/IngestJob.php +++ b/src/Entity/IngestJob.php @@ -10,6 +10,12 @@ class IngestJob { public const TYPE_DOCUMENT = 'DOCUMENT'; public const TYPE_GLOBAL_REINDEX = 'GLOBAL_REINDEX'; + /** + * Special job type used when a DocumentVersion is activated. + * Semantics: always re-ingest the selected version (even if it was previously INDEXED), + * ensuring the index reflects the currently active version. + */ + public const TYPE_DOCUMENT_VERSION_ACTIVATE = 'DOCUMENT_VERSION_ACTIVATE'; public const STATUS_QUEUED = 'QUEUED'; public const STATUS_RUNNING = 'RUNNING'; @@ -49,12 +55,12 @@ class IngestJob #[ORM\Column(type: 'text', nullable: true)] private ?string $errorMessage = null; - public function __construct(string $type) + public function __construct(string $type, string $status = self::STATUS_RUNNING) { $this->id = Uuid::v4(); $this->type = $type; $this->startedAt = new \DateTimeImmutable(); - $this->status = self::STATUS_RUNNING; + $this->status = $status; } public function getId(): Uuid { return $this->id; } diff --git a/src/Service/DocumentService.php b/src/Service/DocumentService.php index cf12874..6207b14 100644 --- a/src/Service/DocumentService.php +++ b/src/Service/DocumentService.php @@ -5,15 +5,12 @@ namespace App\Service; use App\Entity\Document; use App\Entity\DocumentVersion; use App\Entity\User; -use App\Ingest\IngestFlow; use Doctrine\ORM\EntityManagerInterface; class DocumentService { public function __construct( private EntityManagerInterface $em, - private LockService $lockService, - private IngestFlow $ingestFlow, ) {} /** @@ -83,39 +80,21 @@ class DocumentService */ public function activateVersion(DocumentVersion $version): void { - if (!$this->lockService->acquire()) { - throw new \RuntimeException('Another ingest job is already running.'); + $document = $version->getDocument(); + + // Aktiv-Status in DB konsistent setzen (genau 1 aktive Version) + foreach ($document->getVersions() as $existingVersion) { + $existingVersion->setActive(false); } - try { - $document = $version->getDocument(); + $version->setActive(true); + $document->setCurrentVersion($version); - // 1) Aktiv-Status in DB konsistent setzen (genau 1 aktive Version) - foreach ($document->getVersions() as $existingVersion) { - $existingVersion->setActive(false); - } + // Wichtig: Aktivierung soll einen Job auslösen. Damit der Job NICHT an "INDEXED" scheitert, + // setzen wir hier bewusst auf PENDING. + $version->setIngestStatus(DocumentVersion::INGEST_PENDING); - $version->setActive(true); - $document->setCurrentVersion($version); - - // 2) Ingest-Status (UI) – wird im Fehlerfall auf FAILED gesetzt - $version->setIngestStatus(DocumentVersion::INGEST_RUNNING); - $this->em->flush(); - - // 3) Deterministischer Re-Ingest: alte Chunks raus, neue rein, FAISS rebuild - $this->ingestFlow->ingestDocumentVersion($version); - - $version->setIngestStatus(DocumentVersion::INGEST_INDEXED); - $this->em->flush(); - - } catch (\Throwable $e) { - // Aktivierung bleibt in DB bestehen, aber Index ist ggf. nicht aktuell → Status markieren - $version->setIngestStatus(DocumentVersion::INGEST_FAILED); - $this->em->flush(); - throw $e; - } finally { - $this->lockService->release(); - } + $this->em->flush(); } /** @@ -152,4 +131,4 @@ class DocumentService return $max + 1; } -} \ No newline at end of file +} diff --git a/src/Service/IngestOrchestrator.php b/src/Service/IngestOrchestrator.php index c39c2c9..67f7c3f 100644 --- a/src/Service/IngestOrchestrator.php +++ b/src/Service/IngestOrchestrator.php @@ -115,7 +115,9 @@ final class IngestOrchestrator return; } - if ($job->getType() !== IngestJob::TYPE_DOCUMENT) { + $isActivateJob = $job->getType() === IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE; + + if (!$isActivateJob && $job->getType() !== IngestJob::TYPE_DOCUMENT) { throw new \RuntimeException(sprintf( 'Unsupported ingest job type "%s".', $job->getType() @@ -139,21 +141,27 @@ final class IngestOrchestrator $status = $version->getIngestStatus(); - // Nur blockieren wenn wirklich schon indexed - if ($status === DocumentVersion::INGEST_INDEXED) { - throw new \RuntimeException('DocumentVersion already indexed.'); + // Bei Aktivierungs-Jobs IMMER re-ingestieren (auch wenn die Version früher schon indexed war). + // Hintergrund: nach Aktivierung soll der Index deterministisch die aktive Version widerspiegeln. + if (!$isActivateJob) { + // Nur blockieren wenn wirklich schon indexed + if ($status === DocumentVersion::INGEST_INDEXED) { + throw new \RuntimeException('DocumentVersion already indexed.'); + } } // RUNNING darf hier erlaubt sein (async!) - if (!in_array($status, [ - DocumentVersion::INGEST_PENDING, - DocumentVersion::INGEST_FAILED, - DocumentVersion::INGEST_RUNNING, - ], true)) { - throw new \RuntimeException(sprintf( - 'Ingest not allowed for status "%s".', - $status - )); + if (!$isActivateJob) { + if (!in_array($status, [ + DocumentVersion::INGEST_PENDING, + DocumentVersion::INGEST_FAILED, + DocumentVersion::INGEST_RUNNING, + ], true)) { + throw new \RuntimeException(sprintf( + 'Ingest not allowed for status "%s".', + $status + )); + } } $version->setIngestStatus(DocumentVersion::INGEST_RUNNING);