harden code

This commit is contained in:
team 1
2026-02-15 16:01:08 +01:00
parent 5b100039e0
commit c099f72703
13 changed files with 397 additions and 59 deletions

View File

@@ -0,0 +1,60 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Entity\IngestJob;
use App\Service\IngestOrchestrator;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command that runs an already persisted ingest job, identified by its id.
 *
 * The job is looked up through the entity manager and handed to the orchestrator.
 * Jobs that are already in a final state are reported as successful without being re-run.
 */
#[AsCommand(name: 'mto:agent:ingest:run')]
final class IngestRunJobCommand extends Command
{
    public function __construct(
        private readonly IngestOrchestrator $orchestrator,
        private readonly EntityManagerInterface $em,
    ) {
        parent::__construct();
    }

    /**
     * Declares the single required argument: the id of the job to run.
     */
    protected function configure(): void
    {
        $this->addArgument('jobId', InputArgument::REQUIRED, 'UUID of IngestJob');
    }

    /**
     * Loads the job and executes it via the orchestrator.
     *
     * @return int Command::SUCCESS when the job ran or was already finished,
     *             Command::FAILURE when the job does not exist or the run threw.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $requestedId = (string) $input->getArgument('jobId');

        /** @var IngestJob|null $job */
        $job = $this->em->getRepository(IngestJob::class)->find($requestedId);
        if ($job === null) {
            $output->writeln('<error>IngestJob not found.</error>');

            return Command::FAILURE;
        }

        // Idempotence: a job that has already reached a final state is simply reported as ok.
        $finalStatuses = [
            IngestJob::STATUS_COMPLETED,
            IngestJob::STATUS_FAILED,
            IngestJob::STATUS_ABORTED,
        ];
        if (in_array($job->getStatus(), $finalStatuses, true)) {
            $output->writeln('<info>Job already finished.</info>');

            return Command::SUCCESS;
        }

        try {
            $output->writeln(sprintf('<info>Running ingest job %s ...</info>', (string) $job->getId()));
            $this->orchestrator->runExistingJob($job, false);
            $output->writeln('<info>Job completed.</info>');
        } catch (\Throwable $e) {
            // Any failure of the run is reported on the console and turned into a failure exit code.
            $output->writeln(sprintf('<error>Job failed: %s</error>', $e->getMessage()));

            return Command::FAILURE;
        }

        return Command::SUCCESS;
    }
}

View File

@@ -46,7 +46,7 @@ class KnowledgeIngestCommand extends Command
$output->writeln('Starting ingest...');
$job = $this->orchestrator->runForVersion($version, $user, false);
$job = $this->orchestrator->runForVersion($version, $user);
$output->writeln(sprintf('<info>Ingest completed. Job: %s</info>', (string) $job->getId()));

View File

@@ -1,13 +1,13 @@
<?php
namespace App\Controller\Admin;
use App\Entity\Document;
use App\Entity\DocumentVersion;
use App\Entity\IngestJob;
use App\Service\DocumentService;
use App\Service\IngestOrchestrator;
use App\Service\FormatText;
use App\Service\IngestJobService;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\RedirectResponse;
@@ -18,10 +18,15 @@ use Symfony\Component\Uid\Uuid;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\File\Exception\FileException;
#[Route('/admin/documents')]
class DocumentController extends AbstractController
{
/**
 * @param FormatText $formatText Text helper; this controller uses it to slugify document titles.
 */
public function __construct(private readonly FormatText $formatText)
{
}
#[Route('', name: 'admin_documents')]
public function index(EntityManagerInterface $em): Response
{
@@ -62,8 +67,10 @@ class DocumentController extends AbstractController
{
if ($request->isMethod('POST')) {
$title = $request->request->get('title');
$file = $request->files->get('file');
$title = $request->request->get('title') ?: $file->getClientOriginalName();
$title = $this->formatText->slugify($title);
if (!$file || !$title) {
$this->addFlash('error', 'Titel und Datei sind erforderlich.');
@@ -191,7 +198,7 @@ class DocumentController extends AbstractController
string $versionId,
Request $request,
EntityManagerInterface $em,
IngestOrchestrator $orchestrator
IngestJobService $jobService,
): ?RedirectResponse {
$dryRun = false;
if (!$this->isCsrfTokenValid('ingest_version', $request->request->get('_token'))) {
@@ -214,14 +221,47 @@ class DocumentController extends AbstractController
return null;
}
$orchestrator->runForVersion(
$version,
// ---------------------------------------------------------
// Asynchroner Ingest (ohne Messenger):
// 1) Job als QUEUED anlegen
// 2) Symfony-Command im Hintergrund starten
// 3) Direkt auf Job-Detailseite redirecten (Loader + Polling)
// ---------------------------------------------------------
$job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT,
$this->getUser(),
$dryRun
$version->getDocument()->getId(),
$version->getId(),
null,
IngestJob::STATUS_QUEUED
);
return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId()
// Hintergrundprozess starten (Provider-kompatibel, kein Worker/Daemon)
$projectDir = (string) $this->getParameter('kernel.project_dir');
$console = $projectDir . '/bin/console';
$cmd = sprintf(
'%s %s %s %s > /dev/null 2>&1 &',
escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'),
escapeshellarg((string) $job->getId()),
escapeshellarg('--no-interaction'),
);
// Best effort: wenn exec deaktiviert ist, sauber abbrechen.
if (!\function_exists('exec')) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('error', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId()
]);
}
exec($cmd);
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
}

View File

@@ -1,6 +1,5 @@
<?php
namespace App\Controller\Admin;
use App\Entity\IngestJob;
@@ -11,6 +10,7 @@ use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\Routing\Attribute\Route;
use App\Ingest\IngestFlow;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\JsonResponse;
#[Route('/admin/jobs')]
class IngestJobController extends AbstractController
@@ -44,13 +44,40 @@ class IngestJobController extends AbstractController
]);
}
/**
 * Returns the current state of a single ingest job as JSON.
 *
 * Requires ROLE_USER. Responds with a 404 when no job exists for the given id.
 *
 * @param string $id Job id taken from the URL (36 characters of hex digits and dashes, per the route requirement).
 *
 * @throws NotFoundHttpException When the job cannot be found.
 */
#[Route(
    '/{id}/status',
    name: 'admin_job_status',
    requirements: ['id' => '[0-9a-fA-F\-]{36}'],
    methods: ['GET']
)]
public function status(string $id, EntityManagerInterface $em): JsonResponse
{
    $this->denyAccessUnlessGranted('ROLE_USER');

    /** @var IngestJob|null $job */
    $job = $em->getRepository(IngestJob::class)->find($id);
    if ($job === null) {
        throw new NotFoundHttpException();
    }

    // Timestamps are serialized in ATOM format; finishedAt stays null while the job has not ended.
    $startedAt = $job->getStartedAt()->format(DATE_ATOM);
    $finishedAt = $job->getFinishedAt()?->format(DATE_ATOM);

    $payload = [
        'id' => (string) $job->getId(),
        'type' => $job->getType(),
        'status' => $job->getStatus(),
        'startedAt' => $startedAt,
        'finishedAt' => $finishedAt,
        'errorMessage' => $job->getErrorMessage(),
    ];

    return $this->json($payload);
}
#[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])]
public function globalReindex(
IngestFlow $flow
): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
$flow->globalReindex($this->getUser());
$flow->globalReindex();
return $this->redirectToRoute('admin_jobs');
}

View File

@@ -11,6 +11,7 @@ class IngestJob
public const TYPE_DOCUMENT = 'DOCUMENT';
public const TYPE_GLOBAL_REINDEX = 'GLOBAL_REINDEX';
public const STATUS_QUEUED = 'QUEUED';
public const STATUS_RUNNING = 'RUNNING';
public const STATUS_COMPLETED = 'COMPLETED';
public const STATUS_FAILED = 'FAILED';
@@ -94,6 +95,11 @@ class IngestJob
$this->finishedAt = new \DateTimeImmutable();
}
/**
 * Switches the job status to RUNNING.
 *
 * Only the status field is changed here; no timestamps or error details are modified.
 */
public function markRunning(): void
{
    $this->status = self::STATUS_RUNNING;
}
public function getErrorMessage(): ?string
{
return $this->errorMessage;

View File

@@ -28,16 +28,12 @@ final readonly class IngestFlow
): void
{
$this->metaManager->validateAgainstCurrent();
$this->chunkManager->compactByDocument(
$version->getDocument()->getId()
);
$records = $this->knowledgeIngestService
->buildChunkRecords($version);
$this->chunkManager->appendChunks($records);
$this->vectorBuilder->rebuildFromNdjson();
}

View File

@@ -0,0 +1,30 @@
<?php
namespace App\Service;
/**
 * Small text formatting helper.
 */
class FormatText
{
    /**
     * Converts arbitrary text into a lowercase, hyphen-separated ASCII slug.
     *
     * Steps: lowercase the text, transliterate German umlauts and sharp s, drop every
     * character that is not a-z, 0-9, whitespace, dot or hyphen, then collapse each run
     * of whitespace, dots and hyphens into a single hyphen and strip hyphens at the edges.
     *
     * Example: "Über Uns.pdf" becomes "ueber-uns-pdf".
     *
     * @param string $text Raw input, expected to be UTF-8.
     *
     * @return string The slug; empty when the input contains no usable characters.
     */
    public function slugify(string $text): string
    {
        $text = mb_strtolower($text, 'UTF-8');

        // Transliterate German special characters before the ASCII filter removes them.
        $text = strtr($text, [
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
        ]);

        // Remove everything that is neither an allowed character nor a separator.
        $text = preg_replace('/[^a-z0-9\s.-]/', '', $text) ?? '';

        // Treat whitespace, dots and hyphens as one separator class so that mixed runs
        // such as ". " or ".." yield exactly one hyphen instead of several.
        $text = preg_replace('/[\s.-]+/', '-', $text) ?? '';

        return trim($text, '-');
    }
}

View File

@@ -1,6 +1,5 @@
<?php
namespace App\Service;
use App\Entity\IngestJob;
@@ -19,10 +18,11 @@ final class IngestJobService
?User $user = null,
?Uuid $documentId = null,
?Uuid $documentVersionId = null,
?string $logPath = null
?string $logPath = null,
string $status = IngestJob::STATUS_RUNNING
): IngestJob
{
$job = new IngestJob($type);
$job = new IngestJob($type, $status);
$job->setStartedBy($user);
$job->setDocumentId($documentId);
$job->setDocumentVersionId($documentVersionId);

View File

@@ -7,6 +7,7 @@ use App\Entity\IngestJob;
use App\Entity\User;
use App\Ingest\IngestFlow;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Uid\Uuid;
final class IngestOrchestrator
{
@@ -19,14 +20,14 @@ final class IngestOrchestrator
}
/**
* Startet Ingest für eine bestimmte DocumentVersion (1 Job pro Run).
* @throws \Throwable
* SYNCHRONE Variante (falls noch genutzt)
*/
public function runForVersion(
DocumentVersion $version,
User $user,
bool $dryRun = false
): IngestJob {
if (!$this->lockService->acquire()) {
throw new \RuntimeException('Another ingest job is already running.');
}
@@ -34,16 +35,12 @@ final class IngestOrchestrator
$job = null;
try {
// Governance: nur PENDING/FAILED erlauben
$status = $version->getIngestStatus();
if (!in_array($status, [
DocumentVersion::INGEST_PENDING,
DocumentVersion::INGEST_FAILED,
], true)) {
throw new \RuntimeException(sprintf('Ingest not allowed for status "%s".', $status));
if ($status === DocumentVersion::INGEST_INDEXED) {
throw new \RuntimeException('DocumentVersion already indexed.');
}
// Job anlegen (einmal!)
$job = $this->jobService->startJob(
IngestJob::TYPE_DOCUMENT,
$user,
@@ -51,18 +48,15 @@ final class IngestOrchestrator
$version->getId(),
);
// Status → RUNNING
$version->setIngestStatus(DocumentVersion::INGEST_RUNNING);
$this->em->flush();
if ($dryRun) {
usleep(200000);
} else {
// Fachlogik ausführen (Flow erzeugt keine Jobs!)
$this->ingestFlow->ingestDocumentVersion($version);
}
// Erfolg
$version->setIngestStatus(DocumentVersion::INGEST_INDEXED);
$this->jobService->markCompleted($job);
$this->em->flush();
@@ -86,7 +80,120 @@ final class IngestOrchestrator
}
/**
 * Asynchronous variant: executes an already persisted job (intended for the detached CLI run).
 *
 * Acquires the ingest lock, switches the job to RUNNING and then either performs a global
 * reindex or ingests the document version referenced by the job. A job that is already in a
 * final state (COMPLETED, FAILED, ABORTED) is left untouched.
 *
 * On any error the job is marked as failed and the error is rethrown. The document version is
 * switched to FAILED only if this run had moved it to RUNNING; a version that was rejected by
 * the status checks (for example because it is already INDEXED) keeps its status.
 *
 * @param IngestJob $job    Persisted job to execute.
 * @param bool      $dryRun When true, the actual ingest work is replaced by a short sleep.
 *
 * @throws \RuntimeException When the lock cannot be acquired, the job type is unsupported,
 *                           the version is missing, or its status does not allow an ingest.
 * @throws \Throwable        Any error raised while running is rethrown after bookkeeping.
 */
public function runExistingJob(IngestJob $job, bool $dryRun = false): void
{
    // NOTE(review): this throws before the job is touched, so a job whose lock attempt fails
    // keeps its current status (e.g. QUEUED) — confirm that this is the intended outcome.
    if (!$this->lockService->acquire()) {
        throw new \RuntimeException('Another ingest job is already running.');
    }

    // Becomes true once this run has switched the document version to RUNNING. Only then
    // may the error path downgrade the version to FAILED.
    $versionMarkedRunning = false;

    try {
        // Idempotence: nothing to do when the job has already reached a final state.
        if (in_array($job->getStatus(), [
            IngestJob::STATUS_COMPLETED,
            IngestJob::STATUS_FAILED,
            IngestJob::STATUS_ABORTED,
        ], true)) {
            return;
        }

        $job->markRunning();
        $this->em->flush();

        // Global reindex: no document version is involved.
        if ($job->getType() === IngestJob::TYPE_GLOBAL_REINDEX) {
            if ($dryRun) {
                usleep(200000);
            } else {
                $this->ingestFlow->globalReindex();
            }

            // NOTE(review): no explicit flush here, unlike the document branch below —
            // presumably markCompleted() persists the job itself; confirm.
            $this->jobService->markCompleted($job);

            return;
        }

        if ($job->getType() !== IngestJob::TYPE_DOCUMENT) {
            throw new \RuntimeException(sprintf(
                'Unsupported ingest job type "%s".',
                $job->getType()
            ));
        }

        $versionId = $job->getDocumentVersionId();
        if (!$versionId instanceof Uuid) {
            throw new \RuntimeException('Job has no document version id.');
        }

        /** @var DocumentVersion|null $version */
        $version = $this->em
            ->getRepository(DocumentVersion::class)
            ->find($versionId);
        if (!$version) {
            throw new \RuntimeException('DocumentVersion not found.');
        }

        $status = $version->getIngestStatus();

        // Refuse to re-ingest a version that is already indexed.
        if ($status === DocumentVersion::INGEST_INDEXED) {
            throw new \RuntimeException('DocumentVersion already indexed.');
        }

        // RUNNING is accepted here in addition to PENDING and FAILED (asynchronous execution).
        if (!in_array($status, [
            DocumentVersion::INGEST_PENDING,
            DocumentVersion::INGEST_FAILED,
            DocumentVersion::INGEST_RUNNING,
        ], true)) {
            throw new \RuntimeException(sprintf(
                'Ingest not allowed for status "%s".',
                $status
            ));
        }

        $version->setIngestStatus(DocumentVersion::INGEST_RUNNING);
        $versionMarkedRunning = true;
        $this->em->flush();

        if ($dryRun) {
            usleep(200000);
        } else {
            $this->ingestFlow->ingestDocumentVersion($version);
        }

        $version->setIngestStatus(DocumentVersion::INGEST_INDEXED);
        $this->jobService->markCompleted($job);
        $this->em->flush();
    } catch (\Throwable $e) {
        $this->jobService->markFailed($job, $e->getMessage());

        // Only a version that this run actually started working on is marked as failed.
        // Errors raised by the guards above must not overwrite the status of a version
        // that was never touched (e.g. one that is already INDEXED).
        $versionId = $job->getDocumentVersionId();
        if ($versionMarkedRunning && $versionId instanceof Uuid) {
            // The version is looked up again by id rather than reusing the earlier reference.
            $version = $this->em
                ->getRepository(DocumentVersion::class)
                ->find($versionId);
            if ($version) {
                $version->setIngestStatus(DocumentVersion::INGEST_FAILED);
                $this->em->flush();
            }
        }

        throw $e;
    } finally {
        // The lock is released on every path: success, early return and failure.
        $this->lockService->release();
    }
}
/**
* Globaler Reindex (synchron)
*/
public function runGlobal(User $user, bool $dryRun = false): IngestJob
{
@@ -97,12 +204,15 @@ final class IngestOrchestrator
$job = null;
try {
$job = $this->jobService->startJob(IngestJob::TYPE_GLOBAL_REINDEX, $user);
$job = $this->jobService->startJob(
IngestJob::TYPE_GLOBAL_REINDEX,
$user
);
if ($dryRun) {
usleep(200000);
} else {
$this->ingestFlow->globalReindex($job->getLogPath());
$this->ingestFlow->globalReindex();
}
$this->jobService->markCompleted($job);
@@ -110,6 +220,7 @@ final class IngestOrchestrator
return $job;
} catch (\Throwable $e) {
if ($job) {
$this->jobService->markFailed($job, $e->getMessage());
}