add new route for global reindex

This commit is contained in:
team 1
2026-02-17 16:00:59 +01:00
parent 0b96ce6188
commit 2c443c0f1e
4 changed files with 159 additions and 57 deletions

View File

@@ -11,6 +11,7 @@ use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
#[AsCommand(name: 'mto:agent:ingest:run')]
@@ -26,12 +27,14 @@ final class IngestRunJobCommand extends Command
/**
 * Declares the command's input definition.
 *
 * Registers the required `jobId` argument (described as the UUID of an
 * IngestJob) and a value-less `--dry-run` option.
 */
protected function configure(): void
{
$this
->addArgument('jobId', InputArgument::REQUIRED, 'UUID of IngestJob');
->addArgument('jobId', InputArgument::REQUIRED, 'UUID of IngestJob')
// VALUE_NONE: the option is a pure flag and takes no value.
->addOption('dry-run', null, InputOption::VALUE_NONE, 'Run without executing heavy operations');
}
/**
 * Runs the ingest job identified by the `jobId` argument.
 *
 * Loads the job through the entity manager. A job whose status is already
 * COMPLETED, FAILED or ABORTED is reported and treated as success without
 * being run again. Otherwise the job is handed to the orchestrator together
 * with the `--dry-run` flag.
 *
 * @return int Command::SUCCESS when the job was already finished or ran
 *             without throwing; Command::FAILURE when the orchestrator threw.
 *             An earlier FAILURE return exists whose condition is cut off by
 *             the hunk boundary (presumably "job not found" - confirm).
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
$jobId = (string) $input->getArgument('jobId');
$dryRun = (bool) $input->getOption('dry-run');
/** @var IngestJob|null $job */
$job = $this->em->getRepository(IngestJob::class)->find($jobId);
@@ -41,19 +44,37 @@ final class IngestRunJobCommand extends Command
return Command::FAILURE;
}
// Idempotency: if the job has already ended, simply return ok.
if (in_array($job->getStatus(), [IngestJob::STATUS_COMPLETED, IngestJob::STATUS_FAILED, IngestJob::STATUS_ABORTED], true)) {
// Idempotency: do not run jobs again that have already finished
if (in_array($job->getStatus(), [
IngestJob::STATUS_COMPLETED,
IngestJob::STATUS_FAILED,
IngestJob::STATUS_ABORTED,
], true)) {
$output->writeln('<info>Job already finished.</info>');
return Command::SUCCESS;
}
try {
$output->writeln(sprintf('<info>Running ingest job %s ...</info>', (string) $job->getId()));
$this->orchestrator->runExistingJob($job, false);
$output->writeln('<info>Job completed.</info>');
$output->writeln(sprintf(
'<info>Running ingest job %s (type: %s)...</info>',
(string) $job->getId(),
$job->getType()
));
// The --dry-run flag is forwarded as the second argument.
$this->orchestrator->runExistingJob($job, $dryRun);
$output->writeln('<info>Job completed successfully.</info>');
return Command::SUCCESS;
} catch (\Throwable $e) {
$output->writeln(sprintf('<error>Job failed: %s</error>', $e->getMessage()));
// Important: the status is set in the orchestrator
$output->writeln(sprintf(
'<error>Job failed: %s</error>',
$e->getMessage()
));
return Command::FAILURE;
}
}

View File

@@ -3,12 +3,12 @@
namespace App\Controller\Admin;
use App\Entity\IngestJob;
use App\Service\IngestJobService;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\Routing\Attribute\Route;
use App\Ingest\IngestFlow;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\JsonResponse;
@@ -73,12 +73,50 @@ class IngestJobController extends AbstractController
/**
 * Queues a global reindex job and starts it as a background console process.
 *
 * Requires ROLE_SUPER_ADMIN. Creates an IngestJob of type
 * TYPE_GLOBAL_REINDEX in status QUEUED, launches
 * `bin/console mto:agent:ingest:run <jobId> --no-interaction` through exec(),
 * and redirects to the `admin_job_show` route for that job. When exec() is not
 * available, the job is marked as failed, a flash message is added and the
 * user is redirected to `admin_jobs`.
 */
#[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])]
public function globalReindex(
IngestFlow $flow
IngestJobService $jobService,
): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
$flow->globalReindex();
// ---------------------------------------------------------
// 1) Create the job (QUEUED)
// ---------------------------------------------------------
$job = $jobService->startJob(
IngestJob::TYPE_GLOBAL_REINDEX,
$this->getUser(),
null,
null,
null,
IngestJob::STATUS_QUEUED
);
// ---------------------------------------------------------
// 2) Start the CLI in the background
// ---------------------------------------------------------
$projectDir = (string)$this->getParameter('kernel.project_dir');
$console = $projectDir . '/bin/console';
// Every argument is shell-escaped. Output is redirected to /dev/null and the
// trailing "&" backgrounds the process, so exec() does not wait for the job.
$cmd = sprintf(
'%s %s %s %s > /dev/null 2>&1 &',
escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'),
escapeshellarg((string)$job->getId()),
escapeshellarg('--no-interaction'),
);
if (!function_exists('exec')) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Global Reindex konnte nicht gestartet werden.');
return $this->redirectToRoute('admin_jobs');
}
// NOTE(review): the result of exec() is not inspected; if the process cannot
// be launched, nothing here moves the job out of QUEUED - confirm that
// something else detects stuck jobs.
exec($cmd);
// ---------------------------------------------------------
// 3) Redirect to the job detail page (loader)
// ---------------------------------------------------------
return $this->redirectToRoute('admin_job_show', [
'id' => (string)$job->getId(),
]);
}
}

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Ingest;
use App\Entity\Document;
use App\Entity\DocumentVersion;
use App\Index\IndexMetaManager;
use App\Knowledge\ChunkManager;
@@ -40,7 +41,6 @@ final readonly class IngestFlow
try {
// Entfernt alte Chunks dieses Dokuments
$this->chunkManager->compactByDocument($version->getDocument()->getId());
$existing = $this->chunkManager->countAllChunks();
@@ -86,10 +86,39 @@ final readonly class IngestFlow
/**
 * Rebuilds all chunk data and the index from the active documents.
 *
 * Aborts early when no document has STATUS_ACTIVE or when no chunk records
 * were produced; otherwise passes the records to ChunkManager::rewriteAll()
 * and then calls rebuildIndex(true).
 *
 * @throws \RuntimeException when there are no active documents or no chunk
 *                           records were generated
 */
public function globalReindex(): void
{
$records = $this->knowledgeIngestService->buildAllActiveChunkRecords();
// NOTE(review): this hunk shows pre- and post-change lines without markers;
// confirm whether this validation call is still part of the method.
$this->metaManager->validateAgainstCurrent();
// 1) Check whether active documents exist
$activeDocuments = $this->em
->getRepository(Document::class)
->createQueryBuilder('d')
->where('d.status = :status')
->setParameter('status', Document::STATUS_ACTIVE)
->getQuery()
->getResult();
if (empty($activeDocuments)) {
throw new \RuntimeException(
'Global Reindex abgebrochen: Es sind keine aktiven Dokumente vorhanden.'
);
}
// 2) Build the chunk records
// The second argument (false) discards the iterator's keys, so the result
// is a plain list.
// NOTE(review): presumably buildAllActiveChunkRecords() returns a
// Traversable/generator - verify against its declaration.
$records = iterator_to_array(
$this->knowledgeIngestService->buildAllActiveChunkRecords(),
false
);
if (empty($records)) {
throw new \RuntimeException(
'Global Reindex abgebrochen: Es wurden keine Chunks erzeugt. Bitte prüfen Sie die Dokumente.'
);
}
// 3) Rewrite NDJSON
$this->chunkManager->rewriteAll($records);
// 4) Rebuild vector index
$this->rebuildIndex(true);
}
@@ -102,20 +131,17 @@ final readonly class IngestFlow
$this->metaManager->validateAgainstCurrent();
$document = $this->em
->getRepository(\App\Entity\Document::class)
->getRepository(Document::class)
->find($documentId);
if (!$document) {
throw new \RuntimeException('Document not found.');
}
// 1) NDJSON bereinigen
$this->chunkManager->compactByDocument($documentId);
// 2) Vector neu bauen
$this->rebuildIndex(false);
// 3) DB Delete (nach rebuild)
$this->em->remove($document);
$this->em->flush();
}

View File

@@ -33,6 +33,8 @@
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif job.status == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif job.status == 'ABORTED' %}
<span class="badge bg-dark">ABORTED</span>
{% else %}
<span class="badge bg-secondary">{{ job.status }}</span>
{% endif %}
@@ -41,7 +43,14 @@
{# Document reference: links to the admin_document_show route when the job has a documentId, otherwise renders a dash. #}
<div class="mb-2">
<strong>Dokument:</strong>
<a href="/admin/documents/{{ job.documentId }}" class="text-light">{{ job.documentId ?? '-' }}</a>
{% if job.documentId %}
<a href="{{ path('admin_document_show', {id: job.documentId}) }}"
class="text-light">
{{ job.documentId }}
</a>
{% else %}
-
{% endif %}
</div>
<div class="mb-2">
@@ -57,46 +66,44 @@
{# Finish timestamp formatted as d.m.Y H:i:s, or "-" while finishedAt is empty. The span carries the id job-finished-at, which this template's script looks up. #}
<div class="mb-2">
<strong>Beendet:</strong>
<span id="job-finished-at">
{% if job.finishedAt %}
{{ job.finishedAt|date('d.m.Y H:i:s') }}
{% else %}
-
{% endif %}
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }}
</span>
</div>
{# E-mail of the user who started the job, or "-" when startedBy is empty. #}
<div class="mb-2">
<strong>Gestartet von:</strong>
{% if job.startedBy %}
{{ job.startedBy.email }}
{% else %}
-
{% endif %}
{{ job.startedBy ? job.startedBy.email : '-' }}
</div>
{# Progress indicator. The multi-line variant renders it visible only while job.status is QUEUED or RUNNING; for any other status it gets an inline display:none. #}
<div id="job-loader" class="mt-3" style="display:none;">
<div id="job-loader"
class="mt-3"
style="{% if job.status in ['QUEUED','RUNNING'] %}{% else %}display:none;{% endif %}">
<div class="d-flex align-items-center gap-2">
<div class="spinner-border spinner-border-sm text-info" role="status"></div>
<div>
<strong>Ingest läuft…</strong><br>
<small class="text-secondary">Diese Seite aktualisiert den Status automatisch.</small>
<strong>Prozess läuft…</strong><br>
<small class="text-secondary">
Diese Seite aktualisiert den Status automatisch.
</small>
</div>
</div>
</div>
{# Log path (shown only when job.logPath is set) followed by the error box. The multi-line error box is visible on first render only when job.status is FAILED and prints job.errorMessage when present. #}
{# NOTE(review): no closing endif tag for the job.logPath condition is visible in this hunk - confirm the template still compiles. #}
<div id="job-error" class="alert alert-danger mt-3" style="display:none;">
{% if job.logPath %}
<div class="mt-3">
<strong>Log Datei:</strong><br>
<code>{{ job.logPath }}</code>
</div>
<div id="job-error"
class="alert alert-danger mt-3"
style="{% if job.status == 'FAILED' %}{% else %}display:none;{% endif %}">
{% if job.errorMessage %}
<strong>Fehler:</strong><br>
{{ job.errorMessage }}
{% endif %}
</div>
</div>
</div>
<script>
(function () {
const statusUrl = {{ path('admin_job_status', {id: job.id})|json_encode|raw }};
const badgeWrap = document.getElementById('job-status-badge');
const finishedAtEl = document.getElementById('job-finished-at');
@@ -114,11 +121,19 @@
/**
 * Replaces the content of the status badge container with a badge for the
 * given status.
 *
 * Known statuses get their dedicated Bootstrap colour class; any other value
 * falls back to the secondary style. The label is written via textContent, so
 * a status string taken from the JSON response is never parsed as HTML.
 *
 * @param {string} status Upper-cased job status.
 */
function setBadge(status) {
    const badgeClasses = new Map([
        ['COMPLETED', 'bg-success'],
        ['QUEUED', 'bg-secondary'],
        ['RUNNING', 'bg-warning text-dark'],
        ['FAILED', 'bg-danger'],
        ['ABORTED', 'bg-dark'],
    ]);

    const badge = document.createElement('span');
    badge.className = 'badge ' + (badgeClasses.get(status) || 'bg-secondary');
    badge.textContent = status;

    // Clear the previous badge, then insert the freshly built one.
    badgeWrap.textContent = '';
    badgeWrap.appendChild(badge);
}
@@ -131,6 +146,7 @@
const status = (data.status || '').toUpperCase();
setBadge(status);
finishedAtEl.textContent = data.finishedAt
? new Date(data.finishedAt).toLocaleString('de-DE')
: '-';
@@ -144,7 +160,8 @@
// Show the error box once the job has failed and the server sent a message.
if (status === 'FAILED' && data.errorMessage) {
    errorEl.style.display = '';

    // Build the content from DOM nodes instead of innerHTML: the error
    // message is arbitrary text and must not be interpreted as markup.
    const errorLabel = document.createElement('strong');
    errorLabel.textContent = 'Fehler:';

    errorEl.textContent = '';
    errorEl.appendChild(errorLabel);
    errorEl.appendChild(document.createElement('br'));
    errorEl.appendChild(document.createTextNode(String(data.errorMessage)));
}
} catch (e) {
@@ -154,8 +171,8 @@
timer = setInterval(poll, 1000);
poll();
})();
})();
</script>
{% endblock %}