first commit
This commit is contained in:
@@ -4,77 +4,84 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Catalog;
|
||||
|
||||
use App\Config\CatalogIntentConfig;
|
||||
use App\Entity\Document;
|
||||
use App\Tag\TagTypes;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use Doctrine\DBAL\Connection;
|
||||
use Symfony\Component\Uid\Uuid;
|
||||
|
||||
/**
|
||||
* EntityCatalogService
|
||||
* Builds deterministic catalog lists from a validated catalog entity term.
|
||||
*
|
||||
* Deterministische Katalog-Listen auf Basis eines Entity-Terms:
|
||||
* - TagVectorSearch (Score-Gate + Ambiguity-Check)
|
||||
* - DB Query auf document_tag + document (ACTIVE)
|
||||
* - Rückgabe als EIN Textblock (string) oder null (Fallback auf normalen Retrieval)
|
||||
*
|
||||
* Schritt-3 Änderung:
|
||||
* - Headline ist NICHT mehr hardcoded
|
||||
* - Headline basiert dynamisch auf dem gefundenen Tag
|
||||
* This service is intentionally conservative:
|
||||
* - only strong catalog_entity matches may open the catalog path
|
||||
* - ambiguous matches fall back to normal retrieval
|
||||
* - only ACTIVE documents are listed
|
||||
*/
|
||||
final class EntityCatalogService
|
||||
{
|
||||
private const MIN_SCORE = 0.55;
|
||||
private const AMBIGUITY_DELTA = 0.05;
|
||||
private const SEARCH_LIMIT = 3;
|
||||
|
||||
public function __construct(
|
||||
private readonly TagVectorSearchClient $tagVectorClient,
|
||||
private readonly Connection $connection,
|
||||
) {}
|
||||
private readonly Connection $connection,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string|null Textblock oder null (wenn kein sicherer Catalog möglich ist)
|
||||
* Returns a catalog text block or null when no safe catalog path exists.
|
||||
*/
|
||||
public function listByTerm(string $entityTerm): ?string
|
||||
{
|
||||
$entityTerm = trim($entityTerm);
|
||||
|
||||
if ($entityTerm === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 1) Tag-Vektorsuche (Top 3 für Ambiguity-Prüfung)
|
||||
$hits = $this->tagVectorClient->search($entityTerm, 3);
|
||||
$hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT);
|
||||
|
||||
if ($hits === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$best = $hits[0];
|
||||
$bestScore = (float) ($best['score'] ?? 0.0);
|
||||
|
||||
$bestScore = isset($best['score']) ? (float)$best['score'] : 0.0;
|
||||
if ($bestScore < self::MIN_SCORE) {
|
||||
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 2) Ambiguity: wenn Top2 zu nah ist → konservativ abbrechen
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0;
|
||||
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
|
||||
$secondScore = (float) ($hits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
$tagHex = (string)($best['tag_id'] ?? '');
|
||||
if ($tagHex === '') {
|
||||
$tagId = trim((string) ($best['tag_id'] ?? ''));
|
||||
|
||||
if ($tagId === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
// OPTIONAL: Falls TagVectorSearchClient künftig tag_label zurückliefert,
|
||||
// kann das hier direkt verwendet werden.
|
||||
$tagLabel = isset($best['tag_label']) ? (string)$best['tag_label'] : null;
|
||||
try {
|
||||
$tagBinaryId = Uuid::fromString($tagId)->toBinary();
|
||||
} catch (\Throwable) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$tagLabel = trim((string) ($best['label'] ?? ''));
|
||||
|
||||
// 3) DB Query: alle ACTIVE Dokumente zu diesem Tag
|
||||
$rows = $this->connection->fetchAllAssociative(
|
||||
'
|
||||
SELECT d.title
|
||||
SELECT DISTINCT d.title
|
||||
FROM document d
|
||||
INNER JOIN document_tag dt ON dt.document_id = d.id
|
||||
WHERE dt.tag_id = :tagId
|
||||
@@ -82,8 +89,8 @@ final class EntityCatalogService
|
||||
ORDER BY d.title ASC
|
||||
',
|
||||
[
|
||||
'tagId' => Uuid::fromString($tagHex)->toBinary(),
|
||||
'status' => 'ACTIVE',
|
||||
'tagId' => $tagBinaryId,
|
||||
'status' => Document::STATUS_ACTIVE,
|
||||
]
|
||||
);
|
||||
|
||||
@@ -92,37 +99,42 @@ final class EntityCatalogService
|
||||
}
|
||||
|
||||
$titles = [];
|
||||
|
||||
foreach ($rows as $row) {
|
||||
$t = trim((string)($row['title'] ?? ''));
|
||||
if ($t !== '') {
|
||||
$titles[] = $t;
|
||||
$title = trim((string) ($row['title'] ?? ''));
|
||||
|
||||
if ($title === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$titles[$title] = $title;
|
||||
}
|
||||
|
||||
if ($titles === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return $this->buildTextBlock($tagLabel, $titles);
|
||||
return $this->buildTextBlock(
|
||||
$tagLabel !== '' ? $tagLabel : null,
|
||||
array_values($titles)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Dynamische Headline:
|
||||
* - Wenn Tag-Label vorhanden → verwenden
|
||||
* - Sonst generischer Fallback
|
||||
* Builds a stable human-readable list block for the prompt.
|
||||
*
|
||||
* @param list<string> $titles
|
||||
*/
|
||||
private function buildTextBlock(?string $tagLabel, array $titles): string
|
||||
{
|
||||
$headline = 'Folgende Einträge sind verfügbar:';
|
||||
|
||||
if (\is_string($tagLabel) && \trim($tagLabel) !== '') {
|
||||
$headline = sprintf(
|
||||
'Folgende %s sind verfügbar:',
|
||||
$tagLabel
|
||||
);
|
||||
if ($tagLabel !== null && trim($tagLabel) !== '') {
|
||||
$headline = sprintf('Folgende %s sind verfügbar:', trim($tagLabel));
|
||||
}
|
||||
|
||||
$lines = [];
|
||||
|
||||
foreach ($titles as $title) {
|
||||
$lines[] = '- ' . $title;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<?php
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
@@ -28,16 +27,15 @@ use Symfony\Component\Process\Process;
|
||||
final class SystemRebuildCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly IngestJobService $jobService,
|
||||
private readonly IngestOrchestrator $orchestrator,
|
||||
private readonly TagNdjsonExporter $tagExporter,
|
||||
private readonly TagVectorIndexBuilder $tagIndexBuilder,
|
||||
private readonly IndexMetaManager $metaManager,
|
||||
private readonly VectorIndexHealthService $health,
|
||||
private readonly IngestJobService $jobService,
|
||||
private readonly IngestOrchestrator $orchestrator,
|
||||
private readonly TagNdjsonExporter $tagExporter,
|
||||
private readonly TagVectorIndexBuilder $tagIndexBuilder,
|
||||
private readonly IndexMetaManager $metaManager,
|
||||
private readonly VectorIndexHealthService $health,
|
||||
private readonly TagVectorIndexHealthService $tagHealth,
|
||||
private readonly string $projectDir,
|
||||
)
|
||||
{
|
||||
private readonly string $projectDir,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
@@ -58,16 +56,37 @@ final class SystemRebuildCommand extends Command
|
||||
if (!$input->getOption('hard')) {
|
||||
$io->error('Safety switch missing: you must pass --hard to run this command.');
|
||||
$io->writeln('Example: bin/console mto:agent:system:rebuild --hard');
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$dryRun = (bool)$input->getOption('dry-run');
|
||||
$dryRun = (bool) $input->getOption('dry-run');
|
||||
|
||||
$io->title('mto:agent:system:rebuild --hard');
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 1) GLOBAL REINDEX (chunks rewrite + vector rebuild)
|
||||
// ---------------------------------------------------------
|
||||
if (!$this->runGlobalReindex($io, $dryRun)) {
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
if (!$this->runTagRebuild($io, $input, $dryRun)) {
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
if (!$this->runVectorServiceReload($io, $input, $dryRun)) {
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
if (!$this->runHealthChecks($io, $input)) {
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$io->success('System rebuild finished.');
|
||||
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
private function runGlobalReindex(SymfonyStyle $io, bool $dryRun): bool
|
||||
{
|
||||
$io->section('1/4 Global reindex (chunks + vector index)');
|
||||
|
||||
$job = $this->jobService->startJob(
|
||||
@@ -82,141 +101,181 @@ final class SystemRebuildCommand extends Command
|
||||
try {
|
||||
$this->orchestrator->runExistingJob($job, $dryRun);
|
||||
$io->success('Global reindex completed.');
|
||||
|
||||
return true;
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Global reindex failed: ' . $e->getMessage());
|
||||
return Command::FAILURE;
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 2) TAG REBUILD (tags.ndjson + vector_tags.index)
|
||||
// ---------------------------------------------------------
|
||||
if (!$input->getOption('no-tags')) {
|
||||
$io->section('2/4 Tag rebuild (tags.ndjson + vector_tags.index)');
|
||||
|
||||
if ($dryRun) {
|
||||
$io->note('dry-run enabled: tag rebuild skipped (would export + build tag index).');
|
||||
} else {
|
||||
try {
|
||||
$export = $this->tagExporter->export();
|
||||
|
||||
$io->writeln('<info>Exported tags.ndjson</info>');
|
||||
$io->writeln('Path: ' . $export['path']);
|
||||
$io->writeln('Tags: ' . $export['tags']);
|
||||
$io->writeln('Lines: ' . $export['lines']);
|
||||
$io->writeln('Bytes: ' . $export['bytes']);
|
||||
|
||||
$this->tagIndexBuilder->build();
|
||||
$io->writeln('<info>Built vector_tags.index</info>');
|
||||
|
||||
$this->metaManager->touchRuntime([
|
||||
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
||||
]);
|
||||
$io->success('Tag rebuild completed.');
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Tag rebuild failed: ' . $e->getMessage());
|
||||
return Command::FAILURE;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
private function runTagRebuild(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
|
||||
{
|
||||
if ((bool) $input->getOption('no-tags')) {
|
||||
$io->section('2/4 Tag rebuild');
|
||||
$io->note('Skipped due to --no-tags.');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 3) VECTOR SERVICE (install deps + start + reload)
|
||||
// ---------------------------------------------------------
|
||||
if (!$input->getOption('no-reload')) {
|
||||
$io->section('3/4 Vector service reload (uvicorn)');
|
||||
$io->section('2/4 Tag rebuild (tags.ndjson + vector_tags.index)');
|
||||
|
||||
if ($dryRun) {
|
||||
$io->note('dry-run enabled: service reload skipped.');
|
||||
} else {
|
||||
$cmd = [
|
||||
'.venv/bin/python',
|
||||
'python/vector/vector_control.py',
|
||||
'--install',
|
||||
'--start',
|
||||
'--reload',
|
||||
'--port', '8090',
|
||||
'--host', '0.0.0.0'
|
||||
];
|
||||
if ($dryRun) {
|
||||
$io->note('dry-run enabled: tag rebuild skipped (would export + build tag index).');
|
||||
|
||||
$process = new Process($cmd, $this->projectDir);
|
||||
$process->setTimeout(600);
|
||||
$process->run();
|
||||
return true;
|
||||
}
|
||||
|
||||
$out = trim($process->getOutput());
|
||||
$err = trim($process->getErrorOutput());
|
||||
try {
|
||||
$export = $this->tagExporter->export();
|
||||
|
||||
if ($out !== '') {
|
||||
$io->writeln($out);
|
||||
}
|
||||
if ($err !== '') {
|
||||
$io->writeln('<comment>' . $err . '</comment>');
|
||||
}
|
||||
$io->writeln('<info>Exported tags.ndjson</info>');
|
||||
$io->writeln('Path: ' . (string) $export['path']);
|
||||
$io->writeln('Tags: ' . (string) $export['tags']);
|
||||
$io->writeln('Lines: ' . (string) $export['lines']);
|
||||
$io->writeln('Bytes: ' . (string) $export['bytes']);
|
||||
|
||||
if (!$process->isSuccessful()) {
|
||||
$io->error('Vector service reload failed (non-zero exit code).');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
$this->tagIndexBuilder->build();
|
||||
|
||||
$io->success('Vector service reloaded.');
|
||||
}
|
||||
} else {
|
||||
$io->success('Tag rebuild completed.');
|
||||
|
||||
return true;
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Tag rebuild failed: ' . $e->getMessage());
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private function runVectorServiceReload(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
|
||||
{
|
||||
if ((bool) $input->getOption('no-reload')) {
|
||||
$io->section('3/4 Vector service reload');
|
||||
$io->note('Skipped due to --no-reload.');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 4) HEALTH CHECK (NDJSON vs vector meta)
|
||||
// ---------------------------------------------------------
|
||||
if (!$input->getOption('no-health')) {
|
||||
$io->section('4/4 Health check');
|
||||
$io->section('3/4 Vector service reload (uvicorn)');
|
||||
|
||||
try {
|
||||
$report = $this->health->check();
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Health check failed: ' . $e->getMessage());
|
||||
return Command::FAILURE;
|
||||
}
|
||||
if ($dryRun) {
|
||||
$io->note('dry-run enabled: service reload skipped.');
|
||||
|
||||
try {
|
||||
$reportTag = $this->tagHealth->check();
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Tag health check failed: ' . $e->getMessage());
|
||||
return Command::FAILURE;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
$io->definitionList(
|
||||
['ndjson_exists' => $report['ndjson_exists'] ? 'yes' : 'no'],
|
||||
['ndjson_chunk_count' => (string)$report['ndjson_chunk_count']],
|
||||
['vector_exists' => $report['vector_exists'] ? 'yes' : 'no'],
|
||||
['meta_exists' => $report['meta_exists'] ? 'yes' : 'no'],
|
||||
['vector_chunk_count' => (string)$report['vector_chunk_count']],
|
||||
['status' => (string)$report['status']],
|
||||
);
|
||||
$cmd = [
|
||||
'.venv/bin/python',
|
||||
'python/vector/vector_control.py',
|
||||
'--install',
|
||||
'--start',
|
||||
'--reload',
|
||||
'--port', '8090',
|
||||
'--host', '0.0.0.0',
|
||||
];
|
||||
|
||||
$io->definitionList(
|
||||
['tags_ndjson_exists' => $reportTag['tags_ndjson_exists'] ? 'yes' : 'no'],
|
||||
['tags_ndjson_count' => (string)$reportTag['tags_ndjson_count']],
|
||||
['tag_vector_exists' => $reportTag['vector_exists'] ? 'yes' : 'no'],
|
||||
['tag_meta_exists' => $reportTag['meta_exists'] ? 'yes' : 'no'],
|
||||
['vector_tag_count' => (string)$reportTag['vector_tag_count']],
|
||||
['status' => (string)$reportTag['status']],
|
||||
);
|
||||
$process = new Process($cmd, $this->projectDir);
|
||||
$process->setTimeout(600);
|
||||
$process->run();
|
||||
|
||||
if (!in_array($report['status'], ['OK', 'OK_EMPTY'], true)) {
|
||||
$io->error('Health check not OK: ' . $report['status']);
|
||||
return Command::FAILURE;
|
||||
}
|
||||
$stdout = trim($process->getOutput());
|
||||
$stderr = trim($process->getErrorOutput());
|
||||
|
||||
$io->success('Health check OK.');
|
||||
} else {
|
||||
if ($stdout !== '') {
|
||||
$io->writeln($stdout);
|
||||
}
|
||||
|
||||
if ($stderr !== '') {
|
||||
$io->writeln('<comment>' . $stderr . '</comment>');
|
||||
}
|
||||
|
||||
if (!$process->isSuccessful()) {
|
||||
$io->error('Vector service reload failed (non-zero exit code).');
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
$io->success('Vector service reloaded.');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private function runHealthChecks(SymfonyStyle $io, InputInterface $input): bool
|
||||
{
|
||||
if ((bool) $input->getOption('no-health')) {
|
||||
$io->section('4/4 Health check');
|
||||
$io->note('Skipped due to --no-health.');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
$io->success('System rebuild finished.');
|
||||
return Command::SUCCESS;
|
||||
$io->section('4/4 Health check');
|
||||
|
||||
try {
|
||||
$chunkReport = $this->health->check();
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Health check failed: ' . $e->getMessage());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
$tagReport = $this->tagHealth->check();
|
||||
} catch (\Throwable $e) {
|
||||
$io->error('Tag health check failed: ' . $e->getMessage());
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
$this->renderChunkHealth($io, $chunkReport);
|
||||
$this->renderTagHealth($io, $tagReport);
|
||||
|
||||
if (!$this->isHealthOk((string) ($chunkReport['status'] ?? 'UNKNOWN'))) {
|
||||
$io->error('Chunk health check not OK: ' . (string) ($chunkReport['status'] ?? 'UNKNOWN'));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!$this->isHealthOk((string) ($tagReport['status'] ?? 'UNKNOWN'))) {
|
||||
$io->error('Tag health check not OK: ' . (string) ($tagReport['status'] ?? 'UNKNOWN'));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
$io->success('Health check OK.');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Prints a chunk health report as a definition list.
 *
 * Flag entries are rendered as "yes"/"no"; missing counters fall back to 0
 * and a missing status falls back to "UNKNOWN".
 *
 * @param array<string, mixed> $report health report to display
 */
private function renderChunkHealth(SymfonyStyle $io, array $report): void
{
    // Small formatters so every row is built the same way.
    $flag = static fn (string $key): string => empty($report[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($report[$key] ?? 0);

    $io->definitionList(
        ['ndjson_exists' => $flag('ndjson_exists')],
        ['ndjson_chunk_count' => $count('ndjson_chunk_count')],
        ['vector_exists' => $flag('vector_exists')],
        ['meta_exists' => $flag('meta_exists')],
        ['vector_chunk_count' => $count('vector_chunk_count')],
        ['status' => (string) ($report['status'] ?? 'UNKNOWN')],
    );
}
|
||||
|
||||
/**
 * Prints a tag health report as a definition list.
 *
 * Flag entries are rendered as "yes"/"no"; missing counters fall back to 0
 * and a missing status falls back to "UNKNOWN". The report keys
 * "vector_exists" and "meta_exists" are displayed under the labels
 * "tag_vector_exists" and "tag_meta_exists".
 *
 * @param array<string, mixed> $report health report to display
 */
private function renderTagHealth(SymfonyStyle $io, array $report): void
{
    // Small formatters so every row is built the same way.
    $flag = static fn (string $key): string => empty($report[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($report[$key] ?? 0);

    $io->definitionList(
        ['tags_ndjson_exists' => $flag('tags_ndjson_exists')],
        ['tags_ndjson_count' => $count('tags_ndjson_count')],
        ['tag_vector_exists' => $flag('vector_exists')],
        ['tag_meta_exists' => $flag('meta_exists')],
        ['vector_tag_count' => $count('vector_tag_count')],
        ['tags_with_active_document_ids' => $count('tags_with_active_document_ids')],
        ['meta_valid' => $flag('meta_valid')],
        ['status' => (string) ($report['status'] ?? 'UNKNOWN')],
    );
}
|
||||
|
||||
/**
 * Tells whether a health status string counts as healthy.
 *
 * Only the exact values "OK" and "OK_EMPTY" are accepted.
 */
private function isHealthOk(string $status): bool
{
    return $status === 'OK' || $status === 'OK_EMPTY';
}
|
||||
}
|
||||
@@ -8,11 +8,13 @@ use App\Tag\TagVectorIndexHealthService;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:tag:health',
|
||||
description: 'Health-Check für TAG/FAISS Konsistenz'
|
||||
description: 'Health-Check für Tag-/FAISS-Konsistenz'
|
||||
)]
|
||||
final class TagHealthCheckCommand extends Command
|
||||
{
|
||||
@@ -22,14 +24,87 @@ final class TagHealthCheckCommand extends Command
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
/**
 * Registers the command's options.
 *
 * Adds the value-less "--summary" flag (no shortcut), which switches the
 * output from JSON to a readable summary.
 */
protected function configure(): void
{
    $this->addOption(
        name: 'summary',
        shortcut: null,
        mode: InputOption::VALUE_NONE,
        description: 'Gibt eine lesbare Zusammenfassung statt JSON aus.',
    );
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$result = $this->health->check();
|
||||
$status = trim((string) ($result['status'] ?? ''));
|
||||
|
||||
$output->writeln(json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));
|
||||
if ($status === '') {
|
||||
$status = 'UNKNOWN';
|
||||
$result['status'] = $status;
|
||||
$result['error'] = 'Health service returned no status.';
|
||||
}
|
||||
|
||||
return str_starts_with($result['status'], 'OK')
|
||||
if ((bool) $input->getOption('summary')) {
|
||||
$this->renderSummary(new SymfonyStyle($input, $output), $result);
|
||||
} else {
|
||||
$this->renderJson($output, $result);
|
||||
}
|
||||
|
||||
return $this->isHealthy($status)
|
||||
? Command::SUCCESS
|
||||
: Command::FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Renders the health result as a titled, human-readable definition list.
 *
 * Flag entries are shown as "yes"/"no", missing counters as 0 and a missing
 * status as "UNKNOWN". A non-empty "error" entry is printed as a warning
 * below the list.
 *
 * @param array<string, mixed> $result
 */
private function renderSummary(SymfonyStyle $io, array $result): void
{
    // Small formatters so every row is built the same way.
    $flag = static fn (string $key): string => empty($result[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($result[$key] ?? 0);

    $io->title('Tag Vector Health');

    $io->definitionList(
        ['status' => (string) ($result['status'] ?? 'UNKNOWN')],
        ['tags_ndjson_exists' => $flag('tags_ndjson_exists')],
        ['tags_ndjson_count' => $count('tags_ndjson_count')],
        ['vector_exists' => $flag('vector_exists')],
        ['meta_exists' => $flag('meta_exists')],
        ['vector_tag_count' => $count('vector_tag_count')],
        ['meta_valid' => $flag('meta_valid')],
        ['tags_with_active_document_ids' => $count('tags_with_active_document_ids')],
    );

    if (empty($result['error'])) {
        return;
    }

    $io->warning((string) $result['error']);
}
|
||||
|
||||
/**
 * Writes the health result as pretty-printed JSON.
 *
 * If encoding the result fails, a minimal UNKNOWN/json_encode_failed
 * payload is written instead, with a hard-coded JSON string as last resort.
 *
 * @param array<string, mixed> $result
 */
private function renderJson(OutputInterface $output, array $result): void
{
    $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    $encoded = json_encode($result, $flags);

    if ($encoded === false) {
        // First fallback: encode a fixed error payload with the same flags.
        $encoded = json_encode(
            [
                'status' => 'UNKNOWN',
                'error' => 'json_encode_failed',
            ],
            $flags
        );
    }

    if ($encoded === false) {
        // Last resort: literal JSON that needs no encoder.
        $encoded = "{\"status\":\"UNKNOWN\",\"error\":\"json_encode_failed\"}";
    }

    $output->writeln($encoded);
}
|
||||
|
||||
/**
 * Tells whether a health status maps to a successful exit code.
 *
 * Only the exact values "OK" and "OK_EMPTY" are considered healthy.
 */
private function isHealthy(string $status): bool
{
    return match ($status) {
        'OK', 'OK_EMPTY' => true,
        default => false,
    };
}
|
||||
}
|
||||
@@ -14,6 +14,7 @@ use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:tags:job:run',
|
||||
@@ -39,112 +40,152 @@ final class TagRebuildRunJobCommand extends Command
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$jobId = $input->getArgument('jobId');
|
||||
$io = new SymfonyStyle($input, $output);
|
||||
|
||||
$jobId = trim((string) $input->getArgument('jobId'));
|
||||
$create = (bool) $input->getOption('create');
|
||||
|
||||
if (!$create && !$jobId) {
|
||||
$output->writeln('<error>You must provide either a jobId or use --create.</error>');
|
||||
if (!$create && $jobId === '') {
|
||||
$io->error('You must provide either a jobId or use --create.');
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
if ($create && $jobId) {
|
||||
$output->writeln('<error>Use either jobId OR --create, not both.</error>');
|
||||
if ($create && $jobId !== '') {
|
||||
$io->error('Use either jobId OR --create, not both.');
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
if ($create) {
|
||||
$job = new TagRebuildJob();
|
||||
$this->em->persist($job);
|
||||
$this->em->flush();
|
||||
$jobId = $job->getId();
|
||||
$output->writeln('<info>Created new TagRebuildJob: ' . $jobId . '</info>');
|
||||
} else {
|
||||
/** @var TagRebuildJob|null $job */
|
||||
$job = $this->em->getRepository(TagRebuildJob::class)->find($jobId);
|
||||
|
||||
if (!$job instanceof TagRebuildJob) {
|
||||
$output->writeln('<error>Job not found.</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
$fh = null;
|
||||
$job = null;
|
||||
$lockHandle = null;
|
||||
|
||||
try {
|
||||
// ---------------------------------------------------------
|
||||
// LOCK INITIALIZATION
|
||||
// ---------------------------------------------------------
|
||||
$lockDir = \dirname($this->lockFilePath);
|
||||
$job = $create ? $this->createJob($io) : $this->findJob($jobId);
|
||||
$lockHandle = $this->acquireLock();
|
||||
|
||||
if (!\is_dir($lockDir) && !@\mkdir($lockDir, 0775, true) && !\is_dir($lockDir)) {
|
||||
throw new \RuntimeException('Cannot create lock directory.');
|
||||
}
|
||||
|
||||
$fh = @\fopen($this->lockFilePath, 'c+');
|
||||
if (!$fh) {
|
||||
throw new \RuntimeException('Cannot open lock file: ' . $this->lockFilePath);
|
||||
}
|
||||
|
||||
if (!@\flock($fh, LOCK_EX | LOCK_NB)) {
|
||||
throw new \RuntimeException('Another tag rebuild is currently running (lock busy).');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// MARK RUNNING
|
||||
// ---------------------------------------------------------
|
||||
$job->markRunning();
|
||||
$this->em->flush();
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// EXPORT TAGS (NDJSON)
|
||||
// ---------------------------------------------------------
|
||||
$export = $this->exporter->export();
|
||||
$this->assertValidExport($export);
|
||||
|
||||
if (
|
||||
!isset($export['path']) ||
|
||||
!\is_string($export['path']) ||
|
||||
!\file_exists($export['path'])
|
||||
) {
|
||||
throw new \RuntimeException('Export failed: NDJSON file missing.');
|
||||
}
|
||||
$io->writeln('<info>tags.ndjson exported</info>');
|
||||
$io->writeln('Path: ' . (string) $export['path']);
|
||||
$io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
|
||||
$io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
|
||||
$io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
|
||||
|
||||
if (isset($export['count']) && (int) $export['count'] === 0) {
|
||||
throw new \RuntimeException('Export produced zero tags.');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// BUILD VECTOR INDEX
|
||||
// ---------------------------------------------------------
|
||||
$this->builder->build();
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// MARK COMPLETED
|
||||
// ---------------------------------------------------------
|
||||
$job->markCompleted();
|
||||
$this->em->flush();
|
||||
|
||||
$output->writeln('<info>Tag rebuild successful.</info>');
|
||||
$output->writeln('NDJSON: ' . $export['path']);
|
||||
$io->success('Tag rebuild successful.');
|
||||
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
catch (\Throwable $e) {
|
||||
|
||||
if (isset($job)) {
|
||||
$job->markFailed($e->getMessage());
|
||||
} catch (\Throwable $e) {
|
||||
if ($job instanceof TagRebuildJob) {
|
||||
$job->markFailed($this->buildSafeErrorMessage($e));
|
||||
$this->em->flush();
|
||||
}
|
||||
|
||||
$output->writeln('<error>FAILED: ' . $e->getMessage() . '</error>');
|
||||
$io->error('FAILED: ' . $e->getMessage());
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
finally {
|
||||
|
||||
if ($fh) {
|
||||
@\flock($fh, LOCK_UN);
|
||||
@\fclose($fh);
|
||||
}
|
||||
} finally {
|
||||
$this->releaseLock($lockHandle);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates a new TagRebuildJob, persists and flushes it, and prints its id.
 *
 * @return TagRebuildJob the newly persisted job
 */
private function createJob(SymfonyStyle $io): TagRebuildJob
{
    $newJob = new TagRebuildJob();

    $this->em->persist($newJob);
    $this->em->flush();

    $io->writeln(sprintf('<info>Created new TagRebuildJob: %s</info>', (string) $newJob->getId()));

    return $newJob;
}
|
||||
|
||||
/**
 * Looks up an existing TagRebuildJob by its id.
 *
 * @throws \RuntimeException when the repository returns no TagRebuildJob for the id
 */
private function findJob(string $jobId): TagRebuildJob
{
    $candidate = $this->em->getRepository(TagRebuildJob::class)->find($jobId);

    if ($candidate instanceof TagRebuildJob) {
        return $candidate;
    }

    throw new \RuntimeException('Job not found.');
}
|
||||
|
||||
/**
 * Opens the lock file and takes an exclusive, non-blocking lock on it.
 *
 * The lock directory is created when it does not exist yet.
 *
 * @return resource open handle holding the lock
 *
 * @throws \RuntimeException when the directory cannot be created, the lock
 *                           file cannot be opened, or the lock cannot be taken
 */
private function acquireLock()
{
    $directory = \dirname($this->lockFilePath);

    if (!\is_dir($directory)) {
        $created = @\mkdir($directory, 0775, true);

        // Re-check after a failed mkdir: the directory may exist by now
        // (e.g. created concurrently), which is not an error.
        if (!$created && !\is_dir($directory)) {
            throw new \RuntimeException('Cannot create lock directory.');
        }
    }

    $lockFile = @\fopen($this->lockFilePath, 'c+');

    if ($lockFile === false) {
        throw new \RuntimeException('Cannot open lock file: ' . $this->lockFilePath);
    }

    if (@\flock($lockFile, LOCK_EX | LOCK_NB)) {
        return $lockFile;
    }

    // Locking failed: close the handle before reporting, so it does not leak.
    @\fclose($lockFile);

    throw new \RuntimeException('Another tag rebuild is currently running (lock busy).');
}
|
||||
|
||||
/**
 * Unlocks and closes a lock handle; does nothing for anything that is not
 * an open resource (for example null when no lock was taken).
 *
 * @param resource|null $handle
 */
private function releaseLock($handle): void
{
    if (is_resource($handle)) {
        @\flock($handle, LOCK_UN);
        @\fclose($handle);
    }
}
|
||||
|
||||
/**
 * Validates the result array returned by the tag exporter.
 *
 * The "path" entry must point to an existing regular file, and the
 * "tags", "lines" and "bytes" statistics (each defaulting to 0 when
 * absent) must not be negative.
 *
 * @param array<string, mixed> $export
 *
 * @throws \RuntimeException when the file is missing or a statistic is negative
 */
private function assertValidExport(array $export): void
{
    $path = trim((string) ($export['path'] ?? ''));

    if ($path === '' || !\is_file($path)) {
        throw new \RuntimeException('Export failed: NDJSON file missing.');
    }

    $tags = (int) ($export['tags'] ?? 0);
    $lines = (int) ($export['lines'] ?? 0);
    // "bytes" is reported to the user alongside tags/lines, so it is held
    // to the same non-negative rule.
    $bytes = (int) ($export['bytes'] ?? 0);

    if ($tags < 0 || $lines < 0 || $bytes < 0) {
        throw new \RuntimeException('Export returned invalid statistics.');
    }
}
|
||||
|
||||
/**
 * Derives the failure text stored on a job from a throwable.
 *
 * The message is trimmed and cut to at most 4000 characters; an empty
 * message is replaced by a generic fallback text.
 */
private function buildSafeErrorMessage(\Throwable $e): string
{
    $trimmed = trim($e->getMessage());

    return $trimmed !== ''
        ? mb_substr($trimmed, 0, 4000)
        : 'Unknown tag rebuild failure.';
}
|
||||
}
|
||||
@@ -9,6 +9,7 @@ use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:tags:export',
|
||||
@@ -17,26 +18,51 @@ use Symfony\Component\Console\Output\OutputInterface;
|
||||
final class TagsExportCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private TagNdjsonExporter $exporter,
|
||||
private readonly TagNdjsonExporter $exporter,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$io = new SymfonyStyle($input, $output);
|
||||
|
||||
try {
|
||||
$result = $this->exporter->export();
|
||||
$this->assertValidExport($result);
|
||||
|
||||
$io->writeln('<info>Tags NDJSON exported</info>');
|
||||
$io->writeln('Path: ' . (string) ($result['path'] ?? ''));
|
||||
$io->writeln('Tags: ' . (string) ($result['tags'] ?? 0));
|
||||
$io->writeln('Lines: ' . (string) ($result['lines'] ?? 0));
|
||||
$io->writeln('Bytes: ' . (string) ($result['bytes'] ?? 0));
|
||||
$io->success('Tag export completed.');
|
||||
|
||||
return Command::SUCCESS;
|
||||
} catch (\Throwable $e) {
|
||||
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
|
||||
$io->error($e->getMessage());
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
$output->writeln('<info>Tags NDJSON exported</info>');
|
||||
$output->writeln('Path: ' . $result['path']);
|
||||
$output->writeln('Tags: ' . $result['tags']);
|
||||
$output->writeln('Lines: ' . $result['lines']);
|
||||
$output->writeln('Bytes: ' . $result['bytes']);
|
||||
/**
|
||||
* @param array<string, mixed> $result
|
||||
*/
|
||||
private function assertValidExport(array $result): void
|
||||
{
|
||||
$path = trim((string) ($result['path'] ?? ''));
|
||||
|
||||
return Command::SUCCESS;
|
||||
if ($path === '' || !is_file($path)) {
|
||||
throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
|
||||
}
|
||||
|
||||
$tags = (int) ($result['tags'] ?? 0);
|
||||
$lines = (int) ($result['lines'] ?? 0);
|
||||
$bytes = (int) ($result['bytes'] ?? 0);
|
||||
|
||||
if ($tags < 0 || $lines < 0 || $bytes < 0) {
|
||||
throw new \RuntimeException('Tag export returned invalid statistics.');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,13 +4,13 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Index\IndexMetaManager;
|
||||
use App\Tag\TagNdjsonExporter;
|
||||
use App\Tag\TagVectorIndexBuilder;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:tags:rebuild',
|
||||
@@ -21,45 +21,54 @@ final class TagsRebuildCommand extends Command
|
||||
public function __construct(
|
||||
private readonly TagNdjsonExporter $exporter,
|
||||
private readonly TagVectorIndexBuilder $builder,
|
||||
private readonly IndexMetaManager $metaManager,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$io = new SymfonyStyle($input, $output);
|
||||
|
||||
try {
|
||||
// -----------------------------------------
|
||||
// 1) Export tags.ndjson
|
||||
// -----------------------------------------
|
||||
$export = $this->exporter->export();
|
||||
$this->assertValidExport($export);
|
||||
|
||||
$output->writeln('<info>1/3 Exported tags.ndjson</info>');
|
||||
$output->writeln('Path: ' . $export['path']);
|
||||
$output->writeln('Tags: ' . $export['tags']);
|
||||
$output->writeln('Lines: ' . $export['lines']);
|
||||
$output->writeln('Bytes: ' . $export['bytes']);
|
||||
$io->writeln('<info>1/2 Exported tags.ndjson</info>');
|
||||
$io->writeln('Path: ' . (string) ($export['path'] ?? ''));
|
||||
$io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
|
||||
$io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
|
||||
$io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
|
||||
|
||||
// -----------------------------------------
|
||||
// 2) Build FAISS tag index
|
||||
// -----------------------------------------
|
||||
$this->builder->build();
|
||||
|
||||
$output->writeln('<info>2/3 Built vector_tags.index</info>');
|
||||
$io->writeln('<info>2/2 Built vector_tags.index</info>');
|
||||
$io->success('Tag rebuild completed.');
|
||||
|
||||
// -----------------------------------------
|
||||
// 3) Enterprise Commit Marker
|
||||
// -----------------------------------------
|
||||
$this->metaManager->touchRuntime([
|
||||
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
||||
]);
|
||||
|
||||
$output->writeln('<info>3/3 Runtime commit marker updated</info>');
|
||||
return Command::SUCCESS;
|
||||
} catch (\Throwable $e) {
|
||||
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
|
||||
$io->error($e->getMessage());
|
||||
|
||||
return Command::FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
return Command::SUCCESS;
|
||||
/**
|
||||
* @param array<string, mixed> $export
|
||||
*/
|
||||
private function assertValidExport(array $export): void
|
||||
{
|
||||
$path = trim((string) ($export['path'] ?? ''));
|
||||
|
||||
if ($path === '' || !is_file($path)) {
|
||||
throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
|
||||
}
|
||||
|
||||
$tags = (int) ($export['tags'] ?? 0);
|
||||
$lines = (int) ($export['lines'] ?? 0);
|
||||
$bytes = (int) ($export['bytes'] ?? 0);
|
||||
|
||||
if ($tags < 0 || $lines < 0 || $bytes < 0) {
|
||||
throw new \RuntimeException('Tag export returned invalid statistics.');
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,62 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
class CatalogIntentConfig
|
||||
/**
|
||||
* Central thresholds for deterministic catalog-entity detection.
|
||||
*
|
||||
* The values in this class intentionally define a conservative gate:
|
||||
* - only strong semantic tag hits may open the catalog path
|
||||
* - small score gaps between the best and second-best hit are treated as ambiguous
|
||||
*/
|
||||
final class CatalogIntentConfig
|
||||
{
|
||||
// Minimum similarity score. Prevents noise.
|
||||
/**
|
||||
* Minimum semantic similarity required before a catalog entity is accepted.
|
||||
*/
|
||||
public const MIN_SCORE = 0.72;
|
||||
|
||||
// Difference between Top 1 and Top 2, so that no uncertain match is accepted.
|
||||
/**
|
||||
* Required distance between the best and second-best catalog entity hit.
|
||||
*/
|
||||
public const AMBIGUITY_DELTA = 0.02;
|
||||
|
||||
/**
|
||||
* Number of candidate tag hits to inspect during catalog intent detection.
|
||||
*
|
||||
* This is intentionally wider than the final accepted set so that strong
|
||||
* catalog_entity tags are not hidden behind generic tags in the raw result.
|
||||
*/
|
||||
public const SEARCH_LIMIT = 6;
|
||||
|
||||
/**
|
||||
* Conservative lower boundary for score normalization helpers.
|
||||
*/
|
||||
public const MIN_ALLOWED_SCORE = 0.0;
|
||||
|
||||
/**
|
||||
* Conservative upper boundary for score normalization helpers.
|
||||
*/
|
||||
public const MAX_ALLOWED_SCORE = 1.0;
|
||||
|
||||
public static function isScoreAccepted(float $score): bool
|
||||
{
|
||||
return $score >= self::MIN_SCORE;
|
||||
}
|
||||
|
||||
public static function isAmbiguous(float $bestScore, float $secondScore): bool
|
||||
{
|
||||
return abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA;
|
||||
}
|
||||
|
||||
public static function clampScore(float $score): float
|
||||
{
|
||||
return max(self::MIN_ALLOWED_SCORE, min(self::MAX_ALLOWED_SCORE, $score));
|
||||
}
|
||||
|
||||
private function __construct()
|
||||
{
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Controller\Admin;
|
||||
|
||||
@@ -17,25 +18,22 @@ final class DashboardController extends AbstractController
|
||||
#[Route('', name: 'admin_dashboard_null')]
|
||||
#[Route('/', name: 'admin_dashboard_trail')]
|
||||
#[Route('/admin', name: 'admin_dashboard_alias')]
|
||||
public function trailNull(IndexMetaManager $metaManager,VectorIndexHealthService $health): RedirectResponse
|
||||
public function redirectToDashboard(): RedirectResponse
|
||||
{
|
||||
return $this->redirectToRoute('admin_dashboard');
|
||||
}
|
||||
|
||||
|
||||
#[Route('/admin/dashboard', name: 'admin_dashboard')]
|
||||
public function dashboard(IndexMetaManager $metaManager,VectorIndexHealthService $health,TagVectorIndexHealthService $tagHealth): Response
|
||||
{
|
||||
$chunkCount = $metaManager->getRuntimeChunkCount();
|
||||
$limit = IngestFlow::CHUNK_LIMIT_HARD;
|
||||
|
||||
#[Route('/admin/dashboard', name: 'admin_dashboard', methods: ['GET'])]
|
||||
public function dashboard(
|
||||
IndexMetaManager $metaManager,
|
||||
VectorIndexHealthService $health,
|
||||
TagVectorIndexHealthService $tagHealth
|
||||
): Response {
|
||||
return $this->render('admin/dashboard/index.html.twig', [
|
||||
'chunkCount' => $chunkCount,
|
||||
'chunkLimit' => $limit,
|
||||
'chunkCount' => $metaManager->getRuntimeChunkCount(),
|
||||
'chunkLimit' => IngestFlow::CHUNK_LIMIT_HARD,
|
||||
'vectorHealth' => $health->check(),
|
||||
'tagVectorHealth' => $tagHealth->check(),
|
||||
]);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,13 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Controller\Admin;
|
||||
|
||||
use App\Entity\Document;
|
||||
use App\Entity\DocumentVersion;
|
||||
use App\Entity\IngestJob;
|
||||
use App\Entity\User;
|
||||
use App\Service\DocumentService;
|
||||
use App\Service\FormatText;
|
||||
use App\Service\IngestJobService;
|
||||
@@ -23,9 +26,11 @@ use Symfony\Component\Routing\Attribute\Route;
|
||||
use Symfony\Component\Uid\Uuid;
|
||||
|
||||
#[Route('/admin/documents')]
|
||||
class DocumentController extends AbstractController
|
||||
final class DocumentController extends AbstractController
|
||||
{
|
||||
#[Route('', name: 'admin_documents')]
|
||||
private const INGEST_DUPLICATE_WINDOW_SECONDS = 3;
|
||||
|
||||
#[Route('', name: 'admin_documents', methods: ['GET'])]
|
||||
public function index(EntityManagerInterface $em): Response
|
||||
{
|
||||
$documents = $em->getRepository(Document::class)
|
||||
@@ -46,115 +51,106 @@ class DocumentController extends AbstractController
|
||||
#[Route(
|
||||
'/{id}',
|
||||
name: 'admin_document_show',
|
||||
requirements: ['id' => '[0-9a-fA-F\-]{36}']
|
||||
requirements: ['id' => '[0-9a-fA-F\-]{36}'],
|
||||
methods: ['GET']
|
||||
)]
|
||||
public function show(string $id, EntityManagerInterface $em): Response
|
||||
{
|
||||
try {
|
||||
$uuid = Uuid::fromString($id);
|
||||
} catch (\Exception) {
|
||||
throw new NotFoundHttpException();
|
||||
}
|
||||
|
||||
$document = $em->getRepository(Document::class)->find($uuid);
|
||||
|
||||
if (!$document) {
|
||||
$this->addFlash('danger', 'Das Dokument existiert nicht mehr.');
|
||||
}
|
||||
|
||||
return $this->render('admin/document/show.html.twig', [
|
||||
'document' => $document,
|
||||
'document' => $this->findDocument($id, $em),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/new', name: 'admin_document_new')]
|
||||
#[Route('/new', name: 'admin_document_new', methods: ['GET', 'POST'])]
|
||||
public function new(
|
||||
Request $request,
|
||||
DocumentService $documentService,
|
||||
FormatText $formatText,
|
||||
IngestJobService $jobService,
|
||||
ParameterBagInterface $params
|
||||
Request $request,
|
||||
DocumentService $documentService,
|
||||
FormatText $formatText,
|
||||
IngestJobService $jobService,
|
||||
ParameterBagInterface $params,
|
||||
EntityManagerInterface $em,
|
||||
): Response {
|
||||
if (!$request->isMethod('POST')) {
|
||||
return $this->render('admin/document/new.html.twig');
|
||||
}
|
||||
|
||||
/** @var UploadedFile|null $file */
|
||||
$file = $request->files->get('file');
|
||||
if (!$file instanceof UploadedFile) {
|
||||
throw new \InvalidArgumentException('No valid file uploaded.');
|
||||
}
|
||||
if (!$this->isCsrfTokenValid('create_document', (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
$rawTitle = $request->request->get('title');
|
||||
$title = is_string($rawTitle) && $rawTitle !== ''
|
||||
? $rawTitle
|
||||
: $formatText->slugify($file->getClientOriginalName());
|
||||
|
||||
if (!$title) {
|
||||
$this->addFlash('error', 'Titel ist erforderlich.');
|
||||
return $this->redirectToRoute('admin_document_new');
|
||||
}
|
||||
|
||||
$uploadDir = (string)$params->get('mto.vector.data.upload.path');
|
||||
$this->ensureDir($uploadDir);
|
||||
/** @var UploadedFile|null $file */
|
||||
$file = $request->files->get('file');
|
||||
if (!$file instanceof UploadedFile) {
|
||||
$this->addFlash('danger', 'Keine gültige Datei hochgeladen.');
|
||||
|
||||
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName();
|
||||
return $this->redirectToRoute('admin_document_new');
|
||||
}
|
||||
|
||||
$title = $this->resolveDocumentTitle($request, $file, $formatText);
|
||||
if ($title === '') {
|
||||
$this->addFlash('danger', 'Titel ist erforderlich.');
|
||||
|
||||
return $this->redirectToRoute('admin_document_new');
|
||||
}
|
||||
|
||||
$user = $this->requireUser();
|
||||
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
|
||||
|
||||
try {
|
||||
$file->move($uploadDir, $newFilename);
|
||||
} catch (FileException) {
|
||||
throw new \RuntimeException('File upload failed.');
|
||||
$this->ensureDir($uploadDir);
|
||||
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
|
||||
|
||||
$document = $documentService->createDocument($title, $filePath, $user);
|
||||
$version = $document->getCurrentVersion();
|
||||
|
||||
if (!$version instanceof DocumentVersion) {
|
||||
throw new \RuntimeException('Dokument erstellt, aber keine aktuelle Version vorhanden.');
|
||||
}
|
||||
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
|
||||
$user,
|
||||
$version->getDocument()->getId(),
|
||||
$version->getId(),
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
$logFile = $this->prepareJobLogFile((string) $job->getId());
|
||||
$job->setLogPath($logFile);
|
||||
$em->flush();
|
||||
|
||||
if (!$this->canExec()) {
|
||||
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
|
||||
$this->addFlash('danger', 'Dokument erstellt, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
|
||||
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
$this->startIngestJob((string) $job->getId(), $logFile);
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument konnte nicht erstellt werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_document_new');
|
||||
}
|
||||
|
||||
$filePath = $uploadDir . '/' . $newFilename;
|
||||
|
||||
$document = $documentService->createDocument(
|
||||
$title,
|
||||
$filePath,
|
||||
$this->getUser()
|
||||
);
|
||||
|
||||
$version = $document->getCurrentVersion();
|
||||
if (!$version instanceof DocumentVersion) {
|
||||
$this->addFlash('danger', 'Dokument erstellt, aber es wurde keine aktuelle Version erzeugt.');
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
|
||||
$this->getUser(),
|
||||
$version->getDocument()->getId(),
|
||||
$version->getId(),
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
if (!$this->canExec()) {
|
||||
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
|
||||
$this->addFlash('danger', 'Dokument erstellt, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
$this->startIngestJob((string)$job->getId());
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string)$job->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'])]
|
||||
#[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'], methods: ['GET', 'POST'])]
|
||||
public function newVersion(
|
||||
string $id,
|
||||
Request $request,
|
||||
string $id,
|
||||
Request $request,
|
||||
EntityManagerInterface $em,
|
||||
DocumentService $documentService,
|
||||
ParameterBagInterface $params
|
||||
DocumentService $documentService,
|
||||
ParameterBagInterface $params,
|
||||
FormatText $formatText,
|
||||
): Response {
|
||||
$document = $em->getRepository(Document::class)->find($id);
|
||||
|
||||
if (!$document) {
|
||||
throw $this->createNotFoundException();
|
||||
}
|
||||
$document = $this->findDocument($id, $em);
|
||||
|
||||
if (!$request->isMethod('POST')) {
|
||||
return $this->render('admin/document/new_version.html.twig', [
|
||||
@@ -162,31 +158,33 @@ class DocumentController extends AbstractController
|
||||
]);
|
||||
}
|
||||
|
||||
/** @var UploadedFile|null $file */
|
||||
$file = $request->files->get('file');
|
||||
if (!$file instanceof UploadedFile) {
|
||||
$this->addFlash('error', 'Datei ist erforderlich.');
|
||||
if (!$this->isCsrfTokenValid('create_document_version_' . $id, (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
|
||||
}
|
||||
|
||||
$uploadDir = (string)$params->get('mto.vector.data.upload.path');
|
||||
$this->ensureDir($uploadDir);
|
||||
/** @var UploadedFile|null $file */
|
||||
$file = $request->files->get('file');
|
||||
if (!$file instanceof UploadedFile) {
|
||||
$this->addFlash('danger', 'Datei ist erforderlich.');
|
||||
|
||||
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName();
|
||||
|
||||
try {
|
||||
$file->move($uploadDir, $newFilename);
|
||||
} catch (FileException) {
|
||||
throw new \RuntimeException('File upload failed.');
|
||||
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
|
||||
}
|
||||
|
||||
$filePath = $uploadDir . '/' . $newFilename;
|
||||
try {
|
||||
$user = $this->requireUser();
|
||||
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
|
||||
$this->ensureDir($uploadDir);
|
||||
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
|
||||
|
||||
$documentService->addVersion(
|
||||
$document,
|
||||
$filePath,
|
||||
$this->getUser()
|
||||
);
|
||||
$documentService->addVersion($document, $filePath, $user);
|
||||
$this->addFlash('success', 'Neue Dokumentversion wurde hochgeladen.');
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Neue Dokumentversion konnte nicht erstellt werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_document_show', ['id' => $id]);
|
||||
}
|
||||
@@ -198,54 +196,55 @@ class DocumentController extends AbstractController
|
||||
methods: ['POST']
|
||||
)]
|
||||
public function activateVersion(
|
||||
string $versionId,
|
||||
Request $request,
|
||||
string $versionId,
|
||||
Request $request,
|
||||
EntityManagerInterface $em,
|
||||
DocumentService $documentService,
|
||||
IngestJobService $jobService,
|
||||
DocumentService $documentService,
|
||||
IngestJobService $jobService,
|
||||
): RedirectResponse {
|
||||
if (!$this->isCsrfTokenValid('activate_version_' . $versionId, (string)$request->request->get('_token'))) {
|
||||
if (!$this->isCsrfTokenValid('activate_version_' . $versionId, (string) $request->request->get('_token'))) {
|
||||
throw $this->createAccessDeniedException();
|
||||
}
|
||||
|
||||
$version = $em->getRepository(DocumentVersion::class)->find($versionId);
|
||||
if (!$version) {
|
||||
throw $this->createNotFoundException();
|
||||
}
|
||||
$version = $this->findDocumentVersion($versionId, $em);
|
||||
|
||||
try {
|
||||
$documentService->activateVersion($version);
|
||||
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
|
||||
$this->getUser(),
|
||||
$this->requireUser(),
|
||||
$version->getDocument()->getId(),
|
||||
$version->getId(),
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
$logFile = $this->prepareJobLogFile((string) $job->getId());
|
||||
$job->setLogPath($logFile);
|
||||
$em->flush();
|
||||
|
||||
if (!$this->canExec()) {
|
||||
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
|
||||
$this->addFlash('danger', 'Aktivierung ok, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
|
||||
|
||||
return $this->redirectToRoute('admin_document_show', [
|
||||
'id' => $version->getDocument()->getId(),
|
||||
'id' => (string) $version->getDocument()->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
$this->startIngestJob((string)$job->getId());
|
||||
|
||||
$this->startIngestJob((string) $job->getId(), $logFile);
|
||||
$this->addFlash('success', 'Version aktiviert. Ingest-Job wurde erstellt und gestartet.');
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string)$job->getId(),
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $e->getMessage());
|
||||
$this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $this->buildSafeErrorMessage($e, 'Unbekannter Fehler.'));
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_document_show', [
|
||||
'id' => $version->getDocument()->getId(),
|
||||
'id' => (string) $version->getDocument()->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -256,115 +255,135 @@ class DocumentController extends AbstractController
|
||||
methods: ['POST']
|
||||
)]
|
||||
public function ingestVersion(
|
||||
string $versionId,
|
||||
Request $request,
|
||||
string $versionId,
|
||||
Request $request,
|
||||
EntityManagerInterface $em,
|
||||
IngestJobService $jobService,
|
||||
): ?RedirectResponse {
|
||||
if (!$this->isCsrfTokenValid('ingest_version_' . $versionId, (string)$request->request->get('_token'))) {
|
||||
IngestJobService $jobService,
|
||||
): RedirectResponse {
|
||||
if (!$this->isCsrfTokenValid('ingest_version_' . $versionId, (string) $request->request->get('_token'))) {
|
||||
throw $this->createAccessDeniedException();
|
||||
}
|
||||
|
||||
$version = $em->getRepository(DocumentVersion::class)->find($versionId);
|
||||
if (!$version) {
|
||||
throw $this->createNotFoundException();
|
||||
}
|
||||
$version = $this->findDocumentVersion($versionId, $em);
|
||||
|
||||
/** @var IngestJob|null $existing */
|
||||
$existing = $em->getRepository(IngestJob::class)
|
||||
->findOneBy(
|
||||
['documentVersionId' => $version->getId()],
|
||||
['startedAt' => 'DESC']
|
||||
['startedAt' => 'DESC', 'id' => 'DESC']
|
||||
);
|
||||
|
||||
if ($existing && $existing->getStartedAt() > new \DateTimeImmutable('-3 seconds')) {
|
||||
return null;
|
||||
if (
|
||||
$existing instanceof IngestJob
|
||||
&& $existing->getStartedAt() > new \DateTimeImmutable('-' . self::INGEST_DUPLICATE_WINDOW_SECONDS . ' seconds')
|
||||
&& in_array($existing->getStatus(), [IngestJob::STATUS_QUEUED, IngestJob::STATUS_RUNNING], true)
|
||||
) {
|
||||
$this->addFlash('info', 'Für diese Version läuft bereits ein aktueller Ingest-Job.');
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string) $existing->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT,
|
||||
$this->getUser(),
|
||||
$this->requireUser(),
|
||||
$version->getDocument()->getId(),
|
||||
$version->getId(),
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
$logFile = $this->prepareJobLogFile((string) $job->getId());
|
||||
$job->setLogPath($logFile);
|
||||
$em->flush();
|
||||
|
||||
if (!$this->canExec()) {
|
||||
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
|
||||
$this->addFlash('error', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
|
||||
$this->addFlash('danger', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
|
||||
|
||||
return $this->redirectToRoute('admin_document_show', [
|
||||
'id' => $version->getDocument()->getId(),
|
||||
'id' => (string) $version->getDocument()->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
$this->startIngestJob((string)$job->getId());
|
||||
try {
|
||||
$this->startIngestJob((string) $job->getId(), $logFile);
|
||||
} catch (\Throwable $e) {
|
||||
$jobService->markFailed($job, 'Ingest async start failed: ' . $e->getMessage());
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Ingest konnte nicht gestartet werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_document_show', [
|
||||
'id' => (string) $version->getDocument()->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string)$job->getId(),
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route(
|
||||
'/reset',
|
||||
name: 'admin_document_reset',
|
||||
methods: ['POST']
|
||||
)]
|
||||
public function resetCompleteSystem(ParameterBagInterface $params, Connection $connection): ?RedirectResponse
|
||||
{
|
||||
if (!$this->canExec()) {
|
||||
$this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).');
|
||||
#[Route('/reset', name: 'admin_document_reset', methods: ['POST'])]
|
||||
public function resetCompleteSystem(
|
||||
Request $request,
|
||||
ParameterBagInterface $params,
|
||||
Connection $connection,
|
||||
): RedirectResponse {
|
||||
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
|
||||
|
||||
if (!$this->isCsrfTokenValid('system_reset', (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
return $this->redirectToRoute('admin_dashboard');
|
||||
}
|
||||
|
||||
@unlink((string)$params->get('mto.knowledge.ndjson'));
|
||||
@unlink((string)$params->get('mto.knowledge.vector_index'));
|
||||
@unlink((string)$params->get('mto.knowledge.vector_index_meta'));
|
||||
@unlink((string)$params->get('mto.knowledge.index_meta'));
|
||||
@unlink((string)$params->get('mto.runtime.meta'));
|
||||
if (!$this->canExec()) {
|
||||
$this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).');
|
||||
|
||||
@unlink((string)$params->get('mto.knowledge.tags_ndjson'));
|
||||
@unlink((string)$params->get('mto.knowledge.vector_tags_index'));
|
||||
@unlink((string)$params->get('mto.knowledge.vector_tags_index_meta'));
|
||||
return $this->redirectToRoute('admin_dashboard');
|
||||
}
|
||||
|
||||
$uploadDir = (string)$params->get('mto.knowledge.upload');
|
||||
foreach ([
|
||||
'mto.knowledge.ndjson',
|
||||
'mto.knowledge.vector_index',
|
||||
'mto.knowledge.vector_index_meta',
|
||||
'mto.knowledge.index_meta',
|
||||
'mto.runtime.meta',
|
||||
'mto.knowledge.tags_ndjson',
|
||||
'mto.knowledge.vector_tags_index',
|
||||
'mto.knowledge.vector_tags_index_meta',
|
||||
] as $parameterName) {
|
||||
$path = trim((string) $params->get($parameterName));
|
||||
if ($path !== '' && is_file($path)) {
|
||||
@unlink($path);
|
||||
}
|
||||
}
|
||||
|
||||
$uploadDir = trim((string) $params->get('mto.knowledge.upload'));
|
||||
if ($uploadDir !== '' && is_dir($uploadDir)) {
|
||||
exec('rm -rf ' . escapeshellarg($uploadDir));
|
||||
}
|
||||
|
||||
$lockDir = (string)$params->get('mto.locks.dir');
|
||||
$lockDir = trim((string) $params->get('mto.locks.dir'));
|
||||
if ($lockDir !== '' && is_dir($lockDir)) {
|
||||
exec('rm -rf ' . escapeshellarg($lockDir));
|
||||
}
|
||||
|
||||
$sql = '
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.document;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.document_version;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.ingest_job;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.knowledge_tag;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.tag_rebuild_job;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.document_tag;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
';
|
||||
$connection->executeQuery($sql);
|
||||
$sql = <<<'SQL'
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
TRUNCATE TABLE db.document_tag;
|
||||
TRUNCATE TABLE db.tag_rebuild_job;
|
||||
TRUNCATE TABLE db.knowledge_tag;
|
||||
TRUNCATE TABLE db.ingest_job;
|
||||
TRUNCATE TABLE db.document_version;
|
||||
TRUNCATE TABLE db.document;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
SQL;
|
||||
|
||||
$connection->executeStatement($sql);
|
||||
|
||||
$this->addFlash('success', 'Das System wurde erfolgreich zurückgesetzt.');
|
||||
|
||||
return $this->redirectToRoute('admin_dashboard');
|
||||
}
|
||||
|
||||
@@ -375,62 +394,63 @@ class DocumentController extends AbstractController
|
||||
methods: ['POST']
|
||||
)]
|
||||
public function deleteDocument(
|
||||
string $id,
|
||||
Request $request,
|
||||
string $id,
|
||||
Request $request,
|
||||
EntityManagerInterface $em,
|
||||
IngestJobService $jobService,
|
||||
LockService $lockService,
|
||||
IngestJobService $jobService,
|
||||
LockService $lockService,
|
||||
): RedirectResponse {
|
||||
if (!$this->isCsrfTokenValid('delete_document_' . $id, (string)$request->request->get('_token'))) {
|
||||
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
|
||||
|
||||
if (!$this->isCsrfTokenValid('delete_document_' . $id, (string) $request->request->get('_token'))) {
|
||||
throw $this->createAccessDeniedException();
|
||||
}
|
||||
|
||||
try {
|
||||
$uuid = Uuid::fromString($id);
|
||||
} catch (\Exception) {
|
||||
throw $this->createNotFoundException();
|
||||
}
|
||||
|
||||
/** @var Document|null $document */
|
||||
$document = $em->getRepository(Document::class)->find($uuid);
|
||||
if (!$document) {
|
||||
throw $this->createNotFoundException();
|
||||
}
|
||||
$document = $this->findDocument($id, $em);
|
||||
|
||||
if (!$lockService->acquire()) {
|
||||
$this->addFlash('danger', 'Ein Ingest-Job läuft bereits. Löschen derzeit nicht möglich.');
|
||||
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
$lockService->release();
|
||||
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_DOCUMENT_DELETE,
|
||||
$this->getUser(),
|
||||
$this->requireUser(),
|
||||
$document->getId(),
|
||||
null,
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
$logFile = $this->prepareJobLogFile((string) $job->getId());
|
||||
$job->setLogPath($logFile);
|
||||
$em->flush();
|
||||
|
||||
if (!$this->canExec()) {
|
||||
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
|
||||
$this->addFlash('danger', 'Löschen konnte nicht gestartet werden (exec deaktiviert).');
|
||||
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
$this->startIngestJob((string)$job->getId());
|
||||
try {
|
||||
$this->startIngestJob((string) $job->getId(), $logFile);
|
||||
} catch (\Throwable $e) {
|
||||
$jobService->markFailed($job, 'Delete async start failed: ' . $e->getMessage());
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Löschvorgang konnte nicht gestartet werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
$this->addFlash('success', 'Löschvorgang gestartet. Dokument wird nach Index-Rebuild entfernt.');
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string)$job->getId(),
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// Helpers
|
||||
// =========================================================
|
||||
|
||||
private function canExec(): bool
|
||||
{
|
||||
if (!function_exists('exec')) {
|
||||
@@ -443,6 +463,7 @@ class DocumentController extends AbstractController
|
||||
}
|
||||
|
||||
$list = array_map('trim', explode(',', $disabled));
|
||||
|
||||
return !in_array('exec', $list, true);
|
||||
}
|
||||
|
||||
@@ -452,34 +473,209 @@ class DocumentController extends AbstractController
|
||||
throw new \RuntimeException('Upload directory not configured.');
|
||||
}
|
||||
|
||||
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
|
||||
if (!is_dir($dir) && !mkdir($dir, 0775, true) && !is_dir($dir)) {
|
||||
throw new \RuntimeException('Unable to create upload directory.');
|
||||
}
|
||||
}
|
||||
|
||||
private function startIngestJob(string $jobId): void
|
||||
private function moveUploadedFile(UploadedFile $file, string $uploadDir, FormatText $formatText): string
|
||||
{
|
||||
$projectDir = (string)$this->getParameter('kernel.project_dir');
|
||||
$originalName = trim((string) $file->getClientOriginalName());
|
||||
$baseName = pathinfo($originalName !== '' ? $originalName : 'document', PATHINFO_FILENAME);
|
||||
$extension = strtolower((string) $file->getClientOriginalExtension());
|
||||
|
||||
$safeBaseName = $formatText->slugify($baseName !== '' ? $baseName : 'document');
|
||||
if ($safeBaseName === '') {
|
||||
$safeBaseName = 'document';
|
||||
}
|
||||
|
||||
$newFilename = uniqid('', true) . '_' . $safeBaseName;
|
||||
if ($extension !== '') {
|
||||
$newFilename .= '.' . $extension;
|
||||
}
|
||||
|
||||
try {
|
||||
$file->move($uploadDir, $newFilename);
|
||||
} catch (FileException) {
|
||||
throw new \RuntimeException('File upload failed.');
|
||||
}
|
||||
|
||||
return rtrim($uploadDir, '/') . '/' . $newFilename;
|
||||
}
|
||||
|
||||
/**
 * Determines the title to store for an uploaded document.
 *
 * A non-blank "title" form field wins and is returned trimmed. Otherwise the
 * title is derived from the client-side file name: its base name without
 * extension (or the full name when that is empty) is passed through
 * FormatText::slugify() and trimmed.
 */
private function resolveDocumentTitle(Request $request, UploadedFile $file, FormatText $formatText): string
{
    $submittedTitle = trim((string) $request->request->get('title', ''));

    if ($submittedTitle === '') {
        $clientName = trim((string) $file->getClientOriginalName());
        $stem = pathinfo($clientName, PATHINFO_FILENAME);
        $titleSource = $stem !== '' ? $stem : $clientName;

        return trim((string) $formatText->slugify($titleSource));
    }

    return $submittedTitle;
}
|
||||
|
||||
private function startIngestJob(string $jobId, string $logFile): void
|
||||
{
|
||||
$projectDir = $this->resolveProjectDir();
|
||||
$console = $projectDir . '/bin/console';
|
||||
|
||||
$logDir = $projectDir . '/var/log/ingest';
|
||||
if (!is_dir($logDir)) {
|
||||
@mkdir($logDir, 0777, true);
|
||||
if (!is_file($console)) {
|
||||
throw new \RuntimeException('bin/console not found: ' . $console);
|
||||
}
|
||||
$logFile = $logDir . '/job_' . $jobId . '.log';
|
||||
|
||||
// Wichtig: CLI-PHP verwenden, nicht PHP_BINARY aus FPM
|
||||
$php = 'php';
|
||||
$php = $this->resolvePhpBinary();
|
||||
|
||||
$cmd = sprintf(
|
||||
'%s %s --no-interaction %s %s >> %s 2>&1 &',
|
||||
escapeshellcmd($php),
|
||||
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
|
||||
escapeshellarg($projectDir),
|
||||
escapeshellarg($php),
|
||||
escapeshellarg($console),
|
||||
escapeshellarg('mto:agent:ingest:run'),
|
||||
escapeshellarg($jobId),
|
||||
escapeshellarg($logFile),
|
||||
);
|
||||
|
||||
exec($cmd);
|
||||
$output = [];
|
||||
$exitCode = 0;
|
||||
@exec($cmd, $output, $exitCode);
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
throw new \RuntimeException('Background ingest bootstrap failed with exit code ' . $exitCode . '.');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the log file path for an ingest job.
 *
 * The path is <project>/var/log/ingest/job_<jobId>.log; the directory is
 * handed to ensureDir() before the path is built.
 */
private function prepareJobLogFile(string $jobId): string
{
    $ingestLogDir = $this->resolveProjectDir() . '/var/log/ingest';
    $this->ensureDir($ingestLogDir);

    return sprintf('%s/job_%s.log', $ingestLogDir, $jobId);
}
|
||||
|
||||
/**
 * Returns the kernel project directory without a trailing slash.
 *
 * @throws \RuntimeException when the parameter is blank or not a directory
 */
private function resolveProjectDir(): string
{
    $configuredDir = trim((string) $this->getParameter('kernel.project_dir'));
    $isUsable = $configuredDir !== '' && is_dir($configuredDir);

    if (!$isUsable) {
        throw new \RuntimeException('Project directory is invalid.');
    }

    return rtrim($configuredDir, '/');
}
|
||||
|
||||
/**
 * Locates a CLI PHP executable for launching background console commands.
 *
 * Candidates are probed in this order, and the first one accepted by
 * isValidCliPhpBinary() is returned:
 *  1. PHP_CLI_BINARY from $_SERVER, $_ENV and getenv()
 *  2. the PHP_BINARY constant of the running process
 *  3. a list of conventional install locations
 *  4. a PATH lookup via `command -v php` (only when shell_exec is available)
 *
 * @throws \RuntimeException when no candidate qualifies
 */
private function resolvePhpBinary(): string
{
    $candidates = [
        // Explicit configuration takes precedence.
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        // The running binary; the validator rejects it when it is an FPM/CGI build.
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        // Conventional install locations.
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($candidates as $candidate) {
        if ($this->isValidCliPhpBinary($candidate)) {
            return $candidate;
        }
    }

    // Last resort: PATH lookup. On PHP 8 a function listed in disable_functions is
    // undefined, so calling it throws an Error that "@" cannot suppress. Probe first
    // so the descriptive exception below is reached instead.
    if (function_exists('shell_exec')) {
        $whichPhp = trim((string) @shell_exec('command -v php 2>/dev/null'));

        if ($this->isValidCliPhpBinary($whichPhp)) {
            return $whichPhp;
        }
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
|
||||
|
||||
/**
 * Tells whether a path points to an executable file that is not an FPM/CGI binary.
 *
 * The path is trimmed first. It must be an existing, executable regular file,
 * and its lower-cased base name must contain neither "fpm" nor "cgi".
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);
    $isExecutableFile = $candidate !== '' && is_file($candidate) && is_executable($candidate);

    if (!$isExecutableFile) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
|
||||
|
||||
/**
 * Loads a Document by its UUID string.
 *
 * @throws NotFoundHttpException when the id cannot be parsed as a UUID or no
 *                               Document is found for it
 */
private function findDocument(string $id, EntityManagerInterface $em): Document
{
    try {
        $documentId = Uuid::fromString(trim($id));
    } catch (\Throwable) {
        // An unparsable id is reported the same way as a missing document.
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(Document::class)->find($documentId);

    if ($found instanceof Document) {
        return $found;
    }

    throw new NotFoundHttpException();
}
|
||||
|
||||
/**
 * Loads a DocumentVersion by its UUID string.
 *
 * @throws NotFoundHttpException when the id cannot be parsed as a UUID or no
 *                               DocumentVersion is found for it
 */
private function findDocumentVersion(string $versionId, EntityManagerInterface $em): DocumentVersion
{
    try {
        $parsedId = Uuid::fromString(trim($versionId));
    } catch (\Throwable) {
        // An unparsable id is reported the same way as a missing version.
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(DocumentVersion::class)->find($parsedId);

    if ($found instanceof DocumentVersion) {
        return $found;
    }

    throw new NotFoundHttpException();
}
|
||||
|
||||
/**
 * Returns the current user as an application User.
 *
 * @throws \RuntimeException when getUser() does not yield a User instance
 */
private function requireUser(): User
{
    $current = $this->getUser();

    if ($current instanceof User) {
        return $current;
    }

    throw new \RuntimeException('No authenticated user available.');
}
|
||||
|
||||
/**
 * Picks the text to show for a failure: the trimmed exception message, or
 * $fallback when that message is blank.
 *
 * NOTE(review): the exception message is passed through unfiltered; confirm
 * that no caller can surface internal details (paths, SQL) to end users.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());

    if ($text === '') {
        return $fallback;
    }

    return $text;
}
|
||||
}
|
||||
@@ -19,44 +19,97 @@ final class DocumentTagController extends AbstractController
|
||||
#[Route('/{id}/tags', name: 'admin_document_tags_edit', methods: ['GET'])]
|
||||
public function edit(string $id, DocumentTagAdminService $svc): Response
|
||||
{
|
||||
$data = $svc->getEditData($id);
|
||||
$id = trim($id);
|
||||
|
||||
try {
|
||||
$data = $svc->getEditData($id);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument-Tags konnten nicht geladen werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_documents');
|
||||
}
|
||||
|
||||
return $this->render('admin/document_tags/edit.html.twig', [
|
||||
'document' => $data['document'],
|
||||
'allTags' => $data['allTags'],
|
||||
'latestJob' => $data['latestJob'],
|
||||
|
||||
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
|
||||
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
|
||||
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
|
||||
'statusFailed' => TagRebuildJob::STATUS_FAILED,
|
||||
...$data,
|
||||
...$this->buildJobStatusViewData(),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/{id}/tags/save', name: 'admin_document_tags_save', methods: ['POST'])]
|
||||
public function save(string $id, Request $request, DocumentTagAdminService $svc): RedirectResponse
|
||||
{
|
||||
$selected = $request->request->all('tag_ids') ?? [];
|
||||
$id = trim($id);
|
||||
|
||||
if (!$this->isCsrfTokenValid('admin_document_tags_save_' . $id, (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
|
||||
}
|
||||
|
||||
try {
|
||||
$svc->saveTags($id, $selected);
|
||||
$svc->saveTags($id, $this->normalizeStringList($request->request->all('tag_ids')));
|
||||
$this->addFlash('success', 'Tags wurden aktualisiert. Rebuild läuft im Hintergrund.');
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $e->getMessage());
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tags konnten nicht aktualisiert werden.'));
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wichtig: Ohne extra "admin/" im Pfad, weil Prefix schon /admin/documents ist.
|
||||
* Ergebnis: /admin/documents/tags/status
|
||||
*/
|
||||
#[Route('/tags/status', name: 'admin_tags_status', methods: ['GET'])]
|
||||
public function status(DocumentTagAdminService $svc): JsonResponse
|
||||
{
|
||||
$status = $svc->getLatestRebuildStatus();
|
||||
|
||||
return $this->json([
|
||||
'status' => $svc->getLatestRebuildStatus(),
|
||||
'status' => $status,
|
||||
'hasActiveJob' => $status === TagRebuildJob::STATUS_RUNNING
|
||||
|| $status === TagRebuildJob::STATUS_QUEUED,
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Normalizes a submitted list into unique, trimmed, non-empty strings.
 *
 * Anything that is not an array yields an empty list. Non-scalar entries
 * (nested arrays, null, objects) are skipped; the first occurrence of each
 * value keeps its position.
 *
 * @param mixed $values
 * @return list<string>
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // A nested array (e.g. tag_ids[][]=x) would be cast to the literal
        // "Array" with a warning, so it is dropped rather than stored.
        if (!is_scalar($value)) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
|
||||
|
||||
/**
 * Exposes the TagRebuildJob status constants under the names used by the templates.
 *
 * @return array<string, string>
 */
private function buildJobStatusViewData(): array
{
    $statuses = [];
    $statuses['statusRunning'] = TagRebuildJob::STATUS_RUNNING;
    $statuses['statusQueued'] = TagRebuildJob::STATUS_QUEUED;
    $statuses['statusCompleted'] = TagRebuildJob::STATUS_COMPLETED;
    $statuses['statusFailed'] = TagRebuildJob::STATUS_FAILED;

    return $statuses;
}
|
||||
|
||||
/**
 * Picks the text to show for a failure: the trimmed exception message, or
 * $fallback when that message is blank.
 *
 * NOTE(review): the exception message is passed through unfiltered; confirm
 * that no caller can surface internal details to end users.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());

    if ($text === '') {
        return $fallback;
    }

    return $text;
}
|
||||
}
|
||||
@@ -1,46 +1,44 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Controller\Admin;
|
||||
|
||||
use App\Entity\IngestJob;
|
||||
use App\Service\IngestJobService;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
|
||||
use Symfony\Component\HttpFoundation\JsonResponse;
|
||||
use Symfony\Component\HttpFoundation\RedirectResponse;
|
||||
use Symfony\Component\HttpFoundation\Request;
|
||||
use Symfony\Component\HttpFoundation\Response;
|
||||
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
|
||||
use Symfony\Component\Routing\Attribute\Route;
|
||||
use Symfony\Component\HttpFoundation\RedirectResponse;
|
||||
use Symfony\Component\HttpFoundation\JsonResponse;
|
||||
|
||||
#[Route('/admin/jobs')]
|
||||
class IngestJobController extends AbstractController
|
||||
final class IngestJobController extends AbstractController
|
||||
{
|
||||
#[Route('', name: 'admin_jobs')]
|
||||
#[Route('', name: 'admin_jobs', methods: ['GET'])]
|
||||
public function index(EntityManagerInterface $em): Response
|
||||
{
|
||||
$jobs = $em->getRepository(IngestJob::class)
|
||||
->findBy([], ['startedAt' => 'DESC']);
|
||||
->findBy([], ['startedAt' => 'DESC', 'id' => 'DESC']);
|
||||
|
||||
return $this->render('admin/job/index.html.twig', [
|
||||
'jobs' => $jobs
|
||||
'jobs' => $jobs,
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route(
|
||||
'/{id}',
|
||||
name: 'admin_job_show',
|
||||
requirements: ['id' => '[0-9a-fA-F\-]{36}']
|
||||
requirements: ['id' => '[0-9a-fA-F\-]{36}'],
|
||||
methods: ['GET']
|
||||
)]
|
||||
public function show(string $id, EntityManagerInterface $em): Response
|
||||
{
|
||||
$job = $em->getRepository(IngestJob::class)->find($id);
|
||||
|
||||
if (!$job) {
|
||||
throw new NotFoundHttpException();
|
||||
}
|
||||
|
||||
return $this->render('admin/job/show.html.twig', [
|
||||
'job' => $job
|
||||
'job' => $this->findJob($id, $em),
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -54,12 +52,7 @@ class IngestJobController extends AbstractController
|
||||
{
|
||||
$this->denyAccessUnlessGranted('ROLE_USER');
|
||||
|
||||
/** @var IngestJob|null $job */
|
||||
$job = $em->getRepository(IngestJob::class)->find($id);
|
||||
|
||||
if (!$job) {
|
||||
throw new NotFoundHttpException();
|
||||
}
|
||||
$job = $this->findJob($id, $em);
|
||||
|
||||
return $this->json([
|
||||
'id' => (string) $job->getId(),
|
||||
@@ -68,58 +61,185 @@ class IngestJobController extends AbstractController
|
||||
'startedAt' => $job->getStartedAt()->format(DATE_ATOM),
|
||||
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM),
|
||||
'errorMessage' => $job->getErrorMessage(),
|
||||
'logPath' => $job->getLogPath(),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])]
|
||||
public function globalReindex(
|
||||
Request $request,
|
||||
IngestJobService $jobService,
|
||||
EntityManagerInterface $em,
|
||||
): RedirectResponse {
|
||||
|
||||
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 1) Job anlegen (QUEUED)
|
||||
// ---------------------------------------------------------
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_GLOBAL_REINDEX,
|
||||
$this->getUser(),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
if (!$this->isCsrfTokenValid('global_reindex', (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 2) CLI im Hintergrund starten
|
||||
// ---------------------------------------------------------
|
||||
$projectDir = (string)$this->getParameter('kernel.project_dir');
|
||||
$console = $projectDir . '/bin/console';
|
||||
|
||||
$logDir = $projectDir . '/var/log/ingest';
|
||||
if (!is_dir($logDir)) {
|
||||
@mkdir($logDir, 0777, true);
|
||||
return $this->redirectToRoute('admin_jobs');
|
||||
}
|
||||
$logFile = $logDir . '/job_' . (string)$job->getId() . '.log';
|
||||
|
||||
$php = 'php';
|
||||
try {
|
||||
$projectDir = $this->resolveProjectDir();
|
||||
$console = $projectDir . '/bin/console';
|
||||
|
||||
$cmd = sprintf(
|
||||
'%s %s --no-interaction %s %s >> %s 2>&1 &',
|
||||
escapeshellcmd($php),
|
||||
escapeshellarg($console),
|
||||
escapeshellarg('mto:agent:ingest:run'),
|
||||
escapeshellarg((string)$job->getId()),
|
||||
escapeshellarg($logFile),
|
||||
);
|
||||
if (!is_file($console)) {
|
||||
throw new \RuntimeException('bin/console not found: ' . $console);
|
||||
}
|
||||
|
||||
exec($cmd);
|
||||
$logDir = $projectDir . '/var/log/ingest';
|
||||
$this->ensureDirectoryExists($logDir);
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 3) Redirect auf Job-Detailseite (Loader)
|
||||
// ---------------------------------------------------------
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string)$job->getId(),
|
||||
]);
|
||||
$job = $jobService->startJob(
|
||||
IngestJob::TYPE_GLOBAL_REINDEX,
|
||||
$this->getUser(),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
IngestJob::STATUS_QUEUED
|
||||
);
|
||||
|
||||
$logFile = $logDir . '/job_' . (string) $job->getId() . '.log';
|
||||
$job->setLogPath($logFile);
|
||||
$em->flush();
|
||||
|
||||
$phpBinary = $this->resolvePhpBinary();
|
||||
$cmd = sprintf(
|
||||
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
|
||||
escapeshellarg($projectDir),
|
||||
escapeshellarg($phpBinary),
|
||||
escapeshellarg($console),
|
||||
escapeshellarg('mto:agent:ingest:run'),
|
||||
escapeshellarg((string) $job->getId()),
|
||||
escapeshellarg($logFile),
|
||||
);
|
||||
|
||||
$output = [];
|
||||
$exitCode = 0;
|
||||
@exec($cmd, $output, $exitCode);
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
$job->markFailed('Global reindex async bootstrap failed with exit code ' . $exitCode . '.');
|
||||
$em->flush();
|
||||
|
||||
$this->addFlash('danger', 'Global Reindex konnte nicht im Hintergrund gestartet werden.');
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
}
|
||||
|
||||
$this->addFlash('success', 'Global Reindex wurde gestartet.');
|
||||
|
||||
return $this->redirectToRoute('admin_job_show', [
|
||||
'id' => (string) $job->getId(),
|
||||
]);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Global Reindex konnte nicht gestartet werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_jobs');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Loads an IngestJob by its id.
 *
 * The id is trimmed and must be a syntactically valid UUID before it reaches
 * the repository, so a malformed id is reported as "not found" rather than
 * being passed to the lookup.
 *
 * @throws NotFoundHttpException when the id is not a valid UUID or no job is
 *                               found for it
 */
private function findJob(string $id, EntityManagerInterface $em): IngestJob
{
    $id = trim($id);

    // Fully qualified so this method does not depend on a file-level import.
    if (!\Symfony\Component\Uid\Uuid::isValid($id)) {
        throw new NotFoundHttpException();
    }

    /** @var IngestJob|null $job */
    $job = $em->getRepository(IngestJob::class)->find($id);

    if (!$job instanceof IngestJob) {
        throw new NotFoundHttpException();
    }

    return $job;
}
|
||||
|
||||
/**
 * Returns the kernel project directory without a trailing slash.
 *
 * @throws \RuntimeException when the parameter is blank or not a directory
 */
private function resolveProjectDir(): string
{
    $configuredDir = trim((string) $this->getParameter('kernel.project_dir'));
    $isUsable = $configuredDir !== '' && is_dir($configuredDir);

    if (!$isUsable) {
        throw new \RuntimeException('Project directory is invalid.');
    }

    return rtrim($configuredDir, '/');
}
|
||||
|
||||
/**
 * Locates a CLI PHP executable for launching background console commands.
 *
 * Candidates are probed in this order, and the first one accepted by
 * isValidCliPhpBinary() is returned:
 *  1. PHP_CLI_BINARY from $_SERVER, $_ENV and getenv()
 *  2. the PHP_BINARY constant of the running process
 *  3. a list of conventional install locations
 *  4. a PATH lookup via `command -v php` (only when shell_exec is available)
 *
 * @throws \RuntimeException when no candidate qualifies
 */
private function resolvePhpBinary(): string
{
    $candidates = [
        // Explicit configuration takes precedence.
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        // The running binary; the validator rejects it when it is an FPM/CGI build.
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        // Conventional install locations.
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($candidates as $candidate) {
        if ($this->isValidCliPhpBinary($candidate)) {
            return $candidate;
        }
    }

    // Last resort: PATH lookup. On PHP 8 a function listed in disable_functions is
    // undefined, so calling it throws an Error that "@" cannot suppress. Probe first
    // so the descriptive exception below is reached instead.
    if (function_exists('shell_exec')) {
        $whichPhp = trim((string) @shell_exec('command -v php 2>/dev/null'));

        if ($this->isValidCliPhpBinary($whichPhp)) {
            return $whichPhp;
        }
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
|
||||
|
||||
/**
 * Tells whether a path points to an executable file that is not an FPM/CGI binary.
 *
 * The path is trimmed first. It must be an existing, executable regular file,
 * and its lower-cased base name must contain neither "fpm" nor "cgi".
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);
    $isExecutableFile = $candidate !== '' && is_file($candidate) && is_executable($candidate);

    if (!$isExecutableFile) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
|
||||
|
||||
/**
 * Creates the directory (recursively, mode 0775) unless it already exists.
 *
 * The directory is re-checked after a failed mkdir so that a concurrent
 * creation by another process is not treated as an error.
 *
 * @throws \RuntimeException when the directory is still missing afterwards
 */
private function ensureDirectoryExists(string $dir): void
{
    $available = is_dir($dir) || @mkdir($dir, 0775, true) || is_dir($dir);

    if (!$available) {
        throw new \RuntimeException('Could not create ingest log directory.');
    }
}
|
||||
|
||||
/**
 * Picks the text to show for a failure: the trimmed exception message, or
 * $fallback when that message is blank.
 *
 * NOTE(review): the exception message is passed through unfiltered; confirm
 * that no caller can surface internal details (e.g. file paths) to end users.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());

    if ($text === '') {
        return $fallback;
    }

    return $text;
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ namespace App\Controller\Admin;
|
||||
|
||||
use App\Entity\TagRebuildJob;
|
||||
use App\Service\Admin\TagAdminService;
|
||||
use App\Tag\TagTypes;
|
||||
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
|
||||
use Symfony\Component\HttpFoundation\RedirectResponse;
|
||||
use Symfony\Component\HttpFoundation\Request;
|
||||
@@ -18,41 +19,32 @@ final class TagController extends AbstractController
|
||||
#[Route('', name: 'admin_tags_index', methods: ['GET'])]
|
||||
public function index(TagAdminService $svc): Response
|
||||
{
|
||||
$data = $svc->getIndexData();
|
||||
|
||||
return $this->render('admin/tag/index.html.twig', [
|
||||
...$data,
|
||||
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
|
||||
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
|
||||
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
|
||||
'statusFailed' => TagRebuildJob::STATUS_FAILED,
|
||||
...$svc->getIndexData(),
|
||||
...$this->buildJobStatusViewData(),
|
||||
]);
|
||||
}
|
||||
|
||||
#[Route('/create', name: 'admin_tags_create', methods: ['POST'])]
|
||||
public function create(Request $request, TagAdminService $svc): RedirectResponse
|
||||
{
|
||||
if (!$this->isCsrfTokenValid(
|
||||
'admin_tag_create',
|
||||
$request->request->get('_token')
|
||||
)) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF Token.');
|
||||
if (!$this->isCsrfTokenValid('admin_tag_create', (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
return $this->redirectToRoute('admin_tags_index');
|
||||
}
|
||||
|
||||
try {
|
||||
$svc->create(
|
||||
(string)$request->request->get('slug', ''),
|
||||
(string)$request->request->get('label', ''),
|
||||
$request->request->get('description')
|
||||
? (string)$request->request->get('description')
|
||||
: null,
|
||||
(string)$request->request->get('type', 'generic') // NEU
|
||||
(string) $request->request->get('slug', ''),
|
||||
(string) $request->request->get('label', ''),
|
||||
$this->normalizeNullableString($request->request->get('description')),
|
||||
TagTypes::normalize((string) $request->request->get('type', TagTypes::GENERIC))
|
||||
);
|
||||
|
||||
$this->addFlash('success', 'Tag wurde erstellt.');
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $e->getMessage());
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht erstellt werden.'));
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_tags_index');
|
||||
@@ -61,58 +53,110 @@ final class TagController extends AbstractController
|
||||
#[Route('/{id}/delete', name: 'admin_tags_delete', methods: ['POST'])]
|
||||
public function delete(string $id, Request $request, TagAdminService $svc): RedirectResponse
|
||||
{
|
||||
if (!$this->isCsrfTokenValid(
|
||||
'admin_tag_delete_' . $id,
|
||||
$request->request->get('_token')
|
||||
)) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF Token.');
|
||||
if (!$this->isCsrfTokenValid('admin_tag_delete_' . $id, (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
return $this->redirectToRoute('admin_tags_index');
|
||||
}
|
||||
|
||||
try {
|
||||
$svc->delete($id);
|
||||
$svc->delete(trim($id));
|
||||
$this->addFlash('success', 'Tag wurde gelöscht.');
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $e->getMessage());
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht gelöscht werden.'));
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_tags_index');
|
||||
}
|
||||
|
||||
#[Route('/{id}/assign', name: 'admin_tags_assign', methods: ['GET', 'POST'])]
|
||||
public function assign(
|
||||
string $id,
|
||||
Request $request,
|
||||
TagAdminService $svc
|
||||
): Response {
|
||||
public function assign(string $id, Request $request, TagAdminService $svc): Response
|
||||
{
|
||||
$id = trim($id);
|
||||
|
||||
if ($request->isMethod('POST')) {
|
||||
if (!$this->isCsrfTokenValid('assign_tag_' . $id, (string) $request->request->get('_token'))) {
|
||||
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
|
||||
|
||||
if (!$this->isCsrfTokenValid(
|
||||
'assign_tag_' . $id,
|
||||
$request->request->get('_token')
|
||||
)) {
|
||||
throw $this->createAccessDeniedException();
|
||||
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
|
||||
}
|
||||
|
||||
$svc->syncAssignments(
|
||||
$id,
|
||||
$request->request->all('documents') ?? []
|
||||
);
|
||||
|
||||
$this->addFlash('success', 'Zuweisungen aktualisiert.');
|
||||
try {
|
||||
$svc->syncAssignments($id, $this->normalizeStringList($request->request->all('documents')));
|
||||
$this->addFlash('success', 'Zuweisungen aktualisiert.');
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Zuweisungen konnten nicht aktualisiert werden.'));
|
||||
}
|
||||
|
||||
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
|
||||
}
|
||||
|
||||
$data = $svc->getAssignData($id);
|
||||
try {
|
||||
$data = $svc->getAssignData($id);
|
||||
} catch (\Throwable $e) {
|
||||
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht geladen werden.'));
|
||||
|
||||
return $this->redirectToRoute('admin_tags_index');
|
||||
}
|
||||
|
||||
return $this->render('admin/tag/assign.html.twig', [
|
||||
...$data,
|
||||
...$this->buildJobStatusViewData(),
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Converts an optional form value to a trimmed string, or null when it is blank.
 *
 * Values that cannot be represented as a string (arrays, null, objects
 * without __toString) yield null instead of the literal "Array" or an Error.
 *
 * @param mixed $value
 */
private function normalizeNullableString(mixed $value): ?string
{
    if (!is_scalar($value) && !$value instanceof \Stringable) {
        return null;
    }

    $value = trim((string) $value);

    return $value !== '' ? $value : null;
}
|
||||
|
||||
/**
 * Normalizes a submitted list into unique, trimmed, non-empty strings.
 *
 * Anything that is not an array yields an empty list. Non-scalar entries
 * (nested arrays, null, objects) are skipped; the first occurrence of each
 * value keeps its position.
 *
 * @param mixed $values
 * @return list<string>
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // A nested array (e.g. documents[][]=x) would be cast to the literal
        // "Array" with a warning, so it is dropped rather than stored.
        if (!is_scalar($value)) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
|
||||
|
||||
/**
|
||||
* @return array<string, string>
|
||||
*/
|
||||
private function buildJobStatusViewData(): array
|
||||
{
|
||||
return [
|
||||
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
|
||||
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
|
||||
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
|
||||
'statusFailed' => TagRebuildJob::STATUS_FAILED,
|
||||
]);
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Picks the text to show for a failure: the trimmed exception message, or
 * $fallback when that message is blank.
 *
 * NOTE(review): the exception message is passed through unfiltered; confirm
 * that no caller can surface internal details to end users.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());

    if ($text === '') {
        return $fallback;
    }

    return $text;
}
|
||||
}
|
||||
@@ -10,38 +10,79 @@ use Symfony\Component\Routing\Attribute\Route;
|
||||
|
||||
final class TagRebuildStreamController
|
||||
{
|
||||
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream')]
|
||||
private const POLL_INTERVAL_SECONDS = 2;
|
||||
private const KEEPALIVE_INTERVAL_SECONDS = 10;
|
||||
|
||||
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream', methods: ['GET'])]
|
||||
public function stream(TagRebuildStatusProvider $provider): StreamedResponse
|
||||
{
|
||||
$response = new StreamedResponse(function () use ($provider) {
|
||||
$response = new StreamedResponse(function () use ($provider): void {
|
||||
self::disableOutputBuffering();
|
||||
|
||||
echo "event: ping\n";
|
||||
echo "data: " . json_encode(['init' => true]) . "\n\n";
|
||||
echo "retry: 3000\n";
|
||||
self::sendEvent('ping', ['init' => true]);
|
||||
|
||||
@ob_flush();
|
||||
@flush();
|
||||
$lastPayloadHash = null;
|
||||
$lastKeepaliveAt = time();
|
||||
|
||||
while (!connection_aborted()) {
|
||||
|
||||
$data = $provider->getLatestStatus();
|
||||
|
||||
if ($data !== null) {
|
||||
echo "event: message\n";
|
||||
echo "data: " . json_encode($data) . "\n\n";
|
||||
$payloadHash = md5(
|
||||
json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) ?: 'null'
|
||||
);
|
||||
|
||||
@ob_flush();
|
||||
@flush();
|
||||
if ($payloadHash !== $lastPayloadHash) {
|
||||
self::sendEvent('message', $data);
|
||||
$lastPayloadHash = $payloadHash;
|
||||
$lastKeepaliveAt = time();
|
||||
}
|
||||
}
|
||||
|
||||
sleep(2);
|
||||
if ((time() - $lastKeepaliveAt) >= self::KEEPALIVE_INTERVAL_SECONDS) {
|
||||
self::sendEvent('ping', [
|
||||
'ts' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
||||
]);
|
||||
$lastKeepaliveAt = time();
|
||||
}
|
||||
|
||||
sleep(self::POLL_INTERVAL_SECONDS);
|
||||
}
|
||||
});
|
||||
|
||||
$response->headers->set('Content-Type', 'text/event-stream');
|
||||
$response->headers->set('Cache-Control', 'no-cache');
|
||||
$response->headers->set('Cache-Control', 'no-cache, no-store, must-revalidate');
|
||||
$response->headers->set('Pragma', 'no-cache');
|
||||
$response->headers->set('Expires', '0');
|
||||
$response->headers->set('Connection', 'keep-alive');
|
||||
$response->headers->set('X-Accel-Buffering', 'no');
|
||||
|
||||
return $response;
|
||||
}
|
||||
|
||||
/**
 * Flushes and removes every active output buffer so that echoed data is not
 * held back by PHP-level buffering.
 *
 * Stops at the first buffer that cannot be removed: ob_end_flush() returns
 * false in that case and the nesting level does not drop, so continuing
 * would loop forever.
 */
private static function disableOutputBuffering(): void
{
    while (ob_get_level() > 0) {
        if (!@ob_end_flush()) {
            break;
        }
    }
}
|
||||
|
||||
/**
 * Writes one server-sent event ("event:" line, "data:" line with the
 * JSON-encoded payload, blank line) and flushes the output.
 *
 * When the payload cannot be JSON-encoded, a fixed error object is sent instead.
 *
 * @param array<string, mixed> $data
 */
private static function sendEvent(string $event, array $data): void
{
    $payload = json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);

    if ($payload === false) {
        $payload = '{"error":"json_encode_failed"}';
    }

    echo 'event: ', $event, "\n", 'data: ', $payload, "\n\n";

    // Push the frame out right away; "@" hides the notice ob_flush() raises
    // when no output buffer is active.
    @ob_flush();
    @flush();
}
|
||||
}
|
||||
@@ -8,6 +8,7 @@ use Doctrine\ORM\Mapping as ORM;
|
||||
|
||||
#[ORM\Entity]
|
||||
#[ORM\Table(name: 'document_tag')]
|
||||
#[ORM\Index(name: 'idx_document_tag_tag_id', columns: ['tag_id'])]
|
||||
class DocumentTag
|
||||
{
|
||||
#[ORM\Id]
|
||||
@@ -22,8 +23,8 @@ class DocumentTag
|
||||
|
||||
public function __construct(Document $document, Tag $tag)
|
||||
{
|
||||
$this->document = $document;
|
||||
$this->tag = $tag;
|
||||
$this->setDocument($document);
|
||||
$this->setTag($tag);
|
||||
}
|
||||
|
||||
public function getDocument(): Document
|
||||
@@ -35,4 +36,20 @@ class DocumentTag
|
||||
{
|
||||
return $this->tag;
|
||||
}
|
||||
|
||||
/**
 * Tells whether this link connects the given document and tag, comparing both by id.
 */
public function isSameRelation(Document $document, Tag $tag): bool
{
    if (!$this->document->getId()->equals($document->getId())) {
        return false;
    }

    return $this->tag->getId()->equals($tag->getId());
}
|
||||
|
||||
/**
 * Stores the document side of the relation.
 */
private function setDocument(Document $document): void
{
    $this->document = $document;
}
|
||||
|
||||
/**
 * Stores the tag side of the relation.
 */
private function setTag(Tag $tag): void
{
    $this->tag = $tag;
}
|
||||
}
|
||||
@@ -1,8 +1,12 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Entity;
|
||||
|
||||
use App\Tag\TagTypes;
|
||||
use Doctrine\ORM\Mapping as ORM;
|
||||
use InvalidArgumentException;
|
||||
use Symfony\Component\Uid\Uuid;
|
||||
|
||||
#[ORM\Entity]
|
||||
@@ -24,25 +28,25 @@ class Tag
|
||||
#[ORM\Column(type: 'text', nullable: true)]
|
||||
private ?string $description = null;
|
||||
|
||||
/**
|
||||
* NEU: Governance-Typ des Tags
|
||||
* - generic
|
||||
* - catalog_entity
|
||||
*/
|
||||
#[ORM\Column(length: 50)]
|
||||
private string $type = 'generic';
|
||||
private string $type = TagTypes::GENERIC;
|
||||
|
||||
#[ORM\Column]
|
||||
private \DateTimeImmutable $createdAt;
|
||||
|
||||
public function __construct(string $slug, string $label, ?string $description = null)
|
||||
{
|
||||
public function __construct(
|
||||
string $slug,
|
||||
string $label,
|
||||
?string $description = null,
|
||||
string $type = TagTypes::GENERIC,
|
||||
) {
|
||||
$this->id = Uuid::v4();
|
||||
$this->createdAt = new \DateTimeImmutable();
|
||||
|
||||
$this->slug = $slug;
|
||||
$this->label = $label;
|
||||
$this->description = $description;
|
||||
$this->setSlug($slug);
|
||||
$this->setLabel($label);
|
||||
$this->setDescription($description);
|
||||
$this->setType($type);
|
||||
}
|
||||
|
||||
public function getId(): Uuid
|
||||
@@ -57,7 +61,14 @@ class Tag
|
||||
|
||||
public function setSlug(string $slug): static
|
||||
{
|
||||
$slug = $this->normalizeSlug($slug);
|
||||
|
||||
if ($slug === '') {
|
||||
throw new InvalidArgumentException('Tag slug must not be empty.');
|
||||
}
|
||||
|
||||
$this->slug = $slug;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -68,7 +79,14 @@ class Tag
|
||||
|
||||
public function setLabel(string $label): static
|
||||
{
|
||||
$label = trim($label);
|
||||
|
||||
if ($label === '') {
|
||||
throw new InvalidArgumentException('Tag label must not be empty.');
|
||||
}
|
||||
|
||||
$this->label = $label;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -79,7 +97,9 @@ class Tag
|
||||
|
||||
public function setDescription(?string $description): static
|
||||
{
|
||||
$this->description = $description;
|
||||
$description = trim((string) $description);
|
||||
$this->description = $description !== '' ? $description : null;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -90,13 +110,43 @@ class Tag
|
||||
|
||||
public function setType(string $type): static
|
||||
{
|
||||
$type = trim($type);
|
||||
$this->type = $type !== '' ? $type : 'generic';
|
||||
$normalizedType = TagTypes::normalize($type);
|
||||
|
||||
if (!TagTypes::isValid($normalizedType)) {
|
||||
throw new InvalidArgumentException(sprintf('Unsupported tag type "%s".', $type));
|
||||
}
|
||||
|
||||
$this->type = $normalizedType;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function isGeneric(): bool
|
||||
{
|
||||
return $this->type === TagTypes::GENERIC;
|
||||
}
|
||||
|
||||
public function isCatalogEntity(): bool
|
||||
{
|
||||
return $this->type === TagTypes::CATALOG_ENTITY;
|
||||
}
|
||||
|
||||
public function isSalesSignal(): bool
|
||||
{
|
||||
return $this->type === TagTypes::SALES_SIGNAL;
|
||||
}
|
||||
|
||||
public function getCreatedAt(): \DateTimeImmutable
|
||||
{
|
||||
return $this->createdAt;
|
||||
}
|
||||
|
||||
private function normalizeSlug(string $slug): string
|
||||
{
|
||||
$slug = mb_strtolower(trim($slug));
|
||||
$slug = preg_replace('/\s+/u', '-', $slug) ?? $slug;
|
||||
$slug = preg_replace('/-+/u', '-', $slug) ?? $slug;
|
||||
|
||||
return trim($slug, '-');
|
||||
}
|
||||
}
|
||||
@@ -9,14 +9,16 @@ use Symfony\Component\Uid\Uuid;
|
||||
|
||||
#[ORM\Entity]
|
||||
#[ORM\Table(name: 'tag_rebuild_job')]
|
||||
#[ORM\Index(columns: ['status'], name: 'idx_tag_rebuild_job_status')]
|
||||
#[ORM\Index(columns: ['created_at'], name: 'idx_tag_rebuild_job_created_at')]
|
||||
#[ORM\Index(name: 'idx_tag_rebuild_job_status', columns: ['status'])]
|
||||
#[ORM\Index(name: 'idx_tag_rebuild_job_created_at', columns: ['created_at'])]
|
||||
class TagRebuildJob
|
||||
{
|
||||
public const STATUS_QUEUED = 'QUEUED';
|
||||
public const STATUS_RUNNING = 'RUNNING';
|
||||
public const STATUS_QUEUED = 'QUEUED';
|
||||
public const STATUS_RUNNING = 'RUNNING';
|
||||
public const STATUS_COMPLETED = 'COMPLETED';
|
||||
public const STATUS_FAILED = 'FAILED';
|
||||
public const STATUS_FAILED = 'FAILED';
|
||||
|
||||
private const ERROR_MESSAGE_MAX_LENGTH = 4000;
|
||||
|
||||
#[ORM\Id]
|
||||
#[ORM\Column(type: 'uuid', unique: true)]
|
||||
@@ -44,6 +46,19 @@ class TagRebuildJob
|
||||
$this->status = self::STATUS_QUEUED;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return list<string>
|
||||
*/
|
||||
public static function statuses(): array
|
||||
{
|
||||
return [
|
||||
self::STATUS_QUEUED,
|
||||
self::STATUS_RUNNING,
|
||||
self::STATUS_COMPLETED,
|
||||
self::STATUS_FAILED,
|
||||
];
|
||||
}
|
||||
|
||||
public function getId(): Uuid
|
||||
{
|
||||
return $this->id;
|
||||
@@ -54,24 +69,59 @@ class TagRebuildJob
|
||||
return $this->status;
|
||||
}
|
||||
|
||||
public function isQueued(): bool
|
||||
{
|
||||
return $this->status === self::STATUS_QUEUED;
|
||||
}
|
||||
|
||||
public function isRunning(): bool
|
||||
{
|
||||
return $this->status === self::STATUS_RUNNING;
|
||||
}
|
||||
|
||||
public function isCompleted(): bool
|
||||
{
|
||||
return $this->status === self::STATUS_COMPLETED;
|
||||
}
|
||||
|
||||
public function isFailed(): bool
|
||||
{
|
||||
return $this->status === self::STATUS_FAILED;
|
||||
}
|
||||
|
||||
public function isActive(): bool
|
||||
{
|
||||
return $this->isQueued() || $this->isRunning();
|
||||
}
|
||||
|
||||
public function markRunning(): void
|
||||
{
|
||||
$this->status = self::STATUS_RUNNING;
|
||||
$this->startedAt = new \DateTimeImmutable();
|
||||
$this->finishedAt = null;
|
||||
$this->errorMessage = null;
|
||||
}
|
||||
|
||||
public function markCompleted(): void
|
||||
{
|
||||
if ($this->startedAt === null) {
|
||||
$this->startedAt = new \DateTimeImmutable();
|
||||
}
|
||||
|
||||
$this->status = self::STATUS_COMPLETED;
|
||||
$this->finishedAt = new \DateTimeImmutable();
|
||||
$this->errorMessage = null;
|
||||
}
|
||||
|
||||
public function markFailed(string $message): void
|
||||
{
|
||||
if ($this->startedAt === null) {
|
||||
$this->startedAt = new \DateTimeImmutable();
|
||||
}
|
||||
|
||||
$this->status = self::STATUS_FAILED;
|
||||
$this->finishedAt = new \DateTimeImmutable();
|
||||
$this->errorMessage = $message;
|
||||
$this->errorMessage = $this->normalizeErrorMessage($message);
|
||||
}
|
||||
|
||||
public function getCreatedAt(): \DateTimeImmutable
|
||||
@@ -93,4 +143,19 @@ class TagRebuildJob
|
||||
{
|
||||
return $this->errorMessage;
|
||||
}
|
||||
|
||||
private function normalizeErrorMessage(string $message): ?string
|
||||
{
|
||||
$message = trim($message);
|
||||
|
||||
if ($message === '') {
|
||||
return 'Unknown tag rebuild failure.';
|
||||
}
|
||||
|
||||
if (mb_strlen($message) > self::ERROR_MESSAGE_MAX_LENGTH) {
|
||||
$message = mb_substr($message, 0, self::ERROR_MESSAGE_MAX_LENGTH);
|
||||
}
|
||||
|
||||
return $message;
|
||||
}
|
||||
}
|
||||
@@ -6,82 +6,132 @@ namespace App\Intent;
|
||||
|
||||
use App\Config\CatalogIntentConfig;
|
||||
use App\Knowledge\Retrieval\QueryCleaner;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use App\Tag\TagTypes;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
|
||||
/**
|
||||
* CatalogIntentLite
|
||||
* Lightweight catalog entity detector.
|
||||
*
|
||||
* Reiner Entity-Detector.
|
||||
*
|
||||
* Verantwortlich nur für:
|
||||
* - Vector-Tag-Erkennung
|
||||
* - Score-Gate
|
||||
* - Ambiguity-Check
|
||||
* - Sicherstellen, dass TagType = catalog_entity
|
||||
*
|
||||
* KEIN:
|
||||
* - Listen-Signal
|
||||
* - SalesIntent
|
||||
* - Routing
|
||||
* Responsibilities:
|
||||
* - clean the user query for tag lookup
|
||||
* - query the tag vector index
|
||||
* - keep only catalog_entity hits
|
||||
* - apply confidence and ambiguity gates
|
||||
* - return one canonical entity label or null
|
||||
*/
|
||||
final readonly class CatalogIntentLite
|
||||
{
|
||||
/**
|
||||
* Slightly wider than the old top-3 search so generic tags do not crowd out
|
||||
* relevant catalog_entity hits too easily.
|
||||
*/
|
||||
private const SEARCH_LIMIT = 6;
|
||||
|
||||
public function __construct(
|
||||
private TagVectorSearchClient $tagVectorClient,
|
||||
private QueryCleaner $queryCleaner
|
||||
) {}
|
||||
private QueryCleaner $queryCleaner,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gibt das canonical Label der erkannten catalog_entity zurück
|
||||
* oder null, wenn kein sauberer Treffer.
|
||||
* Returns the canonical normalized label of the detected catalog entity,
|
||||
* or null when no safe entity match exists.
|
||||
*/
|
||||
public function detect(string $prompt): ?string
|
||||
{
|
||||
$prompt = trim($prompt);
|
||||
|
||||
if ($prompt === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
$promptTag = $this->queryCleaner->clean($prompt);
|
||||
$cleanQuery = trim($this->queryCleaner->clean($prompt));
|
||||
|
||||
// 1) Tag-Vector-Suche
|
||||
$hits = $this->tagVectorClient->search($promptTag, 3);
|
||||
|
||||
if ($hits === []) {
|
||||
if ($cleanQuery === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
$best = $hits[0];
|
||||
$bestScore = (float)($best['score'] ?? 0.0);
|
||||
$catalogHits = $this->filterCatalogEntityHits(
|
||||
$this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT)
|
||||
);
|
||||
|
||||
if ($catalogHits === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$best = $catalogHits[0];
|
||||
$bestScore = (float) ($best['score'] ?? 0.0);
|
||||
|
||||
// 2) Score-Tags
|
||||
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3) Ambiguity-Check
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float)($hits[1]['score'] ?? 0.0);
|
||||
if (isset($catalogHits[1])) {
|
||||
$secondScore = (float) ($catalogHits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// 4) Nur catalog_entity zulassen
|
||||
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
|
||||
return null;
|
||||
$label = $this->normalizeLabel((string) ($best['label'] ?? ''));
|
||||
|
||||
return $label !== '' ? $label : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, array{
|
||||
* tag_id:string,
|
||||
* score:float,
|
||||
* label?:string,
|
||||
* tag_type?:string
|
||||
* }> $hits
|
||||
*
|
||||
* @return list<array{
|
||||
* tag_id:string,
|
||||
* score:float,
|
||||
* label?:string,
|
||||
* tag_type:string
|
||||
* }>
|
||||
*/
|
||||
private function filterCatalogEntityHits(array $hits): array
|
||||
{
|
||||
$filtered = [];
|
||||
|
||||
foreach ($hits as $hit) {
|
||||
$tagId = trim((string) ($hit['tag_id'] ?? ''));
|
||||
$score = (float) ($hit['score'] ?? 0.0);
|
||||
$tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));
|
||||
|
||||
if ($tagId === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ($tagType !== TagTypes::CATALOG_ENTITY) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$filtered[] = [
|
||||
'tag_id' => $tagId,
|
||||
'score' => $score,
|
||||
'label' => isset($hit['label']) ? (string) $hit['label'] : null,
|
||||
'tag_type' => $tagType,
|
||||
];
|
||||
}
|
||||
|
||||
// 5) Canonical Label
|
||||
$label = trim((string)($best['label'] ?? ''));
|
||||
usort(
|
||||
$filtered,
|
||||
static fn (array $left, array $right): int => ($right['score'] <=> $left['score'])
|
||||
);
|
||||
|
||||
if ($label === '') {
|
||||
return null;
|
||||
}
|
||||
return $filtered;
|
||||
}
|
||||
|
||||
return mb_strtolower($label);
|
||||
private function normalizeLabel(string $label): string
|
||||
{
|
||||
$label = mb_strtolower(trim($label));
|
||||
$label = preg_replace('/\s+/u', ' ', $label) ?? $label;
|
||||
|
||||
return trim($label);
|
||||
}
|
||||
}
|
||||
@@ -8,65 +8,99 @@ use App\Entity\Document;
|
||||
use App\Entity\Tag;
|
||||
use App\Service\TagRebuildJobService;
|
||||
use App\Tag\TagService;
|
||||
use App\Tag\TagTypes;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use RuntimeException;
|
||||
|
||||
final class DocumentTagAdminService
|
||||
final readonly class DocumentTagAdminService
|
||||
{
|
||||
public function __construct(
|
||||
private readonly EntityManagerInterface $em,
|
||||
private readonly TagService $tagService,
|
||||
private readonly TagRebuildJobService $jobs,
|
||||
) {}
|
||||
private EntityManagerInterface $em,
|
||||
private TagService $tagService,
|
||||
private TagRebuildJobService $jobs,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array{
|
||||
* document: Document,
|
||||
* allTags: list<Tag>,
|
||||
* latestJob: mixed
|
||||
* latestJob: mixed,
|
||||
* hasActiveJob: bool
|
||||
* }
|
||||
*/
|
||||
public function getEditData(string $documentId): array
|
||||
{
|
||||
$document = $this->em->getRepository(Document::class)->find($documentId);
|
||||
if (!$document instanceof Document) {
|
||||
throw new \RuntimeException('Document not found');
|
||||
}
|
||||
$document = $this->findDocumentById($documentId);
|
||||
|
||||
/** @var list<Tag> $allTags */
|
||||
$allTags = $this->em->createQueryBuilder()
|
||||
->select('t')
|
||||
->from(Tag::class, 't')
|
||||
->orderBy('t.label', 'ASC')
|
||||
->getQuery()
|
||||
->getResult();
|
||||
$allTags = $this->em->getRepository(Tag::class)->findAll();
|
||||
|
||||
$latestJob = $this->jobs->getLatestJob();
|
||||
usort(
|
||||
$allTags,
|
||||
static function (Tag $left, Tag $right): int {
|
||||
$typeOrder = [
|
||||
TagTypes::CATALOG_ENTITY => 10,
|
||||
TagTypes::GENERIC => 20,
|
||||
TagTypes::SALES_SIGNAL => 30,
|
||||
];
|
||||
|
||||
$leftTypeRank = $typeOrder[$left->getType()] ?? 999;
|
||||
$rightTypeRank = $typeOrder[$right->getType()] ?? 999;
|
||||
|
||||
if ($leftTypeRank !== $rightTypeRank) {
|
||||
return $leftTypeRank <=> $rightTypeRank;
|
||||
}
|
||||
|
||||
$labelComparison = strcasecmp($left->getLabel(), $right->getLabel());
|
||||
|
||||
if ($labelComparison !== 0) {
|
||||
return $labelComparison;
|
||||
}
|
||||
|
||||
return strcmp($left->getSlug(), $right->getSlug());
|
||||
}
|
||||
);
|
||||
|
||||
return [
|
||||
'document' => $document,
|
||||
'allTags' => $allTags,
|
||||
'latestJob' => $latestJob,
|
||||
'latestJob' => $this->jobs->getLatestJob(),
|
||||
'hasActiveJob' => $this->jobs->hasActiveJob(),
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Speichert die Tag-Auswahl für ein Dokument (inkl. Sync-Logik).
|
||||
* Persists the selected tag set for a document via the central domain service.
|
||||
*
|
||||
* @param array<mixed> $selectedTagIds
|
||||
*/
|
||||
public function saveTags(string $documentId, array $selectedTagIds): void
|
||||
{
|
||||
$document = $this->em->getRepository(Document::class)->find($documentId);
|
||||
if (!$document instanceof Document) {
|
||||
throw new \RuntimeException('Document not found');
|
||||
}
|
||||
$document = $this->findDocumentById($documentId);
|
||||
|
||||
// Delegation an deine Domain-Logik (bleibt dort, wo sie hingehört)
|
||||
$this->tagService->syncDocumentTags($document, $selectedTagIds);
|
||||
}
|
||||
|
||||
public function getLatestRebuildStatus(): ?string
|
||||
{
|
||||
$job = $this->jobs->getLatestJob();
|
||||
return $this->jobs->getLatestJob()?->getStatus();
|
||||
}
|
||||
|
||||
return $job?->getStatus();
|
||||
private function findDocumentById(string $documentId): Document
|
||||
{
|
||||
$documentId = trim($documentId);
|
||||
|
||||
if ($documentId === '') {
|
||||
throw new RuntimeException('Document not found.');
|
||||
}
|
||||
|
||||
$document = $this->em->getRepository(Document::class)->find($documentId);
|
||||
|
||||
if (!$document instanceof Document) {
|
||||
throw new RuntimeException('Document not found.');
|
||||
}
|
||||
|
||||
return $document;
|
||||
}
|
||||
}
|
||||
@@ -9,23 +9,29 @@ use App\Entity\DocumentTag;
|
||||
use App\Entity\Tag;
|
||||
use App\Service\TagRebuildJobService;
|
||||
use App\Tag\TagService;
|
||||
use App\Tag\TagTypes;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use RuntimeException;
|
||||
|
||||
final readonly class TagAdminService
|
||||
{
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em,
|
||||
private TagService $tagService,
|
||||
private TagRebuildJobService $jobs,
|
||||
) {}
|
||||
private TagService $tagService,
|
||||
private TagRebuildJobService $jobs,
|
||||
) {
|
||||
}
|
||||
|
||||
public function getIndexData(): array
|
||||
{
|
||||
/** @var list<Tag> $tags */
|
||||
$tags = $this->em->getRepository(Tag::class)
|
||||
->findBy([], ['label' => 'ASC']);
|
||||
->findBy([], ['type' => 'ASC', 'label' => 'ASC']);
|
||||
|
||||
return [
|
||||
'tags' => $tags,
|
||||
'tagTypeChoices' => TagTypes::choices(),
|
||||
'documentCountByTagId' => $this->buildDocumentCountByTagId(),
|
||||
'latestJob' => $this->jobs->getLatestJob(),
|
||||
'hasActiveJob' => $this->jobs->hasActiveJob(),
|
||||
];
|
||||
@@ -35,7 +41,7 @@ final readonly class TagAdminService
|
||||
string $slug,
|
||||
string $label,
|
||||
?string $description,
|
||||
string $type = 'generic' // NEU
|
||||
string $type = TagTypes::GENERIC,
|
||||
): void {
|
||||
$this->tagService->create($slug, $label, $description, $type);
|
||||
}
|
||||
@@ -47,35 +53,47 @@ final readonly class TagAdminService
|
||||
|
||||
public function getAssignData(string $tagId): array
|
||||
{
|
||||
$tag = $this->em->getRepository(Tag::class)->find($tagId);
|
||||
$tag = $this->findTagById($tagId);
|
||||
|
||||
if (!$tag instanceof Tag) {
|
||||
throw new \RuntimeException('Tag nicht gefunden.');
|
||||
}
|
||||
|
||||
$documents = $this->em->getRepository(Document::class)->findAll();
|
||||
/** @var list<Document> $documents */
|
||||
$documents = $this->em->getRepository(Document::class)->findBy(
|
||||
['status' => Document::STATUS_ACTIVE],
|
||||
['title' => 'ASC']
|
||||
);
|
||||
|
||||
$documentsData = array_map(
|
||||
fn(Document $d) => [
|
||||
'id' => (string)$d->getId(),
|
||||
'title' => $d->getTitle(),
|
||||
static fn (Document $document): array => [
|
||||
'id' => (string) $document->getId(),
|
||||
'title' => $document->getTitle(),
|
||||
],
|
||||
$documents
|
||||
);
|
||||
|
||||
/** @var list<DocumentTag> $existingRelations */
|
||||
$existingRelations = $this->em
|
||||
->getRepository(DocumentTag::class)
|
||||
->findBy(['tag' => $tag]);
|
||||
|
||||
$assignedDocIds = array_map(
|
||||
fn(DocumentTag $dt) => (string)$dt->getDocument()->getId(),
|
||||
$existingRelations
|
||||
$activeDocumentIds = array_map(
|
||||
static fn (Document $document): string => (string) $document->getId(),
|
||||
$documents
|
||||
);
|
||||
|
||||
$assignedDocIds = [];
|
||||
|
||||
foreach ($existingRelations as $relation) {
|
||||
$documentId = (string) $relation->getDocument()->getId();
|
||||
|
||||
if (in_array($documentId, $activeDocumentIds, true)) {
|
||||
$assignedDocIds[] = $documentId;
|
||||
}
|
||||
}
|
||||
|
||||
return [
|
||||
'tag' => $tag,
|
||||
'documents' => $documentsData,
|
||||
'assignedDocIds' => $assignedDocIds,
|
||||
'assignedDocIds' => array_values(array_unique($assignedDocIds)),
|
||||
'tagTypeChoices' => TagTypes::choices(),
|
||||
'latestJob' => $this->jobs->getLatestJob(),
|
||||
'hasActiveJob' => $this->jobs->hasActiveJob(),
|
||||
];
|
||||
@@ -83,12 +101,55 @@ final readonly class TagAdminService
|
||||
|
||||
public function syncAssignments(string $tagId, array $selectedDocIds): void
|
||||
{
|
||||
$tag = $this->findTagById($tagId);
|
||||
$this->tagService->syncTagDocuments($tag, $selectedDocIds);
|
||||
}
|
||||
|
||||
private function findTagById(string $tagId): Tag
|
||||
{
|
||||
$tagId = trim($tagId);
|
||||
|
||||
if ($tagId === '') {
|
||||
throw new RuntimeException('Tag nicht gefunden.');
|
||||
}
|
||||
|
||||
$tag = $this->em->getRepository(Tag::class)->find($tagId);
|
||||
|
||||
if (!$tag instanceof Tag) {
|
||||
throw new \RuntimeException('Tag nicht gefunden.');
|
||||
throw new RuntimeException('Tag nicht gefunden.');
|
||||
}
|
||||
|
||||
$this->tagService->syncTagDocuments($tag, $selectedDocIds);
|
||||
return $tag;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array<string, int>
|
||||
*/
|
||||
private function buildDocumentCountByTagId(): array
|
||||
{
|
||||
$rows = $this->em->createQueryBuilder()
|
||||
->select('t AS tag', 'COUNT(d.id) AS documentCount')
|
||||
->from(Tag::class, 't')
|
||||
->leftJoin(DocumentTag::class, 'dt', 'WITH', 'dt.tag = t')
|
||||
->leftJoin('dt.document', 'd', 'WITH', 'd.status = :status')
|
||||
->groupBy('t.id')
|
||||
->setParameter('status', Document::STATUS_ACTIVE)
|
||||
->getQuery()
|
||||
->getResult();
|
||||
|
||||
$counts = [];
|
||||
|
||||
foreach ($rows as $row) {
|
||||
$tag = $row[0] ?? $row['tag'] ?? null;
|
||||
$documentCount = (int) ($row['documentCount'] ?? 0);
|
||||
|
||||
if (!$tag instanceof Tag) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$counts[$tag->getId()->toRfc4122()] = $documentCount;
|
||||
}
|
||||
|
||||
return $counts;
|
||||
}
|
||||
}
|
||||
@@ -1,29 +1,33 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
use App\Entity\Document;
|
||||
use App\Entity\DocumentVersion;
|
||||
use App\Entity\User;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use RuntimeException;
|
||||
|
||||
class DocumentService
|
||||
final readonly class DocumentService
|
||||
{
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em,
|
||||
) {}
|
||||
private TagRebuildJobService $tagRebuildJobService,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Erstellt ein neues Dokument inkl. Version 1
|
||||
* Creates a new document including version 1.
|
||||
*/
|
||||
public function createDocument(
|
||||
string $title,
|
||||
string $filePath,
|
||||
User $user
|
||||
): Document {
|
||||
|
||||
$document = new Document();
|
||||
$document->setTitle($title);
|
||||
$document->setTitle(trim($title));
|
||||
$document->setCreatedBy($user);
|
||||
|
||||
$version = new DocumentVersion();
|
||||
@@ -44,14 +48,13 @@ class DocumentService
|
||||
}
|
||||
|
||||
/**
|
||||
* Fügt neue Version hinzu (immutable)
|
||||
* Adds a new immutable version to an existing document.
|
||||
*/
|
||||
public function addVersion(
|
||||
Document $document,
|
||||
string $filePath,
|
||||
User $user
|
||||
): DocumentVersion {
|
||||
|
||||
$nextVersionNumber = $this->getNextVersionNumber($document);
|
||||
|
||||
$version = new DocumentVersion();
|
||||
@@ -70,7 +73,7 @@ class DocumentService
|
||||
}
|
||||
|
||||
/**
|
||||
* Aktiviert eine Version
|
||||
* Activates a document version and marks it for re-ingest.
|
||||
*/
|
||||
public function activateVersion(DocumentVersion $version): void
|
||||
{
|
||||
@@ -82,41 +85,77 @@ class DocumentService
|
||||
|
||||
$version->setActive(true);
|
||||
$document->setCurrentVersion($version);
|
||||
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_PENDING);
|
||||
|
||||
$this->em->flush();
|
||||
}
|
||||
|
||||
/**
|
||||
* Archiviert Dokument
|
||||
* Archives a document.
|
||||
*
|
||||
* If the document had tag assignments, the tag index is rebuilt so the
|
||||
* routing layer no longer works with an outdated active document set.
|
||||
*/
|
||||
public function archive(Document $document): void
|
||||
{
|
||||
if ($document->getStatus() === Document::STATUS_ARCHIVED) {
|
||||
return;
|
||||
}
|
||||
|
||||
$shouldRebuildTags = $this->hasTagAssignments($document);
|
||||
|
||||
$document->archive();
|
||||
$this->em->flush();
|
||||
}
|
||||
|
||||
public function delete(Document $document): void
|
||||
{
|
||||
$this->em->remove($document);
|
||||
$this->em->flush();
|
||||
if ($shouldRebuildTags) {
|
||||
$this->triggerTagRebuildIfIdle();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Berechnet SHA256 Checksum
|
||||
* Deletes a document.
|
||||
*
|
||||
* If the document had tag assignments, the tag index is rebuilt after the
|
||||
* removal so stale document references disappear from tag-based routing.
|
||||
*/
|
||||
public function delete(Document $document): void
|
||||
{
|
||||
$shouldRebuildTags = $this->hasTagAssignments($document);
|
||||
|
||||
$this->em->remove($document);
|
||||
$this->em->flush();
|
||||
|
||||
if ($shouldRebuildTags) {
|
||||
$this->triggerTagRebuildIfIdle();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the SHA256 checksum for a file path.
|
||||
*/
|
||||
private function calculateChecksum(string $filePath): string
|
||||
{
|
||||
if (!file_exists($filePath)) {
|
||||
throw new \RuntimeException('File not found for checksum.');
|
||||
$filePath = trim($filePath);
|
||||
|
||||
if ($filePath === '') {
|
||||
throw new RuntimeException('File path must not be empty.');
|
||||
}
|
||||
|
||||
return hash_file('sha256', $filePath);
|
||||
if (!is_file($filePath)) {
|
||||
throw new RuntimeException('File not found for checksum.');
|
||||
}
|
||||
|
||||
$checksum = hash_file('sha256', $filePath);
|
||||
|
||||
if ($checksum === false) {
|
||||
throw new RuntimeException('Could not calculate file checksum.');
|
||||
}
|
||||
|
||||
return $checksum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ermittelt nächste Versionsnummer
|
||||
* Determines the next version number for a document.
|
||||
*/
|
||||
private function getNextVersionNumber(Document $document): int
|
||||
{
|
||||
@@ -128,4 +167,16 @@ class DocumentService
|
||||
|
||||
return $max + 1;
|
||||
}
|
||||
}
|
||||
|
||||
private function hasTagAssignments(Document $document): bool
|
||||
{
|
||||
return $document->getDocumentTags()->count() > 0;
|
||||
}
|
||||
|
||||
private function triggerTagRebuildIfIdle(): void
|
||||
{
|
||||
if (!$this->tagRebuildJobService->hasActiveJob()) {
|
||||
$this->tagRebuildJobService->enqueueAndStartAsync();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -11,16 +11,24 @@ use Psr\Log\LoggerInterface;
|
||||
final readonly class TagRebuildJobService
|
||||
{
|
||||
/**
|
||||
* Wenn ein QUEUED-Job länger nicht startet, gilt er als "stale" und wird auf FAILED gesetzt,
|
||||
* damit das System nicht dauerhaft blockiert.
|
||||
* If a QUEUED job does not transition into RUNNING in time,
|
||||
* it is treated as stale so the system does not stay blocked forever.
|
||||
*/
|
||||
private const STALE_QUEUED_AFTER_SECONDS = 300; // 5 Minuten
|
||||
private const STALE_QUEUED_AFTER_SECONDS = 300;
|
||||
|
||||
/**
|
||||
* The background runner should switch the job from QUEUED to RUNNING almost
|
||||
* immediately because markRunning() happens at the top of the command.
|
||||
*/
|
||||
private const ASYNC_START_TIMEOUT_SECONDS = 3;
|
||||
private const ASYNC_START_POLL_INTERVAL_MICROSECONDS = 250000;
|
||||
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em,
|
||||
private LoggerInterface $agentLogger,
|
||||
private string $projectDir,
|
||||
) {}
|
||||
private LoggerInterface $agentLogger,
|
||||
private string $projectDir,
|
||||
) {
|
||||
}
|
||||
|
||||
public function enqueueAndStartAsync(): TagRebuildJob
|
||||
{
|
||||
@@ -29,14 +37,25 @@ final readonly class TagRebuildJobService
|
||||
$this->em->persist($job);
|
||||
$this->em->flush();
|
||||
|
||||
$this->startAsync($job);
|
||||
try {
|
||||
$this->startAsync($job);
|
||||
} catch (\Throwable $e) {
|
||||
$job->markFailed('Async tag rebuild start failed: ' . $e->getMessage());
|
||||
$this->em->flush();
|
||||
|
||||
$this->agentLogger->error('[tags] async job start failed', [
|
||||
'job' => (string) $job->getId(),
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
throw $e;
|
||||
}
|
||||
|
||||
return $job;
|
||||
}
|
||||
|
||||
public function enqueueIfIdle(): ?TagRebuildJob
|
||||
{
|
||||
// Coalescing: Wenn ein Job läuft oder queued ist -> nichts tun
|
||||
if ($this->hasActiveJob()) {
|
||||
return null;
|
||||
}
|
||||
@@ -44,23 +63,18 @@ final readonly class TagRebuildJobService
|
||||
return $this->enqueueAndStartAsync();
|
||||
}
|
||||
|
||||
/**
|
||||
* Letzter Job (egal welcher Status).
|
||||
*/
|
||||
public function getLatestJob(): ?TagRebuildJob
|
||||
{
|
||||
return $this->em->createQueryBuilder()
|
||||
->select('j')
|
||||
->from(TagRebuildJob::class, 'j')
|
||||
->orderBy('j.createdAt', 'DESC')
|
||||
->addOrderBy('j.id', 'DESC')
|
||||
->setMaxResults(1)
|
||||
->getQuery()
|
||||
->getOneOrNullResult();
|
||||
}
|
||||
|
||||
/**
|
||||
* Letzter Job mit Status COMPLETED.
|
||||
*/
|
||||
public function getLatestCompletedJob(): ?TagRebuildJob
|
||||
{
|
||||
return $this->em->createQueryBuilder()
|
||||
@@ -69,18 +83,12 @@ final readonly class TagRebuildJobService
|
||||
->where('j.status = :status')
|
||||
->setParameter('status', TagRebuildJob::STATUS_COMPLETED)
|
||||
->orderBy('j.createdAt', 'DESC')
|
||||
->addOrderBy('j.id', 'DESC')
|
||||
->setMaxResults(1)
|
||||
->getQuery()
|
||||
->getOneOrNullResult();
|
||||
}
|
||||
|
||||
/**
|
||||
* Ob gerade ein Job aktiv ist:
|
||||
* - RUNNING ist immer aktiv
|
||||
* - QUEUED ist nur aktiv, wenn er nicht stale ist
|
||||
*
|
||||
* Zusätzlich: stale QUEUED Jobs werden auf FAILED gesetzt (Recovery).
|
||||
*/
|
||||
public function hasActiveJob(): bool
|
||||
{
|
||||
$this->markStaleQueuedJobsFailed();
|
||||
@@ -106,31 +114,33 @@ final readonly class TagRebuildJobService
|
||||
return (int) $qb->getQuery()->getSingleScalarResult() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Startet den Job async über bin/console.
|
||||
* Wichtige Fixes:
|
||||
* - php explizit verwenden
|
||||
* - --no-interaction
|
||||
* - Logfile statt /dev/null
|
||||
*/
|
||||
private function startAsync(TagRebuildJob $job): void
|
||||
{
|
||||
$projectDir = rtrim($this->projectDir, '/');
|
||||
$console = $projectDir . '/bin/console';
|
||||
$projectDir = rtrim(trim($this->projectDir), '/');
|
||||
$console = $projectDir . '/bin/console';
|
||||
|
||||
if ($projectDir === '' || !is_dir($projectDir)) {
|
||||
throw new \RuntimeException('Project directory is invalid.');
|
||||
}
|
||||
|
||||
if (!is_file($console)) {
|
||||
throw new \RuntimeException('bin/console not found: ' . $console);
|
||||
}
|
||||
|
||||
$phpBinary = $this->resolvePhpBinary();
|
||||
$jobId = (string) $job->getId();
|
||||
|
||||
$logDir = $projectDir . '/var/log/tags';
|
||||
if (!is_dir($logDir)) {
|
||||
@mkdir($logDir, 0777, true);
|
||||
if (!is_dir($logDir) && !@mkdir($logDir, 0775, true) && !is_dir($logDir)) {
|
||||
throw new \RuntimeException('Could not create tag job log directory.');
|
||||
}
|
||||
|
||||
$logFile = $logDir . '/job_' . $jobId . '.log';
|
||||
|
||||
// Robust: cd ins Projekt, dann nohup php bin/console ...
|
||||
$cmd = sprintf(
|
||||
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 &',
|
||||
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
|
||||
escapeshellarg($projectDir),
|
||||
escapeshellcmd('php'),
|
||||
escapeshellarg($phpBinary),
|
||||
escapeshellarg($console),
|
||||
escapeshellarg('mto:agent:tags:job:run'),
|
||||
escapeshellarg($jobId),
|
||||
@@ -141,15 +151,92 @@ final readonly class TagRebuildJobService
|
||||
'job' => $jobId,
|
||||
'cmd' => $cmd,
|
||||
'log' => $logFile,
|
||||
'php_binary' => $phpBinary,
|
||||
]);
|
||||
|
||||
@exec($cmd);
|
||||
$output = [];
|
||||
$exitCode = 0;
|
||||
@exec($cmd, $output, $exitCode);
|
||||
|
||||
$pid = isset($output[0]) ? trim((string) $output[0]) : '';
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
throw new \RuntimeException('Async process bootstrap failed with exit code ' . $exitCode . '.');
|
||||
}
|
||||
|
||||
if ($pid === '' || !ctype_digit($pid)) {
|
||||
throw new \RuntimeException('Async process bootstrap did not return a valid PID.');
|
||||
}
|
||||
|
||||
$this->agentLogger->info('[tags] async job process started', [
|
||||
'job' => $jobId,
|
||||
'pid' => $pid,
|
||||
'log' => $logFile,
|
||||
'php_binary' => $phpBinary,
|
||||
]);
|
||||
|
||||
$this->waitForAsyncJobTransition($job, $logFile);
|
||||
}
|
||||
|
||||
/**
 * Locates a PHP CLI executable that can be used to spawn console commands.
 *
 * Candidates are probed in order and the first one accepted by
 * isValidCliPhpBinary() is returned:
 *   1. PHP_CLI_BINARY read from $_SERVER, then $_ENV, then getenv()
 *   2. the PHP_BINARY constant
 *   3. a fixed list of common install locations
 *   4. the path reported by `command -v php`
 *
 * @throws \RuntimeException when none of the candidates passes validation
 */
private function resolvePhpBinary(): string
{
    $candidates = [
        // Explicit configuration always wins.
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        // The interpreter executing this request.
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        // Well-known locations.
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($candidates as $binary) {
        if ($this->isValidCliPhpBinary($binary)) {
            return $binary;
        }
    }

    // Last resort: ask the shell. Only executed when every cheaper probe failed.
    $fromShell = trim((string) @shell_exec('command -v php 2>/dev/null'));

    if (!$this->isValidCliPhpBinary($fromShell)) {
        throw new \RuntimeException(
            'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
        );
    }

    return $fromShell;
}
|
||||
|
||||
/**
 * Checks whether a path points to an executable file that does not look
 * like an FPM or CGI build of PHP.
 *
 * The path is trimmed first. It is rejected when it is empty, is not a
 * regular file, is not executable, or when its lower-cased basename
 * contains "fpm" or "cgi".
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);

    if ($candidate === '') {
        return false;
    }

    if (!is_file($candidate) || !is_executable($candidate)) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
|
||||
|
||||
/**
|
||||
* Recovery for jobs that stay QUEUED forever:
|
||||
* Marks stale QUEUED jobs as FAILED so that enqueueIfIdle() is not blocked permanently.
|
||||
*/
|
||||
private function markStaleQueuedJobsFailed(): void
|
||||
{
|
||||
$cutoff = new \DateTimeImmutable('-' . self::STALE_QUEUED_AFTER_SECONDS . ' seconds');
|
||||
@@ -161,12 +248,13 @@ final readonly class TagRebuildJobService
|
||||
->andWhere('j.createdAt < :cutoff')
|
||||
->setParameter('queued', TagRebuildJob::STATUS_QUEUED)
|
||||
->setParameter('cutoff', $cutoff)
|
||||
->orderBy('j.createdAt', 'ASC')
|
||||
->setMaxResults(25);
|
||||
|
||||
/** @var TagRebuildJob[] $stale */
|
||||
/** @var list<TagRebuildJob> $stale */
|
||||
$stale = $qb->getQuery()->getResult();
|
||||
|
||||
if (!$stale) {
|
||||
if ($stale === []) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -183,4 +271,46 @@ final readonly class TagRebuildJobService
|
||||
|
||||
$this->em->flush();
|
||||
}
|
||||
|
||||
/**
 * Blocks until the given job is no longer in the queued state or the
 * start timeout elapses.
 *
 * The job entity is refreshed through the entity manager after every
 * polling interval. Any state other than "queued" ends the wait
 * successfully.
 *
 * @throws \RuntimeException when the job is still queued after
 *                           ASYNC_START_TIMEOUT_SECONDS; the message carries
 *                           the tail of the log file when one is readable
 */
private function waitForAsyncJobTransition(TagRebuildJob $job, string $logFile): void
{
    $giveUpAt = microtime(true) + self::ASYNC_START_TIMEOUT_SECONDS;

    while (true) {
        if (microtime(true) >= $giveUpAt) {
            break;
        }

        usleep(self::ASYNC_START_POLL_INTERVAL_MICROSECONDS);
        $this->em->refresh($job);

        if (!$job->isQueued()) {
            return;
        }
    }

    $message = 'Async tag rebuild runner did not transition from QUEUED to RUNNING within '
        . self::ASYNC_START_TIMEOUT_SECONDS
        . ' seconds.';

    // Attach whatever the runner managed to log to ease diagnosis.
    $logHint = $this->readLogTail($logFile);

    if ($logHint !== null) {
        $message .= ' Log tail: ' . $logHint;
    }

    throw new \RuntimeException($message);
}
|
||||
|
||||
/**
 * Returns the last 800 characters of a log file as a single line.
 *
 * The content is trimmed, cut to its final 800 characters and every run
 * of whitespace is collapsed into one space.
 *
 * @return string|null null when the file is missing, unreadable, or
 *                     contains nothing but whitespace
 */
private function readLogTail(string $logFile): ?string
{
    if (!(is_file($logFile) && is_readable($logFile))) {
        return null;
    }

    $raw = @file_get_contents($logFile);

    if (!is_string($raw)) {
        return null;
    }

    $trimmed = trim($raw);

    if ($trimmed === '') {
        return null;
    }

    $excerpt = mb_substr($trimmed, -800);
    // preg_replace() yields null on failure; keep the raw excerpt then.
    $collapsed = preg_replace('/\s+/u', ' ', $excerpt);
    $result = trim($collapsed ?? $excerpt);

    return $result === '' ? null : $result;
}
|
||||
}
|
||||
@@ -11,29 +11,76 @@ final readonly class TagRebuildStatusProvider
|
||||
{
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em
|
||||
) {}
|
||||
) {
|
||||
}
|
||||
|
||||
public function getLatestStatus(): ?array
|
||||
{
|
||||
$this->em->clear();
|
||||
|
||||
$job = $this->em->createQueryBuilder()
|
||||
->select('j')
|
||||
$row = $this->em->createQueryBuilder()
|
||||
->select(
|
||||
'j.status AS status',
|
||||
'j.createdAt AS createdAt',
|
||||
'j.startedAt AS startedAt',
|
||||
'j.finishedAt AS finishedAt',
|
||||
'j.errorMessage AS errorMessage'
|
||||
)
|
||||
->from(TagRebuildJob::class, 'j')
|
||||
->orderBy('j.createdAt', 'DESC')
|
||||
->addOrderBy('j.id', 'DESC')
|
||||
->setMaxResults(1)
|
||||
->getQuery()
|
||||
->getOneOrNullResult();
|
||||
->getOneOrNullResult(\Doctrine\ORM\Query::HYDRATE_ARRAY);
|
||||
|
||||
if (!$job instanceof TagRebuildJob) {
|
||||
if (!is_array($row)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$status = trim((string) ($row['status'] ?? ''));
|
||||
|
||||
if ($status === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [
|
||||
'status' => $job->getStatus(),
|
||||
'startedAt' => $job->getStartedAt()?->format(DATE_ATOM),
|
||||
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM),
|
||||
'error' => $job->getErrorMessage(),
|
||||
'status' => $status,
|
||||
'createdAt' => $this->formatDateValue($row['createdAt'] ?? null),
|
||||
'startedAt' => $this->formatDateValue($row['startedAt'] ?? null),
|
||||
'finishedAt' => $this->formatDateValue($row['finishedAt'] ?? null),
|
||||
'error' => $this->normalizeNullableString($row['errorMessage'] ?? null),
|
||||
'hasActiveJob' => in_array($status, [
|
||||
TagRebuildJob::STATUS_QUEUED,
|
||||
TagRebuildJob::STATUS_RUNNING,
|
||||
], true),
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Converts a date-like value into an ATOM (ISO 8601) formatted string.
 *
 * Date objects are formatted directly. Non-empty strings are parsed with
 * DateTimeImmutable. Every other input, and any string that cannot be
 * parsed, yields null.
 */
private function formatDateValue(mixed $value): ?string
{
    if ($value instanceof \DateTimeInterface) {
        return $value->format(DATE_ATOM);
    }

    if (!is_string($value)) {
        return null;
    }

    $raw = trim($value);

    if ($raw === '') {
        return null;
    }

    try {
        $parsed = new \DateTimeImmutable($raw);
    } catch (\Throwable) {
        // An unparsable value is reported as "no date" instead of failing.
        return null;
    }

    return $parsed->format(DATE_ATOM);
}
|
||||
|
||||
/**
 * Casts a value to string, trims it, and maps an empty result to null.
 */
private function normalizeNullableString(mixed $value): ?string
{
    $text = trim((string) $value);

    if ($text === '') {
        return null;
    }

    return $text;
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Tag;
|
||||
|
||||
use App\Entity\Document;
|
||||
use App\Entity\DocumentTag;
|
||||
use App\Entity\Tag;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
@@ -12,148 +13,199 @@ final readonly class TagNdjsonExporter
|
||||
{
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em,
|
||||
private string $tagsNdjsonPath,
|
||||
) {}
|
||||
private string $tagsNdjsonPath,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Export all tags into NDJSON (streaming) with atomic switch (.tmp + rename()).
|
||||
* Export all relevant tags into NDJSON (streaming) with atomic switch (.tmp + rename()).
|
||||
*
|
||||
* Line format:
|
||||
* {
|
||||
* "tag_id":"...",
|
||||
* "text":"label\nslug\noptional description",
|
||||
* "type":"catalog_entity|generic|...",
|
||||
* "type":"catalog_entity|generic|sales_signal",
|
||||
* "document_ids":["...","..."]
|
||||
* }
|
||||
*
|
||||
* Only ACTIVE document assignments are exported. Tags without active document
|
||||
* assignments are intentionally skipped so they do not influence retrieval.
|
||||
*
|
||||
* @return array{tags:int, lines:int, bytes:int, path:string}
|
||||
*/
|
||||
public function export(): array
|
||||
{
|
||||
$dir = \dirname($this->tagsNdjsonPath);
|
||||
if (!\is_dir($dir)) {
|
||||
@\mkdir($dir, 0775, true);
|
||||
}
|
||||
$this->ensureTargetDirectoryExists();
|
||||
|
||||
$tmpPath = $this->tagsNdjsonPath . '.tmp';
|
||||
$this->cleanupTemporaryFile($tmpPath);
|
||||
|
||||
$fh = @\fopen($tmpPath, 'wb');
|
||||
if (!$fh) {
|
||||
$fh = @fopen($tmpPath, 'wb');
|
||||
|
||||
if ($fh === false) {
|
||||
throw new \RuntimeException('Cannot write tags NDJSON: ' . $tmpPath);
|
||||
}
|
||||
|
||||
// 1) Load all tags
|
||||
$tags = $this->em->createQueryBuilder()
|
||||
->select('t')
|
||||
->from(Tag::class, 't')
|
||||
->orderBy('t.label', 'ASC')
|
||||
->getQuery()
|
||||
->getResult();
|
||||
try {
|
||||
/** @var list<Tag> $tags */
|
||||
$tags = $this->em->createQueryBuilder()
|
||||
->select('t')
|
||||
->from(Tag::class, 't')
|
||||
->orderBy('t.type', 'ASC')
|
||||
->addOrderBy('t.label', 'ASC')
|
||||
->getQuery()
|
||||
->getResult();
|
||||
|
||||
if (!\is_array($tags) || $tags === []) {
|
||||
\fclose($fh);
|
||||
if ($tags === []) {
|
||||
fclose($fh);
|
||||
$this->atomicReplace($tmpPath, $this->tagsNdjsonPath);
|
||||
|
||||
return [
|
||||
'tags' => 0,
|
||||
'lines' => 0,
|
||||
'bytes' => (int) @filesize($this->tagsNdjsonPath),
|
||||
'path' => $this->tagsNdjsonPath,
|
||||
];
|
||||
}
|
||||
|
||||
$tagToActiveDocs = $this->buildActiveDocumentMap();
|
||||
$lines = 0;
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
$tagId = $tag->getId()->toRfc4122();
|
||||
$docIds = $tagToActiveDocs[$tagId] ?? [];
|
||||
|
||||
if ($docIds === []) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$line = [
|
||||
'tag_id' => $tagId,
|
||||
'text' => $this->buildEmbeddingText($tag),
|
||||
'type' => TagTypes::normalize($tag->getType()),
|
||||
'document_ids' => $docIds,
|
||||
];
|
||||
|
||||
$json = json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
if (!is_string($json)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
fwrite($fh, $json . "\n");
|
||||
$lines++;
|
||||
}
|
||||
|
||||
fclose($fh);
|
||||
$this->atomicReplace($tmpPath, $this->tagsNdjsonPath);
|
||||
|
||||
return [
|
||||
'tags' => 0,
|
||||
'lines' => 0,
|
||||
'bytes' => (int) @\filesize($this->tagsNdjsonPath),
|
||||
'path' => $this->tagsNdjsonPath,
|
||||
'tags' => count($tags),
|
||||
'lines' => $lines,
|
||||
'bytes' => (int) @filesize($this->tagsNdjsonPath),
|
||||
'path' => $this->tagsNdjsonPath,
|
||||
];
|
||||
}
|
||||
} catch (\Throwable $e) {
|
||||
fclose($fh);
|
||||
$this->cleanupTemporaryFile($tmpPath);
|
||||
|
||||
// 2) Build tagId => docIds map
|
||||
$rows = $this->em->createQueryBuilder()
|
||||
->select('IDENTITY(dt.tag) AS tagId', 'IDENTITY(dt.document) AS docId')
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array<string, list<string>>
|
||||
*/
|
||||
private function buildActiveDocumentMap(): array
|
||||
{
|
||||
/** @var list<DocumentTag> $relations */
|
||||
$relations = $this->em->createQueryBuilder()
|
||||
->select('dt')
|
||||
->addSelect('t', 'd')
|
||||
->from(DocumentTag::class, 'dt')
|
||||
->innerJoin('dt.tag', 't')
|
||||
->innerJoin('dt.document', 'd')
|
||||
->where('d.status = :status')
|
||||
->setParameter('status', Document::STATUS_ACTIVE)
|
||||
->getQuery()
|
||||
->getArrayResult();
|
||||
->getResult();
|
||||
|
||||
$tagToDocs = [];
|
||||
foreach ($rows as $r) {
|
||||
$tagId = (string) ($r['tagId'] ?? '');
|
||||
$docId = (string) ($r['docId'] ?? '');
|
||||
if ($tagId === '' || $docId === '') {
|
||||
continue;
|
||||
}
|
||||
$tagToDocs[$tagId][] = $docId;
|
||||
|
||||
foreach ($relations as $relation) {
|
||||
$tag = $relation->getTag();
|
||||
$document = $relation->getDocument();
|
||||
|
||||
$tagId = $tag->getId()->toRfc4122();
|
||||
$docId = $document->getId()->toRfc4122();
|
||||
|
||||
$tagToDocs[$tagId][$docId] = $docId;
|
||||
}
|
||||
|
||||
// 3) Stream NDJSON
|
||||
$lines = 0;
|
||||
|
||||
foreach ($tags as $tag) {
|
||||
if (!$tag instanceof Tag) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$tagId = (string) $tag->getId();
|
||||
$docIds = $tagToDocs[$tagId] ?? [];
|
||||
|
||||
if ($docIds !== []) {
|
||||
$docIds = \array_values(\array_unique($docIds));
|
||||
}
|
||||
|
||||
// Embedding source
|
||||
$textParts = [
|
||||
$tag->getLabel(),
|
||||
$tag->getSlug(),
|
||||
];
|
||||
|
||||
$desc = $tag->getDescription();
|
||||
if (\is_string($desc) && \trim($desc) !== '') {
|
||||
$textParts[] = \trim($desc);
|
||||
}
|
||||
|
||||
$type = method_exists($tag, 'getType')
|
||||
? (string) $tag->getType()
|
||||
: 'generic';
|
||||
|
||||
if ($type === '') {
|
||||
$type = 'generic';
|
||||
}
|
||||
|
||||
$line = [
|
||||
'tag_id' => $tagId,
|
||||
'text' => \implode("\n", $textParts),
|
||||
'type' => $type, // 🔥 NEW
|
||||
'document_ids' => $docIds,
|
||||
];
|
||||
|
||||
$json = \json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
||||
if (!\is_string($json)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
\fwrite($fh, $json . "\n");
|
||||
$lines++;
|
||||
foreach ($tagToDocs as $tagId => $docIds) {
|
||||
ksort($docIds);
|
||||
$tagToDocs[$tagId] = array_values($docIds);
|
||||
}
|
||||
|
||||
\fclose($fh);
|
||||
$this->atomicReplace($tmpPath, $this->tagsNdjsonPath);
|
||||
return $tagToDocs;
|
||||
}
|
||||
|
||||
return [
|
||||
'tags' => \count($tags),
|
||||
'lines' => $lines,
|
||||
'bytes' => (int) @\filesize($this->tagsNdjsonPath),
|
||||
'path' => $this->tagsNdjsonPath,
|
||||
/**
 * Builds the newline-separated text that represents a tag for embedding.
 *
 * The parts are the trimmed label, the trimmed slug and, when present,
 * the description with whitespace runs collapsed to single spaces.
 * Duplicate and empty parts are dropped before joining.
 */
private function buildEmbeddingText(Tag $tag): string
{
    $segments = [
        trim($tag->getLabel()),
        trim($tag->getSlug()),
    ];

    $description = trim((string) $tag->getDescription());

    if ($description !== '') {
        $segments[] = preg_replace('/\s+/u', ' ', $description) ?? $description;
    }

    $lines = [];

    foreach (array_unique($segments) as $segment) {
        if ($segment !== '') {
            $lines[] = $segment;
        }
    }

    return implode("\n", $lines);
}
|
||||
|
||||
/**
 * Creates the parent directory of the NDJSON target file when it is missing.
 *
 * @throws \RuntimeException when the directory does not exist and cannot be created
 */
private function ensureTargetDirectoryExists(): void
{
    $targetDir = dirname($this->tagsNdjsonPath);

    // The trailing is_dir() tolerates a concurrent process creating the
    // directory between the first check and mkdir().
    if (!is_dir($targetDir) && !@mkdir($targetDir, 0775, true) && !is_dir($targetDir)) {
        throw new \RuntimeException('Cannot create tags NDJSON directory: ' . $targetDir);
    }
}
|
||||
|
||||
/**
 * Deletes the given temporary file if it exists; failures are ignored.
 */
private function cleanupTemporaryFile(string $tmpPath): void
{
    if (!is_file($tmpPath)) {
        return;
    }

    @unlink($tmpPath);
}
|
||||
|
||||
private function atomicReplace(string $tmpPath, string $finalPath): void
|
||||
{
|
||||
if (\is_file($finalPath)) {
|
||||
@\chmod($finalPath, 0664);
|
||||
if (is_file($finalPath)) {
|
||||
@chmod($finalPath, 0664);
|
||||
}
|
||||
|
||||
if (!@\rename($tmpPath, $finalPath)) {
|
||||
if (!@\copy($tmpPath, $finalPath)) {
|
||||
@\unlink($tmpPath);
|
||||
if (!@rename($tmpPath, $finalPath)) {
|
||||
if (!@copy($tmpPath, $finalPath)) {
|
||||
@unlink($tmpPath);
|
||||
throw new \RuntimeException('Atomic replace failed for: ' . $finalPath);
|
||||
}
|
||||
@\unlink($tmpPath);
|
||||
|
||||
@unlink($tmpPath);
|
||||
}
|
||||
|
||||
@\chmod($finalPath, 0664);
|
||||
@chmod($finalPath, 0664);
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Tag;
|
||||
|
||||
use App\Entity\Document;
|
||||
use Doctrine\DBAL\ArrayParameterType;
|
||||
use Doctrine\DBAL\Exception;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
@@ -11,91 +12,239 @@ use Symfony\Component\Uid\Uuid;
|
||||
|
||||
final class TagRoutingService
|
||||
{
|
||||
/**
|
||||
* Number of raw tag hits requested from the vector service.
|
||||
*/
|
||||
private const DEFAULT_TOPK = 8;
|
||||
private const MIN_BEST_SCORE = 0.25;
|
||||
private const MAX_CANDIDATE_DOCS = 200;
|
||||
|
||||
/**
|
||||
* Hard minimum confidence required to activate tag-based document routing.
|
||||
*
|
||||
* This intentionally aligns with the tag vector client gate to avoid
|
||||
* misleading secondary thresholds in this class.
|
||||
*/
|
||||
private const MIN_BEST_SCORE = 0.72;
|
||||
|
||||
/**
|
||||
* Only keep tag hits that stay reasonably close to the best hit.
|
||||
* This reduces semantic spillover into weakly related document spaces.
|
||||
*/
|
||||
private const MAX_SCORE_DROP_FROM_BEST = 0.08;
|
||||
|
||||
/**
|
||||
* Maximum number of tag hits that may influence routing.
|
||||
*/
|
||||
private const MAX_ROUTING_TAGS = 5;
|
||||
|
||||
/**
|
||||
* Maximum number of candidate documents passed into scoped chunk search.
|
||||
*/
|
||||
private const MAX_CANDIDATE_DOCS = 80;
|
||||
|
||||
/**
|
||||
* Small bonus for documents matched by multiple routed tags.
|
||||
*/
|
||||
private const MULTI_TAG_BONUS_PER_EXTRA_TAG = 0.05;
|
||||
private const MAX_MULTI_TAG_BONUS = 0.15;
|
||||
|
||||
public function __construct(
|
||||
private readonly TagVectorSearchClient $tagSearch,
|
||||
private readonly EntityManagerInterface $em,
|
||||
) {}
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]|null
|
||||
* Returns ordered active document ids for tag-scoped retrieval.
|
||||
*
|
||||
* The method intentionally returns only document ids so the current
|
||||
* retriever pipeline can stay unchanged.
|
||||
*
|
||||
* @return list<string>|null
|
||||
* @throws Exception
|
||||
*/
|
||||
public function route(string $query): ?array
|
||||
{
|
||||
$query = trim($query);
|
||||
|
||||
if ($query === '') {
|
||||
return null;
|
||||
}
|
||||
|
||||
$hits = $this->tagSearch->search($query, self::DEFAULT_TOPK);
|
||||
$hits = $this->filterRoutingHits(
|
||||
$this->tagSearch->search($query, self::DEFAULT_TOPK)
|
||||
);
|
||||
|
||||
if (!is_array($hits) || $hits === []) {
|
||||
if ($hits === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$bestScore = (float)($hits[0]['score'] ?? 0.0);
|
||||
if ($bestScore < self::MIN_BEST_SCORE) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Convert tag UUID strings to binary(16)
|
||||
$tagBinaryIds = [];
|
||||
$tagMetaById = [];
|
||||
|
||||
foreach ($hits as $hit) {
|
||||
$id = (string)($hit['tag_id'] ?? '');
|
||||
if ($id === '') {
|
||||
$tagId = (string) ($hit['tag_id'] ?? '');
|
||||
|
||||
if ($tagId === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
$tagBinaryIds[] = Uuid::fromString($id)->toBinary();
|
||||
$tagBinaryIds[] = Uuid::fromString($tagId)->toBinary();
|
||||
} catch (\Throwable) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$tagMetaById[$tagId] = [
|
||||
'score' => (float) $hit['score'],
|
||||
'weight' => $this->resolveTypeWeight((string) $hit['tag_type']),
|
||||
];
|
||||
}
|
||||
|
||||
if ($tagBinaryIds === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Direct DBAL query (binary-safe)
|
||||
$conn = $this->em->getConnection();
|
||||
|
||||
$rows = $conn->executeQuery(
|
||||
'SELECT document_id
|
||||
FROM document_tag
|
||||
WHERE tag_id IN (:tagIds)',
|
||||
['tagIds' => $tagBinaryIds],
|
||||
['tagIds' => ArrayParameterType::BINARY]
|
||||
$rows = $this->em->getConnection()->executeQuery(
|
||||
'SELECT dt.document_id, dt.tag_id
|
||||
FROM document_tag dt
|
||||
INNER JOIN document d ON d.id = dt.document_id
|
||||
WHERE dt.tag_id IN (:tagIds)
|
||||
AND d.status = :status',
|
||||
[
|
||||
'tagIds' => $tagBinaryIds,
|
||||
'status' => Document::STATUS_ACTIVE,
|
||||
],
|
||||
[
|
||||
'tagIds' => ArrayParameterType::BINARY,
|
||||
]
|
||||
)->fetchAllAssociative();
|
||||
|
||||
if ($rows === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$docIds = [];
|
||||
$documentScores = [];
|
||||
$documentMatchedTags = [];
|
||||
|
||||
foreach ($rows as $row) {
|
||||
if (!isset($row['document_id'])) {
|
||||
if (!isset($row['document_id'], $row['tag_id'])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
$uuid = Uuid::fromBinary($row['document_id']);
|
||||
$docIds[(string)$uuid] = true;
|
||||
$documentId = (string) Uuid::fromBinary($row['document_id']);
|
||||
$tagId = (string) Uuid::fromBinary($row['tag_id']);
|
||||
} catch (\Throwable) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (count($docIds) >= self::MAX_CANDIDATE_DOCS) {
|
||||
if (!isset($tagMetaById[$tagId])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$documentScores[$documentId] = ($documentScores[$documentId] ?? 0.0)
|
||||
+ ($tagMetaById[$tagId]['score'] * $tagMetaById[$tagId]['weight']);
|
||||
|
||||
$documentMatchedTags[$documentId][$tagId] = true;
|
||||
}
|
||||
|
||||
if ($documentScores === []) {
|
||||
return null;
|
||||
}
|
||||
|
||||
foreach ($documentScores as $documentId => $score) {
|
||||
$matchedTagCount = isset($documentMatchedTags[$documentId])
|
||||
? count($documentMatchedTags[$documentId])
|
||||
: 0;
|
||||
|
||||
if ($matchedTagCount > 1) {
|
||||
$documentScores[$documentId] += min(
|
||||
self::MAX_MULTI_TAG_BONUS,
|
||||
($matchedTagCount - 1) * self::MULTI_TAG_BONUS_PER_EXTRA_TAG
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
arsort($documentScores, SORT_NUMERIC);
|
||||
|
||||
return array_slice(
|
||||
array_keys($documentScores),
|
||||
0,
|
||||
self::MAX_CANDIDATE_DOCS
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Reduces raw tag search hits to the ones allowed to influence routing.
 *
 * A hit is kept only when all of the following hold:
 *  - it carries a non-empty tag id
 *  - its score is at least max(MIN_BEST_SCORE, best score - MAX_SCORE_DROP_FROM_BEST)
 *  - its normalized type is not a sales signal
 *
 * At most MAX_ROUTING_TAGS hits are returned. The first element is treated
 * as the best hit, so the input is assumed to be ordered by descending
 * score (TODO confirm against the vector search client).
 *
 * @param array<int, array{
 *   tag_id:string,
 *   score:float,
 *   label?:string,
 *   tag_type?:string
 * }> $hits
 *
 * @return list<array{
 *   tag_id:string,
 *   score:float,
 *   tag_type:string
 * }>
 */
private function filterRoutingHits(array $hits): array
{
    if ($hits === []) {
        return [];
    }

    $bestScore = (float) ($hits[0]['score'] ?? 0.0);

    // Routing is only activated when the best hit is confident enough.
    if ($bestScore < self::MIN_BEST_SCORE) {
        return [];
    }

    $minimumAcceptedScore = max(
        self::MIN_BEST_SCORE,
        $bestScore - self::MAX_SCORE_DROP_FROM_BEST
    );

    $filtered = [];

    foreach ($hits as $hit) {
        $tagId = (string) ($hit['tag_id'] ?? '');
        $score = (float) ($hit['score'] ?? 0.0);

        if ($tagId === '' || $score < $minimumAcceptedScore) {
            continue;
        }

        $tagType = TagTypes::normalize(
            (string) ($hit['tag_type'] ?? TagTypes::GENERIC)
        );

        // Sales signals may still be useful elsewhere, but they should not
        // expand the document scope for semantic retrieval.
        if ($tagType === TagTypes::SALES_SIGNAL) {
            continue;
        }

        $filtered[] = [
            'tag_id' => $tagId,
            'score' => $score,
            'tag_type' => $tagType,
        ];

        if (count($filtered) >= self::MAX_ROUTING_TAGS) {
            break;
        }
    }

    // Return the filtered hits. The earlier stale return of the undefined
    // $docIds variable made this statement unreachable.
    return $filtered;
}
|
||||
|
||||
/**
 * Maps a tag type to the multiplier applied to its score during routing.
 *
 * The type is normalized first. Types without an explicit entry fall
 * back to a neutral weight of 1.00.
 */
private function resolveTypeWeight(string $tagType): float
{
    $weights = [
        TagTypes::CATALOG_ENTITY => 1.20,
        TagTypes::GENERIC => 1.00,
        TagTypes::SALES_SIGNAL => 0.00,
    ];

    return $weights[TagTypes::normalize($tagType)] ?? 1.00;
}
|
||||
}
|
||||
@@ -4,42 +4,45 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Tag;
|
||||
|
||||
use App\Entity\Tag;
|
||||
use App\Entity\Document;
|
||||
use App\Entity\DocumentTag;
|
||||
use App\Entity\Tag;
|
||||
use App\Service\TagRebuildJobService;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use InvalidArgumentException;
|
||||
use RuntimeException;
|
||||
|
||||
final readonly class TagService
|
||||
{
|
||||
public function __construct(
|
||||
private EntityManagerInterface $em,
|
||||
private TagRebuildJobService $jobs,
|
||||
) {}
|
||||
|
||||
// =========================================================
|
||||
// TAG CREATE
|
||||
// =========================================================
|
||||
private TagRebuildJobService $jobs,
|
||||
) {
|
||||
}
|
||||
|
||||
public function create(
|
||||
string $slug,
|
||||
string $label,
|
||||
?string $description = null,
|
||||
string $type = 'generic' // NEU
|
||||
string $type = TagTypes::GENERIC,
|
||||
): Tag {
|
||||
$slug = trim($slug);
|
||||
$normalizedSlug = $this->normalizeSlug($slug);
|
||||
$label = trim($label);
|
||||
|
||||
if ($label === '' || $slug === '') {
|
||||
throw new \InvalidArgumentException('Label und Slug sind Pflichtfelder.');
|
||||
if ($normalizedSlug === '' || $label === '') {
|
||||
throw new InvalidArgumentException('Tag label and slug are required.');
|
||||
}
|
||||
|
||||
if ($this->slugExists($slug)) {
|
||||
throw new \RuntimeException('Slug existiert bereits.');
|
||||
if ($this->slugExists($normalizedSlug)) {
|
||||
throw new RuntimeException(sprintf('Tag slug "%s" already exists.', $normalizedSlug));
|
||||
}
|
||||
|
||||
$tag = new Tag($slug, $label, $description);
|
||||
$tag->setType($type); // NEU
|
||||
$tag = new Tag(
|
||||
$normalizedSlug,
|
||||
$label,
|
||||
$description,
|
||||
TagTypes::normalize($type)
|
||||
);
|
||||
|
||||
$this->em->persist($tag);
|
||||
$this->em->flush();
|
||||
@@ -49,18 +52,9 @@ final readonly class TagService
|
||||
return $tag;
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// TAG DELETE
|
||||
// =========================================================
|
||||
|
||||
public function deleteById(string $tagId): void
|
||||
{
|
||||
$tag = $this->em->getRepository(Tag::class)->find($tagId);
|
||||
|
||||
if (!$tag instanceof Tag) {
|
||||
throw new \RuntimeException('Tag nicht gefunden.');
|
||||
}
|
||||
|
||||
$tag = $this->findTagById($tagId);
|
||||
$this->delete($tag);
|
||||
}
|
||||
|
||||
@@ -72,87 +66,103 @@ final readonly class TagService
|
||||
$this->triggerRebuildIfIdle();
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// DOCUMENT TAG SYNC
|
||||
// =========================================================
|
||||
|
||||
public function syncDocumentTags(Document $document, array $newTagIds): void
|
||||
{
|
||||
$newTagIds = array_unique($newTagIds);
|
||||
$normalizedTagIds = $this->normalizeIdList($newTagIds);
|
||||
|
||||
/** @var list<DocumentTag> $currentRelations */
|
||||
$currentRelations = $this->em
|
||||
->getRepository(DocumentTag::class)
|
||||
->findBy(['document' => $document]);
|
||||
|
||||
$currentTagIds = array_map(
|
||||
fn(DocumentTag $dt) => (string) $dt->getTag()->getId(),
|
||||
static fn (DocumentTag $relation): string => (string) $relation->getTag()->getId(),
|
||||
$currentRelations
|
||||
);
|
||||
|
||||
$toAdd = array_diff($newTagIds, $currentTagIds);
|
||||
$toRemove = array_diff($currentTagIds, $newTagIds);
|
||||
$toAdd = array_values(array_diff($normalizedTagIds, $currentTagIds));
|
||||
$toRemove = array_values(array_diff($currentTagIds, $normalizedTagIds));
|
||||
|
||||
foreach ($toAdd as $tagId) {
|
||||
$tag = $this->em->getRepository(Tag::class)->find($tagId);
|
||||
|
||||
if ($tag instanceof Tag) {
|
||||
$this->em->persist(new DocumentTag($document, $tag));
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($currentRelations as $relation) {
|
||||
if (in_array((string) $relation->getTag()->getId(), $toRemove, true)) {
|
||||
$relationTagId = (string) $relation->getTag()->getId();
|
||||
|
||||
if (in_array($relationTagId, $toRemove, true)) {
|
||||
$this->em->remove($relation);
|
||||
}
|
||||
}
|
||||
|
||||
if ($toAdd || $toRemove) {
|
||||
if ($toAdd !== [] || $toRemove !== []) {
|
||||
$this->em->flush();
|
||||
$this->triggerRebuildIfIdle();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// TAG → DOCUMENT SYNC (Bulk Assign)
|
||||
// =========================================================
|
||||
|
||||
public function syncTagDocuments(Tag $tag, array $newDocumentIds): void
|
||||
{
|
||||
$newDocumentIds = array_unique($newDocumentIds);
|
||||
$normalizedDocumentIds = $this->normalizeIdList($newDocumentIds);
|
||||
|
||||
/** @var list<DocumentTag> $currentRelations */
|
||||
$currentRelations = $this->em
|
||||
->getRepository(DocumentTag::class)
|
||||
->findBy(['tag' => $tag]);
|
||||
|
||||
$currentDocumentIds = array_map(
|
||||
fn(DocumentTag $dt) => (string) $dt->getDocument()->getId(),
|
||||
static fn (DocumentTag $relation): string => (string) $relation->getDocument()->getId(),
|
||||
$currentRelations
|
||||
);
|
||||
|
||||
$toAdd = array_diff($newDocumentIds, $currentDocumentIds);
|
||||
$toRemove = array_diff($currentDocumentIds, $newDocumentIds);
|
||||
$toAdd = array_values(array_diff($normalizedDocumentIds, $currentDocumentIds));
|
||||
$toRemove = array_values(array_diff($currentDocumentIds, $normalizedDocumentIds));
|
||||
|
||||
foreach ($toAdd as $documentId) {
|
||||
$document = $this->em->getRepository(Document::class)->find($documentId);
|
||||
if ($document instanceof Document) {
|
||||
|
||||
if (
|
||||
$document instanceof Document
|
||||
&& $document->getStatus() === Document::STATUS_ACTIVE
|
||||
) {
|
||||
$this->em->persist(new DocumentTag($document, $tag));
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($currentRelations as $relation) {
|
||||
if (in_array((string) $relation->getDocument()->getId(), $toRemove, true)) {
|
||||
$relationDocumentId = (string) $relation->getDocument()->getId();
|
||||
|
||||
if (in_array($relationDocumentId, $toRemove, true)) {
|
||||
$this->em->remove($relation);
|
||||
}
|
||||
}
|
||||
|
||||
if ($toAdd || $toRemove) {
|
||||
if ($toAdd !== [] || $toRemove !== []) {
|
||||
$this->em->flush();
|
||||
$this->triggerRebuildIfIdle();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// INTERNAL HELPERS
|
||||
// =========================================================
|
||||
/**
 * Loads a tag by its id or fails loudly.
 *
 * @throws InvalidArgumentException when the trimmed id is empty
 * @throws RuntimeException         when the repository returns no Tag for the id
 */
private function findTagById(string $tagId): Tag
{
    $id = trim($tagId);

    if ($id === '') {
        throw new InvalidArgumentException('Tag id must not be empty.');
    }

    $found = $this->em->getRepository(Tag::class)->find($id);

    if ($found instanceof Tag) {
        return $found;
    }

    throw new RuntimeException('Tag not found.');
}
|
||||
|
||||
private function slugExists(string $slug): bool
|
||||
{
|
||||
@@ -165,6 +175,36 @@ final readonly class TagService
|
||||
->getSingleScalarResult() > 0;
|
||||
}
|
||||
|
||||
/**
 * Turns an arbitrary list of ids into a list of unique, non-empty strings.
 *
 * Each entry is cast to string and trimmed. Empty results are dropped and
 * duplicates are removed, keeping the first occurrence and re-indexing the
 * result from zero.
 *
 * @param array<mixed> $ids
 * @return list<string>
 */
private function normalizeIdList(array $ids): array
{
    $trimmed = array_map(
        static fn (mixed $id): string => trim((string) $id),
        $ids
    );

    $nonEmpty = array_filter(
        $trimmed,
        static fn (string $id): bool => $id !== ''
    );

    return array_values(array_unique($nonEmpty));
}
|
||||
|
||||
/**
 * Produces the canonical form of a slug.
 *
 * The input is trimmed and lower-cased, whitespace runs become a single
 * hyphen, consecutive hyphens are collapsed, and leading or trailing
 * hyphens are stripped.
 */
private function normalizeSlug(string $slug): string
{
    $normalized = mb_strtolower(trim($slug));

    // Applied in order: whitespace to hyphen first, then hyphen runs collapsed.
    foreach (['/\s+/u', '/-+/u'] as $pattern) {
        $normalized = preg_replace($pattern, '-', $normalized) ?? $normalized;
    }

    return trim($normalized, '-');
}
|
||||
|
||||
private function triggerRebuildIfIdle(): void
|
||||
{
|
||||
if (!$this->jobs->hasActiveJob()) {
|
||||
|
||||
@@ -5,8 +5,10 @@ declare(strict_types=1);
|
||||
namespace App\Tag;
|
||||
|
||||
/**
|
||||
* Zentrale Definition aller erlaubten Tag-Typen.
|
||||
* Verhindert Magic Strings im Code.
|
||||
* Central definition of all supported tag types.
|
||||
*
|
||||
* This class is intentionally tiny and dependency-free because it is the
|
||||
* foundation for entity validation, admin forms, routing, and catalog logic.
|
||||
*/
|
||||
final class TagTypes
|
||||
{
|
||||
@@ -14,6 +16,25 @@ final class TagTypes
|
||||
public const CATALOG_ENTITY = 'catalog_entity';
|
||||
public const SALES_SIGNAL = 'sales_signal';
|
||||
|
||||
/**
 * Lists every supported tag type value.
 *
 * @return list<string>
 */
public static function all(): array
{
    return [self::GENERIC, self::CATALOG_ENTITY, self::SALES_SIGNAL];
}
|
||||
|
||||
/**
|
||||
* Returns UI choices for forms and admin screens.
|
||||
*
|
||||
* @return array<string, string>
|
||||
*/
|
||||
public static function choices(): array
|
||||
{
|
||||
return [
|
||||
@@ -23,5 +44,53 @@ final class TagTypes
|
||||
];
|
||||
}
|
||||
|
||||
private function __construct() {}
|
||||
/**
 * Returns true if the given value is an allowed tag type.
 *
 * The value is trimmed and lower-cased, then compared against all().
 * It is deliberately NOT passed through normalize(): normalize() replaces
 * unknown input with a known fallback type, which would make every
 * non-null string look valid.
 */
public static function isValid(?string $type): bool
{
    if ($type === null) {
        return false;
    }

    return in_array(mb_strtolower(trim($type)), self::all(), true);
}
|
||||
|
||||
/**
 * Maps external input onto a canonical tag type value.
 *
 * Input is trimmed and lower-cased. A recognised type is returned as-is;
 * empty or unrecognised input yields $default (also trimmed and lower-cased)
 * when that is itself a known type, and self::GENERIC otherwise.
 *
 * @param string|null $type    Raw type value; null is treated like ''.
 * @param string      $default Fallback used for empty or unknown input.
 */
public static function normalize(?string $type, string $default = self::GENERIC): string
{
    $candidate = mb_strtolower(trim($type ?? ''));

    if ($candidate !== '' && in_array($candidate, self::all(), true)) {
        return $candidate;
    }

    $fallback = mb_strtolower(trim($default));

    if (self::isKnownDefault($fallback)) {
        return $fallback;
    }

    return self::GENERIC;
}
|
||||
|
||||
/**
 * Looks up the display label for a tag type.
 *
 * The type is normalized first; choices() is inverted so the type value
 * becomes the lookup key. 'Generic' is returned when no label is found.
 */
public static function labelFor(string $type): string
{
    $labelsByType = array_flip(self::choices());
    $canonicalType = self::normalize($type);

    return $labelsByType[$canonicalType] ?? 'Generic';
}
|
||||
|
||||
/**
 * Tells whether $type is exactly one of the values returned by all().
 * No trimming or case folding happens here; callers pass canonical input.
 */
private static function isKnownDefault(string $type): bool
{
    $allowedTypes = self::all();

    return in_array($type, $allowedTypes, true);
}
|
||||
|
||||
/**
 * Private on purpose: the class cannot be instantiated from outside.
 */
private function __construct() {}
|
||||
}
|
||||
@@ -9,18 +9,81 @@ use Psr\Log\LoggerInterface;
|
||||
|
||||
final readonly class TagVectorIndexBuilder
|
||||
{
|
||||
private const GRACEFUL_TERMINATION_SECONDS = 2;
|
||||
|
||||
public function __construct(
|
||||
private string $pythonBin,
|
||||
private string $scriptPath,
|
||||
private string $tagsNdjsonPath,
|
||||
private string $vectorTagsIndexPath,
|
||||
private string $embeddingModel,
|
||||
private int $timeoutSeconds,
|
||||
private LoggerInterface $agentLogger,
|
||||
private IndexMetaManager $metaManager, // ✅ NEU
|
||||
) {}
|
||||
private string $pythonBin,
|
||||
private string $scriptPath,
|
||||
private string $tagsNdjsonPath,
|
||||
private string $vectorTagsIndexPath,
|
||||
private string $embeddingModel,
|
||||
private int $timeoutSeconds,
|
||||
private LoggerInterface $agentLogger,
|
||||
private IndexMetaManager $metaManager,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
 * Rebuilds the tag vector index and swaps it in for the previous one.
 *
 * Flow:
 *  1. Check preconditions, make sure the target directory exists and remove
 *     leftover temporary artifacts.
 *  2. If the NDJSON export holds no embeddable tag, delete the published
 *     index + meta, commit the runtime marker with "no index" and stop.
 *  3. Otherwise run the ingest command against temporary paths, require a
 *     zero exit code and non-empty index + meta files, move both into place
 *     and commit the runtime marker.
 *
 * Any failure after the ingest starts removes the temporary artifacts and
 * rethrows the original exception.
 *
 * @throws \RuntimeException when the ingest fails or yields incomplete artifacts
 */
public function build(): void
{
    $this->assertPreconditions();

    $finalIndex = $this->vectorTagsIndexPath;
    $finalMeta = $finalIndex . '.meta.json';
    $tmpIndex = $finalIndex . '.tmp';
    $tmpMeta = $tmpIndex . '.meta.json';

    $this->ensureTargetDirectoryExists($finalIndex);
    $this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);

    if (!$this->hasEmbeddableTags()) {
        // Nothing to embed: a previously published index would be stale.
        $this->agentLogger->info('[tags] no embeddable tags found, removing stale tag index artifacts.');
        $this->removeFileIfExists($finalIndex);
        $this->removeFileIfExists($finalMeta);
        $this->commitRuntime(false);

        return;
    }

    $cmd = $this->buildCommand($tmpIndex);

    $this->agentLogger->info('[tags] build tag vector index', [
        'cmd' => $cmd,
        'timeout' => $this->timeoutSeconds,
        'embedding_model' => $this->embeddingModel,
    ]);

    try {
        $result = $this->runCommand($cmd);
        $exitCode = $result['exit'];

        if ($exitCode !== 0) {
            $this->agentLogger->error('[tags] tag vector ingest failed', [
                'exit' => $exitCode,
                'stdout' => $result['stdout'],
                'stderr' => $result['stderr'],
            ]);

            throw new \RuntimeException('Tag vector ingest failed (exit=' . $exitCode . ')');
        }

        $artifactsComplete = $this->isUsableArtifact($tmpIndex) && $this->isUsableArtifact($tmpMeta);

        if (!$artifactsComplete) {
            throw new \RuntimeException('Tag vector ingest produced incomplete artifacts.');
        }

        // Publish the index first, then its meta, then record the rebuild.
        $this->atomicReplace($tmpIndex, $finalIndex);
        $this->atomicReplace($tmpMeta, $finalMeta);
        $this->commitRuntime(true);

        $this->agentLogger->info('[tags] tag vector index build completed + runtime committed', [
            'index' => $finalIndex,
            'meta' => $finalMeta,
        ]);
    } catch (\Throwable $failure) {
        $this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);

        throw $failure;
    }
}
|
||||
|
||||
private function assertPreconditions(): void
|
||||
{
|
||||
if (!is_file($this->tagsNdjsonPath)) {
|
||||
throw new \RuntimeException('tags.ndjson missing: ' . $this->tagsNdjsonPath);
|
||||
@@ -30,65 +93,178 @@ final readonly class TagVectorIndexBuilder
|
||||
throw new \RuntimeException('Tag ingest script missing: ' . $this->scriptPath);
|
||||
}
|
||||
|
||||
$tmpIndex = $this->vectorTagsIndexPath . '.tmp';
|
||||
$tmpMeta = $tmpIndex . '.meta.json';
|
||||
|
||||
$finalIndex = $this->vectorTagsIndexPath;
|
||||
$finalMeta = $finalIndex . '.meta.json';
|
||||
|
||||
$dir = \dirname($finalIndex);
|
||||
if (!\is_dir($dir)) {
|
||||
@\mkdir($dir, 0775, true);
|
||||
if (trim($this->pythonBin) === '') {
|
||||
throw new \RuntimeException('Python binary must not be empty.');
|
||||
}
|
||||
|
||||
@\unlink($tmpIndex);
|
||||
@\unlink($tmpMeta);
|
||||
if ($this->timeoutSeconds < 1) {
|
||||
throw new \RuntimeException('Tag vector timeout must be >= 1 second.');
|
||||
}
|
||||
}
|
||||
|
||||
$cmd = sprintf(
|
||||
'%s %s %s %s %s 2>&1',
|
||||
private function buildCommand(string $tmpIndex): string
|
||||
{
|
||||
return sprintf(
|
||||
'%s %s %s %s 2>&1',
|
||||
escapeshellarg($this->pythonBin),
|
||||
escapeshellarg($this->scriptPath),
|
||||
escapeshellarg($this->tagsNdjsonPath),
|
||||
escapeshellarg($tmpIndex),
|
||||
escapeshellarg($this->embeddingModel),
|
||||
);
|
||||
}
|
||||
|
||||
$this->agentLogger->info('[tags] build tag vector index', [
|
||||
'cmd' => $cmd,
|
||||
'timeout' => $this->timeoutSeconds,
|
||||
]);
|
||||
private function ensureTargetDirectoryExists(string $finalIndexPath): void
|
||||
{
|
||||
$dir = dirname($finalIndexPath);
|
||||
|
||||
$out = [];
|
||||
$exit = 0;
|
||||
|
||||
exec($cmd, $out, $exit);
|
||||
|
||||
if ($exit !== 0) {
|
||||
$this->agentLogger->error('[tags] tag vector ingest failed', [
|
||||
'exit' => $exit,
|
||||
'out' => $out,
|
||||
]);
|
||||
throw new \RuntimeException('Tag vector ingest failed (exit=' . $exit . ')');
|
||||
}
|
||||
|
||||
if (!is_file($tmpIndex) || !is_file($tmpMeta)) {
|
||||
@\unlink($tmpIndex);
|
||||
@\unlink($tmpMeta);
|
||||
$this->agentLogger->warning('[tags] no tag index produced (maybe 0 tags).');
|
||||
if (is_dir($dir)) {
|
||||
return;
|
||||
}
|
||||
|
||||
$this->atomicReplace($tmpIndex, $finalIndex);
|
||||
$this->atomicReplace($tmpMeta, $finalMeta);
|
||||
if (!@mkdir($dir, 0775, true) && !is_dir($dir)) {
|
||||
throw new \RuntimeException('Unable to create tag vector directory: ' . $dir);
|
||||
}
|
||||
}
|
||||
|
||||
// ✅ ENTERPRISE COMMIT MARKER
|
||||
/**
 * Scans the tags NDJSON export and reports whether at least one line decodes
 * to an array with a non-empty "tag_id" and a non-empty "text".
 *
 * Blank lines and lines that do not decode to an array are ignored. The scan
 * stops at the first usable record; the file handle is always closed.
 *
 * @throws \RuntimeException when the NDJSON file cannot be opened
 */
private function hasEmbeddableTags(): bool
{
    $stream = @fopen($this->tagsNdjsonPath, 'rb');

    if ($stream === false) {
        throw new \RuntimeException('Unable to read tags NDJSON: ' . $this->tagsNdjsonPath);
    }

    try {
        while (($rawLine = fgets($stream)) !== false) {
            $payload = trim($rawLine);

            if ($payload === '') {
                continue;
            }

            $record = json_decode($payload, true);

            if (!is_array($record)) {
                continue;
            }

            // Both fields are read up front, then checked together.
            $hasTagId = trim((string) ($record['tag_id'] ?? '')) !== '';
            $hasText = trim((string) ($record['text'] ?? '')) !== '';

            if ($hasTagId && $hasText) {
                return true;
            }
        }

        return false;
    } finally {
        fclose($stream);
    }
}
|
||||
|
||||
/**
 * Runs the ingest command, captures stdout/stderr and enforces the timeout.
 *
 * The child's stdin is closed immediately; stdout and stderr are drained in
 * non-blocking mode roughly every 100 ms while the process is polled. When the
 * configured timeout is exceeded the process is terminated, given
 * GRACEFUL_TERMINATION_SECONDS to exit, and force-killed (signal 9) if it is
 * still running.
 *
 * Exit code handling: before PHP 8.3, the first proc_get_status() call that
 * observes the finished process consumes the real exit code, and a later
 * proc_close() returns -1. The code seen by proc_get_status() is therefore
 * remembered and preferred over the proc_close() result.
 *
 * @return array{exit:int, stdout:string, stderr:string}
 *
 * @throws \RuntimeException when the process cannot be started or times out
 */
private function runCommand(string $cmd): array
{
    $descriptorSpec = [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ];

    $process = @proc_open($cmd, $descriptorSpec, $pipes);

    if (!is_resource($process)) {
        throw new \RuntimeException('Could not start tag vector ingest process.');
    }

    fclose($pipes[0]);
    stream_set_blocking($pipes[1], false);
    stream_set_blocking($pipes[2], false);

    $stdout = '';
    $stderr = '';
    $startedAt = microtime(true);
    $timedOut = false;
    $observedExitCode = null;

    try {
        while (true) {
            $stdout .= stream_get_contents($pipes[1]) ?: '';
            $stderr .= stream_get_contents($pipes[2]) ?: '';

            $status = proc_get_status($process);

            if (!is_array($status) || ($status['running'] ?? false) !== true) {
                // Keep the exit code reported here; proc_close() may only
                // return -1 afterwards (PHP < 8.3). A negative value means
                // "unknown" (e.g. killed by a signal) and is not recorded.
                $reported = is_array($status) ? ($status['exitcode'] ?? null) : null;

                if (is_int($reported) && $reported >= 0) {
                    $observedExitCode = $reported;
                }

                break;
            }

            if ((microtime(true) - $startedAt) > $this->timeoutSeconds) {
                $timedOut = true;
                proc_terminate($process);
                usleep(self::GRACEFUL_TERMINATION_SECONDS * 1000000);

                $status = proc_get_status($process);

                if (is_array($status) && ($status['running'] ?? false) === true) {
                    proc_terminate($process, 9);
                }

                break;
            }

            usleep(100000);
        }

        // Final drain: pick up anything written between the last poll and exit.
        $stdout .= stream_get_contents($pipes[1]) ?: '';
        $stderr .= stream_get_contents($pipes[2]) ?: '';
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
    }

    $closeCode = proc_close($process);
    $exitCode = $observedExitCode ?? $closeCode;

    if ($timedOut) {
        $this->agentLogger->error('[tags] tag vector ingest timed out', [
            'timeout' => $this->timeoutSeconds,
            'stdout' => $stdout,
            'stderr' => $stderr,
        ]);

        throw new \RuntimeException('Tag vector ingest timed out after ' . $this->timeoutSeconds . ' seconds.');
    }

    return [
        'exit' => is_int($exitCode) ? $exitCode : 1,
        'stdout' => trim($stdout),
        'stderr' => trim($stderr),
    ];
}
|
||||
|
||||
/**
 * An artifact counts as usable when the path is a regular file whose size
 * is greater than zero bytes.
 */
private function isUsableArtifact(string $path): bool
{
    if (!is_file($path)) {
        return false;
    }

    return filesize($path) > 0;
}
|
||||
|
||||
/**
 * Removes each given path via removeFileIfExists(), in the order passed.
 */
private function cleanupTemporaryArtifacts(string ...$paths): void
{
    array_map($this->removeFileIfExists(...), $paths);
}
|
||||
|
||||
/**
 * Deletes $path when it is a regular file; otherwise does nothing.
 * A failing unlink() is suppressed (best-effort cleanup).
 */
private function removeFileIfExists(string $path): void
{
    if (!is_file($path)) {
        return;
    }

    @unlink($path);
}
|
||||
|
||||
private function commitRuntime(bool $indexPresent): void
|
||||
{
|
||||
$this->metaManager->touchRuntime([
|
||||
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
||||
]);
|
||||
|
||||
$this->agentLogger->info('[tags] tag vector index build completed + runtime committed', [
|
||||
'index' => $finalIndex,
|
||||
'meta' => $finalMeta,
|
||||
'tags_index_present' => $indexPresent,
|
||||
]);
|
||||
}
|
||||
|
||||
@@ -99,6 +275,7 @@ final readonly class TagVectorIndexBuilder
|
||||
@unlink($tmp);
|
||||
throw new \RuntimeException('Atomic replace failed for: ' . $final);
|
||||
}
|
||||
|
||||
@unlink($tmp);
|
||||
}
|
||||
|
||||
|
||||
@@ -6,63 +6,210 @@ namespace App\Tag;
|
||||
|
||||
final readonly class TagVectorIndexHealthService
|
||||
{
|
||||
private const STATUS_OK = 'OK';
|
||||
private const STATUS_OK_EMPTY = 'OK_EMPTY';
|
||||
private const STATUS_INCONSISTENT_STALE_VECTOR = 'INCONSISTENT_STALE_VECTOR';
|
||||
private const STATUS_INCONSISTENT_MISSING_VECTOR = 'INCONSISTENT_MISSING_VECTOR';
|
||||
private const STATUS_INCONSISTENT_COUNT_MISMATCH = 'INCONSISTENT_COUNT_MISMATCH';
|
||||
private const STATUS_INCONSISTENT_INVALID_META = 'INCONSISTENT_INVALID_META';
|
||||
private const STATUS_UNKNOWN = 'UNKNOWN';
|
||||
|
||||
public function __construct(
|
||||
private string $tagsNdjsonPath,
|
||||
private string $vectorTagsIndexPath,
|
||||
private string $vectorTagsMetaPath
|
||||
) {}
|
||||
private string $vectorTagsMetaPath,
|
||||
) {
|
||||
}
|
||||
|
||||
public function check(): array
|
||||
{
|
||||
$ndjsonExists = is_file($this->tagsNdjsonPath);
|
||||
$vectorExists = is_file($this->vectorTagsIndexPath);
|
||||
$metaExists = is_file($this->vectorTagsMetaPath);
|
||||
$metaExists = is_file($this->vectorTagsMetaPath);
|
||||
|
||||
$ndjsonTagCount = 0;
|
||||
$ndjsonStats = $this->readNdjsonStats();
|
||||
$metaStats = $this->readMetaStats();
|
||||
|
||||
if ($ndjsonExists) {
|
||||
$h = @fopen($this->tagsNdjsonPath, 'r');
|
||||
if ($h !== false) {
|
||||
while (($line = fgets($h)) !== false) {
|
||||
$line = trim($line);
|
||||
if ($line === '') continue;
|
||||
|
||||
$data = json_decode($line, true);
|
||||
if (is_array($data) && !empty($data['tag_id']) && !empty($data['text'])) {
|
||||
$ndjsonTagCount++;
|
||||
}
|
||||
}
|
||||
fclose($h);
|
||||
}
|
||||
}
|
||||
|
||||
$vectorTagCount = 0;
|
||||
if ($metaExists) {
|
||||
$meta = json_decode((string) file_get_contents($this->vectorTagsMetaPath), true);
|
||||
if (is_array($meta)) {
|
||||
$vectorTagCount = count($meta);
|
||||
}
|
||||
}
|
||||
|
||||
$status = $this->determineStatus($ndjsonTagCount, $vectorExists, $metaExists, $vectorTagCount);
|
||||
$status = $this->determineStatus(
|
||||
$ndjsonStats['exported_tag_count'],
|
||||
$vectorExists,
|
||||
$metaExists,
|
||||
$metaStats['vector_tag_count'],
|
||||
$metaStats['meta_valid']
|
||||
);
|
||||
|
||||
return [
|
||||
'tags_ndjson_exists' => $ndjsonExists,
|
||||
'tags_ndjson_count' => $ndjsonTagCount,
|
||||
'vector_exists' => $vectorExists,
|
||||
'meta_exists' => $metaExists,
|
||||
'vector_tag_count' => $vectorTagCount,
|
||||
'status' => $status,
|
||||
'tags_ndjson_count' => $ndjsonStats['exported_tag_count'],
|
||||
'vector_exists' => $vectorExists,
|
||||
'meta_exists' => $metaExists,
|
||||
'vector_tag_count' => $metaStats['vector_tag_count'],
|
||||
'status' => $status,
|
||||
|
||||
// Extra diagnostics for admin/CLI.
|
||||
'tags_ndjson_lines_total' => $ndjsonStats['lines_total'],
|
||||
'tags_ndjson_invalid_lines' => $ndjsonStats['invalid_lines'],
|
||||
'tags_ndjson_empty_lines' => $ndjsonStats['empty_lines'],
|
||||
'tags_with_active_document_ids' => $ndjsonStats['tags_with_document_ids'],
|
||||
'meta_valid' => $metaStats['meta_valid'],
|
||||
'paths' => [
|
||||
'tags_ndjson' => $this->tagsNdjsonPath,
|
||||
'vector_index' => $this->vectorTagsIndexPath,
|
||||
'vector_meta' => $this->vectorTagsMetaPath,
|
||||
],
|
||||
];
|
||||
}
|
||||
|
||||
private function determineStatus(int $ndjsonTagCount, bool $vectorExists, bool $metaExists, int $vectorTagCount): string
|
||||
/**
|
||||
* @return array{
|
||||
* lines_total:int,
|
||||
* empty_lines:int,
|
||||
* invalid_lines:int,
|
||||
* exported_tag_count:int,
|
||||
* tags_with_document_ids:int
|
||||
* }
|
||||
*/
|
||||
private function readNdjsonStats(): array
|
||||
{
|
||||
if ($ndjsonTagCount === 0 && !$vectorExists && !$metaExists) return 'OK_EMPTY';
|
||||
if ($ndjsonTagCount > 0 && $vectorExists && $metaExists && $vectorTagCount === $ndjsonTagCount) return 'OK';
|
||||
if ($ndjsonTagCount === 0 && ($vectorExists || $metaExists)) return 'INCONSISTENT_STALE_VECTOR';
|
||||
if ($ndjsonTagCount > 0 && (!$vectorExists || !$metaExists)) return 'INCONSISTENT_MISSING_VECTOR';
|
||||
if ($ndjsonTagCount !== $vectorTagCount) return 'INCONSISTENT_COUNT_MISMATCH';
|
||||
return 'UNKNOWN';
|
||||
$stats = [
|
||||
'lines_total' => 0,
|
||||
'empty_lines' => 0,
|
||||
'invalid_lines' => 0,
|
||||
'exported_tag_count' => 0,
|
||||
'tags_with_document_ids' => 0,
|
||||
];
|
||||
|
||||
if (!is_file($this->tagsNdjsonPath)) {
|
||||
return $stats;
|
||||
}
|
||||
|
||||
$handle = @fopen($this->tagsNdjsonPath, 'rb');
|
||||
|
||||
if ($handle === false) {
|
||||
return $stats;
|
||||
}
|
||||
|
||||
try {
|
||||
while (($line = fgets($handle)) !== false) {
|
||||
$stats['lines_total']++;
|
||||
$line = trim($line);
|
||||
|
||||
if ($line === '') {
|
||||
$stats['empty_lines']++;
|
||||
continue;
|
||||
}
|
||||
|
||||
$data = json_decode($line, true);
|
||||
|
||||
if (!is_array($data)) {
|
||||
$stats['invalid_lines']++;
|
||||
continue;
|
||||
}
|
||||
|
||||
$tagId = trim((string) ($data['tag_id'] ?? ''));
|
||||
$text = trim((string) ($data['text'] ?? ''));
|
||||
$documentIds = $data['document_ids'] ?? null;
|
||||
$hasDocumentIds = is_array($documentIds) && $documentIds !== [];
|
||||
|
||||
if ($tagId === '' || $text === '') {
|
||||
$stats['invalid_lines']++;
|
||||
continue;
|
||||
}
|
||||
|
||||
$stats['exported_tag_count']++;
|
||||
|
||||
if ($hasDocumentIds) {
|
||||
$stats['tags_with_document_ids']++;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fclose($handle);
|
||||
}
|
||||
|
||||
return $stats;
|
||||
}
|
||||
|
||||
/**
 * Reads the vector meta file and derives the number of indexed tags.
 *
 * The meta is considered valid when it decodes to either
 *  - a JSON list (count = number of entries, an empty list counts as 0), or
 *  - a JSON object whose keys are all non-negative integers / digit strings
 *    (count = number of keys).
 *
 * json_decode(..., true) turns canonical numeric object keys such as "1" or
 * "42" into PHP int keys, so int keys must be accepted alongside digit-only
 * strings such as "007"; checking for strings alone rejects ordinary
 * numerically keyed objects.
 *
 * A missing, unreadable, empty or differently shaped file yields
 * vector_tag_count = 0 and meta_valid = false.
 *
 * @return array{vector_tag_count:int, meta_valid:bool}
 */
private function readMetaStats(): array
{
    $invalid = [
        'vector_tag_count' => 0,
        'meta_valid' => false,
    ];

    if (!is_file($this->vectorTagsMetaPath)) {
        return $invalid;
    }

    $raw = file_get_contents($this->vectorTagsMetaPath);

    if (!is_string($raw) || trim($raw) === '') {
        return $invalid;
    }

    $decoded = json_decode($raw, true);

    if (!is_array($decoded)) {
        return $invalid;
    }

    if (!array_is_list($decoded)) {
        // Non-list arrays are never empty, so every key is inspected here.
        foreach (array_keys($decoded) as $key) {
            $isNumericId = is_int($key) ? $key >= 0 : ctype_digit($key);

            if (!$isNumericId) {
                return $invalid;
            }
        }
    }

    return [
        'vector_tag_count' => count($decoded),
        'meta_valid' => true,
    ];
}
|
||||
|
||||
/**
 * Derives the health status from the export count and the state of the
 * vector artifacts. Arms are evaluated top to bottom and the first match
 * wins, so the order below is significant.
 *
 * @param int  $ndjsonTagCount Number of usable tags in the NDJSON export.
 * @param bool $vectorExists   Whether the vector index file exists.
 * @param bool $metaExists     Whether the vector meta file exists.
 * @param int  $vectorTagCount Number of tags reported by the meta file.
 * @param bool $metaValid      Whether the meta file had a recognised shape.
 *
 * @return string One of the STATUS_* constants.
 */
private function determineStatus(
    int $ndjsonTagCount,
    bool $vectorExists,
    bool $metaExists,
    int $vectorTagCount,
    bool $metaValid
): string {
    $noTags = $ndjsonTagCount === 0;
    $hasTags = $ndjsonTagCount > 0;
    $anyArtifact = $vectorExists || $metaExists;
    $bothArtifacts = $vectorExists && $metaExists;
    $countsAgree = $vectorTagCount === $ndjsonTagCount;

    return match (true) {
        $noTags && !$anyArtifact => self::STATUS_OK_EMPTY,
        $noTags && $anyArtifact => self::STATUS_INCONSISTENT_STALE_VECTOR,
        $hasTags && !$bothArtifacts => self::STATUS_INCONSISTENT_MISSING_VECTOR,
        $metaExists && !$metaValid => self::STATUS_INCONSISTENT_INVALID_META,
        $hasTags && $bothArtifacts && $metaValid && $countsAgree => self::STATUS_OK,
        !$countsAgree => self::STATUS_INCONSISTENT_COUNT_MISMATCH,
        default => self::STATUS_UNKNOWN,
    };
}
|
||||
}
|
||||
@@ -12,18 +12,29 @@ final readonly class TagVectorSearchClient
|
||||
/**
|
||||
* Minimum similarity score required for a tag to be considered.
|
||||
*/
|
||||
private const MIN_SCORE = 0.72;
|
||||
public const MIN_SCORE = 0.72;
|
||||
|
||||
/**
|
||||
* Default result size when callers do not specify a limit.
|
||||
*/
|
||||
private const DEFAULT_LIMIT = 8;
|
||||
|
||||
/**
|
||||
* Hard limit to prevent excessive requests.
|
||||
*/
|
||||
private const MAX_LIMIT = 50;
|
||||
|
||||
/**
|
||||
* HTTP timeout for the Python vector service.
|
||||
*/
|
||||
private const TIMEOUT_SECONDS = 10;
|
||||
|
||||
public function __construct(
|
||||
private HttpClientInterface $http,
|
||||
private string $serviceUrl,
|
||||
private LoggerInterface $agentLogger,
|
||||
) {}
|
||||
private string $serviceUrl,
|
||||
private LoggerInterface $agentLogger,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a vector search against the Python tag index.
|
||||
@@ -33,43 +44,51 @@ final readonly class TagVectorSearchClient
|
||||
* {
|
||||
* "tag_id": "...",
|
||||
* "score": 0.73,
|
||||
* "label": "Geräte", // optional (new)
|
||||
* "tag_type": "catalog_entity" // optional (new)
|
||||
* "label": "Geräte",
|
||||
* "tag_type": "catalog_entity"
|
||||
* }
|
||||
* ]
|
||||
*
|
||||
* @return array<int, array{
|
||||
* @return list<array{
|
||||
* tag_id:string,
|
||||
* score:float,
|
||||
* label?:string,
|
||||
* tag_type?:string
|
||||
* label:string,
|
||||
* tag_type:string
|
||||
* }>
|
||||
*/
|
||||
public function search(string $query, int $limit = 8): array
|
||||
public function search(string $query, int $limit = self::DEFAULT_LIMIT): array
|
||||
{
|
||||
$query = trim($query);
|
||||
|
||||
if ($query === '') {
|
||||
return [];
|
||||
}
|
||||
|
||||
$limit = max(1, min($limit, self::MAX_LIMIT));
|
||||
$serviceUrl = rtrim(trim($this->serviceUrl), '/');
|
||||
|
||||
if ($serviceUrl === '') {
|
||||
$this->agentLogger->warning('Tag vector service URL is empty.');
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
$response = $this->http->request(
|
||||
'POST',
|
||||
rtrim($this->serviceUrl, '/') . '/search-tags',
|
||||
$serviceUrl . '/search-tags',
|
||||
[
|
||||
'json' => [
|
||||
'query' => $query,
|
||||
'limit' => $limit,
|
||||
],
|
||||
'timeout' => 10,
|
||||
'timeout' => self::TIMEOUT_SECONDS,
|
||||
]
|
||||
);
|
||||
|
||||
if ($response->getStatusCode() !== 200) {
|
||||
$this->agentLogger->warning(
|
||||
'Tag vector service returned non-200',
|
||||
'Tag vector service returned non-200.',
|
||||
['status' => $response->getStatusCode()]
|
||||
);
|
||||
|
||||
@@ -77,10 +96,9 @@ final readonly class TagVectorSearchClient
|
||||
}
|
||||
|
||||
$data = $response->toArray(false);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
$this->agentLogger->warning(
|
||||
'Tag vector service unreachable',
|
||||
'Tag vector service unreachable.',
|
||||
['error' => $e->getMessage()]
|
||||
);
|
||||
|
||||
@@ -88,18 +106,33 @@ final readonly class TagVectorSearchClient
|
||||
}
|
||||
|
||||
if (!is_array($data)) {
|
||||
$this->agentLogger->warning('Tag vector service returned invalid payload');
|
||||
$this->agentLogger->warning('Tag vector service returned invalid payload.');
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
$hits = [];
|
||||
return $this->normalizeHits($data, $limit);
|
||||
}
|
||||
|
||||
foreach ($data as $row) {
|
||||
/**
|
||||
* @param array<mixed> $rows
|
||||
* @return list<array{
|
||||
* tag_id:string,
|
||||
* score:float,
|
||||
* label:string,
|
||||
* tag_type:string
|
||||
* }>
|
||||
*/
|
||||
private function normalizeHits(array $rows, int $limit): array
|
||||
{
|
||||
$hitsByTagId = [];
|
||||
|
||||
foreach ($rows as $row) {
|
||||
if (!is_array($row)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$tagId = (string)($row['tag_id'] ?? '');
|
||||
$tagId = trim((string) ($row['tag_id'] ?? ''));
|
||||
$score = $row['score'] ?? null;
|
||||
|
||||
if ($tagId === '' || !is_numeric($score)) {
|
||||
@@ -112,24 +145,45 @@ final readonly class TagVectorSearchClient
|
||||
continue;
|
||||
}
|
||||
|
||||
$hit = [
|
||||
$normalizedHit = [
|
||||
'tag_id' => $tagId,
|
||||
'score' => $score,
|
||||
'score' => $score,
|
||||
'label' => trim((string) ($row['label'] ?? '')),
|
||||
'tag_type' => TagTypes::normalize((string) ($row['tag_type'] ?? TagTypes::GENERIC)),
|
||||
];
|
||||
|
||||
// Optional: label
|
||||
if (isset($row['label']) && is_string($row['label'])) {
|
||||
$hit['label'] = $row['label'];
|
||||
}
|
||||
$existingHit = $hitsByTagId[$tagId] ?? null;
|
||||
|
||||
// Optional: tag_type
|
||||
if (isset($row['tag_type']) && is_string($row['tag_type'])) {
|
||||
$hit['tag_type'] = $row['tag_type'];
|
||||
if ($existingHit === null || $normalizedHit['score'] > $existingHit['score']) {
|
||||
$hitsByTagId[$tagId] = $normalizedHit;
|
||||
}
|
||||
|
||||
$hits[] = $hit;
|
||||
}
|
||||
|
||||
return $hits;
|
||||
if ($hitsByTagId === []) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$hits = array_values($hitsByTagId);
|
||||
|
||||
usort(
|
||||
$hits,
|
||||
static function (array $left, array $right): int {
|
||||
$scoreComparison = $right['score'] <=> $left['score'];
|
||||
|
||||
if ($scoreComparison !== 0) {
|
||||
return $scoreComparison;
|
||||
}
|
||||
|
||||
$typeComparison = strcmp($left['tag_type'], $right['tag_type']);
|
||||
|
||||
if ($typeComparison !== 0) {
|
||||
return $typeComparison;
|
||||
}
|
||||
|
||||
return strcmp($left['tag_id'], $right['tag_id']);
|
||||
}
|
||||
);
|
||||
|
||||
return array_slice($hits, 0, $limit);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user