new version ndjson

This commit is contained in:
team 1
2026-02-12 11:22:56 +01:00
parent 0bb0c0b42f
commit 5a52e07edc
10 changed files with 375 additions and 492 deletions

View File

@@ -1,28 +1,25 @@
<?php
// src/Command/KnowledgeIngestCommand.php
declare(strict_types=1);
namespace App\Command;
use App\Knowledge\Ingest\KnowledgeIngestService;
use App\Entity\DocumentVersion;
use App\Entity\User;
use App\Ingest\IngestFlow;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Finder\Finder;
#[AsCommand(
name: 'mto:agent:knowledge:ingest',
description: 'Ingest one or multiple markdown/text documents into file-based knowledge chunks'
)]
final class KnowledgeIngestCommand extends Command
#[AsCommand(name: 'mto:agent:ingest:version')]
class KnowledgeIngestCommand extends Command
{
/**
 * Receives the command's collaborators through constructor property promotion.
 *
 * NOTE(review): this listing looks like a rendered diff (see the `@@` hunk headers)
 * in which removed and added lines appear side by side without +/- markers.
 * Presumably `$ingest` and `$uploadsDir` belong to the removed revision and
 * `$ingestFlow` and `$em` to the added one; confirm against the raw diff before
 * relying on this signature.
 */
public function __construct(
// Used by execute() to call ingestFile() on each collected path.
private readonly KnowledgeIngestService $ingest,
// Directory that execute() checks with is_dir() and scans for *.md files when --all is given.
private readonly string $uploadsDir,
// Used by execute() to call ingestDocumentVersion() with the resolved version and user.
private readonly IngestFlow $ingestFlow,
// Used by execute() to look up DocumentVersion and User through their repositories.
private readonly EntityManagerInterface $em,
) {
parent::__construct();
}
@@ -30,86 +27,28 @@ final class KnowledgeIngestCommand extends Command
/**
 * Declares the console arguments and options this command accepts.
 *
 * NOTE(review): two argument sets are listed back to back. The first chain
 * (optional `file`, `--all`, `--optimize`/`-o`) is terminated by `);`, after which
 * a second chain (required `versionId` and `userId`) starts with a bare `->`.
 * That cannot parse as written, so this is presumably a diff view where the first
 * chain is the removed code and the second its replacement; confirm against the raw diff.
 */
protected function configure(): void
{
$this
// Optional positional argument: path to one .txt/.md file.
->addArgument(
'file',
InputArgument::OPTIONAL,
'Path to a single .txt/.md file'
)
// Boolean flag (no value): ingest every .md file found in the uploads directory.
->addOption(
'all',
null,
InputOption::VALUE_NONE,
'Ingest all .md files from the uploads directory'
)
// Boolean flag with shortcut -o; execute() forwards it as the `optimize` named argument of ingestFile().
->addOption(
'optimize',
'o',
InputOption::VALUE_NONE,
'Optimize chunks for retrieval quality'
);
// Required positional arguments read by execute() and passed to the repositories' find().
->addArgument('versionId', InputArgument::REQUIRED, 'UUID of DocumentVersion')
->addArgument('userId', InputArgument::REQUIRED, 'UUID of user triggering ingest');
}
/**
 * Runs the ingest and reports progress on the console.
 *
 * NOTE(review): two different flows are interleaved in this span and the braces
 * do not appear to balance, which suggests a diff view with removed and added
 * lines shown together; confirm against the raw diff.
 *  - File flow: collect paths (all *.md files under the uploads directory when
 *    --all is set, otherwise the single `file` argument), call ingestFile() on
 *    each and print the returned chunk entries plus a total count.
 *  - Version flow: look up a DocumentVersion and a User by the given ids, fail
 *    if either lookup returns nothing, then call ingestDocumentVersion().
 *
 * @return int Command::SUCCESS or Command::FAILURE
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
// File flow state: list of paths to ingest and the --optimize flag.
$files = [];
$optimize = (bool) $input->getOption('optimize');
// Version flow inputs: raw argument values, not validated here.
$versionId = $input->getArgument('versionId');
$userId = $input->getArgument('userId');
// --all: scan the uploads directory instead of taking a single file argument.
if ($input->getOption('all')) {
if (!is_dir($this->uploadsDir)) {
$output->writeln('<error>❌ uploads directory not found</error>');
return Command::FAILURE;
}
// Version flow: resolve both entities by primary key through their repositories.
$version = $this->em->getRepository(DocumentVersion::class)->find($versionId);
$user = $this->em->getRepository(User::class)->find($userId);
// File flow: match files named *.md under the uploads directory.
$finder = new Finder();
$finder
->files()
->in($this->uploadsDir)
->name('*.md');
// No matching files is reported as a comment and treated as success, not failure.
if (!$finder->hasResults()) {
$output->writeln('<comment> No .md files found in uploads/</comment>');
return Command::SUCCESS;
}
foreach ($finder as $file) {
$files[] = $file->getRealPath();
}
$output->writeln(sprintf(
'📂 Ingesting %d markdown files from uploads (%s)',
count($files),
$optimize ? 'optimized' : 'standard'
));
} else {
// Without --all, the optional `file` argument becomes mandatory.
$file = $input->getArgument('file');
if (!$file) {
$output->writeln('<error>❌ Either provide a file or use --all</error>');
return Command::FAILURE;
}
$files[] = (string) $file;
// Version flow: abort when either lookup produced a falsy result.
if (!$version || !$user) {
$output->writeln('<error>Version or User not found.</error>');
return Command::FAILURE;
}
// File flow: running total of entries returned by ingestFile().
$totalWritten = 0;
$output->writeln('Starting ingest...');
foreach ($files as $filePath) {
$output->writeln('➡️ Ingesting: ' . $filePath);
// Version flow: the actual ingest call for the resolved version and user.
$this->ingestFlow->ingestDocumentVersion($version, $user);
// File flow: the result is counted and iterated below, each entry printed via string concatenation.
$written = $this->ingest->ingestFile(
$filePath,
optimize: $optimize
);
$totalWritten += count($written);
foreach ($written as $chunk) {
$output->writeln(' - ' . $chunk);
}
}
$output->writeln('');
$output->writeln('✅ Total written chunks: ' . $totalWritten);
$output->writeln('<info>Ingest completed.</info>');
return Command::SUCCESS;
}