new version ndjson
This commit is contained in:
@@ -1,28 +1,25 @@
|
||||
<?php
|
||||
// src/Command/KnowledgeIngestCommand.php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Knowledge\Ingest\KnowledgeIngestService;
|
||||
use App\Entity\DocumentVersion;
|
||||
use App\Entity\User;
|
||||
use App\Ingest\IngestFlow;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Finder\Finder;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:knowledge:ingest',
|
||||
description: 'Ingest one or multiple markdown/text documents into file-based knowledge chunks'
|
||||
)]
|
||||
final class KnowledgeIngestCommand extends Command
|
||||
#[AsCommand(name: 'mto:agent:ingest:version')]
|
||||
class KnowledgeIngestCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly KnowledgeIngestService $ingest,
|
||||
private readonly string $uploadsDir,
|
||||
private readonly IngestFlow $ingestFlow,
|
||||
private readonly EntityManagerInterface $em,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
@@ -30,86 +27,28 @@ final class KnowledgeIngestCommand extends Command
|
||||
protected function configure(): void
|
||||
{
|
||||
$this
|
||||
->addArgument(
|
||||
'file',
|
||||
InputArgument::OPTIONAL,
|
||||
'Path to a single .txt/.md file'
|
||||
)
|
||||
->addOption(
|
||||
'all',
|
||||
null,
|
||||
InputOption::VALUE_NONE,
|
||||
'Ingest all .md files from the uploads directory'
|
||||
)
|
||||
->addOption(
|
||||
'optimize',
|
||||
'o',
|
||||
InputOption::VALUE_NONE,
|
||||
'Optimize chunks for retrieval quality'
|
||||
);
|
||||
->addArgument('versionId', InputArgument::REQUIRED, 'UUID of DocumentVersion')
|
||||
->addArgument('userId', InputArgument::REQUIRED, 'UUID of user triggering ingest');
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$files = [];
|
||||
$optimize = (bool) $input->getOption('optimize');
|
||||
$versionId = $input->getArgument('versionId');
|
||||
$userId = $input->getArgument('userId');
|
||||
|
||||
if ($input->getOption('all')) {
|
||||
if (!is_dir($this->uploadsDir)) {
|
||||
$output->writeln('<error>❌ uploads directory not found</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
$version = $this->em->getRepository(DocumentVersion::class)->find($versionId);
|
||||
$user = $this->em->getRepository(User::class)->find($userId);
|
||||
|
||||
$finder = new Finder();
|
||||
$finder
|
||||
->files()
|
||||
->in($this->uploadsDir)
|
||||
->name('*.md');
|
||||
|
||||
if (!$finder->hasResults()) {
|
||||
$output->writeln('<comment>ℹ️ No .md files found in uploads/</comment>');
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
foreach ($finder as $file) {
|
||||
$files[] = $file->getRealPath();
|
||||
}
|
||||
|
||||
$output->writeln(sprintf(
|
||||
'📂 Ingesting %d markdown files from uploads (%s)',
|
||||
count($files),
|
||||
$optimize ? 'optimized' : 'standard'
|
||||
));
|
||||
} else {
|
||||
$file = $input->getArgument('file');
|
||||
|
||||
if (!$file) {
|
||||
$output->writeln('<error>❌ Either provide a file or use --all</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$files[] = (string) $file;
|
||||
if (!$version || !$user) {
|
||||
$output->writeln('<error>Version or User not found.</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$totalWritten = 0;
|
||||
$output->writeln('Starting ingest...');
|
||||
|
||||
foreach ($files as $filePath) {
|
||||
$output->writeln('➡️ Ingesting: ' . $filePath);
|
||||
$this->ingestFlow->ingestDocumentVersion($version, $user);
|
||||
|
||||
$written = $this->ingest->ingestFile(
|
||||
$filePath,
|
||||
optimize: $optimize
|
||||
);
|
||||
|
||||
$totalWritten += count($written);
|
||||
|
||||
foreach ($written as $chunk) {
|
||||
$output->writeln(' - ' . $chunk);
|
||||
}
|
||||
}
|
||||
|
||||
$output->writeln('');
|
||||
$output->writeln('✅ Total written chunks: ' . $totalWritten);
|
||||
$output->writeln('<info>Ingest completed.</info>');
|
||||
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user