new version ndjson
This commit is contained in:
@@ -1,28 +1,25 @@
|
||||
<?php
|
||||
// src/Command/KnowledgeIngestCommand.php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Knowledge\Ingest\KnowledgeIngestService;
|
||||
use App\Entity\DocumentVersion;
|
||||
use App\Entity\User;
|
||||
use App\Ingest\IngestFlow;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Finder\Finder;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:knowledge:ingest',
|
||||
description: 'Ingest one or multiple markdown/text documents into file-based knowledge chunks'
|
||||
)]
|
||||
final class KnowledgeIngestCommand extends Command
|
||||
#[AsCommand(name: 'mto:agent:ingest:version')]
|
||||
class KnowledgeIngestCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly KnowledgeIngestService $ingest,
|
||||
private readonly string $uploadsDir,
|
||||
private readonly IngestFlow $ingestFlow,
|
||||
private readonly EntityManagerInterface $em,
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
@@ -30,86 +27,28 @@ final class KnowledgeIngestCommand extends Command
|
||||
protected function configure(): void
|
||||
{
|
||||
$this
|
||||
->addArgument(
|
||||
'file',
|
||||
InputArgument::OPTIONAL,
|
||||
'Path to a single .txt/.md file'
|
||||
)
|
||||
->addOption(
|
||||
'all',
|
||||
null,
|
||||
InputOption::VALUE_NONE,
|
||||
'Ingest all .md files from the uploads directory'
|
||||
)
|
||||
->addOption(
|
||||
'optimize',
|
||||
'o',
|
||||
InputOption::VALUE_NONE,
|
||||
'Optimize chunks for retrieval quality'
|
||||
);
|
||||
->addArgument('versionId', InputArgument::REQUIRED, 'UUID of DocumentVersion')
|
||||
->addArgument('userId', InputArgument::REQUIRED, 'UUID of user triggering ingest');
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$files = [];
|
||||
$optimize = (bool) $input->getOption('optimize');
|
||||
$versionId = $input->getArgument('versionId');
|
||||
$userId = $input->getArgument('userId');
|
||||
|
||||
if ($input->getOption('all')) {
|
||||
if (!is_dir($this->uploadsDir)) {
|
||||
$output->writeln('<error>❌ uploads directory not found</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
$version = $this->em->getRepository(DocumentVersion::class)->find($versionId);
|
||||
$user = $this->em->getRepository(User::class)->find($userId);
|
||||
|
||||
$finder = new Finder();
|
||||
$finder
|
||||
->files()
|
||||
->in($this->uploadsDir)
|
||||
->name('*.md');
|
||||
|
||||
if (!$finder->hasResults()) {
|
||||
$output->writeln('<comment>ℹ️ No .md files found in uploads/</comment>');
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
foreach ($finder as $file) {
|
||||
$files[] = $file->getRealPath();
|
||||
}
|
||||
|
||||
$output->writeln(sprintf(
|
||||
'📂 Ingesting %d markdown files from uploads (%s)',
|
||||
count($files),
|
||||
$optimize ? 'optimized' : 'standard'
|
||||
));
|
||||
} else {
|
||||
$file = $input->getArgument('file');
|
||||
|
||||
if (!$file) {
|
||||
$output->writeln('<error>❌ Either provide a file or use --all</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$files[] = (string) $file;
|
||||
if (!$version || !$user) {
|
||||
$output->writeln('<error>Version or User not found.</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$totalWritten = 0;
|
||||
$output->writeln('Starting ingest...');
|
||||
|
||||
foreach ($files as $filePath) {
|
||||
$output->writeln('➡️ Ingesting: ' . $filePath);
|
||||
$this->ingestFlow->ingestDocumentVersion($version, $user);
|
||||
|
||||
$written = $this->ingest->ingestFile(
|
||||
$filePath,
|
||||
optimize: $optimize
|
||||
);
|
||||
|
||||
$totalWritten += count($written);
|
||||
|
||||
foreach ($written as $chunk) {
|
||||
$output->writeln(' - ' . $chunk);
|
||||
}
|
||||
}
|
||||
|
||||
$output->writeln('');
|
||||
$output->writeln('✅ Total written chunks: ' . $totalWritten);
|
||||
$output->writeln('<info>Ingest completed.</info>');
|
||||
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
|
||||
@@ -4,86 +4,27 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Vector\VectorIndexBuilder;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
|
||||
#[AsCommand(
|
||||
name: 'mto:agent:vector:ingest',
|
||||
description: 'Builds the FAISS vector index from index.json'
|
||||
)]
|
||||
final class VectorIngestCommand extends Command
|
||||
#[AsCommand(name: 'mto:agent:vector:rebuild')]
|
||||
class VectorIngestCommand extends Command
|
||||
{
|
||||
public function __construct(
|
||||
private readonly string $vectorDir,
|
||||
private readonly string $projectDir
|
||||
private readonly VectorIndexBuilder $builder
|
||||
) {
|
||||
parent::__construct();
|
||||
}
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$vectorDir = rtrim($this->vectorDir, '/');
|
||||
$output->writeln('Rebuilding vector index...');
|
||||
$this->builder->rebuildFromNdjson();
|
||||
$output->writeln('Done.');
|
||||
|
||||
if (!is_dir($vectorDir)) {
|
||||
$output->writeln('<error>Vector directory not found</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$script = $vectorDir . '/vector_ingest.py';
|
||||
|
||||
if (!is_file($script)) {
|
||||
$output->writeln('<error>vector_ingest.py not found</error>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// Enforce venv usage
|
||||
// -------------------------------------------------
|
||||
$venvPython = $vectorDir . '/.venv/bin/python';
|
||||
|
||||
if (!is_file($venvPython)) {
|
||||
$output->writeln('<error>No Python virtual environment found.</error>');
|
||||
$output->writeln('<comment>Run first:</comment>');
|
||||
$output->writeln('<info> php bin/console mto:agent:vector:install</info>');
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$knowledgeDir = rtrim($this->projectDir, '/') . '/var/knowledge';
|
||||
|
||||
if (!is_dir($knowledgeDir)) {
|
||||
$output->writeln('<error>Knowledge directory not found:</error>');
|
||||
$output->writeln($knowledgeDir);
|
||||
return Command::FAILURE;
|
||||
}
|
||||
|
||||
$output->writeln('<info>Building FAISS vector index…</info>');
|
||||
$output->writeln(sprintf(
|
||||
'<comment>Vector dir:</comment> %s',
|
||||
$vectorDir
|
||||
));
|
||||
$output->writeln(sprintf(
|
||||
'<comment>Knowledge dir:</comment> %s',
|
||||
$knowledgeDir
|
||||
));
|
||||
|
||||
$cmd = sprintf(
|
||||
'%s %s %s %s 2>&1',
|
||||
escapeshellarg($venvPython),
|
||||
escapeshellarg($script),
|
||||
escapeshellarg($vectorDir),
|
||||
escapeshellarg($knowledgeDir)
|
||||
);
|
||||
|
||||
exec($cmd, $out, $exitCode);
|
||||
|
||||
foreach ($out as $line) {
|
||||
$output->writeln($line);
|
||||
}
|
||||
|
||||
return $exitCode === 0
|
||||
? Command::SUCCESS
|
||||
: Command::FAILURE;
|
||||
return Command::SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
|
||||
/**
 * Console command that prepares the Python environment used by the vector tooling.
 *
 * It creates a virtual environment at "<vectorDir>/.venv" when that directory is
 * missing, checks that pip can be run inside it, and then installs or upgrades
 * the faiss-cpu and sentence-transformers packages.
 *
 * System requirements (once per environment):
 *   sudo apt update
 *   sudo apt install -y python3-venv python3-pip
 */
#[AsCommand(
    name: 'mto:agent:vector:install',
    description: 'Creates a Python venv and installs vector dependencies'
)]
final class VectorInstallCommand extends Command
{
    /**
     * @param string $vectorDir Directory in which the ".venv" folder lives or will be created.
     */
    public function __construct(
        private readonly string $vectorDir
    ) {
        parent::__construct();
    }

    /**
     * Creates the venv if needed, verifies pip, and installs the dependencies.
     *
     * @return int Command::SUCCESS when every step succeeded; Command::FAILURE when
     *             the vector directory is missing, the venv could not be created,
     *             pip is unavailable inside the venv, or the installation failed.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        if (!is_dir($this->vectorDir)) {
            $output->writeln('<error>Vector directory not found</error>');

            return Command::FAILURE;
        }

        $vectorDir = rtrim($this->vectorDir, '/');
        $venvDir = $vectorDir . '/.venv';
        $venvPython = $venvDir . '/bin/python';

        // -------------------------------------------------
        // 1) Create venv if missing
        // -------------------------------------------------
        if (!is_dir($venvDir)) {
            $output->writeln('<info>Creating Python virtual environment…</info>');

            $exitCode = $this->runShell(
                sprintf('python3 -m venv %s 2>&1', escapeshellarg($venvDir)),
                $output
            );

            // A zero exit code alone is not trusted: the interpreter must exist too.
            if ($exitCode !== 0 || !is_file($venvPython)) {
                $output->writeln('');
                $output->writeln('<error>Failed to create Python virtual environment.</error>');
                $output->writeln('<comment>Ensure that python3-venv is installed on the system.</comment>');

                return Command::FAILURE;
            }
        } else {
            $output->writeln('<info>Using existing Python virtual environment</info>');
        }

        // -------------------------------------------------
        // 2) Ensure pip exists inside venv
        // -------------------------------------------------
        // Only the exit code matters here, so the probe's output is not echoed.
        $exitCode = $this->runShell(
            sprintf('%s -m pip --version 2>&1', escapeshellarg($venvPython)),
            $output,
            false
        );

        if ($exitCode !== 0) {
            $output->writeln('');
            $output->writeln('<error>The existing virtual environment has no pip.</error>');
            $output->writeln('<comment>This usually means it was created before python3-pip was installed.</comment>');
            $output->writeln('<comment>Fix:</comment>');
            $output->writeln(sprintf('<info>  rm -rf %s</info>', $venvDir));
            $output->writeln('<info>  php bin/console mto:agent:vector:install</info>');

            return Command::FAILURE;
        }

        // -------------------------------------------------
        // 3) Install / update dependencies
        // -------------------------------------------------
        $output->writeln('<info>Installing vector dependencies…</info>');

        $exitCode = $this->runShell(
            sprintf(
                '%s -m pip install --upgrade faiss-cpu sentence-transformers 2>&1',
                escapeshellarg($venvPython)
            ),
            $output
        );

        if ($exitCode !== 0) {
            $output->writeln('<error>Dependency installation failed</error>');

            return Command::FAILURE;
        }

        $output->writeln('');
        $output->writeln('<info>Vector dependencies installed successfully</info>');
        $output->writeln(sprintf('<comment>venv:</comment> %s', $venvDir));

        return Command::SUCCESS;
    }

    /**
     * Runs one shell command and optionally forwards its output to the console.
     *
     * exec() appends to an output array that already has elements, so every call
     * starts from an empty buffer; otherwise lines captured by an earlier step
     * would be printed again by a later one.
     *
     * @param string          $cmd    Fully escaped shell command line.
     * @param OutputInterface $output Console output that receives the captured lines.
     * @param bool            $echo   Whether the captured lines are written to $output.
     *
     * @return int Exit code reported by exec() for the command.
     */
    private function runShell(string $cmd, OutputInterface $output, bool $echo = true): int
    {
        $lines = [];
        $exitCode = 0;

        exec($cmd, $lines, $exitCode);

        if ($echo) {
            foreach ($lines as $line) {
                $output->writeln($line);
            }
        }

        return $exitCode;
    }
}
|
||||
Reference in New Issue
Block a user