Files
MtoRagSystem/src/Vector/VectorIndexBuilder.php
2026-02-22 18:04:53 +01:00

118 lines
3.6 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Vector;
use App\Index\IndexConfigurationProvider;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Rebuilds the vector index by running an external Python script over the
 * NDJSON index file and then moving the generated files into place.
 *
 * The script is invoked with `--index`, `--out` and `--model` arguments and is
 * expected to produce two files: the index at the `--out` path and a metadata
 * companion at `<out>.meta.json`. Both are written under a `.tmp` name first
 * and only renamed over the live files after they have been validated.
 */
final class VectorIndexBuilder
{
    private string $pythonBin;
    private string $scriptPath;
    private string $indexNdjsonPath;
    private string $vectorIndexPath;
    private string $vectorMetaPath;
    private int $timeoutSeconds;
    private IndexConfigurationProvider $configurationProvider;

    /**
     * @param string                     $pythonBin             Python interpreter used to run the build script.
     * @param string                     $scriptPath            Path of the build script.
     * @param string                     $indexNdjsonPath       Path of the NDJSON file handed to the script via `--index`.
     * @param string                     $vectorIndexPath       Final location of the vector index; the metadata
     *                                                          file lives next to it as `<vectorIndexPath>.meta.json`.
     * @param int                        $timeoutSeconds        Value passed to Process::setTimeout().
     * @param IndexConfigurationProvider $configurationProvider Source of the embedding model name.
     */
    public function __construct(
        string $pythonBin,
        string $scriptPath,
        string $indexNdjsonPath,
        string $vectorIndexPath,
        int $timeoutSeconds,
        IndexConfigurationProvider $configurationProvider
    ) {
        $this->pythonBin = $pythonBin;
        $this->scriptPath = $scriptPath;
        $this->indexNdjsonPath = $indexNdjsonPath;
        $this->vectorIndexPath = $vectorIndexPath;
        $this->vectorMetaPath = $vectorIndexPath . '.meta.json';
        $this->timeoutSeconds = $timeoutSeconds;
        $this->configurationProvider = $configurationProvider;
    }

    /**
     * Regenerates the vector index from the NDJSON file.
     *
     * When the NDJSON file is empty, the existing vector index and its
     * metadata are removed and no build is started.
     *
     * @param string|null $logPath Optional file that receives the build script's output
     *                             (appended). Null or an empty string disables logging.
     *
     * @throws \RuntimeException      If the script or the NDJSON file is missing, the log file
     *                                cannot be opened, the outputs are missing/empty, or the
     *                                final rename fails.
     * @throws ProcessFailedException If the build script exits unsuccessfully.
     */
    public function rebuildFromNdjson(?string $logPath = null): void
    {
        // The NDJSON file is typically produced by another step; drop any cached
        // stat data so is_file()/filesize() below see its current state.
        clearstatcache(true, $this->indexNdjsonPath);

        $this->assertPreconditions();

        // A missing file has already been rejected by assertPreconditions(); the
        // is_file() check is kept as a guard in case it disappears in between.
        if (!is_file($this->indexNdjsonPath) || filesize($this->indexNdjsonPath) === 0) {
            // Nothing to index: remove stale artifacts (best effort) instead of building.
            @unlink($this->vectorIndexPath);
            @unlink($this->vectorMetaPath);

            return;
        }

        $config = $this->configurationProvider->getConfiguration();
        $embeddingModel = $config->getEmbeddingModel();

        $tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
        $tmpVectorMetaPath = $tmpVectorIndexPath . '.meta.json';

        // Clear leftovers from a previous (possibly failed) run so that the
        // validation below can only ever see files written by this run.
        @unlink($tmpVectorIndexPath);
        @unlink($tmpVectorMetaPath);

        // Array form: arguments are passed to the process individually, not via a shell string.
        $cmd = [
            $this->pythonBin,
            $this->scriptPath,
            '--index', $this->indexNdjsonPath,
            '--out', $tmpVectorIndexPath,
            '--model', $embeddingModel,
        ];

        $process = new Process($cmd);
        $process->setTimeout($this->timeoutSeconds);

        $this->runProcess($process, $logPath);
        $this->validateOutputs($tmpVectorIndexPath, $tmpVectorMetaPath);
        $this->atomicSwitchPair($tmpVectorIndexPath, $tmpVectorMetaPath);
    }

    /**
     * Verifies that the build script and the NDJSON input exist.
     *
     * @throws \RuntimeException If either file is missing.
     */
    private function assertPreconditions(): void
    {
        if (!is_file($this->scriptPath)) {
            throw new \RuntimeException('Vector build script not found.');
        }

        if (!is_file($this->indexNdjsonPath)) {
            throw new \RuntimeException('index.ndjson not found.');
        }
    }

    /**
     * Ensures the build produced a non-empty index file and metadata file.
     *
     * @throws \RuntimeException If one of the temporary outputs is missing or empty.
     */
    private function validateOutputs(string $tmpIndex, string $tmpMeta): void
    {
        // Both files are written by an external process, so cached stat data
        // held by PHP must not be trusted here.
        clearstatcache(true, $tmpIndex);
        clearstatcache(true, $tmpMeta);

        if (!is_file($tmpIndex) || filesize($tmpIndex) === 0) {
            throw new \RuntimeException('Vector index tmp missing or empty');
        }

        if (!is_file($tmpMeta) || filesize($tmpMeta) === 0) {
            throw new \RuntimeException('Vector meta tmp missing or empty');
        }
    }

    /**
     * Moves the temporary index and metadata files over the live ones.
     *
     * NOTE: the two renames happen one after another. If the second rename
     * fails, the index has already been replaced while the metadata has not,
     * so the pair as a whole is not switched atomically.
     *
     * @throws \RuntimeException If either rename fails.
     */
    private function atomicSwitchPair(string $tmpIndex, string $tmpMeta): void
    {
        if (!rename($tmpIndex, $this->vectorIndexPath)) {
            throw new \RuntimeException('Atomic switch failed for vector index');
        }

        if (!rename($tmpMeta, $this->vectorMetaPath)) {
            throw new \RuntimeException('Atomic switch failed for vector meta');
        }
    }

    /**
     * Runs the build process, optionally streaming its output to a log file.
     *
     * @param Process     $process Prepared process to execute.
     * @param string|null $logPath File to append the process output to; null or ''
     *                             disables logging.
     *
     * @throws \RuntimeException      If the log file cannot be opened for appending.
     * @throws ProcessFailedException If the process does not finish successfully.
     */
    private function runProcess(Process $process, ?string $logPath): void
    {
        $logHandle = null;

        if ($logPath !== null && $logPath !== '') {
            // Fail before starting the build rather than silently dropping the
            // log the caller asked for. '@' only hides the PHP warning; the
            // failure itself is reported through the exception below.
            $logHandle = @fopen($logPath, 'ab');

            if ($logHandle === false) {
                throw new \RuntimeException(
                    sprintf('Unable to open vector build log "%s" for writing.', $logPath)
                );
            }
        }

        $onOutput = null;

        if ($logHandle !== null) {
            // Process::run() calls this with the output type and each chunk of
            // output as it arrives; both output streams go to the same log.
            // Individual write failures are ignored so that a logging hiccup
            // does not abort a running build.
            $onOutput = static function (string $type, string $buffer) use ($logHandle): void {
                fwrite($logHandle, $buffer);
            };
        }

        try {
            $process->run($onOutput);
        } finally {
            if ($logHandle !== null) {
                fclose($logHandle);
            }
        }

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }
    }
}