<?php

declare(strict_types=1);

namespace App\Vector;

use App\Index\IndexConfigurationProvider;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;

/**
 * Rebuilds the vector index from the NDJSON index by running an external
 * build script through the configured Python binary.
 *
 * The script is pointed at a temporary output path; only after both the
 * temporary index and its metadata file have been validated are they renamed
 * over the live files.
 */
final class VectorIndexBuilder
{
    /** Suffix appended to the live index path to obtain the temporary build target. */
    private const TMP_SUFFIX = '.tmp';

    /** Suffix appended to an index path to obtain its metadata file path. */
    private const META_SUFFIX = '.meta.json';

    private string $pythonBin;
    private string $scriptPath;
    private string $indexNdjsonPath;
    private string $vectorIndexPath;
    private string $vectorMetaPath;
    private int $timeoutSeconds;

    private IndexConfigurationProvider $configurationProvider;

    /**
     * @param string $pythonBin       Interpreter used as the first element of the command line.
     * @param string $scriptPath      Build script passed to the interpreter.
     * @param string $indexNdjsonPath Source index handed to the script via --index.
     * @param string $vectorIndexPath Live vector index path; the metadata path is derived
     *                                from it by appending ".meta.json".
     * @param int    $timeoutSeconds  Timeout applied to the build process.
     */
    public function __construct(
        string $pythonBin,
        string $scriptPath,
        string $indexNdjsonPath,
        string $vectorIndexPath,
        int $timeoutSeconds,
        IndexConfigurationProvider $configurationProvider
    ) {
        $this->pythonBin = $pythonBin;
        $this->scriptPath = $scriptPath;
        $this->indexNdjsonPath = $indexNdjsonPath;
        $this->vectorIndexPath = $vectorIndexPath;
        $this->vectorMetaPath = $vectorIndexPath . self::META_SUFFIX;
        $this->timeoutSeconds = $timeoutSeconds;
        $this->configurationProvider = $configurationProvider;
    }

    /**
     * Rebuilds the vector index and its metadata from the NDJSON index.
     *
     * When the NDJSON index is empty, the existing vector index and metadata
     * are removed and no build is started.
     *
     * @param string|null $logPath Optional file that receives the build process output
     *                             (stdout and stderr, appended as it arrives). Nothing is
     *                             logged when null or empty.
     *
     * @throws \RuntimeException      When a precondition fails, the log file cannot be opened,
     *                                the outputs are missing/empty, or a rename fails.
     * @throws ProcessFailedException When the build process exits unsuccessfully.
     */
    public function rebuildFromNdjson(?string $logPath = null): void
    {
        $this->assertPreconditions();

        // Nothing to embed: drop previously built artefacts rather than keep a stale index.
        // NOTE: a missing file is already rejected by assertPreconditions(), so the
        // is_file() half is only a defensive guard (e.g. the file vanishing in between).
        if (!is_file($this->indexNdjsonPath) || filesize($this->indexNdjsonPath) === 0) {
            $this->removeQuietly($this->vectorIndexPath);
            $this->removeQuietly($this->vectorMetaPath);

            return;
        }

        $config = $this->configurationProvider->getConfiguration();
        $embeddingModel = $config->getEmbeddingModel();

        $tmpVectorIndexPath = $this->vectorIndexPath . self::TMP_SUFFIX;
        // Presumably the script writes its metadata next to --out using this suffix;
        // validateOutputs() fails the rebuild if it does not.
        $tmpVectorMetaPath = $tmpVectorIndexPath . self::META_SUFFIX;

        // Clear leftovers of an earlier, failed build so validation only sees fresh output.
        $this->removeQuietly($tmpVectorIndexPath);
        $this->removeQuietly($tmpVectorMetaPath);

        // Array form: arguments are passed to the process as-is, without shell interpolation.
        $process = new Process([
            $this->pythonBin,
            $this->scriptPath,
            '--index', $this->indexNdjsonPath,
            '--out', $tmpVectorIndexPath,
            '--model', $embeddingModel,
        ]);
        $process->setTimeout($this->timeoutSeconds);

        $this->runProcess($process, $logPath);

        $this->validateOutputs($tmpVectorIndexPath, $tmpVectorMetaPath);

        $this->atomicSwitchPair($tmpVectorIndexPath, $tmpVectorMetaPath);
    }

    /**
     * Verifies that the build script and the NDJSON index exist.
     *
     * @throws \RuntimeException When either file is missing.
     */
    private function assertPreconditions(): void
    {
        if (!is_file($this->scriptPath)) {
            throw new \RuntimeException('Vector build script not found.');
        }

        if (!is_file($this->indexNdjsonPath)) {
            throw new \RuntimeException('index.ndjson not found.');
        }
    }

    /**
     * Ensures the build produced a non-empty index file and a non-empty metadata file.
     *
     * @throws \RuntimeException When either temporary file is missing or empty.
     */
    private function validateOutputs(string $tmpIndex, string $tmpMeta): void
    {
        if (!is_file($tmpIndex) || filesize($tmpIndex) === 0) {
            throw new \RuntimeException('Vector index tmp missing or empty');
        }

        if (!is_file($tmpMeta) || filesize($tmpMeta) === 0) {
            throw new \RuntimeException('Vector meta tmp missing or empty');
        }
    }

    /**
     * Moves the temporary index and metadata files over the live ones.
     *
     * The temporary files are siblings of their targets, so each rename stays within
     * one directory. The two renames are performed one after the other: if the second
     * one fails, the live index has already been replaced while the metadata has not.
     *
     * @throws \RuntimeException When either rename fails.
     */
    private function atomicSwitchPair(string $tmpIndex, string $tmpMeta): void
    {
        if (!rename($tmpIndex, $this->vectorIndexPath)) {
            throw new \RuntimeException('Atomic switch failed for vector index');
        }

        if (!rename($tmpMeta, $this->vectorMetaPath)) {
            throw new \RuntimeException('Atomic switch failed for vector meta');
        }
    }

    /**
     * Runs the build process, optionally streaming its output to a log file.
     *
     * @param string|null $logPath File to append stdout/stderr chunks to; null or empty
     *                             disables logging.
     *
     * @throws \RuntimeException      When the log file cannot be opened for appending.
     * @throws ProcessFailedException When the process does not finish successfully.
     */
    private function runProcess(Process $process, ?string $logPath): void
    {
        if ($logPath === null || $logPath === '') {
            $process->run();
        } else {
            // Open the log before starting the (potentially long) build so an unusable
            // log path is reported immediately.
            $logHandle = fopen($logPath, 'ab');
            if ($logHandle === false) {
                throw new \RuntimeException(
                    sprintf('Unable to open vector build log "%s" for writing.', $logPath)
                );
            }

            try {
                // The callback receives each output chunk as it is produced.
                $process->run(static function ($type, $buffer) use ($logHandle): void {
                    fwrite($logHandle, (string) $buffer);
                });
            } finally {
                // Also reached when run() throws (e.g. on timeout), so the handle never leaks.
                fclose($logHandle);
            }
        }

        if (!$process->isSuccessful()) {
            throw new ProcessFailedException($process);
        }
    }

    /**
     * Best-effort removal of a file.
     *
     * Errors are deliberately suppressed: the file may legitimately not exist, and a
     * failed cleanup must not abort the rebuild.
     */
    private function removeQuietly(string $path): void
    {
        @unlink($path);
    }
}