stash light

This commit is contained in:
team 1
2026-02-12 10:03:52 +01:00
parent 5b650a8f28
commit 0bb0c0b42f
51 changed files with 6864 additions and 72 deletions

View File

@@ -0,0 +1,164 @@
<?php
declare(strict_types=1);
namespace App\Vector;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
/**
 * Rebuilds the vector index file by running an external Python ingest script.
 *
 * The script is invoked with `--index <index.ndjson>` and `--out <vector.index.tmp>`;
 * once it has produced a non-empty temporary file, that file is renamed over the
 * final index path. (The original author describes the output as a FAISS index;
 * the actual format is determined entirely by the Python script.)
 */
final class VectorIndexBuilder
{
    /** Python interpreter executable used to run the ingest script. */
    private string $pythonBin;

    /** Absolute path of the Python ingest script. */
    private string $scriptPath;

    /** Absolute path of the NDJSON source file passed to the script via --index. */
    private string $indexNdjsonPath;

    /** Absolute path of the final vector index file. */
    private string $vectorIndexPath;

    /** Timeout handed to the process, in seconds. */
    private int $timeoutSeconds;

    /**
     * @param string $projectDir              Project root; trailing slashes are ignored.
     * @param string $pythonBin               Python interpreter executable.
     * @param string $relativeScriptPath      Script path relative to $projectDir (leading slash optional).
     * @param string $relativeIndexNdjsonPath NDJSON path relative to $projectDir (leading slash optional).
     * @param string $relativeVectorIndexPath Index path relative to $projectDir (leading slash optional).
     * @param int    $timeoutSeconds          Process timeout in seconds.
     */
    public function __construct(
        string $projectDir,
        string $pythonBin = 'python3',
        string $relativeScriptPath = '/vector/vector_ingest.py',
        string $relativeIndexNdjsonPath = '/var/knowledge/index.ndjson',
        string $relativeVectorIndexPath = '/var/knowledge/vector.index',
        int $timeoutSeconds = 600
    ) {
        $base = rtrim($projectDir, '/');

        $this->pythonBin = $pythonBin;
        $this->scriptPath = self::joinPath($base, $relativeScriptPath);
        $this->indexNdjsonPath = self::joinPath($base, $relativeIndexNdjsonPath);
        $this->vectorIndexPath = self::joinPath($base, $relativeVectorIndexPath);
        $this->timeoutSeconds = $timeoutSeconds;
    }

    /**
     * @return string Absolute path of the NDJSON source file.
     */
    public function getIndexNdjsonPath(): string
    {
        return $this->indexNdjsonPath;
    }

    /**
     * @return string Absolute path of the final vector index file.
     */
    public function getVectorIndexPath(): string
    {
        return $this->vectorIndexPath;
    }

    /**
     * @return string Absolute path of the Python ingest script.
     */
    public function getScriptPath(): string
    {
        return $this->scriptPath;
    }

    /**
     * Rebuilds the vector index from index.ndjson.
     *
     * The Python script is expected to write its result to "<vector index path>.tmp";
     * that file is then renamed over the final index path so readers never see a
     * half-written index.
     *
     * @param string|null $logPath Optional file to which the process stdout/stderr is appended.
     *
     * @throws \RuntimeException      If an input file is missing, the target directory cannot be
     *                                created, a stale temporary file cannot be removed, the script
     *                                produced no (or empty) output, or the final rename fails.
     * @throws ProcessFailedException If the script exits with a non-zero status.
     */
    public function rebuildFromNdjson(?string $logPath = null): void
    {
        if (!is_file($this->scriptPath)) {
            throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
        }

        if (!is_file($this->indexNdjsonPath)) {
            throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
        }

        $dir = \dirname($this->vectorIndexPath);
        // The trailing is_dir() re-check tolerates another process creating the directory concurrently.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create vector index directory: ' . $dir);
        }

        $tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';

        // A leftover temporary file MUST be gone before the script runs: otherwise a run
        // that exits successfully without writing anything would promote stale data.
        // The warning is suppressed because the failure is reported as an exception instead.
        if (is_file($tmpVectorIndexPath) && !@unlink($tmpVectorIndexPath)) {
            throw new \RuntimeException('Unable to remove stale temporary vector index: ' . $tmpVectorIndexPath);
        }

        // CLI contract expected from the Python script:
        //   --index <path-to-index.ndjson>
        //   --out   <path-to-vector.index.tmp>
        $process = new Process([
            $this->pythonBin,
            $this->scriptPath,
            '--index', $this->indexNdjsonPath,
            '--out', $tmpVectorIndexPath,
        ]);
        $process->setTimeout($this->timeoutSeconds);

        try {
            $this->runProcess($process, $logPath);
        } catch (\Throwable $e) {
            // Do not leave a partially written temporary file behind; rethrow unchanged.
            $this->discardTemporaryFile($tmpVectorIndexPath);

            throw $e;
        }

        // The file was written by another process, so drop any cached stat data first.
        clearstatcache(true, $tmpVectorIndexPath);
        $size = is_file($tmpVectorIndexPath) ? filesize($tmpVectorIndexPath) : false;

        if ($size === false || $size === 0) {
            $this->discardTemporaryFile($tmpVectorIndexPath);

            throw new \RuntimeException('Vector index rebuild failed: tmp output missing or empty: ' . $tmpVectorIndexPath);
        }

        $this->atomicSwitch($tmpVectorIndexPath, $this->vectorIndexPath);
    }

    // -------------------------
    // Internals
    // -------------------------

    /**
     * Joins the project base directory and a relative path with exactly one separator,
     * so relative paths work with or without a leading slash.
     */
    private static function joinPath(string $base, string $relativePath): string
    {
        return $base . '/' . ltrim($relativePath, '/');
    }

    /**
     * Runs the process to completion, optionally mirroring its output into a log file.
     *
     * @param Process     $process Fully configured process to run.
     * @param string|null $logPath Log file to append to, or null to disable logging.
     *
     * @throws ProcessFailedException If the process exits with a non-zero status.
     * @throws \Throwable             Anything thrown by Process::run() (e.g. on timeout) is
     *                                logged and rethrown unchanged.
     */
    private function runProcess(Process $process, ?string $logPath): void
    {
        $logging = $logPath !== null;

        if ($logging) {
            $this->appendLog($logPath, "\n=== VectorIndexBuilder START " . (new \DateTimeImmutable())->format(DATE_ATOM) . " ===\n");
            $this->appendLog($logPath, "CMD: " . $process->getCommandLine() . "\n");
        }

        // Stream stdout and stderr chunks into the log as they arrive; $type
        // (Process::OUT / Process::ERR) is intentionally not distinguished.
        $onOutput = !$logging ? null : function (string $type, string $buffer) use ($logPath): void {
            $this->appendLog($logPath, $buffer);
        };

        try {
            $process->run($onOutput);
        } catch (\Throwable $e) {
            // run() itself can throw (for instance when the timeout is exceeded);
            // make sure the log still records that the build did not finish.
            if ($logging) {
                $this->appendLog($logPath, "\n=== VectorIndexBuilder FAILED ===\n");
                $this->appendLog($logPath, "Exception: " . \get_class($e) . ': ' . $e->getMessage() . "\n");
            }

            throw $e;
        }

        if (!$process->isSuccessful()) {
            if ($logging) {
                $this->appendLog($logPath, "\n=== VectorIndexBuilder FAILED ===\n");
                $this->appendLog($logPath, "ExitCode: " . $process->getExitCode() . "\n");
                $this->appendLog($logPath, "STDERR:\n" . $process->getErrorOutput() . "\n");
            }

            throw new ProcessFailedException($process);
        }

        if ($logging) {
            $this->appendLog($logPath, "\n=== VectorIndexBuilder OK " . (new \DateTimeImmutable())->format(DATE_ATOM) . " ===\n");
        }
    }

    /**
     * Appends content to the log file, creating its directory if necessary.
     *
     * Logging is deliberately best-effort: any failure here is ignored so that
     * a broken log location never aborts the index build.
     */
    private function appendLog(string $logPath, string $content): void
    {
        $dir = \dirname($logPath);

        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            // Log directory unavailable: skip logging rather than fail the build.
            return;
        }

        @file_put_contents($logPath, $content, FILE_APPEND);
    }

    /**
     * Best-effort removal of the temporary output file; failures are ignored because
     * the caller is already reporting a more relevant error.
     */
    private function discardTemporaryFile(string $path): void
    {
        if (is_file($path)) {
            @unlink($path);
        }
    }

    /**
     * Moves the temporary file over the final path with a single rename().
     *
     * @throws \RuntimeException If the rename fails; the temporary file is removed in that case.
     */
    private function atomicSwitch(string $tmp, string $final): void
    {
        if (!rename($tmp, $final)) {
            @unlink($tmp);

            throw new \RuntimeException('Atomic switch failed for vector.index');
        }
    }
}