New ingest and profile settings

This commit is contained in:
team 1
2026-02-16 14:38:02 +01:00
parent ece93e4cb4
commit 8666b05570
15 changed files with 655 additions and 199 deletions

View File

@@ -4,7 +4,7 @@ declare(strict_types=1);
namespace App\Vector;
use App\Index\IndexConfiguration;
use App\Index\IndexConfigurationProvider;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
@@ -18,7 +18,7 @@ final class VectorIndexBuilder
private string $vectorMetaPath;
private int $timeoutSeconds;
private IndexConfiguration $indexConfiguration;
private IndexConfigurationProvider $configurationProvider;
public function __construct(
string $pythonBin,
@@ -27,7 +27,7 @@ final class VectorIndexBuilder
string $indexMetaPath,
string $vectorIndexPath,
int $timeoutSeconds,
IndexConfiguration $indexConfiguration
IndexConfigurationProvider $configurationProvider
) {
$this->pythonBin = $pythonBin;
$this->scriptPath = $scriptPath;
@@ -36,39 +36,29 @@ final class VectorIndexBuilder
$this->vectorIndexPath = $vectorIndexPath;
$this->vectorMetaPath = $vectorIndexPath . '.meta.json';
$this->timeoutSeconds = $timeoutSeconds;
$this->indexConfiguration = $indexConfiguration;
$this->configurationProvider = $configurationProvider;
}
/**
* Rebuild the FAISS index deterministically from index.ndjson.
*/
public function rebuildFromNdjson(?string $logPath = null): void
{
if (!is_file($this->scriptPath)) {
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
}
if (!is_file($this->indexNdjsonPath)) {
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
}
$this->assertPreconditions();
if (!is_file($this->indexMetaPath)) {
$this->initializeIndexMeta();
}
$indexMeta = json_decode((string) file_get_contents($this->indexMetaPath), true);
$indexMeta = $this->readIndexMeta();
if (!is_array($indexMeta) || empty($indexMeta['embedding_model'])) {
throw new \RuntimeException('Invalid index_meta.json');
}
$embeddingModel = (string) $indexMeta['embedding_model'];
$embeddingModel = $indexMeta['embedding_model'];
$tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
// Wichtig: Python erzeugt meta basierend auf endgültigem Namen
$finalMetaPath = $this->vectorMetaPath;
$tmpMetaPath = dirname($this->vectorIndexPath) . '/' . basename($this->vectorIndexPath, '.index') . '.index.meta.json';
// Clean leftovers
@unlink($tmpVectorIndexPath);
@unlink($finalMetaPath);
@unlink($this->vectorMetaPath);
$cmd = [
$this->pythonBin,
@@ -80,21 +70,41 @@ final class VectorIndexBuilder
$process = new Process($cmd);
$process->setTimeout($this->timeoutSeconds);
$process->mustRun();
if (!is_file($tmpVectorIndexPath) || filesize($tmpVectorIndexPath) === 0) {
throw new \RuntimeException('Vector index tmp missing or empty');
$this->runProcess($process, $logPath);
$this->validatePythonOutputs($tmpVectorIndexPath);
$this->atomicSwitch($tmpVectorIndexPath);
}
// -----------------------------------------------------
// Internals
// -----------------------------------------------------
private function assertPreconditions(): void
{
if (!is_file($this->scriptPath)) {
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
}
// Python erzeugt vector.index.meta.json (nicht tmp.meta!)
if (!is_file($this->vectorMetaPath) || filesize($this->vectorMetaPath) === 0) {
throw new \RuntimeException('Vector meta missing or empty');
if (!is_file($this->indexNdjsonPath)) {
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
}
}
private function readIndexMeta(): array
{
$meta = json_decode(
(string) file_get_contents($this->indexMetaPath),
true
);
if (!is_array($meta) || empty($meta['embedding_model'])) {
throw new \RuntimeException('Invalid index_meta.json');
}
// Atomarer Switch für Index
if (!rename($tmpVectorIndexPath, $this->vectorIndexPath)) {
throw new \RuntimeException('Atomic switch failed for vector index');
}
return $meta;
}
private function initializeIndexMeta(): void
@@ -105,14 +115,16 @@ final class VectorIndexBuilder
throw new \RuntimeException('Cannot create knowledge directory');
}
$config = $this->configurationProvider->getConfiguration();
$data = [
'index_version' => 1,
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
'embedding_model' => $this->indexConfiguration->getEmbeddingModel(),
'embedding_dimension' => $this->indexConfiguration->getEmbeddingDimension(),
'chunk_size' => $this->indexConfiguration->getChunkSize(),
'chunk_overlap' => $this->indexConfiguration->getChunkOverlap(),
'scoring_version' => $this->indexConfiguration->getScoringVersion(),
'embedding_model' => $config->getEmbeddingModel(),
'embedding_dimension' => $config->getEmbeddingDimension(),
'chunk_size' => $config->getChunkSize(),
'chunk_overlap' => $config->getChunkOverlap(),
'scoring_version' => $config->getScoringVersion(),
'index_format' => 'ndjson',
'vector_backend' => 'faiss',
];
@@ -123,6 +135,24 @@ final class VectorIndexBuilder
);
}
/**
 * Verify that both expected output files exist and are non-empty.
 *
 * Checks the temporary vector index passed in and the vector meta file at
 * $this->vectorMetaPath.
 *
 * @param string $tmpVectorIndexPath Path of the temporary vector index file to check.
 *
 * @throws \RuntimeException If either file is missing, empty, or its size cannot be read.
 */
private function validatePythonOutputs(string $tmpVectorIndexPath): void
{
    // is_file()/filesize() results are stat-cached by PHP. These files are
    // expected to be written by an external process, so drop any cached
    // entries before inspecting them.
    clearstatcache(true, $tmpVectorIndexPath);
    clearstatcache(true, $this->vectorMetaPath);

    // filesize() returns false on failure; treat that like an empty file
    // instead of letting it pass the "=== 0" comparison.
    $indexSize = is_file($tmpVectorIndexPath) ? filesize($tmpVectorIndexPath) : false;
    if ($indexSize === false || $indexSize === 0) {
        throw new \RuntimeException('Vector index tmp missing or empty');
    }

    $metaSize = is_file($this->vectorMetaPath) ? filesize($this->vectorMetaPath) : false;
    if ($metaSize === false || $metaSize === 0) {
        throw new \RuntimeException('Vector meta missing or empty');
    }
}
/**
 * Move the temporary vector index onto the final index path via rename().
 *
 * NOTE(review): rename() only replaces the target in one step when both
 * paths are on the same filesystem — confirm the caller passes a sibling
 * path of $this->vectorIndexPath.
 *
 * @param string $tmpVectorIndexPath Path of the temporary index file to move.
 *
 * @throws \RuntimeException If rename() reports failure.
 */
private function atomicSwitch(string $tmpVectorIndexPath): void
{
    $switched = rename($tmpVectorIndexPath, $this->vectorIndexPath);

    if ($switched === false) {
        throw new \RuntimeException('Atomic switch failed for vector index');
    }
}
private function runProcess(Process $process, ?string $logPath): void
{
if ($logPath !== null) {