New ingest and profile settings

This commit is contained in:
team 1
2026-02-16 14:38:02 +01:00
parent ece93e4cb4
commit 8666b05570
15 changed files with 655 additions and 199 deletions

View File

@@ -0,0 +1,63 @@
<?php
declare(strict_types=1);
namespace App\Index;
use App\Repository\IngestProfileRepository;
/**
 * Supplies the IndexConfiguration that is currently in effect.
 *
 * The values are read from the active ingest profile returned by the
 * repository. When the repository reports no active profile, the fallback
 * values injected through the constructor are used instead.
 */
final class IndexConfigurationProvider
{
    private IngestProfileRepository $repository;
    private int $fallbackChunkSize;
    private int $fallbackChunkOverlap;
    private string $fallbackEmbeddingModel;
    private int $fallbackEmbeddingDimension;
    private int $fallbackScoringVersion;

    /**
     * @param IngestProfileRepository $repository                 Source of the active ingest profile.
     * @param int                     $fallbackChunkSize          Chunk size used when no profile is active.
     * @param int                     $fallbackChunkOverlap       Chunk overlap used when no profile is active.
     * @param string                  $fallbackEmbeddingModel     Embedding model used when no profile is active.
     * @param int                     $fallbackEmbeddingDimension Embedding dimension used when no profile is active.
     * @param int                     $fallbackScoringVersion     Scoring version used when no profile is active.
     */
    public function __construct(
        IngestProfileRepository $repository,
        int $fallbackChunkSize,
        int $fallbackChunkOverlap,
        string $fallbackEmbeddingModel,
        int $fallbackEmbeddingDimension,
        int $fallbackScoringVersion
    ) {
        $this->repository = $repository;

        // Fallback values, kept for the case where no profile is active.
        $this->fallbackChunkSize = $fallbackChunkSize;
        $this->fallbackChunkOverlap = $fallbackChunkOverlap;
        $this->fallbackEmbeddingModel = $fallbackEmbeddingModel;
        $this->fallbackEmbeddingDimension = $fallbackEmbeddingDimension;
        $this->fallbackScoringVersion = $fallbackScoringVersion;
    }

    /**
     * Builds the configuration from the active profile, or from the fallback
     * values when the repository returns null.
     *
     * The last two constructor arguments are always the literals 'ndjson' and
     * 'faiss' (presumably index format and vector backend; confirm against
     * the IndexConfiguration constructor).
     */
    public function getConfiguration(): IndexConfiguration
    {
        $profile = $this->repository->findActive();

        if ($profile !== null) {
            return new IndexConfiguration(
                $profile->getChunkSize(),
                $profile->getChunkOverlap(),
                $profile->getEmbeddingModel(),
                $profile->getEmbeddingDimension(),
                $profile->getScoringVersion(),
                'ndjson',
                'faiss'
            );
        }

        // No active profile: fall back to the injected defaults
        // (YAML-configured, according to the original comment).
        return new IndexConfiguration(
            $this->fallbackChunkSize,
            $this->fallbackChunkOverlap,
            $this->fallbackEmbeddingModel,
            $this->fallbackEmbeddingDimension,
            $this->fallbackScoringVersion,
            'ndjson',
            'faiss'
        );
    }
}

View File

@@ -7,157 +7,90 @@ namespace App\Index;
final class IndexMetaManager
{
private string $metaPath;
private IndexConfiguration $config;
private IndexConfigurationProvider $provider;
public function __construct(
string $metaPath,
IndexConfiguration $config
IndexConfigurationProvider $provider
) {
$this->metaPath = $metaPath;
$this->config = $config;
$this->provider = $provider;
}
public function getMetaPath(): string
// -----------------------------------------------------
// Public API
// -----------------------------------------------------
public function ensureExists(): void
{
return $this->metaPath;
if (!is_file($this->metaPath)) {
$this->writeMeta(1);
}
}
/**
* @return array<string,mixed>|null
*/
public function readMeta(): ?array
{
if (!is_file($this->metaPath)) {
return null;
}
$raw = file_get_contents($this->metaPath);
if ($raw === false) {
throw new \RuntimeException('Unable to read index_meta.json');
}
$data = json_decode($raw, true);
if (!is_array($data)) {
throw new \RuntimeException('index_meta.json is invalid JSON');
}
return $data;
return json_decode(
(string) file_get_contents($this->metaPath),
true
);
}
/**
* Guardrail:
* - If the meta file is missing → initialize it
* - If the structure has drifted → throw an exception
*/
public function validateAgainstCurrent(): void
{
$meta = $this->readMeta();
$current = $this->provider
->getConfiguration()
->toStructureArray();
if ($meta === null) {
$meta = $this->createInitialMeta();
return;
}
$expected = $this->config->toStructureArray();
$diff = $this->diffStructure($meta, $expected);
if ($diff !== []) {
throw new IndexStructureChangedException(
'Index structure changed. Global Reindex required.',
$diff
);
}
}
/**
* Called during a global reindex
*/
public function writeMetaForGlobalReindex(): array
{
$current = $this->readMeta();
$nextVersion = 1;
if (is_array($current) && isset($current['index_version']) && is_int($current['index_version'])) {
$nextVersion = $current['index_version'] + 1;
}
$meta = $this->buildMetaPayload($nextVersion);
$this->atomicWriteJson($meta);
return $meta;
}
public function getConfig(): IndexConfiguration
{
return $this->config;
}
// ---------------------------------------------------------
// Internals
// ---------------------------------------------------------
private function createInitialMeta(): array
{
$meta = $this->buildMetaPayload(1);
$this->atomicWriteJson($meta);
return $meta;
}
private function buildMetaPayload(int $indexVersion): array
{
$structure = $this->config->toStructureArray();
return [
'index_version' => $indexVersion,
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
'embedding_model' => $structure['embedding_model'],
'embedding_dimension' => $structure['embedding_dimension'],
'chunk_size' => $structure['chunk_size'],
'chunk_overlap' => $structure['chunk_overlap'],
'scoring_version' => $structure['scoring_version'],
'index_format' => $structure['index_format'],
'vector_backend' => $structure['vector_backend'],
];
}
private function diffStructure(array $meta, array $expected): array
{
$diff = [];
foreach ($expected as $key => $value) {
$actual = $meta[$key] ?? null;
if ($actual !== $value) {
$diff[$key] = [
'expected' => $value,
'actual' => $actual,
];
foreach ($current as $key => $value) {
if (($meta[$key] ?? null) !== $value) {
throw new \RuntimeException(
'Index structure changed. Global Reindex required.'
);
}
}
return $diff;
}
private function atomicWriteJson(array $payload): void
public function writeMetaForGlobalReindex(): void
{
$current = $this->readMeta();
$nextVersion = ($current['index_version'] ?? 0) + 1;
$this->writeMeta($nextVersion);
}
public function writeMeta(int $indexVersion): void
{
$config = $this->provider->getConfiguration();
$payload = array_merge(
[
'index_version' => $indexVersion,
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
],
$config->toStructureArray()
);
$dir = dirname($this->metaPath);
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
throw new \RuntimeException('Unable to create directory: ' . $dir);
if (!is_dir($dir)) {
mkdir($dir, 0777, true);
}
$tmp = $this->metaPath . '.tmp';
$json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($json === false) {
throw new \RuntimeException('Unable to encode index_meta.json');
}
if (file_put_contents($tmp, $json . PHP_EOL) === false) {
throw new \RuntimeException('Unable to write temp meta file');
}
if (!rename($tmp, $this->metaPath)) {
@unlink($tmp);
throw new \RuntimeException('Unable to switch meta file atomically');
}
file_put_contents(
$this->metaPath,
json_encode(
$payload,
JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES
)
);
}
}

View File

@@ -0,0 +1,37 @@
<?php
declare(strict_types=1);
namespace App\Index;
/**
 * Produces a field-by-field comparison between stored index meta data and
 * the structure derived from a profile.
 */
final class IndexStructureComparator
{
    /**
     * Compares every key that occurs in either array, except the bookkeeping
     * keys `index_version` and `created_at`.
     *
     * Result order: keys of $profileStructure first, followed by keys that
     * only exist in $meta. A key missing on one side is reported as null for
     * that side; equality is checked with strict comparison (===).
     *
     * @param array<array-key,mixed>|null $meta             Stored meta data, or null when none exists.
     * @param array<array-key,mixed>      $profileStructure Structure to compare against.
     *
     * @return array<array-key,array{meta:mixed,profile:mixed,equal:bool}>
     */
    public function compare(?array $meta, array $profileStructure): array
    {
        // Lookup map of keys that never take part in the comparison.
        $skipped = ['index_version' => true, 'created_at' => true];

        $stored = $meta ?? [];
        $comparison = [];

        // Array union keeps the profile keys in place and appends the keys
        // that appear only in the stored meta data.
        foreach (array_keys($profileStructure + $stored) as $field) {
            if (isset($skipped[$field])) {
                continue;
            }

            $storedValue = $stored[$field] ?? null;
            $expectedValue = $profileStructure[$field] ?? null;

            $comparison[$field] = [
                'meta' => $storedValue,
                'profile' => $expectedValue,
                'equal' => $storedValue === $expectedValue,
            ];
        }

        return $comparison;
    }
}