stash light
This commit is contained in:
206
src/Index/IndexMetaManager.php
Normal file
206
src/Index/IndexMetaManager.php
Normal file
@@ -0,0 +1,206 @@
|
||||
<?php
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Index;
|
||||
|
||||
/**
 * Reads, validates and writes the index_meta.json file.
 *
 * The meta file stores an index version, a creation timestamp and the
 * structure values taken from the current IndexConfiguration. It is used as a
 * guardrail: when the stored values no longer match the current configuration,
 * validateAgainstCurrent() throws and a Global Reindex is required.
 */
final class IndexMetaManager
{
    /** Index format that every stored meta file is required to declare. */
    private const REQUIRED_INDEX_FORMAT = 'ndjson';

    /** Full path of the meta file, built once in the constructor. */
    private string $metaPath;

    /**
     * @param string             $projectDir       Project root; trailing slashes are ignored.
     * @param IndexConfiguration $config           Configuration whose structure array is persisted and compared.
     * @param string             $relativeMetaPath Meta file location relative to $projectDir
     *                                             (with or without a leading slash).
     */
    public function __construct(
        string $projectDir,
        private readonly IndexConfiguration $config,
        string $relativeMetaPath = '/var/knowledge/index_meta.json',
    ) {
        // Normalise both sides of the join so a relative path passed without a
        // leading slash is not glued directly onto the directory name.
        $this->metaPath = rtrim($projectDir, '/') . '/' . ltrim($relativeMetaPath, '/');
    }

    /**
     * Returns the full path of the managed meta file.
     */
    public function getMetaPath(): string
    {
        return $this->metaPath;
    }

    /**
     * Loads the stored meta data.
     *
     * Returns null when no meta file exists yet (fresh system).
     *
     * @return array<string,mixed>|null
     *
     * @throws \RuntimeException When the file cannot be read or does not decode to an array.
     */
    public function readMeta(): ?array
    {
        if (!is_file($this->metaPath)) {
            return null;
        }

        $raw = file_get_contents($this->metaPath);
        if ($raw === false) {
            throw new \RuntimeException('Unable to read index_meta.json');
        }

        try {
            $data = json_decode($raw, true, flags: JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // Same exception type and message as before; the parser error is
            // attached as "previous" so the root cause is not lost.
            throw new \RuntimeException('index_meta.json is invalid JSON', 0, $e);
        }

        // Syntactically valid JSON that is a scalar or null is rejected as well.
        if (!is_array($data)) {
            throw new \RuntimeException('index_meta.json is invalid JSON');
        }

        return $data;
    }

    /**
     * Creates the meta file if it does not exist (e.g. after the first Global Reindex).
     *
     * Does NOT overwrite an existing meta file; the existing content is returned instead.
     *
     * @return array<string,mixed>
     *
     * @throws \RuntimeException When the existing file is unreadable/invalid or the new file cannot be written.
     */
    public function createInitialMetaIfMissing(): array
    {
        $existing = $this->readMeta();
        if ($existing !== null) {
            return $existing;
        }

        $meta = $this->buildMetaPayload(indexVersion: 1);
        $this->atomicWriteJson($meta);

        return $meta;
    }

    /**
     * Guardrail: checks that the current configuration is compatible with the stored meta.
     *
     * @throws IndexStructureChangedException When the meta file is missing or differs from
     *                                        the current configuration (Global Reindex required).
     * @throws \RuntimeException              When the meta file exists but cannot be read or decoded.
     */
    public function validateAgainstCurrent(): void
    {
        $meta = $this->readMeta();

        // When no meta exists yet, local ingests must NOT simply proceed.
        // Governance: only a Global Reindex creates the meta cleanly.
        if ($meta === null) {
            throw new IndexStructureChangedException(
                'index_meta.json missing. Please run a Global Reindex to initialize index structure metadata.',
                ['reason' => 'missing_meta']
            );
        }

        $diff = $this->diffStructure($meta, $this->config->toStructureArray());

        if ($diff !== []) {
            throw new IndexStructureChangedException(
                'Index structure changed. Global Reindex required.',
                $diff
            );
        }
    }

    /**
     * Used by the Global Reindex:
     * - increments index_version (or initialises it to 1)
     * - writes the meta file atomically
     *
     * @return array<string,mixed> The newly written meta.
     *
     * @throws \RuntimeException When the existing file is unreadable/invalid or the new file cannot be written.
     */
    public function writeMetaForGlobalReindex(): array
    {
        $current = $this->readMeta();

        // Only an integer version is continued; a missing meta file or a
        // missing / non-integer version restarts the counter at 1.
        $storedVersion = $current['index_version'] ?? null;
        $nextVersion = is_int($storedVersion) ? $storedVersion + 1 : 1;

        $meta = $this->buildMetaPayload($nextVersion);
        $this->atomicWriteJson($meta);

        return $meta;
    }

    /**
     * Returns the configuration this manager validates against.
     */
    public function getConfig(): IndexConfiguration
    {
        return $this->config;
    }

    // -------------------------
    // Internals
    // -------------------------

    /**
     * Builds the array that is persisted as index_meta.json.
     *
     * NOTE(review): only the keys listed below are persisted, while
     * diffStructure() compares every key returned by toStructureArray().
     * Confirm that toStructureArray() returns exactly these keys; an extra key
     * would presumably be reported as changed on every validation.
     *
     * @return array<string,mixed>
     */
    private function buildMetaPayload(int $indexVersion): array
    {
        $structure = $this->config->toStructureArray();

        return [
            'index_version' => $indexVersion,
            'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
            'embedding_model' => $structure['embedding_model'],
            'embedding_dimension' => $structure['embedding_dimension'],
            'chunk_size' => $structure['chunk_size'],
            'chunk_overlap' => $structure['chunk_overlap'],
            'scoring_version' => $structure['scoring_version'],
            'index_format' => $structure['index_format'],
            'vector_backend' => $structure['vector_backend'],
        ];
    }

    /**
     * Compares the stored meta against the expected structure.
     *
     * Every key of $expected is compared strictly (type and value); a key that
     * is missing from $meta is treated as null.
     *
     * @param array<string,mixed> $meta
     * @param array<string,mixed> $expected
     *
     * @return array<string,mixed> Map of key => ['expected' => ..., 'actual' => ...]; empty when compatible.
     */
    private function diffStructure(array $meta, array $expected): array
    {
        $diff = [];

        foreach ($expected as $key => $value) {
            $actual = $meta[$key] ?? null;
            if ($actual !== $value) {
                $diff[$key] = [
                    'expected' => $value,
                    'actual' => $actual,
                ];
            }
        }

        // index_format is mandatory: the stored value must be the required
        // format regardless of $expected. This entry replaces any
        // 'index_format' entry produced by the loop above.
        $storedFormat = $meta['index_format'] ?? null;
        if ($storedFormat !== self::REQUIRED_INDEX_FORMAT) {
            $diff['index_format'] = [
                'expected' => self::REQUIRED_INDEX_FORMAT,
                'actual' => $storedFormat,
            ];
        }

        return $diff;
    }

    /**
     * Writes the payload as pretty-printed JSON by writing a temp file next to
     * the target and renaming it over the meta file.
     *
     * @param array<string,mixed> $payload
     *
     * @throws \RuntimeException When the directory cannot be created, the payload cannot be
     *                           encoded, or the file cannot be written or renamed.
     */
    private function atomicWriteJson(array $payload): void
    {
        $dir = \dirname($this->metaPath);
        // The trailing is_dir() re-check tolerates another process creating
        // the directory between the first check and mkdir().
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create directory: ' . $dir);
        }

        try {
            $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            throw new \RuntimeException('Unable to encode index_meta.json', 0, $e);
        }

        // Unique temp name per write: two concurrent writers must never share
        // (and overwrite) the same temp file before the rename.
        $tmp = sprintf('%s.%s.tmp', $this->metaPath, bin2hex(random_bytes(8)));

        if (file_put_contents($tmp, $json . PHP_EOL) === false) {
            // Best-effort cleanup; suppressed so a failing unlink cannot mask
            // the exception thrown below.
            @unlink($tmp);
            throw new \RuntimeException('Unable to write temp meta file');
        }

        // Atomic switch.
        if (!rename($tmp, $this->metaPath)) {
            @unlink($tmp);
            throw new \RuntimeException('Unable to switch meta file atomically');
        }
    }
}
|
||||
Reference in New Issue
Block a user