stash light

This commit is contained in:
team 1
2026-02-12 10:03:52 +01:00
parent 5b650a8f28
commit 0bb0c0b42f
51 changed files with 6864 additions and 72 deletions

View File

@@ -0,0 +1,95 @@
<?php
declare(strict_types=1);
namespace App\Index;
/**
 * Describes the "structure" of the index (not its content).
 *
 * For a local ingest these values have to be compatible with index_meta.json;
 * otherwise a Global Reindex must be enforced.
 */
final class IndexConfiguration
{
    /**
     * Stores the structural settings and rejects invalid combinations.
     *
     * @param int    $chunkSize          Must be > 0.
     * @param int    $chunkOverlap       Must be >= 0 and strictly smaller than $chunkSize.
     * @param string $embeddingModel     Stored as given; not validated here.
     * @param int    $embeddingDimension Must be > 0.
     * @param int    $scoringVersion     Must be > 0.
     * @param string $indexFormat        Only 'ndjson' is accepted.
     * @param string $vectorBackend      Stored as given; not validated here.
     *
     * @throws \InvalidArgumentException for the first constraint that is violated
     */
    public function __construct(
        private readonly int $chunkSize,
        private readonly int $chunkOverlap,
        private readonly string $embeddingModel,
        private readonly int $embeddingDimension,
        private readonly int $scoringVersion,
        private readonly string $indexFormat = 'ndjson', // binding: 'ndjson'
        private readonly string $vectorBackend = 'faiss', // informational
    ) {
        // Ordered rule table (message => "is violated"). Iteration order is the
        // declaration order, so the first violated rule decides the message.
        $violations = [
            'chunkSize must be > 0' => $chunkSize <= 0,
            'chunkOverlap must be >= 0' => $chunkOverlap < 0,
            'chunkOverlap must be < chunkSize' => $chunkOverlap >= $chunkSize,
            'embeddingDimension must be > 0' => $embeddingDimension <= 0,
            'scoringVersion must be > 0' => $scoringVersion <= 0,
            'indexFormat must be "ndjson"' => $indexFormat !== 'ndjson',
        ];

        foreach ($violations as $message => $violated) {
            if ($violated) {
                throw new \InvalidArgumentException($message);
            }
        }
    }

    /** @return int the configured chunk size */
    public function getChunkSize(): int
    {
        return $this->chunkSize;
    }

    /** @return int the configured chunk overlap */
    public function getChunkOverlap(): int
    {
        return $this->chunkOverlap;
    }

    /** @return string the configured embedding model identifier */
    public function getEmbeddingModel(): string
    {
        return $this->embeddingModel;
    }

    /** @return int the configured embedding dimension */
    public function getEmbeddingDimension(): int
    {
        return $this->embeddingDimension;
    }

    /** @return int the configured scoring version */
    public function getScoringVersion(): int
    {
        return $this->scoringVersion;
    }

    /** @return string the index format (always 'ndjson' after construction) */
    public function getIndexFormat(): string
    {
        return $this->indexFormat;
    }

    /** @return string the configured vector backend name */
    public function getVectorBackend(): string
    {
        return $this->vectorBackend;
    }

    /**
     * Canonical representation: structural fields only (no created_at, no index_version).
     *
     * @return array{
     *     embedding_model: string,
     *     embedding_dimension: int,
     *     chunk_size: int,
     *     chunk_overlap: int,
     *     scoring_version: int,
     *     index_format: string,
     *     vector_backend: string
     * }
     */
    public function toStructureArray(): array
    {
        $structure = [];
        $structure['embedding_model'] = $this->embeddingModel;
        $structure['embedding_dimension'] = $this->embeddingDimension;
        $structure['chunk_size'] = $this->chunkSize;
        $structure['chunk_overlap'] = $this->chunkOverlap;
        $structure['scoring_version'] = $this->scoringVersion;
        $structure['index_format'] = $this->indexFormat;
        $structure['vector_backend'] = $this->vectorBackend;

        return $structure;
    }
}

View File

@@ -0,0 +1,206 @@
<?php
declare(strict_types=1);
namespace App\Index;
/**
 * Reads, validates and writes the index structure metadata file (index_meta.json).
 *
 * The file stores the structural settings reported by IndexConfiguration::toStructureArray()
 * together with an index_version counter and a created_at timestamp.
 */
final class IndexMetaManager
{
    private string $metaPath;

    /**
     * @param string             $projectDir       Base directory; trailing slashes are ignored.
     * @param IndexConfiguration $config           Current structural configuration.
     * @param string             $relativeMetaPath Location of the meta file below $projectDir,
     *                                             with or without a leading slash.
     */
    public function __construct(
        string $projectDir,
        private readonly IndexConfiguration $config,
        string $relativeMetaPath = '/var/knowledge/index_meta.json',
    ) {
        // Normalize both sides so exactly one separator joins them; a relative path
        // without a leading slash would otherwise be glued onto the directory name.
        $this->metaPath = rtrim($projectDir, '/') . '/' . ltrim($relativeMetaPath, '/');
    }

    /**
     * @return string the full path of the meta file
     */
    public function getMetaPath(): string
    {
        return $this->metaPath;
    }

    /**
     * Loads the stored meta. Returns null when no meta file exists yet (fresh system).
     *
     * @return array<string,mixed>|null
     *
     * @throws \RuntimeException when the file cannot be read or does not decode to an array;
     *                           a JSON syntax error is attached as the previous exception
     */
    public function readMeta(): ?array
    {
        if (!is_file($this->metaPath)) {
            return null;
        }

        $raw = file_get_contents($this->metaPath);
        if ($raw === false) {
            throw new \RuntimeException('Unable to read index_meta.json');
        }

        try {
            $data = json_decode($raw, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // Same message as before, but the parser's reason is kept for diagnostics.
            throw new \RuntimeException('index_meta.json is invalid JSON', 0, $e);
        }

        // Valid JSON that is not an object/array (e.g. a bare number) is rejected as well.
        if (!is_array($data)) {
            throw new \RuntimeException('index_meta.json is invalid JSON');
        }

        return $data;
    }

    /**
     * Creates the meta if it is missing (e.g. after the first Global Reindex).
     * Does NOT overwrite an existing meta.
     *
     * @return array<string,mixed> the existing meta, or the newly written one
     *
     * @throws \RuntimeException when reading or writing the meta file fails
     */
    public function createInitialMetaIfMissing(): array
    {
        $existing = $this->readMeta();
        if ($existing !== null) {
            return $existing;
        }

        $meta = $this->buildMetaPayload(indexVersion: 1);
        $this->atomicWriteJson($meta);

        return $meta;
    }

    /**
     * Guardrail: checks whether the current config is compatible with the stored meta.
     *
     * @throws IndexStructureChangedException when the meta is missing or differs from the
     *                                        current config (Global Reindex required)
     * @throws \RuntimeException              when the meta file cannot be read or parsed
     */
    public function validateAgainstCurrent(): void
    {
        $meta = $this->readMeta();

        // Without a meta file local ingests are deliberately not allowed to proceed.
        // Governance: only a Global Reindex creates the meta cleanly.
        if ($meta === null) {
            throw new IndexStructureChangedException(
                'index_meta.json missing. Please run a Global Reindex to initialize index structure metadata.',
                ['reason' => 'missing_meta']
            );
        }

        $diff = $this->diffStructure($meta, $this->config->toStructureArray());
        if ($diff !== []) {
            throw new IndexStructureChangedException(
                'Index structure changed. Global Reindex required.',
                $diff
            );
        }
    }

    /**
     * Used by the Global Reindex:
     * - increments index_version (or initializes it to 1)
     * - writes the meta via temp file + rename
     *
     * @return array<string,mixed> the new meta
     *
     * @throws \RuntimeException when an existing meta file cannot be read or parsed,
     *                           or when writing the new meta fails
     */
    public function writeMetaForGlobalReindex(): array
    {
        $current = $this->readMeta();

        // A missing file, a missing key or a non-integer value all restart at version 1.
        $previousVersion = $current['index_version'] ?? null;
        $nextVersion = is_int($previousVersion) ? $previousVersion + 1 : 1;

        $meta = $this->buildMetaPayload($nextVersion);
        $this->atomicWriteJson($meta);

        return $meta;
    }

    /**
     * @return IndexConfiguration the configuration this manager validates against
     */
    public function getConfig(): IndexConfiguration
    {
        return $this->config;
    }

    // -------------------------
    // Internals
    // -------------------------

    /**
     * Builds the payload written to the meta file: version, timestamp, then every
     * structural field of the current configuration.
     *
     * @return array<string,mixed>
     */
    private function buildMetaPayload(int $indexVersion): array
    {
        // Spreading the structure array keeps the written keys in lockstep with
        // toStructureArray(), which is exactly what diffStructure() compares against.
        return [
            'index_version' => $indexVersion,
            'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
            ...$this->config->toStructureArray(),
        ];
    }

    /**
     * Compares the stored meta with the expected structure using strict comparison.
     * A key missing from $meta is reported with an actual value of null.
     *
     * NOTE(review): every key of $expected is compared, including vector_backend, which
     * IndexConfiguration labels as informational — confirm that a backend change is
     * really meant to force a Global Reindex.
     *
     * @param array<string,mixed> $meta
     * @param array<string,mixed> $expected
     *
     * @return array<string,mixed> diff keyed by field name; each entry holds 'expected' and 'actual'
     */
    private function diffStructure(array $meta, array $expected): array
    {
        $diff = [];
        foreach ($expected as $key => $value) {
            $actual = $meta[$key] ?? null;
            if ($actual !== $value) {
                $diff[$key] = [
                    'expected' => $value,
                    'actual' => $actual,
                ];
            }
        }

        // index_format is mandatory: enforced independently of what $expected contains.
        $storedFormat = $meta['index_format'] ?? null;
        if ($storedFormat !== 'ndjson') {
            $diff['index_format'] = [
                'expected' => 'ndjson',
                'actual' => $storedFormat,
            ];
        }

        return $diff;
    }

    /**
     * Writes the payload as pretty-printed JSON to "<metaPath>.tmp" and then renames
     * that file onto the meta path, so the target is switched in a single rename() call.
     *
     * @param array<string,mixed> $payload
     *
     * @throws \RuntimeException when the directory cannot be created, the payload cannot be
     *                           encoded, or the temp file cannot be written or renamed
     */
    private function atomicWriteJson(array $payload): void
    {
        $dir = \dirname($this->metaPath);
        // is_dir() is re-checked after mkdir() so that a directory which appeared in the
        // meantime is not reported as a failure.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create directory: ' . $dir);
        }

        try {
            $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            throw new \RuntimeException('Unable to encode index_meta.json', 0, $e);
        }

        $tmp = $this->metaPath . '.tmp';
        if (file_put_contents($tmp, $json . PHP_EOL) === false) {
            // A failed write may still have created (part of) the temp file.
            $this->discardTempFile($tmp);
            throw new \RuntimeException('Unable to write temp meta file');
        }

        // Switch: replace the meta file with the fully written temp file.
        if (!rename($tmp, $this->metaPath)) {
            $this->discardTempFile($tmp);
            throw new \RuntimeException('Unable to switch meta file atomically');
        }
    }

    /**
     * Best-effort removal of a leftover temp file; never raises on its own.
     */
    private function discardTempFile(string $tmp): void
    {
        if (is_file($tmp)) {
            // Suppression is deliberate: a cleanup problem must not mask the
            // RuntimeException the caller is about to throw.
            @unlink($tmp);
        }
    }
}

View File

@@ -0,0 +1,34 @@
<?php
declare(strict_types=1);
namespace App\Index;
/**
 * Thrown when local ingests are no longer compatible with the stored index
 * structure and a Global Reindex has to be enforced.
 */
final class IndexStructureChangedException extends \RuntimeException
{
    /**
     * @var array<string,mixed> details handed to the constructor
     */
    private readonly array $diff;

    /**
     * @param string              $message  Exception message.
     * @param array<string,mixed> $diff     Details describing the incompatibility; empty by default.
     * @param int                 $code     Exception code.
     * @param \Throwable|null     $previous Previous exception, if any.
     */
    public function __construct(
        string $message,
        array $diff = [],
        int $code = 0,
        ?\Throwable $previous = null
    ) {
        parent::__construct($message, $code, $previous);

        $this->diff = $diff;
    }

    /**
     * @return array<string,mixed> the details passed at construction time
     */
    public function getDiff(): array
    {
        return $this->diff;
    }
}