phase a audit

This commit is contained in:
team2
2026-02-22 13:51:45 +01:00
parent 5656a10930
commit b3e9110dd1
14 changed files with 222 additions and 463 deletions

View File

@@ -23,13 +23,9 @@ final class ChunkManager
}
// ============================================================
// COUNT (für Guardrails / Limits)
// COUNT (Streaming, robust)
// ============================================================
/**
* Zählt Datensätze (NDJSON-Zeilen) im index.ndjson streaming-basiert.
* Leere / kaputte Zeilen werden ignoriert.
*/
public function countAllChunks(): int
{
if (!is_file($this->indexPath)) {
@@ -42,6 +38,7 @@ final class ChunkManager
}
$count = 0;
try {
while (($line = fgets($handle)) !== false) {
$line = trim($line);
@@ -49,7 +46,6 @@ final class ChunkManager
continue;
}
// NDJSON besteht aus JSON-Objekten; wir zählen nur valide Arrays.
$data = json_decode($line, true);
if (is_array($data)) {
$count++;
@@ -63,7 +59,7 @@ final class ChunkManager
}
// ============================================================
// APPEND
// APPEND (Streaming + Exception Safe)
// ============================================================
/**
@@ -82,27 +78,34 @@ final class ChunkManager
throw new \RuntimeException('Unable to open index.ndjson for append');
}
foreach ($records as $record) {
$json = json_encode($record, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
if ($json === false) {
fclose($handle);
throw new \RuntimeException('Unable to encode chunk record');
try {
foreach ($records as $record) {
$json = json_encode(
$record,
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
if ($json === false) {
throw new \RuntimeException('Unable to encode chunk record');
}
if (fwrite($handle, $json . PHP_EOL) === false) {
throw new \RuntimeException('Unable to write chunk to index');
}
}
fwrite($handle, $json . PHP_EOL);
} finally {
fclose($handle);
}
fclose($handle);
}
// ============================================================
// COMPACTION Entfernt alle Chunks eines Dokuments
// COMPACTION (Streaming + Safe Handles)
// ============================================================
public function compactByDocument(Uuid $documentId): void
{
if (!is_file($this->indexPath)) {
return; // nichts zu kompaktieren
return;
}
$tmpPath = $this->indexPath . '.tmp';
@@ -116,32 +119,36 @@ final class ChunkManager
$docIdString = $documentId->toRfc4122();
while (($line = fgets($in)) !== false) {
$line = trim($line);
if ($line === '') {
continue;
}
try {
while (($line = fgets($in)) !== false) {
$line = trim($line);
if ($line === '') {
continue;
}
$data = json_decode($line, true);
if (!is_array($data)) {
continue; // skip corrupted line
}
$data = json_decode($line, true);
if (!is_array($data)) {
continue;
}
if (($data['document_id'] ?? null) === $docIdString) {
continue; // skip this document's chunks
}
if (($data['document_id'] ?? null) === $docIdString) {
continue;
}
fwrite($out, $line . PHP_EOL);
if (fwrite($out, $line . PHP_EOL) === false) {
throw new \RuntimeException('Unable to write compacted chunk');
}
}
} finally {
fclose($in);
fclose($out);
}
fclose($in);
fclose($out);
$this->atomicSwitch($tmpPath, $this->indexPath);
}
// ============================================================
// FULL REWRITE (Global Reindex)
// FULL REWRITE (Streaming + Atomic)
// ============================================================
/**
@@ -162,23 +169,30 @@ final class ChunkManager
throw new \RuntimeException('Unable to open temp index file');
}
foreach ($records as $record) {
$json = json_encode($record, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
if ($json === false) {
fclose($handle);
throw new \RuntimeException('Unable to encode chunk record');
try {
foreach ($records as $record) {
$json = json_encode(
$record,
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
);
if ($json === false) {
throw new \RuntimeException('Unable to encode chunk record');
}
if (fwrite($handle, $json . PHP_EOL) === false) {
throw new \RuntimeException('Unable to write chunk during rewrite');
}
}
fwrite($handle, $json . PHP_EOL);
} finally {
fclose($handle);
}
fclose($handle);
$this->atomicSwitch($tmpPath, $this->indexPath);
}
// ============================================================
// STREAM READ (für FAISS rebuild)
// STREAM READ (FAISS rebuild safe)
// ============================================================
/**
@@ -223,4 +237,4 @@ final class ChunkManager
throw new \RuntimeException('Atomic switch failed for index.ndjson');
}
}
}
}