Files
MtoRagSystem/src/Vector/VectorIndexHealthService.php
2026-03-01 16:52:50 +01:00

94 lines
2.7 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Vector;
/**
 * Read-only health probe for the on-disk vector index.
 *
 * Compares the NDJSON chunk index with the vector index file and the vector
 * metadata JSON file: it checks that each file exists, counts the usable
 * chunks in the NDJSON index, counts the top-level entries of the metadata
 * JSON, and classifies the combination as a status string. No file is ever
 * written or modified.
 */
final readonly class VectorIndexHealthService
{
    /**
     * @param string $indexNdjsonPath Path of the NDJSON chunk index (one JSON document per line).
     * @param string $vectorIndexPath Path of the vector index file; only its existence is checked.
     * @param string $vectorMetaPath  Path of the vector metadata JSON file.
     */
    public function __construct(
        private string $indexNdjsonPath,
        private string $vectorIndexPath,
        private string $vectorMetaPath,
    ) {}

    /**
     * Inspect the three files and report their state.
     *
     * A file that exists but cannot be opened, read or decoded contributes a
     * count of 0; the problem then shows up through the returned status
     * instead of a PHP warning.
     *
     * @return array{
     *     ndjson_exists: bool,
     *     ndjson_chunk_count: int,
     *     vector_exists: bool,
     *     meta_exists: bool,
     *     vector_chunk_count: int,
     *     status: string
     * }
     */
    public function check(): array
    {
        $ndjsonExists = is_file($this->indexNdjsonPath);
        $vectorExists = is_file($this->vectorIndexPath);
        $metaExists = is_file($this->vectorMetaPath);

        $ndjsonChunkCount = $ndjsonExists ? $this->countNdjsonChunks() : 0;
        $vectorChunkCount = $metaExists ? $this->countVectorMetaEntries() : 0;

        return [
            'ndjson_exists' => $ndjsonExists,
            'ndjson_chunk_count' => $ndjsonChunkCount,
            'vector_exists' => $vectorExists,
            'meta_exists' => $metaExists,
            'vector_chunk_count' => $vectorChunkCount,
            'status' => $this->determineStatus(
                $ndjsonChunkCount,
                $vectorExists,
                $metaExists,
                $vectorChunkCount,
            ),
        ];
    }

    /**
     * Count the NDJSON lines that decode to an array with a non-empty
     * `chunk_id` and a non-empty `text`.
     *
     * Blank lines and lines that are not valid JSON are ignored.
     *
     * @return int Number of usable chunk lines; 0 when the file cannot be opened.
     */
    private function countNdjsonChunks(): int
    {
        // Best-effort probe: an open failure is handled explicitly below, so
        // the accompanying warning is suppressed rather than leaked to callers.
        $handle = @fopen($this->indexNdjsonPath, 'r');
        if ($handle === false) {
            return 0;
        }

        $count = 0;
        try {
            while (($line = fgets($handle)) !== false) {
                $line = trim($line);
                if ($line === '') {
                    continue;
                }

                $data = json_decode($line, true);

                // NOTE(review): empty() also rejects 0 and "0"; confirm that
                // chunk ids and texts can never legitimately be those values.
                if (is_array($data) && !empty($data['chunk_id']) && !empty($data['text'])) {
                    $count++;
                }
            }
        } finally {
            // Release the handle even if an error handler throws mid-read.
            fclose($handle);
        }

        return $count;
    }

    /**
     * Count the top-level entries of the vector metadata JSON document.
     *
     * @return int Entry count; 0 when the file cannot be read or does not
     *             decode to an array.
     */
    private function countVectorMetaEntries(): int
    {
        // Same best-effort policy as the NDJSON read: a failed read is
        // reported as "0 entries", never as a warning from the health check.
        $raw = @file_get_contents($this->vectorMetaPath);
        if ($raw === false) {
            return 0;
        }

        $meta = json_decode($raw, true);

        return is_array($meta) ? count($meta) : 0;
    }

    /**
     * Classify the observed state. Arms are evaluated top to bottom and the
     * first match wins.
     *
     * - `OK_EMPTY`: no chunks and neither vector file exists.
     * - `OK`: chunks exist, both vector files exist, and the counts agree.
     * - `INCONSISTENT_STALE_VECTOR`: no chunks, but a vector file is present.
     * - `INCONSISTENT_MISSING_VECTOR`: chunks exist, but a vector file is missing.
     * - `INCONSISTENT_COUNT_MISMATCH`: both vector files exist, counts differ.
     * - `UNKNOWN`: defensive fallback; not reachable for non-negative counts.
     *
     * @param int  $ndjsonChunkCount Usable chunks counted in the NDJSON index.
     * @param bool $vectorExists     Whether the vector index file exists.
     * @param bool $metaExists       Whether the vector metadata file exists.
     * @param int  $vectorChunkCount Entries counted in the vector metadata.
     */
    private function determineStatus(
        int $ndjsonChunkCount,
        bool $vectorExists,
        bool $metaExists,
        int $vectorChunkCount,
    ): string {
        $bothVectorFilesExist = $vectorExists && $metaExists;
        $anyVectorFileExists = $vectorExists || $metaExists;

        return match (true) {
            $ndjsonChunkCount === 0 && !$anyVectorFileExists => 'OK_EMPTY',
            $ndjsonChunkCount > 0
                && $bothVectorFilesExist
                && $vectorChunkCount === $ndjsonChunkCount => 'OK',
            $ndjsonChunkCount === 0 && $anyVectorFileExists => 'INCONSISTENT_STALE_VECTOR',
            $ndjsonChunkCount > 0 && !$bothVectorFilesExist => 'INCONSISTENT_MISSING_VECTOR',
            $ndjsonChunkCount !== $vectorChunkCount => 'INCONSISTENT_COUNT_MISMATCH',
            default => 'UNKNOWN',
        };
    }
}