indexNdjsonPath); $vectorExists = is_file($this->vectorIndexPath); $metaExists = is_file($this->vectorMetaPath); $ndjsonChunkCount = 0; if ($ndjsonExists) { $h = @fopen($this->indexNdjsonPath, 'r'); if ($h !== false) { while (($line = fgets($h)) !== false) { $line = trim($line); if ($line === '') { continue; } $data = json_decode($line, true); if (is_array($data) && !empty($data['chunk_id']) && !empty($data['text'])) { $ndjsonChunkCount++; } } fclose($h); } } $vectorChunkCount = 0; if ($metaExists) { $meta = json_decode((string) file_get_contents($this->vectorMetaPath), true); if (is_array($meta)) { $vectorChunkCount = count($meta); } } $status = $this->determineStatus( $ndjsonChunkCount, $vectorExists, $metaExists, $vectorChunkCount ); return [ 'ndjson_exists' => $ndjsonExists, 'ndjson_chunk_count' => $ndjsonChunkCount, 'vector_exists' => $vectorExists, 'meta_exists' => $metaExists, 'vector_chunk_count' => $vectorChunkCount, 'status' => $status, ]; }

    /**
     * Classifies the consistency between the NDJSON chunk count and the
     * vector index / vector meta files.
     *
     * Possible results:
     *  - 'OK_EMPTY': no NDJSON chunks and neither vector file exists.
     *  - 'OK': chunks exist, both vector files exist, and the counts match.
     *  - 'INCONSISTENT_STALE_VECTOR': no NDJSON chunks, but at least one
     *    vector file exists.
     *  - 'INCONSISTENT_MISSING_VECTOR': chunks exist, but at least one
     *    vector file is absent.
     *  - 'INCONSISTENT_COUNT_MISMATCH': the two counts differ.
     *  - 'UNKNOWN': none of the above applies.
     *
     * @param int  $ndjsonChunkCount Number of chunks counted in the NDJSON index.
     * @param bool $vectorExists     Whether the vector index file exists.
     * @param bool $metaExists       Whether the vector meta file exists.
     * @param int  $vectorChunkCount Number of entries counted in the vector meta.
     *
     * @return string One of the status codes listed above.
     */
    private function determineStatus(
        int $ndjsonChunkCount,
        bool $vectorExists,
        bool $metaExists,
        int $vectorChunkCount
    ): string {
        $anyVectorFile = $vectorExists || $metaExists;
        $allVectorFiles = $vectorExists && $metaExists;
        $countsAgree = $vectorChunkCount === $ndjsonChunkCount;

        // Empty NDJSON: vector artefacts must be absent as well.
        if ($ndjsonChunkCount === 0) {
            return $anyVectorFile ? 'INCONSISTENT_STALE_VECTOR' : 'OK_EMPTY';
        }

        // Populated NDJSON: both vector files are required, then counts must match.
        if ($ndjsonChunkCount > 0) {
            if (!$allVectorFiles) {
                return 'INCONSISTENT_MISSING_VECTOR';
            }

            return $countsAgree ? 'OK' : 'INCONSISTENT_COUNT_MISMATCH';
        }

        // Only a negative chunk count reaches this point.
        return $countsAgree ? 'UNKNOWN' : 'INCONSISTENT_COUNT_MISMATCH';
    }
}