phase a audit
This commit is contained in:
@@ -33,7 +33,7 @@ final class VectorControlCommand extends Command
|
||||
|
||||
protected function execute(InputInterface $input, OutputInterface $output): int
|
||||
{
|
||||
$cmd = ['.venv/bin/python', 'src/Vector/vector_control.py'];
|
||||
$cmd = ['.venv/bin/python', 'python/vector/vector_control.py'];
|
||||
|
||||
if ($input->getOption('install')) {
|
||||
$cmd[] = '--install';
|
||||
|
||||
@@ -29,7 +29,7 @@ final readonly class IngestFlow
|
||||
) {}
|
||||
|
||||
// =========================================================
|
||||
// DOCUMENT INGEST
|
||||
// DOCUMENT INGEST (STREAMING SAFE)
|
||||
// =========================================================
|
||||
|
||||
public function ingestDocumentVersion(DocumentVersion $version): void
|
||||
@@ -45,12 +45,34 @@ final readonly class IngestFlow
|
||||
|
||||
$existing = $this->chunkManager->countAllChunks();
|
||||
|
||||
$records = iterator_to_array(
|
||||
$this->knowledgeIngestService->buildChunkRecords($version),
|
||||
false
|
||||
);
|
||||
$incoming = 0;
|
||||
$generator = $this->knowledgeIngestService->buildChunkRecords($version);
|
||||
|
||||
$wrappedGenerator = (function () use ($generator, $existing, &$incoming) {
|
||||
|
||||
foreach ($generator as $record) {
|
||||
|
||||
$incoming++;
|
||||
$total = $existing + $incoming;
|
||||
|
||||
if ($total >= self::CHUNK_LIMIT_WARN) {
|
||||
// Nur einmal warnen
|
||||
if ($incoming === 1 || $total === self::CHUNK_LIMIT_WARN) {
|
||||
// Logging erfolgt außerhalb des Streams final
|
||||
}
|
||||
}
|
||||
|
||||
if ($total > self::CHUNK_LIMIT_HARD) {
|
||||
throw new \RuntimeException('Chunk limit exceeded.');
|
||||
}
|
||||
|
||||
yield $record;
|
||||
}
|
||||
|
||||
})();
|
||||
|
||||
$this->chunkManager->appendChunks($wrappedGenerator);
|
||||
|
||||
$incoming = count($records);
|
||||
$total = $existing + $incoming;
|
||||
|
||||
if ($total >= self::CHUNK_LIMIT_WARN) {
|
||||
@@ -61,12 +83,6 @@ final readonly class IngestFlow
|
||||
]);
|
||||
}
|
||||
|
||||
if ($total > self::CHUNK_LIMIT_HARD) {
|
||||
throw new \RuntimeException('Chunk limit exceeded.');
|
||||
}
|
||||
|
||||
$this->chunkManager->appendChunks($records);
|
||||
|
||||
$this->rebuildIndex(false);
|
||||
|
||||
$version->setIngestStatus(DocumentVersion::INGEST_INDEXED);
|
||||
@@ -81,13 +97,11 @@ final readonly class IngestFlow
|
||||
}
|
||||
|
||||
// =========================================================
|
||||
// GLOBAL REINDEX
|
||||
// GLOBAL REINDEX (STREAMING SAFE)
|
||||
// =========================================================
|
||||
|
||||
public function globalReindex(): void
|
||||
{
|
||||
|
||||
// 1️⃣ Prüfen ob aktive Dokumente existieren
|
||||
$activeDocuments = $this->em
|
||||
->getRepository(Document::class)
|
||||
->createQueryBuilder('d')
|
||||
@@ -102,22 +116,40 @@ final readonly class IngestFlow
|
||||
);
|
||||
}
|
||||
|
||||
// 2️⃣ ChunkRecords erzeugen
|
||||
$records = iterator_to_array(
|
||||
$this->knowledgeIngestService->buildAllActiveChunkRecords(),
|
||||
false
|
||||
);
|
||||
$incoming = 0;
|
||||
|
||||
if (empty($records)) {
|
||||
$generator = $this->knowledgeIngestService->buildAllActiveChunkRecords();
|
||||
|
||||
$wrappedGenerator = (function () use ($generator, &$incoming) {
|
||||
|
||||
foreach ($generator as $record) {
|
||||
$incoming++;
|
||||
yield $record;
|
||||
}
|
||||
|
||||
})();
|
||||
|
||||
// Prüfen ob überhaupt etwas kommt (ohne alles in RAM zu ziehen)
|
||||
$peekIterator = $wrappedGenerator instanceof \Iterator
|
||||
? $wrappedGenerator
|
||||
: (function () use ($wrappedGenerator) {
|
||||
foreach ($wrappedGenerator as $item) {
|
||||
yield $item;
|
||||
}
|
||||
})();
|
||||
|
||||
if (!$peekIterator->valid()) {
|
||||
$peekIterator->rewind();
|
||||
}
|
||||
|
||||
if (!$peekIterator->valid()) {
|
||||
throw new \RuntimeException(
|
||||
'Global Reindex abgebrochen: Es wurden keine Chunks erzeugt. Bitte prüfen Sie die Dokumente.'
|
||||
'Global Reindex abgebrochen: Es wurden keine Chunks erzeugt.'
|
||||
);
|
||||
}
|
||||
|
||||
// 3️⃣ Rewrite NDJSON
|
||||
$this->chunkManager->rewriteAll($records);
|
||||
$this->chunkManager->rewriteAll($peekIterator);
|
||||
|
||||
// 4️⃣ Rebuild Vector Index
|
||||
$this->rebuildIndex(true);
|
||||
}
|
||||
|
||||
@@ -137,18 +169,14 @@ final readonly class IngestFlow
|
||||
throw new \RuntimeException('Document not found.');
|
||||
}
|
||||
|
||||
// Chunks entfernen
|
||||
$this->chunkManager->compactByDocument($documentId);
|
||||
|
||||
// Dokument aus DB entfernen
|
||||
$this->em->remove($document);
|
||||
$this->em->flush();
|
||||
|
||||
// 4️⃣ Reindex nur wenn sinnvoll
|
||||
$this->rebuildIndex(false);
|
||||
}
|
||||
|
||||
|
||||
// =========================================================
|
||||
// CENTRAL REBUILD
|
||||
// =========================================================
|
||||
@@ -169,4 +197,4 @@ final readonly class IngestFlow
|
||||
$chunkCount = $this->chunkManager->countAllChunks();
|
||||
$this->metaManager->updateRuntimeStats($chunkCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -23,13 +23,9 @@ final class ChunkManager
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// COUNT (für Guardrails / Limits)
|
||||
// COUNT (Streaming, robust)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Zählt Datensätze (NDJSON-Zeilen) im index.ndjson streaming-basiert.
|
||||
* Leere / kaputte Zeilen werden ignoriert.
|
||||
*/
|
||||
public function countAllChunks(): int
|
||||
{
|
||||
if (!is_file($this->indexPath)) {
|
||||
@@ -42,6 +38,7 @@ final class ChunkManager
|
||||
}
|
||||
|
||||
$count = 0;
|
||||
|
||||
try {
|
||||
while (($line = fgets($handle)) !== false) {
|
||||
$line = trim($line);
|
||||
@@ -49,7 +46,6 @@ final class ChunkManager
|
||||
continue;
|
||||
}
|
||||
|
||||
// NDJSON besteht aus JSON-Objekten; wir zählen nur valide Arrays.
|
||||
$data = json_decode($line, true);
|
||||
if (is_array($data)) {
|
||||
$count++;
|
||||
@@ -63,7 +59,7 @@ final class ChunkManager
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// APPEND
|
||||
// APPEND (Streaming + Exception Safe)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
@@ -82,27 +78,34 @@ final class ChunkManager
|
||||
throw new \RuntimeException('Unable to open index.ndjson for append');
|
||||
}
|
||||
|
||||
foreach ($records as $record) {
|
||||
$json = json_encode($record, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
||||
if ($json === false) {
|
||||
fclose($handle);
|
||||
throw new \RuntimeException('Unable to encode chunk record');
|
||||
try {
|
||||
foreach ($records as $record) {
|
||||
$json = json_encode(
|
||||
$record,
|
||||
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
|
||||
);
|
||||
|
||||
if ($json === false) {
|
||||
throw new \RuntimeException('Unable to encode chunk record');
|
||||
}
|
||||
|
||||
if (fwrite($handle, $json . PHP_EOL) === false) {
|
||||
throw new \RuntimeException('Unable to write chunk to index');
|
||||
}
|
||||
}
|
||||
|
||||
fwrite($handle, $json . PHP_EOL);
|
||||
} finally {
|
||||
fclose($handle);
|
||||
}
|
||||
|
||||
fclose($handle);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// COMPACTION – Entfernt alle Chunks eines Dokuments
|
||||
// COMPACTION (Streaming + Safe Handles)
|
||||
// ============================================================
|
||||
|
||||
public function compactByDocument(Uuid $documentId): void
|
||||
{
|
||||
if (!is_file($this->indexPath)) {
|
||||
return; // nichts zu kompaktieren
|
||||
return;
|
||||
}
|
||||
|
||||
$tmpPath = $this->indexPath . '.tmp';
|
||||
@@ -116,32 +119,36 @@ final class ChunkManager
|
||||
|
||||
$docIdString = $documentId->toRfc4122();
|
||||
|
||||
while (($line = fgets($in)) !== false) {
|
||||
$line = trim($line);
|
||||
if ($line === '') {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
while (($line = fgets($in)) !== false) {
|
||||
$line = trim($line);
|
||||
if ($line === '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
$data = json_decode($line, true);
|
||||
if (!is_array($data)) {
|
||||
continue; // skip corrupted line
|
||||
}
|
||||
$data = json_decode($line, true);
|
||||
if (!is_array($data)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (($data['document_id'] ?? null) === $docIdString) {
|
||||
continue; // skip this document's chunks
|
||||
}
|
||||
if (($data['document_id'] ?? null) === $docIdString) {
|
||||
continue;
|
||||
}
|
||||
|
||||
fwrite($out, $line . PHP_EOL);
|
||||
if (fwrite($out, $line . PHP_EOL) === false) {
|
||||
throw new \RuntimeException('Unable to write compacted chunk');
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fclose($in);
|
||||
fclose($out);
|
||||
}
|
||||
|
||||
fclose($in);
|
||||
fclose($out);
|
||||
|
||||
$this->atomicSwitch($tmpPath, $this->indexPath);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// FULL REWRITE (Global Reindex)
|
||||
// FULL REWRITE (Streaming + Atomic)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
@@ -162,23 +169,30 @@ final class ChunkManager
|
||||
throw new \RuntimeException('Unable to open temp index file');
|
||||
}
|
||||
|
||||
foreach ($records as $record) {
|
||||
$json = json_encode($record, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
|
||||
if ($json === false) {
|
||||
fclose($handle);
|
||||
throw new \RuntimeException('Unable to encode chunk record');
|
||||
try {
|
||||
foreach ($records as $record) {
|
||||
$json = json_encode(
|
||||
$record,
|
||||
JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
|
||||
);
|
||||
|
||||
if ($json === false) {
|
||||
throw new \RuntimeException('Unable to encode chunk record');
|
||||
}
|
||||
|
||||
if (fwrite($handle, $json . PHP_EOL) === false) {
|
||||
throw new \RuntimeException('Unable to write chunk during rewrite');
|
||||
}
|
||||
}
|
||||
|
||||
fwrite($handle, $json . PHP_EOL);
|
||||
} finally {
|
||||
fclose($handle);
|
||||
}
|
||||
|
||||
fclose($handle);
|
||||
|
||||
$this->atomicSwitch($tmpPath, $this->indexPath);
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// STREAM READ (für FAISS rebuild)
|
||||
// STREAM READ (FAISS rebuild safe)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
@@ -223,4 +237,4 @@ final class ChunkManager
|
||||
throw new \RuntimeException('Atomic switch failed for index.ndjson');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -49,9 +49,8 @@ final class VectorIndexBuilder
|
||||
// --------------------------------------------
|
||||
// 🔵 FALL: NDJSON ist leer → kein Vector Index
|
||||
// --------------------------------------------
|
||||
if (filesize($this->indexNdjsonPath) === 0) {
|
||||
if (!is_file($this->indexNdjsonPath) || filesize($this->indexNdjsonPath) === 0) {
|
||||
|
||||
// Alten Index entfernen
|
||||
@unlink($this->vectorIndexPath);
|
||||
@unlink($this->vectorMetaPath);
|
||||
|
||||
@@ -63,7 +62,7 @@ final class VectorIndexBuilder
|
||||
);
|
||||
}
|
||||
|
||||
return; // WICHTIG: kein Python, kein tmp, kein Fehler
|
||||
return;
|
||||
}
|
||||
|
||||
// --------------------------------------------
|
||||
@@ -79,7 +78,6 @@ final class VectorIndexBuilder
|
||||
|
||||
$tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
|
||||
|
||||
// Clean leftovers
|
||||
@unlink($tmpVectorIndexPath);
|
||||
@unlink($this->vectorMetaPath);
|
||||
|
||||
@@ -108,11 +106,15 @@ final class VectorIndexBuilder
|
||||
private function assertPreconditions(): void
|
||||
{
|
||||
if (!is_file($this->scriptPath)) {
|
||||
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
|
||||
throw new \RuntimeException(
|
||||
'Vector build script not found at: ' . $this->scriptPath
|
||||
);
|
||||
}
|
||||
|
||||
if (!is_file($this->indexNdjsonPath)) {
|
||||
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
|
||||
throw new \RuntimeException(
|
||||
'index.ndjson not found at: ' . $this->indexNdjsonPath
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,4 +197,4 @@ final class VectorIndexBuilder
|
||||
@file_put_contents($logPath, "=== VectorIndexBuilder OK ===\n", FILE_APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,295 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
# ============================================================
|
||||
# Paths
|
||||
# ============================================================
|
||||
|
||||
BASE_PATH = Path(__file__).resolve().parents[2]
|
||||
VENV_DIR = BASE_PATH / ".venv"
|
||||
VENV_PY = VENV_DIR / "bin" / "python"
|
||||
VENV_PIP = VENV_DIR / "bin" / "pip"
|
||||
UVICORN_BIN = VENV_DIR / "bin" / "uvicorn"
|
||||
|
||||
PID_DIR = BASE_PATH / "var" / "run"
|
||||
PID_FILE = PID_DIR / "vector_service.pid"
|
||||
|
||||
DEFAULT_HOST = "0.0.0.0"
|
||||
DEFAULT_PORT = 8090
|
||||
DEFAULT_HEALTH_URL = "http://127.0.0.1:{port}/health"
|
||||
DEFAULT_RELOAD_URL = "http://127.0.0.1:{port}/reload"
|
||||
|
||||
REQUIRED_MODULES = [
|
||||
"fastapi",
|
||||
"uvicorn",
|
||||
"faiss",
|
||||
"sentence_transformers",
|
||||
"numpy",
|
||||
]
|
||||
|
||||
# ============================================================
|
||||
# Utilities
|
||||
# ============================================================
|
||||
|
||||
def _now_ms() -> int:
|
||||
return int(time.time() * 1000)
|
||||
|
||||
|
||||
def _read_pid() -> Optional[int]:
    """Read the PID recorded in PID_FILE.

    Returns:
        The PID as an int when the file exists and its stripped content
        consists solely of digits; None otherwise, including when reading
        or converting the content raises.
    """
    try:
        if not PID_FILE.exists():
            return None
        raw = PID_FILE.read_text(encoding="utf-8").strip()
        return int(raw) if raw.isdigit() else None
    except Exception:
        # Best-effort read: any failure is treated as "no PID recorded".
        return None
|
||||
|
||||
|
||||
def _write_pid(pid: int) -> None:
    """Store ``pid`` as text in PID_FILE, creating PID_DIR first if it is missing."""
    PID_DIR.mkdir(exist_ok=True, parents=True)
    pid_text = str(pid)
    PID_FILE.write_text(pid_text, encoding="utf-8")
|
||||
|
||||
|
||||
def _remove_pid() -> None:
    """Delete PID_FILE if it exists; any error while doing so is ignored."""
    try:
        pid_file_present = PID_FILE.exists()
        if pid_file_present:
            PID_FILE.unlink()
    except Exception:
        # Deliberately best-effort: a PID file that cannot be removed is not fatal.
        return
|
||||
|
||||
|
||||
def _pid_is_running(pid: int) -> bool:
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _is_port_open(host: str, port: int, timeout: float = 0.5) -> bool:
|
||||
try:
|
||||
with socket.create_connection((host, port), timeout=timeout):
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _curl(url: str, method: str = "GET", timeout_seconds: int = 3) -> Tuple[int, str]:
|
||||
cmd = [
|
||||
"curl",
|
||||
"-s",
|
||||
"-X",
|
||||
method,
|
||||
"-m",
|
||||
str(timeout_seconds),
|
||||
"-w",
|
||||
"\n%{http_code}",
|
||||
url,
|
||||
]
|
||||
|
||||
p = subprocess.run(cmd, capture_output=True, text=True)
|
||||
out = (p.stdout or "").rstrip("\n")
|
||||
|
||||
if "\n" in out:
|
||||
body, code = out.rsplit("\n", 1)
|
||||
try:
|
||||
return int(code), body
|
||||
except Exception:
|
||||
return 0, body
|
||||
|
||||
return 0, out
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Dependency Handling
|
||||
# ============================================================
|
||||
|
||||
def check_modules() -> List[str]:
    """Return the names from REQUIRED_MODULES that cannot be imported, in list order."""

    def _importable(name: str) -> bool:
        # Any exception raised while importing counts as "missing".
        try:
            importlib.import_module(name)
        except Exception:
            return False
        return True

    return [name for name in REQUIRED_MODULES if not _importable(name)]
|
||||
|
||||
|
||||
def install_missing_modules(missing: List[str]) -> Dict[str, str]:
    """Install the pip packages that provide the given missing modules.

    Args:
        missing: Importable module names that were found to be missing.

    Returns:
        A dict with ``status`` ("ok" or "error") and a ``detail`` message:
        the installed package list on success, or the pip output / reason
        on failure.
    """
    # Module name -> pip distribution name, where the two differ or are known.
    package_for_module = {
        "fastapi": "fastapi",
        "uvicorn": "uvicorn",
        "numpy": "numpy",
        "sentence_transformers": "sentence-transformers",
        "faiss": "faiss-cpu",
    }

    pkgs = []
    for module_name in missing:
        # Unknown modules fall back to their own name as the package name.
        pkgs.append(package_for_module.get(module_name, module_name))

    if not VENV_PIP.exists():
        return {"status": "error", "detail": "pip not found in .venv"}

    completed = subprocess.run(
        [str(VENV_PIP), "install", *pkgs],
        capture_output=True,
        text=True,
    )

    if completed.returncode == 0:
        return {"status": "ok", "detail": "installed: " + " ".join(pkgs)}

    failure_output = completed.stderr or completed.stdout
    return {"status": "error", "detail": failure_output.strip()}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Service Control
|
||||
# ============================================================
|
||||
|
||||
def service_status(port: int) -> Dict:
    """Collect PID-file and health-endpoint status for the service.

    A PID file that points at a process which is no longer running is removed
    and reported as ``pid: None``.

    Args:
        port: Port substituted into DEFAULT_HEALTH_URL.

    Returns:
        Dict with ``pid``, ``pid_running``, ``health_code``, ``healthy``
        (True when the health code is 200), ``health_body`` and ``port``.
    """
    recorded_pid = _read_pid()
    alive = bool(recorded_pid and _pid_is_running(recorded_pid))

    if recorded_pid and not alive:
        # Stale PID file: clean it up and stop reporting the dead PID.
        _remove_pid()
        recorded_pid = None

    health_url = DEFAULT_HEALTH_URL.format(port=port)
    health_code, health_body = _curl(health_url, method="GET")

    status: Dict = {}
    status["pid"] = recorded_pid
    status["pid_running"] = alive
    status["health_code"] = health_code
    status["healthy"] = health_code == 200
    status["health_body"] = health_body
    status["port"] = port
    return status
|
||||
|
||||
|
||||
def start_service(host: str, port: int) -> Dict:
    """Launch the vector service with uvicorn as a detached background process.

    Args:
        host: Interface passed to uvicorn via ``--host``.
        port: Port passed to uvicorn via ``--port``.

    Returns:
        ``{"status": "ok", "detail": ..., "pid": ...}`` when the process is
        still alive after the startup wait, or ``{"status": "error",
        "detail": ...}`` when uvicorn is missing, the port is already in use,
        or the process exited during startup.
    """
    if not UVICORN_BIN.exists():
        return {"status": "error", "detail": "uvicorn not found in .venv"}

    if _is_port_open("127.0.0.1", port):
        return {"status": "error", "detail": f"port {port} already in use"}

    cmd = [
        str(UVICORN_BIN),
        "src.Vector.vector_service:app",
        "--host", host,
        "--port", str(port),
    ]

    p = subprocess.Popen(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        cwd=str(BASE_PATH),
        # Own session so the service is not tied to this control process.
        start_new_session=True,
    )

    _write_pid(p.pid)

    # Give the service a moment to come up (or to fail fast).
    time.sleep(2)

    # Output is discarded, so an immediate crash (import error, bind failure,
    # ...) is only visible through the exit status. Do not report success or
    # leave a PID file behind for a process that is already gone.
    exit_code = p.poll()
    if exit_code is not None:
        _remove_pid()
        return {
            "status": "error",
            "detail": f"service exited during startup (exit code {exit_code})",
        }

    return {"status": "ok", "detail": "service started", "pid": p.pid}
|
||||
|
||||
|
||||
def stop_service(port: int, force: bool = False) -> Dict:
    """Stop the service recorded in the PID file.

    Sends SIGTERM, waits two seconds, and escalates to SIGKILL only when
    ``force`` is set and the process is still running.

    Args:
        port: Accepted for signature symmetry; not used by this function.
        force: Whether to send SIGKILL if SIGTERM did not stop the process.

    Returns:
        Dict with ``status`` ("ok" or "error") and a ``detail`` message.
    """

    def _reply(status: str, detail: str) -> Dict:
        return {"status": status, "detail": detail}

    pid = _read_pid()
    if not pid:
        return _reply("ok", "not running")

    if not _pid_is_running(pid):
        _remove_pid()
        return _reply("ok", "stale pid removed")

    try:
        os.kill(pid, signal.SIGTERM)
        time.sleep(2)

        still_alive = _pid_is_running(pid)
        if still_alive and not force:
            return _reply("error", "stop timeout (use --force)")

        if still_alive:
            os.kill(pid, signal.SIGKILL)

        _remove_pid()
        return _reply("ok", "force stopped" if still_alive else "stopped")
    except Exception as exc:
        return _reply("error", str(exc))
|
||||
|
||||
|
||||
def reload_service(port: int) -> Dict:
    """POST to the service's reload endpoint and translate the HTTP result.

    Args:
        port: Port substituted into DEFAULT_RELOAD_URL.

    Returns:
        ``status: ok`` with the response body on HTTP 200; otherwise
        ``status: error`` with a message (a dedicated one for HTTP 404).
    """
    reload_url = DEFAULT_RELOAD_URL.format(port=port)
    code, body = _curl(reload_url, method="POST")

    if code == 200:
        status, detail = "ok", body
    elif code == 404:
        status, detail = "error", "reload endpoint not found"
    else:
        status, detail = "error", f"reload failed (http {code}): {body}"

    return {"status": status, "detail": detail}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Main
|
||||
# ============================================================
|
||||
|
||||
def main() -> int:
    """Parse CLI flags, run the requested service actions, and print a JSON report.

    Actions run in a fixed order: install, stop, start, reload, status.
    Status is also reported when none of install/start/stop/reload was
    requested. The report lists the actions taken, their results, the
    missing modules, and the total duration in milliseconds.

    Returns:
        Always 0.
    """
    cli = argparse.ArgumentParser(description="Vector service control")
    for flag in ("--install", "--start", "--stop", "--force", "--reload", "--status"):
        cli.add_argument(flag, action="store_true")
    cli.add_argument("--port", type=int, default=DEFAULT_PORT)
    cli.add_argument("--host", type=str, default=DEFAULT_HOST)

    args = cli.parse_args()

    actions: List[str] = []
    results: Dict = {}
    report = {
        "ts_ms": _now_ms(),
        "actions": actions,
        "results": results,
    }

    missing = check_modules()
    results["modules_missing"] = missing

    # Installation is attempted only when something is actually missing.
    if args.install and missing:
        actions.append("install")
        results["install"] = install_missing_modules(missing)

    if args.stop:
        actions.append("stop")
        results["stop"] = stop_service(args.port, args.force)

    if args.start:
        actions.append("start")
        results["start"] = start_service(args.host, args.port)

    if args.reload:
        actions.append("reload")
        results["reload"] = reload_service(args.port)

    explicit_action = any([args.install, args.start, args.stop, args.reload])
    if args.status or not explicit_action:
        actions.append("status")
        results["status"] = service_status(args.port)

    report["duration_ms"] = _now_ms() - report["ts_ms"]

    print(json.dumps(report, indent=2))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
"""Build a FAISS inner-product index from an NDJSON chunk file.

Reads one JSON object per line from ``--index``, embeds each entry's ``text``
with the SentenceTransformer model given by ``--model``, writes the FAISS
index to ``--out`` and a JSON list of the corresponding ``chunk_id`` values
to a sibling meta file.

Exit codes: 0 success (including "no chunks"), 10 faiss missing,
11 sentence-transformers missing, 20 input file missing.
"""

import argparse
import json
import sys
from pathlib import Path

# ---------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------
parser = argparse.ArgumentParser(description="Build FAISS index from NDJSON")

parser.add_argument("--index", required=True, help="Path to index.ndjson")
parser.add_argument("--out", required=True, help="Path to output vector.index")
parser.add_argument("--model", default="all-MiniLM-L6-v2", help="SentenceTransformer model")

args = parser.parse_args()

index_path = Path(args.index).resolve()
out_path = Path(args.out).resolve()

# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
# Errors go to stderr (as in the sibling vector scripts) so that stdout only
# carries progress output.
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.", file=sys.stderr)
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.", file=sys.stderr)
    sys.exit(11)

import numpy as np

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_path.is_file():
    print(f"ERROR: index.ndjson not found at {index_path}", file=sys.stderr)
    sys.exit(20)

# ---------------------------------------------------------
# Load model
# ---------------------------------------------------------
print(f"Loading embedding model: {args.model}")
model = SentenceTransformer(args.model)

# ---------------------------------------------------------
# Streaming read NDJSON
# ---------------------------------------------------------
texts = []
ids = []

print("Reading NDJSON...")

with open(index_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        try:
            entry = json.loads(line)
        except Exception:
            # Corrupted line: skip it rather than abort the whole build.
            continue

        # A line can be valid JSON without being an object (list, number,
        # string, null); such values have no .get() and are skipped too.
        if not isinstance(entry, dict):
            continue

        text = entry.get("text")
        chunk_id = entry.get("chunk_id")

        if not text or not chunk_id:
            continue

        texts.append(text)
        ids.append(chunk_id)

# NOTE(review): Path.with_suffix() replaces the last suffix, so for an output
# named "vector.index" this yields "vector.meta.json", not
# "vector.index.meta.json" — confirm this matches what the readers expect.
meta_path = out_path.with_suffix(".meta.json")

if not texts:
    print("No chunks found. Removing vector index.")

    if out_path.exists():
        out_path.unlink()

    if meta_path.exists():
        meta_path.unlink()

    sys.exit(0)

print(f"Loaded {len(texts)} chunks.")

# ---------------------------------------------------------
# Build embeddings
# ---------------------------------------------------------
print("Encoding embeddings...")
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    show_progress_bar=True,
    batch_size=64
)

embeddings = np.array(embeddings).astype("float32")

dim = embeddings.shape[1]
print(f"Embedding dimension: {dim}")

# ---------------------------------------------------------
# Build FAISS index
# ---------------------------------------------------------
print("Building FAISS index...")
index = faiss.IndexFlatIP(dim)
index.add(embeddings)

# Ensure output directory exists
out_path.parent.mkdir(parents=True, exist_ok=True)

print(f"Writing FAISS index to {out_path}")
faiss.write_index(index, str(out_path))

# ---------------------------------------------------------
# Write ID mapping meta
# ---------------------------------------------------------
# Position i in this list is the chunk_id of vector i in the index.
with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(ids, f)

print(f"Indexed {len(ids)} chunks successfully.")
sys.exit(0)
|
||||
@@ -1,126 +0,0 @@
|
||||
#!/usr/bin/env python3
"""Build a FAISS inner-product index from an NDJSON tag file.

Positional arguments:
    1  tags.ndjson
    2  out_index_path (can be .tmp)
    3  model

Example:
    python vector_ingest_tags.py /var/knowledge/tags.ndjson /var/knowledge/vector_tags.index.tmp all-MiniLM-L6-v2

Exit codes: 0 success (including "no tags"), 2 usage error, 10 faiss missing,
11 sentence-transformers missing, 20 input file missing.
"""

import sys
import json
from pathlib import Path

# ---------------------------------------------------------
# Positional args
# ---------------------------------------------------------
if len(sys.argv) < 4:
    print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index> <model>", file=sys.stderr)
    sys.exit(2)

tags_path = Path(sys.argv[1]).resolve()
out_path = Path(sys.argv[2]).resolve()
model_name = sys.argv[3]

# The meta file name is the full output name plus ".meta.json",
# e.g. vector_tags.index(.tmp).meta.json
meta_path = Path(str(out_path) + ".meta.json")

# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.", file=sys.stderr)
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.", file=sys.stderr)
    sys.exit(11)

import numpy as np

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not tags_path.is_file():
    print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr)
    sys.exit(20)

# Ensure output directory exists
out_path.parent.mkdir(parents=True, exist_ok=True)

# ---------------------------------------------------------
# Load model
# ---------------------------------------------------------
model = SentenceTransformer(model_name)

# ---------------------------------------------------------
# Streaming read NDJSON
# ---------------------------------------------------------
texts = []
ids = []

with open(tags_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        try:
            entry = json.loads(line)
        except Exception:
            # Corrupted line: skip it rather than abort the whole build.
            continue

        # A line can be valid JSON without being an object (list, number,
        # string, null); such values have no .get() and are skipped too.
        if not isinstance(entry, dict):
            continue

        text = entry.get("text")
        tag_id = entry.get("tag_id")

        if not text or not tag_id:
            continue

        # Cap the embedded text at 4000 characters.
        text = str(text)
        if len(text) > 4000:
            text = text[:4000]

        texts.append(text)
        ids.append(str(tag_id))

# If empty: remove outputs (tmp) and exit success
if not texts:
    if out_path.exists():
        out_path.unlink()
    if meta_path.exists():
        meta_path.unlink()
    sys.exit(0)

# ---------------------------------------------------------
# Build embeddings
# ---------------------------------------------------------
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    show_progress_bar=False,
    batch_size=64
)

embeddings = np.array(embeddings).astype("float32")
dim = embeddings.shape[1]

# ---------------------------------------------------------
# Build FAISS index
# ---------------------------------------------------------
index = faiss.IndexFlatIP(dim)
index.add(embeddings)

faiss.write_index(index, str(out_path))

# ---------------------------------------------------------
# Write ID mapping meta
# ---------------------------------------------------------
# Position i in this list is the tag_id of vector i in the index.
with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(ids, f)

sys.exit(0)
|
||||
@@ -1,117 +0,0 @@
|
||||
#!/usr/bin/env python3
"""FAISS vector search CLI.

Embeds ``--query`` with the SentenceTransformer model given by ``--model``,
searches the FAISS index at ``--index`` for the ``--limit`` nearest entries
and prints a JSON list of ``{"chunk_id": ..., "score": ...}`` objects.

stdout carries the JSON result only; diagnostics go to stderr.

Exit codes:
    0   success
    2   invalid command line (argparse default)
    10  faiss cannot be imported
    11  sentence-transformers cannot be imported
    20  index file missing
    21  meta file missing
    30  embedding model failed to load
    31  query embedding failed
    32  FAISS index unreadable
    33  meta file unreadable or not a JSON list
    40  FAISS search failed
"""

import sys
import json
import argparse
from pathlib import Path


def fail(message, code):
    """Write ``message`` to stderr and terminate with exit status ``code``."""
    print(message, file=sys.stderr)
    sys.exit(code)


# ---------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------
parser = argparse.ArgumentParser(description="FAISS vector search")

parser.add_argument("--query", required=True, help="Search query text")
parser.add_argument("--limit", required=True, type=int, help="Top-K limit")
parser.add_argument("--index", required=True, help="Path to vector.index")
parser.add_argument("--meta", required=True, help="Path to vector.index.meta.json")
parser.add_argument("--model", required=True, help="SentenceTransformer model")

args = parser.parse_args()

query = args.query
limit = args.limit
index_path = Path(args.index).resolve()
meta_path = Path(args.meta).resolve()
embedding_model = args.model


# ---------------------------------------------------------
# Dependency checks (stderr only)
# ---------------------------------------------------------
# Imported once here; a failure maps to a dedicated exit code instead of a
# raw traceback.
try:
    import faiss
except Exception:
    fail("Python module 'faiss' not found.", 10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    fail("Python module 'sentence-transformers' not found.", 11)


# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_path.is_file():
    fail(f"vector.index not found at {index_path}", 20)

if not meta_path.is_file():
    fail(f"vector.index.meta.json not found at {meta_path}", 21)


# ---------------------------------------------------------
# Load model and index
# ---------------------------------------------------------
try:
    model = SentenceTransformer(embedding_model)
except Exception:
    fail(f"Failed to load embedding model: {embedding_model}", 30)

try:
    # FAISS expects float32 query vectors, so cast explicitly.
    query_vec = model.encode([query], normalize_embeddings=True).astype("float32")
except Exception:
    fail("Embedding encoding failed.", 31)

try:
    index = faiss.read_index(str(index_path))
except Exception:
    fail("Failed to read FAISS index.", 32)

try:
    with open(meta_path, "r", encoding="utf-8") as f:
        ids = json.load(f)
    # Results are looked up by position, so anything but a list would
    # otherwise fail later with an unrelated traceback.
    if not isinstance(ids, list):
        raise ValueError("vector meta must be a JSON list")
except Exception:
    fail("Failed to read vector meta file.", 33)


# ---------------------------------------------------------
# Search
# ---------------------------------------------------------
try:
    scores, indices = index.search(query_vec, limit)
except Exception:
    fail("FAISS search failed.", 40)

# Skip negative positions (the original code special-cased -1) and positions
# that have no entry in the meta list.
results = [
    {"chunk_id": ids[idx], "score": float(score)}
    for score, idx in zip(scores[0], indices[0])
    if 0 <= idx < len(ids)
]


# ---------------------------------------------------------
# STRICT JSON OUTPUT ONLY
# ---------------------------------------------------------
print(json.dumps(results))
sys.exit(0)
|
||||
@@ -1,103 +0,0 @@
|
||||
#!/usr/bin/env python3
"""Tag vector search CLI.

Positional args (aligned with PHP client exec call):
    1 query
    2 limit
    3 index_path
    4 meta_path
    5 model

Example:
    python vector_search_tags.py "foo" 8 /path/vector_tags.index /path/vector_tags.index.meta.json all-MiniLM-L6-v2

Prints a JSON list of ``{"tag_id": str, "score": float}`` objects on stdout.
Every failure path prints ``[]`` and exits with status 0, so the caller
always receives valid JSON on stdout.
"""

import sys
import json
from pathlib import Path


def no_results(reason=None):
    """Print an empty JSON list on stdout and exit with status 0.

    When ``reason`` is given it is written to stderr first, so the failure
    is still visible without polluting stdout.
    """
    if reason is not None:
        print(reason, file=sys.stderr)
    print("[]")
    sys.exit(0)


if len(sys.argv) < 6:
    no_results()

query = sys.argv[1]

try:
    limit = int(sys.argv[2])
except Exception:
    # A non-numeric limit falls back to a default rather than failing.
    limit = 5

index_path = Path(sys.argv[3]).resolve()
meta_path = Path(sys.argv[4]).resolve()
model_name = sys.argv[5]


# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
try:
    import faiss
    from sentence_transformers import SentenceTransformer
except Exception:
    # keep stdout clean for caller
    no_results()


# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if limit <= 0:
    no_results()

if not index_path.is_file() or not meta_path.is_file():
    # No tag index available => no routing
    no_results()


# ---------------------------------------------------------
# Load model, index + meta
# ---------------------------------------------------------
# These calls were previously unguarded: a corrupt index or an unknown model
# ended in a traceback and a non-zero exit, unlike every other failure path.
try:
    model = SentenceTransformer(model_name)
    index = faiss.read_index(str(index_path))
except Exception as exc:
    no_results(f"Failed to load model or tag index: {exc}")

try:
    with open(meta_path, "r", encoding="utf-8") as f:
        ids = json.load(f)
except Exception:
    no_results()

if not isinstance(ids, list) or len(ids) == 0:
    no_results()


# ---------------------------------------------------------
# Embed & search
# ---------------------------------------------------------
try:
    qvec = model.encode([query], normalize_embeddings=True)
    scores, idxs = index.search(qvec, limit)
except Exception as exc:
    no_results(f"Tag search failed: {exc}")

# Keep only positions that map to an entry in the meta list.
out = [
    {"tag_id": str(ids[idx]), "score": float(score)}
    for score, idx in zip(scores[0], idxs[0])
    if 0 <= idx < len(ids)
]

print(json.dumps(out))
sys.exit(0)
|
||||
@@ -1,176 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import faiss
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Paths
|
||||
# ============================================================
|
||||
|
||||
# Two directory levels above the directory that contains this file.
BASE_PATH = Path(__file__).resolve().parents[2]
KNOWLEDGE_DIR = BASE_PATH.joinpath("var", "knowledge")

# Chunk index and the JSON list mapping index positions to chunk ids.
CHUNK_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector.index")
CHUNK_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector.index.meta.json")

# Tag index and the JSON list mapping index positions to tag ids.
TAG_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index")
TAG_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index.meta.json")

# JSON document that names the embedding model under "embedding_model".
INDEX_META_PATH = KNOWLEDGE_DIR.joinpath("index_meta.json")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# FastAPI
|
||||
# ============================================================
|
||||
|
||||
app = FastAPI()

# Module-level state, populated by load_all() and read by the endpoints.

# Embedding model, plus the name it was loaded under so that a reload can
# skip re-instantiating an unchanged model.
model: Optional[SentenceTransformer] = None
loaded_embedding_model_name: Optional[str] = None

# FAISS chunk index and its position -> id list; both None when unavailable.
chunk_index = None
chunk_ids: Optional[List[Any]] = None

# FAISS tag index and its position -> id list; both None when unavailable.
tag_index = None
tag_ids: Optional[List[Any]] = None
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Models
|
||||
# ============================================================
|
||||
|
||||
class SearchRequest(BaseModel):
    """Request body shared by the chunk and tag search endpoints."""

    # Text that is embedded and searched for.
    query: str
    # Number of nearest entries requested from the index.
    limit: int = 8
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Loader
|
||||
# ============================================================
|
||||
|
||||
def load_all():
    """(Re)load the embedding model and both FAISS indexes into module state.

    Reads ``index_meta.json`` to find the embedding model name, re-creates the
    model only when that name changed, and loads the chunk and tag indexes
    together with their id lists. An index whose files are missing is set to
    ``None``.

    Everything is loaded into locals first and the globals are assigned only
    at the end, so a failure leaves the previously loaded state untouched
    instead of pairing a new index with a stale id list.

    Raises:
        RuntimeError: if ``index_meta.json`` is missing or does not name an
            embedding model.
        Exception: any error raised while reading the JSON files, the FAISS
            indexes or the model propagates to the caller.
    """
    global model, chunk_index, chunk_ids, tag_index, tag_ids, loaded_embedding_model_name

    if not INDEX_META_PATH.exists():
        raise RuntimeError("index_meta.json not found")

    # Read JSON as UTF-8 explicitly rather than with the platform default.
    meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
    embedding_model_name = meta.get("embedding_model")

    if not embedding_model_name:
        raise RuntimeError("embedding_model missing in index_meta.json")

    # Reload model only if changed
    new_model = model
    if model is None or embedding_model_name != loaded_embedding_model_name:
        print(f"[Reload] Loading embedding model: {embedding_model_name}")
        new_model = SentenceTransformer(embedding_model_name)

    new_chunk_index, new_chunk_ids = _load_index_pair(
        CHUNK_INDEX_PATH, CHUNK_MAP_PATH, "[Reload] Loading chunk index"
    )
    new_tag_index, new_tag_ids = _load_index_pair(
        TAG_INDEX_PATH, TAG_MAP_PATH, "[Reload] Loading tag index"
    )

    # Commit: nothing below can fail, so the state switches as one unit.
    model = new_model
    loaded_embedding_model_name = embedding_model_name
    chunk_index, chunk_ids = new_chunk_index, new_chunk_ids
    tag_index, tag_ids = new_tag_index, new_tag_ids

    print("[Reload] Completed")


def _load_index_pair(index_path, map_path, banner):
    """Return ``(index, ids)`` read from disk, or ``(None, None)`` if either file is missing."""
    if not (index_path.exists() and map_path.exists()):
        return None, None

    print(banner)
    index = faiss.read_index(str(index_path))
    ids = json.loads(map_path.read_text(encoding="utf-8"))
    return index, ids
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Startup
|
||||
# ============================================================
|
||||
|
||||
@app.on_event("startup")
def startup_event():
    """Load the model and indexes when the application starts.

    Any exception raised by load_all() propagates out of this handler.
    """
    load_all()
    print("[VectorService] Ready")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Endpoints
|
||||
# ============================================================
|
||||
|
||||
@app.get("/health")
def health():
    """Return a status payload showing which resources are currently loaded."""
    loaded = {
        "chunk_index_loaded": chunk_index is not None,
        "tag_index_loaded": tag_index is not None,
        "model_loaded": model is not None,
    }
    return {"status": "ok", **loaded}
|
||||
|
||||
|
||||
@app.post("/reload")
def reload():
    """Re-run load_all() on demand.

    Returns ``{"status": "reloaded"}`` on success. Any exception from
    load_all() is converted into an HTTP 500 whose detail is the exception
    message.
    """
    try:
        load_all()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    return {"status": "reloaded"}
|
||||
|
||||
|
||||
@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
    """Return the chunks nearest to ``req.query``.

    Responds with a list of ``{"chunk_id": ..., "score": float}`` objects in
    the order FAISS returned them. A non-positive ``req.limit`` yields an
    empty list.

    Raises:
        HTTPException: 503 when the chunk index or its id list is not loaded.
    """
    if chunk_index is None or chunk_ids is None:
        raise HTTPException(status_code=503, detail="Chunk index not available")

    # Answer "no results" for a non-positive limit instead of passing it to
    # FAISS, where it would surface as an unhandled server error.
    if req.limit <= 0:
        return []

    query_vec = model.encode([req.query], normalize_embeddings=True)
    query_vec = np.array(query_vec).astype("float32")

    scores, indices = chunk_index.search(query_vec, req.limit)

    # Skip negative positions (the original code special-cased -1) and
    # positions that have no entry in the id list.
    return [
        {"chunk_id": chunk_ids[idx], "score": float(score)}
        for score, idx in zip(scores[0], indices[0])
        if 0 <= idx < len(chunk_ids)
    ]
|
||||
|
||||
|
||||
@app.post("/search-tags")
def search_tags(req: SearchRequest):
    """Return the tags nearest to ``req.query``.

    Responds with a list of ``{"chunk_id": ..., "score": float}`` objects in
    the order FAISS returned them. A non-positive ``req.limit`` yields an
    empty list.

    Raises:
        HTTPException: 503 when the tag index or its id list is not loaded.
    """
    if tag_index is None or tag_ids is None:
        raise HTTPException(status_code=503, detail="Tag index not available")

    # Answer "no results" for a non-positive limit instead of passing it to
    # FAISS, where it would surface as an unhandled server error.
    if req.limit <= 0:
        return []

    query_vec = model.encode([req.query], normalize_embeddings=True)
    query_vec = np.array(query_vec).astype("float32")

    scores, indices = tag_index.search(query_vec, req.limit)

    # NOTE(review): the values are tag ids but the key is "chunk_id"; it is
    # kept unchanged so existing callers keep working. Confirm the intent.
    # Skip negative positions (the original code special-cased -1) and
    # positions that have no entry in the id list.
    return [
        {"chunk_id": tag_ids[idx], "score": float(score)}
        for score, idx in zip(scores[0], indices[0])
        if 0 <= idx < len(tag_ids)
    ]
|
||||
Reference in New Issue
Block a user