Optimize auto-reload: uvicorn server loads new vector libs if changed by py

This commit is contained in:
team2
2026-02-26 16:00:24 +01:00
parent 052ff55eda
commit deba7cd06f
4 changed files with 280 additions and 67 deletions

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3
import json
import logging
from logging.handlers import RotatingFileHandler
import threading
import time
from pathlib import Path
from typing import Any, List, Optional, Dict
@@ -17,6 +21,8 @@ from sentence_transformers import SentenceTransformer
# Third ancestor directory (parents[2]) of this file's resolved path;
# every other path below is built relative to it.
BASE_PATH = Path(__file__).resolve().parents[2]
# Directory containing the index files and their JSON metadata.
KNOWLEDGE_DIR = BASE_PATH / "var" / "knowledge"
# Log directory and the file written by the rotating file handler.
LOG_DIR = BASE_PATH / "var" / "log"
LOG_FILE = LOG_DIR / "vector_service.log"
# Chunk index (read with faiss.read_index) and the JSON list that maps
# index rows back to chunk ids.
CHUNK_INDEX_PATH = KNOWLEDGE_DIR / "vector.index"
CHUNK_MAP_PATH = KNOWLEDGE_DIR / "vector.index.meta.json"
@@ -25,9 +31,47 @@ TAG_INDEX_PATH = KNOWLEDGE_DIR / "vector_tags.index"
# JSON list that maps tag-index rows back to tag ids.
TAG_MAP_PATH = KNOWLEDGE_DIR / "vector_tags.index.meta.json"
# Index metadata; "embedding_model" and "index_version" are read from it.
INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json"
# Runtime marker; "last_rebuild_at" is read from it to detect rebuilds.
INDEX_RUNTIME_PATH = KNOWLEDGE_DIR / "index_runtime.json"
# Newline-delimited JSON; rows carrying "chunk_id" and "document_id" are
# used to build the chunk -> document map.
INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
# ============================================================
# Logging
# ============================================================
# Service-wide logger; handlers are attached by setup_logging().
logger = logging.getLogger("vector_service")
# Records below INFO are dropped at the logger level.
logger.setLevel(logging.INFO)
def setup_logging() -> None:
    """Attach a rotating file handler and a console handler to ``logger``.

    The function is idempotent: a handler is only created and attached when
    the logger does not already carry one of that kind, so calling it again
    (for example after a worker reload) neither duplicates output nor opens
    an additional file handle.

    The log directory is created if it does not exist.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)

    fmt = logging.Formatter(
        fmt="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S%z",
    )

    has_file_handler = any(
        isinstance(h, RotatingFileHandler) for h in logger.handlers
    )
    # FileHandler (and therefore RotatingFileHandler) subclasses
    # StreamHandler, so a plain isinstance(h, StreamHandler) test would also
    # match the file handler and the console handler would never be added.
    has_console_handler = any(
        isinstance(h, logging.StreamHandler)
        and not isinstance(h, logging.FileHandler)
        for h in logger.handlers
    )

    if not has_file_handler:
        # Rotating file: 10 MB per file, 5 backups.
        file_handler = RotatingFileHandler(
            str(LOG_FILE),
            maxBytes=10 * 1024 * 1024,
            backupCount=5,
            encoding="utf-8",
        )
        file_handler.setFormatter(fmt)
        file_handler.setLevel(logging.INFO)
        logger.addHandler(file_handler)

    if not has_console_handler:
        # Console: StreamHandler() without a stream argument writes to
        # sys.stderr.
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(fmt)
        stream_handler.setLevel(logging.INFO)
        logger.addHandler(stream_handler)
# ============================================================
# FastAPI
# ============================================================
@@ -44,6 +88,11 @@ tag_ids: Optional[List[Any]] = None
# Name of the embedding model currently held in `model`; load_all() compares
# it with index_meta.json to decide whether the model must be reloaded.
loaded_embedding_model_name: Optional[str] = None
# "index_version" from index_meta.json as of the last completed load_all().
current_index_version: Optional[int] = None
# "last_rebuild_at" from index_runtime.json as of the last completed load_all().
current_runtime_stamp: Optional[str] = None
# Held by load_all() for the whole reload so reloads do not overlap.
reload_lock = threading.Lock()
# ============================================================
# Models
@@ -52,14 +101,24 @@ loaded_embedding_model_name: Optional[str] = None
class SearchRequest(BaseModel):
    """Request body accepted by the search endpoints."""

    # Free-text query; it is embedded with the loaded model before searching.
    query: str
    # Number of nearest neighbours requested from the index.
    limit: int = 8
    # Optional document-id filter used by /search-chunks to scope hits.
    doc_ids: Optional[List[str]] = None
# ============================================================
# Loader
# ============================================================
def load_chunk_doc_map():
def _safe_read_json(path: Path) -> Optional[Any]:
    """Read and parse a UTF-8 JSON file without ever raising.

    Args:
        path: File to read.

    Returns:
        The decoded JSON value (a dict, a list, or any other JSON type), or
        ``None`` when the file does not exist, cannot be read, or does not
        contain valid JSON. Callers are expected to ``isinstance``-check the
        result for the shape they need.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # A missing file is an expected state (nothing built yet, or the file
        # is being replaced), so it is not worth a warning.
        return None
    except Exception as e:
        logger.warning("Failed to read json %s: %s", path, e)
        return None
def load_chunk_doc_map() -> None:
    """Rebuild the global ``chunk_doc_map`` (chunk id -> document id).

    Reads the NDJSON index line by line. Lines that are not valid JSON, rows
    that are not JSON objects, and rows whose ``chunk_id`` / ``document_id``
    are not both strings are skipped. A missing file yields an empty map.
    Any other read error is logged and whatever was parsed up to that point
    is kept.

    The map is assembled in a local dict and bound to the global name once at
    the end, so readers never observe a half-filled map while a reload is in
    progress.
    """
    global chunk_doc_map

    mapping: Dict[str, str] = {}
    try:
        with INDEX_NDJSON_PATH.open("r", encoding="utf-8") as f:
            for line in f:
                try:
                    row = json.loads(line)
                except ValueError:
                    # Blank or malformed line: best-effort, skip it.
                    continue
                if not isinstance(row, dict):
                    # Valid JSON but not an object; it has no ids to read.
                    continue

                chunk_id = row.get("chunk_id")
                document_id = row.get("document_id")
                if isinstance(chunk_id, str) and isinstance(document_id, str):
                    mapping[chunk_id] = document_id
    except FileNotFoundError:
        pass
    except Exception as e:
        logger.warning("Failed to load chunk-doc map from ndjson: %s", str(e))

    chunk_doc_map = mapping
def _load_index_pair(index_path: Path, map_path: Path, label: str):
    """Load one FAISS index together with its id list.

    Args:
        index_path: FAISS index file.
        map_path: JSON file holding the list of ids for the index rows.
        label: Short name used in log messages ("chunk" or "tag").

    Returns:
        ``(index, ids)`` when both files exist and the id file decodes to a
        non-empty list, otherwise ``(None, None)``.
    """
    if not (index_path.exists() and map_path.exists()):
        return None, None

    logger.info("[Reload] Loading %s index", label)
    index = faiss.read_index(str(index_path))
    ids = _safe_read_json(map_path)
    if not isinstance(ids, list) or not ids:
        logger.warning(
            "[Reload] %s_ids meta invalid -> %s index disabled", label, label
        )
        return None, None
    return index, ids


def load_all() -> None:
    """Reload the model, both indices, the id maps and the version stamps.

    The whole reload runs under ``reload_lock`` so that the manual endpoint
    and the observer thread cannot reload at the same time. Each index and
    its id list are loaded and validated into locals first and only then
    bound to the globals, which keeps request handlers from seeing a new
    index paired with the previous id list while files are still being read.

    Raises:
        RuntimeError: if index_meta.json is missing/invalid or does not name
            an embedding model.
    """
    global model, chunk_index, chunk_ids
    global tag_index, tag_ids
    global loaded_embedding_model_name
    global current_index_version
    global current_runtime_stamp

    with reload_lock:
        meta = _safe_read_json(INDEX_META_PATH)
        if not isinstance(meta, dict):
            raise RuntimeError("index_meta.json not found or invalid")

        embedding_model_name = meta.get("embedding_model")
        index_version = meta.get("index_version")

        if not embedding_model_name:
            raise RuntimeError("embedding_model missing in index_meta.json")

        # Reload the model only when none is loaded or the name changed.
        if model is None or embedding_model_name != loaded_embedding_model_name:
            logger.info("[Reload] Loading embedding model: %s", embedding_model_name)
            model = SentenceTransformer(embedding_model_name)
            loaded_embedding_model_name = embedding_model_name

        # Chunk index + ids, published together once both are known good.
        new_chunk_index, new_chunk_ids = _load_index_pair(
            CHUNK_INDEX_PATH, CHUNK_MAP_PATH, "chunk"
        )
        chunk_index, chunk_ids = new_chunk_index, new_chunk_ids

        # Chunk -> document map
        logger.info("[Reload] Loading chunk-doc map")
        load_chunk_doc_map()

        # Tag index + ids, published together once both are known good.
        new_tag_index, new_tag_ids = _load_index_pair(
            TAG_INDEX_PATH, TAG_MAP_PATH, "tag"
        )
        tag_index, tag_ids = new_tag_index, new_tag_ids

        # Runtime stamp (commit marker for tags+chunks)
        runtime = _safe_read_json(INDEX_RUNTIME_PATH)
        stamp = runtime.get("last_rebuild_at") if isinstance(runtime, dict) else None
        current_runtime_stamp = stamp if isinstance(stamp, str) else None

        current_index_version = index_version if isinstance(index_version, int) else None

        logger.info(
            "[Reload] Completed (index_version=%s runtime=%s)",
            str(current_index_version),
            str(current_runtime_stamp),
        )
# ============================================================
# Observer (Enterprise Auto Reload)
# ============================================================
def observer_loop(interval: float = 2.0) -> None:
    """Poll the index metadata forever and reload when it changes.

    Every ``interval`` seconds the on-disk ``index_version``
    (index_meta.json) and ``last_rebuild_at`` (index_runtime.json) are
    compared with the values recorded by the last completed ``load_all()``.
    A difference in either triggers ``load_all()``. Errors are logged with a
    traceback and the loop keeps running.

    Args:
        interval: Seconds to sleep between polls.

    This function never returns; run it in a daemon thread.
    """
    while True:
        time.sleep(interval)
        try:
            meta = _safe_read_json(INDEX_META_PATH)
            if not isinstance(meta, dict):
                # Metadata missing or unreadable right now; try again later.
                continue

            version = meta.get("index_version")
            new_version = version if isinstance(version, int) else None

            runtime = _safe_read_json(INDEX_RUNTIME_PATH)
            new_runtime = None
            if isinstance(runtime, dict):
                v = runtime.get("last_rebuild_at")
                new_runtime = v if isinstance(v, str) else None

            # Structure change (embedding, dim, scoring_version, etc.) -> reload
            if new_version != current_index_version:
                logger.info(
                    "[Observer] index_version changed (%s -> %s) -> Reload",
                    str(current_index_version),
                    str(new_version),
                )
                load_all()
                continue

            # Content change (chunks OR tags) -> reload
            if new_runtime != current_runtime_stamp:
                logger.info(
                    "[Observer] runtime changed (%s -> %s) -> Reload",
                    str(current_runtime_stamp),
                    str(new_runtime),
                )
                load_all()

        except Exception as e:
            # logger.exception logs at ERROR level and appends the traceback.
            logger.exception("[Observer ERROR] %s", str(e))
# ============================================================
@@ -130,8 +259,15 @@ def load_all():
@app.on_event("startup")
def startup_event() -> None:
    """Initialise logging, load all indices, then start the observer thread.

    ``load_all()`` runs synchronously, so a missing or invalid
    index_meta.json makes startup fail. The observer runs as a daemon thread
    and therefore does not keep the process alive on shutdown.
    """
    setup_logging()
    logger.info("[VectorService] Startup")

    load_all()

    observer = threading.Thread(
        target=observer_loop,
        name="vector-index-observer",
        daemon=True,
    )
    observer.start()

    logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
# ============================================================
@@ -145,11 +281,18 @@ def health():
"chunk_index_loaded": chunk_index is not None,
"tag_index_loaded": tag_index is not None,
"model_loaded": model is not None,
"index_version": current_index_version,
"runtime_stamp": current_runtime_stamp,
"log_file": str(LOG_FILE),
}
@app.post("/reload")
def reload():
"""
Manual reload endpoint (kept for compatibility with mto:agent:vector:control --reload).
Auto-reload still runs via observer_loop.
"""
try:
load_all()
return {"status": "reloaded"}
@@ -159,13 +302,12 @@ def reload():
@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
if chunk_index is None or chunk_ids is None:
if chunk_index is None or chunk_ids is None or model is None:
raise HTTPException(status_code=503, detail="Chunk index not available")
query_vec = model.encode([req.query], normalize_embeddings=True)
query_vec = np.array(query_vec).astype("float32")
# Wenn doc_ids gesetzt sind → mehr holen, dann filtern
effective_limit = req.limit
if req.doc_ids:
effective_limit = max(req.limit * 5, 50)
@@ -182,7 +324,6 @@ def search_chunks(req: SearchRequest):
chunk_id = chunk_ids[idx]
# NEW: doc-scoped filter
if req.doc_ids:
doc_id = chunk_doc_map.get(chunk_id)
if doc_id not in req.doc_ids:
@@ -201,7 +342,7 @@ def search_chunks(req: SearchRequest):
@app.post("/search-tags")
def search_tags(req: SearchRequest):
if tag_index is None or tag_ids is None:
if tag_index is None or tag_ids is None or model is None:
raise HTTPException(status_code=503, detail="Tag index not available")
query_vec = model.encode([req.query], normalize_embeddings=True)
@@ -210,6 +351,7 @@ def search_tags(req: SearchRequest):
scores, indices = tag_index.search(query_vec, req.limit)
results = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue