# MtoRagSystem/python/vector/vector_service.py

#!/usr/bin/env python3

import json
from pathlib import Path
from typing import Any, List, Optional, Dict

import numpy as np
import faiss
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer


# ============================================================
# Paths
# ============================================================

# Project root: two directory levels above the folder that contains this file.
BASE_PATH = Path(__file__).resolve().parents[2]
# Directory holding every index artefact this service reads.
KNOWLEDGE_DIR = BASE_PATH.joinpath("var", "knowledge")

# Chunk-level FAISS index and the JSON file whose entries are looked up by
# FAISS result position to obtain chunk ids.
CHUNK_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector.index")
CHUNK_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector.index.meta.json")

# Tag-level FAISS index and its matching JSON id file.
TAG_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index")
TAG_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index.meta.json")

# JSON metadata file; its "embedding_model" key names the model to load.
INDEX_META_PATH = KNOWLEDGE_DIR.joinpath("index_meta.json")
# Newline-delimited JSON rows carrying "chunk_id" and "document_id" fields.
INDEX_NDJSON_PATH = KNOWLEDGE_DIR.joinpath("index.ndjson")


# ============================================================
# FastAPI
# ============================================================

app = FastAPI()

# Module-level service state. load_all() assigns these (chunk_doc_map via
# load_chunk_doc_map()); the endpoint handlers read them.

# Sentence-embedding model used to encode search queries.
model: Optional[SentenceTransformer] = None
# FAISS index over chunks; None until loaded or when its files are missing.
chunk_index = None
# Parsed contents of vector.index.meta.json, looked up by FAISS result position.
chunk_ids: Optional[List[Any]] = None
# chunk_id -> document_id, built from index.ndjson.
chunk_doc_map: Dict[str, str] = {}

# FAISS index over tags and its id list; None until loaded or when missing.
tag_index = None
tag_ids: Optional[List[Any]] = None

# Name of the embedding model currently held in `model`.
loaded_embedding_model_name: Optional[str] = None


# ============================================================
# Models
# ============================================================

class SearchRequest(BaseModel):
    # Request body shared by the /search-chunks and /search-tags endpoints.

    # Free-text query; it is embedded with the loaded model before searching.
    query: str
    # Maximum number of results to return.
    limit: int = 8
    # Optional document filter: when non-empty, /search-chunks only returns
    # chunks whose document id is in this list. /search-tags does not read it.
    doc_ids: Optional[List[str]] = None


# ============================================================
# Loader
# ============================================================

def load_chunk_doc_map(path: Optional[Path] = None):
    """Rebuild the global chunk_id -> document_id map from an NDJSON file.

    Each line is expected to hold one JSON object with string ``chunk_id``
    and ``document_id`` fields. Blank lines, lines that are not valid JSON,
    rows that are not JSON objects, and rows where either field is missing
    or not a string are skipped.

    The new map is built in a local dict and bound to ``chunk_doc_map`` only
    after the whole file has been read, so readers never observe a
    half-filled map and a read error leaves the previous map untouched.

    Args:
        path: NDJSON file to read. Defaults to ``INDEX_NDJSON_PATH``.

    If the file does not exist the map is reset to an empty dict.
    """
    global chunk_doc_map

    source = INDEX_NDJSON_PATH if path is None else path
    fresh: Dict[str, str] = {}

    if source.exists():
        with source.open("r", encoding="utf-8") as f:
            for line in f:
                try:
                    row = json.loads(line)
                except ValueError:
                    # Blank or malformed line (json.JSONDecodeError is a
                    # ValueError): skip it, the load is best-effort.
                    continue

                # Valid JSON that is not an object (list, string, number)
                # has no .get(); skip it instead of aborting the load.
                if not isinstance(row, dict):
                    continue

                chunk_id = row.get("chunk_id")
                document_id = row.get("document_id")

                if isinstance(chunk_id, str) and isinstance(document_id, str):
                    fresh[chunk_id] = document_id

    # Single rebind: the previous map stays visible until this point.
    chunk_doc_map = fresh


def _read_index_pair(index_path, ids_path, label):
    """Read a FAISS index and its JSON id file; (None, None) if either file is missing."""
    if not (index_path.exists() and ids_path.exists()):
        return None, None

    print(f"[Reload] Loading {label} index")
    index = faiss.read_index(str(index_path))
    ids = json.loads(ids_path.read_text(encoding="utf-8"))
    return index, ids


def load_all():
    """(Re)load the embedding model, both FAISS indexes and the chunk map.

    The embedding model name is read from ``index_meta.json``; the model is
    only re-instantiated when none is loaded yet or the name has changed.
    An index whose index file or id file is missing is set to ``None``.

    Everything is read into local variables first and the module globals
    are rebound together at the end. If reading any file fails, the model
    and indexes that were loaded before the call stay in place instead of
    being left half-updated. The chunk -> document map is refreshed by
    ``load_chunk_doc_map()``, which manages its own global.

    Raises:
        RuntimeError: if ``index_meta.json`` is missing or does not name an
            embedding model.
    """
    global model, chunk_index, chunk_ids, tag_index, tag_ids, loaded_embedding_model_name

    if not INDEX_META_PATH.exists():
        raise RuntimeError("index_meta.json not found")

    # Read as UTF-8 explicitly, matching how index.ndjson is read, rather
    # than relying on the platform default encoding.
    meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
    embedding_model_name = meta.get("embedding_model") if isinstance(meta, dict) else None

    if not embedding_model_name:
        raise RuntimeError("embedding_model missing in index_meta.json")

    # Reload model only if changed
    new_model = model
    if model is None or embedding_model_name != loaded_embedding_model_name:
        print(f"[Reload] Loading embedding model: {embedding_model_name}")
        new_model = SentenceTransformer(embedding_model_name)

    new_chunk_index, new_chunk_ids = _read_index_pair(
        CHUNK_INDEX_PATH, CHUNK_MAP_PATH, "chunk"
    )
    new_tag_index, new_tag_ids = _read_index_pair(
        TAG_INDEX_PATH, TAG_MAP_PATH, "tag"
    )

    # Load chunk → document map
    print("[Reload] Loading chunk-doc map")
    load_chunk_doc_map()

    # All reads succeeded: publish the new state in one place.
    model = new_model
    loaded_embedding_model_name = embedding_model_name
    chunk_index, chunk_ids = new_chunk_index, new_chunk_ids
    tag_index, tag_ids = new_tag_index, new_tag_ids

    print("[Reload] Completed")


# ============================================================
# Startup
# ============================================================

@app.on_event("startup")
def startup_event():
    """Load the model and indexes once when the application starts.

    Any exception raised by load_all() propagates out of this handler.
    """
    load_all()
    print("[VectorService] Ready")


# ============================================================
# Endpoints
# ============================================================

# GET /health: report which service components are currently loaded.
# "status" is always "ok"; each *_loaded flag is True when the corresponding
# module-level object is not None.
@app.get("/health")
def health():
    components = (
        ("chunk_index_loaded", chunk_index),
        ("tag_index_loaded", tag_index),
        ("model_loaded", model),
    )

    report = {"status": "ok"}
    report.update((key, obj is not None) for key, obj in components)
    return report


# POST /reload: re-run load_all() so the service picks up the current model
# name and index files. Responds with {"status": "reloaded"} on success.
@app.post("/reload")
def reload():
    try:
        load_all()
    except Exception as exc:
        # Report any loader failure to the client as HTTP 500 with its message.
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return {"status": "reloaded"}


# POST /search-chunks: embed the query and return up to `req.limit` chunk hits
# as a list of {"chunk_id", "score"} dicts in the order FAISS returned them.
# When `req.doc_ids` is non-empty, only chunks whose document id (looked up in
# chunk_doc_map) is in that list are returned.
# Responds with HTTP 503 when the chunk index is not loaded, and with an empty
# list when `req.limit` is not positive.
@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
    # Take one snapshot of the shared state so the None check and the later
    # uses see the same objects even if /reload rebinds the globals while
    # this request is running.
    index, ids, doc_map = chunk_index, chunk_ids, chunk_doc_map

    if index is None or ids is None:
        raise HTTPException(status_code=503, detail="Chunk index not available")

    # Nothing was asked for; do not pass a non-positive k to FAISS.
    if req.limit <= 0:
        return []

    query_vec = model.encode([req.query], normalize_embeddings=True)
    query_vec = np.array(query_vec).astype("float32")

    # If doc_ids are given, fetch more candidates and filter afterwards.
    # NOTE: filtering happens after the nearest-neighbour search, so fewer
    # than `limit` results can come back when the requested documents are
    # rare among the top candidates.
    wanted_docs = set(req.doc_ids) if req.doc_ids else None
    fetch_count = max(req.limit * 5, 50) if wanted_docs else req.limit

    scores, indices = index.search(query_vec, fetch_count)

    results = []

    for score, idx in zip(scores[0], indices[0]):
        # Negative positions (FAISS pads missing hits with -1) and positions
        # beyond the id list cannot be resolved to a chunk id.
        if idx < 0 or idx >= len(ids):
            continue

        chunk_id = ids[idx]

        # Doc-scoped filter: chunks without a known document are dropped too,
        # because their lookup yields None.
        if wanted_docs is not None and doc_map.get(chunk_id) not in wanted_docs:
            continue

        results.append({
            "chunk_id": chunk_id,
            "score": float(score),
        })

        if len(results) >= req.limit:
            break

    return results


# POST /search-tags: embed the query and return up to `req.limit` tag hits as
# a list of {"tag_id", "score"} dicts in the order FAISS returned them.
# `req.doc_ids` is not used by this endpoint.
# Responds with HTTP 503 when the tag index is not loaded, and with an empty
# list when `req.limit` is not positive.
@app.post("/search-tags")
def search_tags(req: SearchRequest):
    # Take one snapshot of the shared state so the None check and the later
    # uses see the same objects even if /reload rebinds the globals while
    # this request is running.
    index, ids = tag_index, tag_ids

    if index is None or ids is None:
        raise HTTPException(status_code=503, detail="Tag index not available")

    # Nothing was asked for; do not pass a non-positive k to FAISS.
    if req.limit <= 0:
        return []

    query_vec = model.encode([req.query], normalize_embeddings=True)
    query_vec = np.array(query_vec).astype("float32")

    scores, indices = index.search(query_vec, req.limit)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        # Negative positions (FAISS pads missing hits with -1) and positions
        # beyond the id list cannot be resolved to a tag id.
        if idx < 0 or idx >= len(ids):
            continue

        results.append({
            "tag_id": ids[idx],
            "score": float(score),
        })

    return results