Files
MtoRagSystem/python/vector/vector_search.py
2026-02-22 13:51:45 +01:00

118 lines
3.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
import sys
import json
import argparse
from pathlib import Path
# ---------------------------------------------------------
# Argument parsing (NEW CLEAN CLI)
# ---------------------------------------------------------
def _positive_int(raw):
    """Convert the ``--limit`` argument to an int and require it to be >= 1.

    A top-K of zero or less cannot produce any search hit, so it is rejected
    while parsing the command line (argparse usage error, exit code 2)
    instead of surfacing much later as a generic search failure.

    Raises:
        argparse.ArgumentTypeError: if *raw* is not an integer or is < 1.
    """
    try:
        value = int(raw)
    except ValueError:
        # Same wording argparse itself uses for ``type=int`` failures.
        raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


# All five options are mandatory; argparse prints usage and exits with
# status 2 when one is missing or malformed.
parser = argparse.ArgumentParser(description="FAISS vector search")
parser.add_argument("--query", required=True, help="Search query text")
parser.add_argument(
    "--limit", required=True, type=_positive_int, help="Top-K limit"
)
parser.add_argument("--index", required=True, help="Path to vector.index")
parser.add_argument(
    "--meta", required=True, help="Path to vector.index.meta.json"
)
parser.add_argument("--model", required=True, help="SentenceTransformer model")
args = parser.parse_args()

# Module-level names consumed by the rest of the script.
query = args.query
limit = args.limit
# Both paths are made absolute (symlinks resolved) up front.
index_path = Path(args.index).resolve()
meta_path = Path(args.meta).resolve()
embedding_model = args.model
# ---------------------------------------------------------
# Dependency checks (stderr only)
# ---------------------------------------------------------
# Import the two third-party packages the script depends on. Any exception
# raised while importing is reported on stderr as "not found" and mapped to a
# dedicated exit code: 10 for faiss, 11 for sentence-transformers.
try:
    import faiss  # noqa
except Exception:
    print("Python module 'faiss' not found.", file=sys.stderr)
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer  # noqa
except Exception:
    print("Python module 'sentence-transformers' not found.", file=sys.stderr)
    sys.exit(11)
# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
# Both input files must exist as regular files before any expensive work
# starts. They are checked in order (index first, then meta); the first
# missing one is reported on stderr and ends the process with its exit code
# (20 for the index, 21 for the meta file).
for required_file, failure_code, failure_text in (
    (index_path, 20, f"vector.index not found at {index_path}"),
    (meta_path, 21, f"vector.index.meta.json not found at {meta_path}"),
):
    if required_file.is_file():
        continue
    print(failure_text, file=sys.stderr)
    sys.exit(failure_code)
# ---------------------------------------------------------
# Load model, embed the query, load index and id metadata
# ---------------------------------------------------------
# Each step below has its own exit code so the caller can tell which stage
# failed; diagnostics go to stderr only:
#   30 = embedding model could not be loaded
#   31 = query could not be encoded
#   32 = FAISS index could not be read
#   33 = meta file could not be read, parsed, or is not a JSON array
try:
    model = SentenceTransformer(embedding_model)
except Exception:
    print(f"Failed to load embedding model: {embedding_model}", file=sys.stderr)
    sys.exit(30)

try:
    # The query is passed as a one-element batch; embeddings are normalized.
    query_vec = model.encode([query], normalize_embeddings=True)
except Exception:
    print("Embedding encoding failed.", file=sys.stderr)
    sys.exit(31)

try:
    index = faiss.read_index(str(index_path))
except Exception:
    print("Failed to read FAISS index.", file=sys.stderr)
    sys.exit(32)

try:
    with open(meta_path, "r", encoding="utf-8") as f:
        ids = json.load(f)
except Exception:
    print("Failed to read vector meta file.", file=sys.stderr)
    sys.exit(33)

# The search results index into ``ids`` by integer position, which only works
# for a JSON array. Any other JSON value (object, string, number, null) would
# crash or yield meaningless ids later, so it is rejected here.
if not isinstance(ids, list):
    print("Vector meta file must contain a JSON array.", file=sys.stderr)
    sys.exit(33)
# ---------------------------------------------------------
# Search
# ---------------------------------------------------------
# Run the top-K search for the single query vector. Any failure is reported
# on stderr and ends the process with exit code 40.
try:
    scores, indices = index.search(query_vec, limit)
except Exception:
    print("FAISS search failed.", file=sys.stderr)
    sys.exit(40)

# Only row 0 is read because exactly one query was submitted. A hit is kept
# only when its label is a valid position in ``ids``; negative labels
# (including -1) and labels past the end of ``ids`` are dropped.
id_count = len(ids)
results = [
    {"chunk_id": ids[label], "score": float(hit_score)}
    for hit_score, label in zip(scores[0], indices[0])
    if 0 <= label < id_count
]
# ---------------------------------------------------------
# STRICT JSON OUTPUT ONLY
# ---------------------------------------------------------
# Serialize the collected hits as one JSON document on stdout (the only thing
# this script writes there), then exit with status 0.
payload = json.dumps(results)
print(payload)
sys.exit(0)