#!/usr/bin/env python3
"""FAISS vector search command-line tool.

Embeds the query text with a SentenceTransformer model, searches a FAISS
index for the nearest vectors and prints the matches to stdout as a JSON
array of ``{"chunk_id": ..., "score": ...}`` objects.  Nothing but that JSON
array is ever written to stdout; all diagnostics go to stderr.

Exit codes:
    0   success
    2   invalid command-line arguments (reported by argparse)
    10  the 'faiss' module cannot be imported
    11  the 'sentence-transformers' module cannot be imported
    20  the index file does not exist
    21  the meta file does not exist
    30  the embedding model failed to load
    31  encoding the query failed
    32  the FAISS index could not be read
    33  the meta file could not be read or is not a JSON array
    40  the FAISS search itself failed
"""
import sys
import json
import argparse
from pathlib import Path
from typing import Any, Dict, List, NoReturn, Optional, Sequence


def _fail(message: str, code: int) -> NoReturn:
    """Print *message* to stderr and terminate the process with *code*."""
    print(message, file=sys.stderr)
    sys.exit(code)


def _positive_int(text: str) -> int:
    """argparse ``type=`` converter that accepts only integers >= 1.

    Rejecting a non-positive top-K here fails fast with a usage error
    instead of loading the model and index first and then failing inside
    the FAISS search call.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid int value: {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser; every option is required."""
    parser = argparse.ArgumentParser(description="FAISS vector search")
    parser.add_argument("--query", required=True, help="Search query text")
    parser.add_argument(
        "--limit", required=True, type=_positive_int, help="Top-K limit"
    )
    parser.add_argument("--index", required=True, help="Path to vector.index")
    parser.add_argument(
        "--meta", required=True, help="Path to vector.index.meta.json"
    )
    parser.add_argument(
        "--model", required=True, help="SentenceTransformer model"
    )
    return parser


def _collect_results(
    scores: Sequence[Any], indices: Sequence[Any], ids: List[Any]
) -> List[Dict[str, Any]]:
    """Pair each hit's score with its chunk id.

    *scores* and *indices* are the parallel result rows for one query.
    Positions outside ``ids`` are skipped; that includes the negative
    placeholder (-1) FAISS uses when it has fewer than K neighbours.
    """
    return [
        {"chunk_id": ids[int(idx)], "score": float(score)}
        for score, idx in zip(scores, indices)
        if 0 <= idx < len(ids)
    ]


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the search described by *argv* (defaults to ``sys.argv[1:]``).

    Returns 0 after printing the JSON result array.  Every failure is
    reported on stderr and ends the process through ``sys.exit`` with one
    of the codes listed in the module docstring.
    """
    # ---------------------------------------------------------
    # Argument parsing
    # ---------------------------------------------------------
    args = _build_parser().parse_args(argv)
    index_path = Path(args.index).resolve()
    meta_path = Path(args.meta).resolve()
    embedding_model = args.model

    # ---------------------------------------------------------
    # Dependency checks (stderr only)
    # ---------------------------------------------------------
    # Imported lazily so a missing package yields a dedicated exit code
    # instead of a traceback.  Exception (not just ImportError) is caught
    # on purpose: any failure while importing is reported the same way.
    try:
        import faiss
    except Exception:
        _fail("Python module 'faiss' not found.", 10)

    try:
        from sentence_transformers import SentenceTransformer
    except Exception:
        _fail("Python module 'sentence-transformers' not found.", 11)

    # ---------------------------------------------------------
    # File checks
    # ---------------------------------------------------------
    if not index_path.is_file():
        _fail(f"vector.index not found at {index_path}", 20)

    if not meta_path.is_file():
        _fail(f"vector.index.meta.json not found at {meta_path}", 21)

    # ---------------------------------------------------------
    # Load model and index
    # ---------------------------------------------------------
    try:
        model = SentenceTransformer(embedding_model)
    except Exception:
        _fail(f"Failed to load embedding model: {embedding_model}", 30)

    try:
        query_vec = model.encode([args.query], normalize_embeddings=True)
    except Exception:
        _fail("Embedding encoding failed.", 31)

    try:
        index = faiss.read_index(str(index_path))
    except Exception:
        _fail("Failed to read FAISS index.", 32)

    try:
        with open(meta_path, "r", encoding="utf-8") as f:
            ids = json.load(f)
    except Exception:
        _fail("Failed to read vector meta file.", 33)

    # The search returns integer positions, so the meta file must be a JSON
    # array indexable by position.  Any other JSON value would otherwise
    # crash with a traceback while the results are being assembled.
    if not isinstance(ids, list):
        _fail("Vector meta file must contain a JSON array.", 33)

    # ---------------------------------------------------------
    # Search
    # ---------------------------------------------------------
    try:
        scores, indices = index.search(query_vec, args.limit)
    except Exception:
        _fail("FAISS search failed.", 40)

    # Only one query vector was submitted, so only row 0 is populated.
    results = _collect_results(scores[0], indices[0], ids)

    # ---------------------------------------------------------
    # STRICT JSON OUTPUT ONLY
    # ---------------------------------------------------------
    print(json.dumps(results))
    return 0


if __name__ == "__main__":
    sys.exit(main())