#!/usr/bin/env python3
"""Command-line FAISS vector search.

Embeds ``--query`` with a SentenceTransformer model, searches the FAISS index
at ``--index`` for the ``--limit`` nearest entries, and prints the hits to
stdout as a JSON array of ``{"chunk_id": ..., "score": ...}`` objects.  The
meta file at ``--meta`` must be a JSON array whose position ``i`` holds the
chunk id of index entry ``i``.  ``score`` is the value returned by
``index.search`` for that entry.

This script writes its own diagnostics to stderr only; stdout is reserved for
the JSON result.

Exit codes:
    0   success
    2   invalid command-line arguments (reported by argparse)
    10  'faiss' could not be imported
    11  'sentence-transformers' could not be imported
    20  index file does not exist
    21  meta file does not exist
    30  embedding model failed to load
    31  encoding the query failed
    32  FAISS index could not be read
    33  meta file could not be read or is not a JSON array
    40  FAISS search failed
"""

import sys
import json
import argparse
from pathlib import Path


def positive_int(text: str) -> int:
    """Parse *text* as an integer >= 1 (used as an argparse ``type=`` hook).

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 1.
    """
    try:
        value = int(text)
    except ValueError:
        # Same wording argparse itself produces for ``type=int``.
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None).

    Exits with status 2 (argparse default) on missing or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="FAISS vector search")
    parser.add_argument("--query", required=True, help="Search query text")
    # A non-positive top-K can never succeed, so reject it here instead of
    # paying for the model load and failing later inside the search.
    parser.add_argument(
        "--limit", required=True, type=positive_int, help="Top-K limit"
    )
    parser.add_argument("--index", required=True, help="Path to vector.index")
    parser.add_argument(
        "--meta", required=True, help="Path to vector.index.meta.json"
    )
    parser.add_argument(
        "--model", required=True, help="SentenceTransformer model"
    )
    return parser.parse_args(argv)


def collect_results(scores, indices, ids):
    """Pair each search hit with its chunk id.

    Args:
        scores: per-hit scores for one query, in rank order.
        indices: per-hit positions into *ids*, parallel to *scores*.
        ids: list mapping index position -> chunk id.

    Returns:
        A list of ``{"chunk_id": ..., "score": float}`` dicts, in the order
        given.  Hits whose position is negative (the search returns -1 for
        empty slots) or beyond the end of *ids* are dropped.
    """
    hits = []
    for score, idx in zip(scores, indices):
        position = int(idx)  # plain int, whatever integer type the search returned
        if 0 <= position < len(ids):
            hits.append({"chunk_id": ids[position], "score": float(score)})
    return hits


def _fail(message, code, cause=None):
    """Print *message* (and the underlying *cause*, if any) to stderr, then exit.

    Never returns: always raises ``SystemExit(code)``.
    """
    print(message, file=sys.stderr)
    if cause is not None:
        print(f"  cause: {type(cause).__name__}: {cause}", file=sys.stderr)
    sys.exit(code)


def main(argv=None) -> int:
    """Run the search and print the JSON result; see the module docstring.

    Returns 0 on success.  Every failure path raises ``SystemExit`` with the
    documented exit code after writing a message to stderr.
    """
    args = parse_args(argv)

    # Resolve up front so the error messages show absolute paths.
    index_path = Path(args.index).resolve()
    meta_path = Path(args.meta).resolve()

    # -----------------------------------------------------
    # Dependency checks (stderr only)
    # -----------------------------------------------------
    # Imported lazily so a missing or broken package yields an exit code
    # rather than a traceback.  ``Exception`` (not just ImportError) is caught
    # on purpose: any failure while importing maps to the same exit code.
    try:
        import faiss
    except Exception as exc:
        _fail("Python module 'faiss' not found.", 10, exc)

    try:
        from sentence_transformers import SentenceTransformer
    except Exception as exc:
        _fail("Python module 'sentence-transformers' not found.", 11, exc)

    # -----------------------------------------------------
    # File checks
    # -----------------------------------------------------
    if not index_path.is_file():
        _fail(f"vector.index not found at {index_path}", 20)

    if not meta_path.is_file():
        _fail(f"vector.index.meta.json not found at {meta_path}", 21)

    # -----------------------------------------------------
    # Load model and index
    # -----------------------------------------------------
    try:
        model = SentenceTransformer(args.model)
    except Exception as exc:
        _fail(f"Failed to load embedding model: {args.model}", 30, exc)

    try:
        query_vec = model.encode([args.query], normalize_embeddings=True)
    except Exception as exc:
        _fail("Embedding encoding failed.", 31, exc)

    try:
        index = faiss.read_index(str(index_path))
    except Exception as exc:
        _fail("Failed to read FAISS index.", 32, exc)

    try:
        with open(meta_path, "r", encoding="utf-8") as meta_file:
            ids = json.load(meta_file)
    except Exception as exc:
        _fail("Failed to read vector meta file.", 33, exc)

    # Hits are looked up by position, so anything but a JSON array would
    # either crash (object) or return garbage (string) further down.
    if not isinstance(ids, list):
        _fail(
            "Failed to read vector meta file.",
            33,
            TypeError(f"expected a JSON array, got {type(ids).__name__}"),
        )

    # -----------------------------------------------------
    # Search
    # -----------------------------------------------------
    try:
        scores, indices = index.search(query_vec, args.limit)
    except Exception as exc:
        _fail("FAISS search failed.", 40, exc)

    # Row 0: exactly one query vector was submitted.
    results = collect_results(scores[0], indices[0], ids)

    # -----------------------------------------------------
    # STRICT JSON OUTPUT ONLY
    # -----------------------------------------------------
    print(json.dumps(results))
    return 0


if __name__ == "__main__":
    sys.exit(main())