phase a audit
This commit is contained in:
117
python/vector/vector_search.py
Normal file
117
python/vector/vector_search.py
Normal file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------
# Every option is mandatory; argparse itself rejects a missing or malformed
# value before any of the work below starts.
parser = argparse.ArgumentParser(description="FAISS vector search")

# (flag, extra add_argument keywords) in the order they appear in --help.
_REQUIRED_OPTIONS = (
    ("--query", {"help": "Search query text"}),
    ("--limit", {"type": int, "help": "Top-K limit"}),
    ("--index", {"help": "Path to vector.index"}),
    ("--meta", {"help": "Path to vector.index.meta.json"}),
    ("--model", {"help": "SentenceTransformer model"}),
)
for _flag, _extra in _REQUIRED_OPTIONS:
    parser.add_argument(_flag, required=True, **_extra)

args = parser.parse_args()

# Unpack into the module-level names used by the rest of the script.
# Both paths are made absolute so later error messages are unambiguous.
query, limit, embedding_model = args.query, args.limit, args.model
index_path = Path(args.index).resolve()
meta_path = Path(args.meta).resolve()
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Dependency checks (stderr only)
# ---------------------------------------------------------
# The third-party imports run inside try blocks so that a broken environment
# is reported on stderr with a dedicated exit code (10 = faiss,
# 11 = sentence-transformers) instead of a raw traceback.
#
# A missing package and an installed-but-broken package are reported with
# different messages (same exit code) so the real cause is not hidden behind
# "not found". The names bound here are the ones the rest of the script
# uses, so they are not imported a second time.
try:
    import faiss  # noqa
except ModuleNotFoundError:
    print("Python module 'faiss' not found.", file=sys.stderr)
    sys.exit(10)
except Exception as exc:
    print(f"Python module 'faiss' failed to import: {exc}", file=sys.stderr)
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer  # noqa
except ModuleNotFoundError:
    print("Python module 'sentence-transformers' not found.", file=sys.stderr)
    sys.exit(11)
except Exception as exc:
    print(
        f"Python module 'sentence-transformers' failed to import: {exc}",
        file=sys.stderr,
    )
    sys.exit(11)
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Input file validation
# ---------------------------------------------------------
# Checked in order: the index first (exit 20), then its metadata (exit 21).
# The first missing file ends the run with a message on stderr.
for _label, _path, _exit_code in (
    ("vector.index", index_path, 20),
    ("vector.index.meta.json", meta_path, 21),
):
    if _path.is_file():
        continue
    print(f"{_label} not found at {_path}", file=sys.stderr)
    sys.exit(_exit_code)
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Load model and index
# ---------------------------------------------------------
# Each stage has its own exit code so the caller can tell which one failed:
#   30 = model load, 31 = query encoding, 32 = index read, 33 = metadata read.
# The underlying exception text is appended to every stderr message; without
# it a failure here cannot be diagnosed from the outside.
try:
    model = SentenceTransformer(embedding_model)
except Exception as exc:
    print(
        f"Failed to load embedding model: {embedding_model} ({exc})",
        file=sys.stderr,
    )
    sys.exit(30)

try:
    # A one-element batch: the result is passed as-is to index.search() later.
    query_vec = model.encode([query], normalize_embeddings=True)
except Exception as exc:
    print(f"Embedding encoding failed: {exc}", file=sys.stderr)
    sys.exit(31)

try:
    index = faiss.read_index(str(index_path))
except Exception as exc:
    print(f"Failed to read FAISS index: {exc}", file=sys.stderr)
    sys.exit(32)

try:
    with open(meta_path, "r", encoding="utf-8") as f:
        ids = json.load(f)
except Exception as exc:
    print(f"Failed to read vector meta file: {exc}", file=sys.stderr)
    sys.exit(33)

# The search step looks chunk ids up by integer position (ids[idx] guarded by
# len(ids)), which only works for a JSON array. Reject anything else here
# rather than failing later with an unhandled KeyError/TypeError.
if not isinstance(ids, list):
    print(
        "Failed to read vector meta file: expected a JSON array of chunk ids.",
        file=sys.stderr,
    )
    sys.exit(33)
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Nearest-neighbour lookup
# ---------------------------------------------------------
try:
    scores, indices = index.search(query_vec, limit)
except Exception:
    print("FAISS search failed.", file=sys.stderr)
    sys.exit(40)

# Only the first row is read: a single query was encoded. Positions outside
# the metadata list are dropped; that includes negative ones such as the -1
# FAISS uses when fewer than `limit` neighbours exist.
_id_count = len(ids)
results = [
    {"chunk_id": ids[_pos], "score": float(_sim)}
    for _sim, _pos in zip(scores[0], indices[0])
    if 0 <= _pos < _id_count
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------
# Emit the result list (stdout carries JSON and nothing else)
# ---------------------------------------------------------
# One JSON document followed by a newline, then a success exit status.
sys.stdout.write(json.dumps(results) + "\n")
sys.exit(0)
|
||||
Reference in New Issue
Block a user