phase a audit

This commit is contained in:
team2
2026-02-22 13:51:45 +01:00
parent 5656a10930
commit b3e9110dd1
14 changed files with 222 additions and 463 deletions

View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
import sys
import json
import argparse
from pathlib import Path
# ---------------------------------------------------------
# FAISS vector search CLI
#
# Contract with the calling process:
#   stdout : exactly one JSON array of {"chunk_id", "score"} objects
#   stderr : human-readable diagnostics only
#   exit   : 0      success
#            2      invalid command-line arguments (argparse)
#            10/11  faiss / sentence-transformers cannot be imported
#            20/21  index file / meta file missing
#            30-33  model load, query encoding, index read, meta read failed
#            40     FAISS search failed
# ---------------------------------------------------------


def _fail(code: int, message: str, cause=None):
    """Print *message* to stderr and terminate the process with *code*.

    The first stderr line is always *message* unchanged; when *cause* is
    given (an exception or a short text) it is reported on a second line so
    the underlying reason of a failure is not lost.
    """
    print(message, file=sys.stderr)
    if cause is not None:
        if isinstance(cause, BaseException):
            cause = f"{type(cause).__name__}: {cause}"
        print(f"  cause: {cause}", file=sys.stderr)
    sys.exit(code)


def _positive_int(text: str) -> int:
    """argparse ``type=`` callable: parse *text* as an integer >= 1.

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 1.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def build_parser() -> argparse.ArgumentParser:
    """Return the argument parser describing the command-line interface."""
    parser = argparse.ArgumentParser(description="FAISS vector search")
    parser.add_argument("--query", required=True, help="Search query text")
    # A top-K of zero or less can never produce a result, so it is rejected
    # here, before the (expensive) embedding model is loaded.
    parser.add_argument(
        "--limit", required=True, type=_positive_int, help="Top-K limit"
    )
    parser.add_argument("--index", required=True, help="Path to vector.index")
    parser.add_argument(
        "--meta", required=True, help="Path to vector.index.meta.json"
    )
    parser.add_argument(
        "--model", required=True, help="SentenceTransformer model"
    )
    return parser


def collect_results(scores, indices, ids) -> list:
    """Map the first row of a search result onto chunk ids.

    Args:
        scores: 2-D sequence of scores; only row 0 is read.
        indices: 2-D sequence of positions into *ids*; only row 0 is read.
        ids: sequence of chunk ids, addressed by position.

    Returns:
        A list of ``{"chunk_id": ..., "score": float}`` dicts in the order
        of the input row. Positions outside ``0 <= idx < len(ids)`` are
        skipped; this includes -1, which the original code special-cased as
        the "no hit" marker.
    """
    known = len(ids)
    return [
        {"chunk_id": ids[idx], "score": float(score)}
        for score, idx in zip(scores[0], indices[0])
        if 0 <= idx < known
    ]


def main(argv=None) -> int:
    """Run one vector search and print the hits as JSON on stdout.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 on success. Every failure terminates via ``sys.exit`` with one of
        the exit codes listed in the header comment of this script.
    """
    args = build_parser().parse_args(argv)
    index_path = Path(args.index).resolve()
    meta_path = Path(args.meta).resolve()

    # -----------------------------------------------------
    # Dependency checks (stderr only)
    # -----------------------------------------------------
    # Any import-time failure is mapped onto the dedicated exit code, not
    # only ImportError, so the caller never sees a raw traceback here.
    try:
        import faiss
    except Exception as exc:
        _fail(10, "Python module 'faiss' not found.", exc)
    try:
        from sentence_transformers import SentenceTransformer
    except Exception as exc:
        _fail(11, "Python module 'sentence-transformers' not found.", exc)

    # -----------------------------------------------------
    # File checks
    # -----------------------------------------------------
    if not index_path.is_file():
        _fail(20, f"vector.index not found at {index_path}")
    if not meta_path.is_file():
        _fail(21, f"vector.index.meta.json not found at {meta_path}")

    # -----------------------------------------------------
    # Load model and index
    # -----------------------------------------------------
    # The order of these steps is kept as-is: it decides which exit code is
    # reported when more than one of them would fail.
    try:
        model = SentenceTransformer(args.model)
    except Exception as exc:
        _fail(30, f"Failed to load embedding model: {args.model}", exc)

    try:
        query_vec = model.encode([args.query], normalize_embeddings=True)
    except Exception as exc:
        _fail(31, "Embedding encoding failed.", exc)

    try:
        index = faiss.read_index(str(index_path))
    except Exception as exc:
        _fail(32, "Failed to read FAISS index.", exc)

    try:
        with open(meta_path, "r", encoding="utf-8") as handle:
            ids = json.load(handle)
    except Exception as exc:
        _fail(33, "Failed to read vector meta file.", exc)
    # Results are looked up by position, so anything but a JSON array would
    # otherwise surface later as an uncaught lookup error.
    if not isinstance(ids, list):
        _fail(
            33,
            "Failed to read vector meta file.",
            "expected a JSON array of chunk ids",
        )

    # -----------------------------------------------------
    # Search
    # -----------------------------------------------------
    try:
        scores, indices = index.search(query_vec, args.limit)
    except Exception as exc:
        _fail(40, "FAISS search failed.", exc)

    # -----------------------------------------------------
    # STRICT JSON OUTPUT ONLY
    # -----------------------------------------------------
    print(json.dumps(collect_results(scores, indices, ids)))
    return 0


if __name__ == "__main__":
    sys.exit(main())