phase a audit

This commit is contained in:
team2
2026-02-22 18:04:53 +01:00
parent b3e9110dd1
commit 3b2e1bc772
10 changed files with 608 additions and 516 deletions

View File

@@ -11,7 +11,8 @@ from pathlib import Path
parser = argparse.ArgumentParser(description="Build FAISS index from NDJSON")
parser.add_argument("--index", required=True, help="Path to index.ndjson")
parser.add_argument("--out", required=True, help="Path to output vector.index")
parser.add_argument("--out", required=True, help="Path to output vector.index (tmp)")
parser.add_argument("--model", default="all-MiniLM-L6-v2", help="SentenceTransformer model")
args = parser.parse_args()
@@ -82,13 +83,7 @@ with open(index_path, "r", encoding="utf-8") as f:
if not texts:
print("No chunks found. Removing vector index.")
if out_path.exists():
out_path.unlink()
meta_path = out_path.with_suffix(".meta.json")
if meta_path.exists():
meta_path.unlink()
# Remove the final index only later, atomically, in PHP
sys.exit(0)
print(f"Loaded {len(texts)} chunks.")
@@ -119,16 +114,19 @@ index.add(embeddings)
# Ensure output directory exists
out_path.parent.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------
# Write FAISS index (tmp)
# ---------------------------------------------------------
print(f"Writing FAISS index to {out_path}")
faiss.write_index(index, str(out_path))
# ---------------------------------------------------------
# Write ID mapping meta
# Write ID mapping meta (tmp)
# ---------------------------------------------------------
meta_path = out_path.with_suffix(".meta.json")
meta_tmp_path = Path(str(out_path) + ".meta.json")
with open(meta_path, "w", encoding="utf-8") as f:
with open(meta_tmp_path, "w", encoding="utf-8") as f:
json.dump(ids, f)
print(f"Indexed {len(ids)} chunks successfully.")
sys.exit(0)
sys.exit(0)