phase a audit
This commit is contained in:
@@ -11,7 +11,8 @@ from pathlib import Path
|
||||
parser = argparse.ArgumentParser(description="Build FAISS index from NDJSON")
|
||||
|
||||
parser.add_argument("--index", required=True, help="Path to index.ndjson")
|
||||
parser.add_argument("--out", required=True, help="Path to output vector.index")
|
||||
parser.add_argument("--out", required=True, help="Path to output vector.index (tmp)")
|
||||
|
||||
parser.add_argument("--model", default="all-MiniLM-L6-v2", help="SentenceTransformer model")
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -82,13 +83,7 @@ with open(index_path, "r", encoding="utf-8") as f:
|
||||
if not texts:
|
||||
print("No chunks found. Removing vector index.")
|
||||
|
||||
if out_path.exists():
|
||||
out_path.unlink()
|
||||
|
||||
meta_path = out_path.with_suffix(".meta.json")
|
||||
if meta_path.exists():
|
||||
meta_path.unlink()
|
||||
|
||||
# Do not remove the final files here; PHP removes them atomically later
|
||||
sys.exit(0)
|
||||
|
||||
print(f"Loaded {len(texts)} chunks.")
|
||||
@@ -119,16 +114,19 @@ index.add(embeddings)
|
||||
# Ensure output directory exists
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Write FAISS index (tmp)
|
||||
# ---------------------------------------------------------
|
||||
print(f"Writing FAISS index to {out_path}")
|
||||
faiss.write_index(index, str(out_path))
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Write ID mapping meta
|
||||
# Write ID mapping meta (tmp)
|
||||
# ---------------------------------------------------------
|
||||
meta_path = out_path.with_suffix(".meta.json")
|
||||
meta_tmp_path = Path(str(out_path) + ".meta.json")
|
||||
|
||||
with open(meta_path, "w", encoding="utf-8") as f:
|
||||
with open(meta_tmp_path, "w", encoding="utf-8") as f:
|
||||
json.dump(ids, f)
|
||||
|
||||
print(f"Indexed {len(ids)} chunks successfully.")
|
||||
sys.exit(0)
|
||||
sys.exit(0)
|
||||
Reference in New Issue
Block a user