diff --git a/python/vector/vector_ingest.py b/python/vector/vector_ingest.py index 12973dd..2fefdc7 100644 --- a/python/vector/vector_ingest.py +++ b/python/vector/vector_ingest.py @@ -77,13 +77,14 @@ with open(index_path, "r", encoding="utf-8") as f: if not text or not chunk_id: continue - texts.append(text) + # ------------------------------------------------- + # E5 requires "passage:" prefix for document chunks + # ------------------------------------------------- + texts.append(f"passage: {text}") ids.append(chunk_id) if not texts: print("No chunks found. Removing vector index.") - - # Entferne final erst später in PHP atomar sys.exit(0) print(f"Loaded {len(texts)} chunks.") diff --git a/python/vector/vector_ingest_tags.py b/python/vector/vector_ingest_tags.py index 41a4f4c..2f9b75a 100644 --- a/python/vector/vector_ingest_tags.py +++ b/python/vector/vector_ingest_tags.py @@ -85,7 +85,10 @@ with open(tags_path, "r", encoding="utf-8") as f: if len(text) > 4000: text = text[:4000] - texts.append(text) + # ------------------------------------------------- + # E5 requires "passage:" prefix for indexed texts + # ------------------------------------------------- + texts.append(f"passage: {text}") ids.append(str(tag_id)) # If empty: remove outputs (tmp) and exit success diff --git a/python/vector/vector_service.py b/python/vector/vector_service.py index 006a81d..20f4d9f 100644 --- a/python/vector/vector_service.py +++ b/python/vector/vector_service.py @@ -50,22 +50,19 @@ def setup_logging() -> None: datefmt="%Y-%m-%dT%H:%M:%S%z", ) - # Rotating file file_handler = RotatingFileHandler( str(LOG_FILE), - maxBytes=10 * 1024 * 1024, # 10MB + maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8", ) file_handler.setFormatter(fmt) file_handler.setLevel(logging.INFO) - # Console (stdout) stream_handler = logging.StreamHandler() stream_handler.setFormatter(fmt) stream_handler.setLevel(logging.INFO) - # avoid duplicate handlers if uvicorn reloads workers if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers): logger.addHandler(file_handler) if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers): @@ -144,10 +141,6 @@ def load_chunk_doc_map() -> None: def load_all() -> None: - """ - Reload everything deterministically (model + indices + maps), - guarded by reload_lock (thread-safe). - """ global model, chunk_index, chunk_ids global tag_index, tag_ids global loaded_embedding_model_name @@ -165,13 +158,11 @@ def load_all() -> None: if not embedding_model_name: raise RuntimeError("embedding_model missing in index_meta.json") - # Reload model if needed if model is None or embedding_model_name != loaded_embedding_model_name: logger.info("[Reload] Loading embedding model: %s", embedding_model_name) model = SentenceTransformer(embedding_model_name) loaded_embedding_model_name = embedding_model_name - # Reload chunk index if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists(): logger.info("[Reload] Loading chunk index") chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH)) @@ -184,11 +175,9 @@ def load_all() -> None: chunk_index = None chunk_ids = None - # Load chunk → document map logger.info("[Reload] Loading chunk-doc map") load_chunk_doc_map() - # Reload tag index if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists(): logger.info("[Reload] Loading tag index") tag_index = faiss.read_index(str(TAG_INDEX_PATH)) @@ -201,7 +190,6 @@ def load_all() -> None: tag_index = None tag_ids = None - # Runtime stamp (commit marker for tags+chunks) runtime = _safe_read_json(INDEX_RUNTIME_PATH) if isinstance(runtime, dict): v = runtime.get("last_rebuild_at") @@ -238,13 +226,11 @@ def observer_loop() -> None: v = runtime.get("last_rebuild_at") new_runtime = v if isinstance(v, str) else None - # Structure change (embedding, dim, scoring_version, etc.) -> reload if new_version != current_index_version: logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version)) load_all() continue - # Content change (chunks OR tags) -> reload if new_runtime != current_runtime_stamp: logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime)) load_all() @@ -289,10 +275,6 @@ def health(): @app.post("/reload") def reload(): - """ - Manual reload endpoint (kept for compatibility with mto:agent:vector:control --reload). - Auto-reload still runs via observer_loop. - """ try: load_all() return {"status": "reloaded"} @@ -305,7 +287,10 @@ def search_chunks(req: SearchRequest): if chunk_index is None or chunk_ids is None or model is None: raise HTTPException(status_code=503, detail="Chunk index not available") - query_vec = model.encode([req.query], normalize_embeddings=True) + query_vec = model.encode( + [f"query: {req.query}"], + normalize_embeddings=True + ) query_vec = np.array(query_vec).astype("float32") effective_limit = req.limit @@ -345,7 +330,10 @@ def search_tags(req: SearchRequest): if tag_index is None or tag_ids is None or model is None: raise HTTPException(status_code=503, detail="Tag index not available") - query_vec = model.encode([req.query], normalize_embeddings=True) + query_vec = model.encode( + [f"query: {req.query}"], + normalize_embeddings=True + ) query_vec = np.array(query_vec).astype("float32") scores, indices = tag_index.search(query_vec, req.limit) diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index c7e83f1..970de00 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -13,7 +13,7 @@ use App\Vector\VectorSearchClient; final class NdjsonHybridRetriever implements RetrieverInterface { - private const VECTOR_SCORE_THRESHOLD = 0.22; + private const VECTOR_SCORE_THRESHOLD = 0.45; private const VECTOR_TOPK_MULTIPLIER_WHEN_ROUTED = 3; private const HARD_MAX_CHUNKS = 200; diff --git a/templates/admin/ingest_profile/create.html.twig b/templates/admin/ingest_profile/create.html.twig index 9beb402..fed0034 100644 --- a/templates/admin/ingest_profile/create.html.twig +++ b/templates/admin/ingest_profile/create.html.twig @@ -46,7 +46,7 @@ - {% for i in range(50, 200, 25) %} + {% for i in range(25, 200, 25) %} @@ -103,6 +103,9 @@ +
Das Embedding-Modell erzeugt numerische Vektoren aus Text. @@ -114,7 +117,7 @@