optimize with new transformer model
This commit is contained in:
@@ -77,13 +77,14 @@ with open(index_path, "r", encoding="utf-8") as f:
|
|||||||
if not text or not chunk_id:
|
if not text or not chunk_id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
texts.append(text)
|
# -------------------------------------------------
|
||||||
|
# E5 requires "passage:" prefix for document chunks
|
||||||
|
# -------------------------------------------------
|
||||||
|
texts.append(f"passage: {text}")
|
||||||
ids.append(chunk_id)
|
ids.append(chunk_id)
|
||||||
|
|
||||||
if not texts:
|
if not texts:
|
||||||
print("No chunks found. Removing vector index.")
|
print("No chunks found. Removing vector index.")
|
||||||
|
|
||||||
# Entferne final erst später in PHP atomar
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
print(f"Loaded {len(texts)} chunks.")
|
print(f"Loaded {len(texts)} chunks.")
|
||||||
|
|||||||
@@ -85,7 +85,10 @@ with open(tags_path, "r", encoding="utf-8") as f:
|
|||||||
if len(text) > 4000:
|
if len(text) > 4000:
|
||||||
text = text[:4000]
|
text = text[:4000]
|
||||||
|
|
||||||
texts.append(text)
|
# -------------------------------------------------
|
||||||
|
# E5 requires "passage:" prefix for indexed texts
|
||||||
|
# -------------------------------------------------
|
||||||
|
texts.append(f"passage: {text}")
|
||||||
ids.append(str(tag_id))
|
ids.append(str(tag_id))
|
||||||
|
|
||||||
# If empty: remove outputs (tmp) and exit success
|
# If empty: remove outputs (tmp) and exit success
|
||||||
|
|||||||
@@ -50,22 +50,19 @@ def setup_logging() -> None:
|
|||||||
datefmt="%Y-%m-%dT%H:%M:%S%z",
|
datefmt="%Y-%m-%dT%H:%M:%S%z",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Rotating file
|
|
||||||
file_handler = RotatingFileHandler(
|
file_handler = RotatingFileHandler(
|
||||||
str(LOG_FILE),
|
str(LOG_FILE),
|
||||||
maxBytes=10 * 1024 * 1024, # 10MB
|
maxBytes=10 * 1024 * 1024,
|
||||||
backupCount=5,
|
backupCount=5,
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
file_handler.setFormatter(fmt)
|
file_handler.setFormatter(fmt)
|
||||||
file_handler.setLevel(logging.INFO)
|
file_handler.setLevel(logging.INFO)
|
||||||
|
|
||||||
# Console (stdout)
|
|
||||||
stream_handler = logging.StreamHandler()
|
stream_handler = logging.StreamHandler()
|
||||||
stream_handler.setFormatter(fmt)
|
stream_handler.setFormatter(fmt)
|
||||||
stream_handler.setLevel(logging.INFO)
|
stream_handler.setLevel(logging.INFO)
|
||||||
|
|
||||||
# avoid duplicate handlers if uvicorn reloads workers
|
|
||||||
if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
|
if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
|
||||||
logger.addHandler(file_handler)
|
logger.addHandler(file_handler)
|
||||||
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
|
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
|
||||||
@@ -144,10 +141,6 @@ def load_chunk_doc_map() -> None:
|
|||||||
|
|
||||||
|
|
||||||
def load_all() -> None:
|
def load_all() -> None:
|
||||||
"""
|
|
||||||
Reload everything deterministically (model + indices + maps),
|
|
||||||
guarded by reload_lock (thread-safe).
|
|
||||||
"""
|
|
||||||
global model, chunk_index, chunk_ids
|
global model, chunk_index, chunk_ids
|
||||||
global tag_index, tag_ids
|
global tag_index, tag_ids
|
||||||
global loaded_embedding_model_name
|
global loaded_embedding_model_name
|
||||||
@@ -165,13 +158,11 @@ def load_all() -> None:
|
|||||||
if not embedding_model_name:
|
if not embedding_model_name:
|
||||||
raise RuntimeError("embedding_model missing in index_meta.json")
|
raise RuntimeError("embedding_model missing in index_meta.json")
|
||||||
|
|
||||||
# Reload model if needed
|
|
||||||
if model is None or embedding_model_name != loaded_embedding_model_name:
|
if model is None or embedding_model_name != loaded_embedding_model_name:
|
||||||
logger.info("[Reload] Loading embedding model: %s", embedding_model_name)
|
logger.info("[Reload] Loading embedding model: %s", embedding_model_name)
|
||||||
model = SentenceTransformer(embedding_model_name)
|
model = SentenceTransformer(embedding_model_name)
|
||||||
loaded_embedding_model_name = embedding_model_name
|
loaded_embedding_model_name = embedding_model_name
|
||||||
|
|
||||||
# Reload chunk index
|
|
||||||
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
|
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
|
||||||
logger.info("[Reload] Loading chunk index")
|
logger.info("[Reload] Loading chunk index")
|
||||||
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
|
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
|
||||||
@@ -184,11 +175,9 @@ def load_all() -> None:
|
|||||||
chunk_index = None
|
chunk_index = None
|
||||||
chunk_ids = None
|
chunk_ids = None
|
||||||
|
|
||||||
# Load chunk → document map
|
|
||||||
logger.info("[Reload] Loading chunk-doc map")
|
logger.info("[Reload] Loading chunk-doc map")
|
||||||
load_chunk_doc_map()
|
load_chunk_doc_map()
|
||||||
|
|
||||||
# Reload tag index
|
|
||||||
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
|
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
|
||||||
logger.info("[Reload] Loading tag index")
|
logger.info("[Reload] Loading tag index")
|
||||||
tag_index = faiss.read_index(str(TAG_INDEX_PATH))
|
tag_index = faiss.read_index(str(TAG_INDEX_PATH))
|
||||||
@@ -201,7 +190,6 @@ def load_all() -> None:
|
|||||||
tag_index = None
|
tag_index = None
|
||||||
tag_ids = None
|
tag_ids = None
|
||||||
|
|
||||||
# Runtime stamp (commit marker for tags+chunks)
|
|
||||||
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
|
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
|
||||||
if isinstance(runtime, dict):
|
if isinstance(runtime, dict):
|
||||||
v = runtime.get("last_rebuild_at")
|
v = runtime.get("last_rebuild_at")
|
||||||
@@ -238,13 +226,11 @@ def observer_loop() -> None:
|
|||||||
v = runtime.get("last_rebuild_at")
|
v = runtime.get("last_rebuild_at")
|
||||||
new_runtime = v if isinstance(v, str) else None
|
new_runtime = v if isinstance(v, str) else None
|
||||||
|
|
||||||
# Structure change (embedding, dim, scoring_version, etc.) -> reload
|
|
||||||
if new_version != current_index_version:
|
if new_version != current_index_version:
|
||||||
logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
|
logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
|
||||||
load_all()
|
load_all()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Content change (chunks OR tags) -> reload
|
|
||||||
if new_runtime != current_runtime_stamp:
|
if new_runtime != current_runtime_stamp:
|
||||||
logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
|
logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
|
||||||
load_all()
|
load_all()
|
||||||
@@ -289,10 +275,6 @@ def health():
|
|||||||
|
|
||||||
@app.post("/reload")
|
@app.post("/reload")
|
||||||
def reload():
|
def reload():
|
||||||
"""
|
|
||||||
Manual reload endpoint (kept for compatibility with mto:agent:vector:control --reload).
|
|
||||||
Auto-reload still runs via observer_loop.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
load_all()
|
load_all()
|
||||||
return {"status": "reloaded"}
|
return {"status": "reloaded"}
|
||||||
@@ -305,7 +287,10 @@ def search_chunks(req: SearchRequest):
|
|||||||
if chunk_index is None or chunk_ids is None or model is None:
|
if chunk_index is None or chunk_ids is None or model is None:
|
||||||
raise HTTPException(status_code=503, detail="Chunk index not available")
|
raise HTTPException(status_code=503, detail="Chunk index not available")
|
||||||
|
|
||||||
query_vec = model.encode([req.query], normalize_embeddings=True)
|
query_vec = model.encode(
|
||||||
|
[f"query: {req.query}"],
|
||||||
|
normalize_embeddings=True
|
||||||
|
)
|
||||||
query_vec = np.array(query_vec).astype("float32")
|
query_vec = np.array(query_vec).astype("float32")
|
||||||
|
|
||||||
effective_limit = req.limit
|
effective_limit = req.limit
|
||||||
@@ -345,7 +330,10 @@ def search_tags(req: SearchRequest):
|
|||||||
if tag_index is None or tag_ids is None or model is None:
|
if tag_index is None or tag_ids is None or model is None:
|
||||||
raise HTTPException(status_code=503, detail="Tag index not available")
|
raise HTTPException(status_code=503, detail="Tag index not available")
|
||||||
|
|
||||||
query_vec = model.encode([req.query], normalize_embeddings=True)
|
query_vec = model.encode(
|
||||||
|
[f"query: {req.query}"],
|
||||||
|
normalize_embeddings=True
|
||||||
|
)
|
||||||
query_vec = np.array(query_vec).astype("float32")
|
query_vec = np.array(query_vec).astype("float32")
|
||||||
|
|
||||||
scores, indices = tag_index.search(query_vec, req.limit)
|
scores, indices = tag_index.search(query_vec, req.limit)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ use App\Vector\VectorSearchClient;
|
|||||||
|
|
||||||
final class NdjsonHybridRetriever implements RetrieverInterface
|
final class NdjsonHybridRetriever implements RetrieverInterface
|
||||||
{
|
{
|
||||||
private const VECTOR_SCORE_THRESHOLD = 0.22;
|
private const VECTOR_SCORE_THRESHOLD = 0.45;
|
||||||
private const VECTOR_TOPK_MULTIPLIER_WHEN_ROUTED = 3;
|
private const VECTOR_TOPK_MULTIPLIER_WHEN_ROUTED = 3;
|
||||||
|
|
||||||
private const HARD_MAX_CHUNKS = 200;
|
private const HARD_MAX_CHUNKS = 200;
|
||||||
|
|||||||
@@ -46,7 +46,7 @@
|
|||||||
<select name="chunk_size"
|
<select name="chunk_size"
|
||||||
class="form-select bg-dark text-light border-secondary"
|
class="form-select bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
{% for i in range(250, 2500, 50) %}
|
{% for i in range(50, 1000, 25) %}
|
||||||
<option value="{{ i }}">
|
<option value="{{ i }}">
|
||||||
{{ i }}
|
{{ i }}
|
||||||
</option>
|
</option>
|
||||||
@@ -68,7 +68,7 @@
|
|||||||
<select name="chunk_overlap"
|
<select name="chunk_overlap"
|
||||||
class="form-select bg-dark text-light border-secondary"
|
class="form-select bg-dark text-light border-secondary"
|
||||||
required>
|
required>
|
||||||
{% for i in range(50, 200, 25) %}
|
{% for i in range(25, 200, 25) %}
|
||||||
<option value="{{ i }}">
|
<option value="{{ i }}">
|
||||||
{{ i }}
|
{{ i }}
|
||||||
</option>
|
</option>
|
||||||
@@ -103,6 +103,9 @@
|
|||||||
<option value="all-MiniLM-L6-v2">
|
<option value="all-MiniLM-L6-v2">
|
||||||
all-MiniLM-L6-v2 (384 Dimensionen)
|
all-MiniLM-L6-v2 (384 Dimensionen)
|
||||||
</option>
|
</option>
|
||||||
|
<option value="intfloat/multilingual-e5-base">
|
||||||
|
intfloat/multilingual-e5-base (768 Dimensionen)
|
||||||
|
</option>
|
||||||
</select>
|
</select>
|
||||||
<div class="form-text text-secondary">
|
<div class="form-text text-secondary">
|
||||||
Das Embedding-Modell erzeugt numerische Vektoren aus Text.
|
Das Embedding-Modell erzeugt numerische Vektoren aus Text.
|
||||||
@@ -114,7 +117,7 @@
|
|||||||
<label class="form-label">Embedding Dimension</label>
|
<label class="form-label">Embedding Dimension</label>
|
||||||
<input type="number"
|
<input type="number"
|
||||||
name="embedding_dimension"
|
name="embedding_dimension"
|
||||||
value="384"
|
value="768"
|
||||||
class="form-control bg-dark text-light border-secondary"
|
class="form-control bg-dark text-light border-secondary"
|
||||||
readonly>
|
readonly>
|
||||||
<div class="form-text text-secondary">
|
<div class="form-text text-secondary">
|
||||||
|
|||||||
Reference in New Issue
Block a user