first commit

This commit is contained in:
team 1
2026-04-20 16:36:28 +02:00
parent a0ec07a99c
commit 2587ac8b4b
41 changed files with 5126 additions and 2280 deletions

View File

@@ -1,42 +1,44 @@
#!/usr/bin/env python3
import sys
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple
def fail(message: str, code: int) -> None:
    """Report a fatal error on stderr and terminate the process.

    Args:
        message: Human-readable description; written with an ``ERROR: `` prefix.
        code: Exit status handed to the interpreter.

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    error_line = f"ERROR: {message}"
    print(error_line, file=sys.stderr)
    raise SystemExit(code)
# ---------------------------------------------------------
# Positional args
# 1 tags.ndjson
# 2 out_index_path (can be .tmp)
# ---------------------------------------------------------
if len(sys.argv) < 3:
print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index>", file=sys.stderr)
sys.exit(2)
fail("usage: vector_ingest_tags.py <tags.ndjson> <out.index>", 2)
tags_path = Path(sys.argv[1]).resolve()
out_path = Path(sys.argv[2]).resolve()
meta_path = Path(str(out_path) + ".meta.json")
# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
try:
import faiss
except Exception:
print("ERROR: Python module 'faiss' not found.", file=sys.stderr)
sys.exit(10)
fail("Python module 'faiss' not found.", 10)
try:
from sentence_transformers import SentenceTransformer
except Exception:
print("ERROR: Python module 'sentence-transformers' not found.", file=sys.stderr)
sys.exit(11)
fail("Python module 'sentence-transformers' not found.", 11)
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------
# Load embedding model from index_meta.json (Single Source of Truth)
@@ -45,64 +47,122 @@ BASE_PATH = Path(__file__).resolve().parents[2]
INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json"
if not INDEX_META_PATH.exists():
print("ERROR: index_meta.json not found", file=sys.stderr)
sys.exit(30)
fail("index_meta.json not found", 30)
try:
meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
except Exception:
fail("index_meta.json is invalid", 30)
embedding_model = meta.get("embedding_model")
if not isinstance(embedding_model, str) or embedding_model.strip() == "":
fail("embedding_model missing in index_meta.json", 31)
if not embedding_model:
print("ERROR: embedding_model missing in index_meta.json", file=sys.stderr)
sys.exit(31)
model = SentenceTransformer(embedding_model.strip())
model = SentenceTransformer(embedding_model)
# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not tags_path.is_file():
print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr)
sys.exit(20)
fail(f"tags.ndjson not found at {tags_path}", 20)
out_path.parent.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------
# Helpers
# ---------------------------------------------------------
def cleanup_outputs() -> None:
    """Delete the index file and its ``.meta.json`` sidecar when they exist.

    Works on the module-level ``out_path`` and ``meta_path``; the index file
    is handled first, then the sidecar.
    """
    for stale_file in (out_path, meta_path):
        if stale_file.exists():
            stale_file.unlink()
def normalize_text(value: Any) -> str:
    """Convert a raw field value into compact single-line text.

    Every run of whitespace (including newlines and tabs) collapses to one
    space, surrounding whitespace is removed, and the result is capped at
    4000 characters with any trailing space left by the cut trimmed off.

    Args:
        value: Raw value taken from a parsed JSON row. Non-string values are
            stringified with ``str()``.

    Returns:
        The normalized text, or ``""`` when ``value`` is ``None``.
    """
    # A JSON ``null`` arrives as None; ``str(None)`` would yield the literal
    # text "None", which is non-empty and would be treated as real content.
    if value is None:
        return ""

    # ``split()`` without arguments already drops leading/trailing whitespace.
    text = " ".join(str(value).split())
    if len(text) > 4000:
        text = text[:4000].rstrip()
    return text
# ---------------------------------------------------------
# Streaming read NDJSON
# ---------------------------------------------------------
texts = []
ids = []
def load_rows(path: Path) -> Tuple[List[str], List[str], Dict[str, int]]:
    """Stream an NDJSON tag export and collect the rows that can be embedded.

    Each non-empty line is parsed as JSON. A row is accepted when it is a JSON
    object with a non-empty ``tag_id`` and a non-empty normalized ``text``,
    and its ``tag_id`` has not been seen earlier in the file (first one wins).

    Args:
        path: Location of the NDJSON file, read as UTF-8.

    Returns:
        A ``(texts, ids, stats)`` tuple. ``texts[i]`` is the normalized text
        of the row whose id is ``ids[i]``, prefixed with ``"passage: "``.
        ``stats`` counts total, empty and invalid-JSON lines, rows with
        missing fields, duplicate ids, and accepted rows.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    texts: List[str] = []
    ids: List[str] = []
    seen_ids = set()

    stats = {
        "lines_total": 0,
        "lines_empty": 0,
        "lines_invalid_json": 0,
        "rows_missing_fields": 0,
        "rows_duplicate_tag_id": 0,
        "rows_accepted": 0,
    }

    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stats["lines_total"] += 1

            line = line.strip()
            if not line:
                stats["lines_empty"] += 1
                continue

            try:
                entry = json.loads(line)
            except Exception:
                # Deliberately broad: one corrupt line must not abort the run.
                stats["lines_invalid_json"] += 1
                continue

            # Valid JSON that is not an object (list, number, ...) has no fields.
            if not isinstance(entry, dict):
                stats["rows_missing_fields"] += 1
                continue

            raw_tag_id = entry.get("tag_id")
            raw_text = entry.get("text")

            # An explicit JSON null must count as missing; stringifying it
            # would produce the literal "None" and slip past the checks below.
            tag_id = "" if raw_tag_id is None else str(raw_tag_id).strip()
            text = "" if raw_text is None else normalize_text(raw_text)

            if not tag_id or not text:
                stats["rows_missing_fields"] += 1
                continue

            if tag_id in seen_ids:
                stats["rows_duplicate_tag_id"] += 1
                continue

            seen_ids.add(tag_id)
            ids.append(tag_id)
            texts.append(f"passage: {text}")
            stats["rows_accepted"] += 1

    return texts, ids, stats
texts, ids, stats = load_rows(tags_path)
print(
json.dumps(
{
"event": "tag_rows_loaded",
**stats,
},
ensure_ascii=False,
),
file=sys.stderr,
)
if not texts:
if out_path.exists():
out_path.unlink()
if meta_path.exists():
meta_path.unlink()
cleanup_outputs()
sys.exit(0)
# ---------------------------------------------------------
# Build embeddings
# ---------------------------------------------------------
@@ -110,18 +170,32 @@ embeddings = model.encode(
texts,
normalize_embeddings=True,
show_progress_bar=True,
batch_size=128
batch_size=128,
)
embeddings = np.array(embeddings).astype("float32")
dim = embeddings.shape[1]
embeddings = np.array(embeddings, dtype="float32")
if embeddings.ndim != 2 or embeddings.shape[0] != len(ids) or embeddings.shape[0] == 0:
cleanup_outputs()
fail("tag embeddings have invalid shape", 40)
if embeddings.shape[1] <= 0:
cleanup_outputs()
fail("tag embeddings have invalid dimension", 41)
dim = int(embeddings.shape[1])
index = faiss.IndexFlatIP(dim)
index.add(embeddings)
faiss.write_index(index, str(out_path))
if int(index.ntotal) != len(ids):
cleanup_outputs()
fail("FAISS tag index count does not match meta ids", 42)
with open(meta_path, "w", encoding="utf-8") as f:
json.dump(ids, f)
faiss.write_index(index, str(out_path))
meta_path.write_text(
json.dumps(ids, ensure_ascii=False),
encoding="utf-8",
)
sys.exit(0)

View File

@@ -6,10 +6,10 @@ from logging.handlers import RotatingFileHandler
import threading
import time
from pathlib import Path
from typing import Any, List, Optional, Dict
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import faiss
import numpy as np
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
@@ -20,7 +20,7 @@ from sentence_transformers import SentenceTransformer
# Service Stamp (to verify you are running THIS file)
# ============================================================
SERVICE_STAMP = "vector_service.py@2026-02-28T10:20+01:00"
SERVICE_STAMP = "vector_service.py@2026-04-20T00:00+02:00"
# ============================================================
@@ -41,8 +41,6 @@ TAG_MAP_PATH = KNOWLEDGE_DIR / "vector_tags.index.meta.json"
INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json"
INDEX_RUNTIME_PATH = KNOWLEDGE_DIR / "index_runtime.json"
INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
# NEW: Tags NDJSON (exported by PHP) used to enrich /search-tags responses
TAGS_NDJSON_PATH = KNOWLEDGE_DIR / "tags.ndjson"
@@ -54,6 +52,48 @@ logger = logging.getLogger("vector_service")
logger.setLevel(logging.INFO)
# ============================================================
# App State
# ============================================================
app = FastAPI()
model: Optional[SentenceTransformer] = None
chunk_index = None
chunk_ids: Optional[List[Any]] = None
chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {}
tag_index = None
tag_ids: Optional[List[Any]] = None
# tag_id -> {"label": "...", "tag_type": "..."}
tag_meta_map: Dict[str, Dict[str, str]] = {}
loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None
current_chunk_runtime_stamp: Optional[str] = None
current_tags_runtime_stamp: Optional[str] = None
current_tags_index_present: Optional[bool] = None
reload_lock = threading.Lock()
# ============================================================
# Models
# ============================================================
# Request body accepted by the POST /search-chunks and /search-tags endpoints.
class SearchRequest(BaseModel):
# Query string to search for.
query: str
# Requested number of hits; 8 when the client omits the field.
limit: int = 8
# Optional document ids; when given, /search-chunks only returns chunks
# whose document id is in this list.
doc_ids: Optional[List[str]] = None
# ============================================================
# Helpers
# ============================================================
def setup_logging() -> None:
LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -77,10 +117,9 @@ def setup_logging() -> None:
if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
logger.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
if not any(type(h) is logging.StreamHandler for h in logger.handlers):
logger.addHandler(stream_handler)
# Capture uvicorn logs in the same file as well (critical for hidden 500s)
uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_access = logging.getLogger("uvicorn.access")
@@ -89,62 +128,22 @@ def setup_logging() -> None:
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_error.handlers):
if not any(type(h) is logging.StreamHandler for h in uvicorn_error.handlers):
uvicorn_error.addHandler(stream_handler)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_access.handlers):
if not any(type(h) is logging.StreamHandler for h in uvicorn_access.handlers):
uvicorn_access.addHandler(stream_handler)
# ============================================================
# FastAPI
# ============================================================
app = FastAPI()
model: Optional[SentenceTransformer] = None
chunk_index = None
chunk_ids: Optional[List[Any]] = None
chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {}
tag_index = None
tag_ids: Optional[List[Any]] = None
# NEW: tag_id -> {"label": "...", "tag_type": "..."}
tag_meta_map: Dict[str, Dict[str, str]] = {}
loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None
current_runtime_stamp: Optional[str] = None
reload_lock = threading.Lock()
# ============================================================
# Models
# ============================================================
class SearchRequest(BaseModel):
query: str
limit: int = 8
doc_ids: Optional[List[str]] = None
# ============================================================
# Loader Helpers
# ============================================================
def _safe_read_json(path: Path) -> Optional[Any]:
try:
if not path.exists():
return None
return json.loads(path.read_text(encoding="utf-8"))
except Exception as e:
logger.warning("Failed to read json %s: %s", str(path), str(e))
except Exception as exc:
logger.warning("Failed to read json %s: %s", str(path), str(exc))
return None
@@ -152,25 +151,97 @@ def _as_key(value: Any) -> Optional[str]:
if value is None:
return None
if isinstance(value, str):
v = value.strip()
return v if v else None
value = value.strip()
return value or None
try:
v = str(value).strip()
return v if v else None
value = str(value).strip()
return value or None
except Exception:
return None
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
try:
v = int(limit)
value = int(limit)
except Exception:
return default
if v <= 0:
if value <= 0:
return default
if v > max_limit:
if value > max_limit:
return max_limit
return v
return value
def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
"""
Accepts:
- list: ok
- dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
Returns None if invalid.
"""
if isinstance(value, list):
return value
if isinstance(value, dict):
try:
keys = sorted(int(key) for key in value.keys())
return [value[str(i)] for i in keys]
except Exception:
return None
return None
def _normalize_tag_type(value: Any) -> str:
    """Map an arbitrary ``type`` value onto one of the supported tag types.

    The value is reduced to a key via ``_as_key`` and lower-cased. Only
    ``generic``, ``catalog_entity`` and ``sales_signal`` are recognised;
    anything else, including a missing value, becomes ``"generic"``.
    """
    known_types = ("generic", "catalog_entity", "sales_signal")

    key = _as_key(value)
    candidate = key.lower() if key is not None else "generic"
    return candidate if candidate in known_types else "generic"
def _extract_runtime_state(runtime: Any) -> Tuple[Optional[str], Optional[str], Optional[bool]]:
if not isinstance(runtime, dict):
return None, None, None
chunk_runtime = runtime.get("last_rebuild_at")
tags_runtime = runtime.get("last_tags_rebuild_at")
tags_index_present = runtime.get("tags_index_present")
if not isinstance(chunk_runtime, str):
chunk_runtime = None
if not isinstance(tags_runtime, str):
tags_runtime = None
if not isinstance(tags_index_present, bool):
tags_index_present = None
return chunk_runtime, tags_runtime, tags_index_present
def _validate_index_alignment(index_obj: Any, ids: Optional[List[Any]], label: str) -> Tuple[Any, Optional[List[Any]]]:
if index_obj is None or ids is None:
return None, None
try:
index_count = int(index_obj.ntotal)
except Exception:
logger.warning("[Reload] %s index has no ntotal -> disabled", label)
return None, None
if index_count != len(ids):
logger.warning(
"[Reload] %s meta/index mismatch (ids=%s index=%s) -> disabled",
label,
len(ids),
index_count,
)
return None, None
return index_obj, ids
def load_chunk_maps_from_ndjson() -> None:
@@ -183,8 +254,8 @@ def load_chunk_maps_from_ndjson() -> None:
return
try:
with INDEX_NDJSON_PATH.open("r", encoding="utf-8") as f:
for line in f:
with INDEX_NDJSON_PATH.open("r", encoding="utf-8") as handle:
for line in handle:
line = line.strip()
if not line:
continue
@@ -201,29 +272,32 @@ def load_chunk_maps_from_ndjson() -> None:
if doc_id_key:
chunk_doc_map[chunk_id_key] = doc_id_key
ci = row.get("chunk_index")
if isinstance(ci, int):
chunk_pos_map[chunk_id_key] = ci
elif isinstance(ci, str):
s = ci.strip()
if s.isdigit():
chunk_index_value = row.get("chunk_index")
if isinstance(chunk_index_value, int):
chunk_pos_map[chunk_id_key] = chunk_index_value
elif isinstance(chunk_index_value, str):
stripped = chunk_index_value.strip()
if stripped.isdigit():
try:
chunk_pos_map[chunk_id_key] = int(s)
chunk_pos_map[chunk_id_key] = int(stripped)
except Exception:
pass
except Exception as e:
logger.warning("Failed to load chunk maps from ndjson: %s", str(e))
except Exception as exc:
logger.warning("Failed to load chunk maps from ndjson: %s", str(exc))
def load_tag_meta_from_tags_ndjson() -> None:
"""
Loads minimal tag metadata from tags.ndjson to enrich /search-tags results.
Expected line format (from PHP exporter / ingester pipeline):
{"tag_id":"...","text":"LABEL\\nSLUG\\noptional description", ...}
We extract:
label = first line of "text" (fallback: "")
tag_type = "type" if present (preferred), else "generic"
Expected line format:
{
"tag_id": "...",
"text": "LABEL\\nSLUG\\noptional description",
"type": "catalog_entity|generic|sales_signal",
"document_ids": ["..."]
}
Only tags with at least one exported document id are kept.
"""
global tag_meta_map
@@ -234,11 +308,12 @@ def load_tag_meta_from_tags_ndjson() -> None:
return
try:
with TAGS_NDJSON_PATH.open("r", encoding="utf-8") as f:
for line in f:
with TAGS_NDJSON_PATH.open("r", encoding="utf-8") as handle:
for line in handle:
line = line.strip()
if not line:
continue
try:
row = json.loads(line)
except Exception:
@@ -248,55 +323,33 @@ def load_tag_meta_from_tags_ndjson() -> None:
if not tag_id:
continue
# Prefer explicit fields if present
ttype = row.get("type")
if isinstance(ttype, str) and ttype.strip():
tag_type = ttype.strip()
else:
tag_type = "generic"
document_ids = row.get("document_ids")
if isinstance(document_ids, list) and len(document_ids) == 0:
continue
tag_type = _normalize_tag_type(row.get("type"))
label = ""
txt = row.get("text")
if isinstance(txt, str) and txt.strip():
first = txt.splitlines()[0].strip() if txt.splitlines() else ""
label = first
if label:
tag_meta_map[tag_id] = {"label": label, "tag_type": tag_type}
else:
tag_meta_map[tag_id] = {"label": "", "tag_type": tag_type}
text_value = row.get("text")
if isinstance(text_value, str) and text_value.strip():
first_line = text_value.splitlines()[0].strip() if text_value.splitlines() else ""
label = first_line
except Exception as e:
logger.warning("Failed to load tag meta from tags.ndjson: %s", str(e))
tag_meta_map[tag_id] = {
"label": label,
"tag_type": tag_type,
}
except Exception as exc:
logger.warning("Failed to load tag meta from tags.ndjson: %s", str(exc))
tag_meta_map = {}
def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
"""
Accepts:
- list: ok
- dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
Returns None if invalid.
"""
if isinstance(value, list):
return value
if isinstance(value, dict):
try:
keys = sorted(int(k) for k in value.keys())
return [value[str(i)] for i in keys]
except Exception:
return None
return None
def load_all() -> None:
global model, chunk_index, chunk_ids
global tag_index, tag_ids
global loaded_embedding_model_name
global current_index_version
global current_runtime_stamp
global current_chunk_runtime_stamp, current_tags_runtime_stamp, current_tags_index_present
with reload_lock:
meta = _safe_read_json(INDEX_META_PATH)
@@ -314,15 +367,21 @@ def load_all() -> None:
model = SentenceTransformer(embedding_model_name)
loaded_embedding_model_name = embedding_model_name
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
chunk_runtime_stamp, tags_runtime_stamp, tags_index_present = _extract_runtime_state(runtime)
# Chunks
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
logger.info("[Reload] Loading chunk index")
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
raw = _safe_read_json(CHUNK_MAP_PATH)
chunk_ids = _normalize_meta_list(raw)
if chunk_ids is None:
loaded_chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
raw_chunk_meta = _safe_read_json(CHUNK_MAP_PATH)
loaded_chunk_ids = _normalize_meta_list(raw_chunk_meta)
if loaded_chunk_ids is None:
chunk_index = None
chunk_ids = None
logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled")
else:
chunk_index, chunk_ids = _validate_index_alignment(loaded_chunk_index, loaded_chunk_ids, "chunk")
else:
chunk_index = None
chunk_ids = None
@@ -331,35 +390,38 @@ def load_all() -> None:
load_chunk_maps_from_ndjson()
# Tags
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
should_load_tag_index = tags_index_present is not False
if should_load_tag_index and TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
logger.info("[Reload] Loading tag index")
tag_index = faiss.read_index(str(TAG_INDEX_PATH))
raw = _safe_read_json(TAG_MAP_PATH)
tag_ids = _normalize_meta_list(raw)
if tag_ids is None:
loaded_tag_index = faiss.read_index(str(TAG_INDEX_PATH))
raw_tag_meta = _safe_read_json(TAG_MAP_PATH)
loaded_tag_ids = _normalize_meta_list(raw_tag_meta)
if loaded_tag_ids is None:
tag_index = None
tag_ids = None
logger.warning("[Reload] tag_ids meta invalid -> tag index disabled")
else:
tag_index, tag_ids = _validate_index_alignment(loaded_tag_index, loaded_tag_ids, "tag")
else:
tag_index = None
tag_ids = None
if tags_index_present is False:
logger.info("[Reload] Runtime marks tags index as absent -> tag index disabled")
# NEW: load tag meta for enrichment
logger.info("[Reload] Loading tag meta from tags.ndjson")
load_tag_meta_from_tags_ndjson()
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
if isinstance(runtime, dict):
v = runtime.get("last_rebuild_at")
current_runtime_stamp = v if isinstance(v, str) else None
else:
current_runtime_stamp = None
current_index_version = index_version if isinstance(index_version, int) else None
current_chunk_runtime_stamp = chunk_runtime_stamp
current_tags_runtime_stamp = tags_runtime_stamp
current_tags_index_present = tags_index_present
logger.info(
"[Reload] Completed (index_version=%s runtime=%s embedding_model=%s tag_meta=%s stamp=%s file=%s)",
"[Reload] Completed (index_version=%s chunk_runtime=%s tags_runtime=%s tags_index_present=%s embedding_model=%s tag_meta=%s stamp=%s file=%s)",
str(current_index_version),
str(current_runtime_stamp),
str(current_chunk_runtime_stamp),
str(current_tags_runtime_stamp),
str(current_tags_index_present),
str(loaded_embedding_model_name),
str(len(tag_meta_map)),
SERVICE_STAMP,
@@ -373,7 +435,7 @@ def load_all() -> None:
def observer_loop() -> None:
global current_index_version
global current_runtime_stamp
global current_chunk_runtime_stamp, current_tags_runtime_stamp, current_tags_index_present
while True:
time.sleep(2)
@@ -384,28 +446,50 @@ def observer_loop() -> None:
continue
new_version = meta.get("index_version") if isinstance(meta.get("index_version"), int) else None
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
new_runtime = None
if isinstance(runtime, dict):
v = runtime.get("last_rebuild_at")
new_runtime = v if isinstance(v, str) else None
new_chunk_runtime, new_tags_runtime, new_tags_index_present = _extract_runtime_state(runtime)
if new_version != current_index_version:
logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
logger.info(
"[Observer] index_version changed (%s -> %s) -> Reload",
str(current_index_version),
str(new_version),
)
load_all()
continue
if new_runtime != current_runtime_stamp:
logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
if new_chunk_runtime != current_chunk_runtime_stamp:
logger.info(
"[Observer] chunk runtime changed (%s -> %s) -> Reload",
str(current_chunk_runtime_stamp),
str(new_chunk_runtime),
)
load_all()
continue
if new_tags_runtime != current_tags_runtime_stamp:
logger.info(
"[Observer] tags runtime changed (%s -> %s) -> Reload",
str(current_tags_runtime_stamp),
str(new_tags_runtime),
)
load_all()
continue
if new_tags_index_present != current_tags_index_present:
logger.info(
"[Observer] tags_index_present changed (%s -> %s) -> Reload",
str(current_tags_index_present),
str(new_tags_index_present),
)
load_all()
except Exception as e:
logger.exception("[Observer ERROR] %s", str(e))
except Exception as exc:
logger.exception("[Observer ERROR] %s", str(exc))
# ============================================================
# Global Exception Handler (forces JSON + logs)
# Global Exception Handler
# ============================================================
@app.exception_handler(Exception)
@@ -427,12 +511,12 @@ async def unhandled_exception_handler(request: Request, exc: Exception):
# ============================================================
@app.on_event("startup")
def startup_event():
def startup_event() -> None:
setup_logging()
logger.info("[VectorService] Startup stamp=%s file=%s", SERVICE_STAMP, str(Path(__file__).resolve()))
load_all()
t = threading.Thread(target=observer_loop, daemon=True)
t.start()
observer = threading.Thread(target=observer_loop, daemon=True)
observer.start()
logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
@@ -441,7 +525,7 @@ def startup_event():
# ============================================================
@app.get("/health")
def health():
def health() -> Dict[str, Any]:
return {
"status": "ok",
"stamp": SERVICE_STAMP,
@@ -451,7 +535,9 @@ def health():
"model_loaded": model is not None,
"embedding_model": loaded_embedding_model_name,
"index_version": current_index_version,
"runtime_stamp": current_runtime_stamp,
"chunk_runtime_stamp": current_chunk_runtime_stamp,
"tags_runtime_stamp": current_tags_runtime_stamp,
"tags_index_present": current_tags_index_present,
"tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None,
"tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None,
"chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None,
@@ -463,17 +549,17 @@ def health():
@app.post("/reload")
def reload() -> Dict[str, str]:
    """Trigger an immediate ``load_all()`` run on request.

    Returns:
        ``{"status": "reloaded", "stamp": SERVICE_STAMP}`` on success.

    Raises:
        HTTPException: Status 500 with the error text as detail when
            ``load_all()`` fails; the failure is logged with its traceback.
    """
    try:
        load_all()
    except Exception as exc:
        logger.exception("reload failed")
        # Chain explicitly so the original error stays attached as the cause.
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return {"status": "reloaded", "stamp": SERVICE_STAMP}
@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
def search_chunks(req: SearchRequest) -> List[Dict[str, Any]]:
if chunk_index is None or chunk_ids is None or model is None:
raise HTTPException(status_code=503, detail="Chunk index not available")
@@ -491,16 +577,16 @@ def search_chunks(req: SearchRequest):
doc_filter: Optional[List[str]] = None
if req.doc_ids:
doc_filter = []
for d in req.doc_ids:
dk = _as_key(d)
if dk:
doc_filter.append(dk)
for document_id in req.doc_ids:
document_key = _as_key(document_id)
if document_key:
doc_filter.append(document_key)
effective_limit = max(limit * 5, 50)
effective_limit = min(effective_limit, 500)
scores, indices = chunk_index.search(query_vec, effective_limit)
results = []
results: List[Dict[str, Any]] = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
@@ -512,20 +598,20 @@ def search_chunks(req: SearchRequest):
if not chunk_id_key:
continue
doc_id = chunk_doc_map.get(chunk_id_key)
document_id = chunk_doc_map.get(chunk_id_key)
if doc_filter is not None:
if doc_id is None or doc_id not in doc_filter:
if document_id is None or document_id not in doc_filter:
continue
payload = {
payload: Dict[str, Any] = {
"chunk_id": raw_chunk_id,
"score": float(score),
"document_id": doc_id,
"document_id": document_id,
}
ci = chunk_pos_map.get(chunk_id_key)
if isinstance(ci, int):
payload["chunk_index"] = ci
chunk_position = chunk_pos_map.get(chunk_id_key)
if isinstance(chunk_position, int):
payload["chunk_index"] = chunk_position
results.append(payload)
@@ -536,13 +622,13 @@ def search_chunks(req: SearchRequest):
except HTTPException:
raise
except Exception as e:
except Exception as exc:
logger.exception("search-chunks failure")
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(exc))
@app.post("/search-tags")
def search_tags(req: SearchRequest):
def search_tags(req: SearchRequest) -> List[Dict[str, Any]]:
if tag_index is None or tag_ids is None or model is None:
raise HTTPException(status_code=503, detail="Tag index not available")
@@ -564,37 +650,47 @@ def search_tags(req: SearchRequest):
scores, indices = tag_index.search(query_vec, limit)
results = []
results: List[Dict[str, Any]] = []
seen_tag_ids = set()
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if idx < 0 or idx >= len(tag_ids):
continue
tag_id = tag_ids[idx]
tag_id_key = _as_key(tag_id) or ""
raw_tag_id = tag_ids[idx]
tag_id_key = _as_key(raw_tag_id)
if not tag_id_key or tag_id_key in seen_tag_ids:
continue
payload: Dict[str, Any] = {
"tag_id": tag_id,
"tag_id": raw_tag_id,
"score": float(score),
}
meta = tag_meta_map.get(tag_id_key)
if isinstance(meta, dict):
label = meta.get("label")
ttype = meta.get("tag_type")
tag_type = meta.get("tag_type")
if isinstance(label, str) and label.strip():
payload["label"] = label
if isinstance(ttype, str) and ttype.strip():
payload["tag_type"] = ttype
if isinstance(label, str):
payload["label"] = label.strip()
payload["tag_type"] = _normalize_tag_type(tag_type)
else:
payload["label"] = ""
payload["tag_type"] = "generic"
results.append(payload)
seen_tag_ids.add(tag_id_key)
if len(results) >= limit:
break
return results
except HTTPException:
raise
except Exception as e:
except Exception as exc:
logger.exception("search-tags failure")
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(exc))

View File

@@ -4,77 +4,84 @@ declare(strict_types=1);
namespace App\Catalog;
use App\Config\CatalogIntentConfig;
use App\Entity\Document;
use App\Tag\TagTypes;
use App\Tag\TagVectorSearchClient;
use Doctrine\DBAL\Connection;
use Symfony\Component\Uid\Uuid;
/**
* EntityCatalogService
* Builds deterministic catalog lists from a validated catalog entity term.
*
* Deterministische Katalog-Listen auf Basis eines Entity-Terms:
* - TagVectorSearch (Score-Gate + Ambiguity-Check)
* - DB Query auf document_tag + document (ACTIVE)
* - Rückgabe als EIN Textblock (string) oder null (Fallback auf normalen Retrieval)
*
* Schritt-3 Änderung:
* - Headline ist NICHT mehr hardcoded
* - Headline basiert dynamisch auf dem gefundenen Tag
* This service is intentionally conservative:
* - only strong catalog_entity matches may open the catalog path
* - ambiguous matches fall back to normal retrieval
* - only ACTIVE documents are listed
*/
final class EntityCatalogService
{
private const MIN_SCORE = 0.55;
private const AMBIGUITY_DELTA = 0.05;
private const SEARCH_LIMIT = 3;
public function __construct(
private readonly TagVectorSearchClient $tagVectorClient,
private readonly Connection $connection,
) {}
) {
}
/**
* @return string|null Textblock oder null (wenn kein sicherer Catalog möglich ist)
* Returns a catalog text block or null when no safe catalog path exists.
*/
public function listByTerm(string $entityTerm): ?string
{
$entityTerm = trim($entityTerm);
if ($entityTerm === '') {
return null;
}
// 1) Tag-Vektorsuche (Top 3 für Ambiguity-Prüfung)
$hits = $this->tagVectorClient->search($entityTerm, 3);
$hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT);
if ($hits === []) {
return null;
}
$best = $hits[0];
$bestScore = (float) ($best['score'] ?? 0.0);
$bestScore = isset($best['score']) ? (float)$best['score'] : 0.0;
if ($bestScore < self::MIN_SCORE) {
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
return null;
}
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null;
}
// 2) Ambiguity: wenn Top2 zu nah ist → konservativ abbrechen
if (isset($hits[1])) {
$secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0;
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
$secondScore = (float) ($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
return null;
}
}
$tagHex = (string)($best['tag_id'] ?? '');
if ($tagHex === '') {
$tagId = trim((string) ($best['tag_id'] ?? ''));
if ($tagId === '') {
return null;
}
// OPTIONAL: Falls TagVectorSearchClient künftig tag_label zurückliefert,
// kann das hier direkt verwendet werden.
$tagLabel = isset($best['tag_label']) ? (string)$best['tag_label'] : null;
try {
$tagBinaryId = Uuid::fromString($tagId)->toBinary();
} catch (\Throwable) {
return null;
}
$tagLabel = trim((string) ($best['label'] ?? ''));
// 3) DB Query: alle ACTIVE Dokumente zu diesem Tag
$rows = $this->connection->fetchAllAssociative(
'
SELECT d.title
SELECT DISTINCT d.title
FROM document d
INNER JOIN document_tag dt ON dt.document_id = d.id
WHERE dt.tag_id = :tagId
@@ -82,8 +89,8 @@ final class EntityCatalogService
ORDER BY d.title ASC
',
[
'tagId' => Uuid::fromString($tagHex)->toBinary(),
'status' => 'ACTIVE',
'tagId' => $tagBinaryId,
'status' => Document::STATUS_ACTIVE,
]
);
@@ -92,37 +99,42 @@ final class EntityCatalogService
}
$titles = [];
foreach ($rows as $row) {
$t = trim((string)($row['title'] ?? ''));
if ($t !== '') {
$titles[] = $t;
$title = trim((string) ($row['title'] ?? ''));
if ($title === '') {
continue;
}
$titles[$title] = $title;
}
if ($titles === []) {
return null;
}
return $this->buildTextBlock($tagLabel, $titles);
return $this->buildTextBlock(
$tagLabel !== '' ? $tagLabel : null,
array_values($titles)
);
}
/**
* Dynamische Headline:
* - Wenn Tag-Label vorhanden → verwenden
* - Sonst generischer Fallback
* Builds a stable human-readable list block for the prompt.
*
* @param list<string> $titles
*/
private function buildTextBlock(?string $tagLabel, array $titles): string
{
$headline = 'Folgende Einträge sind verfügbar:';
if (\is_string($tagLabel) && \trim($tagLabel) !== '') {
$headline = sprintf(
'Folgende %s sind verfügbar:',
$tagLabel
);
if ($tagLabel !== null && trim($tagLabel) !== '') {
$headline = sprintf('Folgende %s sind verfügbar:', trim($tagLabel));
}
$lines = [];
foreach ($titles as $title) {
$lines[] = '- ' . $title;
}

View File

@@ -1,6 +1,5 @@
<?php
declare(strict_types=1);
namespace App\Command;
@@ -36,8 +35,7 @@ final class SystemRebuildCommand extends Command
private readonly VectorIndexHealthService $health,
private readonly TagVectorIndexHealthService $tagHealth,
private readonly string $projectDir,
)
{
) {
parent::__construct();
}
@@ -58,6 +56,7 @@ final class SystemRebuildCommand extends Command
if (!$input->getOption('hard')) {
$io->error('Safety switch missing: you must pass --hard to run this command.');
$io->writeln('Example: bin/console mto:agent:system:rebuild --hard');
return Command::FAILURE;
}
@@ -65,9 +64,29 @@ final class SystemRebuildCommand extends Command
$io->title('mto:agent:system:rebuild --hard');
// ---------------------------------------------------------
// 1) GLOBAL REINDEX (chunks rewrite + vector rebuild)
// ---------------------------------------------------------
if (!$this->runGlobalReindex($io, $dryRun)) {
return Command::FAILURE;
}
if (!$this->runTagRebuild($io, $input, $dryRun)) {
return Command::FAILURE;
}
if (!$this->runVectorServiceReload($io, $input, $dryRun)) {
return Command::FAILURE;
}
if (!$this->runHealthChecks($io, $input)) {
return Command::FAILURE;
}
$io->success('System rebuild finished.');
return Command::SUCCESS;
}
private function runGlobalReindex(SymfonyStyle $io, bool $dryRun): bool
{
$io->section('1/4 Global reindex (chunks + vector index)');
$job = $this->jobService->startJob(
@@ -82,55 +101,70 @@ final class SystemRebuildCommand extends Command
try {
$this->orchestrator->runExistingJob($job, $dryRun);
$io->success('Global reindex completed.');
return true;
} catch (\Throwable $e) {
$io->error('Global reindex failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
}
}
private function runTagRebuild(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
{
if ((bool) $input->getOption('no-tags')) {
$io->section('2/4 Tag rebuild');
$io->note('Skipped due to --no-tags.');
return true;
}
// ---------------------------------------------------------
// 2) TAG REBUILD (tags.ndjson + vector_tags.index)
// ---------------------------------------------------------
if (!$input->getOption('no-tags')) {
$io->section('2/4 Tag rebuild (tags.ndjson + vector_tags.index)');
if ($dryRun) {
$io->note('dry-run enabled: tag rebuild skipped (would export + build tag index).');
} else {
return true;
}
try {
$export = $this->tagExporter->export();
$io->writeln('<info>Exported tags.ndjson</info>');
$io->writeln('Path: ' . $export['path']);
$io->writeln('Tags: ' . $export['tags']);
$io->writeln('Lines: ' . $export['lines']);
$io->writeln('Bytes: ' . $export['bytes']);
$io->writeln('Path: ' . (string) $export['path']);
$io->writeln('Tags: ' . (string) $export['tags']);
$io->writeln('Lines: ' . (string) $export['lines']);
$io->writeln('Bytes: ' . (string) $export['bytes']);
$this->tagIndexBuilder->build();
$io->writeln('<info>Built vector_tags.index</info>');
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$io->success('Tag rebuild completed.');
return true;
} catch (\Throwable $e) {
$io->error('Tag rebuild failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
}
}
} else {
$io->section('2/4 Tag rebuild');
$io->note('Skipped due to --no-tags.');
}
// ---------------------------------------------------------
// 3) VECTOR SERVICE (install deps + start + reload)
// ---------------------------------------------------------
if (!$input->getOption('no-reload')) {
private function runVectorServiceReload(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
{
if ((bool) $input->getOption('no-reload')) {
$io->section('3/4 Vector service reload');
$io->note('Skipped due to --no-reload.');
return true;
}
$io->section('3/4 Vector service reload (uvicorn)');
if ($dryRun) {
$io->note('dry-run enabled: service reload skipped.');
} else {
return true;
}
$cmd = [
'.venv/bin/python',
'python/vector/vector_control.py',
@@ -138,85 +172,110 @@ final class SystemRebuildCommand extends Command
'--start',
'--reload',
'--port', '8090',
'--host', '0.0.0.0'
'--host', '0.0.0.0',
];
$process = new Process($cmd, $this->projectDir);
$process->setTimeout(600);
$process->run();
$out = trim($process->getOutput());
$err = trim($process->getErrorOutput());
$stdout = trim($process->getOutput());
$stderr = trim($process->getErrorOutput());
if ($out !== '') {
$io->writeln($out);
if ($stdout !== '') {
$io->writeln($stdout);
}
if ($err !== '') {
$io->writeln('<comment>' . $err . '</comment>');
if ($stderr !== '') {
$io->writeln('<comment>' . $stderr . '</comment>');
}
if (!$process->isSuccessful()) {
$io->error('Vector service reload failed (non-zero exit code).');
return Command::FAILURE;
return false;
}
$io->success('Vector service reloaded.');
}
} else {
$io->section('3/4 Vector service reload');
$io->note('Skipped due to --no-reload.');
return true;
}
private function runHealthChecks(SymfonyStyle $io, InputInterface $input): bool
{
if ((bool) $input->getOption('no-health')) {
$io->section('4/4 Health check');
$io->note('Skipped due to --no-health.');
return true;
}
// ---------------------------------------------------------
// 4) HEALTH CHECK (NDJSON vs vector meta)
// ---------------------------------------------------------
if (!$input->getOption('no-health')) {
$io->section('4/4 Health check');
try {
$report = $this->health->check();
$chunkReport = $this->health->check();
} catch (\Throwable $e) {
$io->error('Health check failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
}
try {
$reportTag = $this->tagHealth->check();
$tagReport = $this->tagHealth->check();
} catch (\Throwable $e) {
$io->error('Tag health check failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
}
$io->definitionList(
['ndjson_exists' => $report['ndjson_exists'] ? 'yes' : 'no'],
['ndjson_chunk_count' => (string)$report['ndjson_chunk_count']],
['vector_exists' => $report['vector_exists'] ? 'yes' : 'no'],
['meta_exists' => $report['meta_exists'] ? 'yes' : 'no'],
['vector_chunk_count' => (string)$report['vector_chunk_count']],
['status' => (string)$report['status']],
);
$this->renderChunkHealth($io, $chunkReport);
$this->renderTagHealth($io, $tagReport);
$io->definitionList(
['tags_ndjson_exists' => $reportTag['tags_ndjson_exists'] ? 'yes' : 'no'],
['tags_ndjson_count' => (string)$reportTag['tags_ndjson_count']],
['tag_vector_exists' => $reportTag['vector_exists'] ? 'yes' : 'no'],
['tag_meta_exists' => $reportTag['meta_exists'] ? 'yes' : 'no'],
['vector_tag_count' => (string)$reportTag['vector_tag_count']],
['status' => (string)$reportTag['status']],
);
if (!$this->isHealthOk((string) ($chunkReport['status'] ?? 'UNKNOWN'))) {
$io->error('Chunk health check not OK: ' . (string) ($chunkReport['status'] ?? 'UNKNOWN'));
if (!in_array($report['status'], ['OK', 'OK_EMPTY'], true)) {
$io->error('Health check not OK: ' . $report['status']);
return Command::FAILURE;
return false;
}
if (!$this->isHealthOk((string) ($tagReport['status'] ?? 'UNKNOWN'))) {
$io->error('Tag health check not OK: ' . (string) ($tagReport['status'] ?? 'UNKNOWN'));
return false;
}
$io->success('Health check OK.');
} else {
$io->section('4/4 Health check');
$io->note('Skipped due to --no-health.');
return true;
}
$io->success('System rebuild finished.');
return Command::SUCCESS;
/**
 * Prints the chunk health report as a key/value definition list.
 *
 * Flag-like entries are rendered as "yes"/"no", missing counters as 0 and a
 * missing status as "UNKNOWN".
 *
 * @param array<string, mixed> $report
 */
private function renderChunkHealth(SymfonyStyle $io, array $report): void
{
    // Small formatters so every row is built the same way.
    $yesNo = static fn (string $key): string => empty($report[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($report[$key] ?? 0);

    $io->definitionList(
        ['ndjson_exists' => $yesNo('ndjson_exists')],
        ['ndjson_chunk_count' => $count('ndjson_chunk_count')],
        ['vector_exists' => $yesNo('vector_exists')],
        ['meta_exists' => $yesNo('meta_exists')],
        ['vector_chunk_count' => $count('vector_chunk_count')],
        ['status' => (string) ($report['status'] ?? 'UNKNOWN')],
    );
}
/**
 * Prints the tag health report as a key/value definition list.
 *
 * Flag-like entries are rendered as "yes"/"no", missing counters as 0 and a
 * missing status as "UNKNOWN". The report keys "vector_exists" and
 * "meta_exists" are displayed under the labels "tag_vector_exists" and
 * "tag_meta_exists".
 *
 * @param array<string, mixed> $report
 */
private function renderTagHealth(SymfonyStyle $io, array $report): void
{
    $yesNo = static fn (string $key): string => empty($report[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($report[$key] ?? 0);

    $io->definitionList(
        ['tags_ndjson_exists' => $yesNo('tags_ndjson_exists')],
        ['tags_ndjson_count' => $count('tags_ndjson_count')],
        ['tag_vector_exists' => $yesNo('vector_exists')],
        ['tag_meta_exists' => $yesNo('meta_exists')],
        ['vector_tag_count' => $count('vector_tag_count')],
        ['tags_with_active_document_ids' => $count('tags_with_active_document_ids')],
        ['meta_valid' => $yesNo('meta_valid')],
        ['status' => (string) ($report['status'] ?? 'UNKNOWN')],
    );
}
/**
 * Tells whether a health status counts as passing.
 *
 * Only the exact strings "OK" and "OK_EMPTY" are accepted.
 */
private function isHealthOk(string $status): bool
{
    return $status === 'OK' || $status === 'OK_EMPTY';
}
}

View File

@@ -8,11 +8,13 @@ use App\Tag\TagVectorIndexHealthService;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'mto:agent:tag:health',
description: 'Health-Check für TAG/FAISS Konsistenz'
description: 'Health-Check für Tag-/FAISS-Konsistenz'
)]
final class TagHealthCheckCommand extends Command
{
@@ -22,14 +24,87 @@ final class TagHealthCheckCommand extends Command
parent::__construct();
}
/**
 * Registers the value-less --summary option of this command.
 */
protected function configure(): void
{
    $description = 'Gibt eine lesbare Zusammenfassung statt JSON aus.';

    $this->addOption('summary', null, InputOption::VALUE_NONE, $description);
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$result = $this->health->check();
$status = trim((string) ($result['status'] ?? ''));
$output->writeln(json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));
if ($status === '') {
$status = 'UNKNOWN';
$result['status'] = $status;
$result['error'] = 'Health service returned no status.';
}
return str_starts_with($result['status'], 'OK')
if ((bool) $input->getOption('summary')) {
$this->renderSummary(new SymfonyStyle($input, $output), $result);
} else {
$this->renderJson($output, $result);
}
return $this->isHealthy($status)
? Command::SUCCESS
: Command::FAILURE;
}
/**
 * Renders the health result as a titled definition list.
 *
 * Flag-like entries are shown as "yes"/"no", missing counters as 0 and a
 * missing status as "UNKNOWN". A non-empty "error" entry is additionally
 * printed as a warning below the list.
 *
 * @param array<string, mixed> $result
 */
private function renderSummary(SymfonyStyle $io, array $result): void
{
    $yesNo = static fn (string $key): string => empty($result[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($result[$key] ?? 0);

    $io->title('Tag Vector Health');
    $io->definitionList(
        ['status' => (string) ($result['status'] ?? 'UNKNOWN')],
        ['tags_ndjson_exists' => $yesNo('tags_ndjson_exists')],
        ['tags_ndjson_count' => $count('tags_ndjson_count')],
        ['vector_exists' => $yesNo('vector_exists')],
        ['meta_exists' => $yesNo('meta_exists')],
        ['vector_tag_count' => $count('vector_tag_count')],
        ['meta_valid' => $yesNo('meta_valid')],
        ['tags_with_active_document_ids' => $count('tags_with_active_document_ids')],
    );

    $error = $result['error'] ?? null;
    if (empty($error)) {
        return;
    }

    $io->warning((string) $error);
}
/**
 * Writes the health result as pretty-printed JSON.
 *
 * If the result cannot be encoded, a minimal UNKNOWN/json_encode_failed
 * document is written instead; if even that cannot be encoded, a hard-coded
 * JSON string with the same content is used.
 *
 * @param array<string, mixed> $result
 */
private function renderJson(OutputInterface $output, array $result): void
{
    $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    $encoded = json_encode($result, $flags);
    if (is_string($encoded)) {
        $output->writeln($encoded);

        return;
    }

    // Primary encoding failed: fall back to a fixed error document.
    $fallback = json_encode([
        'status' => 'UNKNOWN',
        'error' => 'json_encode_failed',
    ], $flags);

    $output->writeln(
        is_string($fallback)
            ? $fallback
            : "{\"status\":\"UNKNOWN\",\"error\":\"json_encode_failed\"}"
    );
}
/**
 * Tells whether a health status counts as healthy.
 *
 * Only the exact strings "OK" and "OK_EMPTY" are accepted.
 */
private function isHealthy(string $status): bool
{
    return $status === 'OK' || $status === 'OK_EMPTY';
}
}

View File

@@ -14,6 +14,7 @@ use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'mto:agent:tags:job:run',
@@ -39,112 +40,152 @@ final class TagRebuildRunJobCommand extends Command
protected function execute(InputInterface $input, OutputInterface $output): int
{
$jobId = $input->getArgument('jobId');
$io = new SymfonyStyle($input, $output);
$jobId = trim((string) $input->getArgument('jobId'));
$create = (bool) $input->getOption('create');
if (!$create && !$jobId) {
$output->writeln('<error>You must provide either a jobId or use --create.</error>');
if (!$create && $jobId === '') {
$io->error('You must provide either a jobId or use --create.');
return Command::FAILURE;
}
if ($create && $jobId) {
$output->writeln('<error>Use either jobId OR --create, not both.</error>');
if ($create && $jobId !== '') {
$io->error('Use either jobId OR --create, not both.');
return Command::FAILURE;
}
if ($create) {
$job = null;
$lockHandle = null;
try {
$job = $create ? $this->createJob($io) : $this->findJob($jobId);
$lockHandle = $this->acquireLock();
$job->markRunning();
$this->em->flush();
$export = $this->exporter->export();
$this->assertValidExport($export);
$io->writeln('<info>tags.ndjson exported</info>');
$io->writeln('Path: ' . (string) $export['path']);
$io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
$io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
$io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
$this->builder->build();
$job->markCompleted();
$this->em->flush();
$io->success('Tag rebuild successful.');
return Command::SUCCESS;
} catch (\Throwable $e) {
if ($job instanceof TagRebuildJob) {
$job->markFailed($this->buildSafeErrorMessage($e));
$this->em->flush();
}
$io->error('FAILED: ' . $e->getMessage());
return Command::FAILURE;
} finally {
$this->releaseLock($lockHandle);
}
}
private function createJob(SymfonyStyle $io): TagRebuildJob
{
$job = new TagRebuildJob();
$this->em->persist($job);
$this->em->flush();
$jobId = $job->getId();
$output->writeln('<info>Created new TagRebuildJob: ' . $jobId . '</info>');
} else {
$io->writeln('<info>Created new TagRebuildJob: ' . (string) $job->getId() . '</info>');
return $job;
}
private function findJob(string $jobId): TagRebuildJob
{
/** @var TagRebuildJob|null $job */
$job = $this->em->getRepository(TagRebuildJob::class)->find($jobId);
if (!$job instanceof TagRebuildJob) {
$output->writeln('<error>Job not found.</error>');
return Command::FAILURE;
}
throw new \RuntimeException('Job not found.');
}
$fh = null;
return $job;
}
try {
// ---------------------------------------------------------
// LOCK INITIALIZATION
// ---------------------------------------------------------
/**
* @return resource
*/
private function acquireLock()
{
$lockDir = \dirname($this->lockFilePath);
if (!\is_dir($lockDir) && !@\mkdir($lockDir, 0775, true) && !\is_dir($lockDir)) {
throw new \RuntimeException('Cannot create lock directory.');
}
$fh = @\fopen($this->lockFilePath, 'c+');
if (!$fh) {
$handle = @\fopen($this->lockFilePath, 'c+');
if ($handle === false) {
throw new \RuntimeException('Cannot open lock file: ' . $this->lockFilePath);
}
if (!@\flock($fh, LOCK_EX | LOCK_NB)) {
if (!@\flock($handle, LOCK_EX | LOCK_NB)) {
@\fclose($handle);
throw new \RuntimeException('Another tag rebuild is currently running (lock busy).');
}
// ---------------------------------------------------------
// MARK RUNNING
// ---------------------------------------------------------
$job->markRunning();
$this->em->flush();
return $handle;
}
// ---------------------------------------------------------
// EXPORT TAGS (NDJSON)
// ---------------------------------------------------------
$export = $this->exporter->export();
/**
* @param resource|null $handle
*/
private function releaseLock($handle): void
{
if (!is_resource($handle)) {
return;
}
if (
!isset($export['path']) ||
!\is_string($export['path']) ||
!\file_exists($export['path'])
) {
@\flock($handle, LOCK_UN);
@\fclose($handle);
}
/**
* @param array<string, mixed> $export
*/
private function assertValidExport(array $export): void
{
$path = trim((string) ($export['path'] ?? ''));
if ($path === '' || !\is_file($path)) {
throw new \RuntimeException('Export failed: NDJSON file missing.');
}
if (isset($export['count']) && (int) $export['count'] === 0) {
throw new \RuntimeException('Export produced zero tags.');
}
$tags = (int) ($export['tags'] ?? 0);
$lines = (int) ($export['lines'] ?? 0);
// ---------------------------------------------------------
// BUILD VECTOR INDEX
// ---------------------------------------------------------
$this->builder->build();
// ---------------------------------------------------------
// MARK COMPLETED
// ---------------------------------------------------------
$job->markCompleted();
$this->em->flush();
$output->writeln('<info>Tag rebuild successful.</info>');
$output->writeln('NDJSON: ' . $export['path']);
return Command::SUCCESS;
}
catch (\Throwable $e) {
if (isset($job)) {
$job->markFailed($e->getMessage());
$this->em->flush();
}
$output->writeln('<error>FAILED: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
finally {
if ($fh) {
@\flock($fh, LOCK_UN);
@\fclose($fh);
if ($tags < 0 || $lines < 0) {
throw new \RuntimeException('Export returned invalid statistics.');
}
}
/**
 * Produces the failure text for a throwable.
 *
 * The message is trimmed and cut to at most 4000 characters; a blank message
 * is replaced by a generic fallback text.
 */
private function buildSafeErrorMessage(\Throwable $e): string
{
    $text = trim($e->getMessage());

    return $text !== ''
        ? mb_substr($text, 0, 4000)
        : 'Unknown tag rebuild failure.';
}
}

View File

@@ -9,6 +9,7 @@ use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'mto:agent:tags:export',
@@ -17,26 +18,51 @@ use Symfony\Component\Console\Output\OutputInterface;
final class TagsExportCommand extends Command
{
public function __construct(
private TagNdjsonExporter $exporter,
private readonly TagNdjsonExporter $exporter,
) {
parent::__construct();
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
try {
$result = $this->exporter->export();
} catch (\Throwable $e) {
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
$this->assertValidExport($result);
$output->writeln('<info>Tags NDJSON exported</info>');
$output->writeln('Path: ' . $result['path']);
$output->writeln('Tags: ' . $result['tags']);
$output->writeln('Lines: ' . $result['lines']);
$output->writeln('Bytes: ' . $result['bytes']);
$io->writeln('<info>Tags NDJSON exported</info>');
$io->writeln('Path: ' . (string) ($result['path'] ?? ''));
$io->writeln('Tags: ' . (string) ($result['tags'] ?? 0));
$io->writeln('Lines: ' . (string) ($result['lines'] ?? 0));
$io->writeln('Bytes: ' . (string) ($result['bytes'] ?? 0));
$io->success('Tag export completed.');
return Command::SUCCESS;
} catch (\Throwable $e) {
$io->error($e->getMessage());
return Command::FAILURE;
}
}
/**
 * Validates the exporter result before it is reported.
 *
 * @param array<string, mixed> $result
 *
 * @throws \RuntimeException when "path" is blank or not an existing file, or
 *                           when "tags", "lines" or "bytes" is negative
 */
private function assertValidExport(array $result): void
{
    $path = trim((string) ($result['path'] ?? ''));
    $fileExists = $path !== '' && is_file($path);

    if (!$fileExists) {
        throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
    }

    // Missing statistics default to 0; only negative values are rejected.
    $stats = array_map(
        static fn (string $key): int => (int) ($result[$key] ?? 0),
        ['tags', 'lines', 'bytes']
    );

    if (min($stats) < 0) {
        throw new \RuntimeException('Tag export returned invalid statistics.');
    }
}
}

View File

@@ -4,13 +4,13 @@ declare(strict_types=1);
namespace App\Command;
use App\Index\IndexMetaManager;
use App\Tag\TagNdjsonExporter;
use App\Tag\TagVectorIndexBuilder;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand(
name: 'mto:agent:tags:rebuild',
@@ -21,45 +21,54 @@ final class TagsRebuildCommand extends Command
public function __construct(
private readonly TagNdjsonExporter $exporter,
private readonly TagVectorIndexBuilder $builder,
private readonly IndexMetaManager $metaManager,
) {
parent::__construct();
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
try {
// -----------------------------------------
// 1) Export tags.ndjson
// -----------------------------------------
$export = $this->exporter->export();
$this->assertValidExport($export);
$output->writeln('<info>1/3 Exported tags.ndjson</info>');
$output->writeln('Path: ' . $export['path']);
$output->writeln('Tags: ' . $export['tags']);
$output->writeln('Lines: ' . $export['lines']);
$output->writeln('Bytes: ' . $export['bytes']);
$io->writeln('<info>1/2 Exported tags.ndjson</info>');
$io->writeln('Path: ' . (string) ($export['path'] ?? ''));
$io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
$io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
$io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
// -----------------------------------------
// 2) Build FAISS tag index
// -----------------------------------------
$this->builder->build();
$output->writeln('<info>2/3 Built vector_tags.index</info>');
// -----------------------------------------
// 3) Enterprise Commit Marker
// -----------------------------------------
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$output->writeln('<info>3/3 Runtime commit marker updated</info>');
} catch (\Throwable $e) {
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
$io->writeln('<info>2/2 Built vector_tags.index</info>');
$io->success('Tag rebuild completed.');
return Command::SUCCESS;
} catch (\Throwable $e) {
$io->error($e->getMessage());
return Command::FAILURE;
}
}
/**
 * Validates the exporter result before the index build continues.
 *
 * @param array<string, mixed> $export
 *
 * @throws \RuntimeException when "path" is blank or not an existing file, or
 *                           when "tags", "lines" or "bytes" is negative
 */
private function assertValidExport(array $export): void
{
    $path = trim((string) ($export['path'] ?? ''));
    $fileExists = $path !== '' && is_file($path);

    if (!$fileExists) {
        throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
    }

    // Missing statistics default to 0; only negative values are rejected.
    $stats = array_map(
        static fn (string $key): int => (int) ($export[$key] ?? 0),
        ['tags', 'lines', 'bytes']
    );

    if (min($stats) < 0) {
        throw new \RuntimeException('Tag export returned invalid statistics.');
    }
}
}

View File

@@ -1,12 +1,62 @@
<?php
declare(strict_types=1);
namespace App\Config;
class CatalogIntentConfig
/**
* Central thresholds for deterministic catalog-entity detection.
*
* The values in this class intentionally define a conservative gate:
* - only strong semantic tag hits may open the catalog path
* - small score gaps between the best and second-best hit are treated as ambiguous
*/
final class CatalogIntentConfig
{
    /**
     * Lowest similarity score at which a catalog entity hit is accepted.
     */
    public const MIN_SCORE = 0.72;

    /**
     * Score gap below which the best and second-best hit count as ambiguous.
     */
    public const AMBIGUITY_DELTA = 0.02;

    /**
     * How many candidate tag hits are inspected during catalog intent detection.
     *
     * Deliberately wider than the final accepted set, so that strong
     * catalog_entity tags are not hidden behind generic tags in the raw result.
     */
    public const SEARCH_LIMIT = 6;

    /**
     * Lower bound used by clampScore().
     */
    public const MIN_ALLOWED_SCORE = 0.0;

    /**
     * Upper bound used by clampScore().
     */
    public const MAX_ALLOWED_SCORE = 1.0;

    /**
     * Static-only holder of constants and helpers; never instantiated.
     */
    private function __construct()
    {
    }

    /**
     * Returns true when the score reaches MIN_SCORE (inclusive).
     */
    public static function isScoreAccepted(float $score): bool
    {
        $threshold = self::MIN_SCORE;

        return $score >= $threshold;
    }

    /**
     * Returns true when the absolute gap between both scores is strictly
     * smaller than AMBIGUITY_DELTA.
     */
    public static function isAmbiguous(float $bestScore, float $secondScore): bool
    {
        $gap = abs($bestScore - $secondScore);

        return $gap < self::AMBIGUITY_DELTA;
    }

    /**
     * Limits the score to the range MIN_ALLOWED_SCORE..MAX_ALLOWED_SCORE.
     */
    public static function clampScore(float $score): float
    {
        // Cap at the upper bound first, then lift to the lower bound.
        $capped = min(self::MAX_ALLOWED_SCORE, $score);

        return max(self::MIN_ALLOWED_SCORE, $capped);
    }
}

View File

@@ -1,5 +1,6 @@
<?php
declare(strict_types=1);
namespace App\Controller\Admin;
@@ -17,25 +18,22 @@ final class DashboardController extends AbstractController
#[Route('', name: 'admin_dashboard_null')]
#[Route('/', name: 'admin_dashboard_trail')]
#[Route('/admin', name: 'admin_dashboard_alias')]
public function trailNull(IndexMetaManager $metaManager,VectorIndexHealthService $health): RedirectResponse
public function redirectToDashboard(): RedirectResponse
{
return $this->redirectToRoute('admin_dashboard');
}
#[Route('/admin/dashboard', name: 'admin_dashboard')]
public function dashboard(IndexMetaManager $metaManager,VectorIndexHealthService $health,TagVectorIndexHealthService $tagHealth): Response
{
$chunkCount = $metaManager->getRuntimeChunkCount();
$limit = IngestFlow::CHUNK_LIMIT_HARD;
#[Route('/admin/dashboard', name: 'admin_dashboard', methods: ['GET'])]
public function dashboard(
IndexMetaManager $metaManager,
VectorIndexHealthService $health,
TagVectorIndexHealthService $tagHealth
): Response {
return $this->render('admin/dashboard/index.html.twig', [
'chunkCount' => $chunkCount,
'chunkLimit' => $limit,
'chunkCount' => $metaManager->getRuntimeChunkCount(),
'chunkLimit' => IngestFlow::CHUNK_LIMIT_HARD,
'vectorHealth' => $health->check(),
'tagVectorHealth' => $tagHealth->check(),
]);
}
}

View File

@@ -1,10 +1,13 @@
<?php
declare(strict_types=1);
namespace App\Controller\Admin;
use App\Entity\Document;
use App\Entity\DocumentVersion;
use App\Entity\IngestJob;
use App\Entity\User;
use App\Service\DocumentService;
use App\Service\FormatText;
use App\Service\IngestJobService;
@@ -23,9 +26,11 @@ use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\Uid\Uuid;
#[Route('/admin/documents')]
class DocumentController extends AbstractController
final class DocumentController extends AbstractController
{
#[Route('', name: 'admin_documents')]
private const INGEST_DUPLICATE_WINDOW_SECONDS = 3;
#[Route('', name: 'admin_documents', methods: ['GET'])]
public function index(EntityManagerInterface $em): Response
{
$documents = $em->getRepository(Document::class)
@@ -46,115 +51,106 @@ class DocumentController extends AbstractController
#[Route(
'/{id}',
name: 'admin_document_show',
requirements: ['id' => '[0-9a-fA-F\-]{36}']
requirements: ['id' => '[0-9a-fA-F\-]{36}'],
methods: ['GET']
)]
public function show(string $id, EntityManagerInterface $em): Response
{
try {
$uuid = Uuid::fromString($id);
} catch (\Exception) {
throw new NotFoundHttpException();
}
$document = $em->getRepository(Document::class)->find($uuid);
if (!$document) {
$this->addFlash('danger', 'Das Dokument existiert nicht mehr.');
}
return $this->render('admin/document/show.html.twig', [
'document' => $document,
'document' => $this->findDocument($id, $em),
]);
}
#[Route('/new', name: 'admin_document_new')]
#[Route('/new', name: 'admin_document_new', methods: ['GET', 'POST'])]
public function new(
Request $request,
DocumentService $documentService,
FormatText $formatText,
IngestJobService $jobService,
ParameterBagInterface $params
ParameterBagInterface $params,
EntityManagerInterface $em,
): Response {
if (!$request->isMethod('POST')) {
return $this->render('admin/document/new.html.twig');
}
/** @var UploadedFile|null $file */
$file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
throw new \InvalidArgumentException('No valid file uploaded.');
}
if (!$this->isCsrfTokenValid('create_document', (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
$rawTitle = $request->request->get('title');
$title = is_string($rawTitle) && $rawTitle !== ''
? $rawTitle
: $formatText->slugify($file->getClientOriginalName());
if (!$title) {
$this->addFlash('error', 'Titel ist erforderlich.');
return $this->redirectToRoute('admin_document_new');
}
$uploadDir = (string)$params->get('mto.vector.data.upload.path');
$this->ensureDir($uploadDir);
/** @var UploadedFile|null $file */
$file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
$this->addFlash('danger', 'Keine gültige Datei hochgeladen.');
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName();
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
return $this->redirectToRoute('admin_document_new');
}
$filePath = $uploadDir . '/' . $newFilename;
$title = $this->resolveDocumentTitle($request, $file, $formatText);
if ($title === '') {
$this->addFlash('danger', 'Titel ist erforderlich.');
$document = $documentService->createDocument(
$title,
$filePath,
$this->getUser()
);
return $this->redirectToRoute('admin_document_new');
}
$user = $this->requireUser();
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
try {
$this->ensureDir($uploadDir);
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
$document = $documentService->createDocument($title, $filePath, $user);
$version = $document->getCurrentVersion();
if (!$version instanceof DocumentVersion) {
$this->addFlash('danger', 'Dokument erstellt, aber es wurde keine aktuelle Version erzeugt.');
return $this->redirectToRoute('admin_documents');
throw new \RuntimeException('Dokument erstellt, aber keine aktuelle Version vorhanden.');
}
$job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
$this->getUser(),
$user,
$version->getDocument()->getId(),
$version->getId(),
null,
IngestJob::STATUS_QUEUED
);
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Dokument erstellt, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_documents');
}
$this->startIngestJob((string)$job->getId());
$this->startIngestJob((string) $job->getId(), $logFile);
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument konnte nicht erstellt werden.'));
return $this->redirectToRoute('admin_document_new');
}
}
#[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'])]
#[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'], methods: ['GET', 'POST'])]
public function newVersion(
string $id,
Request $request,
EntityManagerInterface $em,
DocumentService $documentService,
ParameterBagInterface $params
ParameterBagInterface $params,
FormatText $formatText,
): Response {
$document = $em->getRepository(Document::class)->find($id);
if (!$document) {
throw $this->createNotFoundException();
}
$document = $this->findDocument($id, $em);
if (!$request->isMethod('POST')) {
return $this->render('admin/document/new_version.html.twig', [
@@ -162,31 +158,33 @@ class DocumentController extends AbstractController
]);
}
/** @var UploadedFile|null $file */
$file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
$this->addFlash('error', 'Datei ist erforderlich.');
if (!$this->isCsrfTokenValid('create_document_version_' . $id, (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
}
$uploadDir = (string)$params->get('mto.vector.data.upload.path');
$this->ensureDir($uploadDir);
/** @var UploadedFile|null $file */
$file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
$this->addFlash('danger', 'Datei ist erforderlich.');
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName();
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
}
$filePath = $uploadDir . '/' . $newFilename;
try {
$user = $this->requireUser();
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
$this->ensureDir($uploadDir);
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
$documentService->addVersion(
$document,
$filePath,
$this->getUser()
);
$documentService->addVersion($document, $filePath, $user);
$this->addFlash('success', 'Neue Dokumentversion wurde hochgeladen.');
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Neue Dokumentversion konnte nicht erstellt werden.'));
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
}
return $this->redirectToRoute('admin_document_show', ['id' => $id]);
}
@@ -208,44 +206,45 @@ class DocumentController extends AbstractController
throw $this->createAccessDeniedException();
}
$version = $em->getRepository(DocumentVersion::class)->find($versionId);
if (!$version) {
throw $this->createNotFoundException();
}
$version = $this->findDocumentVersion($versionId, $em);
try {
$documentService->activateVersion($version);
$job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
$this->getUser(),
$this->requireUser(),
$version->getDocument()->getId(),
$version->getId(),
null,
IngestJob::STATUS_QUEUED
);
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Aktivierung ok, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(),
'id' => (string) $version->getDocument()->getId(),
]);
}
$this->startIngestJob((string)$job->getId());
$this->startIngestJob((string) $job->getId(), $logFile);
$this->addFlash('success', 'Version aktiviert. Ingest-Job wurde erstellt und gestartet.');
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
} catch (\Throwable $e) {
$this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $e->getMessage());
$this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $this->buildSafeErrorMessage($e, 'Unbekannter Fehler.'));
}
return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(),
'id' => (string) $version->getDocument()->getId(),
]);
}
@@ -260,111 +259,131 @@ class DocumentController extends AbstractController
Request $request,
EntityManagerInterface $em,
IngestJobService $jobService,
): ?RedirectResponse {
): RedirectResponse {
if (!$this->isCsrfTokenValid('ingest_version_' . $versionId, (string) $request->request->get('_token'))) {
throw $this->createAccessDeniedException();
}
$version = $em->getRepository(DocumentVersion::class)->find($versionId);
if (!$version) {
throw $this->createNotFoundException();
}
$version = $this->findDocumentVersion($versionId, $em);
/** @var IngestJob|null $existing */
$existing = $em->getRepository(IngestJob::class)
->findOneBy(
['documentVersionId' => $version->getId()],
['startedAt' => 'DESC']
['startedAt' => 'DESC', 'id' => 'DESC']
);
if ($existing && $existing->getStartedAt() > new \DateTimeImmutable('-3 seconds')) {
return null;
if (
$existing instanceof IngestJob
&& $existing->getStartedAt() > new \DateTimeImmutable('-' . self::INGEST_DUPLICATE_WINDOW_SECONDS . ' seconds')
&& in_array($existing->getStatus(), [IngestJob::STATUS_QUEUED, IngestJob::STATUS_RUNNING], true)
) {
$this->addFlash('info', 'Für diese Version läuft bereits ein aktueller Ingest-Job.');
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $existing->getId(),
]);
}
$job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT,
$this->getUser(),
$this->requireUser(),
$version->getDocument()->getId(),
$version->getId(),
null,
IngestJob::STATUS_QUEUED
);
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('error', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
$this->addFlash('danger', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(),
'id' => (string) $version->getDocument()->getId(),
]);
}
$this->startIngestJob((string)$job->getId());
try {
$this->startIngestJob((string) $job->getId(), $logFile);
} catch (\Throwable $e) {
$jobService->markFailed($job, 'Ingest async start failed: ' . $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Ingest konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_document_show', [
'id' => (string) $version->getDocument()->getId(),
]);
}
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
}
#[Route(
'/reset',
name: 'admin_document_reset',
methods: ['POST']
)]
public function resetCompleteSystem(ParameterBagInterface $params, Connection $connection): ?RedirectResponse
{
if (!$this->canExec()) {
$this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).');
#[Route('/reset', name: 'admin_document_reset', methods: ['POST'])]
public function resetCompleteSystem(
Request $request,
ParameterBagInterface $params,
Connection $connection,
): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
if (!$this->isCsrfTokenValid('system_reset', (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_dashboard');
}
@unlink((string)$params->get('mto.knowledge.ndjson'));
@unlink((string)$params->get('mto.knowledge.vector_index'));
@unlink((string)$params->get('mto.knowledge.vector_index_meta'));
@unlink((string)$params->get('mto.knowledge.index_meta'));
@unlink((string)$params->get('mto.runtime.meta'));
if (!$this->canExec()) {
$this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).');
@unlink((string)$params->get('mto.knowledge.tags_ndjson'));
@unlink((string)$params->get('mto.knowledge.vector_tags_index'));
@unlink((string)$params->get('mto.knowledge.vector_tags_index_meta'));
return $this->redirectToRoute('admin_dashboard');
}
$uploadDir = (string)$params->get('mto.knowledge.upload');
foreach ([
'mto.knowledge.ndjson',
'mto.knowledge.vector_index',
'mto.knowledge.vector_index_meta',
'mto.knowledge.index_meta',
'mto.runtime.meta',
'mto.knowledge.tags_ndjson',
'mto.knowledge.vector_tags_index',
'mto.knowledge.vector_tags_index_meta',
] as $parameterName) {
$path = trim((string) $params->get($parameterName));
if ($path !== '' && is_file($path)) {
@unlink($path);
}
}
$uploadDir = trim((string) $params->get('mto.knowledge.upload'));
if ($uploadDir !== '' && is_dir($uploadDir)) {
exec('rm -rf ' . escapeshellarg($uploadDir));
}
$lockDir = (string)$params->get('mto.locks.dir');
$lockDir = trim((string) $params->get('mto.locks.dir'));
if ($lockDir !== '' && is_dir($lockDir)) {
exec('rm -rf ' . escapeshellarg($lockDir));
}
$sql = '
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document_version;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.ingest_job;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.knowledge_tag;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.tag_rebuild_job;
SET FOREIGN_KEY_CHECKS = 1;
$sql = <<<'SQL'
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document_tag;
TRUNCATE TABLE db.tag_rebuild_job;
TRUNCATE TABLE db.knowledge_tag;
TRUNCATE TABLE db.ingest_job;
TRUNCATE TABLE db.document_version;
TRUNCATE TABLE db.document;
SET FOREIGN_KEY_CHECKS = 1;
';
$connection->executeQuery($sql);
SQL;
$connection->executeStatement($sql);
$this->addFlash('success', 'Das System wurde erfolgreich zurückgesetzt.');
return $this->redirectToRoute('admin_dashboard');
}
@@ -381,44 +400,49 @@ class DocumentController extends AbstractController
IngestJobService $jobService,
LockService $lockService,
): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
if (!$this->isCsrfTokenValid('delete_document_' . $id, (string) $request->request->get('_token'))) {
throw $this->createAccessDeniedException();
}
try {
$uuid = Uuid::fromString($id);
} catch (\Exception) {
throw $this->createNotFoundException();
}
/** @var Document|null $document */
$document = $em->getRepository(Document::class)->find($uuid);
if (!$document) {
throw $this->createNotFoundException();
}
$document = $this->findDocument($id, $em);
if (!$lockService->acquire()) {
$this->addFlash('danger', 'Ein Ingest-Job läuft bereits. Löschen derzeit nicht möglich.');
return $this->redirectToRoute('admin_documents');
}
$lockService->release();
$job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_DELETE,
$this->getUser(),
$this->requireUser(),
$document->getId(),
null,
null,
IngestJob::STATUS_QUEUED
);
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Löschen konnte nicht gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_documents');
}
$this->startIngestJob((string)$job->getId());
try {
$this->startIngestJob((string) $job->getId(), $logFile);
} catch (\Throwable $e) {
$jobService->markFailed($job, 'Delete async start failed: ' . $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Löschvorgang konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_documents');
}
$this->addFlash('success', 'Löschvorgang gestartet. Dokument wird nach Index-Rebuild entfernt.');
@@ -427,10 +451,6 @@ class DocumentController extends AbstractController
]);
}
// =========================================================
// Helpers
// =========================================================
private function canExec(): bool
{
if (!function_exists('exec')) {
@@ -443,6 +463,7 @@ class DocumentController extends AbstractController
}
$list = array_map('trim', explode(',', $disabled));
return !in_array('exec', $list, true);
}
@@ -452,34 +473,209 @@ class DocumentController extends AbstractController
throw new \RuntimeException('Upload directory not configured.');
}
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
if (!is_dir($dir) && !mkdir($dir, 0775, true) && !is_dir($dir)) {
throw new \RuntimeException('Unable to create upload directory.');
}
}
private function startIngestJob(string $jobId): void
private function moveUploadedFile(UploadedFile $file, string $uploadDir, FormatText $formatText): string
{
$projectDir = (string)$this->getParameter('kernel.project_dir');
$originalName = trim((string) $file->getClientOriginalName());
$baseName = pathinfo($originalName !== '' ? $originalName : 'document', PATHINFO_FILENAME);
$extension = strtolower((string) $file->getClientOriginalExtension());
$safeBaseName = $formatText->slugify($baseName !== '' ? $baseName : 'document');
if ($safeBaseName === '') {
$safeBaseName = 'document';
}
$newFilename = uniqid('', true) . '_' . $safeBaseName;
if ($extension !== '') {
$newFilename .= '.' . $extension;
}
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
}
return rtrim($uploadDir, '/') . '/' . $newFilename;
}
/**
 * Determine the title to use for an uploaded document.
 *
 * A non-empty "title" request field (after trimming) is returned as-is.
 * Otherwise the title is derived from the client-supplied file name: its
 * name without extension is slugified, falling back to the full original
 * name when the extension-less part is empty.
 *
 * @param Request      $request    Request carrying the optional "title" field.
 * @param UploadedFile $file       Uploaded file providing the fallback name.
 * @param FormatText   $formatText Helper used to slugify the fallback name.
 *
 * @return string The trimmed title.
 */
private function resolveDocumentTitle(Request $request, UploadedFile $file, FormatText $formatText): string
{
    $submittedTitle = trim((string) $request->request->get('title', ''));

    if ($submittedTitle === '') {
        $clientName = trim((string) $file->getClientOriginalName());
        $stem = pathinfo($clientName, PATHINFO_FILENAME);
        $slugSource = $stem === '' ? $clientName : $stem;

        return trim((string) $formatText->slugify($slugSource));
    }

    return $submittedTitle;
}
private function startIngestJob(string $jobId, string $logFile): void
{
$projectDir = $this->resolveProjectDir();
$console = $projectDir . '/bin/console';
$logDir = $projectDir . '/var/log/ingest';
if (!is_dir($logDir)) {
@mkdir($logDir, 0777, true);
if (!is_file($console)) {
throw new \RuntimeException('bin/console not found: ' . $console);
}
$logFile = $logDir . '/job_' . $jobId . '.log';
// Wichtig: CLI-PHP verwenden, nicht PHP_BINARY aus FPM
$php = 'php';
$php = $this->resolvePhpBinary();
$cmd = sprintf(
'%s %s --no-interaction %s %s >> %s 2>&1 &',
escapeshellcmd($php),
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellarg($projectDir),
escapeshellarg($php),
escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'),
escapeshellarg($jobId),
escapeshellarg($logFile),
);
exec($cmd);
$output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
if ($exitCode !== 0) {
throw new \RuntimeException('Background ingest bootstrap failed with exit code ' . $exitCode . '.');
}
}
/**
 * Build the log file path for an ingest job and make sure its directory exists.
 *
 * The log lives in "<project dir>/var/log/ingest"; directory creation is
 * delegated to ensureDir().
 *
 * @param string $jobId Job identifier embedded in the file name.
 *
 * @return string Path of the form ".../var/log/ingest/job_<jobId>.log".
 */
private function prepareJobLogFile(string $jobId): string
{
    $ingestLogDir = $this->resolveProjectDir() . '/var/log/ingest';
    $this->ensureDir($ingestLogDir);

    return sprintf('%s/job_%s.log', $ingestLogDir, $jobId);
}
/**
 * Return the "kernel.project_dir" parameter without trailing slashes.
 *
 * @return string Project directory path.
 *
 * @throws \RuntimeException When the parameter is empty or is not an existing directory.
 */
private function resolveProjectDir(): string
{
    $configured = trim((string) $this->getParameter('kernel.project_dir'));
    $isUsable = $configured !== '' && is_dir($configured);

    if (!$isUsable) {
        throw new \RuntimeException('Project directory is invalid.');
    }

    return rtrim($configured, '/');
}
/**
 * Locate a PHP CLI binary that can be used to spawn background console commands.
 *
 * Candidates are tried in this order, the first one accepted by
 * isValidCliPhpBinary() wins:
 *   1. PHP_CLI_BINARY from $_SERVER, $_ENV and getenv()
 *   2. the PHP_BINARY constant
 *   3. a list of well-known install locations
 *   4. the result of "command -v php" (only when shell_exec is available)
 *
 * @return string Path of the resolved PHP CLI binary.
 *
 * @throws \RuntimeException When no candidate qualifies.
 */
private function resolvePhpBinary(): string
{
    $candidates = [
        (string) ($_SERVER['PHP_CLI_BINARY'] ?? ''),
        (string) ($_ENV['PHP_CLI_BINARY'] ?? ''),
        (string) getenv('PHP_CLI_BINARY'),
        defined('PHP_BINARY') ? (string) PHP_BINARY : '',
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($candidates as $candidate) {
        $candidate = trim($candidate);

        if ($this->isValidCliPhpBinary($candidate)) {
            return $candidate;
        }
    }

    // On PHP 8 a function listed in disable_functions is treated as undefined:
    // calling it throws an Error that "@" does not suppress. Probe first so a
    // hardened server still ends in the descriptive RuntimeException below.
    if (function_exists('shell_exec')) {
        $whichPhp = trim((string) @shell_exec('command -v php 2>/dev/null'));

        if ($this->isValidCliPhpBinary($whichPhp)) {
            return $whichPhp;
        }
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
/**
 * Check whether a path points to an executable file that is not an FPM/CGI binary.
 *
 * The FPM/CGI check is purely name based: the lower-cased base name must not
 * contain "fpm" or "cgi".
 *
 * @param string $path Candidate path; surrounding whitespace is ignored.
 *
 * @return bool True when the path is an executable regular file with an acceptable name.
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);
    $isExecutableFile = $candidate !== '' && is_file($candidate) && is_executable($candidate);

    if (!$isExecutableFile) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
/**
 * Load a Document by its UUID string or fail with a 404.
 *
 * @param string                 $id Document identifier; surrounding whitespace is ignored.
 * @param EntityManagerInterface $em Entity manager used for the lookup.
 *
 * @return Document The matching document.
 *
 * @throws NotFoundHttpException When the id cannot be parsed as a UUID or no document matches.
 */
private function findDocument(string $id, EntityManagerInterface $em): Document
{
    try {
        $documentId = Uuid::fromString(trim($id));
    } catch (\Throwable) {
        // An unparsable id is reported the same way as an unknown one.
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(Document::class)->find($documentId);

    if ($found instanceof Document) {
        return $found;
    }

    throw new NotFoundHttpException();
}
/**
 * Load a DocumentVersion by its UUID string or fail with a 404.
 *
 * @param string                 $versionId Version identifier; surrounding whitespace is ignored.
 * @param EntityManagerInterface $em        Entity manager used for the lookup.
 *
 * @return DocumentVersion The matching version.
 *
 * @throws NotFoundHttpException When the id cannot be parsed as a UUID or no version matches.
 */
private function findDocumentVersion(string $versionId, EntityManagerInterface $em): DocumentVersion
{
    try {
        $parsedId = Uuid::fromString(trim($versionId));
    } catch (\Throwable) {
        // An unparsable id is reported the same way as an unknown one.
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(DocumentVersion::class)->find($parsedId);

    if ($found instanceof DocumentVersion) {
        return $found;
    }

    throw new NotFoundHttpException();
}
/**
 * Return the current user as an application User entity.
 *
 * @return User The authenticated user.
 *
 * @throws \RuntimeException When getUser() does not yield a User instance.
 */
private function requireUser(): User
{
    $current = $this->getUser();

    if ($current instanceof User) {
        return $current;
    }

    throw new \RuntimeException('No authenticated user available.');
}
/**
 * Pick the text to show for a failed operation.
 *
 * Returns the exception's trimmed message, or the fallback when that message
 * is empty. The message is passed through unchanged apart from trimming; no
 * filtering or escaping happens here.
 *
 * @param \Throwable $e        The caught error.
 * @param string     $fallback Text used when the exception carries no message.
 *
 * @return string Message to display.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());

    if ($text === '') {
        return $fallback;
    }

    return $text;
}
}

View File

@@ -19,44 +19,97 @@ final class DocumentTagController extends AbstractController
#[Route('/{id}/tags', name: 'admin_document_tags_edit', methods: ['GET'])]
public function edit(string $id, DocumentTagAdminService $svc): Response
{
$id = trim($id);
try {
$data = $svc->getEditData($id);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument-Tags konnten nicht geladen werden.'));
return $this->redirectToRoute('admin_documents');
}
return $this->render('admin/document_tags/edit.html.twig', [
'document' => $data['document'],
'allTags' => $data['allTags'],
'latestJob' => $data['latestJob'],
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED,
...$data,
...$this->buildJobStatusViewData(),
]);
}
#[Route('/{id}/tags/save', name: 'admin_document_tags_save', methods: ['POST'])]
public function save(string $id, Request $request, DocumentTagAdminService $svc): RedirectResponse
{
$selected = $request->request->all('tag_ids') ?? [];
$id = trim($id);
if (!$this->isCsrfTokenValid('admin_document_tags_save_' . $id, (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
}
try {
$svc->saveTags($id, $selected);
$svc->saveTags($id, $this->normalizeStringList($request->request->all('tag_ids')));
$this->addFlash('success', 'Tags wurden aktualisiert. Rebuild läuft im Hintergrund.');
} catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tags konnten nicht aktualisiert werden.'));
}
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
}
/**
* Wichtig: Ohne extra "admin/" im Pfad, weil Prefix schon /admin/documents ist.
* Ergebnis: /admin/documents/tags/status
*/
#[Route('/tags/status', name: 'admin_tags_status', methods: ['GET'])]
public function status(DocumentTagAdminService $svc): JsonResponse
{
$status = $svc->getLatestRebuildStatus();
return $this->json([
'status' => $svc->getLatestRebuildStatus(),
'status' => $status,
'hasActiveJob' => $status === TagRebuildJob::STATUS_RUNNING
|| $status === TagRebuildJob::STATUS_QUEUED,
]);
}
/**
 * Turn raw request input into a clean list of distinct, non-empty strings.
 *
 * Non-array input yields an empty list. Each element is cast to string and
 * trimmed; empty results are dropped and duplicates removed, keeping the
 * first occurrence order. Elements that cannot be cast to string (such as
 * nested arrays from "name[][]=x" style input) are skipped.
 *
 * @param mixed $values Raw value, expected to be a flat array of scalars.
 *
 * @return list<string>
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // Casting a nested array would raise "Array to string conversion" and
        // store the literal "Array" as an id, so such elements are ignored.
        if (!is_scalar($value) && !$value instanceof \Stringable) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
/**
 * Expose the TagRebuildJob status constants under the names the templates use.
 *
 * @return array<string, string> Map of template variable name to status value.
 */
private function buildJobStatusViewData(): array
{
    $viewData = [];

    $viewData['statusRunning'] = TagRebuildJob::STATUS_RUNNING;
    $viewData['statusQueued'] = TagRebuildJob::STATUS_QUEUED;
    $viewData['statusCompleted'] = TagRebuildJob::STATUS_COMPLETED;
    $viewData['statusFailed'] = TagRebuildJob::STATUS_FAILED;

    return $viewData;
}
/**
 * Choose the flash message text for a caught error.
 *
 * The exception message is trimmed and returned when non-empty; otherwise the
 * fallback is returned. No filtering of the exception message is performed.
 *
 * @param \Throwable $e        The caught error.
 * @param string     $fallback Text used when the exception has no message.
 *
 * @return string Message to display.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $trimmed = trim($e->getMessage());
    $hasMessage = $trimmed !== '';

    return $hasMessage ? $trimmed : $fallback;
}
}

View File

@@ -1,46 +1,44 @@
<?php
declare(strict_types=1);
namespace App\Controller\Admin;
use App\Entity\IngestJob;
use App\Service\IngestJobService;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\JsonResponse;
#[Route('/admin/jobs')]
class IngestJobController extends AbstractController
final class IngestJobController extends AbstractController
{
#[Route('', name: 'admin_jobs')]
#[Route('', name: 'admin_jobs', methods: ['GET'])]
public function index(EntityManagerInterface $em): Response
{
$jobs = $em->getRepository(IngestJob::class)
->findBy([], ['startedAt' => 'DESC']);
->findBy([], ['startedAt' => 'DESC', 'id' => 'DESC']);
return $this->render('admin/job/index.html.twig', [
'jobs' => $jobs
'jobs' => $jobs,
]);
}
#[Route(
'/{id}',
name: 'admin_job_show',
requirements: ['id' => '[0-9a-fA-F\-]{36}']
requirements: ['id' => '[0-9a-fA-F\-]{36}'],
methods: ['GET']
)]
public function show(string $id, EntityManagerInterface $em): Response
{
$job = $em->getRepository(IngestJob::class)->find($id);
if (!$job) {
throw new NotFoundHttpException();
}
return $this->render('admin/job/show.html.twig', [
'job' => $job
'job' => $this->findJob($id, $em),
]);
}
@@ -54,12 +52,7 @@ class IngestJobController extends AbstractController
{
$this->denyAccessUnlessGranted('ROLE_USER');
/** @var IngestJob|null $job */
$job = $em->getRepository(IngestJob::class)->find($id);
if (!$job) {
throw new NotFoundHttpException();
}
$job = $this->findJob($id, $em);
return $this->json([
'id' => (string) $job->getId(),
@@ -68,19 +61,35 @@ class IngestJobController extends AbstractController
'startedAt' => $job->getStartedAt()->format(DATE_ATOM),
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM),
'errorMessage' => $job->getErrorMessage(),
'logPath' => $job->getLogPath(),
]);
}
#[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])]
public function globalReindex(
Request $request,
IngestJobService $jobService,
EntityManagerInterface $em,
): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
// ---------------------------------------------------------
// 1) Job anlegen (QUEUED)
// ---------------------------------------------------------
if (!$this->isCsrfTokenValid('global_reindex', (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_jobs');
}
try {
$projectDir = $this->resolveProjectDir();
$console = $projectDir . '/bin/console';
if (!is_file($console)) {
throw new \RuntimeException('bin/console not found: ' . $console);
}
$logDir = $projectDir . '/var/log/ingest';
$this->ensureDirectoryExists($logDir);
$job = $jobService->startJob(
IngestJob::TYPE_GLOBAL_REINDEX,
$this->getUser(),
@@ -90,36 +99,147 @@ class IngestJobController extends AbstractController
IngestJob::STATUS_QUEUED
);
// ---------------------------------------------------------
// 2) CLI im Hintergrund starten
// ---------------------------------------------------------
$projectDir = (string)$this->getParameter('kernel.project_dir');
$console = $projectDir . '/bin/console';
$logDir = $projectDir . '/var/log/ingest';
if (!is_dir($logDir)) {
@mkdir($logDir, 0777, true);
}
$logFile = $logDir . '/job_' . (string) $job->getId() . '.log';
$job->setLogPath($logFile);
$em->flush();
$php = 'php';
$phpBinary = $this->resolvePhpBinary();
$cmd = sprintf(
'%s %s --no-interaction %s %s >> %s 2>&1 &',
escapeshellcmd($php),
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellarg($projectDir),
escapeshellarg($phpBinary),
escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'),
escapeshellarg((string) $job->getId()),
escapeshellarg($logFile),
);
exec($cmd);
$output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
if ($exitCode !== 0) {
$job->markFailed('Global reindex async bootstrap failed with exit code ' . $exitCode . '.');
$em->flush();
$this->addFlash('danger', 'Global Reindex konnte nicht im Hintergrund gestartet werden.');
// ---------------------------------------------------------
// 3) Redirect auf Job-Detailseite (Loader)
// ---------------------------------------------------------
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
}
$this->addFlash('success', 'Global Reindex wurde gestartet.');
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Global Reindex konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_jobs');
}
}
/**
 * Load an IngestJob by id or fail with a 404.
 *
 * @param string                 $id Job identifier; surrounding whitespace is ignored.
 * @param EntityManagerInterface $em Entity manager used for the lookup.
 *
 * @return IngestJob The matching job.
 *
 * @throws NotFoundHttpException When the repository returns no IngestJob for the id.
 */
private function findJob(string $id, EntityManagerInterface $em): IngestJob
{
    $found = $em->getRepository(IngestJob::class)->find(trim($id));

    if ($found instanceof IngestJob) {
        return $found;
    }

    throw new NotFoundHttpException();
}
/**
 * Read "kernel.project_dir" and return it without trailing slashes.
 *
 * @return string Project directory path.
 *
 * @throws \RuntimeException When the parameter is empty or not an existing directory.
 */
private function resolveProjectDir(): string
{
    $dir = trim((string) $this->getParameter('kernel.project_dir'));

    if ($dir !== '' && is_dir($dir)) {
        return rtrim($dir, '/');
    }

    throw new \RuntimeException('Project directory is invalid.');
}
/**
 * Find a PHP CLI binary suitable for launching console commands in the background.
 *
 * Lookup order (first candidate accepted by isValidCliPhpBinary() is returned):
 *   1. PHP_CLI_BINARY from $_SERVER, $_ENV and getenv()
 *   2. the PHP_BINARY constant
 *   3. common install paths
 *   4. "command -v php", attempted only when shell_exec is callable
 *
 * @return string Path of the resolved PHP CLI binary.
 *
 * @throws \RuntimeException When none of the candidates qualifies.
 */
private function resolvePhpBinary(): string
{
    $staticCandidates = [
        (string) ($_SERVER['PHP_CLI_BINARY'] ?? ''),
        (string) ($_ENV['PHP_CLI_BINARY'] ?? ''),
        (string) getenv('PHP_CLI_BINARY'),
        defined('PHP_BINARY') ? (string) PHP_BINARY : '',
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($staticCandidates as $candidate) {
        $candidate = trim($candidate);

        if ($this->isValidCliPhpBinary($candidate)) {
            return $candidate;
        }
    }

    // A function disabled through disable_functions is undefined on PHP 8, and
    // "@" cannot silence the resulting Error. Skip the shell lookup in that
    // case so the caller receives the explanatory RuntimeException instead.
    if (function_exists('shell_exec')) {
        $whichPhp = trim((string) @shell_exec('command -v php 2>/dev/null'));

        if ($this->isValidCliPhpBinary($whichPhp)) {
            return $whichPhp;
        }
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
/**
 * Decide whether a path can serve as the PHP CLI binary.
 *
 * The path must be a non-empty, existing, executable regular file whose
 * lower-cased base name contains neither "fpm" nor "cgi".
 *
 * @param string $path Candidate path; surrounding whitespace is ignored.
 *
 * @return bool True when all checks pass.
 */
private function isValidCliPhpBinary(string $path): bool
{
    $binary = trim($path);

    if ($binary === '') {
        return false;
    }

    if (!is_file($binary) || !is_executable($binary)) {
        return false;
    }

    $lowerName = strtolower(basename($binary));
    $looksLikeSapi = str_contains($lowerName, 'fpm') || str_contains($lowerName, 'cgi');

    return !$looksLikeSapi;
}
/**
 * Create the given directory (recursively, mode 0775) unless it already exists.
 *
 * The second is_dir() check after a failed mkdir() covers the case where the
 * directory appeared in the meantime.
 *
 * @param string $dir Directory path to create.
 *
 * @throws \RuntimeException When the directory does not exist and cannot be created.
 */
private function ensureDirectoryExists(string $dir): void
{
    if (!is_dir($dir) && !@mkdir($dir, 0775, true) && !is_dir($dir)) {
        throw new \RuntimeException('Could not create ingest log directory.');
    }
}
/**
 * Resolve the user-facing text for a caught error.
 *
 * Uses the trimmed exception message when it is non-empty, the fallback
 * otherwise. The exception message itself is not filtered.
 *
 * @param \Throwable $e        The caught error.
 * @param string     $fallback Text used when the exception has no message.
 *
 * @return string Message to display.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $detail = trim($e->getMessage());

    if ($detail !== '') {
        return $detail;
    }

    return $fallback;
}
}

View File

@@ -6,6 +6,7 @@ namespace App\Controller\Admin;
use App\Entity\TagRebuildJob;
use App\Service\Admin\TagAdminService;
use App\Tag\TagTypes;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\Request;
@@ -18,25 +19,18 @@ final class TagController extends AbstractController
#[Route('', name: 'admin_tags_index', methods: ['GET'])]
public function index(TagAdminService $svc): Response
{
$data = $svc->getIndexData();
return $this->render('admin/tag/index.html.twig', [
...$data,
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED,
...$svc->getIndexData(),
...$this->buildJobStatusViewData(),
]);
}
#[Route('/create', name: 'admin_tags_create', methods: ['POST'])]
public function create(Request $request, TagAdminService $svc): RedirectResponse
{
if (!$this->isCsrfTokenValid(
'admin_tag_create',
$request->request->get('_token')
)) {
$this->addFlash('danger', 'Ungültiges CSRF Token.');
if (!$this->isCsrfTokenValid('admin_tag_create', (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_tags_index');
}
@@ -44,15 +38,13 @@ final class TagController extends AbstractController
$svc->create(
(string) $request->request->get('slug', ''),
(string) $request->request->get('label', ''),
$request->request->get('description')
? (string)$request->request->get('description')
: null,
(string)$request->request->get('type', 'generic') // NEU
$this->normalizeNullableString($request->request->get('description')),
TagTypes::normalize((string) $request->request->get('type', TagTypes::GENERIC))
);
$this->addFlash('success', 'Tag wurde erstellt.');
} catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht erstellt werden.'));
}
return $this->redirectToRoute('admin_tags_index');
@@ -61,58 +53,110 @@ final class TagController extends AbstractController
#[Route('/{id}/delete', name: 'admin_tags_delete', methods: ['POST'])]
public function delete(string $id, Request $request, TagAdminService $svc): RedirectResponse
{
if (!$this->isCsrfTokenValid(
'admin_tag_delete_' . $id,
$request->request->get('_token')
)) {
$this->addFlash('danger', 'Ungültiges CSRF Token.');
if (!$this->isCsrfTokenValid('admin_tag_delete_' . $id, (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_tags_index');
}
try {
$svc->delete($id);
$svc->delete(trim($id));
$this->addFlash('success', 'Tag wurde gelöscht.');
} catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht gelöscht werden.'));
}
return $this->redirectToRoute('admin_tags_index');
}
#[Route('/{id}/assign', name: 'admin_tags_assign', methods: ['GET', 'POST'])]
public function assign(
string $id,
Request $request,
TagAdminService $svc
): Response {
public function assign(string $id, Request $request, TagAdminService $svc): Response
{
$id = trim($id);
if ($request->isMethod('POST')) {
if (!$this->isCsrfTokenValid(
'assign_tag_' . $id,
$request->request->get('_token')
)) {
throw $this->createAccessDeniedException();
}
$svc->syncAssignments(
$id,
$request->request->all('documents') ?? []
);
$this->addFlash('success', 'Zuweisungen aktualisiert.');
if (!$this->isCsrfTokenValid('assign_tag_' . $id, (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
}
try {
$svc->syncAssignments($id, $this->normalizeStringList($request->request->all('documents')));
$this->addFlash('success', 'Zuweisungen aktualisiert.');
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Zuweisungen konnten nicht aktualisiert werden.'));
}
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
}
try {
$data = $svc->getAssignData($id);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht geladen werden.'));
return $this->redirectToRoute('admin_tags_index');
}
return $this->render('admin/tag/assign.html.twig', [
...$data,
...$this->buildJobStatusViewData(),
]);
}
/**
 * Cast a raw value to a trimmed string, mapping an empty result to null.
 *
 * @param mixed $value Raw input value.
 *
 * @return string|null The trimmed string, or null when nothing remains after trimming.
 */
private function normalizeNullableString(mixed $value): ?string
{
    $trimmed = trim((string) $value);

    if ($trimmed === '') {
        return null;
    }

    return $trimmed;
}
/**
 * Reduce raw request input to a list of distinct, non-empty, trimmed strings.
 *
 * Anything that is not an array yields an empty list. Elements are cast to
 * string and trimmed; blanks are discarded and duplicates collapsed while
 * preserving first-seen order. Elements that have no string representation
 * (for example nested arrays from "documents[][]=x") are skipped.
 *
 * @param mixed $values Raw value, expected to be a flat array of scalars.
 *
 * @return list<string>
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // A nested array would trigger "Array to string conversion" and end up
        // as the bogus entry "Array"; ignore anything that cannot be cast.
        if (!is_scalar($value) && !$value instanceof \Stringable) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
/**
* @return array<string, string>
*/
private function buildJobStatusViewData(): array
{
return [
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED,
]);
];
}
/**
 * Select the message shown to the user after a failed tag operation.
 *
 * Returns the exception's trimmed message, or the fallback if that message is
 * empty. The exception text is forwarded without any filtering.
 *
 * @param \Throwable $e        The caught error.
 * @param string     $fallback Text used when the exception has no message.
 *
 * @return string Message to display.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $reason = trim($e->getMessage());

    return $reason === '' ? $fallback : $reason;
}
}

View File

@@ -10,38 +10,79 @@ use Symfony\Component\Routing\Attribute\Route;
final class TagRebuildStreamController
{
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream')]
private const POLL_INTERVAL_SECONDS = 2;
private const KEEPALIVE_INTERVAL_SECONDS = 10;
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream', methods: ['GET'])]
public function stream(TagRebuildStatusProvider $provider): StreamedResponse
{
$response = new StreamedResponse(function () use ($provider) {
$response = new StreamedResponse(function () use ($provider): void {
self::disableOutputBuffering();
echo "event: ping\n";
echo "data: " . json_encode(['init' => true]) . "\n\n";
echo "retry: 3000\n";
self::sendEvent('ping', ['init' => true]);
@ob_flush();
@flush();
$lastPayloadHash = null;
$lastKeepaliveAt = time();
while (!connection_aborted()) {
$data = $provider->getLatestStatus();
if ($data !== null) {
echo "event: message\n";
echo "data: " . json_encode($data) . "\n\n";
$payloadHash = md5(
json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) ?: 'null'
);
@ob_flush();
@flush();
if ($payloadHash !== $lastPayloadHash) {
self::sendEvent('message', $data);
$lastPayloadHash = $payloadHash;
$lastKeepaliveAt = time();
}
}
sleep(2);
if ((time() - $lastKeepaliveAt) >= self::KEEPALIVE_INTERVAL_SECONDS) {
self::sendEvent('ping', [
'ts' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$lastKeepaliveAt = time();
}
sleep(self::POLL_INTERVAL_SECONDS);
}
});
$response->headers->set('Content-Type', 'text/event-stream');
$response->headers->set('Cache-Control', 'no-cache');
$response->headers->set('Cache-Control', 'no-cache, no-store, must-revalidate');
$response->headers->set('Pragma', 'no-cache');
$response->headers->set('Expires', '0');
$response->headers->set('Connection', 'keep-alive');
$response->headers->set('X-Accel-Buffering', 'no');
return $response;
}
/**
 * Flushes and removes all active output buffers so echoed data is sent immediately.
 *
 * Stops as soon as a buffer cannot be removed: ob_end_flush() returns false in
 * that case and the nesting level stays unchanged, which would otherwise keep
 * the loop spinning forever.
 */
private static function disableOutputBuffering(): void
{
    while (ob_get_level() > 0) {
        if (!@ob_end_flush()) {
            break;
        }
    }
}
/**
 * Writes one server-sent event (an "event:" line plus a JSON "data:" line) and flushes it.
 *
 * When the payload cannot be JSON-encoded, a fixed error document is sent instead.
 *
 * @param string               $event Event name placed on the "event:" line.
 * @param array<string, mixed> $data  Payload that is JSON-encoded onto the "data:" line.
 */
private static function sendEvent(string $event, array $data): void
{
    $payload = json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    $payload = is_string($payload) ? $payload : '{"error":"json_encode_failed"}';

    // The blank line after "data:" terminates the event.
    echo 'event: ' . $event . "\n" . 'data: ' . $payload . "\n\n";

    @ob_flush();
    @flush();
}
}

View File

@@ -8,6 +8,7 @@ use Doctrine\ORM\Mapping as ORM;
#[ORM\Entity]
#[ORM\Table(name: 'document_tag')]
#[ORM\Index(name: 'idx_document_tag_tag_id', columns: ['tag_id'])]
class DocumentTag
{
#[ORM\Id]
@@ -22,8 +23,8 @@ class DocumentTag
public function __construct(Document $document, Tag $tag)
{
$this->document = $document;
$this->tag = $tag;
$this->setDocument($document);
$this->setTag($tag);
}
public function getDocument(): Document
@@ -35,4 +36,20 @@ class DocumentTag
{
return $this->tag;
}
/**
 * Tells whether this relation links exactly the given document and tag.
 *
 * Both sides are compared via the equals() method of their identifiers; the
 * tag is only compared when the document already matches.
 */
public function isSameRelation(Document $document, Tag $tag): bool
{
    if (!$this->document->getId()->equals($document->getId())) {
        return false;
    }

    return $this->tag->getId()->equals($tag->getId());
}
private function setDocument(Document $document): void
{
$this->document = $document;
}
private function setTag(Tag $tag): void
{
$this->tag = $tag;
}
}

View File

@@ -1,8 +1,12 @@
<?php
declare(strict_types=1);
namespace App\Entity;
use App\Tag\TagTypes;
use Doctrine\ORM\Mapping as ORM;
use InvalidArgumentException;
use Symfony\Component\Uid\Uuid;
#[ORM\Entity]
@@ -24,25 +28,25 @@ class Tag
#[ORM\Column(type: 'text', nullable: true)]
private ?string $description = null;
/**
* NEU: Governance-Typ des Tags
* - generic
* - catalog_entity
*/
#[ORM\Column(length: 50)]
private string $type = 'generic';
private string $type = TagTypes::GENERIC;
#[ORM\Column]
private \DateTimeImmutable $createdAt;
public function __construct(string $slug, string $label, ?string $description = null)
{
public function __construct(
string $slug,
string $label,
?string $description = null,
string $type = TagTypes::GENERIC,
) {
$this->id = Uuid::v4();
$this->createdAt = new \DateTimeImmutable();
$this->slug = $slug;
$this->label = $label;
$this->description = $description;
$this->setSlug($slug);
$this->setLabel($label);
$this->setDescription($description);
$this->setType($type);
}
public function getId(): Uuid
@@ -57,7 +61,14 @@ class Tag
/**
 * Sets the slug after passing it through normalizeSlug().
 *
 * @throws InvalidArgumentException When the normalized slug is empty.
 */
public function setSlug(string $slug): static
{
    $normalized = $this->normalizeSlug($slug);

    if ($normalized !== '') {
        $this->slug = $normalized;

        return $this;
    }

    throw new InvalidArgumentException('Tag slug must not be empty.');
}
@@ -68,7 +79,14 @@ class Tag
/**
 * Sets the label after trimming surrounding whitespace.
 *
 * @throws InvalidArgumentException When the trimmed label is empty.
 */
public function setLabel(string $label): static
{
    $trimmed = trim($label);

    if ($trimmed !== '') {
        $this->label = $trimmed;

        return $this;
    }

    throw new InvalidArgumentException('Tag label must not be empty.');
}
@@ -79,7 +97,9 @@ class Tag
/**
 * Sets the optional description.
 *
 * The value is trimmed; null and whitespace-only input are stored as null.
 */
public function setDescription(?string $description): static
{
    $description = trim((string) $description);
    $this->description = $description !== '' ? $description : null;

    return $this;
}
@@ -90,13 +110,43 @@ class Tag
/**
 * Sets the tag type.
 *
 * The input is passed through TagTypes::normalize() and only stored when
 * TagTypes::isValid() accepts the result, so a rejected value never ends up
 * on the entity.
 *
 * @throws InvalidArgumentException When the normalized type is not valid.
 */
public function setType(string $type): static
{
    $normalizedType = TagTypes::normalize($type);

    if (!TagTypes::isValid($normalizedType)) {
        throw new InvalidArgumentException(sprintf('Unsupported tag type "%s".', $type));
    }

    $this->type = $normalizedType;

    return $this;
}
public function isGeneric(): bool
{
return $this->type === TagTypes::GENERIC;
}
public function isCatalogEntity(): bool
{
return $this->type === TagTypes::CATALOG_ENTITY;
}
public function isSalesSignal(): bool
{
return $this->type === TagTypes::SALES_SIGNAL;
}
public function getCreatedAt(): \DateTimeImmutable
{
return $this->createdAt;
}
/**
 * Produces the canonical slug form: trimmed, lower-cased, whitespace runs
 * turned into a single hyphen, repeated hyphens collapsed, and no leading or
 * trailing hyphen.
 */
private function normalizeSlug(string $slug): string
{
    $normalized = mb_strtolower(trim($slug));

    // First whitespace -> "-", then collapse any resulting "--" runs.
    foreach (['/\s+/u', '/-+/u'] as $pattern) {
        $replaced = preg_replace($pattern, '-', $normalized);

        // preg_replace() returns null on error; keep the previous value then.
        if ($replaced !== null) {
            $normalized = $replaced;
        }
    }

    return trim($normalized, '-');
}
}

View File

@@ -9,8 +9,8 @@ use Symfony\Component\Uid\Uuid;
#[ORM\Entity]
#[ORM\Table(name: 'tag_rebuild_job')]
#[ORM\Index(columns: ['status'], name: 'idx_tag_rebuild_job_status')]
#[ORM\Index(columns: ['created_at'], name: 'idx_tag_rebuild_job_created_at')]
#[ORM\Index(name: 'idx_tag_rebuild_job_status', columns: ['status'])]
#[ORM\Index(name: 'idx_tag_rebuild_job_created_at', columns: ['created_at'])]
class TagRebuildJob
{
public const STATUS_QUEUED = 'QUEUED';
@@ -18,6 +18,8 @@ class TagRebuildJob
public const STATUS_COMPLETED = 'COMPLETED';
public const STATUS_FAILED = 'FAILED';
private const ERROR_MESSAGE_MAX_LENGTH = 4000;
#[ORM\Id]
#[ORM\Column(type: 'uuid', unique: true)]
private Uuid $id;
@@ -44,6 +46,19 @@ class TagRebuildJob
$this->status = self::STATUS_QUEUED;
}
/**
* @return list<string>
*/
public static function statuses(): array
{
return [
self::STATUS_QUEUED,
self::STATUS_RUNNING,
self::STATUS_COMPLETED,
self::STATUS_FAILED,
];
}
public function getId(): Uuid
{
return $this->id;
@@ -54,24 +69,59 @@ class TagRebuildJob
return $this->status;
}
public function isQueued(): bool
{
return $this->status === self::STATUS_QUEUED;
}
public function isRunning(): bool
{
return $this->status === self::STATUS_RUNNING;
}
public function isCompleted(): bool
{
return $this->status === self::STATUS_COMPLETED;
}
public function isFailed(): bool
{
return $this->status === self::STATUS_FAILED;
}
public function isActive(): bool
{
return $this->isQueued() || $this->isRunning();
}
public function markRunning(): void
{
$this->status = self::STATUS_RUNNING;
$this->startedAt = new \DateTimeImmutable();
$this->finishedAt = null;
$this->errorMessage = null;
}
public function markCompleted(): void
{
if ($this->startedAt === null) {
$this->startedAt = new \DateTimeImmutable();
}
$this->status = self::STATUS_COMPLETED;
$this->finishedAt = new \DateTimeImmutable();
$this->errorMessage = null;
}
/**
 * Marks the job as failed and stores the normalized error message.
 *
 * When the job was never started, the start time is set to now. The message
 * goes through normalizeErrorMessage() before being stored.
 */
public function markFailed(string $message): void
{
    if ($this->startedAt === null) {
        $this->startedAt = new \DateTimeImmutable();
    }

    $this->status = self::STATUS_FAILED;
    $this->finishedAt = new \DateTimeImmutable();
    $this->errorMessage = $this->normalizeErrorMessage($message);
}
public function getCreatedAt(): \DateTimeImmutable
@@ -93,4 +143,19 @@ class TagRebuildJob
{
return $this->errorMessage;
}
/**
 * Trims an error message, substitutes a default text for empty input and
 * cuts the result to at most ERROR_MESSAGE_MAX_LENGTH characters.
 */
private function normalizeErrorMessage(string $message): ?string
{
    $text = trim($message);

    if ($text === '') {
        return 'Unknown tag rebuild failure.';
    }

    return mb_strlen($text) > self::ERROR_MESSAGE_MAX_LENGTH
        ? mb_substr($text, 0, self::ERROR_MESSAGE_MAX_LENGTH)
        : $text;
}
}

View File

@@ -6,82 +6,132 @@ namespace App\Intent;
use App\Config\CatalogIntentConfig;
use App\Knowledge\Retrieval\QueryCleaner;
use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes;
use App\Tag\TagVectorSearchClient;
/**
* CatalogIntentLite
* Lightweight catalog entity detector.
*
* Reiner Entity-Detector.
*
* Verantwortlich nur für:
* - Vector-Tag-Erkennung
* - Score-Gate
* - Ambiguity-Check
* - Sicherstellen, dass TagType = catalog_entity
*
* KEIN:
* - Listen-Signal
* - SalesIntent
* - Routing
* Responsibilities:
* - clean the user query for tag lookup
* - query the tag vector index
* - keep only catalog_entity hits
* - apply confidence and ambiguity gates
* - return one canonical entity label or null
*/
final readonly class CatalogIntentLite
{
/**
* Slightly wider than the old top-3 search so generic tags do not crowd out
* relevant catalog_entity hits too easily.
*/
private const SEARCH_LIMIT = 6;
public function __construct(
private TagVectorSearchClient $tagVectorClient,
private QueryCleaner $queryCleaner
) {}
private QueryCleaner $queryCleaner,
) {
}
/**
* Gibt das canonical Label der erkannten catalog_entity zurück
* oder null, wenn kein sauberer Treffer.
* Returns the canonical normalized label of the detected catalog entity,
* or null when no safe entity match exists.
*/
public function detect(string $prompt): ?string
{
$prompt = trim($prompt);
if ($prompt === '') {
return null;
}
$promptTag = $this->queryCleaner->clean($prompt);
$cleanQuery = trim($this->queryCleaner->clean($prompt));
// 1) Tag-Vector-Suche
$hits = $this->tagVectorClient->search($promptTag, 3);
if ($hits === []) {
if ($cleanQuery === '') {
return null;
}
$best = $hits[0];
$catalogHits = $this->filterCatalogEntityHits(
$this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT)
);
if ($catalogHits === []) {
return null;
}
$best = $catalogHits[0];
$bestScore = (float) ($best['score'] ?? 0.0);
// 2) Score-Tags
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
return null;
}
// 3) Ambiguity-Check
if (isset($hits[1])) {
$secondScore = (float)($hits[1]['score'] ?? 0.0);
if (isset($catalogHits[1])) {
$secondScore = (float) ($catalogHits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
return null;
}
}
// 4) Nur catalog_entity zulassen
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null;
$label = $this->normalizeLabel((string) ($best['label'] ?? ''));
return $label !== '' ? $label : null;
}
// 5) Canonical Label
$label = trim((string)($best['label'] ?? ''));
/**
 * Keeps only usable catalog_entity hits and orders them by descending score.
 *
 * A hit is dropped when its tag_id is empty after trimming or when its
 * normalized tag_type is not TagTypes::CATALOG_ENTITY. A missing tag_type is
 * treated as TagTypes::GENERIC, a missing score as 0.0 and a missing label as
 * null.
 *
 * @param array<int, array{
 *     tag_id:string,
 *     score:float,
 *     label?:string,
 *     tag_type?:string
 * }> $hits
 *
 * @return list<array{
 *     tag_id:string,
 *     score:float,
 *     label:?string,
 *     tag_type:string
 * }>
 */
private function filterCatalogEntityHits(array $hits): array
{
    $filtered = [];

    foreach ($hits as $hit) {
        $tagId = trim((string) ($hit['tag_id'] ?? ''));
        $score = (float) ($hit['score'] ?? 0.0);
        $tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));

        if ($tagId === '') {
            continue;
        }

        if ($tagType !== TagTypes::CATALOG_ENTITY) {
            continue;
        }

        $filtered[] = [
            'tag_id' => $tagId,
            'score' => $score,
            'label' => isset($hit['label']) ? (string) $hit['label'] : null,
            'tag_type' => $tagType,
        ];
    }

    // Highest score first, so index 0 is the best candidate.
    usort(
        $filtered,
        static fn (array $left, array $right): int => ($right['score'] <=> $left['score'])
    );

    return $filtered;
}
/**
 * Produces the canonical label form: trimmed, lower-cased and with every
 * whitespace run collapsed into a single space.
 */
private function normalizeLabel(string $label): string
{
    $lowered = mb_strtolower(trim($label));

    // preg_replace() yields null on error; fall back to the un-collapsed text.
    $collapsed = preg_replace('/\s+/u', ' ', $lowered);

    return trim($collapsed ?? $lowered);
}
}

View File

@@ -8,65 +8,99 @@ use App\Entity\Document;
use App\Entity\Tag;
use App\Service\TagRebuildJobService;
use App\Tag\TagService;
use App\Tag\TagTypes;
use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
final class DocumentTagAdminService
final readonly class DocumentTagAdminService
{
public function __construct(
private readonly EntityManagerInterface $em,
private readonly TagService $tagService,
private readonly TagRebuildJobService $jobs,
) {}
private EntityManagerInterface $em,
private TagService $tagService,
private TagRebuildJobService $jobs,
) {
}
/**
* @return array{
* document: Document,
* allTags: list<Tag>,
* latestJob: mixed
* latestJob: mixed,
* hasActiveJob: bool
* }
*/
public function getEditData(string $documentId): array
{
$document = $this->em->getRepository(Document::class)->find($documentId);
if (!$document instanceof Document) {
throw new \RuntimeException('Document not found');
}
$document = $this->findDocumentById($documentId);
/** @var list<Tag> $allTags */
$allTags = $this->em->createQueryBuilder()
->select('t')
->from(Tag::class, 't')
->orderBy('t.label', 'ASC')
->getQuery()
->getResult();
$allTags = $this->em->getRepository(Tag::class)->findAll();
$latestJob = $this->jobs->getLatestJob();
usort(
$allTags,
static function (Tag $left, Tag $right): int {
$typeOrder = [
TagTypes::CATALOG_ENTITY => 10,
TagTypes::GENERIC => 20,
TagTypes::SALES_SIGNAL => 30,
];
$leftTypeRank = $typeOrder[$left->getType()] ?? 999;
$rightTypeRank = $typeOrder[$right->getType()] ?? 999;
if ($leftTypeRank !== $rightTypeRank) {
return $leftTypeRank <=> $rightTypeRank;
}
$labelComparison = strcasecmp($left->getLabel(), $right->getLabel());
if ($labelComparison !== 0) {
return $labelComparison;
}
return strcmp($left->getSlug(), $right->getSlug());
}
);
return [
'document' => $document,
'allTags' => $allTags,
'latestJob' => $latestJob,
'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(),
];
}
/**
* Speichert die Tag-Auswahl für ein Dokument (inkl. Sync-Logik).
* Persists the selected tag set for a document via the central domain service.
*
* @param array<mixed> $selectedTagIds
*/
public function saveTags(string $documentId, array $selectedTagIds): void
{
$document = $this->em->getRepository(Document::class)->find($documentId);
if (!$document instanceof Document) {
throw new \RuntimeException('Document not found');
}
$document = $this->findDocumentById($documentId);
// Delegation an deine Domain-Logik (bleibt dort, wo sie hingehört)
$this->tagService->syncDocumentTags($document, $selectedTagIds);
}
public function getLatestRebuildStatus(): ?string
{
$job = $this->jobs->getLatestJob();
return $this->jobs->getLatestJob()?->getStatus();
}
return $job?->getStatus();
/**
 * Loads a document by its identifier.
 *
 * @throws RuntimeException When the identifier is empty after trimming or no
 *                          Document is found for it.
 */
private function findDocumentById(string $documentId): Document
{
    $id = trim($documentId);

    // An empty identifier is never looked up.
    $document = $id !== ''
        ? $this->em->getRepository(Document::class)->find($id)
        : null;

    if ($document instanceof Document) {
        return $document;
    }

    throw new RuntimeException('Document not found.');
}
}

View File

@@ -9,7 +9,9 @@ use App\Entity\DocumentTag;
use App\Entity\Tag;
use App\Service\TagRebuildJobService;
use App\Tag\TagService;
use App\Tag\TagTypes;
use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
final readonly class TagAdminService
{
@@ -17,15 +19,19 @@ final readonly class TagAdminService
private EntityManagerInterface $em,
private TagService $tagService,
private TagRebuildJobService $jobs,
) {}
) {
}
public function getIndexData(): array
{
/** @var list<Tag> $tags */
$tags = $this->em->getRepository(Tag::class)
->findBy([], ['label' => 'ASC']);
->findBy([], ['type' => 'ASC', 'label' => 'ASC']);
return [
'tags' => $tags,
'tagTypeChoices' => TagTypes::choices(),
'documentCountByTagId' => $this->buildDocumentCountByTagId(),
'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(),
];
@@ -35,7 +41,7 @@ final readonly class TagAdminService
string $slug,
string $label,
?string $description,
string $type = 'generic' // NEU
string $type = TagTypes::GENERIC,
): void {
$this->tagService->create($slug, $label, $description, $type);
}
@@ -47,35 +53,47 @@ final readonly class TagAdminService
public function getAssignData(string $tagId): array
{
$tag = $this->em->getRepository(Tag::class)->find($tagId);
$tag = $this->findTagById($tagId);
if (!$tag instanceof Tag) {
throw new \RuntimeException('Tag nicht gefunden.');
}
$documents = $this->em->getRepository(Document::class)->findAll();
/** @var list<Document> $documents */
$documents = $this->em->getRepository(Document::class)->findBy(
['status' => Document::STATUS_ACTIVE],
['title' => 'ASC']
);
$documentsData = array_map(
fn(Document $d) => [
'id' => (string)$d->getId(),
'title' => $d->getTitle(),
static fn (Document $document): array => [
'id' => (string) $document->getId(),
'title' => $document->getTitle(),
],
$documents
);
/** @var list<DocumentTag> $existingRelations */
$existingRelations = $this->em
->getRepository(DocumentTag::class)
->findBy(['tag' => $tag]);
$assignedDocIds = array_map(
fn(DocumentTag $dt) => (string)$dt->getDocument()->getId(),
$existingRelations
$activeDocumentIds = array_map(
static fn (Document $document): string => (string) $document->getId(),
$documents
);
$assignedDocIds = [];
foreach ($existingRelations as $relation) {
$documentId = (string) $relation->getDocument()->getId();
if (in_array($documentId, $activeDocumentIds, true)) {
$assignedDocIds[] = $documentId;
}
}
return [
'tag' => $tag,
'documents' => $documentsData,
'assignedDocIds' => $assignedDocIds,
'assignedDocIds' => array_values(array_unique($assignedDocIds)),
'tagTypeChoices' => TagTypes::choices(),
'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(),
];
@@ -83,12 +101,55 @@ final readonly class TagAdminService
public function syncAssignments(string $tagId, array $selectedDocIds): void
{
$tag = $this->findTagById($tagId);
$this->tagService->syncTagDocuments($tag, $selectedDocIds);
}
private function findTagById(string $tagId): Tag
{
$tagId = trim($tagId);
if ($tagId === '') {
throw new RuntimeException('Tag nicht gefunden.');
}
$tag = $this->em->getRepository(Tag::class)->find($tagId);
if (!$tag instanceof Tag) {
throw new \RuntimeException('Tag nicht gefunden.');
throw new RuntimeException('Tag nicht gefunden.');
}
$this->tagService->syncTagDocuments($tag, $selectedDocIds);
return $tag;
}
/**
 * Counts, for every tag, the assigned documents whose status is Document::STATUS_ACTIVE.
 *
 * Both joins are LEFT joins, so tags without a matching document still produce
 * a row; COUNT(d.id) is 0 for them because it only counts non-null ids.
 *
 * @return array<string, int> Document count keyed by the tag id in RFC 4122 string form.
 */
private function buildDocumentCountByTagId(): array
{
// One row per tag (grouped by t.id): the Tag entity plus the aggregated count.
$rows = $this->em->createQueryBuilder()
->select('t AS tag', 'COUNT(d.id) AS documentCount')
->from(Tag::class, 't')
->leftJoin(DocumentTag::class, 'dt', 'WITH', 'dt.tag = t')
// The status filter sits in the join condition, not in a WHERE clause.
->leftJoin('dt.document', 'd', 'WITH', 'd.status = :status')
->groupBy('t.id')
->setParameter('status', Document::STATUS_ACTIVE)
->getQuery()
->getResult();
$counts = [];
foreach ($rows as $row) {
// The entity is looked up under index 0 first, then under the 'tag' alias.
$tag = $row[0] ?? $row['tag'] ?? null;
$documentCount = (int) ($row['documentCount'] ?? 0);
// Rows without a Tag entity are ignored.
if (!$tag instanceof Tag) {
continue;
}
$counts[$tag->getId()->toRfc4122()] = $documentCount;
}
return $counts;
}
}

View File

@@ -1,29 +1,33 @@
<?php
declare(strict_types=1);
namespace App\Service;
use App\Entity\Document;
use App\Entity\DocumentVersion;
use App\Entity\User;
use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
class DocumentService
final readonly class DocumentService
{
public function __construct(
private EntityManagerInterface $em,
) {}
private TagRebuildJobService $tagRebuildJobService,
) {
}
/**
* Erstellt ein neues Dokument inkl. Version 1
* Creates a new document including version 1.
*/
public function createDocument(
string $title,
string $filePath,
User $user
): Document {
$document = new Document();
$document->setTitle($title);
$document->setTitle(trim($title));
$document->setCreatedBy($user);
$version = new DocumentVersion();
@@ -44,14 +48,13 @@ class DocumentService
}
/**
* Fügt neue Version hinzu (immutable)
* Adds a new immutable version to an existing document.
*/
public function addVersion(
Document $document,
string $filePath,
User $user
): DocumentVersion {
$nextVersionNumber = $this->getNextVersionNumber($document);
$version = new DocumentVersion();
@@ -70,7 +73,7 @@ class DocumentService
}
/**
* Aktiviert eine Version
* Activates a document version and marks it for re-ingest.
*/
public function activateVersion(DocumentVersion $version): void
{
@@ -82,41 +85,77 @@ class DocumentService
$version->setActive(true);
$document->setCurrentVersion($version);
$version->setIngestStatus(DocumentVersion::INGEST_PENDING);
$this->em->flush();
}
/**
* Archiviert Dokument
* Archives a document.
*
* If the document had tag assignments, the tag index is rebuilt so the
* routing layer no longer works with an outdated active document set.
*/
public function archive(Document $document): void
{
$document->archive();
$this->em->flush();
if ($document->getStatus() === Document::STATUS_ARCHIVED) {
return;
}
public function delete(Document $document): void
{
$this->em->remove($document);
$shouldRebuildTags = $this->hasTagAssignments($document);
$document->archive();
$this->em->flush();
if ($shouldRebuildTags) {
$this->triggerTagRebuildIfIdle();
}
}
/**
* Berechnet SHA256 Checksum
* Deletes a document.
*
* If the document had tag assignments, the tag index is rebuilt after the
* removal so stale document references disappear from tag-based routing.
*/
public function delete(Document $document): void
{
$shouldRebuildTags = $this->hasTagAssignments($document);
$this->em->remove($document);
$this->em->flush();
if ($shouldRebuildTags) {
$this->triggerTagRebuildIfIdle();
}
}
/**
 * Calculates the SHA256 checksum for a file path.
 *
 * @param string $filePath Path of the file to hash; surrounding whitespace is trimmed.
 *
 * @return string Hex-encoded SHA256 digest of the file contents.
 *
 * @throws RuntimeException When the path is empty, is not a regular file, or
 *                          hashing fails.
 */
private function calculateChecksum(string $filePath): string
{
    $filePath = trim($filePath);

    if ($filePath === '') {
        throw new RuntimeException('File path must not be empty.');
    }

    // is_file() also rejects directories, which file_exists() would accept.
    if (!is_file($filePath)) {
        throw new RuntimeException('File not found for checksum.');
    }

    $checksum = hash_file('sha256', $filePath);

    // hash_file() returns false on failure (e.g. unreadable file).
    if ($checksum === false) {
        throw new RuntimeException('Could not calculate file checksum.');
    }

    return $checksum;
}
/**
* Ermittelt nächste Versionsnummer
* Determines the next version number for a document.
*/
private function getNextVersionNumber(Document $document): int
{
@@ -128,4 +167,16 @@ class DocumentService
return $max + 1;
}
private function hasTagAssignments(Document $document): bool
{
return $document->getDocumentTags()->count() > 0;
}
/**
 * Enqueues and starts an asynchronous tag rebuild unless the job service
 * already reports an active job.
 */
private function triggerTagRebuildIfIdle(): void
{
    if ($this->tagRebuildJobService->hasActiveJob()) {
        return;
    }

    $this->tagRebuildJobService->enqueueAndStartAsync();
}
}

View File

@@ -11,16 +11,24 @@ use Psr\Log\LoggerInterface;
final readonly class TagRebuildJobService
{
/**
* Wenn ein QUEUED-Job länger nicht startet, gilt er als "stale" und wird auf FAILED gesetzt,
* damit das System nicht dauerhaft blockiert.
* If a QUEUED job does not transition into RUNNING in time,
* it is treated as stale so the system does not stay blocked forever.
*/
private const STALE_QUEUED_AFTER_SECONDS = 300; // 5 Minuten
private const STALE_QUEUED_AFTER_SECONDS = 300;
/**
* The background runner should switch the job from QUEUED to RUNNING almost
* immediately because markRunning() happens at the top of the command.
*/
private const ASYNC_START_TIMEOUT_SECONDS = 3;
private const ASYNC_START_POLL_INTERVAL_MICROSECONDS = 250000;
public function __construct(
private EntityManagerInterface $em,
private LoggerInterface $agentLogger,
private string $projectDir,
) {}
) {
}
public function enqueueAndStartAsync(): TagRebuildJob
{
@@ -29,14 +37,25 @@ final readonly class TagRebuildJobService
$this->em->persist($job);
$this->em->flush();
try {
$this->startAsync($job);
} catch (\Throwable $e) {
$job->markFailed('Async tag rebuild start failed: ' . $e->getMessage());
$this->em->flush();
$this->agentLogger->error('[tags] async job start failed', [
'job' => (string) $job->getId(),
'error' => $e->getMessage(),
]);
throw $e;
}
return $job;
}
public function enqueueIfIdle(): ?TagRebuildJob
{
// Coalescing: Wenn ein Job läuft oder queued ist -> nichts tun
if ($this->hasActiveJob()) {
return null;
}
@@ -44,23 +63,18 @@ final readonly class TagRebuildJobService
return $this->enqueueAndStartAsync();
}
/**
* Letzter Job (egal welcher Status).
*/
public function getLatestJob(): ?TagRebuildJob
{
return $this->em->createQueryBuilder()
->select('j')
->from(TagRebuildJob::class, 'j')
->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1)
->getQuery()
->getOneOrNullResult();
}
/**
* Letzter Job mit Status COMPLETED.
*/
public function getLatestCompletedJob(): ?TagRebuildJob
{
return $this->em->createQueryBuilder()
@@ -69,18 +83,12 @@ final readonly class TagRebuildJobService
->where('j.status = :status')
->setParameter('status', TagRebuildJob::STATUS_COMPLETED)
->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1)
->getQuery()
->getOneOrNullResult();
}
/**
* Ob gerade ein Job aktiv ist:
* - RUNNING ist immer aktiv
* - QUEUED ist nur aktiv, wenn er nicht stale ist
*
* Zusätzlich: stale QUEUED Jobs werden auf FAILED gesetzt (Recovery).
*/
public function hasActiveJob(): bool
{
$this->markStaleQueuedJobsFailed();
@@ -106,31 +114,33 @@ final readonly class TagRebuildJobService
return (int) $qb->getQuery()->getSingleScalarResult() > 0;
}
/**
* Startet den Job async über bin/console.
* Wichtige Fixes:
* - php explizit verwenden
* - --no-interaction
* - Logfile statt /dev/null
*/
private function startAsync(TagRebuildJob $job): void
{
$projectDir = rtrim($this->projectDir, '/');
$projectDir = rtrim(trim($this->projectDir), '/');
$console = $projectDir . '/bin/console';
if ($projectDir === '' || !is_dir($projectDir)) {
throw new \RuntimeException('Project directory is invalid.');
}
if (!is_file($console)) {
throw new \RuntimeException('bin/console not found: ' . $console);
}
$phpBinary = $this->resolvePhpBinary();
$jobId = (string) $job->getId();
$logDir = $projectDir . '/var/log/tags';
if (!is_dir($logDir)) {
@mkdir($logDir, 0777, true);
if (!is_dir($logDir) && !@mkdir($logDir, 0775, true) && !is_dir($logDir)) {
throw new \RuntimeException('Could not create tag job log directory.');
}
$logFile = $logDir . '/job_' . $jobId . '.log';
// Robust: cd ins Projekt, dann nohup php bin/console ...
$cmd = sprintf(
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 &',
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellarg($projectDir),
escapeshellcmd('php'),
escapeshellarg($phpBinary),
escapeshellarg($console),
escapeshellarg('mto:agent:tags:job:run'),
escapeshellarg($jobId),
@@ -141,15 +151,92 @@ final readonly class TagRebuildJobService
'job' => $jobId,
'cmd' => $cmd,
'log' => $logFile,
'php_binary' => $phpBinary,
]);
@exec($cmd);
$output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
$pid = isset($output[0]) ? trim((string) $output[0]) : '';
if ($exitCode !== 0) {
throw new \RuntimeException('Async process bootstrap failed with exit code ' . $exitCode . '.');
}
if ($pid === '' || !ctype_digit($pid)) {
throw new \RuntimeException('Async process bootstrap did not return a valid PID.');
}
$this->agentLogger->info('[tags] async job process started', [
'job' => $jobId,
'pid' => $pid,
'log' => $logFile,
'php_binary' => $phpBinary,
]);
$this->waitForAsyncJobTransition($job, $logFile);
}
/**
 * Finds a usable CLI PHP binary.
 *
 * Candidates are tried in this order, and the first one accepted by
 * isValidCliPhpBinary() wins:
 *   1. PHP_CLI_BINARY from $_SERVER, $_ENV and getenv()
 *   2. the PHP_BINARY constant
 *   3. a fixed list of common install paths
 *   4. the result of `command -v php`
 *
 * @throws \RuntimeException When no candidate is accepted.
 */
private function resolvePhpBinary(): string
{
    $candidates = [
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($candidates as $candidate) {
        if ($this->isValidCliPhpBinary($candidate)) {
            return $candidate;
        }
    }

    // The shell lookup is the last resort and only runs when nothing above matched.
    $fromPath = trim((string) @shell_exec('command -v php 2>/dev/null'));

    if (!$this->isValidCliPhpBinary($fromPath)) {
        throw new \RuntimeException(
            'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
        );
    }

    return $fromPath;
}
/**
 * Checks whether a path points to an executable file whose base name does not
 * contain "fpm" or "cgi" (case-insensitive).
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);

    if ($candidate === '') {
        return false;
    }

    if (!is_file($candidate) || !is_executable($candidate)) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
/**
* Recovery gegen "ewig QUEUED":
* Setzt alte QUEUED Jobs auf FAILED, damit enqueueIfIdle() nicht dauerhaft blockiert.
*/
private function markStaleQueuedJobsFailed(): void
{
$cutoff = new \DateTimeImmutable('-' . self::STALE_QUEUED_AFTER_SECONDS . ' seconds');
@@ -161,12 +248,13 @@ final readonly class TagRebuildJobService
->andWhere('j.createdAt < :cutoff')
->setParameter('queued', TagRebuildJob::STATUS_QUEUED)
->setParameter('cutoff', $cutoff)
->orderBy('j.createdAt', 'ASC')
->setMaxResults(25);
/** @var TagRebuildJob[] $stale */
/** @var list<TagRebuildJob> $stale */
$stale = $qb->getQuery()->getResult();
if (!$stale) {
if ($stale === []) {
return;
}
@@ -183,4 +271,46 @@ final readonly class TagRebuildJobService
$this->em->flush();
}
/**
 * Polls the job until it is no longer QUEUED or the start timeout elapses.
 *
 * Each iteration sleeps for the poll interval, refreshes the job through the
 * entity manager and returns as soon as the job has left the QUEUED state.
 *
 * @throws \RuntimeException When the job is still QUEUED after the timeout;
 *                           the message includes the log tail when one is available.
 */
private function waitForAsyncJobTransition(TagRebuildJob $job, string $logFile): void
{
    $giveUpAt = microtime(true) + self::ASYNC_START_TIMEOUT_SECONDS;

    while (microtime(true) < $giveUpAt) {
        usleep(self::ASYNC_START_POLL_INTERVAL_MICROSECONDS);
        $this->em->refresh($job);

        if ($job->isQueued()) {
            continue;
        }

        return;
    }

    $message = 'Async tag rebuild runner did not transition from QUEUED to RUNNING within '
        . self::ASYNC_START_TIMEOUT_SECONDS
        . ' seconds.';

    $tail = $this->readLogTail($logFile);
    if ($tail !== null) {
        $message .= ' Log tail: ' . $tail;
    }

    throw new \RuntimeException($message);
}
/**
 * Returns the last 800 characters of a log file as a single line.
 *
 * Whitespace runs are collapsed to one space. Null is returned when the file
 * is missing, unreadable or contains nothing but whitespace.
 */
private function readLogTail(string $logFile): ?string
{
    if (!(is_file($logFile) && is_readable($logFile))) {
        return null;
    }

    $raw = @file_get_contents($logFile);
    if (!is_string($raw)) {
        return null;
    }

    $raw = trim($raw);
    if ($raw === '') {
        return null;
    }

    $tail = mb_substr($raw, -800);

    // preg_replace() yields null on error; keep the raw tail in that case.
    $flattened = trim(preg_replace('/\s+/u', ' ', $tail) ?? $tail);

    return $flattened !== '' ? $flattened : null;
}
}

View File

@@ -11,29 +11,76 @@ final readonly class TagRebuildStatusProvider
{
public function __construct(
private EntityManagerInterface $em
) {}
) {
}
public function getLatestStatus(): ?array
{
$this->em->clear();
$job = $this->em->createQueryBuilder()
->select('j')
$row = $this->em->createQueryBuilder()
->select(
'j.status AS status',
'j.createdAt AS createdAt',
'j.startedAt AS startedAt',
'j.finishedAt AS finishedAt',
'j.errorMessage AS errorMessage'
)
->from(TagRebuildJob::class, 'j')
->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1)
->getQuery()
->getOneOrNullResult();
->getOneOrNullResult(\Doctrine\ORM\Query::HYDRATE_ARRAY);
if (!$job instanceof TagRebuildJob) {
if (!is_array($row)) {
return null;
}
$status = trim((string) ($row['status'] ?? ''));
if ($status === '') {
return null;
}
return [
'status' => $job->getStatus(),
'startedAt' => $job->getStartedAt()?->format(DATE_ATOM),
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM),
'error' => $job->getErrorMessage(),
'status' => $status,
'createdAt' => $this->formatDateValue($row['createdAt'] ?? null),
'startedAt' => $this->formatDateValue($row['startedAt'] ?? null),
'finishedAt' => $this->formatDateValue($row['finishedAt'] ?? null),
'error' => $this->normalizeNullableString($row['errorMessage'] ?? null),
'hasActiveJob' => in_array($status, [
TagRebuildJob::STATUS_QUEUED,
TagRebuildJob::STATUS_RUNNING,
], true),
];
}
/**
 * Converts a date value to its DATE_ATOM string.
 *
 * DateTimeInterface instances are formatted directly. Non-empty strings are
 * parsed with DateTimeImmutable first. Anything else, as well as empty or
 * unparsable strings, yields null.
 */
private function formatDateValue(mixed $value): ?string
{
    if ($value instanceof \DateTimeInterface) {
        return $value->format(DATE_ATOM);
    }

    if (!is_string($value)) {
        return null;
    }

    $text = trim($value);
    if ($text === '') {
        return null;
    }

    try {
        $parsed = new \DateTimeImmutable($text);
    } catch (\Throwable) {
        return null;
    }

    return $parsed->format(DATE_ATOM);
}
/**
 * Normalizes an arbitrary value to a trimmed, non-empty string.
 *
 * Scalars and Stringable objects are cast to string and trimmed. Values that
 * cannot be cast safely (arrays, plain objects, resources) yield null instead
 * of raising an "Array to string conversion" warning or a conversion Error.
 *
 * @return string|null The trimmed string, or null when the value is empty or not string-like.
 */
private function normalizeNullableString(mixed $value): ?string
{
    // null is neither scalar nor Stringable and therefore maps to null as before.
    if (!is_scalar($value) && !$value instanceof \Stringable) {
        return null;
    }

    $value = trim((string) $value);

    return $value !== '' ? $value : null;
}
}

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Tag;
use App\Entity\Document;
use App\Entity\DocumentTag;
use App\Entity\Tag;
use Doctrine\ORM\EntityManagerInterface;
@@ -13,147 +14,198 @@ final readonly class TagNdjsonExporter
public function __construct(
private EntityManagerInterface $em,
private string $tagsNdjsonPath,
) {}
) {
}
/**
* Export all tags into NDJSON (streaming) with atomic switch (.tmp + rename()).
* Export all relevant tags into NDJSON (streaming) with atomic switch (.tmp + rename()).
*
* Line format:
* {
* "tag_id":"...",
* "text":"label\nslug\noptional description",
* "type":"catalog_entity|generic|...",
* "type":"catalog_entity|generic|sales_signal",
* "document_ids":["...","..."]
* }
*
* Only ACTIVE document assignments are exported. Tags without active document
* assignments are intentionally skipped so they do not influence retrieval.
*
* @return array{tags:int, lines:int, bytes:int, path:string}
*/
public function export(): array
{
$dir = \dirname($this->tagsNdjsonPath);
if (!\is_dir($dir)) {
@\mkdir($dir, 0775, true);
}
$this->ensureTargetDirectoryExists();
$tmpPath = $this->tagsNdjsonPath . '.tmp';
$this->cleanupTemporaryFile($tmpPath);
$fh = @\fopen($tmpPath, 'wb');
if (!$fh) {
$fh = @fopen($tmpPath, 'wb');
if ($fh === false) {
throw new \RuntimeException('Cannot write tags NDJSON: ' . $tmpPath);
}
// 1) Load all tags
try {
/** @var list<Tag> $tags */
$tags = $this->em->createQueryBuilder()
->select('t')
->from(Tag::class, 't')
->orderBy('t.label', 'ASC')
->orderBy('t.type', 'ASC')
->addOrderBy('t.label', 'ASC')
->getQuery()
->getResult();
if (!\is_array($tags) || $tags === []) {
\fclose($fh);
if ($tags === []) {
fclose($fh);
$this->atomicReplace($tmpPath, $this->tagsNdjsonPath);
return [
'tags' => 0,
'lines' => 0,
'bytes' => (int) @\filesize($this->tagsNdjsonPath),
'bytes' => (int) @filesize($this->tagsNdjsonPath),
'path' => $this->tagsNdjsonPath,
];
}
// 2) Build tagId => docIds map
$rows = $this->em->createQueryBuilder()
->select('IDENTITY(dt.tag) AS tagId', 'IDENTITY(dt.document) AS docId')
->from(DocumentTag::class, 'dt')
->getQuery()
->getArrayResult();
$tagToDocs = [];
foreach ($rows as $r) {
$tagId = (string) ($r['tagId'] ?? '');
$docId = (string) ($r['docId'] ?? '');
if ($tagId === '' || $docId === '') {
continue;
}
$tagToDocs[$tagId][] = $docId;
}
// 3) Stream NDJSON
$tagToActiveDocs = $this->buildActiveDocumentMap();
$lines = 0;
foreach ($tags as $tag) {
if (!$tag instanceof Tag) {
$tagId = $tag->getId()->toRfc4122();
$docIds = $tagToActiveDocs[$tagId] ?? [];
if ($docIds === []) {
continue;
}
$tagId = (string) $tag->getId();
$docIds = $tagToDocs[$tagId] ?? [];
if ($docIds !== []) {
$docIds = \array_values(\array_unique($docIds));
}
// Embedding source
$textParts = [
$tag->getLabel(),
$tag->getSlug(),
];
$desc = $tag->getDescription();
if (\is_string($desc) && \trim($desc) !== '') {
$textParts[] = \trim($desc);
}
$type = method_exists($tag, 'getType')
? (string) $tag->getType()
: 'generic';
if ($type === '') {
$type = 'generic';
}
$line = [
'tag_id' => $tagId,
'text' => \implode("\n", $textParts),
'type' => $type, // 🔥 NEW
'text' => $this->buildEmbeddingText($tag),
'type' => TagTypes::normalize($tag->getType()),
'document_ids' => $docIds,
];
$json = \json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
if (!\is_string($json)) {
$json = json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
if (!is_string($json)) {
continue;
}
\fwrite($fh, $json . "\n");
fwrite($fh, $json . "\n");
$lines++;
}
\fclose($fh);
fclose($fh);
$this->atomicReplace($tmpPath, $this->tagsNdjsonPath);
return [
'tags' => \count($tags),
'tags' => count($tags),
'lines' => $lines,
'bytes' => (int) @\filesize($this->tagsNdjsonPath),
'bytes' => (int) @filesize($this->tagsNdjsonPath),
'path' => $this->tagsNdjsonPath,
];
} catch (\Throwable $e) {
fclose($fh);
$this->cleanupTemporaryFile($tmpPath);
throw $e;
}
}
/**
* Builds a map from tag id to the ids of the documents assigned to that tag,
* restricted to documents whose status equals Document::STATUS_ACTIVE.
*
* Both ids are rendered with toRfc4122(). Per tag, document ids are
* de-duplicated and ordered with ksort() on the id string.
*
* @return array<string, list<string>>
*/
private function buildActiveDocumentMap(): array
{
/** @var list<DocumentTag> $relations */
$relations = $this->em->createQueryBuilder()
->select('dt')
->addSelect('t', 'd')
->from(DocumentTag::class, 'dt')
->innerJoin('dt.tag', 't')
->innerJoin('dt.document', 'd')
->where('d.status = :status')
->setParameter('status', Document::STATUS_ACTIVE)
->getQuery()
->getResult();
$tagToDocs = [];
foreach ($relations as $relation) {
$tag = $relation->getTag();
$document = $relation->getDocument();
$tagId = $tag->getId()->toRfc4122();
$docId = $document->getId()->toRfc4122();
// Keying by document id collapses repeated assignments of the same pair.
$tagToDocs[$tagId][$docId] = $docId;
}
foreach ($tagToDocs as $tagId => $docIds) {
// Sort by id, then drop the keys so each entry is a plain list.
ksort($docIds);
$tagToDocs[$tagId] = array_values($docIds);
}
return $tagToDocs;
}
/**
 * Assembles the text that represents a tag in the NDJSON export.
 *
 * Label and slug are trimmed; a non-empty description has every whitespace
 * run collapsed to a single space. Empty parts and exact duplicates are
 * dropped (first occurrence wins) and the rest is joined with newlines.
 */
private function buildEmbeddingText(Tag $tag): string
{
    $candidates = [
        trim($tag->getLabel()),
        trim($tag->getSlug()),
    ];

    $rawDescription = trim((string) $tag->getDescription());
    if ($rawDescription !== '') {
        $candidates[] = preg_replace('/\s+/u', ' ', $rawDescription) ?? $rawDescription;
    }

    $lines = [];
    foreach ($candidates as $candidate) {
        if ($candidate === '' || in_array($candidate, $lines, true)) {
            continue;
        }

        $lines[] = $candidate;
    }

    return implode("\n", $lines);
}
/**
 * Makes sure the directory that will contain the tags NDJSON file exists.
 *
 * @throws \RuntimeException When the directory is missing and cannot be created.
 */
private function ensureTargetDirectoryExists(): void
{
    $targetDir = dirname($this->tagsNdjsonPath);

    // The trailing is_dir() covers a concurrent creator winning the race
    // between the first check and mkdir().
    $ready = is_dir($targetDir)
        || @mkdir($targetDir, 0775, true)
        || is_dir($targetDir);

    if (!$ready) {
        throw new \RuntimeException('Cannot create tags NDJSON directory: ' . $targetDir);
    }
}
/**
* Deletes the given temporary file if it exists as a regular file.
*
* Best effort: a failing unlink() is suppressed and not reported.
*/
private function cleanupTemporaryFile(string $tmpPath): void
{
if (is_file($tmpPath)) {
@unlink($tmpPath);
}
}
private function atomicReplace(string $tmpPath, string $finalPath): void
{
if (\is_file($finalPath)) {
@\chmod($finalPath, 0664);
if (is_file($finalPath)) {
@chmod($finalPath, 0664);
}
if (!@\rename($tmpPath, $finalPath)) {
if (!@\copy($tmpPath, $finalPath)) {
@\unlink($tmpPath);
if (!@rename($tmpPath, $finalPath)) {
if (!@copy($tmpPath, $finalPath)) {
@unlink($tmpPath);
throw new \RuntimeException('Atomic replace failed for: ' . $finalPath);
}
@\unlink($tmpPath);
@unlink($tmpPath);
}
@\chmod($finalPath, 0664);
@chmod($finalPath, 0664);
}
}

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Tag;
use App\Entity\Document;
use Doctrine\DBAL\ArrayParameterType;
use Doctrine\DBAL\Exception;
use Doctrine\ORM\EntityManagerInterface;
@@ -11,91 +12,239 @@ use Symfony\Component\Uid\Uuid;
final class TagRoutingService
{
/**
* Number of raw tag hits requested from the vector service.
*/
private const DEFAULT_TOPK = 8;
private const MIN_BEST_SCORE = 0.25;
private const MAX_CANDIDATE_DOCS = 200;
/**
* Hard minimum confidence required to activate tag-based document routing.
*
* This intentionally aligns with the tag vector client gate to avoid
* misleading secondary thresholds in this class.
*/
private const MIN_BEST_SCORE = 0.72;
/**
* Only keep tag hits that stay reasonably close to the best hit.
* This reduces semantic spillover into weakly related document spaces.
*/
private const MAX_SCORE_DROP_FROM_BEST = 0.08;
/**
* Maximum number of tag hits that may influence routing.
*/
private const MAX_ROUTING_TAGS = 5;
/**
* Maximum number of candidate documents passed into scoped chunk search.
*/
private const MAX_CANDIDATE_DOCS = 80;
/**
* Small bonus for documents matched by multiple routed tags.
*/
private const MULTI_TAG_BONUS_PER_EXTRA_TAG = 0.05;
private const MAX_MULTI_TAG_BONUS = 0.15;
public function __construct(
private readonly TagVectorSearchClient $tagSearch,
private readonly EntityManagerInterface $em,
) {}
) {
}
/**
* @return string[]|null
* Returns ordered active document ids for tag-scoped retrieval.
*
* The method intentionally returns only document ids so the current
* retriever pipeline can stay unchanged.
*
* @return list<string>|null
* @throws Exception
*/
public function route(string $query): ?array
{
$query = trim($query);
if ($query === '') {
return null;
}
$hits = $this->tagSearch->search($query, self::DEFAULT_TOPK);
$hits = $this->filterRoutingHits(
$this->tagSearch->search($query, self::DEFAULT_TOPK)
);
if (!is_array($hits) || $hits === []) {
if ($hits === []) {
return null;
}
$bestScore = (float)($hits[0]['score'] ?? 0.0);
if ($bestScore < self::MIN_BEST_SCORE) {
return null;
}
// Convert tag UUID strings to binary(16)
$tagBinaryIds = [];
$tagMetaById = [];
foreach ($hits as $hit) {
$id = (string)($hit['tag_id'] ?? '');
if ($id === '') {
$tagId = (string) ($hit['tag_id'] ?? '');
if ($tagId === '') {
continue;
}
try {
$tagBinaryIds[] = Uuid::fromString($id)->toBinary();
$tagBinaryIds[] = Uuid::fromString($tagId)->toBinary();
} catch (\Throwable) {
continue;
}
$tagMetaById[$tagId] = [
'score' => (float) $hit['score'],
'weight' => $this->resolveTypeWeight((string) $hit['tag_type']),
];
}
if ($tagBinaryIds === []) {
return null;
}
// Direct DBAL query (binary-safe)
$conn = $this->em->getConnection();
$rows = $conn->executeQuery(
'SELECT document_id
FROM document_tag
WHERE tag_id IN (:tagIds)',
['tagIds' => $tagBinaryIds],
['tagIds' => ArrayParameterType::BINARY]
$rows = $this->em->getConnection()->executeQuery(
'SELECT dt.document_id, dt.tag_id
FROM document_tag dt
INNER JOIN document d ON d.id = dt.document_id
WHERE dt.tag_id IN (:tagIds)
AND d.status = :status',
[
'tagIds' => $tagBinaryIds,
'status' => Document::STATUS_ACTIVE,
],
[
'tagIds' => ArrayParameterType::BINARY,
]
)->fetchAllAssociative();
if ($rows === []) {
return null;
}
$docIds = [];
$documentScores = [];
$documentMatchedTags = [];
foreach ($rows as $row) {
if (!isset($row['document_id'])) {
if (!isset($row['document_id'], $row['tag_id'])) {
continue;
}
try {
$uuid = Uuid::fromBinary($row['document_id']);
$docIds[(string)$uuid] = true;
$documentId = (string) Uuid::fromBinary($row['document_id']);
$tagId = (string) Uuid::fromBinary($row['tag_id']);
} catch (\Throwable) {
continue;
}
if (count($docIds) >= self::MAX_CANDIDATE_DOCS) {
if (!isset($tagMetaById[$tagId])) {
continue;
}
$documentScores[$documentId] = ($documentScores[$documentId] ?? 0.0)
+ ($tagMetaById[$tagId]['score'] * $tagMetaById[$tagId]['weight']);
$documentMatchedTags[$documentId][$tagId] = true;
}
if ($documentScores === []) {
return null;
}
foreach ($documentScores as $documentId => $score) {
$matchedTagCount = isset($documentMatchedTags[$documentId])
? count($documentMatchedTags[$documentId])
: 0;
if ($matchedTagCount > 1) {
$documentScores[$documentId] += min(
self::MAX_MULTI_TAG_BONUS,
($matchedTagCount - 1) * self::MULTI_TAG_BONUS_PER_EXTRA_TAG
);
}
}
arsort($documentScores, SORT_NUMERIC);
return array_slice(
array_keys($documentScores),
0,
self::MAX_CANDIDATE_DOCS
);
}
/**
* @param array<int, array{
* tag_id:string,
* score:float,
* label?:string,
* tag_type?:string
* }> $hits
*
* @return list<array{
* tag_id:string,
* score:float,
* tag_type:string
* }>
*/
private function filterRoutingHits(array $hits): array
{
if ($hits === []) {
return [];
}
$bestScore = (float) ($hits[0]['score'] ?? 0.0);
if ($bestScore < self::MIN_BEST_SCORE) {
return [];
}
$minimumAcceptedScore = max(
self::MIN_BEST_SCORE,
$bestScore - self::MAX_SCORE_DROP_FROM_BEST
);
$filtered = [];
foreach ($hits as $hit) {
$tagId = (string) ($hit['tag_id'] ?? '');
$score = (float) ($hit['score'] ?? 0.0);
$tagType = TagTypes::normalize(
(string) ($hit['tag_type'] ?? TagTypes::GENERIC)
);
if ($tagId === '' || $score < $minimumAcceptedScore) {
continue;
}
// Sales signals may still be useful elsewhere, but they should not
// expand the document scope for semantic retrieval.
if ($tagType === TagTypes::SALES_SIGNAL) {
continue;
}
$filtered[] = [
'tag_id' => $tagId,
'score' => $score,
'tag_type' => $tagType,
];
if (count($filtered) >= self::MAX_ROUTING_TAGS) {
break;
}
}
return array_keys($docIds);
return $filtered;
}
/**
* Returns the multiplier applied to a tag hit's score, by normalized tag type.
*
* Catalog entities get 1.20, generic tags 1.00 and sales signals 0.00;
* any other normalized value falls back to 1.00.
*/
private function resolveTypeWeight(string $tagType): float
{
return match (TagTypes::normalize($tagType)) {
TagTypes::CATALOG_ENTITY => 1.20,
TagTypes::GENERIC => 1.00,
TagTypes::SALES_SIGNAL => 0.00,
default => 1.00,
};
}
}

View File

@@ -4,42 +4,45 @@ declare(strict_types=1);
namespace App\Tag;
use App\Entity\Tag;
use App\Entity\Document;
use App\Entity\DocumentTag;
use App\Entity\Tag;
use App\Service\TagRebuildJobService;
use Doctrine\ORM\EntityManagerInterface;
use InvalidArgumentException;
use RuntimeException;
final readonly class TagService
{
public function __construct(
private EntityManagerInterface $em,
private TagRebuildJobService $jobs,
) {}
// =========================================================
// TAG CREATE
// =========================================================
) {
}
public function create(
string $slug,
string $label,
?string $description = null,
string $type = 'generic' // NEU
string $type = TagTypes::GENERIC,
): Tag {
$slug = trim($slug);
$normalizedSlug = $this->normalizeSlug($slug);
$label = trim($label);
if ($label === '' || $slug === '') {
throw new \InvalidArgumentException('Label und Slug sind Pflichtfelder.');
if ($normalizedSlug === '' || $label === '') {
throw new InvalidArgumentException('Tag label and slug are required.');
}
if ($this->slugExists($slug)) {
throw new \RuntimeException('Slug existiert bereits.');
if ($this->slugExists($normalizedSlug)) {
throw new RuntimeException(sprintf('Tag slug "%s" already exists.', $normalizedSlug));
}
$tag = new Tag($slug, $label, $description);
$tag->setType($type); // NEU
$tag = new Tag(
$normalizedSlug,
$label,
$description,
TagTypes::normalize($type)
);
$this->em->persist($tag);
$this->em->flush();
@@ -49,18 +52,9 @@ final readonly class TagService
return $tag;
}
// =========================================================
// TAG DELETE
// =========================================================
public function deleteById(string $tagId): void
{
$tag = $this->em->getRepository(Tag::class)->find($tagId);
if (!$tag instanceof Tag) {
throw new \RuntimeException('Tag nicht gefunden.');
}
$tag = $this->findTagById($tagId);
$this->delete($tag);
}
@@ -72,87 +66,103 @@ final readonly class TagService
$this->triggerRebuildIfIdle();
}
// =========================================================
// DOCUMENT TAG SYNC
// =========================================================
public function syncDocumentTags(Document $document, array $newTagIds): void
{
$newTagIds = array_unique($newTagIds);
$normalizedTagIds = $this->normalizeIdList($newTagIds);
/** @var list<DocumentTag> $currentRelations */
$currentRelations = $this->em
->getRepository(DocumentTag::class)
->findBy(['document' => $document]);
$currentTagIds = array_map(
fn(DocumentTag $dt) => (string) $dt->getTag()->getId(),
static fn (DocumentTag $relation): string => (string) $relation->getTag()->getId(),
$currentRelations
);
$toAdd = array_diff($newTagIds, $currentTagIds);
$toRemove = array_diff($currentTagIds, $newTagIds);
$toAdd = array_values(array_diff($normalizedTagIds, $currentTagIds));
$toRemove = array_values(array_diff($currentTagIds, $normalizedTagIds));
foreach ($toAdd as $tagId) {
$tag = $this->em->getRepository(Tag::class)->find($tagId);
if ($tag instanceof Tag) {
$this->em->persist(new DocumentTag($document, $tag));
}
}
foreach ($currentRelations as $relation) {
if (in_array((string) $relation->getTag()->getId(), $toRemove, true)) {
$relationTagId = (string) $relation->getTag()->getId();
if (in_array($relationTagId, $toRemove, true)) {
$this->em->remove($relation);
}
}
if ($toAdd || $toRemove) {
if ($toAdd !== [] || $toRemove !== []) {
$this->em->flush();
$this->triggerRebuildIfIdle();
}
}
// =========================================================
// TAG → DOCUMENT SYNC (Bulk Assign)
// =========================================================
public function syncTagDocuments(Tag $tag, array $newDocumentIds): void
{
$newDocumentIds = array_unique($newDocumentIds);
$normalizedDocumentIds = $this->normalizeIdList($newDocumentIds);
/** @var list<DocumentTag> $currentRelations */
$currentRelations = $this->em
->getRepository(DocumentTag::class)
->findBy(['tag' => $tag]);
$currentDocumentIds = array_map(
fn(DocumentTag $dt) => (string) $dt->getDocument()->getId(),
static fn (DocumentTag $relation): string => (string) $relation->getDocument()->getId(),
$currentRelations
);
$toAdd = array_diff($newDocumentIds, $currentDocumentIds);
$toRemove = array_diff($currentDocumentIds, $newDocumentIds);
$toAdd = array_values(array_diff($normalizedDocumentIds, $currentDocumentIds));
$toRemove = array_values(array_diff($currentDocumentIds, $normalizedDocumentIds));
foreach ($toAdd as $documentId) {
$document = $this->em->getRepository(Document::class)->find($documentId);
if ($document instanceof Document) {
if (
$document instanceof Document
&& $document->getStatus() === Document::STATUS_ACTIVE
) {
$this->em->persist(new DocumentTag($document, $tag));
}
}
foreach ($currentRelations as $relation) {
if (in_array((string) $relation->getDocument()->getId(), $toRemove, true)) {
$relationDocumentId = (string) $relation->getDocument()->getId();
if (in_array($relationDocumentId, $toRemove, true)) {
$this->em->remove($relation);
}
}
if ($toAdd || $toRemove) {
if ($toAdd !== [] || $toRemove !== []) {
$this->em->flush();
$this->triggerRebuildIfIdle();
}
}
// =========================================================
// INTERNAL HELPERS
// =========================================================
/**
* Loads a tag by id through the Tag repository.
*
* @throws InvalidArgumentException When the trimmed id is empty.
* @throws RuntimeException When the repository does not return a Tag.
*/
private function findTagById(string $tagId): Tag
{
$tagId = trim($tagId);
if ($tagId === '') {
throw new InvalidArgumentException('Tag id must not be empty.');
}
$tag = $this->em->getRepository(Tag::class)->find($tagId);
if (!$tag instanceof Tag) {
throw new RuntimeException('Tag not found.');
}
return $tag;
}
private function slugExists(string $slug): bool
{
@@ -165,6 +175,36 @@ final readonly class TagService
->getSingleScalarResult() > 0;
}
/**
 * Normalizes a raw id list into unique, trimmed, non-empty id strings.
 *
 * Entries that cannot be represented as a string (arrays, null, objects
 * without __toString()) are skipped rather than cast: the cast would yield
 * the literal "Array" plus a warning, or throw an Error. The order of first
 * occurrence is preserved.
 *
 * @param array<mixed> $ids
 * @return list<string>
 */
private function normalizeIdList(array $ids): array
{
    $normalized = [];

    foreach ($ids as $id) {
        if (!is_scalar($id) && !$id instanceof \Stringable) {
            continue;
        }

        $id = trim((string) $id);
        if ($id === '') {
            continue;
        }

        $normalized[] = $id;
    }

    return array_values(array_unique($normalized));
}
/**
 * Canonicalizes a slug: trimmed, lower-cased, whitespace runs turned into
 * hyphens, hyphen runs collapsed, and leading/trailing hyphens removed.
 */
private function normalizeSlug(string $slug): string
{
    $normalized = mb_strtolower(trim($slug));

    // Order matters: whitespace becomes hyphens first, then runs collapse.
    foreach (['/\s+/u', '/-+/u'] as $pattern) {
        $normalized = preg_replace($pattern, '-', $normalized) ?? $normalized;
    }

    return trim($normalized, '-');
}
private function triggerRebuildIfIdle(): void
{
if (!$this->jobs->hasActiveJob()) {

View File

@@ -5,8 +5,10 @@ declare(strict_types=1);
namespace App\Tag;
/**
* Zentrale Definition aller erlaubten Tag-Typen.
* Verhindert Magic Strings im Code.
* Central definition of all supported tag types.
*
* This class is intentionally tiny and dependency-free because it is the
* foundation for entity validation, admin forms, routing, and catalog logic.
*/
final class TagTypes
{
@@ -14,6 +16,25 @@ final class TagTypes
public const CATALOG_ENTITY = 'catalog_entity';
public const SALES_SIGNAL = 'sales_signal';
/**
* Returns the canonical list of allowed type values.
*
* The list contains exactly the GENERIC, CATALOG_ENTITY and SALES_SIGNAL
* constants, in that order.
*
* @return list<string>
*/
public static function all(): array
{
return [
self::GENERIC,
self::CATALOG_ENTITY,
self::SALES_SIGNAL,
];
}
/**
* Returns UI choices for forms and admin screens.
*
* @return array<string, string>
*/
public static function choices(): array
{
return [
@@ -23,5 +44,53 @@ final class TagTypes
];
}
private function __construct() {}
/**
 * Returns true only if the given value is one of the supported tag types.
 *
 * The comparison ignores case and surrounding whitespace. The value is
 * deliberately not passed through normalize(): normalize() replaces unknown
 * input with a valid fallback, which would make every non-null string
 * (including "" and arbitrary garbage) look valid.
 *
 * @param string|null $type Candidate type value; null is never valid.
 */
public static function isValid(?string $type): bool
{
    if ($type === null) {
        return false;
    }

    return in_array(mb_strtolower(trim($type)), self::all(), true);
}
/**
 * Maps external input onto a canonical tag type.
 *
 * Input is trimmed and lower-cased. A supported type is returned as is;
 * empty or unknown input yields the (equally trimmed and lower-cased)
 * default when that is a supported type, and GENERIC otherwise.
 */
public static function normalize(?string $type, string $default = self::GENERIC): string
{
    $supported = self::all();

    $candidate = mb_strtolower(trim((string) $type));
    if ($candidate !== '' && in_array($candidate, $supported, true)) {
        return $candidate;
    }

    $fallback = mb_strtolower(trim($default));

    return in_array($fallback, $supported, true) ? $fallback : self::GENERIC;
}
/**
 * Looks up the display label for a tag type.
 *
 * The type is normalized first; the label is the key under which that value
 * appears in choices(). Falls back to 'Generic' when no entry matches.
 */
public static function labelFor(string $type): string
{
    $labelsByType = array_flip(self::choices());
    $canonicalType = self::normalize($type);

    return $labelsByType[$canonicalType] ?? 'Generic';
}
/**
* Tells whether the given value is exactly one of the values returned by all().
*
* The comparison is strict; no trimming or case folding happens here.
*/
private static function isKnownDefault(string $type): bool
{
return in_array($type, self::all(), true);
}
/**
* Private constructor: the class cannot be instantiated from outside.
*/
private function __construct()
{
}
}

View File

@@ -9,6 +9,8 @@ use Psr\Log\LoggerInterface;
final readonly class TagVectorIndexBuilder
{
private const GRACEFUL_TERMINATION_SECONDS = 2;
public function __construct(
private string $pythonBin,
private string $scriptPath,
@@ -17,10 +19,71 @@ final readonly class TagVectorIndexBuilder
private string $embeddingModel,
private int $timeoutSeconds,
private LoggerInterface $agentLogger,
private IndexMetaManager $metaManager, // ✅ NEU
) {}
private IndexMetaManager $metaManager,
) {
}
/**
* Rebuilds the tag vector index from the tags NDJSON file.
*
* The ingest command writes to "<index>.tmp" and "<index>.tmp.meta.json";
* both are moved onto the final paths only after the command exited with 0
* and both files are non-empty. When the NDJSON holds no embeddable tag, the
* final index and meta files are removed instead and no command is run.
* In both outcomes the runtime meta is updated via commitRuntime().
*
* @throws \RuntimeException When the ingest command fails or produces
*                           incomplete artifacts.
*/
public function build(): void
{
$this->assertPreconditions();
$tmpIndex = $this->vectorTagsIndexPath . '.tmp';
$tmpMeta = $tmpIndex . '.meta.json';
$finalIndex = $this->vectorTagsIndexPath;
$finalMeta = $finalIndex . '.meta.json';
$this->ensureTargetDirectoryExists($finalIndex);
// Start from a clean slate so leftovers of an earlier run are never promoted.
$this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);
if (!$this->hasEmbeddableTags()) {
$this->agentLogger->info('[tags] no embeddable tags found, removing stale tag index artifacts.');
$this->removeFileIfExists($finalIndex);
$this->removeFileIfExists($finalMeta);
$this->commitRuntime(false);
return;
}
$cmd = $this->buildCommand($tmpIndex);
$this->agentLogger->info('[tags] build tag vector index', [
'cmd' => $cmd,
'timeout' => $this->timeoutSeconds,
'embedding_model' => $this->embeddingModel,
]);
try {
$result = $this->runCommand($cmd);
if ($result['exit'] !== 0) {
$this->agentLogger->error('[tags] tag vector ingest failed', [
'exit' => $result['exit'],
'stdout' => $result['stdout'],
'stderr' => $result['stderr'],
]);
throw new \RuntimeException('Tag vector ingest failed (exit=' . $result['exit'] . ')');
}
// A zero exit code alone is not trusted: both artifacts must exist and be non-empty.
if (!$this->isUsableArtifact($tmpIndex) || !$this->isUsableArtifact($tmpMeta)) {
throw new \RuntimeException('Tag vector ingest produced incomplete artifacts.');
}
$this->atomicReplace($tmpIndex, $finalIndex);
$this->atomicReplace($tmpMeta, $finalMeta);
$this->commitRuntime(true);
$this->agentLogger->info('[tags] tag vector index build completed + runtime committed', [
'index' => $finalIndex,
'meta' => $finalMeta,
]);
} catch (\Throwable $e) {
// Remove temporary files on any failure, then let the error propagate.
$this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);
throw $e;
}
}
private function assertPreconditions(): void
{
if (!is_file($this->tagsNdjsonPath)) {
throw new \RuntimeException('tags.ndjson missing: ' . $this->tagsNdjsonPath);
@@ -30,65 +93,178 @@ final readonly class TagVectorIndexBuilder
throw new \RuntimeException('Tag ingest script missing: ' . $this->scriptPath);
}
$tmpIndex = $this->vectorTagsIndexPath . '.tmp';
$tmpMeta = $tmpIndex . '.meta.json';
$finalIndex = $this->vectorTagsIndexPath;
$finalMeta = $finalIndex . '.meta.json';
$dir = \dirname($finalIndex);
if (!\is_dir($dir)) {
@\mkdir($dir, 0775, true);
if (trim($this->pythonBin) === '') {
throw new \RuntimeException('Python binary must not be empty.');
}
@\unlink($tmpIndex);
@\unlink($tmpMeta);
if ($this->timeoutSeconds < 1) {
throw new \RuntimeException('Tag vector timeout must be >= 1 second.');
}
}
$cmd = sprintf(
'%s %s %s %s %s 2>&1',
private function buildCommand(string $tmpIndex): string
{
return sprintf(
'%s %s %s %s 2>&1',
escapeshellarg($this->pythonBin),
escapeshellarg($this->scriptPath),
escapeshellarg($this->tagsNdjsonPath),
escapeshellarg($tmpIndex),
escapeshellarg($this->embeddingModel),
);
$this->agentLogger->info('[tags] build tag vector index', [
'cmd' => $cmd,
'timeout' => $this->timeoutSeconds,
]);
$out = [];
$exit = 0;
exec($cmd, $out, $exit);
if ($exit !== 0) {
$this->agentLogger->error('[tags] tag vector ingest failed', [
'exit' => $exit,
'out' => $out,
]);
throw new \RuntimeException('Tag vector ingest failed (exit=' . $exit . ')');
}
if (!is_file($tmpIndex) || !is_file($tmpMeta)) {
@\unlink($tmpIndex);
@\unlink($tmpMeta);
$this->agentLogger->warning('[tags] no tag index produced (maybe 0 tags).');
private function ensureTargetDirectoryExists(string $finalIndexPath): void
{
$dir = dirname($finalIndexPath);
if (is_dir($dir)) {
return;
}
$this->atomicReplace($tmpIndex, $finalIndex);
$this->atomicReplace($tmpMeta, $finalMeta);
if (!@mkdir($dir, 0775, true) && !is_dir($dir)) {
throw new \RuntimeException('Unable to create tag vector directory: ' . $dir);
}
}
// ✅ ENTERPRISE COMMIT MARKER
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
/**
 * Scans the tags NDJSON file for at least one line that carries both a
 * non-blank "tag_id" and a non-blank "text".
 *
 * Blank lines and lines that do not decode to a JSON array/object are
 * ignored. Reading stops at the first match.
 *
 * @throws \RuntimeException When the NDJSON file cannot be opened.
 */
private function hasEmbeddableTags(): bool
{
    $handle = @fopen($this->tagsNdjsonPath, 'rb');
    if ($handle === false) {
        throw new \RuntimeException('Unable to read tags NDJSON: ' . $this->tagsNdjsonPath);
    }

    $found = false;

    try {
        while (!$found && ($rawLine = fgets($handle)) !== false) {
            $rawLine = trim($rawLine);
            if ($rawLine === '') {
                continue;
            }

            $record = json_decode($rawLine, true);
            if (!is_array($record)) {
                continue;
            }

            $hasTagId = trim((string) ($record['tag_id'] ?? '')) !== '';
            $hasText = trim((string) ($record['text'] ?? '')) !== '';

            $found = $hasTagId && $hasText;
        }
    } finally {
        fclose($handle);
    }

    return $found;
}
/**
 * Runs the ingest command, captures its output and enforces the configured
 * timeout.
 *
 * Output is read non-blockingly while the process is polled every 100 ms.
 * When the timeout elapses the process is terminated, given
 * GRACEFUL_TERMINATION_SECONDS to exit, and killed with signal 9 if it is
 * still running.
 *
 * The exit code is taken from the first proc_get_status() call that reports
 * the process as finished. Before PHP 8.3, proc_close() returns -1 once
 * proc_get_status() has already observed the exit, which would make every
 * successful run look like a failure.
 *
 * @return array{exit:int, stdout:string, stderr:string}
 *
 * @throws \RuntimeException When the process cannot be started or times out.
 */
private function runCommand(string $cmd): array
{
    $descriptorSpec = [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ];

    $process = @proc_open($cmd, $descriptorSpec, $pipes);
    if (!is_resource($process)) {
        throw new \RuntimeException('Could not start tag vector ingest process.');
    }

    // The child gets no input; closing stdin prevents it from waiting on us.
    fclose($pipes[0]);
    stream_set_blocking($pipes[1], false);
    stream_set_blocking($pipes[2], false);

    $stdout = '';
    $stderr = '';
    $startedAt = microtime(true);
    $timedOut = false;
    $observedExitCode = null;

    try {
        while (true) {
            $stdout .= stream_get_contents($pipes[1]) ?: '';
            $stderr .= stream_get_contents($pipes[2]) ?: '';

            $status = proc_get_status($process);
            if (!is_array($status) || ($status['running'] ?? false) !== true) {
                // Only this first "not running" status carries the real exit
                // code on PHP < 8.3; remember it before proc_close() runs.
                $statusExitCode = is_array($status) ? ($status['exitcode'] ?? null) : null;
                if (is_int($statusExitCode) && $statusExitCode >= 0) {
                    $observedExitCode = $statusExitCode;
                }

                break;
            }

            if ((microtime(true) - $startedAt) > $this->timeoutSeconds) {
                $timedOut = true;
                proc_terminate($process);
                usleep(self::GRACEFUL_TERMINATION_SECONDS * 1000000);

                $status = proc_get_status($process);
                if (is_array($status) && ($status['running'] ?? false) === true) {
                    proc_terminate($process, 9);
                }

                break;
            }

            usleep(100000);
        }

        // Drain whatever was written between the last read and process exit.
        $stdout .= stream_get_contents($pipes[1]) ?: '';
        $stderr .= stream_get_contents($pipes[2]) ?: '';
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
    }

    $closeCode = proc_close($process);

    if ($timedOut) {
        $this->agentLogger->error('[tags] tag vector ingest timed out', [
            'timeout' => $this->timeoutSeconds,
            'stdout' => $stdout,
            'stderr' => $stderr,
        ]);

        throw new \RuntimeException('Tag vector ingest timed out after ' . $this->timeoutSeconds . ' seconds.');
    }

    return [
        'exit' => $observedExitCode ?? (is_int($closeCode) ? $closeCode : 1),
        'stdout' => trim($stdout),
        'stderr' => trim($stderr),
    ];
}
/**
* Returns true when the path is a regular file with a size greater than zero.
*/
private function isUsableArtifact(string $path): bool
{
return is_file($path) && filesize($path) > 0;
}
/**
* Removes each of the given files if it exists.
*
* @param string ...$paths Files to delete.
*/
private function cleanupTemporaryArtifacts(string ...$paths): void
{
foreach ($paths as $path) {
$this->removeFileIfExists($path);
}
}
/**
* Deletes the path if it is a regular file.
*
* Best effort: a failing unlink() is suppressed and not reported.
*/
private function removeFileIfExists(string $path): void
{
if (is_file($path)) {
@unlink($path);
}
}
/**
* Records the outcome of a rebuild through the meta manager's touchRuntime().
*
* Writes the current time (DATE_ATOM) as 'last_tags_rebuild_at' and the
* given flag as 'tags_index_present'.
*/
private function commitRuntime(bool $indexPresent): void
{
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
'tags_index_present' => $indexPresent,
]);
}
@@ -99,6 +275,7 @@ final readonly class TagVectorIndexBuilder
@unlink($tmp);
throw new \RuntimeException('Atomic replace failed for: ' . $final);
}
@unlink($tmp);
}

View File

@@ -6,11 +6,20 @@ namespace App\Tag;
final readonly class TagVectorIndexHealthService
{
private const STATUS_OK = 'OK';
private const STATUS_OK_EMPTY = 'OK_EMPTY';
private const STATUS_INCONSISTENT_STALE_VECTOR = 'INCONSISTENT_STALE_VECTOR';
private const STATUS_INCONSISTENT_MISSING_VECTOR = 'INCONSISTENT_MISSING_VECTOR';
private const STATUS_INCONSISTENT_COUNT_MISMATCH = 'INCONSISTENT_COUNT_MISMATCH';
private const STATUS_INCONSISTENT_INVALID_META = 'INCONSISTENT_INVALID_META';
private const STATUS_UNKNOWN = 'UNKNOWN';
public function __construct(
private string $tagsNdjsonPath,
private string $vectorTagsIndexPath,
private string $vectorTagsMetaPath
) {}
private string $vectorTagsMetaPath,
) {
}
public function check(): array
{
@@ -18,51 +27,189 @@ final readonly class TagVectorIndexHealthService
$vectorExists = is_file($this->vectorTagsIndexPath);
$metaExists = is_file($this->vectorTagsMetaPath);
$ndjsonTagCount = 0;
$ndjsonStats = $this->readNdjsonStats();
$metaStats = $this->readMetaStats();
if ($ndjsonExists) {
$h = @fopen($this->tagsNdjsonPath, 'r');
if ($h !== false) {
while (($line = fgets($h)) !== false) {
$line = trim($line);
if ($line === '') continue;
$data = json_decode($line, true);
if (is_array($data) && !empty($data['tag_id']) && !empty($data['text'])) {
$ndjsonTagCount++;
}
}
fclose($h);
}
}
$vectorTagCount = 0;
if ($metaExists) {
$meta = json_decode((string) file_get_contents($this->vectorTagsMetaPath), true);
if (is_array($meta)) {
$vectorTagCount = count($meta);
}
}
$status = $this->determineStatus($ndjsonTagCount, $vectorExists, $metaExists, $vectorTagCount);
$status = $this->determineStatus(
$ndjsonStats['exported_tag_count'],
$vectorExists,
$metaExists,
$metaStats['vector_tag_count'],
$metaStats['meta_valid']
);
return [
'tags_ndjson_exists' => $ndjsonExists,
'tags_ndjson_count' => $ndjsonTagCount,
'tags_ndjson_count' => $ndjsonStats['exported_tag_count'],
'vector_exists' => $vectorExists,
'meta_exists' => $metaExists,
'vector_tag_count' => $vectorTagCount,
'vector_tag_count' => $metaStats['vector_tag_count'],
'status' => $status,
// Extra diagnostics for admin/CLI.
'tags_ndjson_lines_total' => $ndjsonStats['lines_total'],
'tags_ndjson_invalid_lines' => $ndjsonStats['invalid_lines'],
'tags_ndjson_empty_lines' => $ndjsonStats['empty_lines'],
'tags_with_active_document_ids' => $ndjsonStats['tags_with_document_ids'],
'meta_valid' => $metaStats['meta_valid'],
'paths' => [
'tags_ndjson' => $this->tagsNdjsonPath,
'vector_index' => $this->vectorTagsIndexPath,
'vector_meta' => $this->vectorTagsMetaPath,
],
];
}
private function determineStatus(int $ndjsonTagCount, bool $vectorExists, bool $metaExists, int $vectorTagCount): string
/**
* @return array{
* lines_total:int,
* empty_lines:int,
* invalid_lines:int,
* exported_tag_count:int,
* tags_with_document_ids:int
* }
*/
private function readNdjsonStats(): array
{
if ($ndjsonTagCount === 0 && !$vectorExists && !$metaExists) return 'OK_EMPTY';
if ($ndjsonTagCount > 0 && $vectorExists && $metaExists && $vectorTagCount === $ndjsonTagCount) return 'OK';
if ($ndjsonTagCount === 0 && ($vectorExists || $metaExists)) return 'INCONSISTENT_STALE_VECTOR';
if ($ndjsonTagCount > 0 && (!$vectorExists || !$metaExists)) return 'INCONSISTENT_MISSING_VECTOR';
if ($ndjsonTagCount !== $vectorTagCount) return 'INCONSISTENT_COUNT_MISMATCH';
return 'UNKNOWN';
$stats = [
'lines_total' => 0,
'empty_lines' => 0,
'invalid_lines' => 0,
'exported_tag_count' => 0,
'tags_with_document_ids' => 0,
];
if (!is_file($this->tagsNdjsonPath)) {
return $stats;
}
$handle = @fopen($this->tagsNdjsonPath, 'rb');
if ($handle === false) {
return $stats;
}
try {
while (($line = fgets($handle)) !== false) {
$stats['lines_total']++;
$line = trim($line);
if ($line === '') {
$stats['empty_lines']++;
continue;
}
$data = json_decode($line, true);
if (!is_array($data)) {
$stats['invalid_lines']++;
continue;
}
$tagId = trim((string) ($data['tag_id'] ?? ''));
$text = trim((string) ($data['text'] ?? ''));
$documentIds = $data['document_ids'] ?? null;
$hasDocumentIds = is_array($documentIds) && $documentIds !== [];
if ($tagId === '' || $text === '') {
$stats['invalid_lines']++;
continue;
}
$stats['exported_tag_count']++;
if ($hasDocumentIds) {
$stats['tags_with_document_ids']++;
}
}
} finally {
fclose($handle);
}
return $stats;
}
/**
 * Reads the vector meta file and derives the number of indexed tags.
 *
 * Two JSON shapes are accepted: a list, or an object whose keys are all
 * non-negative decimal numbers. json_decode() in associative mode converts
 * canonical numeric keys such as "1" or "17" into PHP integers, so integer
 * keys count as numeric too; checking for digit strings alone would reject
 * objects like {"1": ..., "2": ...}.
 *
 * A missing, unreadable, blank or differently shaped file is reported as
 * invalid with a count of 0.
 *
 * @return array{vector_tag_count:int, meta_valid:bool}
 */
private function readMetaStats(): array
{
    $invalid = [
        'vector_tag_count' => 0,
        'meta_valid' => false,
    ];

    if (!is_file($this->vectorTagsMetaPath)) {
        return $invalid;
    }

    // An unreadable file is a health finding, not a reason to emit a warning.
    $raw = @file_get_contents($this->vectorTagsMetaPath);
    if (!is_string($raw) || trim($raw) === '') {
        return $invalid;
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded)) {
        return $invalid;
    }

    if (!array_is_list($decoded)) {
        foreach (array_keys($decoded) as $key) {
            $isNumericKey = is_int($key) ? $key >= 0 : ctype_digit($key);
            if (!$isNumericKey) {
                return $invalid;
            }
        }
    }

    return [
        'vector_tag_count' => count($decoded),
        'meta_valid' => true,
    ];
}
/**
 * Maps the observed tag export / vector artifact state to a status constant.
 *
 * Evaluation order matters: an empty export is classified first, then
 * missing artifacts, then an invalid meta file, and only afterwards are the
 * counts compared.
 *
 * @param int  $ndjsonTagCount Number of tags counted in the NDJSON export.
 * @param bool $vectorExists   Whether the vector index artifact exists.
 * @param bool $metaExists     Whether the vector meta artifact exists.
 * @param int  $vectorTagCount Number of tags derived from the meta artifact.
 * @param bool $metaValid      Whether the meta artifact could be interpreted.
 *
 * @return string One of the self::STATUS_* constants.
 */
private function determineStatus(
    int $ndjsonTagCount,
    bool $vectorExists,
    bool $metaExists,
    int $vectorTagCount,
    bool $metaValid
): string {
    $anyArtifactPresent = $vectorExists || $metaExists;
    $allArtifactsPresent = $vectorExists && $metaExists;

    // Nothing exported: either everything is cleanly empty or leftovers exist.
    if ($ndjsonTagCount === 0) {
        return $anyArtifactPresent
            ? self::STATUS_INCONSISTENT_STALE_VECTOR
            : self::STATUS_OK_EMPTY;
    }

    // Tags were exported but at least one vector artifact is absent.
    if ($ndjsonTagCount > 0 && !$allArtifactsPresent) {
        return self::STATUS_INCONSISTENT_MISSING_VECTOR;
    }

    if ($metaExists && !$metaValid) {
        return self::STATUS_INCONSISTENT_INVALID_META;
    }

    $countsMatch = $vectorTagCount === $ndjsonTagCount;

    // For a positive count, both artifacts exist and the meta is valid by
    // the time this point is reached, so only the counts remain to compare.
    if ($ndjsonTagCount > 0 && $countsMatch) {
        return self::STATUS_OK;
    }

    if (!$countsMatch) {
        return self::STATUS_INCONSISTENT_COUNT_MISMATCH;
    }

    return self::STATUS_UNKNOWN;
}
}

View File

@@ -12,18 +12,29 @@ final readonly class TagVectorSearchClient
/**
* Minimum similarity score required for a tag to be considered.
*/
private const MIN_SCORE = 0.72;
public const MIN_SCORE = 0.72;
/**
* Default result size when callers do not specify a limit.
*/
private const DEFAULT_LIMIT = 8;
/**
* Hard limit to prevent excessive requests.
*/
private const MAX_LIMIT = 50;
/**
* HTTP timeout for the Python vector service.
*/
private const TIMEOUT_SECONDS = 10;
public function __construct(
private HttpClientInterface $http,
private string $serviceUrl,
private LoggerInterface $agentLogger,
) {}
) {
}
/**
* Executes a vector search against the Python tag index.
@@ -33,43 +44,51 @@ final readonly class TagVectorSearchClient
* {
* "tag_id": "...",
* "score": 0.73,
* "label": "Geräte", // optional (new)
* "tag_type": "catalog_entity" // optional (new)
* "label": "Geräte",
* "tag_type": "catalog_entity"
* }
* ]
*
* @return array<int, array{
* @return list<array{
* tag_id:string,
* score:float,
* label?:string,
* tag_type?:string
* label:string,
* tag_type:string
* }>
*/
public function search(string $query, int $limit = 8): array
public function search(string $query, int $limit = self::DEFAULT_LIMIT): array
{
$query = trim($query);
if ($query === '') {
return [];
}
$limit = max(1, min($limit, self::MAX_LIMIT));
$serviceUrl = rtrim(trim($this->serviceUrl), '/');
if ($serviceUrl === '') {
$this->agentLogger->warning('Tag vector service URL is empty.');
return [];
}
try {
$response = $this->http->request(
'POST',
rtrim($this->serviceUrl, '/') . '/search-tags',
$serviceUrl . '/search-tags',
[
'json' => [
'query' => $query,
'limit' => $limit,
],
'timeout' => 10,
'timeout' => self::TIMEOUT_SECONDS,
]
);
if ($response->getStatusCode() !== 200) {
$this->agentLogger->warning(
'Tag vector service returned non-200',
'Tag vector service returned non-200.',
['status' => $response->getStatusCode()]
);
@@ -77,10 +96,9 @@ final readonly class TagVectorSearchClient
}
$data = $response->toArray(false);
} catch (\Throwable $e) {
$this->agentLogger->warning(
'Tag vector service unreachable',
'Tag vector service unreachable.',
['error' => $e->getMessage()]
);
@@ -88,18 +106,33 @@ final readonly class TagVectorSearchClient
}
if (!is_array($data)) {
$this->agentLogger->warning('Tag vector service returned invalid payload');
$this->agentLogger->warning('Tag vector service returned invalid payload.');
return [];
}
$hits = [];
return $this->normalizeHits($data, $limit);
}
foreach ($data as $row) {
/**
* @param array<mixed> $rows
* @return list<array{
* tag_id:string,
* score:float,
* label:string,
* tag_type:string
* }>
*/
private function normalizeHits(array $rows, int $limit): array
{
$hitsByTagId = [];
foreach ($rows as $row) {
if (!is_array($row)) {
continue;
}
$tagId = (string)($row['tag_id'] ?? '');
$tagId = trim((string) ($row['tag_id'] ?? ''));
$score = $row['score'] ?? null;
if ($tagId === '' || !is_numeric($score)) {
@@ -112,24 +145,45 @@ final readonly class TagVectorSearchClient
continue;
}
$hit = [
$normalizedHit = [
'tag_id' => $tagId,
'score' => $score,
'label' => trim((string) ($row['label'] ?? '')),
'tag_type' => TagTypes::normalize((string) ($row['tag_type'] ?? TagTypes::GENERIC)),
];
// Optional: label
if (isset($row['label']) && is_string($row['label'])) {
$hit['label'] = $row['label'];
$existingHit = $hitsByTagId[$tagId] ?? null;
if ($existingHit === null || $normalizedHit['score'] > $existingHit['score']) {
$hitsByTagId[$tagId] = $normalizedHit;
}
}
// Optional: tag_type
if (isset($row['tag_type']) && is_string($row['tag_type'])) {
$hit['tag_type'] = $row['tag_type'];
if ($hitsByTagId === []) {
return [];
}
$hits[] = $hit;
$hits = array_values($hitsByTagId);
usort(
$hits,
static function (array $left, array $right): int {
$scoreComparison = $right['score'] <=> $left['score'];
if ($scoreComparison !== 0) {
return $scoreComparison;
}
return $hits;
$typeComparison = strcmp($left['tag_type'], $right['tag_type']);
if ($typeComparison !== 0) {
return $typeComparison;
}
return strcmp($left['tag_id'], $right['tag_id']);
}
);
return array_slice($hits, 0, $limit);
}
}

View File

@@ -5,90 +5,98 @@
{% block body %}
<div class="container-fluid">
<!-- ===================================================== -->
<!-- HEADER -->
<!-- ===================================================== -->
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"><i class="bi bi-hdd-rack"></i> Systemübersicht</h1>
<span class="badge bg-secondary">RAG Enterprise</span>
</div>
<!-- ===================================================== -->
<!-- KPI ROW (NUR STATUS-AMPELN) -->
<!-- ===================================================== -->
<div class="row g-4 mb-4">
{# ================= CHUNK VECTOR STATUS ================= #}
{% if vectorHealth is defined %}
{% set status = vectorHealth.status %}
{% set badgeClass =
status starts with 'OK'
{% set chunkStatus = vectorHealth.status|default('UNKNOWN') %}
{% set chunkBadgeClass =
chunkStatus starts with 'OK'
? 'bg-success'
: (status == 'INCONSISTENT_MISSING_VECTOR'
: (chunkStatus == 'INCONSISTENT_MISSING_VECTOR'
? 'bg-warning text-dark'
: 'bg-danger') %}
{% endif %}
: 'bg-danger')
%}
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-files"></i> Chunk-Vektor</div>
{% if vectorHealth is defined %}
<h4 class="mb-0">
<span class="badge {{ badgeClass }}">
{{ vectorHealth.status }}
</span>
</h4>
{% else %}
<div class="small text-light">
Keine Daten verfügbar.
</div>
{% endif %}
</div>
</div>
</div>
{# ================= TAG VECTOR STATUS ================= #}
{% if tagVectorHealth is defined %}
{% set tagStatus = tagVectorHealth.status %}
{% set tagStatus = tagVectorHealth.status|default('UNKNOWN') %}
{% set tagBadgeClass =
tagStatus starts with 'OK'
? 'bg-success'
: (tagStatus == 'INCONSISTENT_MISSING_VECTOR'
? 'bg-warning text-dark'
: 'bg-danger') %}
: 'bg-danger')
%}
{% set percent = chunkLimit > 0 ? (chunkCount / chunkLimit * 100)|round(3) : 0 %}
{% set percentClass =
percent >= 95
? 'bg-danger'
: (percent >= 85 ? 'bg-warning text-dark' : 'bg-success')
%}
{% set chunkHealthy = chunkStatus in ['OK', 'OK_EMPTY'] %}
{% set tagHealthy = tagStatus in ['OK', 'OK_EMPTY'] %}
{% set anyHealthIssue = not chunkHealthy or not tagHealthy %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3 mb-0">
<i class="bi bi-hdd-rack"></i> Systemübersicht
</h1>
<span class="badge bg-secondary">RetrieX Admin</span>
</div>
{% if anyHealthIssue %}
<div class="alert alert-warning shadow-sm mb-4">
<strong>Achtung:</strong>
Mindestens ein Index-Zustand ist nicht konsistent.
Prüfe die Detailkarten unten und führe bei Bedarf einen Global Reindex aus.
</div>
{% endif %}
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-tags"></i> Tag-Vektor</div>
<div class="row g-4 mb-4">
{% if tagVectorHealth is defined %}
<h4 class="mb-0">
<span class="badge {{ tagBadgeClass }}">
{{ tagVectorHealth.status }}
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="small text-light mb-2">
<i class="bi bi-files"></i> Chunk-Vektor
</div>
<h4 class="mb-2">
<span class="badge {{ chunkBadgeClass }}">
{{ chunkStatus }}
</span>
</h4>
{% else %}
<div class="small text-light">
Keine Daten verfügbar.
</div>
{% endif %}
</div>
</div>
</div>
{# ================= KNOWLEDGE CAPACITY ================= #}
{% set percent = chunkLimit > 0 ? (chunkCount / chunkLimit * 100)|round(3) : 0 %}
<div class="small text-muted">
Keyword-/Chunk-Retrieval-Grundlage des Systems
</div>
</div>
</div>
</div>
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-robot"></i> Wissenskapazität</div>
<div class="small text-light mb-2">
<i class="bi bi-tags"></i> Tag-Vektor
</div>
<h4 class="mb-2">
<span class="badge {{ tagBadgeClass }}">
{{ tagStatus }}
</span>
</h4>
<div class="small text-muted">
Semantisches Tag-Routing für Dokumenträume und Entity-Erkennung
</div>
</div>
</div>
</div>
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="small text-light mb-2">
<i class="bi bi-robot"></i> Wissenskapazität
</div>
<h4 class="mb-2">
{{ chunkCount|number_format(0, ',', '.') }}
@@ -98,14 +106,7 @@
</h4>
<div class="progress bg-dark mb-2" style="height: 14px;">
<div class="progress-bar
{% if percent >= 95 %}
bg-danger
{% elseif percent >= 85 %}
bg-warning text-dark
{% else %}
bg-success
{% endif %}"
<div class="progress-bar {{ percentClass }}"
style="width: {{ percent }}%;">
</div>
</div>
@@ -117,20 +118,21 @@
</div>
</div>
{# ================= GOVERNANCE ================= #}
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-shield-check"></i> System-Governance</div>
<div class="small text-light mb-2">
<i class="bi bi-shield-check"></i> System-Governance
</div>
<div class="small">
<strong>Benutzer</strong><br>
{{ app.user.userIdentifier }}
{{ app.user ? app.user.userIdentifier : '-' }}
</div>
<div class="small mt-3">
<strong>Rollen</strong><br>
{{ app.user.roles|join(', ') }}
{{ app.user ? app.user.roles|join(', ') : '-' }}
</div>
</div>
</div>
@@ -138,65 +140,94 @@
</div>
<!-- ===================================================== -->
<!-- DETAIL ROW (HIER SIND DIE ZAHLEN) -->
<!-- ===================================================== -->
<div class="row g-4">
{% if vectorHealth is defined %}
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-files"></i> Chunk-Vektor-Details</h5>
<h5 class="text-info mb-3">
<i class="bi bi-files"></i> Chunk-Vektor-Details
</h5>
<div class="small text-info">NDJSON-Chunks</div>
<div class="h5 mb-3">
{{ vectorHealth.ndjson_chunk_count|number_format(0, ',', '.') }}
{{ vectorHealth.ndjson_chunk_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="small text-info">Vektor-Index-Chunks</div>
<div class="h5">
{{ vectorHealth.vector_chunk_count|number_format(0, ',', '.') }}
</div>
</div>
</div>
</div>
{% endif %}
{% if tagVectorHealth is defined %}
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-tags"></i> Tag-Vektor-Details</h5>
<div class="small text-info">NDJSON-Tags</div>
<div class="h5 mb-3">
{{ tagVectorHealth.tags_ndjson_count|number_format(0, ',', '.') }}
{{ vectorHealth.vector_chunk_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="d-flex flex-wrap gap-2 mt-3">
<span class="badge {{ vectorHealth.ndjson_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
NDJSON {{ vectorHealth.ndjson_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ vectorHealth.vector_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Index {{ vectorHealth.vector_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ vectorHealth.meta_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ vectorHealth.meta_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
</div>
</div>
</div>
</div>
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">
<i class="bi bi-tags"></i> Tag-Vektor-Details
</h5>
<div class="small text-info">Exportierte Tags (NDJSON)</div>
<div class="h5 mb-3">
{{ tagVectorHealth.tags_ndjson_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="small text-info">Vektor-Index-Tags</div>
<div class="h5">
{{ tagVectorHealth.vector_tag_count|number_format(0, ',', '.') }}
<div class="h5 mb-3">
{{ tagVectorHealth.vector_tag_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="small text-info">Tags mit aktiven Dokumenten</div>
<div class="h5 mb-3">
{{ tagVectorHealth.tags_with_active_document_ids|default(0)|number_format(0, ',', '.') }}
</div>
<div class="d-flex flex-wrap gap-2 mt-3">
<span class="badge {{ tagVectorHealth.tags_ndjson_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
NDJSON {{ tagVectorHealth.tags_ndjson_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.vector_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Index {{ tagVectorHealth.vector_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.meta_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ tagVectorHealth.meta_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.meta_valid|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ tagVectorHealth.meta_valid|default(false) ? 'gültig' : 'ungültig' }}
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% endif %}
<!-- INDEXIERUNG -->
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-search"></i> Indexierung (Ingest Jobs)</h5>
<h5 class="text-info mb-3">
<i class="bi bi-search"></i> Indexierung (Ingest Jobs)
</h5>
<div class="text-muted small mb-3">
Erstellt den kompletten Wissensindex neu.
Kann je nach Datenmenge mehrere Minuten dauern.
Erstellt den kompletten Wissensindex neu und zieht dabei auch die
physischen Retrieval-Artefakte wieder gerade.
</div>
<form method="post"
action="/admin/jobs/global-reindex"
action="{{ path('admin_global_reindex') }}"
onsubmit="return confirm('Global Reindex starten? Dies kann mehrere Minuten dauern.');">
<input type="hidden"
@@ -208,15 +239,23 @@
Global Reindex starten
</button>
</form>
{% if anyHealthIssue %}
<div class="alert alert-dark border border-warning text-light small mt-3 mb-0">
Empfohlen bei inkonsistentem Chunk- oder Tag-Zustand.
</div>
{% endif %}
</div>
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %}
<div class="col-lg-4">
<div class="card bg-black border-danger text-light h-100">
<div class="card bg-black border-danger text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-danger mb-3"><i class="bi bi-sign-stop-fill"></i> Kritische Systemoperationen</h5>
<h5 class="text-danger mb-3">
<i class="bi bi-sign-stop-fill"></i> Kritische Systemoperationen
</h5>
<div class="small mb-3 text-secondary">
Entfernt alle Dokumente, Versionen, Indizes und Jobs.

View File

@@ -4,8 +4,15 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"><i class="bi bi-card-list"></i> Dokumente</h1>
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-card-list"></i> Dokumente
</h1>
<div class="small text-muted">
Übersicht über Dokumente, aktive Versionen, Ingest-Zustände und Tag-Zuordnungen.
</div>
</div>
<a href="{{ path('admin_document_new') }}"
class="btn btn-sm btn-outline-info">
@@ -13,50 +20,107 @@
</a>
</div>
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Worauf achten?</h5>
<ul class="small mb-0">
<li><strong>INDEXED</strong> bedeutet: aktive Version ist sauber im Wissensindex.</li>
<li><strong>PENDING</strong> oder <strong>FAILED</strong> bedeuten: Dokument prüfen und ggf. Ingest erneut anstoßen.</li>
<li><strong>Tags</strong> sollten fachlich präzise sein und nicht nur generische Oberbegriffe abbilden.</li>
<li>Die aktive Version ist die fachlich führende Version des Dokuments.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Schnellzugriff</h5>
<div class="small text-light">
Über <strong>Tags</strong> gelangst du direkt in die Tag-Pflege des Dokuments.
Über <strong>Details</strong> steuerst du Versionen, Aktivierung, Re-Ingest und Löschung.
</div>
</div>
</div>
</div>
{% if documents is empty %}
<div class="alert alert-secondary">
<div class="alert alert-secondary shadow-sm">
Keine Dokumente vorhanden.
</div>
{% else %}
<div class="card bg-black border-secondary">
<div class="card bg-black border-secondary shadow-sm">
<div class="card-body p-0">
<div class="d-flex justify-content-between align-items-center px-3 py-3 border-bottom border-secondary flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Dokumente</strong>
<span class="small text-muted ms-2">{{ documents|length }} Einträge</span>
</div>
<div class="small text-muted">
Neueste Dokumente stehen oben.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark">
<tr>
<th>Titel</th>
<th>ID</th>
<th>Typ</th>
<th>Status</th>
<th>Indexierung</th>
<th>Versionen</th>
<th>Aktive Version</th>
<th>Erstellt</th>
<th class="text-end">Aktionen</th>
<th style="width: 20%">Titel</th>
<th style="width: 14%">ID</th>
<th style="width: 8%">Typ</th>
<th style="width: 8%">Status</th>
<th style="width: 10%">Indexierung</th>
<th style="width: 7%">Versionen</th>
<th style="width: 8%">Aktive Version</th>
<th style="width: 7%">Tags</th>
<th style="width: 8%">Erstellt</th>
<th class="text-end" style="width: 10%">Aktionen</th>
</tr>
</thead>
<tbody>
{% for document in documents %}
<tr>
{# Titel #}
<td>
<div class="fw-semibold">
<a href="{{ path('admin_document_show', {id: document.id}) }}"
class="text-light text-decoration-none">
{{ document.title }}
</a>
</div>
{% if document.currentVersion and document.currentVersion.filePath %}
<div class="small text-muted mt-1">
Aktive Datei vorhanden
</div>
{% endif %}
</td>
{# ID #}
<td class="small text-info">
{{ document.id }}
<code>{{ document.id }}</code>
</td>
{# Typ #}
<td>
{% if document.currentVersion %}
<span class="badge bg-secondary">
@@ -69,7 +133,6 @@
{% endif %}
</td>
{# Dokument Status #}
<td>
{% if document.status == 'ACTIVE' %}
<span class="badge bg-success">Aktiv</span>
@@ -78,18 +141,19 @@
{% endif %}
</td>
{# Ingest Status #}
<td>
{% if document.currentVersion %}
{% if document.currentVersion.ingestStatus == 'INDEXED' %}
<span class="badge bg-success">INDEXED</span>
{% elseif document.currentVersion.ingestStatus == 'PENDING' %}
<span class="badge bg-warning text-dark">PENDING</span>
{% elseif document.currentVersion.ingestStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif document.currentVersion.ingestStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% else %}
<span class="badge bg-dark border border-secondary">
{{ document.currentVersion.ingestStatus }}
{{ document.currentVersion.ingestStatus ?: '-' }}
</span>
{% endif %}
{% else %}
@@ -97,34 +161,40 @@
{% endif %}
</td>
{# Version Count #}
<td>
<span class="badge text-bg-dark border border-secondary">
{{ document.versions|length }}
</span>
</td>
{# Aktive Version #}
<td>
{% if document.currentVersion %}
<span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }}
</span>
{% else %}
-
{% endif %}
</td>
{# Created At #}
<td>
<span class="badge text-bg-dark border border-secondary">
{{ document.tags|length }}
</span>
</td>
<td class="small">
{{ document.createdAt|date('d.m.Y H:i') }}
</td>
{# Aktionen #}
<td class="text-end">
<a class="btn btn-sm btn-outline-info me-2"
<div class="d-flex justify-content-end flex-wrap gap-2">
<a class="btn btn-sm btn-outline-info"
href="{{ path('admin_document_tags_edit', {id: document.id}) }}">
Tags
</a>
<a class="btn btn-sm btn-outline-light me-2"
<a class="btn btn-sm btn-outline-light"
href="{{ path('admin_document_show', {id: document.id}) }}">
Details
</a>
@@ -133,8 +203,7 @@
<form method="post"
action="{{ path('admin_document_delete', {id: document.id}) }}"
class="d-inline"
onsubmit="return confirm('Dokument wirklich endgültig löschen? Diese Aktion entfernt Dokument, Versionen und Index-Daten.');">
onsubmit="return confirm('Dokument wirklich löschen? Der Inhalt wird per Delete-Job aus dem Index entfernt.');">
<input type="hidden"
name="_token"
value="{{ csrf_token('delete_document_' ~ document.id) }}">
@@ -144,23 +213,29 @@
</button>
</form>
{% endif %}
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
{% endif %}
<div class="mt-4 small text-secondary">
Hinweis: Das Löschen eines Dokuments entfernt alle Versionen und
erfordert eine Aktualisierung des NDJSON-Indexes.
<div class="card bg-dark border-secondary text-light mt-4 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">Hinweis zum Dokument-Lifecycle</h5>
<div class="small text-light">
Änderungen an aktiven Versionen und Löschvorgänge wirken sich direkt auf den Wissensindex aus.
Zugewiesene Tags beeinflussen zusätzlich die semantische Routing-Ebene des Systems.
Dokumente mit schwachen oder fehlenden Tags sind oft ein guter Kandidat für fachliche Nachpflege.
</div>
</div>
</div>
{% endblock %}

View File

@@ -4,8 +4,13 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3">Neues Dokument</h1>
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">Neues Dokument</h1>
<div class="small text-muted">
Neuer Upload mit initialer Version und anschließendem asynchronen Ingest.
</div>
</div>
<a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-secondary">
@@ -13,7 +18,49 @@
</a>
</div>
<div class="card bg-black border-secondary text-light">
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Warum ist der Titel wichtig?</h5>
<ul class="small mb-0">
<li>Der Titel wird später Teil des fachlichen Kontexts des Dokuments.</li>
<li>Ein präziser Titel verbessert Retrieval, Chunk-Einordnung und spätere Tag-Pflege.</li>
<li>Generische Titel wie <code>Dokument 1</code> oder nur Dateinamen sind deutlich schwächer.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Gute Beispiele</h5>
<ul class="small mb-0">
<li><code>Testomat 808 Technisches Datenblatt</code></li>
<li><code>Resthärte-Messung Produktübersicht</code></li>
<li><code>Indikator 300 Anwendung und Dosierung</code></li>
</ul>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<form method="post" enctype="multipart/form-data">
@@ -22,31 +69,24 @@
name="_token"
value="{{ csrf_token('create_document') }}">
{# ============================= #}
{# Titel #}
{# ============================= #}
<div class="mb-4">
<label class="form-label">Titel</label>
<div class="alert alert-secondary small">
<strong>Hinweis zur Qualität:</strong><br>
Der Titel ist entscheidend für die semantische Einordnung
der erzeugten Chunks. Jeder Chunk erhält den Titel als Kontext,
wodurch Retrieval und Antwortqualität signifikant verbessert werden.<br><br>
Wird kein Titel angegeben, wird automatisch der Dateiname
verwendet (nicht empfohlen).
Verwende einen fachlich präzisen Titel, der Produkt, Thema oder Dokumenttyp klar beschreibt.
Wenn kein Titel angegeben wird, wird automatisch der Dateiname verwendet.
</div>
<input class="form-control bg-dark text-light border-secondary"
name="title"
placeholder="z. B. Sicherheitsdatenblatt Produkt XY">
</div>
value="{{ app.request.get('title') }}"
placeholder="z. B. Testomat 808 Technisches Datenblatt">
{# ============================= #}
{# Datei Upload #}
{# ============================= #}
<div class="form-text text-secondary">
Der Titel muss nicht lang sein, aber fachlich eindeutig.
</div>
</div>
<div class="mb-4">
<label class="form-label">Datei</label>
@@ -58,14 +98,22 @@
<div class="form-text text-secondary">
Unterstützte Formate: PDF, DOCX, TXT, MD.
Das Dokument wird versioniert gespeichert und anschließend
indexiert.
Nach dem Upload wird automatisch Version 1 erstellt und ein Ingest-Job gestartet.
</div>
</div>
{# ============================= #}
{# Submit #}
{# ============================= #}
<div class="card bg-dark border-secondary mb-4">
<div class="card-body">
<h6 class="text-info mb-3">Was passiert nach dem Speichern?</h6>
<ul class="small mb-0">
<li>Das Dokument wird versioniert gespeichert.</li>
<li>Die erste Version wird als aktuelle Version gesetzt.</li>
<li>Ein asynchroner Ingest-Job verarbeitet das Dokument für den Wissensindex.</li>
<li>Später können dem Dokument gezielt Tags zugewiesen werden.</li>
</ul>
</div>
</div>
<div class="d-flex justify-content-end">
<button class="btn btn-outline-info">
@@ -79,8 +127,7 @@
</div>
<div class="mt-4 small text-secondary">
Hinweis: Nach dem Upload wird automatisch eine neue Dokumentversion erstellt.
Die Indexierung erfolgt asynchron über einen Ingest-Job.
Hinweis: Ein sauber benanntes Dokument ist die beste Grundlage für gutes Retrieval und späteres präzises Tagging.
</div>
{% endblock %}

View File

@@ -4,10 +4,13 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0">
Neue Version
</h1>
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">Neue Version</h1>
<div class="small text-muted">
Neue unveränderliche Version für ein bestehendes Dokument hochladen.
</div>
</div>
<a href="{{ path('admin_document_show', {id: document.id}) }}"
class="btn btn-sm btn-outline-secondary">
@@ -15,36 +18,99 @@
</a>
</div>
<div class="card bg-dark border-secondary mb-4 text-light">
<div class="card-body">
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="mb-3">
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Dokumentkontext</h5>
<div class="mb-2">
<strong>Dokument:</strong>
<span class="text-light">{{ document.title }}</span>
</div>
<div class="small text-secondary">
Das Hochladen einer neuen Version erzeugt eine zusätzliche
unveränderliche Dokumentversion. Die Aktivierung erfolgt separat
und löst einen deterministischen Re-Ingest aus.
</div>
Eine neue Version erzeugt eine zusätzliche, unveränderliche Dokumentversion.
Die bestehende aktive Version bleibt zunächst unverändert.
</div>
</div>
<div class="card bg-black border-secondary text-light">
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="small mb-2">
<strong>Aktive Version:</strong>
{% if document.currentVersion %}
<span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }}
</span>
{% else %}
-
{% endif %}
</div>
<div class="small mb-2">
<strong>Vorhandene Versionen:</strong>
{{ document.versions|length }}
</div>
<div class="small">
<strong>Zugewiesene Tags:</strong>
{{ document.tags|length }}
</div>
</div>
</div>
</div>
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Wichtig für den Lifecycle</h5>
<ul class="small mb-0">
<li>Der Upload erzeugt nur eine <strong>neue Version</strong>, aber aktiviert sie nicht automatisch.</li>
<li>Erst die spätere <strong>Aktivierung</strong> löst den deterministischen Re-Ingest aus.</li>
<li>Tags bleiben auf <strong>Dokumentebene</strong> bestehen und gelten weiterhin für das Dokument als Ganzes.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Gute Praxis</h5>
<ul class="small mb-0">
<li>Nur fachlich wirklich passende Nachfolgeversionen hochladen.</li>
<li>Kein anderes Thema oder anderes Produkt in dieselbe Dokumentlinie mischen.</li>
<li>Bei stark verändertem Fachinhalt später Tagging mitprüfen.</li>
</ul>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<form method="post" enctype="multipart/form-data">
<input type="hidden"
name="_token"
value="{{ csrf_token('create_document_version_' ~ document.id) }}">
{# ============================= #}
{# Datei Upload #}
{# ============================= #}
<div class="mb-4">
<label class="form-label">Datei auswählen</label>
@@ -54,15 +120,23 @@
required>
<div class="form-text text-secondary">
Unterstützte Formate: PDF, DOCX, TXT, MD.<br>
Die Datei wird versioniert gespeichert und mit einer
eindeutigen Checksum versehen.
Unterstützte Formate: PDF, DOCX, TXT, MD.
Die Datei wird versioniert gespeichert und mit einer eindeutigen Checksum versehen.
</div>
</div>
{# ============================= #}
{# Submit #}
{# ============================= #}
<div class="card bg-dark border-secondary mb-4">
<div class="card-body">
<h6 class="text-info mb-3">Was passiert nach dem Upload?</h6>
<ul class="small mb-0">
<li>Es wird eine neue, unveränderliche Dokumentversion angelegt.</li>
<li>Die aktive Version bleibt zunächst unverändert.</li>
<li>Ein Re-Ingest erfolgt erst nach späterer Aktivierung dieser Version.</li>
<li>Danach wird der Wissensindex deterministisch neu aufgebaut.</li>
</ul>
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %}
<div class="d-flex justify-content-end">
@@ -71,16 +145,14 @@
</button>
</div>
{% endif %}
</form>
</div>
</div>
<div class="mt-4 small text-secondary">
Hinweis: Eine neue Version ersetzt nicht automatisch die aktive Version.
Erst nach Aktivierung wird ein Re-Ingest durchgeführt und der Index
neu aufgebaut.
Hinweis: Eine neue Version verbessert den Dokument-Lifecycle nur dann sauber, wenn sie fachlich wirklich zu diesem Dokument gehört.
Bei stark verändertem Inhalt sollten nach der späteren Aktivierung auch die Tags geprüft werden.
</div>
{% endblock %}

View File

@@ -4,45 +4,67 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0">{{ document.title ?? 'Ein Fehler trat auf' }}</h1>
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">{{ document.title }}</h1>
<div class="small text-muted">
Detailansicht für Dokument, Versionen und Tag-Zuordnung.
</div>
</div>
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_document_tags_edit', {id: document.id}) }}"
class="btn btn-sm btn-outline-info">
Tags bearbeiten
</a>
<a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-secondary">
Zurück zur Übersicht
</a>
</div>
</div>
{% if document %}
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{# ============================= #}
{# Dokument-Meta #}
{# ============================= #}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary mb-5 text-light">
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="row g-4 mb-4">
<div class="col-lg-7">
<div class="card bg-dark border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">Dokument-Metadaten</h5>
<div class="mb-2">
<strong>Status:</strong>
<div class="row g-3">
<div class="col-md-6">
<div class="small text-muted mb-1">Status</div>
<div>
{% if document.status == 'ACTIVE' %}
<span class="badge bg-success">Aktiv</span>
{% else %}
<span class="badge bg-secondary">Archiviert</span>
{% endif %}
</div>
<div class="mb-2">
<strong>Erstellt von:</strong>
{{ document.createdBy ? document.createdBy.email : '-' }}
</div>
<div class="mb-2">
<strong>Erstellt am:</strong>
{{ document.createdAt|date('d.m.Y H:i') }}
</div>
<div class="mb-2">
<strong>Aktive Version:</strong>
<div class="col-md-6">
<div class="small text-muted mb-1">Aktive Version</div>
<div>
{% if document.currentVersion %}
<span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }}
@@ -51,16 +73,104 @@
-
{% endif %}
</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Erstellt von</div>
<div>{{ document.createdBy ? document.createdBy.email : '-' }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Erstellt am</div>
<div>{{ document.createdAt|date('d.m.Y H:i:s') }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Anzahl Versionen</div>
<div>{{ document.versions|length }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Zugewiesene Tags</div>
<div>{{ document.tags|length }}</div>
</div>
</div>
{# ============================= #}
{# Versionen #}
{# ============================= #}
{% if is_granted('ROLE_SUPER_ADMIN') %}
<hr class="border-secondary">
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_document_version_new', {id: document.id}) }}"
class="btn btn-sm btn-outline-info">
Neue Version
</a>
<form method="post"
action="{{ path('admin_document_delete', {id: document.id}) }}"
class="d-inline"
onsubmit="return confirm('Dokument wirklich löschen? Der Inhalt wird per Delete-Job aus dem Index entfernt.');">
<input type="hidden"
name="_token"
value="{{ csrf_token('delete_document_' ~ document.id) }}">
<button class="btn btn-sm btn-outline-danger">
Dokument löschen
</button>
</form>
</div>
{% endif %}
</div>
</div>
</div>
<div class="col-lg-5">
<div class="card bg-dark border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-center mb-3">
<h2 class="h5 mb-0">Versionen</h2>
<h5 class="text-info mb-0">Tags</h5>
<a href="{{ path('admin_document_tags_edit', {id: document.id}) }}"
class="btn btn-sm btn-outline-light">
Bearbeiten
</a>
</div>
{% if document.tags is empty %}
<div class="alert alert-secondary mb-0">
Diesem Dokument sind noch keine Tags zugewiesen.
</div>
{% else %}
<div class="d-flex flex-wrap gap-2">
{% for tag in document.tags %}
<span class="badge px-3 py-2
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.label }}
</span>
{% endfor %}
</div>
<div class="small text-muted mt-3">
Tags steuern die semantische Routing-Ebene. Weise nur fachlich wirklich passende Tags zu.
</div>
{% endif %}
</div>
</div>
</div>
</div>
<div class="d-flex justify-content-between align-items-center mb-3 flex-wrap gap-2">
<div>
<h2 class="h5 mb-1">Versionen</h2>
<div class="small text-muted">
Beim Aktivieren einer Version wird automatisch ein Re-Ingest ausgelöst.
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %}
<a href="{{ path('admin_document_version_new', {id: document.id}) }}"
@@ -72,48 +182,47 @@
{% if document.versions is empty %}
<div class="alert alert-secondary">
<div class="alert alert-secondary shadow-sm">
Keine Versionen vorhanden.
</div>
{% else %}
<div class="card bg-black border-secondary">
<div class="card-body">
<div class="card bg-black border-secondary shadow-sm">
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark">
<tr>
<th>Version</th>
<th>Status</th>
<th>Ingest</th>
<th>Checksum</th>
<th>Erstellt von</th>
<th>Datum</th>
<th class="text-end">Aktionen</th>
<th style="width: 10%">Version</th>
<th style="width: 10%">Aktiv</th>
<th style="width: 14%">Ingest</th>
<th style="width: 18%">Checksum</th>
<th style="width: 16%">Erstellt von</th>
<th style="width: 14%">Datum</th>
<th class="text-end" style="width: 18%">Aktionen</th>
</tr>
</thead>
<tbody>
{% for version in document.versions %}
<tr>
<td>
<strong>v{{ version.versionNumber }}</strong>
{% if document.currentVersion and version.id == document.currentVersion.id %}
<div class="small text-info mt-1">Current</div>
{% endif %}
</td>
{# Aktivstatus #}
<td>
{% if version.isActive %}
<span class="badge bg-success">Aktiv</span>
{% else %}
<span class="badge bg-dark border border-secondary">
Inaktiv
</span>
<span class="badge bg-dark border border-secondary">Inaktiv</span>
{% endif %}
</td>
{# Ingest Status #}
<td>
{% if version.ingestStatus == 'INDEXED' %}
<span class="badge bg-success">INDEXED</span>
@@ -125,99 +234,85 @@
<span class="badge bg-secondary">PENDING</span>
{% else %}
<span class="badge bg-dark border border-secondary">
{{ version.ingestStatus }}
{{ version.ingestStatus ?: '-' }}
</span>
{% endif %}
</td>
{# Checksum #}
<td class="small text-secondary">
{{ version.checksum ? version.checksum[:10] ~ '…' : '-' }}
{% if version.checksum %}
<code>{{ version.checksum[:12] ~ '…' }}</code>
{% else %}
-
{% endif %}
</td>
{# Created by #}
<td>
{{ version.createdBy ? version.createdBy.email : '-' }}
</td>
{# Date #}
<td class="small">
{{ version.createdAt|date('d.m.Y H:i') }}
{{ version.createdAt|date('d.m.Y H:i:s') }}
</td>
{# Aktionen #}
<td class="text-end">
<div class="d-flex justify-content-end flex-wrap gap-2">
{% if version.isActive %}
{% if version.ingestStatus in ['PENDING', 'FAILED'] and is_granted('ROLE_SUPER_ADMIN') %}
<form method="post"
action="{{ path('admin_document_version_ingest', {versionId: version.id}) }}"
class="d-inline"
onsubmit="return confirm('Ingest erneut starten?');">
<input type="hidden"
name="_token"
value="{{ csrf_token('ingest_version_' ~ version.id) }}">
<button class="btn btn-sm btn-outline-info">
Ingest starten
</button>
</form>
{% else %}
<span class="text-success small">
Bereits indexiert
<span class="small text-success align-self-center">
Keine Aktion nötig
</span>
{% endif %}
{% else %}
{% if is_granted('ROLE_SUPER_ADMIN') %}
<form method="post"
action="{{ path('admin_document_version_activate', {versionId: version.id}) }}"
class="d-inline"
onsubmit="return confirm('Diese Version aktivieren? Es wird ein Re-Ingest ausgelöst.');">
<input type="hidden"
name="_token"
value="{{ csrf_token('activate_version_' ~ version.id) }}">
<button class="btn btn-sm btn-outline-light">
Aktivieren
</button>
</form>
{% endif %}
{% endif %}
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
{% endif %}
<div class="mt-4 small text-secondary">
Hinweis: Beim Aktivieren einer Version wird automatisch ein Re-Ingest
durchgeführt. Der NDJSON-Index und der FAISS-Index werden deterministisch
neu aufgebaut.
<div class="card bg-dark border-secondary text-light mt-4 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">Hinweis zum Lifecycle</h5>
<div class="small text-light">
Beim Aktivieren einer Version wird automatisch ein Re-Ingest durchgeführt.
Der NDJSON-Bestand und der Vektorindex werden deterministisch neu aufgebaut.
Wenn Tags zugewiesen sind, beeinflusst dieses Dokument zusätzlich die semantische Routing-Ebene.
</div>
</div>
{% else %}
<div class="alert alert-danger">
Dokument nicht gefunden.
</div>
{% endif %}
{% endblock %}

View File

@@ -4,81 +4,87 @@
{% block body %}
{# ============================================= #}
{# Tag-Rebuild Status (Echte Live-Anzeige) #}
{# ============================================= #}
<div id="rebuild-status" class="mb-5" style="min-height:54px"></div>
<div id="rebuild-status" class="mb-4">
{% if latestJob %}
<div class="alert alert-secondary shadow-sm mb-0">
Status wird geladen…
</div>
{% endif %}
</div>
<script>
let polling = null;
const statusBox = document.getElementById('rebuild-status');
const source = new EventSource("{{ path('admin_tags_rebuild_stream') }}");
function renderStatus(status) {
const el = document.getElementById('rebuild-status');
source.onmessage = function (event) {
const data = JSON.parse(event.data);
let html = '';
if (!status) {
el.innerHTML = '';
return;
}
if (status === 'RUNNING') {
el.innerHTML = `
<div class="alert alert-info d-flex justify-content-between align-items-center">
<div><strong>Dokument-Tag-Rebuild läuft…</strong></div>
if (data.status === '{{ statusRunning }}') {
html = `
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
<div>
<strong>Dokument-Tag-Rebuild läuft</strong><br>
${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
</div>
<div class="spinner-border spinner-border-sm"></div>
</div>
`;
} else if (status === 'QUEUED') {
el.innerHTML = `
<div class="alert alert-secondary">
Dokument-Tag-Rebuild in Warteschlange
} else if (data.status === '{{ statusQueued }}') {
html = `
<div class="alert alert-secondary shadow-sm mb-0">
<strong>Dokument-Tag-Rebuild in Warteschlange</strong>
</div>
`;
} else if (status === 'COMPLETED') {
el.innerHTML = `
<div class="alert alert-success fw-bold">
Dokument-Tag-Rebuild erfolgreich abgeschlossen.
} else if (data.status === '{{ statusCompleted }}') {
html = `
<div class="alert alert-success shadow-sm mb-0">
<i class="bi bi-check-lg"></i> Dokument-Tag-Rebuild erfolgreich abgeschlossen
</div>
`;
stopPolling();
} else if (status === 'FAILED') {
el.innerHTML = `
<div class="alert alert-danger">
Dokument-Tag-Rebuild fehlgeschlagen.
} else if (data.status === '{{ statusFailed }}') {
html = `
<div class="alert alert-danger shadow-sm mb-0">
<strong>Dokument-Tag-Rebuild fehlgeschlagen</strong><br>
${data.error ? '<code>' + data.error + '</code>' : ''}
</div>
`;
stopPolling();
}
}
function checkStatus() {
fetch('{{ path('admin_tags_status') }}')
.then(r => r.json())
.then(data => renderStatus(data.status))
.catch(() => stopPolling());
}
statusBox.innerHTML = html;
};
function startPolling() {
polling = setInterval(checkStatus, 2000);
}
source.onerror = function () {
console.warn('SSE Verbindung verloren');
};
function stopPolling() {
if (polling) {
clearInterval(polling);
polling = null;
}
}
// Start polling sofort
checkStatus();
startPolling();
window.addEventListener('beforeunload', function () {
source.close();
});
</script>
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0">
<div>
<h1 class="h3 mb-1">
Tags für Dokument
<span class="text-info">{{ document.title }}</span>
</h1>
<div class="small text-muted">
Weise nur Tags zu, die den fachlichen Kern des Dokuments wirklich beschreiben.
</div>
</div>
<a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-light">
@@ -86,14 +92,40 @@
</a>
</div>
{# ============================================= #}
{# Bereits zugewiesene Tags #}
{# ============================================= #}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Hinweis für gutes Tagging</h5>
<div class="card bg-dark border-secondary mb-4">
<ul class="small mb-0">
<li><strong>Präzise statt breit:</strong> lieber produkt- oder themenscharfe Tags als allgemeine Oberbegriffe.</li>
<li><strong>Catalog Entity</strong> nur bei echten Produktfamilien, Katalogbegriffen oder klaren Entitäten.</li>
<li><strong>Generic</strong> nur als unterstützende Zusatzsemantik.</li>
<li><strong>Sales Signal</strong> sparsam und bewusst einsetzen, nicht als Ersatz für Fach-Tags.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="d-flex flex-wrap gap-2">
<span class="badge text-bg-dark border border-secondary">
Zugewiesen: {{ document.tags|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Verfügbar: {{ allTags|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Nicht zugewiesen: {{ allTags|length - document.tags|length }}
</span>
</div>
</div>
</div>
</div>
<div class="card bg-dark border-secondary mb-4 shadow-sm">
<div class="card-body">
<h5 class="mb-3">Zugewiesene Tags für: <span class="text-info ">{{ document.title }}</span></h5>
<h5 class="mb-3">Bereits zugewiesene Tags</h5>
{% if document.tags is empty %}
<div class="alert alert-secondary mb-0">
@@ -101,22 +133,26 @@
</div>
{% else %}
<div class="d-flex flex-wrap gap-2">
{% for tag in document.tags %}
<span class="badge bg-info text-dark px-3 py-2">
{% for tag in allTags %}
{% if tag in document.tags %}
<span class="badge px-3 py-2
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.label }}
</span>
{% endif %}
{% endfor %}
</div>
{% endif %}
</div>
</div>
{# ============================================= #}
{# Tag-Zuweisung Formular #}
{# ============================================= #}
<div class="card bg-black border-secondary">
<div class="card bg-black border-secondary shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">Tags zuweisen</h5>
@@ -128,38 +164,125 @@
name="_token"
value="{{ csrf_token('admin_document_tags_save_' ~ document.id) }}">
<div class="row g-4">
<div class="col-lg-6">
<div class="card bg-dark border-secondary h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Zugewiesene Tags
</div>
<div class="card-body">
<div class="row">
{% set hasAssigned = false %}
{% for tag in allTags %}
<div class="col-md-2 mb-2">
{% if tag in document.tags %}
{% set hasAssigned = true %}
<div class="col-md-6 mb-3">
<div class="form-check">
<input
class="form-check-input"
type="checkbox"
name="tag_ids[]"
value="{{ tag.id }}"
id="tag_{{ tag.id }}"
{% if tag in document.tags %}checked{% endif %}
checked
>
<label class="form-check-label bg-info text-black badge"{% if tag not in document.tags %} style="opacity: .5;"{% endif %}
for="tag_{{ tag.id }}">
{{ tag.label }}
<label class="form-check-label w-100" for="tag_{{ tag.id }}">
<span class="badge
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.type }}
</span>
<span class="ms-2 fw-semibold">{{ tag.label }}</span>
{% if tag.description %}
<div class="small text-muted mt-1">{{ tag.description }}</div>
{% endif %}
</label>
</div>
</div>
{% endif %}
{% endfor %}
{% if not hasAssigned %}
<div class="col-12">
<div class="text-muted">
Noch keine Tags zugewiesen.
</div>
</div>
{% endif %}
</div>
</div>
</div>
</div>
<div class="col-lg-6">
<div class="card bg-dark border-secondary h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Verfügbare Tags
</div>
<div class="card-body">
<div class="row">
{% set hasAvailable = false %}
{% for tag in allTags %}
{% if tag not in document.tags %}
{% set hasAvailable = true %}
<div class="col-md-6 mb-3">
<div class="form-check">
<input
class="form-check-input"
type="checkbox"
name="tag_ids[]"
value="{{ tag.id }}"
id="tag_{{ tag.id }}"
>
<label class="form-check-label w-100" for="tag_{{ tag.id }}">
<span class="badge
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.type }}
</span>
<span class="ms-2">{{ tag.label }}</span>
{% if tag.description %}
<div class="small text-muted mt-1">{{ tag.description }}</div>
{% endif %}
</label>
</div>
</div>
{% endif %}
{% endfor %}
{% if not hasAvailable %}
<div class="col-12">
<div class="text-muted">
Keine weiteren Tags verfügbar.
</div>
</div>
{% endif %}
</div>
</div>
</div>
</div>
</div>
<hr class="border-secondary">
<div class="d-flex justify-content-end">
<button type="submit"
class="btn btn-sm btn-outline-info">
Speichern
</button>
</div>
</form>

View File

@@ -4,8 +4,17 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3"><i class="bi bi-terminal"></i> Indexierung (Ingest Jobs)</h1>
{% set latestJob = jobs is not empty ? jobs|first : null %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-terminal"></i> Indexierung (Ingest Jobs)
</h1>
<div class="small text-muted">
Übersicht über Reindex-, Dokument- und Aktivierungsjobs des Systems.
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %}
<form method="post"
@@ -25,40 +34,143 @@
{% endif %}
</div>
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Was sieht man hier?</h5>
<ul class="small mb-0">
<li><strong>DOCUMENT</strong> verarbeitet ein einzelnes Dokument neu.</li>
<li><strong>DOCUMENT_VERSION_ACTIVATE</strong> zieht eine aktivierte Version deterministisch neu in den Index.</li>
<li><strong>DOCUMENT_DELETE</strong> entfernt Dokumentinhalt wieder sauber aus den Index-Artefakten.</li>
<li><strong>GLOBAL_REINDEX</strong> baut den Wissensindex vollständig neu auf und ist der stärkste Reparaturpfad.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Worauf achten?</h5>
<ul class="small mb-0">
<li><strong>RUNNING</strong> und <strong>QUEUED</strong> bedeuten: keine unnötigen parallelen Rebuilds starten.</li>
<li><strong>FAILED</strong> oder <strong>ABORTED</strong> direkt prüfen.</li>
<li>Bei inkonsistentem Indexzustand ist meist ein <strong>Global Reindex</strong> der richtige Reparaturschritt.</li>
</ul>
</div>
</div>
</div>
{% if latestJob %}
<div class="card bg-black border-secondary text-light mb-4 shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start flex-wrap gap-3">
<div>
<div class="small text-muted mb-1">Letzter Job</div>
<div class="fw-semibold">
<a href="{{ path('admin_job_show', {id: latestJob.id}) }}"
class="text-light text-decoration-none">
{{ latestJob.id }}
</a>
</div>
</div>
<div>
<div class="small text-muted mb-1">Typ</div>
<span class="badge bg-info text-dark">{{ latestJob.type }}</span>
</div>
<div>
<div class="small text-muted mb-1">Status</div>
{% if latestJob.status == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif latestJob.status == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif latestJob.status == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif latestJob.status == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif latestJob.status == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ latestJob.status }}</span>
{% endif %}
</div>
<div>
<div class="small text-muted mb-1">Gestartet</div>
<div class="small">
{{ latestJob.startedAt ? latestJob.startedAt|date('d.m.Y H:i:s') : '-' }}
</div>
</div>
<div>
<div class="small text-muted mb-1">Beendet</div>
<div class="small">
{{ latestJob.finishedAt ? latestJob.finishedAt|date('d.m.Y H:i:s') : 'läuft noch / offen' }}
</div>
</div>
</div>
{% if latestJob.errorMessage %}
<div class="alert alert-danger small mt-3 mb-0">
<strong>Fehler:</strong>
{# Show at most 250 characters; append an ellipsis when the message was cut. #}
{{ latestJob.errorMessage|slice(0, 250) }}{% if latestJob.errorMessage|length > 250 %}…{% endif %}
</div>
{% endif %}
</div>
</div>
{% endif %}
{% if jobs is empty %}
<div class="alert alert-secondary">
<div class="alert alert-secondary shadow-sm">
Keine Ingest Jobs vorhanden.
</div>
{% else %}
<div class="card bg-black border-secondary">
<div class="card bg-black border-secondary shadow-sm">
<div class="card-body p-0">
<div class="d-flex justify-content-between align-items-center px-3 py-3 border-bottom border-secondary flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Jobs</strong>
<span class="small text-muted ms-2">{{ jobs|length }} Einträge</span>
</div>
<div class="small text-muted">
Neueste Jobs stehen oben.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark">
<tr>
<th>Job-ID</th>
<th>Typ</th>
<th>Status</th>
<th>Dokument</th>
<th>Version</th>
<th>Gestartet</th>
<th>Beendet</th>
<th>Benutzer</th>
<th style="width: 18%">Job</th>
<th style="width: 14%">Typ</th>
<th style="width: 12%">Status</th>
<th style="width: 18%">Bezug</th>
<th style="width: 12%">Gestartet</th>
<th style="width: 12%">Beendet</th>
<th style="width: 14%">Benutzer</th>
</tr>
</thead>
<tbody>
{% for job in jobs %}
<tr>
<td class="small">
<div class="fw-semibold">
<a href="{{ path('admin_job_show', {id: job.id}) }}"
class="text-light text-decoration-none">
{{ job.id }}
</a>
</div>
{% if job.errorMessage %}
<div class="text-danger small mt-1"
title="{{ job.errorMessage }}">
{# Show at most 120 characters; append an ellipsis when the message was cut. #}
{{ job.errorMessage|slice(0, 120) }}{% if job.errorMessage|length > 120 %}…{% endif %}
</div>
{% endif %}
</td>
<td>
@@ -76,6 +188,8 @@
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif job.status == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif job.status == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">
{{ job.status }}
@@ -83,37 +197,44 @@
{% endif %}
</td>
<td>
<td class="small">
{% if job.documentId %}
<div>
<span class="text-muted">Dokument:</span>
<a href="{{ path('admin_document_show', {id: job.documentId}) }}"
class="text-light text-decoration-none">
{{ job.documentId }}
</a>
{% else %}
</div>
{% endif %}
{% if job.documentVersionId %}
<div class="mt-1">
<span class="text-muted">Version:</span>
{{ job.documentVersionId }}
</div>
{% endif %}
{% if not job.documentId and not job.documentVersionId %}
-
{% endif %}
</td>
<td>
{{ job.documentVersionId ?? '-' }}
</td>
<td class="small">
{{ job.startedAt ? job.startedAt|date('d.m.Y H:i:s') : '-' }}
</td>
<td class="small">
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }}
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : 'offen' }}
</td>
<td class="small">
{{ job.startedBy ? job.startedBy.email : '-' }}
</td>
</tr>
{% else %}
<tr>
<td colspan="8" class="text-center text-secondary py-4">
<td colspan="7" class="text-center text-secondary py-4">
Keine Jobs gefunden.
</td>
</tr>
@@ -121,6 +242,7 @@
</tbody>
</table>
</div>
</div>
</div>
@@ -128,8 +250,8 @@
{% endif %}
<div class="mt-4 small text-secondary">
Hinweis: Während laufender Jobs (Status RUNNING) sollten keine
parallelen Reindex-Prozesse gestartet werden.
Hinweis: Während laufender Jobs (Status <strong>RUNNING</strong>) oder wartender Jobs (<strong>QUEUED</strong>)
sollten keine unnötigen parallelen Reindex-Prozesse gestartet werden.
</div>
{% endblock %}

View File

@@ -4,8 +4,18 @@
{% block body %}
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0">Ingest Job</h1>
{% set jobStatus = job.status|upper %}
{% set isActiveJob = jobStatus in ['QUEUED', 'RUNNING'] %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-terminal"></i> Ingest Job
</h1>
<div class="small text-muted">
Detailansicht für einen einzelnen Indexierungs- oder Rebuild-Job.
</div>
</div>
<a href="{{ path('admin_jobs') }}"
class="btn btn-sm btn-outline-secondary">
@@ -13,26 +23,82 @@
</a>
</div>
<div class="card bg-black border-secondary text-light">
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Einordnung</h5>
<ul class="small mb-0">
<li><strong>DOCUMENT</strong> verarbeitet ein einzelnes Dokument neu.</li>
<li><strong>DOCUMENT_VERSION_ACTIVATE</strong> aktiviert eine Version und zieht sie deterministisch neu in den Index.</li>
<li><strong>DOCUMENT_DELETE</strong> entfernt Dokumentinhalt wieder sauber aus dem Wissensbestand.</li>
<li><strong>GLOBAL_REINDEX</strong> baut den Gesamtindex vollständig neu auf.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Zustand</h5>
<div class="d-flex flex-wrap gap-2">
{% if jobStatus == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif jobStatus == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif jobStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif jobStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif jobStatus == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ jobStatus }}</span>
{% endif %}
{% if isActiveJob %}
<span class="badge text-bg-info">Polling aktiv</span>
{% endif %}
</div>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<div class="mb-2">
<strong>ID:</strong>
<span class="small text-light">{{ job.id }}</span>
<div class="row g-4">
<div class="col-lg-6">
<div class="mb-3">
<div class="small text-muted mb-1">Job-ID</div>
<div class="fw-semibold small text-light">{{ job.id }}</div>
</div>
<div class="mb-2">
<strong>Typ:</strong>
<div class="mb-3">
<div class="small text-muted mb-1">Typ</div>
<div>
<span class="badge bg-info text-dark">{{ job.type }}</span>
</div>
<div class="mb-2">
<strong>Status:</strong>
<span id="job-status-badge"></span>
</div>
<div class="mb-2">
<strong>Dokument:</strong>
<div class="mb-3">
<div class="small text-muted mb-1">Status</div>
<div id="job-status-badge">
{% if jobStatus == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif jobStatus == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif jobStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif jobStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif jobStatus == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ jobStatus }}</span>
{% endif %}
</div>
</div>
<div class="mb-3">
<div class="small text-muted mb-1">Dokument</div>
<div>
{% if job.documentId %}
<a href="{{ path('admin_document_show', {id: job.documentId}) }}"
class="text-light text-decoration-none">
@@ -42,32 +108,49 @@
-
{% endif %}
</div>
<div class="mb-2">
<strong>Version:</strong>
{{ job.documentVersionId ?? '-' }}
</div>
<div class="mb-2">
<strong>Gestartet:</strong>
{{ job.startedAt|date('d.m.Y H:i:s') }}
<div class="mb-0">
<div class="small text-muted mb-1">Dokumentversion</div>
<div>{{ job.documentVersionId ?? '-' }}</div>
</div>
</div>
<div class="mb-2">
<strong>Beendet:</strong>
<span id="job-finished-at">
<div class="col-lg-6">
<div class="mb-3">
<div class="small text-muted mb-1">Gestartet</div>
<div>
{{ job.startedAt ? job.startedAt|date('d.m.Y H:i:s') : '-' }}
</div>
</div>
<div class="mb-3">
<div class="small text-muted mb-1">Beendet</div>
<div id="job-finished-at">
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }}
</span>
</div>
</div>
<div class="mb-2">
<strong>Gestartet von:</strong>
{{ job.startedBy ? job.startedBy.email : '-' }}
<div class="mb-3">
<div class="small text-muted mb-1">Gestartet von</div>
<div>{{ job.startedBy ? job.startedBy.email : '-' }}</div>
</div>
<div class="mb-0">
<div class="small text-muted mb-1">Polling</div>
<div class="small text-light">
{% if isActiveJob %}
Status wird automatisch aktualisiert.
{% else %}
Kein Live-Polling nötig.
{% endif %}
</div>
</div>
</div>
</div>
{# Loader #}
<div id="job-loader"
class="mt-3 d-none">
class="mt-4 {% if not isActiveJob %}d-none{% endif %}">
<div class="d-flex align-items-center gap-2">
<div class="spinner-border spinner-border-sm text-info" role="status"></div>
<div>
@@ -79,10 +162,10 @@
</div>
</div>
{# Fehlerbereich #}
<div id="job-error"
class="alert alert-danger mt-3 {% if not job.errorMessage %}d-none{% endif %}">
class="alert alert-danger mt-4 {% if not job.errorMessage %}d-none{% endif %}">
{% if job.errorMessage %}
<strong>Fehler:</strong><br>
{{ job.errorMessage }}
{% endif %}
</div>
@@ -91,13 +174,13 @@
</div>
<div class="mt-4 small text-secondary">
Hinweis: Bei DOCUMENT_VERSION_ACTIVATE-Jobs wird ein vollständiger
NDJSON-Rebuild und FAISS-Reindex durchgeführt.
Hinweis: Bei <strong>DOCUMENT_VERSION_ACTIVATE</strong>-Jobs wird ein vollständiger
NDJSON-Rebuild und FAISS-Reindex durchgeführt. Bei <strong>GLOBAL_REINDEX</strong>
wird der gesamte Wissensindex neu aufgebaut.
</div>
<script>
(function () {
const statusUrl = {{ path('admin_job_status', {id: job.id})|json_encode|raw }};
const badgeWrap = document.getElementById('job-status-badge');
const finishedAtEl = document.getElementById('job-finished-at');
@@ -106,18 +189,26 @@
let timer = null;
/**
 * Return the string form of `value` with the five HTML-significant
 * characters (&, <, >, ", ') replaced by their entities, so the result
 * can be embedded in markup that is assigned via innerHTML.
 *
 * Non-string input is coerced with String() first.
 */
function escapeHtml(value) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;'
    };

    // Single pass: each matched character is looked up exactly once, so
    // the ampersands of freshly inserted entities are never re-escaped.
    return String(value).replace(/[&<>"']/g, function (symbol) {
        return entities[symbol];
    });
}
function renderBadge(status) {
const map = {
COMPLETED: 'bg-success',
QUEUED: 'bg-secondary',
RUNNING: 'bg-warning text-dark',
FAILED: 'bg-danger',
ABORTED: 'bg-dark'
ABORTED: 'bg-dark border border-danger text-danger'
};
const css = map[status] || 'bg-secondary';
badgeWrap.innerHTML =
`<span class="badge ${css}">${status}</span>`;
const css = map[status] || 'bg-dark border border-secondary';
badgeWrap.innerHTML = `<span class="badge ${css}">${escapeHtml(status || 'UNKNOWN')}</span>`;
}
function stopPolling() {
@@ -127,18 +218,37 @@
}
}
/**
 * Show or hide the error box.
 *
 * A falsy message hides the box and clears its content; any other message
 * is escaped and displayed below the "Fehler:" heading.
 *
 * @param {string} message - Error text to display, or an empty value to hide.
 */
function renderError(message) {
    const hasMessage = Boolean(message);

    // force=true adds 'd-none' (hidden), force=false removes it (visible).
    errorEl.classList.toggle('d-none', !hasMessage);
    errorEl.innerHTML = hasMessage
        ? `<strong>Fehler:</strong><br>${escapeHtml(message)}`
        : '';
}
async function poll() {
try {
const res = await fetch(statusUrl);
if (!res.ok) return;
const res = await fetch(statusUrl, {
headers: {
'Accept': 'application/json'
},
cache: 'no-store'
});
if (!res.ok) {
stopPolling();
return;
}
const data = await res.json();
const status = (data.status || '').toUpperCase();
const status = String(data.status || '').toUpperCase();
renderBadge(status);
finishedAtEl.textContent =
data.finishedAt
finishedAtEl.textContent = data.finishedAt
? new Date(data.finishedAt).toLocaleString('de-DE')
: '-';
@@ -149,25 +259,21 @@
stopPolling();
}
if (status === 'FAILED' && data.errorMessage) {
errorEl.classList.remove('d-none');
errorEl.innerHTML =
`<strong>Fehler:</strong><br>${data.errorMessage}`;
if (status === 'FAILED' || status === 'ABORTED') {
renderError(data.errorMessage || '');
} else {
renderError('');
}
} catch (e) {
stopPolling();
}
}
// Initial render from server state
renderBadge("{{ job.status|upper }}");
renderBadge({{ jobStatus|json_encode|raw }});
if (["QUEUED", "RUNNING"].includes("{{ job.status|upper }}")) {
loaderEl.classList.remove('d-none');
if ({{ isActiveJob ? 'true' : 'false' }}) {
timer = setInterval(poll, 2000);
}
})();
</script>

View File

@@ -4,27 +4,31 @@
{% block body %}
{# ========================================================= #}
{# LIVE REBUILD STATUS (SSE) #}
{# ========================================================= #}
<div id="rebuild-status" class="mb-5">
<div class="alert alert-secondary shadow-sm">
<div id="rebuild-status" class="mb-4">
{% if latestJob %}
<div class="alert alert-secondary shadow-sm mb-0">
Status wird geladen…
</div>
{% endif %}
</div>
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-tag-fill"></i> Tag: {{ tag.label }}
</h1>
<div class="small text-muted">
Slug: <code>{{ tag.slug }}</code>
</div>
</div>
<a href="{{ path('admin_tags_index') }}"
class="btn btn-sm btn-outline-secondary">
Zurück
</a>
</div>
<script>
const statusBox = document.getElementById('rebuild-status');
const source = new EventSource("{{ path('admin_tags_rebuild_stream') }}");
@@ -35,9 +39,9 @@
if (data.status === '{{ statusRunning }}') {
html = `
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center">
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
<div>
Tag-Rebuild läuft<br>
<strong>Tag-Rebuild läuft</strong><br>
${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
</div>
<div class="spinner-border spinner-border-sm"></div>
@@ -45,20 +49,20 @@
`;
} else if (data.status === '{{ statusQueued }}') {
html = `
<div class="alert alert-secondary shadow-sm">
Tag-Rebuild in Warteschlange
<div class="alert alert-secondary shadow-sm mb-0">
<strong>Tag-Rebuild in Warteschlange</strong>
</div>
`;
} else if (data.status === '{{ statusCompleted }}') {
html = `
<div class="alert alert-success shadow-sm">
<div class="alert alert-success shadow-sm mb-0">
<i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen
</div>
`;
} else if (data.status === '{{ statusFailed }}') {
html = `
<div class="alert alert-danger shadow-sm">
Tag-Rebuild fehlgeschlagen<br>
<div class="alert alert-danger shadow-sm mb-0">
<strong>Tag-Rebuild fehlgeschlagen</strong><br>
${data.error ? '<code>' + data.error + '</code>' : ''}
</div>
`;
@@ -70,48 +74,96 @@
source.onerror = function () {
console.warn('SSE Verbindung verloren');
};
window.addEventListener('beforeunload', function () {
source.close();
});
</script>
{# ============================= #}
{# Flash Messages #}
{# ============================= #}
{% for message in app.flashes('success') %}
<div class="alert alert-success">
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger">
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{# ============================= #}
{# Tag → Dokumente #}
{# ============================= #}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Einordnung des Tags</h5>
<div class="mb-2">
{% if tag.type == 'catalog_entity' %}
<span class="badge text-bg-info">Catalog Entity</span>
{% elseif tag.type == 'sales_signal' %}
<span class="badge text-bg-warning">Sales Signal</span>
{% else %}
<span class="badge text-bg-secondary">Generic</span>
{% endif %}
</div>
<p class="small mb-2">
{{ tag.description ?: 'Keine Beschreibung hinterlegt.' }}
</p>
<p class="small text-muted mb-0">
Weise diesen Tag nur Dokumenten zu, die fachlich wirklich denselben Gegenstand,
dieselbe Produktfamilie oder denselben Anwendungsfall abbilden.
Zu breite Zuweisungen machen das Routing weicher.
</p>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="d-flex flex-wrap gap-2">
<span class="badge text-bg-dark border border-secondary">
Zugewiesen: {{ assignedDocIds|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Verfügbar: {{ documents|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Nicht zugewiesen: {{ documents|length - assignedDocIds|length }}
</span>
</div>
</div>
</div>
</div>
<form method="post">
<input type="hidden"
name="_token"
value="{{ csrf_token('assign_tag_' ~ tag.id) }}">
<div class="card bg-black border-secondary">
<div class="card-body p-0 row">
<div class="row g-4">
<div class="col-lg-6">
<div class="card bg-black border-secondary shadow-sm h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Zugewiesene Dokumente
</div>
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark">
<tr>
<th style="width:60px;"><i class="bi bi-three-dots"></i></th>
<th>Zugewiesene Dokumente</th>
<th style="width: 60px;">
<i class="bi bi-check2-square"></i>
</th>
<th>Dokument</th>
</tr>
</thead>
<tbody>
{% set hasAssigned = false %}
{% for doc in documents %}
{% if doc.id in assignedDocIds %}
{% set hasAssigned = true %}
<tr>
<td>
<input type="checkbox"
@@ -119,51 +171,82 @@
value="{{ doc.id }}"
checked>
</td>
<td>
<td class="fw-semibold">
{{ doc.title }}
</td>
</tr>
{% endif %}
{% endfor %}
{% if not hasAssigned %}
<tr>
<td colspan="2" class="text-center text-muted p-4">
Noch keine Dokumente zugewiesen.
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
</div>
</div>
<div class="col-lg-6">
<table class="table table-dark table-striped table-hover mb-0 align-middle col-lg-6">
<div class="card bg-black border-secondary shadow-sm h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Verfügbare Dokumente
</div>
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark">
<tr>
<th style="width:60px;"><i class="bi bi-three-dots"></i></th>
<th>Nicht zugewiesene Dokumente</th>
<th style="width: 60px;">
<i class="bi bi-square"></i>
</th>
<th>Dokument</th>
</tr>
</thead>
<tbody>
{% set hasUnassigned = false %}
{% for doc in documents %}
{% if doc.id not in assignedDocIds %}
{% set hasUnassigned = true %}
<tr>
<td>
<input type="checkbox"
name="documents[]"
value="{{ doc.id }}"
>
value="{{ doc.id }}">
</td>
<td class="opacity-50">
<td class="opacity-75">
{{ doc.title }}
</td>
</tr>
{% endif %}
{% endfor %}
{% if not hasUnassigned %}
<tr>
<td colspan="2" class="text-center text-muted p-4">
Keine weiteren aktiven Dokumente verfügbar.
</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
<button class="btn btn-primary mt-3">
Speichern
<div class="d-flex justify-content-end mt-4">
<button class="btn btn-primary">
Zuweisungen speichern
</button>
</div>
</form>
{% endblock %}

View File

@@ -4,77 +4,52 @@
{% block body %}
{# ========================================================= #}
{# LIVE REBUILD STATUS (SSE) #}
{# ========================================================= #}
<div id="rebuild-status" class="mb-5">
<div id="rebuild-status" class="mb-4">
{% if latestJob %}
<div class="alert alert-secondary shadow-sm">
<div class="alert alert-secondary shadow-sm mb-0">
Status wird geladen…
</div>
{% endif %}
</div>
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"><i class="bi bi-tag-fill"></i> Tag-Management</h1>
<h1 class="h3 mb-0">
<i class="bi bi-tag-fill"></i> Tag-Management
</h1>
</div>
{# ========================================================= #}
{# TAG SYSTEM DESCRIPTION #}
{# ========================================================= #}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row">
<div class="card-body row g-4">
<div class="col-lg-6">
<h5 class="text-info mb-3">Was machen Tags im System?</h5>
<p class="small text-light mb-2">
Tags dienen als semantische Routing-Ebene innerhalb des RAG-Systems.
Sie strukturieren Dokumente thematisch und beeinflussen,
welche Inhalte bei einer Nutzeranfrage priorisiert werden.
Tags sind die semantische Routing-Ebene innerhalb des Systems.
Sie helfen dabei, thematisch passende Dokumenträume schneller zu erkennen
und gute Retrieval-Kandidaten zu priorisieren.
</p>
<ul class="small text-light mb-3">
<li>
Tags werden Dokumenten manuell zugewiesen.
</li>
<li>
Beim Rebuild wird aus allen Tags eine eigene
<code>tags.ndjson</code> erzeugt.
</li>
<li>
Zusätzlich wird ein separater Vektorindex
(<code>vector_tags.index</code>) aufgebaut.
</li>
<li>
Bei einer Anfrage erfolgt zunächst ein Tag-Matching,
danach wird das Chunk-Retrieval entsprechend gewichtet.
</li>
<ul class="small text-light mb-0">
<li>Tags werden Dokumenten manuell zugewiesen.</li>
<li>Beim Rebuild wird aus den aktiven Tag-Zuordnungen eine <code>tags.ndjson</code> erzeugt.</li>
<li>Zusätzlich wird ein eigener Tag-Vektorindex (<code>vector_tags.index</code>) gebaut.</li>
<li>Bei Anfragen erfolgt zunächst ein semantisches Tag-Matching, danach das eigentliche Chunk-Retrieval.</li>
</ul>
</div>
<div class="col-lg-6">
<h6 class="text-info mt-3">Wie werden Tags bewertet?</h6>
<h5 class="text-info mb-3">Was ist gutes Tagging?</h5>
<p class="small text-light mb-2">
Die Bewertung erfolgt über einen eigenen Vektor-Similarity-Score
im Tag-Index. Das System berechnet:
</p>
<ul class="small text-light">
<li>
Ähnlichkeit zwischen Nutzeranfrage und Tag-Embedding
</li>
<li>
Top-K Treffer im Tag-Index
</li>
<li>
Gewichtete Übergabe an das Chunk-Retrieval
</li>
<ul class="small text-light mb-3">
<li><strong>Präzise statt generisch:</strong> lieber <code>Produktnamen</code> als <code>Gerät</code>.</li>
<li><strong>Fachlich sauber:</strong> Tags sollen echte Produktfamilien, Anwendungsfälle oder Entitäten abbilden.</li>
<li><strong>Wenig Überschneidung:</strong> keine unnötig breiten oder doppeldeutigen Tags.</li>
<li><strong>Bewusst typisieren:</strong> <code>catalog_entity</code> für echte Katalog-/Entity-Tags, <code>generic</code> nur für allgemeine Zusatzsemantik.</li>
</ul>
<p class="small text-light mt-2 mb-0">
Tags wirken somit als semantischer Verstärker.
Sie ersetzen kein Chunk-Retrieval, sondern steuern dessen Priorisierung.
<p class="small text-warning mb-0">
Zu breite Tags wie „Produkt“, „System“ oder „Gerät“ machen das Routing weicher
und bringen meist weniger Nutzen als präzise fachliche Tags.
</p>
</div>
</div>
@@ -90,9 +65,9 @@
if (data.status === '{{ statusRunning }}') {
html = `
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center">
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
<div>
Tag-Rebuild läuft<br>
<strong>Tag-Rebuild läuft</strong><br>
${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
</div>
<div class="spinner-border spinner-border-sm"></div>
@@ -100,20 +75,20 @@
`;
} else if (data.status === '{{ statusQueued }}') {
html = `
<div class="alert alert-secondary shadow-sm">
Tag-Rebuild in Warteschlange
<div class="alert alert-secondary shadow-sm mb-0">
<strong>Tag-Rebuild in Warteschlange</strong>
</div>
`;
} else if (data.status === '{{ statusCompleted }}') {
html = `
<div class="alert alert-success shadow-sm">
<div class="alert alert-success shadow-sm mb-0">
<i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen
</div>
`;
} else if (data.status === '{{ statusFailed }}') {
html = `
<div class="alert alert-danger shadow-sm">
Tag-Rebuild fehlgeschlagen<br>
<div class="alert alert-danger shadow-sm mb-0">
<strong>Tag-Rebuild fehlgeschlagen</strong><br>
${data.error ? '<code>' + data.error + '</code>' : ''}
</div>
`;
@@ -125,11 +100,12 @@
source.onerror = function () {
console.warn('SSE Verbindung verloren');
};
window.addEventListener('beforeunload', function () {
source.close();
});
</script>
{# ========================================================= #}
{# Create Tag Card #}
{# ========================================================= #}
<div class="card bg-black border-secondary text-light mb-4 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">Neuen Tag hinzufügen</h5>
@@ -153,24 +129,26 @@
required/>
</div>
<div class="col-md-4">
<label class="form-label small text-muted">Beschreibung</label>
<input class="form-control form-control-sm"
name="description"
placeholder="Semantische Beschreibung des Tags"
required/>
</div>
<div class="mb-3">
<label class="form-label">Type</label>
<select name="type" class="form-select">
<option value="generic">Generic</option>
<option value="catalog_entity">Catalog Entity</option>
<option value="sales_signal">Sales Signal</option>
<div class="col-md-3">
<label class="form-label small text-muted">Typ</label>
<select name="type" class="form-select form-select-sm">
{% for choiceLabel, choiceValue in tagTypeChoices %}
<option value="{{ choiceValue }}"
{% if choiceValue == 'generic' %}selected{% endif %}>
{{ choiceLabel }}
</option>
{% endfor %}
</select>
</div>
<div class="col-md-2 d-grid align-items-end">
<div class="col-md-3">
<label class="form-label small text-muted">Beschreibung</label>
<input class="form-control form-control-sm"
name="description"
placeholder="Optional: fachlicher Kontext des Tags"/>
</div>
<div class="col-12 d-grid d-md-flex justify-content-md-end">
<button class="btn btn-sm btn-outline-info">
Anlegen
</button>
@@ -179,36 +157,56 @@
</div>
</div>
{# ========================================================= #}
{# Tag Table #}
{# ========================================================= #}
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body">
<div class="mb-3">
<div class="mb-3 d-flex justify-content-between align-items-center flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Tags:</strong>
<span class="text-muted small ms-2">
{{ tags|length }} Einträge
</span>
</div>
<div class="small text-muted">
Dokumentanzahl bezieht sich auf aktive Dokumente.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark">
<tr>
<th style="width: 25%">Label</th>
<th style="width: 25%">Slug</th>
<th style="width: 35%">Beschreibung</th>
<th style="width: 18%">Label</th>
<th style="width: 18%">Slug</th>
<th style="width: 14%">Typ</th>
<th style="width: 10%">Aktive Dokumente</th>
<th style="width: 25%">Beschreibung</th>
<th class="text-end" style="width: 15%">Aktion</th>
</tr>
</thead>
<tbody>
{% for tag in tags %}
{% set activeDocumentCount = documentCountByTagId[tag.id.toRfc4122] ?? 0 %}
<tr>
<td class="fw-semibold">{{ tag.label }}</td>
<td><code>{{ tag.slug }}</code></td>
<td>
{% if tag.type == 'catalog_entity' %}
<span class="badge text-bg-info">Catalog Entity</span>
{% elseif tag.type == 'sales_signal' %}
<span class="badge text-bg-warning">Sales Signal</span>
{% else %}
<span class="badge text-bg-secondary">Generic</span>
{% endif %}
</td>
<td>
<span class="badge text-bg-dark border border-secondary">
{{ activeDocumentCount }}
</span>
</td>
<td>{{ tag.description ?: '-' }}</td>
<td class="text-end">
<a href="{{ path('admin_tags_assign', { id: tag.id }) }}"
class="btn btn-sm btn-outline-info me-2">
Zuweisen
@@ -217,7 +215,6 @@
<form method="post"
action="{{ path('admin_tags_delete', {id: tag.id}) }}"
style="display:inline-block;">
<input type="hidden"
name="_token"
value="{{ csrf_token('admin_tag_delete_' ~ tag.id) }}"/>
@@ -227,18 +224,18 @@
Löschen
</button>
</form>
</td>
</tr>
{% else %}
<tr>
<td colspan="4" class="p-4 text-center text-muted">
<td colspan="6" class="p-4 text-center text-muted">
Noch keine Tags vorhanden.
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>