first commit

This commit is contained in:
team 1
2026-04-20 16:36:28 +02:00
parent a0ec07a99c
commit 2587ac8b4b
41 changed files with 5126 additions and 2280 deletions

View File

@@ -1,42 +1,44 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys
import json import json
import sys
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Tuple
def fail(message: str, code: int) -> None:
    """Report a fatal error on stderr and terminate the process.

    Args:
        message: Human-readable reason; emitted as ``ERROR: <message>``.
        code: Process exit status handed to the interpreter.

    Raises:
        SystemExit: Always, carrying ``code``.
    """
    sys.stderr.write(f"ERROR: {message}\n")
    raise SystemExit(code)
# --------------------------------------------------------- # ---------------------------------------------------------
# Positional args # Positional args
# 1 tags.ndjson # 1 tags.ndjson
# 2 out_index_path (can be .tmp) # 2 out_index_path (can be .tmp)
# --------------------------------------------------------- # ---------------------------------------------------------
if len(sys.argv) < 3: if len(sys.argv) < 3:
print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index>", file=sys.stderr) fail("usage: vector_ingest_tags.py <tags.ndjson> <out.index>", 2)
sys.exit(2)
tags_path = Path(sys.argv[1]).resolve() tags_path = Path(sys.argv[1]).resolve()
out_path = Path(sys.argv[2]).resolve() out_path = Path(sys.argv[2]).resolve()
meta_path = Path(str(out_path) + ".meta.json") meta_path = Path(str(out_path) + ".meta.json")
# --------------------------------------------------------- # ---------------------------------------------------------
# Dependency checks # Dependency checks
# --------------------------------------------------------- # ---------------------------------------------------------
try: try:
import faiss import faiss
except Exception: except Exception:
print("ERROR: Python module 'faiss' not found.", file=sys.stderr) fail("Python module 'faiss' not found.", 10)
sys.exit(10)
try: try:
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
except Exception: except Exception:
print("ERROR: Python module 'sentence-transformers' not found.", file=sys.stderr) fail("Python module 'sentence-transformers' not found.", 11)
sys.exit(11)
import numpy as np import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
# --------------------------------------------------------- # ---------------------------------------------------------
# Load embedding model from index_meta.json (Single Source of Truth) # Load embedding model from index_meta.json (Single Source of Truth)
@@ -45,64 +47,122 @@ BASE_PATH = Path(__file__).resolve().parents[2]
INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json" INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json"
if not INDEX_META_PATH.exists(): if not INDEX_META_PATH.exists():
print("ERROR: index_meta.json not found", file=sys.stderr) fail("index_meta.json not found", 30)
sys.exit(30)
try:
meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8")) meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
except Exception:
fail("index_meta.json is invalid", 30)
embedding_model = meta.get("embedding_model") embedding_model = meta.get("embedding_model")
if not isinstance(embedding_model, str) or embedding_model.strip() == "":
fail("embedding_model missing in index_meta.json", 31)
if not embedding_model: model = SentenceTransformer(embedding_model.strip())
print("ERROR: embedding_model missing in index_meta.json", file=sys.stderr)
sys.exit(31)
model = SentenceTransformer(embedding_model)
# --------------------------------------------------------- # ---------------------------------------------------------
# File checks # File checks
# --------------------------------------------------------- # ---------------------------------------------------------
if not tags_path.is_file(): if not tags_path.is_file():
print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr) fail(f"tags.ndjson not found at {tags_path}", 20)
sys.exit(20)
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------
# Helpers
# ---------------------------------------------------------
def cleanup_outputs() -> None:
    """Delete the index file and its ``.meta.json`` sidecar if they exist.

    Operates on the module-level ``out_path`` and ``meta_path``. A file that
    is already absent is not an error.
    """
    for stale_path in (out_path, meta_path):
        try:
            stale_path.unlink()
        except FileNotFoundError:
            # Nothing to remove. Attempting the unlink directly (instead of
            # checking exists() first) avoids failing when the file
            # disappears between the check and the delete.
            pass
def normalize_text(value: Any) -> str:
    """Return ``value`` as a single-line, whitespace-collapsed string.

    Runs of whitespace (including newlines and tabs) become one space and
    leading/trailing whitespace is removed. Results longer than 4000
    characters are cut to 4000 and right-stripped again so the cut never
    leaves a trailing space.

    Args:
        value: Any object; non-strings are converted with ``str()``.
            ``None`` is treated as "no text".

    Returns:
        The normalized text, or ``""`` when ``value`` is ``None`` or
        contains only whitespace.
    """
    if value is None:
        # str(None) would produce the literal text "None", which callers
        # would then mistake for real content.
        return ""

    # split() without arguments already discards leading/trailing whitespace.
    text = " ".join(str(value).split())
    if len(text) > 4000:
        text = text[:4000].rstrip()
    return text
# --------------------------------------------------------- # ---------------------------------------------------------
# Streaming read NDJSON # Streaming read NDJSON
# --------------------------------------------------------- # ---------------------------------------------------------
def load_rows(path: Path) -> Tuple[List[str], List[str], Dict[str, int]]:
    """Stream an NDJSON tag export and collect the rows worth embedding.

    Each non-empty line is parsed as JSON. A row is accepted when it is an
    object with a non-empty ``tag_id`` and non-empty ``text`` and its
    ``tag_id`` has not been seen before. Accepted texts are prefixed with
    ``"passage: "``.

    Args:
        path: Location of the NDJSON file (read as UTF-8).

    Returns:
        ``(texts, ids, stats)`` where ``texts[i]`` belongs to ``ids[i]`` and
        ``stats`` counts total, empty, invalid, incomplete, duplicate and
        accepted lines.
    """
    texts: List[str] = []
    ids: List[str] = []
    seen_ids = set()

    stats = {
        "lines_total": 0,
        "lines_empty": 0,
        "lines_invalid_json": 0,
        "rows_missing_fields": 0,
        "rows_duplicate_tag_id": 0,
        "rows_accepted": 0,
    }

    with path.open("r", encoding="utf-8") as handle:
        for line in handle:
            stats["lines_total"] += 1
            line = line.strip()

            if line == "":
                stats["lines_empty"] += 1
                continue

            try:
                entry = json.loads(line)
            except Exception:
                # Best effort: a malformed line is counted and skipped.
                stats["lines_invalid_json"] += 1
                continue

            if not isinstance(entry, dict):
                stats["rows_missing_fields"] += 1
                continue

            raw_tag_id = entry.get("tag_id")
            raw_text = entry.get("text")
            if raw_tag_id is None or raw_text is None:
                # A JSON null must count as missing; stringifying it would
                # yield the literal "None" and let the row through.
                stats["rows_missing_fields"] += 1
                continue

            tag_id = str(raw_tag_id).strip()
            text = normalize_text(raw_text)

            if tag_id == "" or text == "":
                stats["rows_missing_fields"] += 1
                continue

            if tag_id in seen_ids:
                # First occurrence wins; later duplicates are dropped.
                stats["rows_duplicate_tag_id"] += 1
                continue

            seen_ids.add(tag_id)
            ids.append(tag_id)
            texts.append(f"passage: {text}")
            stats["rows_accepted"] += 1

    return texts, ids, stats
texts, ids, stats = load_rows(tags_path)
print(
json.dumps(
{
"event": "tag_rows_loaded",
**stats,
},
ensure_ascii=False,
),
file=sys.stderr,
)
if not texts: if not texts:
if out_path.exists(): cleanup_outputs()
out_path.unlink()
if meta_path.exists():
meta_path.unlink()
sys.exit(0) sys.exit(0)
# --------------------------------------------------------- # ---------------------------------------------------------
# Build embeddings # Build embeddings
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -110,18 +170,32 @@ embeddings = model.encode(
texts, texts,
normalize_embeddings=True, normalize_embeddings=True,
show_progress_bar=True, show_progress_bar=True,
batch_size=128 batch_size=128,
) )
embeddings = np.array(embeddings).astype("float32") embeddings = np.array(embeddings, dtype="float32")
dim = embeddings.shape[1]
if embeddings.ndim != 2 or embeddings.shape[0] != len(ids) or embeddings.shape[0] == 0:
cleanup_outputs()
fail("tag embeddings have invalid shape", 40)
if embeddings.shape[1] <= 0:
cleanup_outputs()
fail("tag embeddings have invalid dimension", 41)
dim = int(embeddings.shape[1])
index = faiss.IndexFlatIP(dim) index = faiss.IndexFlatIP(dim)
index.add(embeddings) index.add(embeddings)
faiss.write_index(index, str(out_path)) if int(index.ntotal) != len(ids):
cleanup_outputs()
fail("FAISS tag index count does not match meta ids", 42)
with open(meta_path, "w", encoding="utf-8") as f: faiss.write_index(index, str(out_path))
json.dump(ids, f) meta_path.write_text(
json.dumps(ids, ensure_ascii=False),
encoding="utf-8",
)
sys.exit(0) sys.exit(0)

View File

@@ -6,10 +6,10 @@ from logging.handlers import RotatingFileHandler
import threading import threading
import time import time
from pathlib import Path from pathlib import Path
from typing import Any, List, Optional, Dict from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import faiss import faiss
import numpy as np
from fastapi import FastAPI, HTTPException, Request from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from pydantic import BaseModel from pydantic import BaseModel
@@ -20,7 +20,7 @@ from sentence_transformers import SentenceTransformer
# Service Stamp (to verify you are running THIS file) # Service Stamp (to verify you are running THIS file)
# ============================================================ # ============================================================
SERVICE_STAMP = "vector_service.py@2026-02-28T10:20+01:00" SERVICE_STAMP = "vector_service.py@2026-04-20T00:00+02:00"
# ============================================================ # ============================================================
@@ -41,8 +41,6 @@ TAG_MAP_PATH = KNOWLEDGE_DIR / "vector_tags.index.meta.json"
INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json" INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json"
INDEX_RUNTIME_PATH = KNOWLEDGE_DIR / "index_runtime.json" INDEX_RUNTIME_PATH = KNOWLEDGE_DIR / "index_runtime.json"
INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson" INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
# NEW: Tags NDJSON (exported by PHP) used to enrich /search-tags responses
TAGS_NDJSON_PATH = KNOWLEDGE_DIR / "tags.ndjson" TAGS_NDJSON_PATH = KNOWLEDGE_DIR / "tags.ndjson"
@@ -54,6 +52,48 @@ logger = logging.getLogger("vector_service")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
# ============================================================
# App State
# ============================================================
app = FastAPI()
model: Optional[SentenceTransformer] = None
chunk_index = None
chunk_ids: Optional[List[Any]] = None
chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {}
tag_index = None
tag_ids: Optional[List[Any]] = None
# tag_id -> {"label": "...", "tag_type": "..."}
tag_meta_map: Dict[str, Dict[str, str]] = {}
loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None
current_chunk_runtime_stamp: Optional[str] = None
current_tags_runtime_stamp: Optional[str] = None
current_tags_index_present: Optional[bool] = None
reload_lock = threading.Lock()
# ============================================================
# Models
# ============================================================
class SearchRequest(BaseModel):
query: str
limit: int = 8
doc_ids: Optional[List[str]] = None
# ============================================================
# Helpers
# ============================================================
def setup_logging() -> None: def setup_logging() -> None:
LOG_DIR.mkdir(parents=True, exist_ok=True) LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -77,10 +117,9 @@ def setup_logging() -> None:
if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers): if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
logger.addHandler(file_handler) logger.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers): if not any(type(h) is logging.StreamHandler for h in logger.handlers):
logger.addHandler(stream_handler) logger.addHandler(stream_handler)
# Capture uvicorn logs in the same file as well (critical for hidden 500s)
uvicorn_error = logging.getLogger("uvicorn.error") uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_access = logging.getLogger("uvicorn.access") uvicorn_access = logging.getLogger("uvicorn.access")
@@ -89,62 +128,22 @@ def setup_logging() -> None:
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers): if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(file_handler) uvicorn_error.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_error.handlers): if not any(type(h) is logging.StreamHandler for h in uvicorn_error.handlers):
uvicorn_error.addHandler(stream_handler) uvicorn_error.addHandler(stream_handler)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers): if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(file_handler) uvicorn_access.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_access.handlers): if not any(type(h) is logging.StreamHandler for h in uvicorn_access.handlers):
uvicorn_access.addHandler(stream_handler) uvicorn_access.addHandler(stream_handler)
# ============================================================
# FastAPI
# ============================================================
app = FastAPI()
model: Optional[SentenceTransformer] = None
chunk_index = None
chunk_ids: Optional[List[Any]] = None
chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {}
tag_index = None
tag_ids: Optional[List[Any]] = None
# NEW: tag_id -> {"label": "...", "tag_type": "..."}
tag_meta_map: Dict[str, Dict[str, str]] = {}
loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None
current_runtime_stamp: Optional[str] = None
reload_lock = threading.Lock()
# ============================================================
# Models
# ============================================================
class SearchRequest(BaseModel):
query: str
limit: int = 8
doc_ids: Optional[List[str]] = None
# ============================================================
# Loader Helpers
# ============================================================
def _safe_read_json(path: Path) -> Optional[Any]:
    """Read and parse a UTF-8 JSON file without ever raising.

    Returns the decoded value, or ``None`` when the file does not exist or
    reading/parsing fails (the failure is logged as a warning).
    """
    try:
        raw = path.read_text(encoding="utf-8") if path.exists() else None
        return None if raw is None else json.loads(raw)
    except Exception as exc:
        logger.warning("Failed to read json %s: %s", str(path), str(exc))
        return None
@@ -152,25 +151,97 @@ def _as_key(value: Any) -> Optional[str]:
if value is None: if value is None:
return None return None
if isinstance(value, str): if isinstance(value, str):
v = value.strip() value = value.strip()
return v if v else None return value or None
try: try:
v = str(value).strip() value = str(value).strip()
return v if v else None return value or None
except Exception: except Exception:
return None return None
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
    """Turn a requested result count into a usable positive integer.

    Values that cannot be converted with ``int()`` or are not positive yield
    ``default``; values above ``max_limit`` are capped at ``max_limit``.
    """
    try:
        requested = int(limit)
    except Exception:
        return default

    if requested <= 0:
        return default
    return min(requested, max_limit)
def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
    """Coerce an id-map payload into a plain list.

    A list is returned unchanged. A dict whose keys are numeric strings
    (``{"0": ..., "1": ...}``) is converted to a list ordered by the numeric
    value of its keys. Anything else, or a dict that cannot be converted,
    yields ``None``.
    """
    if isinstance(value, list):
        return value
    if not isinstance(value, dict):
        return None

    try:
        ordered_positions = sorted(map(int, value))
        return [value[str(position)] for position in ordered_positions]
    except Exception:
        return None
def _normalize_tag_type(value: Any) -> str:
    """Map a raw tag type onto one of the known type names.

    The value is cleaned with ``_as_key`` and lower-cased. Only ``generic``,
    ``catalog_entity`` and ``sales_signal`` are passed through; everything
    else, including a missing value, becomes ``"generic"``.
    """
    key = _as_key(value)
    if key is not None:
        lowered = key.lower()
        if lowered in {"generic", "catalog_entity", "sales_signal"}:
            return lowered
    return "generic"
def _extract_runtime_state(runtime: Any) -> Tuple[Optional[str], Optional[str], Optional[bool]]:
    """Pull the reload-relevant fields out of a decoded runtime document.

    Returns ``(last_rebuild_at, last_tags_rebuild_at, tags_index_present)``.
    Each element is ``None`` when ``runtime`` is not a dict, the key is
    absent, or the stored value has the wrong type (the two stamps must be
    ``str``, the presence flag must be ``bool``).
    """
    if not isinstance(runtime, dict):
        return None, None, None

    def typed(key: str, expected: type) -> Any:
        # Yield the stored value only when it has exactly the expected kind.
        candidate = runtime.get(key)
        return candidate if isinstance(candidate, expected) else None

    return (
        typed("last_rebuild_at", str),
        typed("last_tags_rebuild_at", str),
        typed("tags_index_present", bool),
    )
def _validate_index_alignment(index_obj: Any, ids: Optional[List[Any]], label: str) -> Tuple[Any, Optional[List[Any]]]:
    """Keep an index/id-list pair only when both describe the same row count.

    Args:
        index_obj: Object exposing ``ntotal`` (or ``None``).
        ids: Id list that should have one entry per index row (or ``None``).
        label: Name used in the warning messages.

    Returns:
        ``(index_obj, ids)`` when ``int(index_obj.ntotal) == len(ids)``;
        otherwise ``(None, None)``. A missing/unreadable ``ntotal`` and a
        count mismatch are each logged as a warning.
    """
    disabled: Tuple[Any, Optional[List[Any]]] = (None, None)

    if index_obj is None or ids is None:
        return disabled

    try:
        index_count = int(index_obj.ntotal)
    except Exception:
        logger.warning("[Reload] %s index has no ntotal -> disabled", label)
        return disabled

    id_count = len(ids)
    if index_count == id_count:
        return index_obj, ids

    logger.warning(
        "[Reload] %s meta/index mismatch (ids=%s index=%s) -> disabled",
        label,
        id_count,
        index_count,
    )
    return disabled
def load_chunk_maps_from_ndjson() -> None: def load_chunk_maps_from_ndjson() -> None:
@@ -183,8 +254,8 @@ def load_chunk_maps_from_ndjson() -> None:
return return
try: try:
with INDEX_NDJSON_PATH.open("r", encoding="utf-8") as f: with INDEX_NDJSON_PATH.open("r", encoding="utf-8") as handle:
for line in f: for line in handle:
line = line.strip() line = line.strip()
if not line: if not line:
continue continue
@@ -201,29 +272,32 @@ def load_chunk_maps_from_ndjson() -> None:
if doc_id_key: if doc_id_key:
chunk_doc_map[chunk_id_key] = doc_id_key chunk_doc_map[chunk_id_key] = doc_id_key
ci = row.get("chunk_index") chunk_index_value = row.get("chunk_index")
if isinstance(ci, int): if isinstance(chunk_index_value, int):
chunk_pos_map[chunk_id_key] = ci chunk_pos_map[chunk_id_key] = chunk_index_value
elif isinstance(ci, str): elif isinstance(chunk_index_value, str):
s = ci.strip() stripped = chunk_index_value.strip()
if s.isdigit(): if stripped.isdigit():
try: try:
chunk_pos_map[chunk_id_key] = int(s) chunk_pos_map[chunk_id_key] = int(stripped)
except Exception: except Exception:
pass pass
except Exception as exc:
except Exception as e: logger.warning("Failed to load chunk maps from ndjson: %s", str(exc))
logger.warning("Failed to load chunk maps from ndjson: %s", str(e))
def load_tag_meta_from_tags_ndjson() -> None: def load_tag_meta_from_tags_ndjson() -> None:
""" """
Loads minimal tag metadata from tags.ndjson to enrich /search-tags results. Loads minimal tag metadata from tags.ndjson to enrich /search-tags results.
Expected line format (from PHP exporter / ingester pipeline): Expected line format:
{"tag_id":"...","text":"LABEL\\nSLUG\\noptional description", ...} {
We extract: "tag_id": "...",
label = first line of "text" (fallback: "") "text": "LABEL\\nSLUG\\noptional description",
tag_type = "type" if present (preferred), else "generic" "type": "catalog_entity|generic|sales_signal",
"document_ids": ["..."]
}
Only tags with at least one exported document id are kept.
""" """
global tag_meta_map global tag_meta_map
@@ -234,11 +308,12 @@ def load_tag_meta_from_tags_ndjson() -> None:
return return
try: try:
with TAGS_NDJSON_PATH.open("r", encoding="utf-8") as f: with TAGS_NDJSON_PATH.open("r", encoding="utf-8") as handle:
for line in f: for line in handle:
line = line.strip() line = line.strip()
if not line: if not line:
continue continue
try: try:
row = json.loads(line) row = json.loads(line)
except Exception: except Exception:
@@ -248,55 +323,33 @@ def load_tag_meta_from_tags_ndjson() -> None:
if not tag_id: if not tag_id:
continue continue
# Prefer explicit fields if present document_ids = row.get("document_ids")
ttype = row.get("type") if isinstance(document_ids, list) and len(document_ids) == 0:
if isinstance(ttype, str) and ttype.strip(): continue
tag_type = ttype.strip()
else:
tag_type = "generic"
tag_type = _normalize_tag_type(row.get("type"))
label = "" label = ""
txt = row.get("text")
if isinstance(txt, str) and txt.strip():
first = txt.splitlines()[0].strip() if txt.splitlines() else ""
label = first
if label: text_value = row.get("text")
tag_meta_map[tag_id] = {"label": label, "tag_type": tag_type} if isinstance(text_value, str) and text_value.strip():
else: first_line = text_value.splitlines()[0].strip() if text_value.splitlines() else ""
tag_meta_map[tag_id] = {"label": "", "tag_type": tag_type} label = first_line
except Exception as e: tag_meta_map[tag_id] = {
logger.warning("Failed to load tag meta from tags.ndjson: %s", str(e)) "label": label,
"tag_type": tag_type,
}
except Exception as exc:
logger.warning("Failed to load tag meta from tags.ndjson: %s", str(exc))
tag_meta_map = {} tag_meta_map = {}
def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
"""
Accepts:
- list: ok
- dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
Returns None if invalid.
"""
if isinstance(value, list):
return value
if isinstance(value, dict):
try:
keys = sorted(int(k) for k in value.keys())
return [value[str(i)] for i in keys]
except Exception:
return None
return None
def load_all() -> None: def load_all() -> None:
global model, chunk_index, chunk_ids global model, chunk_index, chunk_ids
global tag_index, tag_ids global tag_index, tag_ids
global loaded_embedding_model_name global loaded_embedding_model_name
global current_index_version global current_index_version
global current_runtime_stamp global current_chunk_runtime_stamp, current_tags_runtime_stamp, current_tags_index_present
with reload_lock: with reload_lock:
meta = _safe_read_json(INDEX_META_PATH) meta = _safe_read_json(INDEX_META_PATH)
@@ -314,15 +367,21 @@ def load_all() -> None:
model = SentenceTransformer(embedding_model_name) model = SentenceTransformer(embedding_model_name)
loaded_embedding_model_name = embedding_model_name loaded_embedding_model_name = embedding_model_name
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
chunk_runtime_stamp, tags_runtime_stamp, tags_index_present = _extract_runtime_state(runtime)
# Chunks # Chunks
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists(): if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
logger.info("[Reload] Loading chunk index") logger.info("[Reload] Loading chunk index")
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH)) loaded_chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
raw = _safe_read_json(CHUNK_MAP_PATH) raw_chunk_meta = _safe_read_json(CHUNK_MAP_PATH)
chunk_ids = _normalize_meta_list(raw) loaded_chunk_ids = _normalize_meta_list(raw_chunk_meta)
if chunk_ids is None: if loaded_chunk_ids is None:
chunk_index = None chunk_index = None
chunk_ids = None
logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled") logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled")
else:
chunk_index, chunk_ids = _validate_index_alignment(loaded_chunk_index, loaded_chunk_ids, "chunk")
else: else:
chunk_index = None chunk_index = None
chunk_ids = None chunk_ids = None
@@ -331,35 +390,38 @@ def load_all() -> None:
load_chunk_maps_from_ndjson() load_chunk_maps_from_ndjson()
# Tags # Tags
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists(): should_load_tag_index = tags_index_present is not False
if should_load_tag_index and TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
logger.info("[Reload] Loading tag index") logger.info("[Reload] Loading tag index")
tag_index = faiss.read_index(str(TAG_INDEX_PATH)) loaded_tag_index = faiss.read_index(str(TAG_INDEX_PATH))
raw = _safe_read_json(TAG_MAP_PATH) raw_tag_meta = _safe_read_json(TAG_MAP_PATH)
tag_ids = _normalize_meta_list(raw) loaded_tag_ids = _normalize_meta_list(raw_tag_meta)
if tag_ids is None: if loaded_tag_ids is None:
tag_index = None tag_index = None
tag_ids = None
logger.warning("[Reload] tag_ids meta invalid -> tag index disabled") logger.warning("[Reload] tag_ids meta invalid -> tag index disabled")
else:
tag_index, tag_ids = _validate_index_alignment(loaded_tag_index, loaded_tag_ids, "tag")
else: else:
tag_index = None tag_index = None
tag_ids = None tag_ids = None
if tags_index_present is False:
logger.info("[Reload] Runtime marks tags index as absent -> tag index disabled")
# NEW: load tag meta for enrichment
logger.info("[Reload] Loading tag meta from tags.ndjson") logger.info("[Reload] Loading tag meta from tags.ndjson")
load_tag_meta_from_tags_ndjson() load_tag_meta_from_tags_ndjson()
runtime = _safe_read_json(INDEX_RUNTIME_PATH)
if isinstance(runtime, dict):
v = runtime.get("last_rebuild_at")
current_runtime_stamp = v if isinstance(v, str) else None
else:
current_runtime_stamp = None
current_index_version = index_version if isinstance(index_version, int) else None current_index_version = index_version if isinstance(index_version, int) else None
current_chunk_runtime_stamp = chunk_runtime_stamp
current_tags_runtime_stamp = tags_runtime_stamp
current_tags_index_present = tags_index_present
logger.info( logger.info(
"[Reload] Completed (index_version=%s runtime=%s embedding_model=%s tag_meta=%s stamp=%s file=%s)", "[Reload] Completed (index_version=%s chunk_runtime=%s tags_runtime=%s tags_index_present=%s embedding_model=%s tag_meta=%s stamp=%s file=%s)",
str(current_index_version), str(current_index_version),
str(current_runtime_stamp), str(current_chunk_runtime_stamp),
str(current_tags_runtime_stamp),
str(current_tags_index_present),
str(loaded_embedding_model_name), str(loaded_embedding_model_name),
str(len(tag_meta_map)), str(len(tag_meta_map)),
SERVICE_STAMP, SERVICE_STAMP,
@@ -373,7 +435,7 @@ def load_all() -> None:
def observer_loop() -> None: def observer_loop() -> None:
global current_index_version global current_index_version
global current_runtime_stamp global current_chunk_runtime_stamp, current_tags_runtime_stamp, current_tags_index_present
while True: while True:
time.sleep(2) time.sleep(2)
@@ -384,28 +446,50 @@ def observer_loop() -> None:
continue continue
new_version = meta.get("index_version") if isinstance(meta.get("index_version"), int) else None new_version = meta.get("index_version") if isinstance(meta.get("index_version"), int) else None
runtime = _safe_read_json(INDEX_RUNTIME_PATH) runtime = _safe_read_json(INDEX_RUNTIME_PATH)
new_runtime = None new_chunk_runtime, new_tags_runtime, new_tags_index_present = _extract_runtime_state(runtime)
if isinstance(runtime, dict):
v = runtime.get("last_rebuild_at")
new_runtime = v if isinstance(v, str) else None
if new_version != current_index_version: if new_version != current_index_version:
logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version)) logger.info(
"[Observer] index_version changed (%s -> %s) -> Reload",
str(current_index_version),
str(new_version),
)
load_all() load_all()
continue continue
if new_runtime != current_runtime_stamp: if new_chunk_runtime != current_chunk_runtime_stamp:
logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime)) logger.info(
"[Observer] chunk runtime changed (%s -> %s) -> Reload",
str(current_chunk_runtime_stamp),
str(new_chunk_runtime),
)
load_all()
continue
if new_tags_runtime != current_tags_runtime_stamp:
logger.info(
"[Observer] tags runtime changed (%s -> %s) -> Reload",
str(current_tags_runtime_stamp),
str(new_tags_runtime),
)
load_all()
continue
if new_tags_index_present != current_tags_index_present:
logger.info(
"[Observer] tags_index_present changed (%s -> %s) -> Reload",
str(current_tags_index_present),
str(new_tags_index_present),
)
load_all() load_all()
except Exception as e: except Exception as exc:
logger.exception("[Observer ERROR] %s", str(e)) logger.exception("[Observer ERROR] %s", str(exc))
# ============================================================ # ============================================================
# Global Exception Handler (forces JSON + logs) # Global Exception Handler
# ============================================================ # ============================================================
@app.exception_handler(Exception) @app.exception_handler(Exception)
@@ -427,12 +511,12 @@ async def unhandled_exception_handler(request: Request, exc: Exception):
# ============================================================ # ============================================================
@app.on_event("startup")
def startup_event() -> None:
    """Initialise the service when the application starts.

    Configures logging, performs the first full load, and launches
    ``observer_loop`` on a daemon thread.
    """
    setup_logging()
    logger.info("[VectorService] Startup stamp=%s file=%s", SERVICE_STAMP, str(Path(__file__).resolve()))
    load_all()

    # Daemon thread: it must not keep the process alive on shutdown.
    threading.Thread(target=observer_loop, daemon=True).start()
    logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
@@ -441,7 +525,7 @@ def startup_event():
# ============================================================ # ============================================================
@app.get("/health") @app.get("/health")
def health(): def health() -> Dict[str, Any]:
return { return {
"status": "ok", "status": "ok",
"stamp": SERVICE_STAMP, "stamp": SERVICE_STAMP,
@@ -451,7 +535,9 @@ def health():
"model_loaded": model is not None, "model_loaded": model is not None,
"embedding_model": loaded_embedding_model_name, "embedding_model": loaded_embedding_model_name,
"index_version": current_index_version, "index_version": current_index_version,
"runtime_stamp": current_runtime_stamp, "chunk_runtime_stamp": current_chunk_runtime_stamp,
"tags_runtime_stamp": current_tags_runtime_stamp,
"tags_index_present": current_tags_index_present,
"tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None, "tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None,
"tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None, "tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None,
"chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None, "chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None,
@@ -463,17 +549,17 @@ def health():
@app.post("/reload")
def reload() -> Dict[str, str]:
    """Force a full reload via ``load_all`` and report the service stamp.

    Raises:
        HTTPException: Status 500 with the error text when the reload fails;
            the traceback is written to the log first.
    """
    try:
        load_all()
        response = {"status": "reloaded", "stamp": SERVICE_STAMP}
        return response
    except Exception as failure:
        logger.exception("reload failed")
        raise HTTPException(status_code=500, detail=str(failure))
@app.post("/search-chunks") @app.post("/search-chunks")
def search_chunks(req: SearchRequest): def search_chunks(req: SearchRequest) -> List[Dict[str, Any]]:
if chunk_index is None or chunk_ids is None or model is None: if chunk_index is None or chunk_ids is None or model is None:
raise HTTPException(status_code=503, detail="Chunk index not available") raise HTTPException(status_code=503, detail="Chunk index not available")
@@ -491,16 +577,16 @@ def search_chunks(req: SearchRequest):
doc_filter: Optional[List[str]] = None doc_filter: Optional[List[str]] = None
if req.doc_ids: if req.doc_ids:
doc_filter = [] doc_filter = []
for d in req.doc_ids: for document_id in req.doc_ids:
dk = _as_key(d) document_key = _as_key(document_id)
if dk: if document_key:
doc_filter.append(dk) doc_filter.append(document_key)
effective_limit = max(limit * 5, 50) effective_limit = max(limit * 5, 50)
effective_limit = min(effective_limit, 500) effective_limit = min(effective_limit, 500)
scores, indices = chunk_index.search(query_vec, effective_limit) scores, indices = chunk_index.search(query_vec, effective_limit)
results = [] results: List[Dict[str, Any]] = []
for score, idx in zip(scores[0], indices[0]): for score, idx in zip(scores[0], indices[0]):
if idx == -1: if idx == -1:
continue continue
@@ -512,20 +598,20 @@ def search_chunks(req: SearchRequest):
if not chunk_id_key: if not chunk_id_key:
continue continue
doc_id = chunk_doc_map.get(chunk_id_key) document_id = chunk_doc_map.get(chunk_id_key)
if doc_filter is not None: if doc_filter is not None:
if doc_id is None or doc_id not in doc_filter: if document_id is None or document_id not in doc_filter:
continue continue
payload = { payload: Dict[str, Any] = {
"chunk_id": raw_chunk_id, "chunk_id": raw_chunk_id,
"score": float(score), "score": float(score),
"document_id": doc_id, "document_id": document_id,
} }
ci = chunk_pos_map.get(chunk_id_key) chunk_position = chunk_pos_map.get(chunk_id_key)
if isinstance(ci, int): if isinstance(chunk_position, int):
payload["chunk_index"] = ci payload["chunk_index"] = chunk_position
results.append(payload) results.append(payload)
@@ -536,13 +622,13 @@ def search_chunks(req: SearchRequest):
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as exc:
logger.exception("search-chunks failure") logger.exception("search-chunks failure")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(exc))
@app.post("/search-tags") @app.post("/search-tags")
def search_tags(req: SearchRequest): def search_tags(req: SearchRequest) -> List[Dict[str, Any]]:
if tag_index is None or tag_ids is None or model is None: if tag_index is None or tag_ids is None or model is None:
raise HTTPException(status_code=503, detail="Tag index not available") raise HTTPException(status_code=503, detail="Tag index not available")
@@ -564,37 +650,47 @@ def search_tags(req: SearchRequest):
scores, indices = tag_index.search(query_vec, limit) scores, indices = tag_index.search(query_vec, limit)
results = [] results: List[Dict[str, Any]] = []
seen_tag_ids = set()
for score, idx in zip(scores[0], indices[0]): for score, idx in zip(scores[0], indices[0]):
if idx == -1: if idx == -1:
continue continue
if idx < 0 or idx >= len(tag_ids): if idx < 0 or idx >= len(tag_ids):
continue continue
tag_id = tag_ids[idx] raw_tag_id = tag_ids[idx]
tag_id_key = _as_key(tag_id) or "" tag_id_key = _as_key(raw_tag_id)
if not tag_id_key or tag_id_key in seen_tag_ids:
continue
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"tag_id": tag_id, "tag_id": raw_tag_id,
"score": float(score), "score": float(score),
} }
meta = tag_meta_map.get(tag_id_key) meta = tag_meta_map.get(tag_id_key)
if isinstance(meta, dict): if isinstance(meta, dict):
label = meta.get("label") label = meta.get("label")
ttype = meta.get("tag_type") tag_type = meta.get("tag_type")
if isinstance(label, str) and label.strip(): if isinstance(label, str):
payload["label"] = label payload["label"] = label.strip()
if isinstance(ttype, str) and ttype.strip(): payload["tag_type"] = _normalize_tag_type(tag_type)
payload["tag_type"] = ttype else:
payload["label"] = ""
payload["tag_type"] = "generic"
results.append(payload) results.append(payload)
seen_tag_ids.add(tag_id_key)
if len(results) >= limit:
break
return results return results
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as exc:
logger.exception("search-tags failure") logger.exception("search-tags failure")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(exc))

View File

@@ -4,77 +4,84 @@ declare(strict_types=1);
namespace App\Catalog; namespace App\Catalog;
use App\Config\CatalogIntentConfig;
use App\Entity\Document;
use App\Tag\TagTypes;
use App\Tag\TagVectorSearchClient; use App\Tag\TagVectorSearchClient;
use Doctrine\DBAL\Connection; use Doctrine\DBAL\Connection;
use Symfony\Component\Uid\Uuid; use Symfony\Component\Uid\Uuid;
/** /**
* EntityCatalogService * Builds deterministic catalog lists from a validated catalog entity term.
* *
* Deterministische Katalog-Listen auf Basis eines Entity-Terms: * This service is intentionally conservative:
* - TagVectorSearch (Score-Gate + Ambiguity-Check) * - only strong catalog_entity matches may open the catalog path
* - DB Query auf document_tag + document (ACTIVE) * - ambiguous matches fall back to normal retrieval
* - Rückgabe als EIN Textblock (string) oder null (Fallback auf normalen Retrieval) * - only ACTIVE documents are listed
*
* Schritt-3 Änderung:
* - Headline ist NICHT mehr hardcoded
* - Headline basiert dynamisch auf dem gefundenen Tag
*/ */
final class EntityCatalogService final class EntityCatalogService
{ {
private const MIN_SCORE = 0.55; private const SEARCH_LIMIT = 3;
private const AMBIGUITY_DELTA = 0.05;
public function __construct( public function __construct(
private readonly TagVectorSearchClient $tagVectorClient, private readonly TagVectorSearchClient $tagVectorClient,
private readonly Connection $connection, private readonly Connection $connection,
) {} ) {
}
/** /**
* @return string|null Textblock oder null (wenn kein sicherer Catalog möglich ist) * Returns a catalog text block or null when no safe catalog path exists.
*/ */
public function listByTerm(string $entityTerm): ?string public function listByTerm(string $entityTerm): ?string
{ {
$entityTerm = trim($entityTerm); $entityTerm = trim($entityTerm);
if ($entityTerm === '') { if ($entityTerm === '') {
return null; return null;
} }
// 1) Tag-Vektorsuche (Top 3 für Ambiguity-Prüfung) $hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT);
$hits = $this->tagVectorClient->search($entityTerm, 3);
if ($hits === []) { if ($hits === []) {
return null; return null;
} }
$best = $hits[0]; $best = $hits[0];
$bestScore = (float) ($best['score'] ?? 0.0);
$bestScore = isset($best['score']) ? (float)$best['score'] : 0.0; if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
if ($bestScore < self::MIN_SCORE) { return null;
}
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null; return null;
} }
// 2) Ambiguity: wenn Top2 zu nah ist → konservativ abbrechen
if (isset($hits[1])) { if (isset($hits[1])) {
$secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0; $secondScore = (float) ($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
return null; return null;
} }
} }
$tagHex = (string)($best['tag_id'] ?? ''); $tagId = trim((string) ($best['tag_id'] ?? ''));
if ($tagHex === '') {
if ($tagId === '') {
return null; return null;
} }
// OPTIONAL: Falls TagVectorSearchClient künftig tag_label zurückliefert, try {
// kann das hier direkt verwendet werden. $tagBinaryId = Uuid::fromString($tagId)->toBinary();
$tagLabel = isset($best['tag_label']) ? (string)$best['tag_label'] : null; } catch (\Throwable) {
return null;
}
$tagLabel = trim((string) ($best['label'] ?? ''));
// 3) DB Query: alle ACTIVE Dokumente zu diesem Tag
$rows = $this->connection->fetchAllAssociative( $rows = $this->connection->fetchAllAssociative(
' '
SELECT d.title SELECT DISTINCT d.title
FROM document d FROM document d
INNER JOIN document_tag dt ON dt.document_id = d.id INNER JOIN document_tag dt ON dt.document_id = d.id
WHERE dt.tag_id = :tagId WHERE dt.tag_id = :tagId
@@ -82,8 +89,8 @@ final class EntityCatalogService
ORDER BY d.title ASC ORDER BY d.title ASC
', ',
[ [
'tagId' => Uuid::fromString($tagHex)->toBinary(), 'tagId' => $tagBinaryId,
'status' => 'ACTIVE', 'status' => Document::STATUS_ACTIVE,
] ]
); );
@@ -92,37 +99,42 @@ final class EntityCatalogService
} }
$titles = []; $titles = [];
foreach ($rows as $row) { foreach ($rows as $row) {
$t = trim((string)($row['title'] ?? '')); $title = trim((string) ($row['title'] ?? ''));
if ($t !== '') {
$titles[] = $t; if ($title === '') {
continue;
} }
$titles[$title] = $title;
} }
if ($titles === []) { if ($titles === []) {
return null; return null;
} }
return $this->buildTextBlock($tagLabel, $titles); return $this->buildTextBlock(
$tagLabel !== '' ? $tagLabel : null,
array_values($titles)
);
} }
/** /**
* Dynamische Headline: * Builds a stable human-readable list block for the prompt.
* - Wenn Tag-Label vorhanden → verwenden *
* - Sonst generischer Fallback * @param list<string> $titles
*/ */
private function buildTextBlock(?string $tagLabel, array $titles): string private function buildTextBlock(?string $tagLabel, array $titles): string
{ {
$headline = 'Folgende Einträge sind verfügbar:'; $headline = 'Folgende Einträge sind verfügbar:';
if (\is_string($tagLabel) && \trim($tagLabel) !== '') { if ($tagLabel !== null && trim($tagLabel) !== '') {
$headline = sprintf( $headline = sprintf('Folgende %s sind verfügbar:', trim($tagLabel));
'Folgende %s sind verfügbar:',
$tagLabel
);
} }
$lines = []; $lines = [];
foreach ($titles as $title) { foreach ($titles as $title) {
$lines[] = '- ' . $title; $lines[] = '- ' . $title;
} }

View File

@@ -1,6 +1,5 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
namespace App\Command; namespace App\Command;
@@ -36,8 +35,7 @@ final class SystemRebuildCommand extends Command
private readonly VectorIndexHealthService $health, private readonly VectorIndexHealthService $health,
private readonly TagVectorIndexHealthService $tagHealth, private readonly TagVectorIndexHealthService $tagHealth,
private readonly string $projectDir, private readonly string $projectDir,
) ) {
{
parent::__construct(); parent::__construct();
} }
@@ -58,6 +56,7 @@ final class SystemRebuildCommand extends Command
if (!$input->getOption('hard')) { if (!$input->getOption('hard')) {
$io->error('Safety switch missing: you must pass --hard to run this command.'); $io->error('Safety switch missing: you must pass --hard to run this command.');
$io->writeln('Example: bin/console mto:agent:system:rebuild --hard'); $io->writeln('Example: bin/console mto:agent:system:rebuild --hard');
return Command::FAILURE; return Command::FAILURE;
} }
@@ -65,9 +64,29 @@ final class SystemRebuildCommand extends Command
$io->title('mto:agent:system:rebuild --hard'); $io->title('mto:agent:system:rebuild --hard');
// --------------------------------------------------------- if (!$this->runGlobalReindex($io, $dryRun)) {
// 1) GLOBAL REINDEX (chunks rewrite + vector rebuild) return Command::FAILURE;
// --------------------------------------------------------- }
if (!$this->runTagRebuild($io, $input, $dryRun)) {
return Command::FAILURE;
}
if (!$this->runVectorServiceReload($io, $input, $dryRun)) {
return Command::FAILURE;
}
if (!$this->runHealthChecks($io, $input)) {
return Command::FAILURE;
}
$io->success('System rebuild finished.');
return Command::SUCCESS;
}
private function runGlobalReindex(SymfonyStyle $io, bool $dryRun): bool
{
$io->section('1/4 Global reindex (chunks + vector index)'); $io->section('1/4 Global reindex (chunks + vector index)');
$job = $this->jobService->startJob( $job = $this->jobService->startJob(
@@ -82,55 +101,70 @@ final class SystemRebuildCommand extends Command
try { try {
$this->orchestrator->runExistingJob($job, $dryRun); $this->orchestrator->runExistingJob($job, $dryRun);
$io->success('Global reindex completed.'); $io->success('Global reindex completed.');
return true;
} catch (\Throwable $e) { } catch (\Throwable $e) {
$io->error('Global reindex failed: ' . $e->getMessage()); $io->error('Global reindex failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
}
}
private function runTagRebuild(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
{
if ((bool) $input->getOption('no-tags')) {
$io->section('2/4 Tag rebuild');
$io->note('Skipped due to --no-tags.');
return true;
} }
// ---------------------------------------------------------
// 2) TAG REBUILD (tags.ndjson + vector_tags.index)
// ---------------------------------------------------------
if (!$input->getOption('no-tags')) {
$io->section('2/4 Tag rebuild (tags.ndjson + vector_tags.index)'); $io->section('2/4 Tag rebuild (tags.ndjson + vector_tags.index)');
if ($dryRun) { if ($dryRun) {
$io->note('dry-run enabled: tag rebuild skipped (would export + build tag index).'); $io->note('dry-run enabled: tag rebuild skipped (would export + build tag index).');
} else {
return true;
}
try { try {
$export = $this->tagExporter->export(); $export = $this->tagExporter->export();
$io->writeln('<info>Exported tags.ndjson</info>'); $io->writeln('<info>Exported tags.ndjson</info>');
$io->writeln('Path: ' . $export['path']); $io->writeln('Path: ' . (string) $export['path']);
$io->writeln('Tags: ' . $export['tags']); $io->writeln('Tags: ' . (string) $export['tags']);
$io->writeln('Lines: ' . $export['lines']); $io->writeln('Lines: ' . (string) $export['lines']);
$io->writeln('Bytes: ' . $export['bytes']); $io->writeln('Bytes: ' . (string) $export['bytes']);
$this->tagIndexBuilder->build(); $this->tagIndexBuilder->build();
$io->writeln('<info>Built vector_tags.index</info>');
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$io->success('Tag rebuild completed.'); $io->success('Tag rebuild completed.');
return true;
} catch (\Throwable $e) { } catch (\Throwable $e) {
$io->error('Tag rebuild failed: ' . $e->getMessage()); $io->error('Tag rebuild failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
} }
} }
} else {
$io->section('2/4 Tag rebuild');
$io->note('Skipped due to --no-tags.');
}
// --------------------------------------------------------- private function runVectorServiceReload(SymfonyStyle $io, InputInterface $input, bool $dryRun): bool
// 3) VECTOR SERVICE (install deps + start + reload) {
// --------------------------------------------------------- if ((bool) $input->getOption('no-reload')) {
if (!$input->getOption('no-reload')) { $io->section('3/4 Vector service reload');
$io->note('Skipped due to --no-reload.');
return true;
}
$io->section('3/4 Vector service reload (uvicorn)'); $io->section('3/4 Vector service reload (uvicorn)');
if ($dryRun) { if ($dryRun) {
$io->note('dry-run enabled: service reload skipped.'); $io->note('dry-run enabled: service reload skipped.');
} else {
return true;
}
$cmd = [ $cmd = [
'.venv/bin/python', '.venv/bin/python',
'python/vector/vector_control.py', 'python/vector/vector_control.py',
@@ -138,85 +172,110 @@ final class SystemRebuildCommand extends Command
'--start', '--start',
'--reload', '--reload',
'--port', '8090', '--port', '8090',
'--host', '0.0.0.0' '--host', '0.0.0.0',
]; ];
$process = new Process($cmd, $this->projectDir); $process = new Process($cmd, $this->projectDir);
$process->setTimeout(600); $process->setTimeout(600);
$process->run(); $process->run();
$out = trim($process->getOutput()); $stdout = trim($process->getOutput());
$err = trim($process->getErrorOutput()); $stderr = trim($process->getErrorOutput());
if ($out !== '') { if ($stdout !== '') {
$io->writeln($out); $io->writeln($stdout);
} }
if ($err !== '') {
$io->writeln('<comment>' . $err . '</comment>'); if ($stderr !== '') {
$io->writeln('<comment>' . $stderr . '</comment>');
} }
if (!$process->isSuccessful()) { if (!$process->isSuccessful()) {
$io->error('Vector service reload failed (non-zero exit code).'); $io->error('Vector service reload failed (non-zero exit code).');
return Command::FAILURE;
return false;
} }
$io->success('Vector service reloaded.'); $io->success('Vector service reloaded.');
}
} else { return true;
$io->section('3/4 Vector service reload'); }
$io->note('Skipped due to --no-reload.');
private function runHealthChecks(SymfonyStyle $io, InputInterface $input): bool
{
if ((bool) $input->getOption('no-health')) {
$io->section('4/4 Health check');
$io->note('Skipped due to --no-health.');
return true;
} }
// ---------------------------------------------------------
// 4) HEALTH CHECK (NDJSON vs vector meta)
// ---------------------------------------------------------
if (!$input->getOption('no-health')) {
$io->section('4/4 Health check'); $io->section('4/4 Health check');
try { try {
$report = $this->health->check(); $chunkReport = $this->health->check();
} catch (\Throwable $e) { } catch (\Throwable $e) {
$io->error('Health check failed: ' . $e->getMessage()); $io->error('Health check failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
} }
try { try {
$reportTag = $this->tagHealth->check(); $tagReport = $this->tagHealth->check();
} catch (\Throwable $e) { } catch (\Throwable $e) {
$io->error('Tag health check failed: ' . $e->getMessage()); $io->error('Tag health check failed: ' . $e->getMessage());
return Command::FAILURE;
return false;
} }
$io->definitionList( $this->renderChunkHealth($io, $chunkReport);
['ndjson_exists' => $report['ndjson_exists'] ? 'yes' : 'no'], $this->renderTagHealth($io, $tagReport);
['ndjson_chunk_count' => (string)$report['ndjson_chunk_count']],
['vector_exists' => $report['vector_exists'] ? 'yes' : 'no'],
['meta_exists' => $report['meta_exists'] ? 'yes' : 'no'],
['vector_chunk_count' => (string)$report['vector_chunk_count']],
['status' => (string)$report['status']],
);
$io->definitionList( if (!$this->isHealthOk((string) ($chunkReport['status'] ?? 'UNKNOWN'))) {
['tags_ndjson_exists' => $reportTag['tags_ndjson_exists'] ? 'yes' : 'no'], $io->error('Chunk health check not OK: ' . (string) ($chunkReport['status'] ?? 'UNKNOWN'));
['tags_ndjson_count' => (string)$reportTag['tags_ndjson_count']],
['tag_vector_exists' => $reportTag['vector_exists'] ? 'yes' : 'no'],
['tag_meta_exists' => $reportTag['meta_exists'] ? 'yes' : 'no'],
['vector_tag_count' => (string)$reportTag['vector_tag_count']],
['status' => (string)$reportTag['status']],
);
if (!in_array($report['status'], ['OK', 'OK_EMPTY'], true)) { return false;
$io->error('Health check not OK: ' . $report['status']); }
return Command::FAILURE;
if (!$this->isHealthOk((string) ($tagReport['status'] ?? 'UNKNOWN'))) {
$io->error('Tag health check not OK: ' . (string) ($tagReport['status'] ?? 'UNKNOWN'));
return false;
} }
$io->success('Health check OK.'); $io->success('Health check OK.');
} else {
$io->section('4/4 Health check'); return true;
$io->note('Skipped due to --no-health.');
} }
$io->success('System rebuild finished.'); private function renderChunkHealth(SymfonyStyle $io, array $report): void
return Command::SUCCESS; {
$io->definitionList(
['ndjson_exists' => !empty($report['ndjson_exists']) ? 'yes' : 'no'],
['ndjson_chunk_count' => (string) ($report['ndjson_chunk_count'] ?? 0)],
['vector_exists' => !empty($report['vector_exists']) ? 'yes' : 'no'],
['meta_exists' => !empty($report['meta_exists']) ? 'yes' : 'no'],
['vector_chunk_count' => (string) ($report['vector_chunk_count'] ?? 0)],
['status' => (string) ($report['status'] ?? 'UNKNOWN')],
);
}
/**
 * Prints the tag index health report as a definition list.
 *
 * Flag-like entries are rendered as "yes"/"no", counters fall back to 0 and
 * a missing status is rendered as "UNKNOWN".
 *
 * @param array<string, mixed> $report
 */
private function renderTagHealth(SymfonyStyle $io, array $report): void
{
    // Small local formatters keep the row list below free of repetition.
    $yesNo = static fn (string $key): string => empty($report[$key]) ? 'no' : 'yes';
    $count = static fn (string $key): string => (string) ($report[$key] ?? 0);

    $io->definitionList(
        ['tags_ndjson_exists' => $yesNo('tags_ndjson_exists')],
        ['tags_ndjson_count' => $count('tags_ndjson_count')],
        // The displayed labels carry a "tag_" prefix, the report keys do not.
        ['tag_vector_exists' => $yesNo('vector_exists')],
        ['tag_meta_exists' => $yesNo('meta_exists')],
        ['vector_tag_count' => $count('vector_tag_count')],
        ['tags_with_active_document_ids' => $count('tags_with_active_document_ids')],
        ['meta_valid' => $yesNo('meta_valid')],
        ['status' => (string) ($report['status'] ?? 'UNKNOWN')],
    );
}
private function isHealthOk(string $status): bool
{
return in_array($status, ['OK', 'OK_EMPTY'], true);
} }
} }

View File

@@ -8,11 +8,13 @@ use App\Tag\TagVectorIndexHealthService;
use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand( #[AsCommand(
name: 'mto:agent:tag:health', name: 'mto:agent:tag:health',
description: 'Health-Check für TAG/FAISS Konsistenz' description: 'Health-Check für Tag-/FAISS-Konsistenz'
)] )]
final class TagHealthCheckCommand extends Command final class TagHealthCheckCommand extends Command
{ {
@@ -22,14 +24,87 @@ final class TagHealthCheckCommand extends Command
parent::__construct(); parent::__construct();
} }
/**
 * Registers the value-less --summary switch on this command.
 */
protected function configure(): void
{
    $description = 'Gibt eine lesbare Zusammenfassung statt JSON aus.';

    // No shortcut is registered; the option is a plain on/off flag.
    $this->addOption('summary', null, InputOption::VALUE_NONE, $description);
}
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$result = $this->health->check(); $result = $this->health->check();
$status = trim((string) ($result['status'] ?? ''));
$output->writeln(json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)); if ($status === '') {
$status = 'UNKNOWN';
$result['status'] = $status;
$result['error'] = 'Health service returned no status.';
}
return str_starts_with($result['status'], 'OK') if ((bool) $input->getOption('summary')) {
$this->renderSummary(new SymfonyStyle($input, $output), $result);
} else {
$this->renderJson($output, $result);
}
return $this->isHealthy($status)
? Command::SUCCESS ? Command::SUCCESS
: Command::FAILURE; : Command::FAILURE;
} }
/**
 * Renders the health result as a titled definition list.
 *
 * Flag-like entries are shown as "yes"/"no", counters fall back to 0 and a
 * missing status is shown as "UNKNOWN". A non-empty "error" entry is also
 * printed as a warning below the list.
 *
 * @param array<string, mixed> $result
 */
private function renderSummary(SymfonyStyle $io, array $result): void
{
    $flag = static fn (string $key): string => empty($result[$key]) ? 'no' : 'yes';
    $counter = static fn (string $key): string => (string) ($result[$key] ?? 0);

    $io->title('Tag Vector Health');

    $io->definitionList(
        ['status' => (string) ($result['status'] ?? 'UNKNOWN')],
        ['tags_ndjson_exists' => $flag('tags_ndjson_exists')],
        ['tags_ndjson_count' => $counter('tags_ndjson_count')],
        ['vector_exists' => $flag('vector_exists')],
        ['meta_exists' => $flag('meta_exists')],
        ['vector_tag_count' => $counter('vector_tag_count')],
        ['meta_valid' => $flag('meta_valid')],
        ['tags_with_active_document_ids' => $counter('tags_with_active_document_ids')],
    );

    // Nothing more to print when no error was reported.
    $error = $result['error'] ?? null;
    if (empty($error)) {
        return;
    }

    $io->warning((string) $error);
}
/**
 * Writes the health result as pretty-printed JSON.
 *
 * If the result cannot be encoded, a minimal UNKNOWN/json_encode_failed
 * document is written instead; a hard-coded literal is the last resort.
 *
 * @param array<string, mixed> $result
 */
private function renderJson(OutputInterface $output, array $result): void
{
    $flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    $payload = json_encode($result, $flags);

    // First fallback: encode a fixed error document with the same flags.
    if (!is_string($payload)) {
        $payload = json_encode(
            [
                'status' => 'UNKNOWN',
                'error' => 'json_encode_failed',
            ],
            $flags
        );
    }

    // Second fallback: a literal that needs no encoding at all.
    if (!is_string($payload)) {
        $payload = "{\"status\":\"UNKNOWN\",\"error\":\"json_encode_failed\"}";
    }

    $output->writeln($payload);
}
/**
 * Tells whether a health status counts as healthy.
 *
 * Only the exact values "OK" and "OK_EMPTY" qualify.
 */
private function isHealthy(string $status): bool
{
    return $status === 'OK' || $status === 'OK_EMPTY';
}
} }

View File

@@ -14,6 +14,7 @@ use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand( #[AsCommand(
name: 'mto:agent:tags:job:run', name: 'mto:agent:tags:job:run',
@@ -39,112 +40,152 @@ final class TagRebuildRunJobCommand extends Command
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$jobId = $input->getArgument('jobId'); $io = new SymfonyStyle($input, $output);
$jobId = trim((string) $input->getArgument('jobId'));
$create = (bool) $input->getOption('create'); $create = (bool) $input->getOption('create');
if (!$create && !$jobId) { if (!$create && $jobId === '') {
$output->writeln('<error>You must provide either a jobId or use --create.</error>'); $io->error('You must provide either a jobId or use --create.');
return Command::FAILURE; return Command::FAILURE;
} }
if ($create && $jobId) { if ($create && $jobId !== '') {
$output->writeln('<error>Use either jobId OR --create, not both.</error>'); $io->error('Use either jobId OR --create, not both.');
return Command::FAILURE; return Command::FAILURE;
} }
if ($create) { $job = null;
$lockHandle = null;
try {
$job = $create ? $this->createJob($io) : $this->findJob($jobId);
$lockHandle = $this->acquireLock();
$job->markRunning();
$this->em->flush();
$export = $this->exporter->export();
$this->assertValidExport($export);
$io->writeln('<info>tags.ndjson exported</info>');
$io->writeln('Path: ' . (string) $export['path']);
$io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
$io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
$io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
$this->builder->build();
$job->markCompleted();
$this->em->flush();
$io->success('Tag rebuild successful.');
return Command::SUCCESS;
} catch (\Throwable $e) {
if ($job instanceof TagRebuildJob) {
$job->markFailed($this->buildSafeErrorMessage($e));
$this->em->flush();
}
$io->error('FAILED: ' . $e->getMessage());
return Command::FAILURE;
} finally {
$this->releaseLock($lockHandle);
}
}
private function createJob(SymfonyStyle $io): TagRebuildJob
{
$job = new TagRebuildJob(); $job = new TagRebuildJob();
$this->em->persist($job); $this->em->persist($job);
$this->em->flush(); $this->em->flush();
$jobId = $job->getId();
$output->writeln('<info>Created new TagRebuildJob: ' . $jobId . '</info>'); $io->writeln('<info>Created new TagRebuildJob: ' . (string) $job->getId() . '</info>');
} else {
return $job;
}
private function findJob(string $jobId): TagRebuildJob
{
/** @var TagRebuildJob|null $job */ /** @var TagRebuildJob|null $job */
$job = $this->em->getRepository(TagRebuildJob::class)->find($jobId); $job = $this->em->getRepository(TagRebuildJob::class)->find($jobId);
if (!$job instanceof TagRebuildJob) { if (!$job instanceof TagRebuildJob) {
$output->writeln('<error>Job not found.</error>'); throw new \RuntimeException('Job not found.');
return Command::FAILURE;
}
} }
$fh = null; return $job;
}
try { /**
// --------------------------------------------------------- * @return resource
// LOCK INITIALIZATION */
// --------------------------------------------------------- private function acquireLock()
{
$lockDir = \dirname($this->lockFilePath); $lockDir = \dirname($this->lockFilePath);
if (!\is_dir($lockDir) && !@\mkdir($lockDir, 0775, true) && !\is_dir($lockDir)) { if (!\is_dir($lockDir) && !@\mkdir($lockDir, 0775, true) && !\is_dir($lockDir)) {
throw new \RuntimeException('Cannot create lock directory.'); throw new \RuntimeException('Cannot create lock directory.');
} }
$fh = @\fopen($this->lockFilePath, 'c+'); $handle = @\fopen($this->lockFilePath, 'c+');
if (!$fh) {
if ($handle === false) {
throw new \RuntimeException('Cannot open lock file: ' . $this->lockFilePath); throw new \RuntimeException('Cannot open lock file: ' . $this->lockFilePath);
} }
if (!@\flock($fh, LOCK_EX | LOCK_NB)) { if (!@\flock($handle, LOCK_EX | LOCK_NB)) {
@\fclose($handle);
throw new \RuntimeException('Another tag rebuild is currently running (lock busy).'); throw new \RuntimeException('Another tag rebuild is currently running (lock busy).');
} }
// --------------------------------------------------------- return $handle;
// MARK RUNNING }
// ---------------------------------------------------------
$job->markRunning();
$this->em->flush();
// --------------------------------------------------------- /**
// EXPORT TAGS (NDJSON) * @param resource|null $handle
// --------------------------------------------------------- */
$export = $this->exporter->export(); private function releaseLock($handle): void
{
if (!is_resource($handle)) {
return;
}
if ( @\flock($handle, LOCK_UN);
!isset($export['path']) || @\fclose($handle);
!\is_string($export['path']) || }
!\file_exists($export['path'])
) { /**
* @param array<string, mixed> $export
*/
private function assertValidExport(array $export): void
{
$path = trim((string) ($export['path'] ?? ''));
if ($path === '' || !\is_file($path)) {
throw new \RuntimeException('Export failed: NDJSON file missing.'); throw new \RuntimeException('Export failed: NDJSON file missing.');
} }
if (isset($export['count']) && (int) $export['count'] === 0) { $tags = (int) ($export['tags'] ?? 0);
throw new \RuntimeException('Export produced zero tags.'); $lines = (int) ($export['lines'] ?? 0);
}
// --------------------------------------------------------- if ($tags < 0 || $lines < 0) {
// BUILD VECTOR INDEX throw new \RuntimeException('Export returned invalid statistics.');
// ---------------------------------------------------------
$this->builder->build();
// ---------------------------------------------------------
// MARK COMPLETED
// ---------------------------------------------------------
$job->markCompleted();
$this->em->flush();
$output->writeln('<info>Tag rebuild successful.</info>');
$output->writeln('NDJSON: ' . $export['path']);
return Command::SUCCESS;
}
catch (\Throwable $e) {
if (isset($job)) {
$job->markFailed($e->getMessage());
$this->em->flush();
}
$output->writeln('<error>FAILED: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
finally {
if ($fh) {
@\flock($fh, LOCK_UN);
@\fclose($fh);
} }
} }
/**
 * Derives the failure text that is passed to the job's markFailed().
 *
 * The exception message is trimmed and cut to at most 4000 characters; a
 * blank message is replaced by a generic placeholder.
 */
private function buildSafeErrorMessage(\Throwable $e): string
{
    $text = trim($e->getMessage());

    return $text !== ''
        ? mb_substr($text, 0, 4000)
        : 'Unknown tag rebuild failure.';
}
} }

View File

@@ -9,6 +9,7 @@ use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand( #[AsCommand(
name: 'mto:agent:tags:export', name: 'mto:agent:tags:export',
@@ -17,26 +18,51 @@ use Symfony\Component\Console\Output\OutputInterface;
final class TagsExportCommand extends Command final class TagsExportCommand extends Command
{ {
public function __construct( public function __construct(
private TagNdjsonExporter $exporter, private readonly TagNdjsonExporter $exporter,
) { ) {
parent::__construct(); parent::__construct();
} }
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$io = new SymfonyStyle($input, $output);
try { try {
$result = $this->exporter->export(); $result = $this->exporter->export();
} catch (\Throwable $e) { $this->assertValidExport($result);
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
$output->writeln('<info>Tags NDJSON exported</info>'); $io->writeln('<info>Tags NDJSON exported</info>');
$output->writeln('Path: ' . $result['path']); $io->writeln('Path: ' . (string) ($result['path'] ?? ''));
$output->writeln('Tags: ' . $result['tags']); $io->writeln('Tags: ' . (string) ($result['tags'] ?? 0));
$output->writeln('Lines: ' . $result['lines']); $io->writeln('Lines: ' . (string) ($result['lines'] ?? 0));
$output->writeln('Bytes: ' . $result['bytes']); $io->writeln('Bytes: ' . (string) ($result['bytes'] ?? 0));
$io->success('Tag export completed.');
return Command::SUCCESS; return Command::SUCCESS;
} catch (\Throwable $e) {
$io->error($e->getMessage());
return Command::FAILURE;
}
}
/**
* @param array<string, mixed> $result
*/
private function assertValidExport(array $result): void
{
$path = trim((string) ($result['path'] ?? ''));
if ($path === '' || !is_file($path)) {
throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
}
$tags = (int) ($result['tags'] ?? 0);
$lines = (int) ($result['lines'] ?? 0);
$bytes = (int) ($result['bytes'] ?? 0);
if ($tags < 0 || $lines < 0 || $bytes < 0) {
throw new \RuntimeException('Tag export returned invalid statistics.');
}
} }
} }

View File

@@ -4,13 +4,13 @@ declare(strict_types=1);
namespace App\Command; namespace App\Command;
use App\Index\IndexMetaManager;
use App\Tag\TagNdjsonExporter; use App\Tag\TagNdjsonExporter;
use App\Tag\TagVectorIndexBuilder; use App\Tag\TagVectorIndexBuilder;
use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
#[AsCommand( #[AsCommand(
name: 'mto:agent:tags:rebuild', name: 'mto:agent:tags:rebuild',
@@ -21,45 +21,54 @@ final class TagsRebuildCommand extends Command
public function __construct( public function __construct(
private readonly TagNdjsonExporter $exporter, private readonly TagNdjsonExporter $exporter,
private readonly TagVectorIndexBuilder $builder, private readonly TagVectorIndexBuilder $builder,
private readonly IndexMetaManager $metaManager,
) { ) {
parent::__construct(); parent::__construct();
} }
protected function execute(InputInterface $input, OutputInterface $output): int protected function execute(InputInterface $input, OutputInterface $output): int
{ {
$io = new SymfonyStyle($input, $output);
try { try {
// -----------------------------------------
// 1) Export tags.ndjson
// -----------------------------------------
$export = $this->exporter->export(); $export = $this->exporter->export();
$this->assertValidExport($export);
$output->writeln('<info>1/3 Exported tags.ndjson</info>'); $io->writeln('<info>1/2 Exported tags.ndjson</info>');
$output->writeln('Path: ' . $export['path']); $io->writeln('Path: ' . (string) ($export['path'] ?? ''));
$output->writeln('Tags: ' . $export['tags']); $io->writeln('Tags: ' . (string) ($export['tags'] ?? 0));
$output->writeln('Lines: ' . $export['lines']); $io->writeln('Lines: ' . (string) ($export['lines'] ?? 0));
$output->writeln('Bytes: ' . $export['bytes']); $io->writeln('Bytes: ' . (string) ($export['bytes'] ?? 0));
// -----------------------------------------
// 2) Build FAISS tag index
// -----------------------------------------
$this->builder->build(); $this->builder->build();
$output->writeln('<info>2/3 Built vector_tags.index</info>'); $io->writeln('<info>2/2 Built vector_tags.index</info>');
$io->success('Tag rebuild completed.');
// -----------------------------------------
// 3) Enterprise Commit Marker
// -----------------------------------------
$this->metaManager->touchRuntime([
'last_tags_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$output->writeln('<info>3/3 Runtime commit marker updated</info>');
} catch (\Throwable $e) {
$output->writeln('<error>ERROR: ' . $e->getMessage() . '</error>');
return Command::FAILURE;
}
return Command::SUCCESS; return Command::SUCCESS;
} catch (\Throwable $e) {
$io->error($e->getMessage());
return Command::FAILURE;
}
}
/**
* @param array<string, mixed> $export
*/
private function assertValidExport(array $export): void
{
$path = trim((string) ($export['path'] ?? ''));
if ($path === '' || !is_file($path)) {
throw new \RuntimeException('Tag export failed: tags.ndjson is missing.');
}
$tags = (int) ($export['tags'] ?? 0);
$lines = (int) ($export['lines'] ?? 0);
$bytes = (int) ($export['bytes'] ?? 0);
if ($tags < 0 || $lines < 0 || $bytes < 0) {
throw new \RuntimeException('Tag export returned invalid statistics.');
}
} }
} }

View File

@@ -1,12 +1,62 @@
<?php <?php
declare(strict_types=1);
namespace App\Config; namespace App\Config;
class CatalogIntentConfig /**
* Central thresholds for deterministic catalog-entity detection.
*
* The values in this class intentionally define a conservative gate:
* - only strong semantic tag hits may open the catalog path
* - small score gaps between the best and second-best hit are treated as ambiguous
*/
final class CatalogIntentConfig
{ {
// Minimum similarity score. Prevents noise. /**
* Minimum semantic similarity required before a catalog entity is accepted.
*/
public const MIN_SCORE = 0.72; public const MIN_SCORE = 0.72;
// Difference between Top 1 and Top 2, so that no uncertain match is accepted. /**
* Required distance between the best and second-best catalog entity hit.
*/
public const AMBIGUITY_DELTA = 0.02; public const AMBIGUITY_DELTA = 0.02;
/**
* Number of candidate tag hits to inspect during catalog intent detection.
*
* This is intentionally wider than the final accepted set so that strong
* catalog_entity tags are not hidden behind generic tags in the raw result.
*/
public const SEARCH_LIMIT = 6;
/**
* Conservative lower boundary for score normalization helpers.
*/
public const MIN_ALLOWED_SCORE = 0.0;
/**
* Conservative upper boundary for score normalization helpers.
*/
public const MAX_ALLOWED_SCORE = 1.0;
public static function isScoreAccepted(float $score): bool
{
return $score >= self::MIN_SCORE;
}
public static function isAmbiguous(float $bestScore, float $secondScore): bool
{
return abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA;
}
public static function clampScore(float $score): float
{
return max(self::MIN_ALLOWED_SCORE, min(self::MAX_ALLOWED_SCORE, $score));
}
private function __construct()
{
}
} }

View File

@@ -1,5 +1,6 @@
<?php <?php
declare(strict_types=1);
namespace App\Controller\Admin; namespace App\Controller\Admin;
@@ -17,25 +18,22 @@ final class DashboardController extends AbstractController
#[Route('', name: 'admin_dashboard_null')] #[Route('', name: 'admin_dashboard_null')]
#[Route('/', name: 'admin_dashboard_trail')] #[Route('/', name: 'admin_dashboard_trail')]
#[Route('/admin', name: 'admin_dashboard_alias')] #[Route('/admin', name: 'admin_dashboard_alias')]
public function trailNull(IndexMetaManager $metaManager,VectorIndexHealthService $health): RedirectResponse public function redirectToDashboard(): RedirectResponse
{ {
return $this->redirectToRoute('admin_dashboard'); return $this->redirectToRoute('admin_dashboard');
} }
#[Route('/admin/dashboard', name: 'admin_dashboard', methods: ['GET'])]
#[Route('/admin/dashboard', name: 'admin_dashboard')] public function dashboard(
public function dashboard(IndexMetaManager $metaManager,VectorIndexHealthService $health,TagVectorIndexHealthService $tagHealth): Response IndexMetaManager $metaManager,
{ VectorIndexHealthService $health,
$chunkCount = $metaManager->getRuntimeChunkCount(); TagVectorIndexHealthService $tagHealth
$limit = IngestFlow::CHUNK_LIMIT_HARD; ): Response {
return $this->render('admin/dashboard/index.html.twig', [ return $this->render('admin/dashboard/index.html.twig', [
'chunkCount' => $chunkCount, 'chunkCount' => $metaManager->getRuntimeChunkCount(),
'chunkLimit' => $limit, 'chunkLimit' => IngestFlow::CHUNK_LIMIT_HARD,
'vectorHealth' => $health->check(), 'vectorHealth' => $health->check(),
'tagVectorHealth' => $tagHealth->check(), 'tagVectorHealth' => $tagHealth->check(),
]); ]);
} }
} }

View File

@@ -1,10 +1,13 @@
<?php <?php
declare(strict_types=1);
namespace App\Controller\Admin; namespace App\Controller\Admin;
use App\Entity\Document; use App\Entity\Document;
use App\Entity\DocumentVersion; use App\Entity\DocumentVersion;
use App\Entity\IngestJob; use App\Entity\IngestJob;
use App\Entity\User;
use App\Service\DocumentService; use App\Service\DocumentService;
use App\Service\FormatText; use App\Service\FormatText;
use App\Service\IngestJobService; use App\Service\IngestJobService;
@@ -23,9 +26,11 @@ use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\Uid\Uuid; use Symfony\Component\Uid\Uuid;
#[Route('/admin/documents')] #[Route('/admin/documents')]
class DocumentController extends AbstractController final class DocumentController extends AbstractController
{ {
#[Route('', name: 'admin_documents')] private const INGEST_DUPLICATE_WINDOW_SECONDS = 3;
#[Route('', name: 'admin_documents', methods: ['GET'])]
public function index(EntityManagerInterface $em): Response public function index(EntityManagerInterface $em): Response
{ {
$documents = $em->getRepository(Document::class) $documents = $em->getRepository(Document::class)
@@ -46,115 +51,106 @@ class DocumentController extends AbstractController
#[Route( #[Route(
'/{id}', '/{id}',
name: 'admin_document_show', name: 'admin_document_show',
requirements: ['id' => '[0-9a-fA-F\-]{36}'] requirements: ['id' => '[0-9a-fA-F\-]{36}'],
methods: ['GET']
)] )]
public function show(string $id, EntityManagerInterface $em): Response public function show(string $id, EntityManagerInterface $em): Response
{ {
try {
$uuid = Uuid::fromString($id);
} catch (\Exception) {
throw new NotFoundHttpException();
}
$document = $em->getRepository(Document::class)->find($uuid);
if (!$document) {
$this->addFlash('danger', 'Das Dokument existiert nicht mehr.');
}
return $this->render('admin/document/show.html.twig', [ return $this->render('admin/document/show.html.twig', [
'document' => $document, 'document' => $this->findDocument($id, $em),
]); ]);
} }
#[Route('/new', name: 'admin_document_new')] #[Route('/new', name: 'admin_document_new', methods: ['GET', 'POST'])]
public function new( public function new(
Request $request, Request $request,
DocumentService $documentService, DocumentService $documentService,
FormatText $formatText, FormatText $formatText,
IngestJobService $jobService, IngestJobService $jobService,
ParameterBagInterface $params ParameterBagInterface $params,
EntityManagerInterface $em,
): Response { ): Response {
if (!$request->isMethod('POST')) { if (!$request->isMethod('POST')) {
return $this->render('admin/document/new.html.twig'); return $this->render('admin/document/new.html.twig');
} }
/** @var UploadedFile|null $file */ if (!$this->isCsrfTokenValid('create_document', (string) $request->request->get('_token'))) {
$file = $request->files->get('file'); $this->addFlash('danger', 'Ungültiges CSRF-Token.');
if (!$file instanceof UploadedFile) {
throw new \InvalidArgumentException('No valid file uploaded.');
}
$rawTitle = $request->request->get('title');
$title = is_string($rawTitle) && $rawTitle !== ''
? $rawTitle
: $formatText->slugify($file->getClientOriginalName());
if (!$title) {
$this->addFlash('error', 'Titel ist erforderlich.');
return $this->redirectToRoute('admin_document_new'); return $this->redirectToRoute('admin_document_new');
} }
$uploadDir = (string)$params->get('mto.vector.data.upload.path'); /** @var UploadedFile|null $file */
$this->ensureDir($uploadDir); $file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
$this->addFlash('danger', 'Keine gültige Datei hochgeladen.');
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName(); return $this->redirectToRoute('admin_document_new');
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
} }
$filePath = $uploadDir . '/' . $newFilename; $title = $this->resolveDocumentTitle($request, $file, $formatText);
if ($title === '') {
$this->addFlash('danger', 'Titel ist erforderlich.');
$document = $documentService->createDocument( return $this->redirectToRoute('admin_document_new');
$title, }
$filePath,
$this->getUser()
);
$user = $this->requireUser();
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
try {
$this->ensureDir($uploadDir);
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
$document = $documentService->createDocument($title, $filePath, $user);
$version = $document->getCurrentVersion(); $version = $document->getCurrentVersion();
if (!$version instanceof DocumentVersion) { if (!$version instanceof DocumentVersion) {
$this->addFlash('danger', 'Dokument erstellt, aber es wurde keine aktuelle Version erzeugt.'); throw new \RuntimeException('Dokument erstellt, aber keine aktuelle Version vorhanden.');
return $this->redirectToRoute('admin_documents');
} }
$job = $jobService->startJob( $job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE, IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
$this->getUser(), $user,
$version->getDocument()->getId(), $version->getDocument()->getId(),
$version->getId(), $version->getId(),
null, null,
IngestJob::STATUS_QUEUED IngestJob::STATUS_QUEUED
); );
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) { if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).'); $jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Dokument erstellt, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).'); $this->addFlash('danger', 'Dokument erstellt, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_documents'); return $this->redirectToRoute('admin_documents');
} }
$this->startIngestJob((string)$job->getId()); $this->startIngestJob((string) $job->getId(), $logFile);
return $this->redirectToRoute('admin_job_show', [ return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(), 'id' => (string) $job->getId(),
]); ]);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument konnte nicht erstellt werden.'));
return $this->redirectToRoute('admin_document_new');
}
} }
#[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'])] #[Route('/{id}/version/new', name: 'admin_document_version_new', requirements: ['id' => '[0-9a-fA-F\-]{36}'], methods: ['GET', 'POST'])]
public function newVersion( public function newVersion(
string $id, string $id,
Request $request, Request $request,
EntityManagerInterface $em, EntityManagerInterface $em,
DocumentService $documentService, DocumentService $documentService,
ParameterBagInterface $params ParameterBagInterface $params,
FormatText $formatText,
): Response { ): Response {
$document = $em->getRepository(Document::class)->find($id); $document = $this->findDocument($id, $em);
if (!$document) {
throw $this->createNotFoundException();
}
if (!$request->isMethod('POST')) { if (!$request->isMethod('POST')) {
return $this->render('admin/document/new_version.html.twig', [ return $this->render('admin/document/new_version.html.twig', [
@@ -162,31 +158,33 @@ class DocumentController extends AbstractController
]); ]);
} }
/** @var UploadedFile|null $file */ if (!$this->isCsrfTokenValid('create_document_version_' . $id, (string) $request->request->get('_token'))) {
$file = $request->files->get('file'); $this->addFlash('danger', 'Ungültiges CSRF-Token.');
if (!$file instanceof UploadedFile) {
$this->addFlash('error', 'Datei ist erforderlich.');
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]); return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
} }
$uploadDir = (string)$params->get('mto.vector.data.upload.path'); /** @var UploadedFile|null $file */
$this->ensureDir($uploadDir); $file = $request->files->get('file');
if (!$file instanceof UploadedFile) {
$this->addFlash('danger', 'Datei ist erforderlich.');
$newFilename = uniqid('', true) . '_' . $file->getClientOriginalName(); return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
} }
$filePath = $uploadDir . '/' . $newFilename; try {
$user = $this->requireUser();
$uploadDir = trim((string) $params->get('mto.vector.data.upload.path'));
$this->ensureDir($uploadDir);
$filePath = $this->moveUploadedFile($file, $uploadDir, $formatText);
$documentService->addVersion( $documentService->addVersion($document, $filePath, $user);
$document, $this->addFlash('success', 'Neue Dokumentversion wurde hochgeladen.');
$filePath, } catch (\Throwable $e) {
$this->getUser() $this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Neue Dokumentversion konnte nicht erstellt werden.'));
);
return $this->redirectToRoute('admin_document_version_new', ['id' => $id]);
}
return $this->redirectToRoute('admin_document_show', ['id' => $id]); return $this->redirectToRoute('admin_document_show', ['id' => $id]);
} }
@@ -208,44 +206,45 @@ class DocumentController extends AbstractController
throw $this->createAccessDeniedException(); throw $this->createAccessDeniedException();
} }
$version = $em->getRepository(DocumentVersion::class)->find($versionId); $version = $this->findDocumentVersion($versionId, $em);
if (!$version) {
throw $this->createNotFoundException();
}
try { try {
$documentService->activateVersion($version); $documentService->activateVersion($version);
$job = $jobService->startJob( $job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE, IngestJob::TYPE_DOCUMENT_VERSION_ACTIVATE,
$this->getUser(), $this->requireUser(),
$version->getDocument()->getId(), $version->getDocument()->getId(),
$version->getId(), $version->getId(),
null, null,
IngestJob::STATUS_QUEUED IngestJob::STATUS_QUEUED
); );
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) { if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).'); $jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Aktivierung ok, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).'); $this->addFlash('danger', 'Aktivierung ok, aber Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_document_show', [ return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(), 'id' => (string) $version->getDocument()->getId(),
]); ]);
} }
$this->startIngestJob((string)$job->getId()); $this->startIngestJob((string) $job->getId(), $logFile);
$this->addFlash('success', 'Version aktiviert. Ingest-Job wurde erstellt und gestartet.'); $this->addFlash('success', 'Version aktiviert. Ingest-Job wurde erstellt und gestartet.');
return $this->redirectToRoute('admin_job_show', [ return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(), 'id' => (string) $job->getId(),
]); ]);
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $e->getMessage()); $this->addFlash('danger', 'Aktivierung/Re-Ingest fehlgeschlagen: ' . $this->buildSafeErrorMessage($e, 'Unbekannter Fehler.'));
} }
return $this->redirectToRoute('admin_document_show', [ return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(), 'id' => (string) $version->getDocument()->getId(),
]); ]);
} }
@@ -260,111 +259,131 @@ class DocumentController extends AbstractController
Request $request, Request $request,
EntityManagerInterface $em, EntityManagerInterface $em,
IngestJobService $jobService, IngestJobService $jobService,
): ?RedirectResponse { ): RedirectResponse {
if (!$this->isCsrfTokenValid('ingest_version_' . $versionId, (string) $request->request->get('_token'))) { if (!$this->isCsrfTokenValid('ingest_version_' . $versionId, (string) $request->request->get('_token'))) {
throw $this->createAccessDeniedException(); throw $this->createAccessDeniedException();
} }
$version = $em->getRepository(DocumentVersion::class)->find($versionId); $version = $this->findDocumentVersion($versionId, $em);
if (!$version) {
throw $this->createNotFoundException();
}
/** @var IngestJob|null $existing */ /** @var IngestJob|null $existing */
$existing = $em->getRepository(IngestJob::class) $existing = $em->getRepository(IngestJob::class)
->findOneBy( ->findOneBy(
['documentVersionId' => $version->getId()], ['documentVersionId' => $version->getId()],
['startedAt' => 'DESC'] ['startedAt' => 'DESC', 'id' => 'DESC']
); );
if ($existing && $existing->getStartedAt() > new \DateTimeImmutable('-3 seconds')) { if (
return null; $existing instanceof IngestJob
&& $existing->getStartedAt() > new \DateTimeImmutable('-' . self::INGEST_DUPLICATE_WINDOW_SECONDS . ' seconds')
&& in_array($existing->getStatus(), [IngestJob::STATUS_QUEUED, IngestJob::STATUS_RUNNING], true)
) {
$this->addFlash('info', 'Für diese Version läuft bereits ein aktueller Ingest-Job.');
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $existing->getId(),
]);
} }
$job = $jobService->startJob( $job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT, IngestJob::TYPE_DOCUMENT,
$this->getUser(), $this->requireUser(),
$version->getDocument()->getId(), $version->getDocument()->getId(),
$version->getId(), $version->getId(),
null, null,
IngestJob::STATUS_QUEUED IngestJob::STATUS_QUEUED
); );
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) { if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).'); $jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('error', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).'); $this->addFlash('danger', 'Ingest konnte nicht asynchron gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_document_show', [ return $this->redirectToRoute('admin_document_show', [
'id' => $version->getDocument()->getId(), 'id' => (string) $version->getDocument()->getId(),
]); ]);
} }
$this->startIngestJob((string)$job->getId()); try {
$this->startIngestJob((string) $job->getId(), $logFile);
} catch (\Throwable $e) {
$jobService->markFailed($job, 'Ingest async start failed: ' . $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Ingest konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_document_show', [
'id' => (string) $version->getDocument()->getId(),
]);
}
return $this->redirectToRoute('admin_job_show', [ return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(), 'id' => (string) $job->getId(),
]); ]);
} }
#[Route( #[Route('/reset', name: 'admin_document_reset', methods: ['POST'])]
'/reset', public function resetCompleteSystem(
name: 'admin_document_reset', Request $request,
methods: ['POST'] ParameterBagInterface $params,
)] Connection $connection,
public function resetCompleteSystem(ParameterBagInterface $params, Connection $connection): ?RedirectResponse ): RedirectResponse {
{ $this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
if (!$this->canExec()) {
$this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).'); if (!$this->isCsrfTokenValid('system_reset', (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_dashboard'); return $this->redirectToRoute('admin_dashboard');
} }
@unlink((string)$params->get('mto.knowledge.ndjson')); if (!$this->canExec()) {
@unlink((string)$params->get('mto.knowledge.vector_index')); $this->addFlash('danger', 'Der Reset konnte nicht gestartet werden (exec deaktiviert).');
@unlink((string)$params->get('mto.knowledge.vector_index_meta'));
@unlink((string)$params->get('mto.knowledge.index_meta'));
@unlink((string)$params->get('mto.runtime.meta'));
@unlink((string)$params->get('mto.knowledge.tags_ndjson')); return $this->redirectToRoute('admin_dashboard');
@unlink((string)$params->get('mto.knowledge.vector_tags_index')); }
@unlink((string)$params->get('mto.knowledge.vector_tags_index_meta'));
$uploadDir = (string)$params->get('mto.knowledge.upload'); foreach ([
'mto.knowledge.ndjson',
'mto.knowledge.vector_index',
'mto.knowledge.vector_index_meta',
'mto.knowledge.index_meta',
'mto.runtime.meta',
'mto.knowledge.tags_ndjson',
'mto.knowledge.vector_tags_index',
'mto.knowledge.vector_tags_index_meta',
] as $parameterName) {
$path = trim((string) $params->get($parameterName));
if ($path !== '' && is_file($path)) {
@unlink($path);
}
}
$uploadDir = trim((string) $params->get('mto.knowledge.upload'));
if ($uploadDir !== '' && is_dir($uploadDir)) { if ($uploadDir !== '' && is_dir($uploadDir)) {
exec('rm -rf ' . escapeshellarg($uploadDir)); exec('rm -rf ' . escapeshellarg($uploadDir));
} }
$lockDir = (string)$params->get('mto.locks.dir'); $lockDir = trim((string) $params->get('mto.locks.dir'));
if ($lockDir !== '' && is_dir($lockDir)) { if ($lockDir !== '' && is_dir($lockDir)) {
exec('rm -rf ' . escapeshellarg($lockDir)); exec('rm -rf ' . escapeshellarg($lockDir));
} }
$sql = ' $sql = <<<'SQL'
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document_version;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.ingest_job;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.knowledge_tag;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.tag_rebuild_job;
SET FOREIGN_KEY_CHECKS = 1;
SET FOREIGN_KEY_CHECKS = 0; SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE db.document_tag; TRUNCATE TABLE db.document_tag;
TRUNCATE TABLE db.tag_rebuild_job;
TRUNCATE TABLE db.knowledge_tag;
TRUNCATE TABLE db.ingest_job;
TRUNCATE TABLE db.document_version;
TRUNCATE TABLE db.document;
SET FOREIGN_KEY_CHECKS = 1; SET FOREIGN_KEY_CHECKS = 1;
'; SQL;
$connection->executeQuery($sql);
$connection->executeStatement($sql);
$this->addFlash('success', 'Das System wurde erfolgreich zurückgesetzt.'); $this->addFlash('success', 'Das System wurde erfolgreich zurückgesetzt.');
return $this->redirectToRoute('admin_dashboard'); return $this->redirectToRoute('admin_dashboard');
} }
@@ -381,44 +400,49 @@ class DocumentController extends AbstractController
IngestJobService $jobService, IngestJobService $jobService,
LockService $lockService, LockService $lockService,
): RedirectResponse { ): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
if (!$this->isCsrfTokenValid('delete_document_' . $id, (string) $request->request->get('_token'))) { if (!$this->isCsrfTokenValid('delete_document_' . $id, (string) $request->request->get('_token'))) {
throw $this->createAccessDeniedException(); throw $this->createAccessDeniedException();
} }
try { $document = $this->findDocument($id, $em);
$uuid = Uuid::fromString($id);
} catch (\Exception) {
throw $this->createNotFoundException();
}
/** @var Document|null $document */
$document = $em->getRepository(Document::class)->find($uuid);
if (!$document) {
throw $this->createNotFoundException();
}
if (!$lockService->acquire()) { if (!$lockService->acquire()) {
$this->addFlash('danger', 'Ein Ingest-Job läuft bereits. Löschen derzeit nicht möglich.'); $this->addFlash('danger', 'Ein Ingest-Job läuft bereits. Löschen derzeit nicht möglich.');
return $this->redirectToRoute('admin_documents'); return $this->redirectToRoute('admin_documents');
} }
$lockService->release(); $lockService->release();
$job = $jobService->startJob( $job = $jobService->startJob(
IngestJob::TYPE_DOCUMENT_DELETE, IngestJob::TYPE_DOCUMENT_DELETE,
$this->getUser(), $this->requireUser(),
$document->getId(), $document->getId(),
null, null,
null, null,
IngestJob::STATUS_QUEUED IngestJob::STATUS_QUEUED
); );
$logFile = $this->prepareJobLogFile((string) $job->getId());
$job->setLogPath($logFile);
$em->flush();
if (!$this->canExec()) { if (!$this->canExec()) {
$jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).'); $jobService->markFailed($job, 'Server configuration does not allow background execution (exec disabled).');
$this->addFlash('danger', 'Löschen konnte nicht gestartet werden (exec deaktiviert).'); $this->addFlash('danger', 'Löschen konnte nicht gestartet werden (exec deaktiviert).');
return $this->redirectToRoute('admin_documents'); return $this->redirectToRoute('admin_documents');
} }
$this->startIngestJob((string)$job->getId()); try {
$this->startIngestJob((string) $job->getId(), $logFile);
} catch (\Throwable $e) {
$jobService->markFailed($job, 'Delete async start failed: ' . $e->getMessage());
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Löschvorgang konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_documents');
}
$this->addFlash('success', 'Löschvorgang gestartet. Dokument wird nach Index-Rebuild entfernt.'); $this->addFlash('success', 'Löschvorgang gestartet. Dokument wird nach Index-Rebuild entfernt.');
@@ -427,10 +451,6 @@ class DocumentController extends AbstractController
]); ]);
} }
// =========================================================
// Helpers
// =========================================================
private function canExec(): bool private function canExec(): bool
{ {
if (!function_exists('exec')) { if (!function_exists('exec')) {
@@ -443,6 +463,7 @@ class DocumentController extends AbstractController
} }
$list = array_map('trim', explode(',', $disabled)); $list = array_map('trim', explode(',', $disabled));
return !in_array('exec', $list, true); return !in_array('exec', $list, true);
} }
@@ -452,34 +473,209 @@ class DocumentController extends AbstractController
throw new \RuntimeException('Upload directory not configured.'); throw new \RuntimeException('Upload directory not configured.');
} }
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) { if (!is_dir($dir) && !mkdir($dir, 0775, true) && !is_dir($dir)) {
throw new \RuntimeException('Unable to create upload directory.'); throw new \RuntimeException('Unable to create upload directory.');
} }
} }
private function startIngestJob(string $jobId): void private function moveUploadedFile(UploadedFile $file, string $uploadDir, FormatText $formatText): string
{ {
$projectDir = (string)$this->getParameter('kernel.project_dir'); $originalName = trim((string) $file->getClientOriginalName());
$baseName = pathinfo($originalName !== '' ? $originalName : 'document', PATHINFO_FILENAME);
$extension = strtolower((string) $file->getClientOriginalExtension());
$safeBaseName = $formatText->slugify($baseName !== '' ? $baseName : 'document');
if ($safeBaseName === '') {
$safeBaseName = 'document';
}
$newFilename = uniqid('', true) . '_' . $safeBaseName;
if ($extension !== '') {
$newFilename .= '.' . $extension;
}
try {
$file->move($uploadDir, $newFilename);
} catch (FileException) {
throw new \RuntimeException('File upload failed.');
}
return rtrim($uploadDir, '/') . '/' . $newFilename;
}
private function resolveDocumentTitle(Request $request, UploadedFile $file, FormatText $formatText): string
{
$rawTitle = trim((string) $request->request->get('title', ''));
if ($rawTitle !== '') {
return $rawTitle;
}
$originalName = trim((string) $file->getClientOriginalName());
$baseName = pathinfo($originalName, PATHINFO_FILENAME);
return trim((string) $formatText->slugify($baseName !== '' ? $baseName : $originalName));
}
private function startIngestJob(string $jobId, string $logFile): void
{
$projectDir = $this->resolveProjectDir();
$console = $projectDir . '/bin/console'; $console = $projectDir . '/bin/console';
$logDir = $projectDir . '/var/log/ingest'; if (!is_file($console)) {
if (!is_dir($logDir)) { throw new \RuntimeException('bin/console not found: ' . $console);
@mkdir($logDir, 0777, true);
} }
$logFile = $logDir . '/job_' . $jobId . '.log';
// Wichtig: CLI-PHP verwenden, nicht PHP_BINARY aus FPM $php = $this->resolvePhpBinary();
$php = 'php';
$cmd = sprintf( $cmd = sprintf(
'%s %s --no-interaction %s %s >> %s 2>&1 &', 'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellcmd($php), escapeshellarg($projectDir),
escapeshellarg($php),
escapeshellarg($console), escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'), escapeshellarg('mto:agent:ingest:run'),
escapeshellarg($jobId), escapeshellarg($jobId),
escapeshellarg($logFile), escapeshellarg($logFile),
); );
exec($cmd); $output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
if ($exitCode !== 0) {
throw new \RuntimeException('Background ingest bootstrap failed with exit code ' . $exitCode . '.');
}
}
/**
 * Make sure the ingest log directory exists and build the log file path for a job.
 *
 * @param string $jobId Identifier embedded in the log file name.
 *
 * @return string Path of the form "<project>/var/log/ingest/job_<jobId>.log".
 */
private function prepareJobLogFile(string $jobId): string
{
    $ingestLogDir = $this->resolveProjectDir() . '/var/log/ingest';
    $this->ensureDir($ingestLogDir);

    return sprintf('%s/job_%s.log', $ingestLogDir, $jobId);
}
/**
 * Read the "kernel.project_dir" parameter and return it without a trailing slash.
 *
 * @return string Existing project directory, right-trimmed of "/".
 *
 * @throws \RuntimeException When the parameter is empty or not a directory.
 */
private function resolveProjectDir(): string
{
    $configuredDir = trim((string) $this->getParameter('kernel.project_dir'));
    $isUsable = $configuredDir !== '' && is_dir($configuredDir);

    if (!$isUsable) {
        throw new \RuntimeException('Project directory is invalid.');
    }

    return rtrim($configuredDir, '/');
}
/**
 * Locate a PHP executable that passes isValidCliPhpBinary().
 *
 * Candidates are tried in this order and the first accepted one is returned:
 *  1. PHP_CLI_BINARY from $_SERVER, then $_ENV, then getenv()
 *  2. the PHP_BINARY constant
 *  3. a fixed list of common install paths
 *  4. the result of `command -v php` (only queried when everything above failed)
 *
 * @return string Path of the accepted binary.
 *
 * @throws \RuntimeException When no candidate is accepted.
 */
private function resolvePhpBinary(): string
{
    $orderedCandidates = [
        // Explicit configuration first.
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        // The interpreter running this request.
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        // Common install locations.
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($orderedCandidates as $path) {
        if ($this->isValidCliPhpBinary($path)) {
            return $path;
        }
    }

    // Last resort: ask the shell where "php" lives.
    $fromShell = trim((string) @shell_exec('command -v php 2>/dev/null'));
    if (!$this->isValidCliPhpBinary($fromShell)) {
        throw new \RuntimeException(
            'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
        );
    }

    return $fromShell;
}
/**
 * Check whether a path points to an executable file that is not an FPM/CGI binary.
 *
 * @param string $path Candidate path; surrounding whitespace is ignored.
 *
 * @return bool True when the path is a non-empty, existing, executable file whose
 *              lower-cased base name contains neither "fpm" nor "cgi".
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);

    if ($candidate === '') {
        return false;
    }

    if (!is_file($candidate) || !is_executable($candidate)) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
/**
 * Load a Document by its identifier or abort with a 404.
 *
 * @param string                 $id Identifier as received; trimmed and parsed with Uuid::fromString().
 * @param EntityManagerInterface $em Entity manager used for the lookup.
 *
 * @return Document The matching document.
 *
 * @throws NotFoundHttpException When the identifier cannot be parsed or no document matches.
 */
private function findDocument(string $id, EntityManagerInterface $em): Document
{
    try {
        $documentId = Uuid::fromString(trim($id));
    } catch (\Throwable) {
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(Document::class)->find($documentId);
    if ($found instanceof Document) {
        return $found;
    }

    throw new NotFoundHttpException();
}
/**
 * Load a DocumentVersion by its identifier or abort with a 404.
 *
 * @param string                 $versionId Identifier as received; trimmed and parsed with Uuid::fromString().
 * @param EntityManagerInterface $em        Entity manager used for the lookup.
 *
 * @return DocumentVersion The matching version.
 *
 * @throws NotFoundHttpException When the identifier cannot be parsed or no version matches.
 */
private function findDocumentVersion(string $versionId, EntityManagerInterface $em): DocumentVersion
{
    try {
        $parsedId = Uuid::fromString(trim($versionId));
    } catch (\Throwable) {
        throw new NotFoundHttpException();
    }

    $found = $em->getRepository(DocumentVersion::class)->find($parsedId);
    if ($found instanceof DocumentVersion) {
        return $found;
    }

    throw new NotFoundHttpException();
}
/**
 * Return the current user as an application User instance.
 *
 * @return User The value of getUser() when it is a User.
 *
 * @throws \RuntimeException When getUser() does not yield a User instance.
 */
private function requireUser(): User
{
    $current = $this->getUser();
    if ($current instanceof User) {
        return $current;
    }

    throw new \RuntimeException('No authenticated user available.');
}
/**
 * Pick the message to show for a caught exception.
 *
 * Returns the exception's own message (trimmed) when it is non-empty,
 * otherwise the supplied fallback text.
 *
 * NOTE(review): the exception message is passed through unfiltered, so any
 * internal detail it contains is returned as-is — confirm this is intended.
 *
 * @param \Throwable $e        Exception whose message is inspected.
 * @param string     $fallback Text used when the exception message is blank.
 *
 * @return string The trimmed exception message or the fallback.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
$message = trim($e->getMessage());
return $message !== '' ? $message : $fallback;
} }
} }

View File

@@ -19,44 +19,97 @@ final class DocumentTagController extends AbstractController
#[Route('/{id}/tags', name: 'admin_document_tags_edit', methods: ['GET'])] #[Route('/{id}/tags', name: 'admin_document_tags_edit', methods: ['GET'])]
public function edit(string $id, DocumentTagAdminService $svc): Response public function edit(string $id, DocumentTagAdminService $svc): Response
{ {
$id = trim($id);
try {
$data = $svc->getEditData($id); $data = $svc->getEditData($id);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Dokument-Tags konnten nicht geladen werden.'));
return $this->redirectToRoute('admin_documents');
}
return $this->render('admin/document_tags/edit.html.twig', [ return $this->render('admin/document_tags/edit.html.twig', [
'document' => $data['document'], ...$data,
'allTags' => $data['allTags'], ...$this->buildJobStatusViewData(),
'latestJob' => $data['latestJob'],
'statusRunning' => TagRebuildJob::STATUS_RUNNING,
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED,
]); ]);
} }
#[Route('/{id}/tags/save', name: 'admin_document_tags_save', methods: ['POST'])] #[Route('/{id}/tags/save', name: 'admin_document_tags_save', methods: ['POST'])]
public function save(string $id, Request $request, DocumentTagAdminService $svc): RedirectResponse public function save(string $id, Request $request, DocumentTagAdminService $svc): RedirectResponse
{ {
$selected = $request->request->all('tag_ids') ?? []; $id = trim($id);
if (!$this->isCsrfTokenValid('admin_document_tags_save_' . $id, (string) $request->request->get('_token'))) {
$this->addFlash('danger', 'Ungültiges CSRF-Token.');
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
}
try { try {
$svc->saveTags($id, $selected); $svc->saveTags($id, $this->normalizeStringList($request->request->all('tag_ids')));
$this->addFlash('success', 'Tags wurden aktualisiert. Rebuild läuft im Hintergrund.'); $this->addFlash('success', 'Tags wurden aktualisiert. Rebuild läuft im Hintergrund.');
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage()); $this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tags konnten nicht aktualisiert werden.'));
} }
return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]); return $this->redirectToRoute('admin_document_tags_edit', ['id' => $id]);
} }
/**
* Wichtig: Ohne extra "admin/" im Pfad, weil Prefix schon /admin/documents ist.
* Ergebnis: /admin/documents/tags/status
*/
#[Route('/tags/status', name: 'admin_tags_status', methods: ['GET'])] #[Route('/tags/status', name: 'admin_tags_status', methods: ['GET'])]
public function status(DocumentTagAdminService $svc): JsonResponse public function status(DocumentTagAdminService $svc): JsonResponse
{ {
$status = $svc->getLatestRebuildStatus();
return $this->json([ return $this->json([
'status' => $svc->getLatestRebuildStatus(), 'status' => $status,
'hasActiveJob' => $status === TagRebuildJob::STATUS_RUNNING
|| $status === TagRebuildJob::STATUS_QUEUED,
]); ]);
} }
/**
 * Normalize a raw request value into a de-duplicated list of non-empty strings.
 *
 * Anything that is not an array yields an empty list. Each entry is cast to
 * string and trimmed; blank entries are dropped and duplicates removed while
 * keeping first-seen order. Entries that cannot be represented as a string
 * (nested arrays, non-stringable objects) are skipped instead of being cast.
 *
 * @param mixed $values Raw value, e.g. the result of `$request->request->all('tag_ids')`.
 *
 * @return list<string> Trimmed, non-empty, unique entries.
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // Request input may be nested (e.g. "tag_ids[][]=x"). Casting an array to
        // string raises "Array to string conversion" and yields the literal
        // "Array"; casting a non-stringable object throws. Ignore such entries.
        if (!is_scalar($value) && !$value instanceof \Stringable) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
/**
 * Provide the TagRebuildJob status constants under the keys the templates receive.
 *
 * @return array<string, string> Keys statusRunning, statusQueued, statusCompleted, statusFailed.
 */
private function buildJobStatusViewData(): array
{
    $viewData = [];
    $viewData['statusRunning'] = TagRebuildJob::STATUS_RUNNING;
    $viewData['statusQueued'] = TagRebuildJob::STATUS_QUEUED;
    $viewData['statusCompleted'] = TagRebuildJob::STATUS_COMPLETED;
    $viewData['statusFailed'] = TagRebuildJob::STATUS_FAILED;

    return $viewData;
}
/**
 * Pick the message to show for a caught exception.
 *
 * @param \Throwable $e        Exception whose message is inspected.
 * @param string     $fallback Text used when the exception message is blank after trimming.
 *
 * @return string The trimmed exception message, or the fallback when it is empty.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $text = trim($e->getMessage());
    if ($text === '') {
        return $fallback;
    }

    return $text;
}
} }

View File

@@ -1,46 +1,44 @@
<?php <?php
declare(strict_types=1);
namespace App\Controller\Admin; namespace App\Controller\Admin;
use App\Entity\IngestJob; use App\Entity\IngestJob;
use App\Service\IngestJobService; use App\Service\IngestJobService;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\JsonResponse;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response; use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\HttpKernel\Exception\NotFoundHttpException; use Symfony\Component\HttpKernel\Exception\NotFoundHttpException;
use Symfony\Component\Routing\Attribute\Route; use Symfony\Component\Routing\Attribute\Route;
use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\JsonResponse;
#[Route('/admin/jobs')] #[Route('/admin/jobs')]
class IngestJobController extends AbstractController final class IngestJobController extends AbstractController
{ {
#[Route('', name: 'admin_jobs')] #[Route('', name: 'admin_jobs', methods: ['GET'])]
public function index(EntityManagerInterface $em): Response public function index(EntityManagerInterface $em): Response
{ {
$jobs = $em->getRepository(IngestJob::class) $jobs = $em->getRepository(IngestJob::class)
->findBy([], ['startedAt' => 'DESC']); ->findBy([], ['startedAt' => 'DESC', 'id' => 'DESC']);
return $this->render('admin/job/index.html.twig', [ return $this->render('admin/job/index.html.twig', [
'jobs' => $jobs 'jobs' => $jobs,
]); ]);
} }
#[Route( #[Route(
'/{id}', '/{id}',
name: 'admin_job_show', name: 'admin_job_show',
requirements: ['id' => '[0-9a-fA-F\-]{36}'] requirements: ['id' => '[0-9a-fA-F\-]{36}'],
methods: ['GET']
)] )]
public function show(string $id, EntityManagerInterface $em): Response public function show(string $id, EntityManagerInterface $em): Response
{ {
$job = $em->getRepository(IngestJob::class)->find($id);
if (!$job) {
throw new NotFoundHttpException();
}
return $this->render('admin/job/show.html.twig', [ return $this->render('admin/job/show.html.twig', [
'job' => $job 'job' => $this->findJob($id, $em),
]); ]);
} }
@@ -54,12 +52,7 @@ class IngestJobController extends AbstractController
{ {
$this->denyAccessUnlessGranted('ROLE_USER'); $this->denyAccessUnlessGranted('ROLE_USER');
/** @var IngestJob|null $job */ $job = $this->findJob($id, $em);
$job = $em->getRepository(IngestJob::class)->find($id);
if (!$job) {
throw new NotFoundHttpException();
}
return $this->json([ return $this->json([
'id' => (string) $job->getId(), 'id' => (string) $job->getId(),
@@ -68,19 +61,35 @@ class IngestJobController extends AbstractController
'startedAt' => $job->getStartedAt()->format(DATE_ATOM), 'startedAt' => $job->getStartedAt()->format(DATE_ATOM),
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM), 'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM),
'errorMessage' => $job->getErrorMessage(), 'errorMessage' => $job->getErrorMessage(),
'logPath' => $job->getLogPath(),
]); ]);
} }
#[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])] #[Route('/global-reindex', name: 'admin_global_reindex', methods: ['POST'])]
public function globalReindex( public function globalReindex(
Request $request,
IngestJobService $jobService, IngestJobService $jobService,
EntityManagerInterface $em,
): RedirectResponse { ): RedirectResponse {
$this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN'); $this->denyAccessUnlessGranted('ROLE_SUPER_ADMIN');
// --------------------------------------------------------- if (!$this->isCsrfTokenValid('global_reindex', (string) $request->request->get('_token'))) {
// 1) Job anlegen (QUEUED) $this->addFlash('danger', 'Ungültiges CSRF-Token.');
// ---------------------------------------------------------
return $this->redirectToRoute('admin_jobs');
}
try {
$projectDir = $this->resolveProjectDir();
$console = $projectDir . '/bin/console';
if (!is_file($console)) {
throw new \RuntimeException('bin/console not found: ' . $console);
}
$logDir = $projectDir . '/var/log/ingest';
$this->ensureDirectoryExists($logDir);
$job = $jobService->startJob( $job = $jobService->startJob(
IngestJob::TYPE_GLOBAL_REINDEX, IngestJob::TYPE_GLOBAL_REINDEX,
$this->getUser(), $this->getUser(),
@@ -90,36 +99,147 @@ class IngestJobController extends AbstractController
IngestJob::STATUS_QUEUED IngestJob::STATUS_QUEUED
); );
// ---------------------------------------------------------
// 2) CLI im Hintergrund starten
// ---------------------------------------------------------
$projectDir = (string)$this->getParameter('kernel.project_dir');
$console = $projectDir . '/bin/console';
$logDir = $projectDir . '/var/log/ingest';
if (!is_dir($logDir)) {
@mkdir($logDir, 0777, true);
}
$logFile = $logDir . '/job_' . (string) $job->getId() . '.log'; $logFile = $logDir . '/job_' . (string) $job->getId() . '.log';
$job->setLogPath($logFile);
$em->flush();
$php = 'php'; $phpBinary = $this->resolvePhpBinary();
$cmd = sprintf( $cmd = sprintf(
'%s %s --no-interaction %s %s >> %s 2>&1 &', 'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellcmd($php), escapeshellarg($projectDir),
escapeshellarg($phpBinary),
escapeshellarg($console), escapeshellarg($console),
escapeshellarg('mto:agent:ingest:run'), escapeshellarg('mto:agent:ingest:run'),
escapeshellarg((string) $job->getId()), escapeshellarg((string) $job->getId()),
escapeshellarg($logFile), escapeshellarg($logFile),
); );
exec($cmd); $output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
if ($exitCode !== 0) {
$job->markFailed('Global reindex async bootstrap failed with exit code ' . $exitCode . '.');
$em->flush();
$this->addFlash('danger', 'Global Reindex konnte nicht im Hintergrund gestartet werden.');
// ---------------------------------------------------------
// 3) Redirect auf Job-Detailseite (Loader)
// ---------------------------------------------------------
return $this->redirectToRoute('admin_job_show', [ return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(), 'id' => (string) $job->getId(),
]); ]);
} }
$this->addFlash('success', 'Global Reindex wurde gestartet.');
return $this->redirectToRoute('admin_job_show', [
'id' => (string) $job->getId(),
]);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Global Reindex konnte nicht gestartet werden.'));
return $this->redirectToRoute('admin_jobs');
}
}
/**
 * Load an IngestJob by its identifier or abort with a 404.
 *
 * The identifier is validated before the repository lookup so that a malformed
 * value produces a 404 instead of reaching the persistence layer. The route
 * pattern `[0-9a-fA-F\-]{36}` alone does not guarantee a well-formed UUID
 * (it also matches, for example, 36 dashes).
 *
 * @param string                 $id Identifier as received from the route; trimmed before use.
 * @param EntityManagerInterface $em Entity manager used for the lookup.
 *
 * @return IngestJob The matching job.
 *
 * @throws NotFoundHttpException When the identifier is malformed or no job matches.
 */
private function findJob(string $id, EntityManagerInterface $em): IngestJob
{
    $id = trim($id);

    // Validation only: the original string is still what gets passed to find().
    // NOTE(review): assumes IngestJob ids are Symfony UIDs, as with Document — confirm.
    try {
        \Symfony\Component\Uid\Uuid::fromString($id);
    } catch (\Throwable) {
        throw new NotFoundHttpException();
    }

    /** @var IngestJob|null $job */
    $job = $em->getRepository(IngestJob::class)->find($id);
    if (!$job instanceof IngestJob) {
        throw new NotFoundHttpException();
    }

    return $job;
}
/**
 * Read the "kernel.project_dir" parameter and return it without a trailing slash.
 *
 * @return string Existing project directory, right-trimmed of "/".
 *
 * @throws \RuntimeException When the parameter is empty or not a directory.
 */
private function resolveProjectDir(): string
{
    $dir = trim((string) $this->getParameter('kernel.project_dir'));

    if ($dir !== '' && is_dir($dir)) {
        return rtrim($dir, '/');
    }

    throw new \RuntimeException('Project directory is invalid.');
}
/**
 * Locate a PHP executable that passes isValidCliPhpBinary().
 *
 * Tried in order, first accepted candidate wins: PHP_CLI_BINARY from $_SERVER,
 * $_ENV and getenv(); the PHP_BINARY constant; a fixed list of common install
 * paths; and finally the output of `command -v php`, which is only queried
 * when every earlier candidate was rejected.
 *
 * @return string Path of the accepted binary.
 *
 * @throws \RuntimeException When no candidate is accepted.
 */
private function resolvePhpBinary(): string
{
    $configured = [
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
    ];
    $running = [defined('PHP_BINARY') ? trim((string) PHP_BINARY) : ''];
    $wellKnown = [
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach (array_merge($configured, $running, $wellKnown) as $path) {
        if ($this->isValidCliPhpBinary($path)) {
            return $path;
        }
    }

    // Nothing matched so far: fall back to whatever the shell resolves "php" to.
    $resolvedByShell = trim((string) @shell_exec('command -v php 2>/dev/null'));
    if ($this->isValidCliPhpBinary($resolvedByShell)) {
        return $resolvedByShell;
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
/**
 * Check whether a path points to an executable file that is not an FPM/CGI binary.
 *
 * @param string $path Candidate path; surrounding whitespace is ignored.
 *
 * @return bool True when the path is a non-empty, existing, executable file whose
 *              lower-cased base name contains neither "fpm" nor "cgi".
 */
private function isValidCliPhpBinary(string $path): bool
{
    $binary = trim($path);

    $isExecutableFile = $binary !== '' && is_file($binary) && is_executable($binary);
    if (!$isExecutableFile) {
        return false;
    }

    $lowerName = strtolower(basename($binary));
    foreach (['fpm', 'cgi'] as $marker) {
        if (str_contains($lowerName, $marker)) {
            return false;
        }
    }

    return true;
}
/**
 * Create a directory (recursively, mode 0775) unless it already exists.
 *
 * @param string $dir Directory path to guarantee.
 *
 * @throws \RuntimeException When the directory is still missing after the mkdir attempt.
 */
private function ensureDirectoryExists(string $dir): void
{
    if (!is_dir($dir)) {
        $created = @mkdir($dir, 0775, true);

        // A failed mkdir is fine if the directory exists by now (e.g. created concurrently).
        if (!$created && !is_dir($dir)) {
            throw new \RuntimeException('Could not create ingest log directory.');
        }
    }
}
/**
 * Pick the message to show for a caught exception.
 *
 * @param \Throwable $e        Exception whose message is inspected.
 * @param string     $fallback Text used when the exception message is blank after trimming.
 *
 * @return string The trimmed exception message, or the fallback when it is empty.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
    $trimmed = trim($e->getMessage());
    if ($trimmed !== '') {
        return $trimmed;
    }

    return $fallback;
}
} }

View File

@@ -6,6 +6,7 @@ namespace App\Controller\Admin;
use App\Entity\TagRebuildJob; use App\Entity\TagRebuildJob;
use App\Service\Admin\TagAdminService; use App\Service\Admin\TagAdminService;
use App\Tag\TagTypes;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController; use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\RedirectResponse; use Symfony\Component\HttpFoundation\RedirectResponse;
use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Request;
@@ -18,25 +19,18 @@ final class TagController extends AbstractController
#[Route('', name: 'admin_tags_index', methods: ['GET'])] #[Route('', name: 'admin_tags_index', methods: ['GET'])]
public function index(TagAdminService $svc): Response public function index(TagAdminService $svc): Response
{ {
$data = $svc->getIndexData();
return $this->render('admin/tag/index.html.twig', [ return $this->render('admin/tag/index.html.twig', [
...$data, ...$svc->getIndexData(),
'statusRunning' => TagRebuildJob::STATUS_RUNNING, ...$this->buildJobStatusViewData(),
'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED,
]); ]);
} }
#[Route('/create', name: 'admin_tags_create', methods: ['POST'])] #[Route('/create', name: 'admin_tags_create', methods: ['POST'])]
public function create(Request $request, TagAdminService $svc): RedirectResponse public function create(Request $request, TagAdminService $svc): RedirectResponse
{ {
if (!$this->isCsrfTokenValid( if (!$this->isCsrfTokenValid('admin_tag_create', (string) $request->request->get('_token'))) {
'admin_tag_create', $this->addFlash('danger', 'Ungültiges CSRF-Token.');
$request->request->get('_token')
)) {
$this->addFlash('danger', 'Ungültiges CSRF Token.');
return $this->redirectToRoute('admin_tags_index'); return $this->redirectToRoute('admin_tags_index');
} }
@@ -44,15 +38,13 @@ final class TagController extends AbstractController
$svc->create( $svc->create(
(string) $request->request->get('slug', ''), (string) $request->request->get('slug', ''),
(string) $request->request->get('label', ''), (string) $request->request->get('label', ''),
$request->request->get('description') $this->normalizeNullableString($request->request->get('description')),
? (string)$request->request->get('description') TagTypes::normalize((string) $request->request->get('type', TagTypes::GENERIC))
: null,
(string)$request->request->get('type', 'generic') // NEU
); );
$this->addFlash('success', 'Tag wurde erstellt.'); $this->addFlash('success', 'Tag wurde erstellt.');
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage()); $this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht erstellt werden.'));
} }
return $this->redirectToRoute('admin_tags_index'); return $this->redirectToRoute('admin_tags_index');
@@ -61,58 +53,110 @@ final class TagController extends AbstractController
#[Route('/{id}/delete', name: 'admin_tags_delete', methods: ['POST'])] #[Route('/{id}/delete', name: 'admin_tags_delete', methods: ['POST'])]
public function delete(string $id, Request $request, TagAdminService $svc): RedirectResponse public function delete(string $id, Request $request, TagAdminService $svc): RedirectResponse
{ {
if (!$this->isCsrfTokenValid( if (!$this->isCsrfTokenValid('admin_tag_delete_' . $id, (string) $request->request->get('_token'))) {
'admin_tag_delete_' . $id, $this->addFlash('danger', 'Ungültiges CSRF-Token.');
$request->request->get('_token')
)) {
$this->addFlash('danger', 'Ungültiges CSRF Token.');
return $this->redirectToRoute('admin_tags_index'); return $this->redirectToRoute('admin_tags_index');
} }
try { try {
$svc->delete($id); $svc->delete(trim($id));
$this->addFlash('success', 'Tag wurde gelöscht.'); $this->addFlash('success', 'Tag wurde gelöscht.');
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->addFlash('danger', $e->getMessage()); $this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht gelöscht werden.'));
} }
return $this->redirectToRoute('admin_tags_index'); return $this->redirectToRoute('admin_tags_index');
} }
#[Route('/{id}/assign', name: 'admin_tags_assign', methods: ['GET', 'POST'])] #[Route('/{id}/assign', name: 'admin_tags_assign', methods: ['GET', 'POST'])]
public function assign( public function assign(string $id, Request $request, TagAdminService $svc): Response
string $id, {
Request $request, $id = trim($id);
TagAdminService $svc
): Response {
if ($request->isMethod('POST')) { if ($request->isMethod('POST')) {
if (!$this->isCsrfTokenValid('assign_tag_' . $id, (string) $request->request->get('_token'))) {
if (!$this->isCsrfTokenValid( $this->addFlash('danger', 'Ungültiges CSRF-Token.');
'assign_tag_' . $id,
$request->request->get('_token')
)) {
throw $this->createAccessDeniedException();
}
$svc->syncAssignments(
$id,
$request->request->all('documents') ?? []
);
$this->addFlash('success', 'Zuweisungen aktualisiert.');
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]); return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
} }
try {
$svc->syncAssignments($id, $this->normalizeStringList($request->request->all('documents')));
$this->addFlash('success', 'Zuweisungen aktualisiert.');
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Zuweisungen konnten nicht aktualisiert werden.'));
}
return $this->redirectToRoute('admin_tags_assign', ['id' => $id]);
}
try {
$data = $svc->getAssignData($id); $data = $svc->getAssignData($id);
} catch (\Throwable $e) {
$this->addFlash('danger', $this->buildSafeErrorMessage($e, 'Tag konnte nicht geladen werden.'));
return $this->redirectToRoute('admin_tags_index');
}
return $this->render('admin/tag/assign.html.twig', [ return $this->render('admin/tag/assign.html.twig', [
...$data, ...$data,
...$this->buildJobStatusViewData(),
]);
}
/**
 * Cast a value to a trimmed string, mapping blank results to null.
 *
 * @param mixed $value Value to normalize; it is cast with (string).
 *
 * @return string|null The trimmed string, or null when it is empty.
 */
private function normalizeNullableString(mixed $value): ?string
{
    $trimmed = trim((string) $value);
    if ($trimmed === '') {
        return null;
    }

    return $trimmed;
}
/**
 * Normalize a raw request value into a de-duplicated list of non-empty strings.
 *
 * Anything that is not an array yields an empty list. Each entry is cast to
 * string and trimmed; blank entries are dropped and duplicates removed while
 * keeping first-seen order. Entries that cannot be represented as a string
 * (nested arrays, non-stringable objects) are skipped instead of being cast.
 *
 * @param mixed $values Raw value, e.g. the result of `$request->request->all('documents')`.
 *
 * @return list<string> Trimmed, non-empty, unique entries.
 */
private function normalizeStringList(mixed $values): array
{
    if (!is_array($values)) {
        return [];
    }

    $normalized = [];

    foreach ($values as $value) {
        // Request input may be nested (e.g. "documents[][]=x"). Casting an array to
        // string raises "Array to string conversion" and yields the literal
        // "Array"; casting a non-stringable object throws. Ignore such entries.
        if (!is_scalar($value) && !$value instanceof \Stringable) {
            continue;
        }

        $value = trim((string) $value);

        if ($value === '') {
            continue;
        }

        $normalized[] = $value;
    }

    return array_values(array_unique($normalized));
}
/**
* @return array<string, string>
*/
private function buildJobStatusViewData(): array
{
return [
'statusRunning' => TagRebuildJob::STATUS_RUNNING, 'statusRunning' => TagRebuildJob::STATUS_RUNNING,
'statusQueued' => TagRebuildJob::STATUS_QUEUED, 'statusQueued' => TagRebuildJob::STATUS_QUEUED,
'statusCompleted' => TagRebuildJob::STATUS_COMPLETED, 'statusCompleted' => TagRebuildJob::STATUS_COMPLETED,
'statusFailed' => TagRebuildJob::STATUS_FAILED, 'statusFailed' => TagRebuildJob::STATUS_FAILED,
]); ];
}
/**
 * Pick the message to show for a caught exception.
 *
 * Returns the exception's own message (trimmed) when it is non-empty,
 * otherwise the supplied fallback text.
 *
 * NOTE(review): the exception message is passed through unfiltered, so any
 * internal detail it contains is returned as-is — confirm this is intended.
 *
 * @param \Throwable $e        Exception whose message is inspected.
 * @param string     $fallback Text used when the exception message is blank.
 *
 * @return string The trimmed exception message or the fallback.
 */
private function buildSafeErrorMessage(\Throwable $e, string $fallback): string
{
$message = trim($e->getMessage());
return $message !== '' ? $message : $fallback;
} }
} }

View File

@@ -10,38 +10,79 @@ use Symfony\Component\Routing\Attribute\Route;
final class TagRebuildStreamController final class TagRebuildStreamController
{ {
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream')] private const POLL_INTERVAL_SECONDS = 2;
private const KEEPALIVE_INTERVAL_SECONDS = 10;
#[Route('/admin/tags/rebuild/stream', name: 'admin_tags_rebuild_stream', methods: ['GET'])]
public function stream(TagRebuildStatusProvider $provider): StreamedResponse public function stream(TagRebuildStatusProvider $provider): StreamedResponse
{ {
$response = new StreamedResponse(function () use ($provider) { $response = new StreamedResponse(function () use ($provider): void {
self::disableOutputBuffering();
echo "event: ping\n"; echo "retry: 3000\n";
echo "data: " . json_encode(['init' => true]) . "\n\n"; self::sendEvent('ping', ['init' => true]);
@ob_flush(); $lastPayloadHash = null;
@flush(); $lastKeepaliveAt = time();
while (!connection_aborted()) { while (!connection_aborted()) {
$data = $provider->getLatestStatus(); $data = $provider->getLatestStatus();
if ($data !== null) { if ($data !== null) {
echo "event: message\n"; $payloadHash = md5(
echo "data: " . json_encode($data) . "\n\n"; json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) ?: 'null'
);
@ob_flush(); if ($payloadHash !== $lastPayloadHash) {
@flush(); self::sendEvent('message', $data);
$lastPayloadHash = $payloadHash;
$lastKeepaliveAt = time();
}
} }
sleep(2); if ((time() - $lastKeepaliveAt) >= self::KEEPALIVE_INTERVAL_SECONDS) {
self::sendEvent('ping', [
'ts' => (new \DateTimeImmutable())->format(DATE_ATOM),
]);
$lastKeepaliveAt = time();
}
sleep(self::POLL_INTERVAL_SECONDS);
} }
}); });
$response->headers->set('Content-Type', 'text/event-stream'); $response->headers->set('Content-Type', 'text/event-stream');
$response->headers->set('Cache-Control', 'no-cache'); $response->headers->set('Cache-Control', 'no-cache, no-store, must-revalidate');
$response->headers->set('Pragma', 'no-cache');
$response->headers->set('Expires', '0');
$response->headers->set('Connection', 'keep-alive'); $response->headers->set('Connection', 'keep-alive');
$response->headers->set('X-Accel-Buffering', 'no'); $response->headers->set('X-Accel-Buffering', 'no');
return $response; return $response;
} }
/**
 * Flush and close the active output buffers so streamed output is not held back.
 *
 * Buffers are closed from the innermost outwards. The loop stops at the first
 * buffer that cannot be closed: ob_end_flush() returns false for a buffer that
 * was started without the removable/flushable flags, and the nesting level
 * then never drops, which would otherwise keep this loop spinning forever.
 */
private static function disableOutputBuffering(): void
{
    while (ob_get_level() > 0) {
        if (!@ob_end_flush()) {
            // The current buffer refuses to close; further attempts cannot succeed.
            break;
        }
    }
}
/**
 * Write one server-sent event ("event:" line plus JSON "data:" line) and flush it.
 *
 * The payload is encoded with JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;
 * when encoding fails a fixed error object is sent instead.
 *
 * @param string               $event Event name written after "event: ".
 * @param array<string, mixed> $data  Payload to encode as JSON.
 */
private static function sendEvent(string $event, array $data): void
{
    $encoded = json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
    $payload = is_string($encoded) ? $encoded : '{"error":"json_encode_failed"}';

    // A blank line terminates the event.
    echo 'event: ' . $event . "\n" . 'data: ' . $payload . "\n\n";

    @ob_flush();
    @flush();
}
} }

View File

@@ -8,6 +8,7 @@ use Doctrine\ORM\Mapping as ORM;
#[ORM\Entity] #[ORM\Entity]
#[ORM\Table(name: 'document_tag')] #[ORM\Table(name: 'document_tag')]
#[ORM\Index(name: 'idx_document_tag_tag_id', columns: ['tag_id'])]
class DocumentTag class DocumentTag
{ {
#[ORM\Id] #[ORM\Id]
@@ -22,8 +23,8 @@ class DocumentTag
public function __construct(Document $document, Tag $tag) public function __construct(Document $document, Tag $tag)
{ {
$this->document = $document; $this->setDocument($document);
$this->tag = $tag; $this->setTag($tag);
} }
public function getDocument(): Document public function getDocument(): Document
@@ -35,4 +36,20 @@ class DocumentTag
{ {
return $this->tag; return $this->tag;
} }
/**
 * Tell whether this link connects the given document and tag.
 *
 * @param Document $document Document to compare by id.
 * @param Tag      $tag      Tag to compare by id.
 *
 * @return bool True when both the document id and the tag id are equal to this relation's.
 */
public function isSameRelation(Document $document, Tag $tag): bool
{
    if (!$this->document->getId()->equals($document->getId())) {
        return false;
    }

    return $this->tag->getId()->equals($tag->getId());
}
/**
 * Store the document side of the relation.
 *
 * @param Document $document Document this link belongs to.
 */
private function setDocument(Document $document): void
{
$this->document = $document;
}
/**
 * Store the tag side of the relation.
 *
 * @param Tag $tag Tag this link points to.
 */
private function setTag(Tag $tag): void
{
$this->tag = $tag;
}
} }

View File

@@ -1,8 +1,12 @@
<?php <?php
declare(strict_types=1);
namespace App\Entity; namespace App\Entity;
use App\Tag\TagTypes;
use Doctrine\ORM\Mapping as ORM; use Doctrine\ORM\Mapping as ORM;
use InvalidArgumentException;
use Symfony\Component\Uid\Uuid; use Symfony\Component\Uid\Uuid;
#[ORM\Entity] #[ORM\Entity]
@@ -24,25 +28,25 @@ class Tag
#[ORM\Column(type: 'text', nullable: true)] #[ORM\Column(type: 'text', nullable: true)]
private ?string $description = null; private ?string $description = null;
/**
* NEU: Governance-Typ des Tags
* - generic
* - catalog_entity
*/
#[ORM\Column(length: 50)] #[ORM\Column(length: 50)]
private string $type = 'generic'; private string $type = TagTypes::GENERIC;
#[ORM\Column] #[ORM\Column]
private \DateTimeImmutable $createdAt; private \DateTimeImmutable $createdAt;
public function __construct(string $slug, string $label, ?string $description = null) public function __construct(
{ string $slug,
string $label,
?string $description = null,
string $type = TagTypes::GENERIC,
) {
$this->id = Uuid::v4(); $this->id = Uuid::v4();
$this->createdAt = new \DateTimeImmutable(); $this->createdAt = new \DateTimeImmutable();
$this->slug = $slug; $this->setSlug($slug);
$this->label = $label; $this->setLabel($label);
$this->description = $description; $this->setDescription($description);
$this->setType($type);
} }
public function getId(): Uuid public function getId(): Uuid
@@ -57,7 +61,14 @@ class Tag
public function setSlug(string $slug): static public function setSlug(string $slug): static
{ {
$slug = $this->normalizeSlug($slug);
if ($slug === '') {
throw new InvalidArgumentException('Tag slug must not be empty.');
}
$this->slug = $slug; $this->slug = $slug;
return $this; return $this;
} }
@@ -68,7 +79,14 @@ class Tag
public function setLabel(string $label): static public function setLabel(string $label): static
{ {
$label = trim($label);
if ($label === '') {
throw new InvalidArgumentException('Tag label must not be empty.');
}
$this->label = $label; $this->label = $label;
return $this; return $this;
} }
@@ -79,7 +97,9 @@ class Tag
public function setDescription(?string $description): static public function setDescription(?string $description): static
{ {
$this->description = $description; $description = trim((string) $description);
$this->description = $description !== '' ? $description : null;
return $this; return $this;
} }
@@ -90,13 +110,43 @@ class Tag
public function setType(string $type): static public function setType(string $type): static
{ {
$type = trim($type); $normalizedType = TagTypes::normalize($type);
$this->type = $type !== '' ? $type : 'generic';
if (!TagTypes::isValid($normalizedType)) {
throw new InvalidArgumentException(sprintf('Unsupported tag type "%s".', $type));
}
$this->type = $normalizedType;
return $this; return $this;
} }
/**
 * @return bool True when the tag's type equals TagTypes::GENERIC.
 */
public function isGeneric(): bool
{
return $this->type === TagTypes::GENERIC;
}
/**
 * @return bool True when the tag's type equals TagTypes::CATALOG_ENTITY.
 */
public function isCatalogEntity(): bool
{
return $this->type === TagTypes::CATALOG_ENTITY;
}
/**
 * @return bool True when the tag's type equals TagTypes::SALES_SIGNAL.
 */
public function isSalesSignal(): bool
{
return $this->type === TagTypes::SALES_SIGNAL;
}
public function getCreatedAt(): \DateTimeImmutable public function getCreatedAt(): \DateTimeImmutable
{ {
return $this->createdAt; return $this->createdAt;
} }
/**
 * Bring a slug into canonical form.
 *
 * The input is trimmed and lower-cased, every whitespace run becomes a single
 * "-", consecutive dashes are collapsed, and leading/trailing dashes are removed.
 * If a regex replacement fails (preg_replace returns null) that step is skipped.
 *
 * @param string $slug Raw slug.
 *
 * @return string Normalized slug; may be empty.
 */
private function normalizeSlug(string $slug): string
{
    $result = mb_strtolower(trim($slug));

    // First whitespace runs -> "-", then collapse repeated dashes.
    foreach (['/\s+/u', '/-+/u'] as $pattern) {
        $result = preg_replace($pattern, '-', $result) ?? $result;
    }

    return trim($result, '-');
}
} }

View File

@@ -9,8 +9,8 @@ use Symfony\Component\Uid\Uuid;
#[ORM\Entity] #[ORM\Entity]
#[ORM\Table(name: 'tag_rebuild_job')] #[ORM\Table(name: 'tag_rebuild_job')]
#[ORM\Index(columns: ['status'], name: 'idx_tag_rebuild_job_status')] #[ORM\Index(name: 'idx_tag_rebuild_job_status', columns: ['status'])]
#[ORM\Index(columns: ['created_at'], name: 'idx_tag_rebuild_job_created_at')] #[ORM\Index(name: 'idx_tag_rebuild_job_created_at', columns: ['created_at'])]
class TagRebuildJob class TagRebuildJob
{ {
public const STATUS_QUEUED = 'QUEUED'; public const STATUS_QUEUED = 'QUEUED';
@@ -18,6 +18,8 @@ class TagRebuildJob
public const STATUS_COMPLETED = 'COMPLETED'; public const STATUS_COMPLETED = 'COMPLETED';
public const STATUS_FAILED = 'FAILED'; public const STATUS_FAILED = 'FAILED';
private const ERROR_MESSAGE_MAX_LENGTH = 4000;
#[ORM\Id] #[ORM\Id]
#[ORM\Column(type: 'uuid', unique: true)] #[ORM\Column(type: 'uuid', unique: true)]
private Uuid $id; private Uuid $id;
@@ -44,6 +46,19 @@ class TagRebuildJob
$this->status = self::STATUS_QUEUED; $this->status = self::STATUS_QUEUED;
} }
/**
 * List every status value a rebuild job can have.
 *
 * @return list<string> QUEUED, RUNNING, COMPLETED, FAILED — in that order.
 */
public static function statuses(): array
{
    $all = [];
    $all[] = self::STATUS_QUEUED;
    $all[] = self::STATUS_RUNNING;
    $all[] = self::STATUS_COMPLETED;
    $all[] = self::STATUS_FAILED;

    return $all;
}
public function getId(): Uuid public function getId(): Uuid
{ {
return $this->id; return $this->id;
@@ -54,24 +69,59 @@ class TagRebuildJob
return $this->status; return $this->status;
} }
/**
 * @return bool True when the job's status is STATUS_QUEUED.
 */
public function isQueued(): bool
{
return $this->status === self::STATUS_QUEUED;
}
/**
 * @return bool True when the job's status is STATUS_RUNNING.
 */
public function isRunning(): bool
{
return $this->status === self::STATUS_RUNNING;
}
/**
 * @return bool True when the job's status is STATUS_COMPLETED.
 */
public function isCompleted(): bool
{
return $this->status === self::STATUS_COMPLETED;
}
/**
 * Tells whether the job status is STATUS_FAILED.
 */
public function isFailed(): bool
{
    return self::STATUS_FAILED === $this->status;
}
/**
 * Tells whether the job is still pending work, i.e. queued or running.
 */
public function isActive(): bool
{
    if ($this->isQueued()) {
        return true;
    }

    return $this->isRunning();
}
public function markRunning(): void public function markRunning(): void
{ {
$this->status = self::STATUS_RUNNING; $this->status = self::STATUS_RUNNING;
$this->startedAt = new \DateTimeImmutable(); $this->startedAt = new \DateTimeImmutable();
$this->finishedAt = null;
$this->errorMessage = null; $this->errorMessage = null;
} }
public function markCompleted(): void public function markCompleted(): void
{ {
if ($this->startedAt === null) {
$this->startedAt = new \DateTimeImmutable();
}
$this->status = self::STATUS_COMPLETED; $this->status = self::STATUS_COMPLETED;
$this->finishedAt = new \DateTimeImmutable(); $this->finishedAt = new \DateTimeImmutable();
$this->errorMessage = null;
} }
public function markFailed(string $message): void public function markFailed(string $message): void
{ {
if ($this->startedAt === null) {
$this->startedAt = new \DateTimeImmutable();
}
$this->status = self::STATUS_FAILED; $this->status = self::STATUS_FAILED;
$this->finishedAt = new \DateTimeImmutable(); $this->finishedAt = new \DateTimeImmutable();
$this->errorMessage = $message; $this->errorMessage = $this->normalizeErrorMessage($message);
} }
public function getCreatedAt(): \DateTimeImmutable public function getCreatedAt(): \DateTimeImmutable
@@ -93,4 +143,19 @@ class TagRebuildJob
{ {
return $this->errorMessage; return $this->errorMessage;
} }
/**
 * Prepares a failure message for storage on the job.
 *
 * Surrounding whitespace is removed, an empty message is replaced by a
 * generic fallback text, and the result is cut to at most
 * ERROR_MESSAGE_MAX_LENGTH characters.
 *
 * The result is never null, so the return type is a plain string.
 *
 * @param string $message Raw failure message.
 *
 * @return string Non-empty message of at most ERROR_MESSAGE_MAX_LENGTH characters.
 */
private function normalizeErrorMessage(string $message): string
{
    $message = trim($message);

    if ($message === '') {
        return 'Unknown tag rebuild failure.';
    }

    // mb_substr() returns the whole string when it has fewer characters than
    // the requested length, so no separate length check is needed.
    return mb_substr($message, 0, self::ERROR_MESSAGE_MAX_LENGTH);
}
} }

View File

@@ -6,82 +6,132 @@ namespace App\Intent;
use App\Config\CatalogIntentConfig; use App\Config\CatalogIntentConfig;
use App\Knowledge\Retrieval\QueryCleaner; use App\Knowledge\Retrieval\QueryCleaner;
use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes; use App\Tag\TagTypes;
use App\Tag\TagVectorSearchClient;
/** /**
* CatalogIntentLite * Lightweight catalog entity detector.
* *
* Reiner Entity-Detector. * Responsibilities:
* * - clean the user query for tag lookup
* Verantwortlich nur für: * - query the tag vector index
* - Vector-Tag-Erkennung * - keep only catalog_entity hits
* - Score-Gate * - apply confidence and ambiguity gates
* - Ambiguity-Check * - return one canonical entity label or null
* - Sicherstellen, dass TagType = catalog_entity
*
* KEIN:
* - Listen-Signal
* - SalesIntent
* - Routing
*/ */
final readonly class CatalogIntentLite final readonly class CatalogIntentLite
{ {
/**
* Slightly wider than the old top-3 search so generic tags do not crowd out
* relevant catalog_entity hits too easily.
*/
private const SEARCH_LIMIT = 6;
public function __construct( public function __construct(
private TagVectorSearchClient $tagVectorClient, private TagVectorSearchClient $tagVectorClient,
private QueryCleaner $queryCleaner private QueryCleaner $queryCleaner,
) {} ) {
}
/** /**
* Gibt das canonical Label der erkannten catalog_entity zurück * Returns the canonical normalized label of the detected catalog entity,
* oder null, wenn kein sauberer Treffer. * or null when no safe entity match exists.
*/ */
public function detect(string $prompt): ?string public function detect(string $prompt): ?string
{ {
$prompt = trim($prompt); $prompt = trim($prompt);
if ($prompt === '') { if ($prompt === '') {
return null; return null;
} }
$promptTag = $this->queryCleaner->clean($prompt); $cleanQuery = trim($this->queryCleaner->clean($prompt));
// 1) Tag-Vector-Suche if ($cleanQuery === '') {
$hits = $this->tagVectorClient->search($promptTag, 3);
if ($hits === []) {
return null; return null;
} }
$best = $hits[0]; $catalogHits = $this->filterCatalogEntityHits(
$this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT)
);
if ($catalogHits === []) {
return null;
}
$best = $catalogHits[0];
$bestScore = (float) ($best['score'] ?? 0.0); $bestScore = (float) ($best['score'] ?? 0.0);
// 2) Score-Tags
if ($bestScore < CatalogIntentConfig::MIN_SCORE) { if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
return null; return null;
} }
// 3) Ambiguity-Check if (isset($catalogHits[1])) {
if (isset($hits[1])) { $secondScore = (float) ($catalogHits[1]['score'] ?? 0.0);
$secondScore = (float)($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
return null; return null;
} }
} }
// 4) Nur catalog_entity zulassen $label = $this->normalizeLabel((string) ($best['label'] ?? ''));
if (($best['tag_type'] ?? null) !== TagTypes::CATALOG_ENTITY) {
return null; return $label !== '' ? $label : null;
} }
// 5) Canonical Label /**
$label = trim((string)($best['label'] ?? '')); * @param array<int, array{
* tag_id:string,
* score:float,
* label?:string,
* tag_type?:string
* }> $hits
*
* @return list<array{
* tag_id:string,
* score:float,
* label?:string,
* tag_type:string
* }>
*/
private function filterCatalogEntityHits(array $hits): array
{
$filtered = [];
if ($label === '') { foreach ($hits as $hit) {
return null; $tagId = trim((string) ($hit['tag_id'] ?? ''));
$score = (float) ($hit['score'] ?? 0.0);
$tagType = TagTypes::normalize((string) ($hit['tag_type'] ?? TagTypes::GENERIC));
if ($tagId === '') {
continue;
} }
return mb_strtolower($label); if ($tagType !== TagTypes::CATALOG_ENTITY) {
continue;
}
$filtered[] = [
'tag_id' => $tagId,
'score' => $score,
'label' => isset($hit['label']) ? (string) $hit['label'] : null,
'tag_type' => $tagType,
];
}
usort(
$filtered,
static fn (array $left, array $right): int => ($right['score'] <=> $left['score'])
);
return $filtered;
}
private function normalizeLabel(string $label): string
{
$label = mb_strtolower(trim($label));
$label = preg_replace('/\s+/u', ' ', $label) ?? $label;
return trim($label);
} }
} }

View File

@@ -8,65 +8,99 @@ use App\Entity\Document;
use App\Entity\Tag; use App\Entity\Tag;
use App\Service\TagRebuildJobService; use App\Service\TagRebuildJobService;
use App\Tag\TagService; use App\Tag\TagService;
use App\Tag\TagTypes;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
final class DocumentTagAdminService final readonly class DocumentTagAdminService
{ {
public function __construct( public function __construct(
private readonly EntityManagerInterface $em, private EntityManagerInterface $em,
private readonly TagService $tagService, private TagService $tagService,
private readonly TagRebuildJobService $jobs, private TagRebuildJobService $jobs,
) {} ) {
}
/** /**
* @return array{ * @return array{
* document: Document, * document: Document,
* allTags: list<Tag>, * allTags: list<Tag>,
* latestJob: mixed * latestJob: mixed,
* hasActiveJob: bool
* } * }
*/ */
public function getEditData(string $documentId): array public function getEditData(string $documentId): array
{ {
$document = $this->em->getRepository(Document::class)->find($documentId); $document = $this->findDocumentById($documentId);
if (!$document instanceof Document) {
throw new \RuntimeException('Document not found');
}
/** @var list<Tag> $allTags */ /** @var list<Tag> $allTags */
$allTags = $this->em->createQueryBuilder() $allTags = $this->em->getRepository(Tag::class)->findAll();
->select('t')
->from(Tag::class, 't')
->orderBy('t.label', 'ASC')
->getQuery()
->getResult();
$latestJob = $this->jobs->getLatestJob(); usort(
$allTags,
static function (Tag $left, Tag $right): int {
$typeOrder = [
TagTypes::CATALOG_ENTITY => 10,
TagTypes::GENERIC => 20,
TagTypes::SALES_SIGNAL => 30,
];
$leftTypeRank = $typeOrder[$left->getType()] ?? 999;
$rightTypeRank = $typeOrder[$right->getType()] ?? 999;
if ($leftTypeRank !== $rightTypeRank) {
return $leftTypeRank <=> $rightTypeRank;
}
$labelComparison = strcasecmp($left->getLabel(), $right->getLabel());
if ($labelComparison !== 0) {
return $labelComparison;
}
return strcmp($left->getSlug(), $right->getSlug());
}
);
return [ return [
'document' => $document, 'document' => $document,
'allTags' => $allTags, 'allTags' => $allTags,
'latestJob' => $latestJob, 'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(),
]; ];
} }
/** /**
* Speichert die Tag-Auswahl für ein Dokument (inkl. Sync-Logik). * Persists the selected tag set for a document via the central domain service.
*
* @param array<mixed> $selectedTagIds
*/ */
public function saveTags(string $documentId, array $selectedTagIds): void public function saveTags(string $documentId, array $selectedTagIds): void
{ {
$document = $this->em->getRepository(Document::class)->find($documentId); $document = $this->findDocumentById($documentId);
if (!$document instanceof Document) {
throw new \RuntimeException('Document not found');
}
// Delegation an deine Domain-Logik (bleibt dort, wo sie hingehört)
$this->tagService->syncDocumentTags($document, $selectedTagIds); $this->tagService->syncDocumentTags($document, $selectedTagIds);
} }
public function getLatestRebuildStatus(): ?string public function getLatestRebuildStatus(): ?string
{ {
$job = $this->jobs->getLatestJob(); return $this->jobs->getLatestJob()?->getStatus();
}
return $job?->getStatus(); private function findDocumentById(string $documentId): Document
{
$documentId = trim($documentId);
if ($documentId === '') {
throw new RuntimeException('Document not found.');
}
$document = $this->em->getRepository(Document::class)->find($documentId);
if (!$document instanceof Document) {
throw new RuntimeException('Document not found.');
}
return $document;
} }
} }

View File

@@ -9,7 +9,9 @@ use App\Entity\DocumentTag;
use App\Entity\Tag; use App\Entity\Tag;
use App\Service\TagRebuildJobService; use App\Service\TagRebuildJobService;
use App\Tag\TagService; use App\Tag\TagService;
use App\Tag\TagTypes;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
final readonly class TagAdminService final readonly class TagAdminService
{ {
@@ -17,15 +19,19 @@ final readonly class TagAdminService
private EntityManagerInterface $em, private EntityManagerInterface $em,
private TagService $tagService, private TagService $tagService,
private TagRebuildJobService $jobs, private TagRebuildJobService $jobs,
) {} ) {
}
public function getIndexData(): array public function getIndexData(): array
{ {
/** @var list<Tag> $tags */
$tags = $this->em->getRepository(Tag::class) $tags = $this->em->getRepository(Tag::class)
->findBy([], ['label' => 'ASC']); ->findBy([], ['type' => 'ASC', 'label' => 'ASC']);
return [ return [
'tags' => $tags, 'tags' => $tags,
'tagTypeChoices' => TagTypes::choices(),
'documentCountByTagId' => $this->buildDocumentCountByTagId(),
'latestJob' => $this->jobs->getLatestJob(), 'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(), 'hasActiveJob' => $this->jobs->hasActiveJob(),
]; ];
@@ -35,7 +41,7 @@ final readonly class TagAdminService
string $slug, string $slug,
string $label, string $label,
?string $description, ?string $description,
string $type = 'generic' // NEU string $type = TagTypes::GENERIC,
): void { ): void {
$this->tagService->create($slug, $label, $description, $type); $this->tagService->create($slug, $label, $description, $type);
} }
@@ -47,35 +53,47 @@ final readonly class TagAdminService
public function getAssignData(string $tagId): array public function getAssignData(string $tagId): array
{ {
$tag = $this->em->getRepository(Tag::class)->find($tagId); $tag = $this->findTagById($tagId);
if (!$tag instanceof Tag) { /** @var list<Document> $documents */
throw new \RuntimeException('Tag nicht gefunden.'); $documents = $this->em->getRepository(Document::class)->findBy(
} ['status' => Document::STATUS_ACTIVE],
['title' => 'ASC']
$documents = $this->em->getRepository(Document::class)->findAll(); );
$documentsData = array_map( $documentsData = array_map(
fn(Document $d) => [ static fn (Document $document): array => [
'id' => (string)$d->getId(), 'id' => (string) $document->getId(),
'title' => $d->getTitle(), 'title' => $document->getTitle(),
], ],
$documents $documents
); );
/** @var list<DocumentTag> $existingRelations */
$existingRelations = $this->em $existingRelations = $this->em
->getRepository(DocumentTag::class) ->getRepository(DocumentTag::class)
->findBy(['tag' => $tag]); ->findBy(['tag' => $tag]);
$assignedDocIds = array_map( $activeDocumentIds = array_map(
fn(DocumentTag $dt) => (string)$dt->getDocument()->getId(), static fn (Document $document): string => (string) $document->getId(),
$existingRelations $documents
); );
$assignedDocIds = [];
foreach ($existingRelations as $relation) {
$documentId = (string) $relation->getDocument()->getId();
if (in_array($documentId, $activeDocumentIds, true)) {
$assignedDocIds[] = $documentId;
}
}
return [ return [
'tag' => $tag, 'tag' => $tag,
'documents' => $documentsData, 'documents' => $documentsData,
'assignedDocIds' => $assignedDocIds, 'assignedDocIds' => array_values(array_unique($assignedDocIds)),
'tagTypeChoices' => TagTypes::choices(),
'latestJob' => $this->jobs->getLatestJob(), 'latestJob' => $this->jobs->getLatestJob(),
'hasActiveJob' => $this->jobs->hasActiveJob(), 'hasActiveJob' => $this->jobs->hasActiveJob(),
]; ];
@@ -83,12 +101,55 @@ final readonly class TagAdminService
public function syncAssignments(string $tagId, array $selectedDocIds): void public function syncAssignments(string $tagId, array $selectedDocIds): void
{ {
$tag = $this->findTagById($tagId);
$this->tagService->syncTagDocuments($tag, $selectedDocIds);
}
private function findTagById(string $tagId): Tag
{
$tagId = trim($tagId);
if ($tagId === '') {
throw new RuntimeException('Tag nicht gefunden.');
}
$tag = $this->em->getRepository(Tag::class)->find($tagId); $tag = $this->em->getRepository(Tag::class)->find($tagId);
if (!$tag instanceof Tag) { if (!$tag instanceof Tag) {
throw new \RuntimeException('Tag nicht gefunden.'); throw new RuntimeException('Tag nicht gefunden.');
} }
$this->tagService->syncTagDocuments($tag, $selectedDocIds); return $tag;
}
/**
* @return array<string, int>
*/
private function buildDocumentCountByTagId(): array
{
$rows = $this->em->createQueryBuilder()
->select('t AS tag', 'COUNT(d.id) AS documentCount')
->from(Tag::class, 't')
->leftJoin(DocumentTag::class, 'dt', 'WITH', 'dt.tag = t')
->leftJoin('dt.document', 'd', 'WITH', 'd.status = :status')
->groupBy('t.id')
->setParameter('status', Document::STATUS_ACTIVE)
->getQuery()
->getResult();
$counts = [];
foreach ($rows as $row) {
$tag = $row[0] ?? $row['tag'] ?? null;
$documentCount = (int) ($row['documentCount'] ?? 0);
if (!$tag instanceof Tag) {
continue;
}
$counts[$tag->getId()->toRfc4122()] = $documentCount;
}
return $counts;
} }
} }

View File

@@ -1,29 +1,33 @@
<?php <?php
declare(strict_types=1);
namespace App\Service; namespace App\Service;
use App\Entity\Document; use App\Entity\Document;
use App\Entity\DocumentVersion; use App\Entity\DocumentVersion;
use App\Entity\User; use App\Entity\User;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use RuntimeException;
class DocumentService final readonly class DocumentService
{ {
public function __construct( public function __construct(
private EntityManagerInterface $em, private EntityManagerInterface $em,
) {} private TagRebuildJobService $tagRebuildJobService,
) {
}
/** /**
* Erstellt ein neues Dokument inkl. Version 1 * Creates a new document including version 1.
*/ */
public function createDocument( public function createDocument(
string $title, string $title,
string $filePath, string $filePath,
User $user User $user
): Document { ): Document {
$document = new Document(); $document = new Document();
$document->setTitle($title); $document->setTitle(trim($title));
$document->setCreatedBy($user); $document->setCreatedBy($user);
$version = new DocumentVersion(); $version = new DocumentVersion();
@@ -44,14 +48,13 @@ class DocumentService
} }
/** /**
* Fügt neue Version hinzu (immutable) * Adds a new immutable version to an existing document.
*/ */
public function addVersion( public function addVersion(
Document $document, Document $document,
string $filePath, string $filePath,
User $user User $user
): DocumentVersion { ): DocumentVersion {
$nextVersionNumber = $this->getNextVersionNumber($document); $nextVersionNumber = $this->getNextVersionNumber($document);
$version = new DocumentVersion(); $version = new DocumentVersion();
@@ -70,7 +73,7 @@ class DocumentService
} }
/** /**
* Aktiviert eine Version * Activates a document version and marks it for re-ingest.
*/ */
public function activateVersion(DocumentVersion $version): void public function activateVersion(DocumentVersion $version): void
{ {
@@ -82,41 +85,77 @@ class DocumentService
$version->setActive(true); $version->setActive(true);
$document->setCurrentVersion($version); $document->setCurrentVersion($version);
$version->setIngestStatus(DocumentVersion::INGEST_PENDING); $version->setIngestStatus(DocumentVersion::INGEST_PENDING);
$this->em->flush(); $this->em->flush();
} }
/** /**
* Archiviert Dokument * Archives a document.
*
* If the document had tag assignments, the tag index is rebuilt so the
* routing layer no longer works with an outdated active document set.
*/ */
public function archive(Document $document): void public function archive(Document $document): void
{ {
$document->archive(); if ($document->getStatus() === Document::STATUS_ARCHIVED) {
$this->em->flush(); return;
} }
public function delete(Document $document): void $shouldRebuildTags = $this->hasTagAssignments($document);
{
$this->em->remove($document); $document->archive();
$this->em->flush(); $this->em->flush();
if ($shouldRebuildTags) {
$this->triggerTagRebuildIfIdle();
}
} }
/** /**
* Berechnet SHA256 Checksum * Deletes a document.
*
* If the document had tag assignments, the tag index is rebuilt after the
* removal so stale document references disappear from tag-based routing.
*/
public function delete(Document $document): void
{
$shouldRebuildTags = $this->hasTagAssignments($document);
$this->em->remove($document);
$this->em->flush();
if ($shouldRebuildTags) {
$this->triggerTagRebuildIfIdle();
}
}
/**
* Calculates the SHA256 checksum for a file path.
*/ */
private function calculateChecksum(string $filePath): string private function calculateChecksum(string $filePath): string
{ {
if (!file_exists($filePath)) { $filePath = trim($filePath);
throw new \RuntimeException('File not found for checksum.');
if ($filePath === '') {
throw new RuntimeException('File path must not be empty.');
} }
return hash_file('sha256', $filePath); if (!is_file($filePath)) {
throw new RuntimeException('File not found for checksum.');
}
$checksum = hash_file('sha256', $filePath);
if ($checksum === false) {
throw new RuntimeException('Could not calculate file checksum.');
}
return $checksum;
} }
/** /**
* Ermittelt nächste Versionsnummer * Determines the next version number for a document.
*/ */
private function getNextVersionNumber(Document $document): int private function getNextVersionNumber(Document $document): int
{ {
@@ -128,4 +167,16 @@ class DocumentService
return $max + 1; return $max + 1;
} }
/**
 * Tells whether the document has at least one entry in its document-tag
 * collection.
 */
private function hasTagAssignments(Document $document): bool
{
    $assignmentCount = $document->getDocumentTags()->count();

    return $assignmentCount > 0;
}
/**
 * Requests a tag index rebuild unless the job service reports an active job.
 *
 * Delegates to TagRebuildJobService::enqueueIfIdle(), which performs the
 * same hasActiveJob() check before enqueueing, so the coalescing rule lives
 * in one place instead of being repeated here.
 *
 * NOTE(review): the job service rethrows when the async runner cannot be
 * started, and this method is called after the document change has already
 * been flushed — confirm that callers expect that exception.
 */
private function triggerTagRebuildIfIdle(): void
{
    $this->tagRebuildJobService->enqueueIfIdle();
}
} }

View File

@@ -11,16 +11,24 @@ use Psr\Log\LoggerInterface;
final readonly class TagRebuildJobService final readonly class TagRebuildJobService
{ {
/** /**
* Wenn ein QUEUED-Job länger nicht startet, gilt er als "stale" und wird auf FAILED gesetzt, * If a QUEUED job does not transition into RUNNING in time,
* damit das System nicht dauerhaft blockiert. * it is treated as stale so the system does not stay blocked forever.
*/ */
private const STALE_QUEUED_AFTER_SECONDS = 300; // 5 Minuten private const STALE_QUEUED_AFTER_SECONDS = 300;
/**
* The background runner should switch the job from QUEUED to RUNNING almost
* immediately because markRunning() happens at the top of the command.
*/
private const ASYNC_START_TIMEOUT_SECONDS = 3;
private const ASYNC_START_POLL_INTERVAL_MICROSECONDS = 250000;
public function __construct( public function __construct(
private EntityManagerInterface $em, private EntityManagerInterface $em,
private LoggerInterface $agentLogger, private LoggerInterface $agentLogger,
private string $projectDir, private string $projectDir,
) {} ) {
}
public function enqueueAndStartAsync(): TagRebuildJob public function enqueueAndStartAsync(): TagRebuildJob
{ {
@@ -29,14 +37,25 @@ final readonly class TagRebuildJobService
$this->em->persist($job); $this->em->persist($job);
$this->em->flush(); $this->em->flush();
try {
$this->startAsync($job); $this->startAsync($job);
} catch (\Throwable $e) {
$job->markFailed('Async tag rebuild start failed: ' . $e->getMessage());
$this->em->flush();
$this->agentLogger->error('[tags] async job start failed', [
'job' => (string) $job->getId(),
'error' => $e->getMessage(),
]);
throw $e;
}
return $job; return $job;
} }
public function enqueueIfIdle(): ?TagRebuildJob public function enqueueIfIdle(): ?TagRebuildJob
{ {
// Coalescing: Wenn ein Job läuft oder queued ist -> nichts tun
if ($this->hasActiveJob()) { if ($this->hasActiveJob()) {
return null; return null;
} }
@@ -44,23 +63,18 @@ final readonly class TagRebuildJobService
return $this->enqueueAndStartAsync(); return $this->enqueueAndStartAsync();
} }
/**
* Letzter Job (egal welcher Status).
*/
public function getLatestJob(): ?TagRebuildJob public function getLatestJob(): ?TagRebuildJob
{ {
return $this->em->createQueryBuilder() return $this->em->createQueryBuilder()
->select('j') ->select('j')
->from(TagRebuildJob::class, 'j') ->from(TagRebuildJob::class, 'j')
->orderBy('j.createdAt', 'DESC') ->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1) ->setMaxResults(1)
->getQuery() ->getQuery()
->getOneOrNullResult(); ->getOneOrNullResult();
} }
/**
* Letzter Job mit Status COMPLETED.
*/
public function getLatestCompletedJob(): ?TagRebuildJob public function getLatestCompletedJob(): ?TagRebuildJob
{ {
return $this->em->createQueryBuilder() return $this->em->createQueryBuilder()
@@ -69,18 +83,12 @@ final readonly class TagRebuildJobService
->where('j.status = :status') ->where('j.status = :status')
->setParameter('status', TagRebuildJob::STATUS_COMPLETED) ->setParameter('status', TagRebuildJob::STATUS_COMPLETED)
->orderBy('j.createdAt', 'DESC') ->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1) ->setMaxResults(1)
->getQuery() ->getQuery()
->getOneOrNullResult(); ->getOneOrNullResult();
} }
/**
* Ob gerade ein Job aktiv ist:
* - RUNNING ist immer aktiv
* - QUEUED ist nur aktiv, wenn er nicht stale ist
*
* Zusätzlich: stale QUEUED Jobs werden auf FAILED gesetzt (Recovery).
*/
public function hasActiveJob(): bool public function hasActiveJob(): bool
{ {
$this->markStaleQueuedJobsFailed(); $this->markStaleQueuedJobsFailed();
@@ -106,31 +114,33 @@ final readonly class TagRebuildJobService
return (int) $qb->getQuery()->getSingleScalarResult() > 0; return (int) $qb->getQuery()->getSingleScalarResult() > 0;
} }
/**
* Startet den Job async über bin/console.
* Wichtige Fixes:
* - php explizit verwenden
* - --no-interaction
* - Logfile statt /dev/null
*/
private function startAsync(TagRebuildJob $job): void private function startAsync(TagRebuildJob $job): void
{ {
$projectDir = rtrim($this->projectDir, '/'); $projectDir = rtrim(trim($this->projectDir), '/');
$console = $projectDir . '/bin/console'; $console = $projectDir . '/bin/console';
if ($projectDir === '' || !is_dir($projectDir)) {
throw new \RuntimeException('Project directory is invalid.');
}
if (!is_file($console)) {
throw new \RuntimeException('bin/console not found: ' . $console);
}
$phpBinary = $this->resolvePhpBinary();
$jobId = (string) $job->getId(); $jobId = (string) $job->getId();
$logDir = $projectDir . '/var/log/tags'; $logDir = $projectDir . '/var/log/tags';
if (!is_dir($logDir)) { if (!is_dir($logDir) && !@mkdir($logDir, 0775, true) && !is_dir($logDir)) {
@mkdir($logDir, 0777, true); throw new \RuntimeException('Could not create tag job log directory.');
} }
$logFile = $logDir . '/job_' . $jobId . '.log'; $logFile = $logDir . '/job_' . $jobId . '.log';
// Robust: cd ins Projekt, dann nohup php bin/console ...
$cmd = sprintf( $cmd = sprintf(
'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 &', 'cd %s && nohup %s %s %s %s --no-interaction >> %s 2>&1 & echo $!',
escapeshellarg($projectDir), escapeshellarg($projectDir),
escapeshellcmd('php'), escapeshellarg($phpBinary),
escapeshellarg($console), escapeshellarg($console),
escapeshellarg('mto:agent:tags:job:run'), escapeshellarg('mto:agent:tags:job:run'),
escapeshellarg($jobId), escapeshellarg($jobId),
@@ -141,15 +151,92 @@ final readonly class TagRebuildJobService
'job' => $jobId, 'job' => $jobId,
'cmd' => $cmd, 'cmd' => $cmd,
'log' => $logFile, 'log' => $logFile,
'php_binary' => $phpBinary,
]); ]);
@exec($cmd); $output = [];
$exitCode = 0;
@exec($cmd, $output, $exitCode);
$pid = isset($output[0]) ? trim((string) $output[0]) : '';
if ($exitCode !== 0) {
throw new \RuntimeException('Async process bootstrap failed with exit code ' . $exitCode . '.');
}
if ($pid === '' || !ctype_digit($pid)) {
throw new \RuntimeException('Async process bootstrap did not return a valid PID.');
}
$this->agentLogger->info('[tags] async job process started', [
'job' => $jobId,
'pid' => $pid,
'log' => $logFile,
'php_binary' => $phpBinary,
]);
$this->waitForAsyncJobTransition($job, $logFile);
}
/**
 * Finds the path of a PHP CLI executable to launch the background job with.
 *
 * Candidates are tried in this order; the first one accepted by
 * isValidCliPhpBinary() is returned:
 *   1. PHP_CLI_BINARY from $_SERVER, then $_ENV, then getenv()
 *   2. the PHP_BINARY constant
 *   3. a fixed list of common install paths
 *   4. the output of `command -v php`
 *
 * @throws \RuntimeException When no candidate is usable.
 */
private function resolvePhpBinary(): string
{
    $knownPaths = [
        trim((string) ($_SERVER['PHP_CLI_BINARY'] ?? '')),
        trim((string) ($_ENV['PHP_CLI_BINARY'] ?? '')),
        trim((string) getenv('PHP_CLI_BINARY')),
        defined('PHP_BINARY') ? trim((string) PHP_BINARY) : '',
        '/usr/bin/php',
        '/usr/local/bin/php',
        '/bin/php',
        '/opt/homebrew/bin/php',
    ];

    foreach ($knownPaths as $path) {
        if ($this->isValidCliPhpBinary($path)) {
            return $path;
        }
    }

    // Last resort: ask the shell. This only runs when every path above failed.
    $shellPath = trim((string) @shell_exec('command -v php 2>/dev/null'));
    if ($this->isValidCliPhpBinary($shellPath)) {
        return $shellPath;
    }

    throw new \RuntimeException(
        'Could not resolve a CLI PHP binary. Set PHP_CLI_BINARY explicitly, e.g. /usr/bin/php.'
    );
}
/**
 * Checks whether a path points to an executable file whose base name does
 * not contain "fpm" or "cgi" (case-insensitive).
 */
private function isValidCliPhpBinary(string $path): bool
{
    $candidate = trim($path);

    if ($candidate === '') {
        return false;
    }

    if (!is_file($candidate)) {
        return false;
    }

    if (!is_executable($candidate)) {
        return false;
    }

    $name = strtolower(basename($candidate));

    return !str_contains($name, 'fpm') && !str_contains($name, 'cgi');
}
/**
* Recovery gegen "ewig QUEUED":
* Setzt alte QUEUED Jobs auf FAILED, damit enqueueIfIdle() nicht dauerhaft blockiert.
*/
private function markStaleQueuedJobsFailed(): void private function markStaleQueuedJobsFailed(): void
{ {
$cutoff = new \DateTimeImmutable('-' . self::STALE_QUEUED_AFTER_SECONDS . ' seconds'); $cutoff = new \DateTimeImmutable('-' . self::STALE_QUEUED_AFTER_SECONDS . ' seconds');
@@ -161,12 +248,13 @@ final readonly class TagRebuildJobService
->andWhere('j.createdAt < :cutoff') ->andWhere('j.createdAt < :cutoff')
->setParameter('queued', TagRebuildJob::STATUS_QUEUED) ->setParameter('queued', TagRebuildJob::STATUS_QUEUED)
->setParameter('cutoff', $cutoff) ->setParameter('cutoff', $cutoff)
->orderBy('j.createdAt', 'ASC')
->setMaxResults(25); ->setMaxResults(25);
/** @var TagRebuildJob[] $stale */ /** @var list<TagRebuildJob> $stale */
$stale = $qb->getQuery()->getResult(); $stale = $qb->getQuery()->getResult();
if (!$stale) { if ($stale === []) {
return; return;
} }
@@ -183,4 +271,46 @@ final readonly class TagRebuildJobService
$this->em->flush(); $this->em->flush();
} }
/**
 * Polls the job until it is no longer QUEUED or the start timeout elapses.
 *
 * Each round sleeps for the poll interval, reloads the job through the
 * entity manager and returns as soon as the job has left the QUEUED state.
 *
 * @throws \RuntimeException When the job is still QUEUED at the deadline;
 *                           the message includes the log tail if one can be read.
 */
private function waitForAsyncJobTransition(TagRebuildJob $job, string $logFile): void
{
    $deadline = microtime(true) + self::ASYNC_START_TIMEOUT_SECONDS;

    for ($now = microtime(true); $now < $deadline; $now = microtime(true)) {
        usleep(self::ASYNC_START_POLL_INTERVAL_MICROSECONDS);

        // Reload the entity so the status written by the runner becomes visible.
        $this->em->refresh($job);

        if (!$job->isQueued()) {
            return;
        }
    }

    $message = 'Async tag rebuild runner did not transition from QUEUED to RUNNING within '
        . self::ASYNC_START_TIMEOUT_SECONDS
        . ' seconds.';

    $logHint = $this->readLogTail($logFile);
    if ($logHint !== null) {
        $message .= ' Log tail: ' . $logHint;
    }

    throw new \RuntimeException($message);
}
/**
 * Returns the last 800 characters of a log file as a single line.
 *
 * Whitespace runs are collapsed to one space. Null is returned when the
 * file is missing, unreadable, or contains nothing but whitespace.
 */
private function readLogTail(string $logFile): ?string
{
    $isReadableFile = is_file($logFile) && is_readable($logFile);
    if (!$isReadableFile) {
        return null;
    }

    $raw = @file_get_contents($logFile);
    if (!is_string($raw)) {
        return null;
    }

    $text = trim($raw);
    if ($text === '') {
        return null;
    }

    $lastChars = mb_substr($text, -800);

    // preg_replace() returns null on failure; fall back to the uncollapsed tail.
    $collapsed = trim(preg_replace('/\s+/u', ' ', $lastChars) ?? $lastChars);

    return $collapsed === '' ? null : $collapsed;
}
} }

View File

@@ -11,29 +11,76 @@ final readonly class TagRebuildStatusProvider
{ {
public function __construct( public function __construct(
private EntityManagerInterface $em private EntityManagerInterface $em
) {} ) {
}
public function getLatestStatus(): ?array public function getLatestStatus(): ?array
{ {
$this->em->clear(); $row = $this->em->createQueryBuilder()
->select(
$job = $this->em->createQueryBuilder() 'j.status AS status',
->select('j') 'j.createdAt AS createdAt',
'j.startedAt AS startedAt',
'j.finishedAt AS finishedAt',
'j.errorMessage AS errorMessage'
)
->from(TagRebuildJob::class, 'j') ->from(TagRebuildJob::class, 'j')
->orderBy('j.createdAt', 'DESC') ->orderBy('j.createdAt', 'DESC')
->addOrderBy('j.id', 'DESC')
->setMaxResults(1) ->setMaxResults(1)
->getQuery() ->getQuery()
->getOneOrNullResult(); ->getOneOrNullResult(\Doctrine\ORM\Query::HYDRATE_ARRAY);
if (!$job instanceof TagRebuildJob) { if (!is_array($row)) {
return null;
}
$status = trim((string) ($row['status'] ?? ''));
if ($status === '') {
return null; return null;
} }
return [ return [
'status' => $job->getStatus(), 'status' => $status,
'startedAt' => $job->getStartedAt()?->format(DATE_ATOM), 'createdAt' => $this->formatDateValue($row['createdAt'] ?? null),
'finishedAt' => $job->getFinishedAt()?->format(DATE_ATOM), 'startedAt' => $this->formatDateValue($row['startedAt'] ?? null),
'error' => $job->getErrorMessage(), 'finishedAt' => $this->formatDateValue($row['finishedAt'] ?? null),
'error' => $this->normalizeNullableString($row['errorMessage'] ?? null),
'hasActiveJob' => in_array($status, [
TagRebuildJob::STATUS_QUEUED,
TagRebuildJob::STATUS_RUNNING,
], true),
]; ];
} }
/**
 * Converts a date value to an ATOM-formatted string.
 *
 * Date objects are formatted directly. Non-empty strings are parsed with
 * \DateTimeImmutable first. Everything else, including empty or unparsable
 * strings, yields null.
 */
private function formatDateValue(mixed $value): ?string
{
    if ($value instanceof \DateTimeInterface) {
        return $value->format(DATE_ATOM);
    }

    if (!is_string($value)) {
        return null;
    }

    $text = trim($value);
    if ($text === '') {
        return null;
    }

    try {
        $parsed = new \DateTimeImmutable($text);
    } catch (\Throwable) {
        // An unparsable date string is treated as "no date".
        return null;
    }

    return $parsed->format(DATE_ATOM);
}
/**
 * Converts a value to a trimmed string, or null when nothing usable remains.
 *
 * Null, values that cannot be cast to a string (arrays, objects without
 * __toString, resources), and values that are empty after trimming all
 * yield null. Rejecting non-stringable values up front avoids the
 * "Array to string conversion" warning and the Error that a blind
 * (string) cast would raise.
 *
 * @param mixed $value Raw value, e.g. a column from an array-hydrated row.
 *
 * @return string|null Trimmed non-empty string, or null.
 */
private function normalizeNullableString(mixed $value): ?string
{
    if ($value === null) {
        return null;
    }

    if (!is_scalar($value) && !$value instanceof \Stringable) {
        return null;
    }

    $text = trim((string) $value);

    return $text !== '' ? $text : null;
}
} }

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Tag; namespace App\Tag;
use App\Entity\Document;
use App\Entity\DocumentTag; use App\Entity\DocumentTag;
use App\Entity\Tag; use App\Entity\Tag;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
@@ -13,147 +14,198 @@ final readonly class TagNdjsonExporter
public function __construct( public function __construct(
private EntityManagerInterface $em, private EntityManagerInterface $em,
private string $tagsNdjsonPath, private string $tagsNdjsonPath,
) {} ) {
}
/**
 * Export all relevant tags into NDJSON (streaming) with atomic switch (.tmp + rename()).
 *
 * Line format:
 * {
 *   "tag_id":"...",
 *   "text":"label\nslug\noptional description",
 *   "type":"catalog_entity|generic|sales_signal",
 *   "document_ids":["...","..."]
 * }
 *
 * Only ACTIVE document assignments are exported. Tags without active document
 * assignments are intentionally skipped so they do not influence retrieval.
 * When no tags exist at all, an empty file replaces the previous export.
 *
 * @return array{tags:int, lines:int, bytes:int, path:string}
 *
 * @throws \RuntimeException when the temporary file cannot be opened or written,
 *                           or when the final replace fails
 */
public function export(): array
{
    $this->ensureTargetDirectoryExists();

    $tmpPath = $this->tagsNdjsonPath . '.tmp';
    $this->cleanupTemporaryFile($tmpPath);

    $fh = @fopen($tmpPath, 'wb');
    if ($fh === false) {
        throw new \RuntimeException('Cannot write tags NDJSON: ' . $tmpPath);
    }

    $tagCount = 0;
    $lines = 0;

    try {
        /** @var list<Tag> $tags */
        $tags = $this->em->createQueryBuilder()
            ->select('t')
            ->from(Tag::class, 't')
            ->orderBy('t.type', 'ASC')
            ->addOrderBy('t.label', 'ASC')
            ->getQuery()
            ->getResult();

        $tagCount = count($tags);

        // No tags means no relation lookup is needed; the loop below is a no-op.
        $tagToActiveDocs = $tags === [] ? [] : $this->buildActiveDocumentMap();

        foreach ($tags as $tag) {
            $tagId = $tag->getId()->toRfc4122();
            $docIds = $tagToActiveDocs[$tagId] ?? [];

            if ($docIds === []) {
                continue;
            }

            $line = [
                'tag_id' => $tagId,
                'text' => $this->buildEmbeddingText($tag),
                'type' => TagTypes::normalize($tag->getType()),
                'document_ids' => $docIds,
            ];

            $json = json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);

            if (!is_string($json)) {
                // Best effort: a tag that cannot be encoded is left out of the export.
                continue;
            }

            if (fwrite($fh, $json . "\n") === false) {
                throw new \RuntimeException('Cannot write tags NDJSON: ' . $tmpPath);
            }

            $lines++;
        }
    } catch (\Throwable $e) {
        // The handle is still open here; it is closed exactly once on this path.
        fclose($fh);
        $this->cleanupTemporaryFile($tmpPath);

        throw $e;
    }

    // Close and switch outside the try block so a failing replace never leads
    // to a second fclose() on an already closed handle (TypeError in PHP 8).
    fclose($fh);
    $this->atomicReplace($tmpPath, $this->tagsNdjsonPath);

    return [
        'tags' => $tagCount,
        'lines' => $lines,
        'bytes' => (int) @filesize($this->tagsNdjsonPath),
        'path' => $this->tagsNdjsonPath,
    ];
}
/**
 * Builds a map of tag id => sorted, de-duplicated ids of documents whose
 * status equals Document::STATUS_ACTIVE.
 *
 * @return array<string, list<string>>
 */
private function buildActiveDocumentMap(): array
{
    $query = $this->em->createQueryBuilder()
        ->select('dt')
        ->addSelect('t', 'd')
        ->from(DocumentTag::class, 'dt')
        ->innerJoin('dt.tag', 't')
        ->innerJoin('dt.document', 'd')
        ->where('d.status = :status')
        ->setParameter('status', Document::STATUS_ACTIVE)
        ->getQuery();

    /** @var list<DocumentTag> $activeRelations */
    $activeRelations = $query->getResult();

    // Document ids are used as keys first so duplicates collapse naturally.
    $docIdsByTag = [];
    foreach ($activeRelations as $activeRelation) {
        $relatedTag = $activeRelation->getTag();
        $relatedDocument = $activeRelation->getDocument();

        $tagKey = $relatedTag->getId()->toRfc4122();
        $docKey = $relatedDocument->getId()->toRfc4122();

        $docIdsByTag[$tagKey][$docKey] = $docKey;
    }

    $result = [];
    foreach ($docIdsByTag as $tagKey => $uniqueDocIds) {
        ksort($uniqueDocIds);
        $result[$tagKey] = array_values($uniqueDocIds);
    }

    return $result;
}
/**
 * Assembles the embedding text for a tag: trimmed label, trimmed slug and,
 * if present, the description with whitespace runs collapsed to one space.
 * Blank and repeated parts are dropped; parts are joined by newlines.
 */
private function buildEmbeddingText(Tag $tag): string
{
    $candidates = [
        trim($tag->getLabel()),
        trim($tag->getSlug()),
    ];

    $description = trim((string) $tag->getDescription());
    if ($description !== '') {
        $candidates[] = preg_replace('/\s+/u', ' ', $description) ?? $description;
    }

    $seen = [];
    $uniqueParts = [];
    foreach ($candidates as $candidate) {
        if ($candidate === '' || isset($seen[$candidate])) {
            continue;
        }

        $seen[$candidate] = true;
        $uniqueParts[] = $candidate;
    }

    return implode("\n", $uniqueParts);
}
/**
 * Creates the directory of the NDJSON target path (recursively) if missing.
 *
 * @throws \RuntimeException when the directory does not exist after mkdir()
 */
private function ensureTargetDirectoryExists(): void
{
    $targetDir = dirname($this->tagsNdjsonPath);

    // The second is_dir() covers a concurrent creation between check and mkdir().
    if (!is_dir($targetDir) && !@mkdir($targetDir, 0775, true) && !is_dir($targetDir)) {
        throw new \RuntimeException('Cannot create tags NDJSON directory: ' . $targetDir);
    }
}
/**
 * Removes the given temporary file if it exists; unlink errors are suppressed.
 */
private function cleanupTemporaryFile(string $tmpPath): void
{
    if (!is_file($tmpPath)) {
        return;
    }

    @unlink($tmpPath);
}
/**
 * Moves the temporary file over the final path.
 *
 * rename() is tried first; if it fails, the file is copied instead and the
 * temporary file is removed. Permissions of the final file are set to 0664
 * (best effort) before and after the switch.
 *
 * @throws \RuntimeException when both rename() and copy() fail
 */
private function atomicReplace(string $tmpPath, string $finalPath): void
{
    if (is_file($finalPath)) {
        @chmod($finalPath, 0664);
    }

    $renamed = @rename($tmpPath, $finalPath);
    if (!$renamed) {
        $copied = @copy($tmpPath, $finalPath);

        // The temporary file is removed whether or not the copy worked.
        @unlink($tmpPath);

        if (!$copied) {
            throw new \RuntimeException('Atomic replace failed for: ' . $finalPath);
        }
    }

    @chmod($finalPath, 0664);
}
} }

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Tag; namespace App\Tag;
use App\Entity\Document;
use Doctrine\DBAL\ArrayParameterType; use Doctrine\DBAL\ArrayParameterType;
use Doctrine\DBAL\Exception; use Doctrine\DBAL\Exception;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
@@ -11,91 +12,239 @@ use Symfony\Component\Uid\Uuid;
/**
 * Maps a free-text query to an ordered list of active document ids by way of
 * tag vector hits and the document_tag relation table.
 */
final class TagRoutingService
{
    /**
     * Number of raw tag hits requested from the vector service.
     */
    private const DEFAULT_TOPK = 8;

    /**
     * Hard minimum confidence required to activate tag-based document routing.
     *
     * This intentionally aligns with the tag vector client gate to avoid
     * misleading secondary thresholds in this class.
     */
    private const MIN_BEST_SCORE = 0.72;

    /**
     * Only keep tag hits that stay reasonably close to the best hit.
     * This reduces semantic spillover into weakly related document spaces.
     */
    private const MAX_SCORE_DROP_FROM_BEST = 0.08;

    /**
     * Maximum number of tag hits that may influence routing.
     */
    private const MAX_ROUTING_TAGS = 5;

    /**
     * Maximum number of candidate documents passed into scoped chunk search.
     */
    private const MAX_CANDIDATE_DOCS = 80;

    /**
     * Small bonus for documents matched by multiple routed tags.
     */
    private const MULTI_TAG_BONUS_PER_EXTRA_TAG = 0.05;
    private const MAX_MULTI_TAG_BONUS = 0.15;

    public function __construct(
        private readonly TagVectorSearchClient $tagSearch,
        private readonly EntityManagerInterface $em,
    ) {
    }

    /**
     * Returns ordered active document ids for tag-scoped retrieval.
     *
     * The method intentionally returns only document ids so the current
     * retriever pipeline can stay unchanged. Documents are ordered by the
     * summed (score * type weight) of their matched tags plus a capped bonus
     * for every additional matched tag.
     *
     * @return list<string>|null null when the query is blank, no hit passes
     *                           the routing filter, or no active document matches
     * @throws Exception
     */
    public function route(string $query): ?array
    {
        $query = trim($query);

        if ($query === '') {
            return null;
        }

        $rawHits = $this->tagSearch->search($query, self::DEFAULT_TOPK);

        // Defensive: treat anything but an array as "no hits".
        if (!is_array($rawHits)) {
            return null;
        }

        $hits = $this->filterRoutingHits($rawHits);

        if ($hits === []) {
            return null;
        }

        $tagBinaryIds = [];
        $tagMetaById = [];

        foreach ($hits as $hit) {
            try {
                $tagUuid = Uuid::fromString($hit['tag_id']);
            } catch (\Throwable) {
                continue;
            }

            // Key by the canonical RFC 4122 form: the database rows below are
            // converted the same way, so ids match regardless of how the
            // vector service spelled them.
            $canonicalTagId = $tagUuid->toRfc4122();

            if (isset($tagMetaById[$canonicalTagId])) {
                // Hits are sorted best-first, so the first occurrence wins.
                continue;
            }

            $tagBinaryIds[] = $tagUuid->toBinary();
            $tagMetaById[$canonicalTagId] = [
                'score' => $hit['score'],
                'weight' => $this->resolveTypeWeight($hit['tag_type']),
            ];
        }

        if ($tagBinaryIds === []) {
            return null;
        }

        $rows = $this->em->getConnection()->executeQuery(
            'SELECT dt.document_id, dt.tag_id
             FROM document_tag dt
             INNER JOIN document d ON d.id = dt.document_id
             WHERE dt.tag_id IN (:tagIds)
               AND d.status = :status',
            [
                'tagIds' => $tagBinaryIds,
                'status' => Document::STATUS_ACTIVE,
            ],
            [
                'tagIds' => ArrayParameterType::BINARY,
            ]
        )->fetchAllAssociative();

        if ($rows === []) {
            return null;
        }

        $documentScores = [];
        $documentMatchedTags = [];

        foreach ($rows as $row) {
            if (!isset($row['document_id'], $row['tag_id'])) {
                continue;
            }

            try {
                $documentId = Uuid::fromBinary($row['document_id'])->toRfc4122();
                $tagId = Uuid::fromBinary($row['tag_id'])->toRfc4122();
            } catch (\Throwable) {
                continue;
            }

            if (!isset($tagMetaById[$tagId])) {
                continue;
            }

            $documentScores[$documentId] = ($documentScores[$documentId] ?? 0.0)
                + ($tagMetaById[$tagId]['score'] * $tagMetaById[$tagId]['weight']);

            $documentMatchedTags[$documentId][$tagId] = true;
        }

        if ($documentScores === []) {
            return null;
        }

        foreach ($documentMatchedTags as $documentId => $matchedTags) {
            $matchedTagCount = count($matchedTags);

            if ($matchedTagCount > 1) {
                $documentScores[$documentId] += min(
                    self::MAX_MULTI_TAG_BONUS,
                    ($matchedTagCount - 1) * self::MULTI_TAG_BONUS_PER_EXTRA_TAG
                );
            }
        }

        arsort($documentScores, SORT_NUMERIC);

        // array_map with strval guards against PHP turning numeric-looking
        // array keys into integers; UUID keys are returned unchanged.
        return array_map(
            'strval',
            array_slice(array_keys($documentScores), 0, self::MAX_CANDIDATE_DOCS)
        );
    }

    /**
     * Reduces raw vector hits to the ones allowed to drive routing.
     *
     * Hits are ordered best-first, then kept only if they have a tag id, are
     * not sales signals, and score at least
     * max(MIN_BEST_SCORE, best - MAX_SCORE_DROP_FROM_BEST). At most
     * MAX_ROUTING_TAGS hits are returned. An empty list is returned when the
     * best score is below MIN_BEST_SCORE.
     *
     * @param array<int, array{
     *   tag_id:string,
     *   score:float,
     *   label?:string,
     *   tag_type?:string
     * }> $hits
     *
     * @return list<array{
     *   tag_id:string,
     *   score:float,
     *   tag_type:string
     * }>
     */
    private function filterRoutingHits(array $hits): array
    {
        $candidates = [];

        foreach ($hits as $hit) {
            if (!is_array($hit)) {
                continue;
            }

            $candidates[] = [
                'tag_id' => (string) ($hit['tag_id'] ?? ''),
                'score' => (float) ($hit['score'] ?? 0.0),
                'tag_type' => TagTypes::normalize(
                    (string) ($hit['tag_type'] ?? TagTypes::GENERIC)
                ),
            ];
        }

        if ($candidates === []) {
            return [];
        }

        // Do not rely on the vector service returning hits best-first.
        // usort() is stable as of PHP 8.0, so equal scores keep their order.
        usort(
            $candidates,
            static fn (array $a, array $b): int => $b['score'] <=> $a['score']
        );

        $bestScore = $candidates[0]['score'];

        if ($bestScore < self::MIN_BEST_SCORE) {
            return [];
        }

        $minimumAcceptedScore = max(
            self::MIN_BEST_SCORE,
            $bestScore - self::MAX_SCORE_DROP_FROM_BEST
        );

        $filtered = [];

        foreach ($candidates as $candidate) {
            if ($candidate['tag_id'] === '' || $candidate['score'] < $minimumAcceptedScore) {
                continue;
            }

            // Sales signals may still be useful elsewhere, but they should not
            // expand the document scope for semantic retrieval.
            if ($candidate['tag_type'] === TagTypes::SALES_SIGNAL) {
                continue;
            }

            $filtered[] = $candidate;

            if (count($filtered) >= self::MAX_ROUTING_TAGS) {
                break;
            }
        }

        return $filtered;
    }

    /**
     * Returns the score multiplier applied to hits of the given tag type.
     */
    private function resolveTypeWeight(string $tagType): float
    {
        return match (TagTypes::normalize($tagType)) {
            TagTypes::CATALOG_ENTITY => 1.20,
            TagTypes::GENERIC => 1.00,
            TagTypes::SALES_SIGNAL => 0.00,
            default => 1.00,
        };
    }
}

View File

@@ -4,42 +4,45 @@ declare(strict_types=1);
namespace App\Tag; namespace App\Tag;
use App\Entity\Tag;
use App\Entity\Document; use App\Entity\Document;
use App\Entity\DocumentTag; use App\Entity\DocumentTag;
use App\Entity\Tag;
use App\Service\TagRebuildJobService; use App\Service\TagRebuildJobService;
use Doctrine\ORM\EntityManagerInterface; use Doctrine\ORM\EntityManagerInterface;
use InvalidArgumentException;
use RuntimeException;
final readonly class TagService final readonly class TagService
{ {
public function __construct( public function __construct(
private EntityManagerInterface $em, private EntityManagerInterface $em,
private TagRebuildJobService $jobs, private TagRebuildJobService $jobs,
) {} ) {
}
// =========================================================
// TAG CREATE
// =========================================================
public function create( public function create(
string $slug, string $slug,
string $label, string $label,
?string $description = null, ?string $description = null,
string $type = 'generic' // NEU string $type = TagTypes::GENERIC,
): Tag { ): Tag {
$slug = trim($slug); $normalizedSlug = $this->normalizeSlug($slug);
$label = trim($label); $label = trim($label);
if ($label === '' || $slug === '') { if ($normalizedSlug === '' || $label === '') {
throw new \InvalidArgumentException('Label und Slug sind Pflichtfelder.'); throw new InvalidArgumentException('Tag label and slug are required.');
} }
if ($this->slugExists($slug)) { if ($this->slugExists($normalizedSlug)) {
throw new \RuntimeException('Slug existiert bereits.'); throw new RuntimeException(sprintf('Tag slug "%s" already exists.', $normalizedSlug));
} }
$tag = new Tag($slug, $label, $description); $tag = new Tag(
$tag->setType($type); // NEU $normalizedSlug,
$label,
$description,
TagTypes::normalize($type)
);
$this->em->persist($tag); $this->em->persist($tag);
$this->em->flush(); $this->em->flush();
@@ -49,18 +52,9 @@ final readonly class TagService
return $tag; return $tag;
} }
// =========================================================
// TAG DELETE
// =========================================================
/**
 * Looks up the tag by id and deletes it.
 */
public function deleteById(string $tagId): void
{
    $this->delete($this->findTagById($tagId));
}
@@ -72,87 +66,103 @@ final readonly class TagService
$this->triggerRebuildIfIdle(); $this->triggerRebuildIfIdle();
} }
// =========================================================
// DOCUMENT TAG SYNC
// =========================================================
/**
 * Makes the document's tag relations match the given tag id list.
 *
 * Missing relations are created for ids that resolve to a Tag, surplus
 * relations are removed. If the id lists differ, the entity manager is
 * flushed and a rebuild is triggered when no job is active.
 *
 * @param array<mixed> $newTagIds
 */
public function syncDocumentTags(Document $document, array $newTagIds): void
{
    $wantedTagIds = $this->normalizeIdList($newTagIds);

    /** @var list<DocumentTag> $existingRelations */
    $existingRelations = $this->em
        ->getRepository(DocumentTag::class)
        ->findBy(['document' => $document]);

    $existingTagIds = [];
    foreach ($existingRelations as $existingRelation) {
        $existingTagIds[] = (string) $existingRelation->getTag()->getId();
    }

    $toAdd = array_values(array_diff($wantedTagIds, $existingTagIds));
    $toRemove = array_values(array_diff($existingTagIds, $wantedTagIds));

    foreach ($toAdd as $tagId) {
        $tag = $this->em->getRepository(Tag::class)->find($tagId);

        if (!$tag instanceof Tag) {
            continue;
        }

        $this->em->persist(new DocumentTag($document, $tag));
    }

    foreach ($existingRelations as $existingRelation) {
        $isObsolete = in_array((string) $existingRelation->getTag()->getId(), $toRemove, true);

        if ($isObsolete) {
            $this->em->remove($existingRelation);
        }
    }

    if ($toAdd === [] && $toRemove === []) {
        return;
    }

    $this->em->flush();
    $this->triggerRebuildIfIdle();
}
// =========================================================
// TAG → DOCUMENT SYNC (Bulk Assign)
// =========================================================
/**
 * Makes the tag's document relations match the given document id list.
 *
 * New relations are created only for ids that resolve to a Document whose
 * status is Document::STATUS_ACTIVE; surplus relations are removed. If the
 * id lists differ, the entity manager is flushed and a rebuild is triggered
 * when no job is active.
 *
 * @param array<mixed> $newDocumentIds
 */
public function syncTagDocuments(Tag $tag, array $newDocumentIds): void
{
    $wantedDocumentIds = $this->normalizeIdList($newDocumentIds);

    /** @var list<DocumentTag> $existingRelations */
    $existingRelations = $this->em
        ->getRepository(DocumentTag::class)
        ->findBy(['tag' => $tag]);

    $existingDocumentIds = [];
    foreach ($existingRelations as $existingRelation) {
        $existingDocumentIds[] = (string) $existingRelation->getDocument()->getId();
    }

    $toAdd = array_values(array_diff($wantedDocumentIds, $existingDocumentIds));
    $toRemove = array_values(array_diff($existingDocumentIds, $wantedDocumentIds));

    foreach ($toAdd as $documentId) {
        $document = $this->em->getRepository(Document::class)->find($documentId);

        if (!$document instanceof Document) {
            continue;
        }

        if ($document->getStatus() !== Document::STATUS_ACTIVE) {
            continue;
        }

        $this->em->persist(new DocumentTag($document, $tag));
    }

    foreach ($existingRelations as $existingRelation) {
        $isObsolete = in_array((string) $existingRelation->getDocument()->getId(), $toRemove, true);

        if ($isObsolete) {
            $this->em->remove($existingRelation);
        }
    }

    if ($toAdd === [] && $toRemove === []) {
        return;
    }

    $this->em->flush();
    $this->triggerRebuildIfIdle();
}
/**
 * Loads a tag by its (trimmed) id.
 *
 * @throws InvalidArgumentException when the id is blank
 * @throws RuntimeException         when no tag exists for the id
 */
private function findTagById(string $tagId): Tag
{
    $trimmedId = trim($tagId);
    if ($trimmedId === '') {
        throw new InvalidArgumentException('Tag id must not be empty.');
    }

    $found = $this->em->getRepository(Tag::class)->find($trimmedId);
    if ($found instanceof Tag) {
        return $found;
    }

    throw new RuntimeException('Tag not found.');
}
private function slugExists(string $slug): bool private function slugExists(string $slug): bool
{ {
@@ -165,6 +175,36 @@ final readonly class TagService
->getSingleScalarResult() > 0; ->getSingleScalarResult() > 0;
} }
/**
 * Casts every entry to a trimmed string, drops blanks and duplicates, and
 * returns the remaining ids as a re-indexed list in first-seen order.
 *
 * @param array<mixed> $ids
 * @return list<string>
 */
private function normalizeIdList(array $ids): array
{
    $trimmedIds = array_map(
        static fn (mixed $id): string => trim((string) $id),
        $ids
    );

    $nonEmptyIds = array_filter(
        $trimmedIds,
        static fn (string $id): bool => $id !== ''
    );

    return array_values(array_unique($nonEmptyIds));
}
/**
 * Lowercases and trims the slug, turns whitespace runs into a single dash,
 * collapses repeated dashes and strips dashes from both ends.
 */
private function normalizeSlug(string $slug): string
{
    $lowered = mb_strtolower(trim($slug));
    $dashed = preg_replace('/\s+/u', '-', $lowered) ?? $lowered;
    $collapsed = preg_replace('/-+/u', '-', $dashed) ?? $dashed;

    return trim($collapsed, '-');
}
private function triggerRebuildIfIdle(): void private function triggerRebuildIfIdle(): void
{ {
if (!$this->jobs->hasActiveJob()) { if (!$this->jobs->hasActiveJob()) {

View File

@@ -5,8 +5,10 @@ declare(strict_types=1);
namespace App\Tag; namespace App\Tag;
/** /**
* Zentrale Definition aller erlaubten Tag-Typen. * Central definition of all supported tag types.
* Verhindert Magic Strings im Code. *
* This class is intentionally tiny and dependency-free because it is the
* foundation for entity validation, admin forms, routing, and catalog logic.
*/ */
final class TagTypes final class TagTypes
{ {
@@ -14,6 +16,25 @@ final class TagTypes
public const CATALOG_ENTITY = 'catalog_entity'; public const CATALOG_ENTITY = 'catalog_entity';
public const SALES_SIGNAL = 'sales_signal'; public const SALES_SIGNAL = 'sales_signal';
/**
* Returns the canonical list of allowed type values.
*
* @return list<string>
*/
public static function all(): array
{
return [
self::GENERIC,
self::CATALOG_ENTITY,
self::SALES_SIGNAL,
];
}
/**
* Returns UI choices for forms and admin screens.
*
* @return array<string, string>
*/
public static function choices(): array public static function choices(): array
{ {
return [ return [
@@ -23,5 +44,53 @@ final class TagTypes
]; ];
} }
/**
 * Returns true if the given value is an allowed tag type.
 *
 * The value is compared after trimming and lowercasing. It is deliberately
 * NOT passed through normalize(): normalize() falls back to a known default
 * for unknown input, which would make every non-null string look valid.
 */
public static function isValid(?string $type): bool
{
    if ($type === null) {
        return false;
    }

    return in_array(mb_strtolower(trim($type)), self::all(), true);
}
/**
 * Normalizes external input into a canonical internal value.
 *
 * The input is trimmed and lowercased. Empty or unknown input falls back to
 * the provided default, or to GENERIC when the default itself is unknown.
 */
public static function normalize(?string $type, string $default = self::GENERIC): string
{
    $allowed = self::all();

    $candidate = mb_strtolower(trim((string) $type));
    if ($candidate !== '' && in_array($candidate, $allowed, true)) {
        return $candidate;
    }

    $fallback = mb_strtolower(trim($default));

    return in_array($fallback, $allowed, true) ? $fallback : self::GENERIC;
}
/**
 * Returns a human-readable label for a canonical type.
 *
 * The label is looked up in the flipped choices() map after normalizing the
 * type; 'Generic' is returned when the map has no entry.
 */
public static function labelFor(string $type): string
{
    $labelsByType = array_flip(self::choices());
    $canonicalType = self::normalize($type);

    return $labelsByType[$canonicalType] ?? 'Generic';
}
private static function isKnownDefault(string $type): bool
{
return in_array($type, self::all(), true);
}
private function __construct()
{
}
} }

View File

@@ -9,6 +9,8 @@ use Psr\Log\LoggerInterface;
final readonly class TagVectorIndexBuilder final readonly class TagVectorIndexBuilder
{ {
private const GRACEFUL_TERMINATION_SECONDS = 2;
public function __construct( public function __construct(
private string $pythonBin, private string $pythonBin,
private string $scriptPath, private string $scriptPath,
@@ -17,10 +19,71 @@ final readonly class TagVectorIndexBuilder
private string $embeddingModel, private string $embeddingModel,
private int $timeoutSeconds, private int $timeoutSeconds,
private LoggerInterface $agentLogger, private LoggerInterface $agentLogger,
private IndexMetaManager $metaManager, // ✅ NEU private IndexMetaManager $metaManager,
) {} ) {
}
/**
 * Rebuilds the tag vector index from the tags NDJSON file.
 *
 * When the NDJSON holds no embeddable tag, existing index artifacts are
 * removed and the runtime is committed with "no index present". Otherwise
 * the ingest command writes temporary artifacts that replace the final
 * index and meta files once both are non-empty.
 *
 * @throws \RuntimeException when the ingest fails or yields incomplete artifacts
 */
public function build(): void
{
    $this->assertPreconditions();

    $finalIndex = $this->vectorTagsIndexPath;
    $finalMeta = $finalIndex . '.meta.json';
    $tmpIndex = $finalIndex . '.tmp';
    $tmpMeta = $tmpIndex . '.meta.json';

    $this->ensureTargetDirectoryExists($finalIndex);
    $this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);

    $hasTags = $this->hasEmbeddableTags();
    if ($hasTags === false) {
        $this->agentLogger->info('[tags] no embeddable tags found, removing stale tag index artifacts.');

        foreach ([$finalIndex, $finalMeta] as $staleArtifact) {
            $this->removeFileIfExists($staleArtifact);
        }

        $this->commitRuntime(false);

        return;
    }

    $cmd = $this->buildCommand($tmpIndex);

    $this->agentLogger->info('[tags] build tag vector index', [
        'cmd' => $cmd,
        'timeout' => $this->timeoutSeconds,
        'embedding_model' => $this->embeddingModel,
    ]);

    try {
        $result = $this->runCommand($cmd);
        $exit = $result['exit'];

        if ($exit !== 0) {
            $this->agentLogger->error('[tags] tag vector ingest failed', [
                'exit' => $exit,
                'stdout' => $result['stdout'],
                'stderr' => $result['stderr'],
            ]);

            throw new \RuntimeException('Tag vector ingest failed (exit=' . $exit . ')');
        }

        $artifactsComplete = $this->isUsableArtifact($tmpIndex) && $this->isUsableArtifact($tmpMeta);
        if (!$artifactsComplete) {
            throw new \RuntimeException('Tag vector ingest produced incomplete artifacts.');
        }

        $this->atomicReplace($tmpIndex, $finalIndex);
        $this->atomicReplace($tmpMeta, $finalMeta);
        $this->commitRuntime(true);

        $this->agentLogger->info('[tags] tag vector index build completed + runtime committed', [
            'index' => $finalIndex,
            'meta' => $finalMeta,
        ]);
    } catch (\Throwable $failure) {
        // Leave no half-written temporary artifacts behind.
        $this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);

        throw $failure;
    }
}
private function assertPreconditions(): void
{ {
if (!is_file($this->tagsNdjsonPath)) { if (!is_file($this->tagsNdjsonPath)) {
throw new \RuntimeException('tags.ndjson missing: ' . $this->tagsNdjsonPath); throw new \RuntimeException('tags.ndjson missing: ' . $this->tagsNdjsonPath);
@@ -30,65 +93,178 @@ final readonly class TagVectorIndexBuilder
throw new \RuntimeException('Tag ingest script missing: ' . $this->scriptPath); throw new \RuntimeException('Tag ingest script missing: ' . $this->scriptPath);
} }
$tmpIndex = $this->vectorTagsIndexPath . '.tmp'; if (trim($this->pythonBin) === '') {
$tmpMeta = $tmpIndex . '.meta.json'; throw new \RuntimeException('Python binary must not be empty.');
$finalIndex = $this->vectorTagsIndexPath;
$finalMeta = $finalIndex . '.meta.json';
$dir = \dirname($finalIndex);
if (!\is_dir($dir)) {
@\mkdir($dir, 0775, true);
} }
@\unlink($tmpIndex); if ($this->timeoutSeconds < 1) {
@\unlink($tmpMeta); throw new \RuntimeException('Tag vector timeout must be >= 1 second.');
}
}
/**
 * Builds the shell command for the ingest script: python binary, script,
 * NDJSON input and temporary index output, each shell-escaped, with stderr
 * redirected into stdout.
 */
private function buildCommand(string $tmpIndex): string
{
    $escapedArguments = array_map('escapeshellarg', [
        $this->pythonBin,
        $this->scriptPath,
        $this->tagsNdjsonPath,
        $tmpIndex,
    ]);

    return implode(' ', $escapedArguments) . ' 2>&1';
}
/**
 * Creates the directory of the final index path (recursively) if missing.
 *
 * @throws \RuntimeException when the directory does not exist after mkdir()
 */
private function ensureTargetDirectoryExists(string $finalIndexPath): void
{
    $targetDir = dirname($finalIndexPath);

    // The second is_dir() covers a concurrent creation between check and mkdir().
    if (!is_dir($targetDir) && !@mkdir($targetDir, 0775, true) && !is_dir($targetDir)) {
        throw new \RuntimeException('Unable to create tag vector directory: ' . $targetDir);
    }
}
/**
 * Scans the tags NDJSON and reports whether at least one line decodes to an
 * array with a non-blank "tag_id" and a non-blank "text".
 *
 * @throws \RuntimeException when the NDJSON file cannot be opened
 */
private function hasEmbeddableTags(): bool
{
    $handle = @fopen($this->tagsNdjsonPath, 'rb');
    if ($handle === false) {
        throw new \RuntimeException('Unable to read tags NDJSON: ' . $this->tagsNdjsonPath);
    }

    $found = false;

    try {
        for ($rawLine = fgets($handle); $rawLine !== false; $rawLine = fgets($handle)) {
            $rawLine = trim($rawLine);
            if ($rawLine === '') {
                continue;
            }

            $record = json_decode($rawLine, true);
            if (!is_array($record)) {
                continue;
            }

            $hasId = trim((string) ($record['tag_id'] ?? '')) !== '';
            $hasText = trim((string) ($record['text'] ?? '')) !== '';

            if ($hasId && $hasText) {
                $found = true;
                break;
            }
        }
    } finally {
        fclose($handle);
    }

    return $found;
}
/**
 * Runs the given shell command, collecting its output and enforcing the
 * configured timeout.
 *
 * Output is drained with non-blocking reads while the process status is
 * polled. On timeout the process is terminated, then killed with signal 9
 * if it is still running after the grace period.
 *
 * @return array{exit:int, stdout:string, stderr:string}
 *
 * @throws \RuntimeException when the process cannot be started or times out
 */
private function runCommand(string $cmd): array
{
    $descriptorSpec = [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ];

    $process = @proc_open($cmd, $descriptorSpec, $pipes);

    if (!is_resource($process)) {
        throw new \RuntimeException('Could not start tag vector ingest process.');
    }

    fclose($pipes[0]);
    stream_set_blocking($pipes[1], false);
    stream_set_blocking($pipes[2], false);

    $stdout = '';
    $stderr = '';
    $startedAt = microtime(true);
    $timedOut = false;
    $observedExitCode = null;

    try {
        while (true) {
            $stdout .= stream_get_contents($pipes[1]) ?: '';
            $stderr .= stream_get_contents($pipes[2]) ?: '';

            $status = proc_get_status($process);

            if (!is_array($status) || ($status['running'] ?? false) !== true) {
                // Before PHP 8.3 only the first status call after the exit
                // carries the real exit code, and proc_close() then returns
                // -1. Remember the code here so success is not misreported.
                if (is_array($status) && is_int($status['exitcode'] ?? null) && $status['exitcode'] >= 0) {
                    $observedExitCode = $status['exitcode'];
                }

                break;
            }

            if ((microtime(true) - $startedAt) > $this->timeoutSeconds) {
                $timedOut = true;
                proc_terminate($process);
                usleep(self::GRACEFUL_TERMINATION_SECONDS * 1000000);

                $status = proc_get_status($process);
                if (is_array($status) && ($status['running'] ?? false) === true) {
                    proc_terminate($process, 9);
                }

                break;
            }

            usleep(100000);
        }

        // Pick up whatever was written between the last read and the exit.
        $stdout .= stream_get_contents($pipes[1]) ?: '';
        $stderr .= stream_get_contents($pipes[2]) ?: '';
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
    }

    $closeExitCode = proc_close($process);

    if ($timedOut) {
        $this->agentLogger->error('[tags] tag vector ingest timed out', [
            'timeout' => $this->timeoutSeconds,
            'stdout' => $stdout,
            'stderr' => $stderr,
        ]);

        throw new \RuntimeException('Tag vector ingest timed out after ' . $this->timeoutSeconds . ' seconds.');
    }

    // Prefer the code seen while polling; fall back to proc_close().
    $exitCode = $observedExitCode ?? $closeExitCode;

    return [
        'exit' => is_int($exitCode) ? $exitCode : 1,
        'stdout' => trim($stdout),
        'stderr' => trim($stderr),
    ];
}
/**
 * Returns true when the path is a regular file with a size above zero.
 */
private function isUsableArtifact(string $path): bool
{
    if (!is_file($path)) {
        return false;
    }

    return filesize($path) > 0;
}
/**
 * Removes every given path that is an existing file; unlink errors are suppressed.
 */
private function cleanupTemporaryArtifacts(string ...$paths): void
{
    foreach ($paths as $artifactPath) {
        if (is_file($artifactPath)) {
            @unlink($artifactPath);
        }
    }
}
/**
 * Deletes the file at the given path if it exists; unlink errors are suppressed.
 */
private function removeFileIfExists(string $path): void
{
    if (!is_file($path)) {
        return;
    }

    @unlink($path);
}
/**
 * Records the rebuild time (DATE_ATOM, current time) and whether a tag
 * index is present via the meta manager's touchRuntime().
 */
private function commitRuntime(bool $indexPresent): void
{
    $rebuiltAt = new \DateTimeImmutable();

    $runtimeState = [
        'last_tags_rebuild_at' => $rebuiltAt->format(DATE_ATOM),
        'tags_index_present' => $indexPresent,
    ];

    $this->metaManager->touchRuntime($runtimeState);
}
@@ -99,6 +275,7 @@ final readonly class TagVectorIndexBuilder
@unlink($tmp); @unlink($tmp);
throw new \RuntimeException('Atomic replace failed for: ' . $final); throw new \RuntimeException('Atomic replace failed for: ' . $final);
} }
@unlink($tmp); @unlink($tmp);
} }

View File

@@ -6,11 +6,20 @@ namespace App\Tag;
final readonly class TagVectorIndexHealthService final readonly class TagVectorIndexHealthService
{ {
private const STATUS_OK = 'OK';
private const STATUS_OK_EMPTY = 'OK_EMPTY';
private const STATUS_INCONSISTENT_STALE_VECTOR = 'INCONSISTENT_STALE_VECTOR';
private const STATUS_INCONSISTENT_MISSING_VECTOR = 'INCONSISTENT_MISSING_VECTOR';
private const STATUS_INCONSISTENT_COUNT_MISMATCH = 'INCONSISTENT_COUNT_MISMATCH';
private const STATUS_INCONSISTENT_INVALID_META = 'INCONSISTENT_INVALID_META';
private const STATUS_UNKNOWN = 'UNKNOWN';
public function __construct( public function __construct(
private string $tagsNdjsonPath, private string $tagsNdjsonPath,
private string $vectorTagsIndexPath, private string $vectorTagsIndexPath,
private string $vectorTagsMetaPath private string $vectorTagsMetaPath,
) {} ) {
}
public function check(): array public function check(): array
{ {
@@ -18,51 +27,189 @@ final readonly class TagVectorIndexHealthService
$vectorExists = is_file($this->vectorTagsIndexPath); $vectorExists = is_file($this->vectorTagsIndexPath);
$metaExists = is_file($this->vectorTagsMetaPath); $metaExists = is_file($this->vectorTagsMetaPath);
$ndjsonTagCount = 0; $ndjsonStats = $this->readNdjsonStats();
$metaStats = $this->readMetaStats();
if ($ndjsonExists) { $status = $this->determineStatus(
$h = @fopen($this->tagsNdjsonPath, 'r'); $ndjsonStats['exported_tag_count'],
if ($h !== false) { $vectorExists,
while (($line = fgets($h)) !== false) { $metaExists,
$line = trim($line); $metaStats['vector_tag_count'],
if ($line === '') continue; $metaStats['meta_valid']
);
$data = json_decode($line, true);
if (is_array($data) && !empty($data['tag_id']) && !empty($data['text'])) {
$ndjsonTagCount++;
}
}
fclose($h);
}
}
$vectorTagCount = 0;
if ($metaExists) {
$meta = json_decode((string) file_get_contents($this->vectorTagsMetaPath), true);
if (is_array($meta)) {
$vectorTagCount = count($meta);
}
}
$status = $this->determineStatus($ndjsonTagCount, $vectorExists, $metaExists, $vectorTagCount);
return [ return [
'tags_ndjson_exists' => $ndjsonExists, 'tags_ndjson_exists' => $ndjsonExists,
'tags_ndjson_count' => $ndjsonTagCount, 'tags_ndjson_count' => $ndjsonStats['exported_tag_count'],
'vector_exists' => $vectorExists, 'vector_exists' => $vectorExists,
'meta_exists' => $metaExists, 'meta_exists' => $metaExists,
'vector_tag_count' => $vectorTagCount, 'vector_tag_count' => $metaStats['vector_tag_count'],
'status' => $status, 'status' => $status,
// Extra diagnostics for admin/CLI.
'tags_ndjson_lines_total' => $ndjsonStats['lines_total'],
'tags_ndjson_invalid_lines' => $ndjsonStats['invalid_lines'],
'tags_ndjson_empty_lines' => $ndjsonStats['empty_lines'],
'tags_with_active_document_ids' => $ndjsonStats['tags_with_document_ids'],
'meta_valid' => $metaStats['meta_valid'],
'paths' => [
'tags_ndjson' => $this->tagsNdjsonPath,
'vector_index' => $this->vectorTagsIndexPath,
'vector_meta' => $this->vectorTagsMetaPath,
],
]; ];
} }
private function determineStatus(int $ndjsonTagCount, bool $vectorExists, bool $metaExists, int $vectorTagCount): string /**
* @return array{
* lines_total:int,
* empty_lines:int,
* invalid_lines:int,
* exported_tag_count:int,
* tags_with_document_ids:int
* }
*/
private function readNdjsonStats(): array
{ {
// NOTE(review): the next rows also carry the removed side of the diff
// (old determineStatus body) before the added $stats initialiser.
if ($ndjsonTagCount === 0 && !$vectorExists && !$metaExists) return 'OK_EMPTY'; $stats = [
if ($ndjsonTagCount > 0 && $vectorExists && $metaExists && $vectorTagCount === $ndjsonTagCount) return 'OK'; 'lines_total' => 0,
if ($ndjsonTagCount === 0 && ($vectorExists || $metaExists)) return 'INCONSISTENT_STALE_VECTOR'; 'empty_lines' => 0,
if ($ndjsonTagCount > 0 && (!$vectorExists || !$metaExists)) return 'INCONSISTENT_MISSING_VECTOR'; 'invalid_lines' => 0,
if ($ndjsonTagCount !== $vectorTagCount) return 'INCONSISTENT_COUNT_MISMATCH'; 'exported_tag_count' => 0,
return 'UNKNOWN'; 'tags_with_document_ids' => 0,
];
// A missing export file is reported as all-zero stats.
if (!is_file($this->tagsNdjsonPath)) {
return $stats;
}
// Open failures are silenced and likewise reported as all-zero stats.
$handle = @fopen($this->tagsNdjsonPath, 'rb');
if ($handle === false) {
return $stats;
}
try {
while (($line = fgets($handle)) !== false) {
// Every physical line counts, including blank and invalid ones.
$stats['lines_total']++;
$line = trim($line);
if ($line === '') {
$stats['empty_lines']++;
continue;
}
// Lines that do not decode to an array are counted as invalid.
$data = json_decode($line, true);
if (!is_array($data)) {
$stats['invalid_lines']++;
continue;
}
$tagId = trim((string) ($data['tag_id'] ?? ''));
$text = trim((string) ($data['text'] ?? ''));
// Only a non-empty array of document ids counts as "with documents".
$documentIds = $data['document_ids'] ?? null;
$hasDocumentIds = is_array($documentIds) && $documentIds !== [];
// A row is an exported tag only when tag_id and text are both non-empty after trimming.
if ($tagId === '' || $text === '') {
$stats['invalid_lines']++;
continue;
}
$stats['exported_tag_count']++;
if ($hasDocumentIds) {
$stats['tags_with_document_ids']++;
}
}
} finally {
// Release the handle on every exit path of the loop.
fclose($handle);
}
return $stats;
}
/**
 * Reads the vector meta file and derives the number of indexed tags.
 *
 * The meta is accepted when it decodes to a JSON list, or to a JSON object
 * whose keys are all non-negative decimal integers. In both cases the tag
 * count is the number of top-level entries. A missing, unreadable, blank,
 * undecodable or differently keyed file yields a zero count and
 * meta_valid = false.
 *
 * @return array{vector_tag_count:int, meta_valid:bool}
 */
private function readMetaStats(): array
{
    $invalid = [
        'vector_tag_count' => 0,
        'meta_valid' => false,
    ];

    if (!is_file($this->vectorTagsMetaPath)) {
        return $invalid;
    }

    // Silenced so an unreadable file is reported as invalid meta instead of
    // raising a warning from a health check.
    $raw = @file_get_contents($this->vectorTagsMetaPath);
    if (!is_string($raw) || trim($raw) === '') {
        return $invalid;
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded)) {
        return $invalid;
    }

    if (!array_is_list($decoded)) {
        // PHP arrays cast canonical decimal string keys ("5") to int, so an
        // id-keyed JSON object arrives here with int keys. Accept those as
        // well as digit-only keys PHP keeps as strings (e.g. "007").
        foreach (array_keys($decoded) as $key) {
            $isNumericKey = is_int($key) ? $key >= 0 : ctype_digit($key);
            if (!$isNumericKey) {
                return $invalid;
            }
        }
    }

    return [
        'vector_tag_count' => count($decoded),
        'meta_valid' => true,
    ];
}
/**
 * Maps the collected index facts to one of the STATUS_* constants.
 *
 * @param int  $ndjsonTagCount Number of valid tags found in the NDJSON export.
 * @param bool $vectorExists   Whether the vector index file exists.
 * @param bool $metaExists     Whether the vector meta file exists.
 * @param int  $vectorTagCount Number of tags reported by the meta file.
 * @param bool $metaValid      Whether the meta file had a recognised structure.
 *
 * @return string One of the STATUS_* constant values.
 */
private function determineStatus(
    int $ndjsonTagCount,
    bool $vectorExists,
    bool $metaExists,
    int $vectorTagCount,
    bool $metaValid
): string {
    // Nothing exported: leftover artifacts are stale, none at all is fine.
    if ($ndjsonTagCount === 0) {
        return ($vectorExists || $metaExists)
            ? self::STATUS_INCONSISTENT_STALE_VECTOR
            : self::STATUS_OK_EMPTY;
    }

    $artifactsComplete = $vectorExists && $metaExists;
    if ($ndjsonTagCount > 0 && !$artifactsComplete) {
        return self::STATUS_INCONSISTENT_MISSING_VECTOR;
    }

    if ($metaExists && !$metaValid) {
        return self::STATUS_INCONSISTENT_INVALID_META;
    }

    if ($ndjsonTagCount !== $vectorTagCount) {
        return self::STATUS_INCONSISTENT_COUNT_MISMATCH;
    }

    // Equal counts: OK for a positive count, otherwise no rule applies.
    return $ndjsonTagCount > 0 ? self::STATUS_OK : self::STATUS_UNKNOWN;
}
} }

View File

@@ -12,18 +12,29 @@ final readonly class TagVectorSearchClient
/** /**
* Minimum similarity score required for a tag to be considered. * Minimum similarity score required for a tag to be considered.
*/ */
private const MIN_SCORE = 0.72; public const MIN_SCORE = 0.72;
/**
* Default result size when callers do not specify a limit.
*/
private const DEFAULT_LIMIT = 8;
/** /**
* Hard limit to prevent excessive requests. * Hard limit to prevent excessive requests.
*/ */
private const MAX_LIMIT = 50; private const MAX_LIMIT = 50;
/**
* HTTP timeout for the Python vector service.
*/
private const TIMEOUT_SECONDS = 10;
public function __construct( public function __construct(
private HttpClientInterface $http, private HttpClientInterface $http,
private string $serviceUrl, private string $serviceUrl,
private LoggerInterface $agentLogger, private LoggerInterface $agentLogger,
) {} ) {
}
/** /**
* Executes a vector search against the Python tag index. * Executes a vector search against the Python tag index.
@@ -33,43 +44,51 @@ final readonly class TagVectorSearchClient
* { * {
* "tag_id": "...", * "tag_id": "...",
* "score": 0.73, * "score": 0.73,
* "label": "Geräte", // optional (new) * "label": "Geräte",
* "tag_type": "catalog_entity" // optional (new) * "tag_type": "catalog_entity"
* } * }
* ] * ]
* *
* @return array<int, array{ * @return list<array{
* tag_id:string, * tag_id:string,
* score:float, * score:float,
* label?:string, * label:string,
* tag_type?:string * tag_type:string
* }> * }>
*/ */
public function search(string $query, int $limit = 8): array public function search(string $query, int $limit = self::DEFAULT_LIMIT): array
{ {
$query = trim($query); $query = trim($query);
if ($query === '') { if ($query === '') {
return []; return [];
} }
$limit = max(1, min($limit, self::MAX_LIMIT)); $limit = max(1, min($limit, self::MAX_LIMIT));
$serviceUrl = rtrim(trim($this->serviceUrl), '/');
if ($serviceUrl === '') {
$this->agentLogger->warning('Tag vector service URL is empty.');
return [];
}
try { try {
$response = $this->http->request( $response = $this->http->request(
'POST', 'POST',
rtrim($this->serviceUrl, '/') . '/search-tags', $serviceUrl . '/search-tags',
[ [
'json' => [ 'json' => [
'query' => $query, 'query' => $query,
'limit' => $limit, 'limit' => $limit,
], ],
'timeout' => 10, 'timeout' => self::TIMEOUT_SECONDS,
] ]
); );
if ($response->getStatusCode() !== 200) { if ($response->getStatusCode() !== 200) {
$this->agentLogger->warning( $this->agentLogger->warning(
'Tag vector service returned non-200', 'Tag vector service returned non-200.',
['status' => $response->getStatusCode()] ['status' => $response->getStatusCode()]
); );
@@ -77,10 +96,9 @@ final readonly class TagVectorSearchClient
} }
$data = $response->toArray(false); $data = $response->toArray(false);
} catch (\Throwable $e) { } catch (\Throwable $e) {
$this->agentLogger->warning( $this->agentLogger->warning(
'Tag vector service unreachable', 'Tag vector service unreachable.',
['error' => $e->getMessage()] ['error' => $e->getMessage()]
); );
@@ -88,18 +106,33 @@ final readonly class TagVectorSearchClient
} }
if (!is_array($data)) { if (!is_array($data)) {
$this->agentLogger->warning('Tag vector service returned invalid payload'); $this->agentLogger->warning('Tag vector service returned invalid payload.');
return []; return [];
} }
$hits = []; return $this->normalizeHits($data, $limit);
}
foreach ($data as $row) { /**
* @param array<mixed> $rows
* @return list<array{
* tag_id:string,
* score:float,
* label:string,
* tag_type:string
* }>
*/
private function normalizeHits(array $rows, int $limit): array
{
$hitsByTagId = [];
foreach ($rows as $row) {
if (!is_array($row)) { if (!is_array($row)) {
continue; continue;
} }
$tagId = (string)($row['tag_id'] ?? ''); $tagId = trim((string) ($row['tag_id'] ?? ''));
$score = $row['score'] ?? null; $score = $row['score'] ?? null;
if ($tagId === '' || !is_numeric($score)) { if ($tagId === '' || !is_numeric($score)) {
@@ -112,24 +145,45 @@ final readonly class TagVectorSearchClient
continue; continue;
} }
$hit = [ $normalizedHit = [
'tag_id' => $tagId, 'tag_id' => $tagId,
'score' => $score, 'score' => $score,
'label' => trim((string) ($row['label'] ?? '')),
'tag_type' => TagTypes::normalize((string) ($row['tag_type'] ?? TagTypes::GENERIC)),
]; ];
// Optional: label $existingHit = $hitsByTagId[$tagId] ?? null;
if (isset($row['label']) && is_string($row['label'])) {
$hit['label'] = $row['label']; if ($existingHit === null || $normalizedHit['score'] > $existingHit['score']) {
$hitsByTagId[$tagId] = $normalizedHit;
}
} }
// Optional: tag_type if ($hitsByTagId === []) {
if (isset($row['tag_type']) && is_string($row['tag_type'])) { return [];
$hit['tag_type'] = $row['tag_type'];
} }
$hits[] = $hit; $hits = array_values($hitsByTagId);
usort(
$hits,
static function (array $left, array $right): int {
$scoreComparison = $right['score'] <=> $left['score'];
if ($scoreComparison !== 0) {
return $scoreComparison;
} }
return $hits; $typeComparison = strcmp($left['tag_type'], $right['tag_type']);
if ($typeComparison !== 0) {
return $typeComparison;
}
return strcmp($left['tag_id'], $right['tag_id']);
}
);
return array_slice($hits, 0, $limit);
} }
} }

View File

@@ -5,90 +5,98 @@
{% block body %} {% block body %}
<div class="container-fluid"> <div class="container-fluid">
<!-- ===================================================== --> {% set chunkStatus = vectorHealth.status|default('UNKNOWN') %}
<!-- HEADER --> {% set chunkBadgeClass =
<!-- ===================================================== --> chunkStatus starts with 'OK'
<div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"><i class="bi bi-hdd-rack"></i> Systemübersicht</h1>
<span class="badge bg-secondary">RAG Enterprise</span>
</div>
<!-- ===================================================== -->
<!-- KPI ROW (NUR STATUS-AMPELN) -->
<!-- ===================================================== -->
<div class="row g-4 mb-4">
{# ================= CHUNK VECTOR STATUS ================= #}
{% if vectorHealth is defined %}
{% set status = vectorHealth.status %}
{% set badgeClass =
status starts with 'OK'
? 'bg-success' ? 'bg-success'
: (status == 'INCONSISTENT_MISSING_VECTOR' : (chunkStatus == 'INCONSISTENT_MISSING_VECTOR'
? 'bg-warning text-dark' ? 'bg-warning text-dark'
: 'bg-danger') %} : 'bg-danger')
{% endif %} %}
<div class="col-lg-6 col-xl-3"> {% set tagStatus = tagVectorHealth.status|default('UNKNOWN') %}
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-files"></i> Chunk-Vektor</div>
{% if vectorHealth is defined %}
<h4 class="mb-0">
<span class="badge {{ badgeClass }}">
{{ vectorHealth.status }}
</span>
</h4>
{% else %}
<div class="small text-light">
Keine Daten verfügbar.
</div>
{% endif %}
</div>
</div>
</div>
{# ================= TAG VECTOR STATUS ================= #}
{% if tagVectorHealth is defined %}
{% set tagStatus = tagVectorHealth.status %}
{% set tagBadgeClass = {% set tagBadgeClass =
tagStatus starts with 'OK' tagStatus starts with 'OK'
? 'bg-success' ? 'bg-success'
: (tagStatus == 'INCONSISTENT_MISSING_VECTOR' : (tagStatus == 'INCONSISTENT_MISSING_VECTOR'
? 'bg-warning text-dark' ? 'bg-warning text-dark'
: 'bg-danger') %} : 'bg-danger')
%}
{% set percent = chunkLimit > 0 ? (chunkCount / chunkLimit * 100)|round(3) : 0 %}
{% set percentClass =
percent >= 95
? 'bg-danger'
: (percent >= 85 ? 'bg-warning text-dark' : 'bg-success')
%}
{% set chunkHealthy = chunkStatus in ['OK', 'OK_EMPTY'] %}
{% set tagHealthy = tagStatus in ['OK', 'OK_EMPTY'] %}
{% set anyHealthIssue = not chunkHealthy or not tagHealthy %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3 mb-0">
<i class="bi bi-hdd-rack"></i> Systemübersicht
</h1>
<span class="badge bg-secondary">RetrieX Admin</span>
</div>
{% if anyHealthIssue %}
<div class="alert alert-warning shadow-sm mb-4">
<strong>Achtung:</strong>
Mindestens ein Index-Zustand ist nicht konsistent.
Prüfe die Detailkarten unten und führe bei Bedarf einen Global Reindex aus.
</div>
{% endif %} {% endif %}
<div class="col-lg-6 col-xl-3"> <div class="row g-4 mb-4">
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-tags"></i> Tag-Vektor</div>
{% if tagVectorHealth is defined %} <div class="col-lg-6 col-xl-3">
<h4 class="mb-0"> <div class="card bg-black border-secondary text-light h-100 shadow-sm">
<span class="badge {{ tagBadgeClass }}"> <div class="card-body">
{{ tagVectorHealth.status }} <div class="small text-light mb-2">
<i class="bi bi-files"></i> Chunk-Vektor
</div>
<h4 class="mb-2">
<span class="badge {{ chunkBadgeClass }}">
{{ chunkStatus }}
</span> </span>
</h4> </h4>
{% else %}
<div class="small text-light">
Keine Daten verfügbar.
</div>
{% endif %}
</div>
</div>
</div>
{# ================= KNOWLEDGE CAPACITY ================= #} <div class="small text-muted">
{% set percent = chunkLimit > 0 ? (chunkCount / chunkLimit * 100)|round(3) : 0 %} Keyword-/Chunk-Retrieval-Grundlage des Systems
</div>
</div>
</div>
</div>
<div class="col-lg-6 col-xl-3"> <div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100"> <div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-robot"></i> Wissenskapazität</div> <div class="small text-light mb-2">
<i class="bi bi-tags"></i> Tag-Vektor
</div>
<h4 class="mb-2">
<span class="badge {{ tagBadgeClass }}">
{{ tagStatus }}
</span>
</h4>
<div class="small text-muted">
Semantisches Tag-Routing für Dokumenträume und Entity-Erkennung
</div>
</div>
</div>
</div>
<div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="small text-light mb-2">
<i class="bi bi-robot"></i> Wissenskapazität
</div>
<h4 class="mb-2"> <h4 class="mb-2">
{{ chunkCount|number_format(0, ',', '.') }} {{ chunkCount|number_format(0, ',', '.') }}
@@ -98,14 +106,7 @@
</h4> </h4>
<div class="progress bg-dark mb-2" style="height: 14px;"> <div class="progress bg-dark mb-2" style="height: 14px;">
<div class="progress-bar <div class="progress-bar {{ percentClass }}"
{% if percent >= 95 %}
bg-danger
{% elseif percent >= 85 %}
bg-warning text-dark
{% else %}
bg-success
{% endif %}"
style="width: {{ percent }}%;"> style="width: {{ percent }}%;">
</div> </div>
</div> </div>
@@ -117,20 +118,21 @@
</div> </div>
</div> </div>
{# ================= GOVERNANCE ================= #}
<div class="col-lg-6 col-xl-3"> <div class="col-lg-6 col-xl-3">
<div class="card bg-black border-secondary text-light h-100"> <div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<div class="small text-light mb-2"><i class="bi bi-shield-check"></i> System-Governance</div> <div class="small text-light mb-2">
<i class="bi bi-shield-check"></i> System-Governance
</div>
<div class="small"> <div class="small">
<strong>Benutzer</strong><br> <strong>Benutzer</strong><br>
{{ app.user.userIdentifier }} {{ app.user ? app.user.userIdentifier : '-' }}
</div> </div>
<div class="small mt-3"> <div class="small mt-3">
<strong>Rollen</strong><br> <strong>Rollen</strong><br>
{{ app.user.roles|join(', ') }} {{ app.user ? app.user.roles|join(', ') : '-' }}
</div> </div>
</div> </div>
</div> </div>
@@ -138,65 +140,94 @@
</div> </div>
<!-- ===================================================== -->
<!-- DETAIL ROW (HIER SIND DIE ZAHLEN) -->
<!-- ===================================================== -->
<div class="row g-4"> <div class="row g-4">
{% if vectorHealth is defined %}
<div class="col-lg-4"> <div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100"> <div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-files"></i> Chunk-Vektor-Details</h5> <h5 class="text-info mb-3">
<i class="bi bi-files"></i> Chunk-Vektor-Details
</h5>
<div class="small text-info">NDJSON-Chunks</div> <div class="small text-info">NDJSON-Chunks</div>
<div class="h5 mb-3"> <div class="h5 mb-3">
{{ vectorHealth.ndjson_chunk_count|number_format(0, ',', '.') }} {{ vectorHealth.ndjson_chunk_count|default(0)|number_format(0, ',', '.') }}
</div> </div>
<div class="small text-info">Vektor-Index-Chunks</div> <div class="small text-info">Vektor-Index-Chunks</div>
<div class="h5">
{{ vectorHealth.vector_chunk_count|number_format(0, ',', '.') }}
</div>
</div>
</div>
</div>
{% endif %}
{% if tagVectorHealth is defined %}
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100">
<div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-tags"></i> Tag-Vektor-Details</h5>
<div class="small text-info">NDJSON-Tags</div>
<div class="h5 mb-3"> <div class="h5 mb-3">
{{ tagVectorHealth.tags_ndjson_count|number_format(0, ',', '.') }} {{ vectorHealth.vector_chunk_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="d-flex flex-wrap gap-2 mt-3">
<span class="badge {{ vectorHealth.ndjson_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
NDJSON {{ vectorHealth.ndjson_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ vectorHealth.vector_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Index {{ vectorHealth.vector_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ vectorHealth.meta_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ vectorHealth.meta_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
</div>
</div>
</div>
</div>
<div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<h5 class="text-info mb-3">
<i class="bi bi-tags"></i> Tag-Vektor-Details
</h5>
<div class="small text-info">Exportierte Tags (NDJSON)</div>
<div class="h5 mb-3">
{{ tagVectorHealth.tags_ndjson_count|default(0)|number_format(0, ',', '.') }}
</div> </div>
<div class="small text-info">Vektor-Index-Tags</div> <div class="small text-info">Vektor-Index-Tags</div>
<div class="h5"> <div class="h5 mb-3">
{{ tagVectorHealth.vector_tag_count|number_format(0, ',', '.') }} {{ tagVectorHealth.vector_tag_count|default(0)|number_format(0, ',', '.') }}
</div>
<div class="small text-info">Tags mit aktiven Dokumenten</div>
<div class="h5 mb-3">
{{ tagVectorHealth.tags_with_active_document_ids|default(0)|number_format(0, ',', '.') }}
</div>
<div class="d-flex flex-wrap gap-2 mt-3">
<span class="badge {{ tagVectorHealth.tags_ndjson_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
NDJSON {{ tagVectorHealth.tags_ndjson_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.vector_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Index {{ tagVectorHealth.vector_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.meta_exists|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ tagVectorHealth.meta_exists|default(false) ? 'vorhanden' : 'fehlt' }}
</span>
<span class="badge {{ tagVectorHealth.meta_valid|default(false) ? 'text-bg-success' : 'text-bg-danger' }}">
Meta {{ tagVectorHealth.meta_valid|default(false) ? 'gültig' : 'ungültig' }}
</span>
</div>
</div>
</div>
</div> </div>
</div>
</div>
</div>
{% endif %}
<!-- INDEXIERUNG -->
<div class="col-lg-4"> <div class="col-lg-4">
<div class="card bg-black border-secondary text-light h-100"> <div class="card bg-black border-secondary text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="text-info mb-3"><i class="bi bi-search"></i> Indexierung (Ingest Jobs)</h5> <h5 class="text-info mb-3">
<i class="bi bi-search"></i> Indexierung (Ingest Jobs)
</h5>
<div class="text-muted small mb-3"> <div class="text-muted small mb-3">
Erstellt den kompletten Wissensindex neu. Erstellt den kompletten Wissensindex neu und zieht dabei auch die
Kann je nach Datenmenge mehrere Minuten dauern. physischen Retrieval-Artefakte wieder gerade.
</div> </div>
<form method="post" <form method="post"
action="/admin/jobs/global-reindex" action="{{ path('admin_global_reindex') }}"
onsubmit="return confirm('Global Reindex starten? Dies kann mehrere Minuten dauern.');"> onsubmit="return confirm('Global Reindex starten? Dies kann mehrere Minuten dauern.');">
<input type="hidden" <input type="hidden"
@@ -208,15 +239,23 @@
Global Reindex starten Global Reindex starten
</button> </button>
</form> </form>
{% if anyHealthIssue %}
<div class="alert alert-dark border border-warning text-light small mt-3 mb-0">
Empfohlen bei inkonsistentem Chunk- oder Tag-Zustand.
</div>
{% endif %}
</div> </div>
</div> </div>
</div> </div>
{% if is_granted('ROLE_SUPER_ADMIN') %} {% if is_granted('ROLE_SUPER_ADMIN') %}
<div class="col-lg-4"> <div class="col-lg-4">
<div class="card bg-black border-danger text-light h-100"> <div class="card bg-black border-danger text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="text-danger mb-3"><i class="bi bi-sign-stop-fill"></i> Kritische Systemoperationen</h5> <h5 class="text-danger mb-3">
<i class="bi bi-sign-stop-fill"></i> Kritische Systemoperationen
</h5>
<div class="small mb-3 text-secondary"> <div class="small mb-3 text-secondary">
Entfernt alle Dokumente, Versionen, Indizes und Jobs. Entfernt alle Dokumente, Versionen, Indizes und Jobs.

View File

@@ -4,8 +4,15 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3 mb-0"><i class="bi bi-card-list"></i> Dokumente</h1> <div>
<h1 class="h3 mb-1">
<i class="bi bi-card-list"></i> Dokumente
</h1>
<div class="small text-muted">
Übersicht über Dokumente, aktive Versionen, Ingest-Zustände und Tag-Zuordnungen.
</div>
</div>
<a href="{{ path('admin_document_new') }}" <a href="{{ path('admin_document_new') }}"
class="btn btn-sm btn-outline-info"> class="btn btn-sm btn-outline-info">
@@ -13,50 +20,107 @@
</a> </a>
</div> </div>
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Worauf achten?</h5>
<ul class="small mb-0">
<li><strong>INDEXED</strong> bedeutet: aktive Version ist sauber im Wissensindex.</li>
<li><strong>PENDING</strong> oder <strong>FAILED</strong> bedeuten: Dokument prüfen und ggf. Ingest erneut anstoßen.</li>
<li><strong>Tags</strong> sollten fachlich präzise sein und nicht nur generische Oberbegriffe abbilden.</li>
<li>Die aktive Version ist die fachlich führende Version des Dokuments.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Schnellzugriff</h5>
<div class="small text-light">
Über <strong>Tags</strong> gelangst du direkt in die Tag-Pflege des Dokuments.
Über <strong>Details</strong> steuerst du Versionen, Aktivierung, Re-Ingest und Löschung.
</div>
</div>
</div>
</div>
{% if documents is empty %} {% if documents is empty %}
<div class="alert alert-secondary"> <div class="alert alert-secondary shadow-sm">
Keine Dokumente vorhanden. Keine Dokumente vorhanden.
</div> </div>
{% else %} {% else %}
<div class="card bg-black border-secondary"> <div class="card bg-black border-secondary shadow-sm">
<div class="card-body p-0"> <div class="card-body p-0">
<div class="d-flex justify-content-between align-items-center px-3 py-3 border-bottom border-secondary flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Dokumente</strong>
<span class="small text-muted ms-2">{{ documents|length }} Einträge</span>
</div>
<div class="small text-muted">
Neueste Dokumente stehen oben.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0"> <table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th>Titel</th> <th style="width: 20%">Titel</th>
<th>ID</th> <th style="width: 14%">ID</th>
<th>Typ</th> <th style="width: 8%">Typ</th>
<th>Status</th> <th style="width: 8%">Status</th>
<th>Indexierung</th> <th style="width: 10%">Indexierung</th>
<th>Versionen</th> <th style="width: 7%">Versionen</th>
<th>Aktive Version</th> <th style="width: 8%">Aktive Version</th>
<th>Erstellt</th> <th style="width: 7%">Tags</th>
<th class="text-end">Aktionen</th> <th style="width: 8%">Erstellt</th>
<th class="text-end" style="width: 10%">Aktionen</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for document in documents %} {% for document in documents %}
<tr> <tr>
{# Titel #}
<td> <td>
<div class="fw-semibold">
<a href="{{ path('admin_document_show', {id: document.id}) }}" <a href="{{ path('admin_document_show', {id: document.id}) }}"
class="text-light text-decoration-none"> class="text-light text-decoration-none">
{{ document.title }} {{ document.title }}
</a> </a>
</div>
{% if document.currentVersion and document.currentVersion.filePath %}
<div class="small text-muted mt-1">
Aktive Datei vorhanden
</div>
{% endif %}
</td> </td>
{# ID #}
<td class="small text-info"> <td class="small text-info">
{{ document.id }} <code>{{ document.id }}</code>
</td> </td>
{# Typ #}
<td> <td>
{% if document.currentVersion %} {% if document.currentVersion %}
<span class="badge bg-secondary"> <span class="badge bg-secondary">
@@ -69,7 +133,6 @@
{% endif %} {% endif %}
</td> </td>
{# Dokument Status #}
<td> <td>
{% if document.status == 'ACTIVE' %} {% if document.status == 'ACTIVE' %}
<span class="badge bg-success">Aktiv</span> <span class="badge bg-success">Aktiv</span>
@@ -78,18 +141,19 @@
{% endif %} {% endif %}
</td> </td>
{# Ingest Status #}
<td> <td>
{% if document.currentVersion %} {% if document.currentVersion %}
{% if document.currentVersion.ingestStatus == 'INDEXED' %} {% if document.currentVersion.ingestStatus == 'INDEXED' %}
<span class="badge bg-success">INDEXED</span> <span class="badge bg-success">INDEXED</span>
{% elseif document.currentVersion.ingestStatus == 'PENDING' %} {% elseif document.currentVersion.ingestStatus == 'PENDING' %}
<span class="badge bg-warning text-dark">PENDING</span> <span class="badge bg-warning text-dark">PENDING</span>
{% elseif document.currentVersion.ingestStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif document.currentVersion.ingestStatus == 'FAILED' %} {% elseif document.currentVersion.ingestStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span> <span class="badge bg-danger">FAILED</span>
{% else %} {% else %}
<span class="badge bg-dark border border-secondary"> <span class="badge bg-dark border border-secondary">
{{ document.currentVersion.ingestStatus }} {{ document.currentVersion.ingestStatus ?: '-' }}
</span> </span>
{% endif %} {% endif %}
{% else %} {% else %}
@@ -97,34 +161,40 @@
{% endif %} {% endif %}
</td> </td>
{# Version Count #}
<td> <td>
<span class="badge text-bg-dark border border-secondary">
{{ document.versions|length }} {{ document.versions|length }}
</span>
</td> </td>
{# Aktive Version #}
<td> <td>
{% if document.currentVersion %} {% if document.currentVersion %}
<span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }} v{{ document.currentVersion.versionNumber }}
</span>
{% else %} {% else %}
- -
{% endif %} {% endif %}
</td> </td>
{# Created At #} <td>
<span class="badge text-bg-dark border border-secondary">
{{ document.tags|length }}
</span>
</td>
<td class="small"> <td class="small">
{{ document.createdAt|date('d.m.Y H:i') }} {{ document.createdAt|date('d.m.Y H:i') }}
</td> </td>
{# Aktionen #}
<td class="text-end"> <td class="text-end">
<div class="d-flex justify-content-end flex-wrap gap-2">
<a class="btn btn-sm btn-outline-info me-2" <a class="btn btn-sm btn-outline-info"
href="{{ path('admin_document_tags_edit', {id: document.id}) }}"> href="{{ path('admin_document_tags_edit', {id: document.id}) }}">
Tags Tags
</a> </a>
<a class="btn btn-sm btn-outline-light me-2" <a class="btn btn-sm btn-outline-light"
href="{{ path('admin_document_show', {id: document.id}) }}"> href="{{ path('admin_document_show', {id: document.id}) }}">
Details Details
</a> </a>
@@ -133,8 +203,7 @@
<form method="post" <form method="post"
action="{{ path('admin_document_delete', {id: document.id}) }}" action="{{ path('admin_document_delete', {id: document.id}) }}"
class="d-inline" class="d-inline"
onsubmit="return confirm('Dokument wirklich endgültig löschen? Diese Aktion entfernt Dokument, Versionen und Index-Daten.');"> onsubmit="return confirm('Dokument wirklich löschen? Der Inhalt wird per Delete-Job aus dem Index entfernt.');">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('delete_document_' ~ document.id) }}"> value="{{ csrf_token('delete_document_' ~ document.id) }}">
@@ -144,23 +213,29 @@
</button> </button>
</form> </form>
{% endif %} {% endif %}
</div>
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div>
</div> </div>
</div> </div>
{% endif %} {% endif %}
<div class="mt-4 small text-secondary"> <div class="card bg-dark border-secondary text-light mt-4 shadow-sm">
Hinweis: Das Löschen eines Dokuments entfernt alle Versionen und <div class="card-body">
erfordert eine Aktualisierung des NDJSON-Indexes. <h5 class="text-info mb-3">Hinweis zum Dokument-Lifecycle</h5>
<div class="small text-light">
Änderungen an aktiven Versionen und Löschvorgänge wirken sich direkt auf den Wissensindex aus.
Zugewiesene Tags beeinflussen zusätzlich die semantische Routing-Ebene des Systems.
Dokumente mit schwachen oder fehlenden Tags sind oft ein guter Kandidat für fachliche Nachpflege.
</div>
</div>
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -4,8 +4,13 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3">Neues Dokument</h1> <div>
<h1 class="h3 mb-1">Neues Dokument</h1>
<div class="small text-muted">
Neuer Upload mit initialer Version und anschließendem asynchronen Ingest.
</div>
</div>
<a href="{{ path('admin_documents') }}" <a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-secondary"> class="btn btn-sm btn-outline-secondary">
@@ -13,7 +18,49 @@
</a> </a>
</div> </div>
<div class="card bg-black border-secondary text-light"> {% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Warum ist der Titel wichtig?</h5>
<ul class="small mb-0">
<li>Der Titel wird später Teil des fachlichen Kontexts des Dokuments.</li>
<li>Ein präziser Titel verbessert Retrieval, Chunk-Einordnung und spätere Tag-Pflege.</li>
<li>Generische Titel wie <code>Dokument 1</code> oder nur Dateinamen sind deutlich schwächer.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Gute Beispiele</h5>
<ul class="small mb-0">
<li><code>Testomat 808 Technisches Datenblatt</code></li>
<li><code>Resthärte-Messung Produktübersicht</code></li>
<li><code>Indikator 300 Anwendung und Dosierung</code></li>
</ul>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body"> <div class="card-body">
<form method="post" enctype="multipart/form-data"> <form method="post" enctype="multipart/form-data">
@@ -22,31 +69,24 @@
name="_token" name="_token"
value="{{ csrf_token('create_document') }}"> value="{{ csrf_token('create_document') }}">
{# ============================= #}
{# Titel #}
{# ============================= #}
<div class="mb-4"> <div class="mb-4">
<label class="form-label">Titel</label> <label class="form-label">Titel</label>
<div class="alert alert-secondary small"> <div class="alert alert-secondary small">
<strong>Hinweis zur Qualität:</strong><br> <strong>Hinweis zur Qualität:</strong><br>
Der Titel ist entscheidend für die semantische Einordnung Verwende einen fachlich präzisen Titel, der Produkt, Thema oder Dokumenttyp klar beschreibt.
der erzeugten Chunks. Jeder Chunk erhält den Titel als Kontext, Wenn kein Titel angegeben wird, wird automatisch der Dateiname verwendet.
wodurch Retrieval und Antwortqualität signifikant verbessert werden.<br><br>
Wird kein Titel angegeben, wird automatisch der Dateiname
verwendet (nicht empfohlen).
</div> </div>
<input class="form-control bg-dark text-light border-secondary" <input class="form-control bg-dark text-light border-secondary"
name="title" name="title"
placeholder="z. B. Sicherheitsdatenblatt Produkt XY"> value="{{ app.request.get('title') }}"
</div> placeholder="z. B. Testomat 808 Technisches Datenblatt">
{# ============================= #} <div class="form-text text-secondary">
{# Datei Upload #} Der Titel muss nicht lang sein, aber fachlich eindeutig.
{# ============================= #} </div>
</div>
<div class="mb-4"> <div class="mb-4">
<label class="form-label">Datei</label> <label class="form-label">Datei</label>
@@ -58,14 +98,22 @@
<div class="form-text text-secondary"> <div class="form-text text-secondary">
Unterstützte Formate: PDF, DOCX, TXT, MD. Unterstützte Formate: PDF, DOCX, TXT, MD.
Das Dokument wird versioniert gespeichert und anschließend Nach dem Upload wird automatisch Version 1 erstellt und ein Ingest-Job gestartet.
indexiert.
</div> </div>
</div> </div>
{# ============================= #} <div class="card bg-dark border-secondary mb-4">
{# Submit #} <div class="card-body">
{# ============================= #} <h6 class="text-info mb-3">Was passiert nach dem Speichern?</h6>
<ul class="small mb-0">
<li>Das Dokument wird versioniert gespeichert.</li>
<li>Die erste Version wird als aktuelle Version gesetzt.</li>
<li>Ein asynchroner Ingest-Job verarbeitet das Dokument für den Wissensindex.</li>
<li>Später können dem Dokument gezielt Tags zugewiesen werden.</li>
</ul>
</div>
</div>
<div class="d-flex justify-content-end"> <div class="d-flex justify-content-end">
<button class="btn btn-outline-info"> <button class="btn btn-outline-info">
@@ -79,8 +127,7 @@
</div> </div>
<div class="mt-4 small text-secondary"> <div class="mt-4 small text-secondary">
Hinweis: Nach dem Upload wird automatisch eine neue Dokumentversion erstellt. Hinweis: Ein sauber benanntes Dokument ist die beste Grundlage für gutes Retrieval und späteres präzises Tagging.
Die Indexierung erfolgt asynchron über einen Ingest-Job.
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -4,10 +4,13 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3 mb-0"> <div>
Neue Version <h1 class="h3 mb-1">Neue Version</h1>
</h1> <div class="small text-muted">
Neue unveränderliche Version für ein bestehendes Dokument hochladen.
</div>
</div>
<a href="{{ path('admin_document_show', {id: document.id}) }}" <a href="{{ path('admin_document_show', {id: document.id}) }}"
class="btn btn-sm btn-outline-secondary"> class="btn btn-sm btn-outline-secondary">
@@ -15,36 +18,99 @@
</a> </a>
</div> </div>
<div class="card bg-dark border-secondary mb-4 text-light"> {% for message in app.flashes('success') %}
<div class="card-body"> <div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="mb-3"> {% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Dokumentkontext</h5>
<div class="mb-2">
<strong>Dokument:</strong> <strong>Dokument:</strong>
<span class="text-light">{{ document.title }}</span> <span class="text-light">{{ document.title }}</span>
</div> </div>
<div class="small text-secondary"> <div class="small text-secondary">
Das Hochladen einer neuen Version erzeugt eine zusätzliche Eine neue Version erzeugt eine zusätzliche, unveränderliche Dokumentversion.
unveränderliche Dokumentversion. Die Aktivierung erfolgt separat Die bestehende aktive Version bleibt zunächst unverändert.
und löst einen deterministischen Re-Ingest aus.
</div>
</div> </div>
</div> </div>
<div class="card bg-black border-secondary text-light"> <div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="small mb-2">
<strong>Aktive Version:</strong>
{% if document.currentVersion %}
<span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }}
</span>
{% else %}
-
{% endif %}
</div>
<div class="small mb-2">
<strong>Vorhandene Versionen:</strong>
{{ document.versions|length }}
</div>
<div class="small">
<strong>Zugewiesene Tags:</strong>
{{ document.tags|length }}
</div>
</div>
</div>
</div>
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Wichtig für den Lifecycle</h5>
<ul class="small mb-0">
<li>Der Upload erzeugt nur eine <strong>neue Version</strong>, aber aktiviert sie nicht automatisch.</li>
<li>Erst die spätere <strong>Aktivierung</strong> löst den deterministischen Re-Ingest aus.</li>
<li>Tags bleiben auf <strong>Dokumentebene</strong> bestehen und gelten weiterhin für das Dokument als Ganzes.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Gute Praxis</h5>
<ul class="small mb-0">
<li>Nur fachlich wirklich passende Nachfolgeversionen hochladen.</li>
<li>Kein anderes Thema oder anderes Produkt in dieselbe Dokumentlinie mischen.</li>
<li>Bei stark verändertem Fachinhalt später Tagging mitprüfen.</li>
</ul>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body"> <div class="card-body">
<form method="post" enctype="multipart/form-data"> <form method="post" enctype="multipart/form-data">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('create_document_version_' ~ document.id) }}"> value="{{ csrf_token('create_document_version_' ~ document.id) }}">
{# ============================= #}
{# Datei Upload #}
{# ============================= #}
<div class="mb-4"> <div class="mb-4">
<label class="form-label">Datei auswählen</label> <label class="form-label">Datei auswählen</label>
@@ -54,15 +120,23 @@
required> required>
<div class="form-text text-secondary"> <div class="form-text text-secondary">
Unterstützte Formate: PDF, DOCX, TXT, MD.<br> Unterstützte Formate: PDF, DOCX, TXT, MD.
Die Datei wird versioniert gespeichert und mit einer Die Datei wird versioniert gespeichert und mit einer eindeutigen Checksum versehen.
eindeutigen Checksum versehen.
</div> </div>
</div> </div>
{# ============================= #} <div class="card bg-dark border-secondary mb-4">
{# Submit #} <div class="card-body">
{# ============================= #} <h6 class="text-info mb-3">Was passiert nach dem Upload?</h6>
<ul class="small mb-0">
<li>Es wird eine neue, unveränderliche Dokumentversion angelegt.</li>
<li>Die aktive Version bleibt zunächst unverändert.</li>
<li>Ein Re-Ingest erfolgt erst nach späterer Aktivierung dieser Version.</li>
<li>Danach wird der Wissensindex deterministisch neu aufgebaut.</li>
</ul>
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %} {% if is_granted('ROLE_SUPER_ADMIN') %}
<div class="d-flex justify-content-end"> <div class="d-flex justify-content-end">
@@ -71,16 +145,14 @@
</button> </button>
</div> </div>
{% endif %} {% endif %}
</form> </form>
</div> </div>
</div> </div>
<div class="mt-4 small text-secondary"> <div class="mt-4 small text-secondary">
Hinweis: Eine neue Version ersetzt nicht automatisch die aktive Version. Hinweis: Eine neue Version verbessert den Dokument-Lifecycle nur dann sauber, wenn sie fachlich wirklich zu diesem Dokument gehört.
Erst nach Aktivierung wird ein Re-Ingest durchgeführt und der Index Bei stark verändertem Inhalt sollten nach der späteren Aktivierung auch die Tags geprüft werden.
neu aufgebaut.
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -4,45 +4,67 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<h1 class="h3 mb-0">{{ document.title ?? 'Ein Fehler trat auf' }}</h1> <div>
<h1 class="h3 mb-1">{{ document.title }}</h1>
<div class="small text-muted">
Detailansicht für Dokument, Versionen und Tag-Zuordnung.
</div>
</div>
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_document_tags_edit', {id: document.id}) }}"
class="btn btn-sm btn-outline-info">
Tags bearbeiten
</a>
<a href="{{ path('admin_documents') }}" <a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-secondary"> class="btn btn-sm btn-outline-secondary">
Zurück zur Übersicht Zurück zur Übersicht
</a> </a>
</div> </div>
</div>
{% if document %} {% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{# ============================= #} {% for message in app.flashes('danger') %}
{# Dokument-Meta #} <div class="alert alert-danger shadow-sm">
{# ============================= #} {{ message }}
</div>
{% endfor %}
<div class="card bg-dark border-secondary mb-5 text-light"> {% for message in app.flashes('info') %}
<div class="alert alert-info shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="row g-4 mb-4">
<div class="col-lg-7">
<div class="card bg-dark border-secondary text-light h-100 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="text-info mb-3">Dokument-Metadaten</h5>
<div class="mb-2"> <div class="row g-3">
<strong>Status:</strong> <div class="col-md-6">
<div class="small text-muted mb-1">Status</div>
<div>
{% if document.status == 'ACTIVE' %} {% if document.status == 'ACTIVE' %}
<span class="badge bg-success">Aktiv</span> <span class="badge bg-success">Aktiv</span>
{% else %} {% else %}
<span class="badge bg-secondary">Archiviert</span> <span class="badge bg-secondary">Archiviert</span>
{% endif %} {% endif %}
</div> </div>
<div class="mb-2">
<strong>Erstellt von:</strong>
{{ document.createdBy ? document.createdBy.email : '-' }}
</div> </div>
<div class="mb-2"> <div class="col-md-6">
<strong>Erstellt am:</strong> <div class="small text-muted mb-1">Aktive Version</div>
{{ document.createdAt|date('d.m.Y H:i') }} <div>
</div>
<div class="mb-2">
<strong>Aktive Version:</strong>
{% if document.currentVersion %} {% if document.currentVersion %}
<span class="badge bg-info text-dark"> <span class="badge bg-info text-dark">
v{{ document.currentVersion.versionNumber }} v{{ document.currentVersion.versionNumber }}
@@ -51,16 +73,104 @@
- -
{% endif %} {% endif %}
</div> </div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Erstellt von</div>
<div>{{ document.createdBy ? document.createdBy.email : '-' }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Erstellt am</div>
<div>{{ document.createdAt|date('d.m.Y H:i:s') }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Anzahl Versionen</div>
<div>{{ document.versions|length }}</div>
</div>
<div class="col-md-6">
<div class="small text-muted mb-1">Zugewiesene Tags</div>
<div>{{ document.tags|length }}</div>
</div> </div>
</div> </div>
{# ============================= #} {% if is_granted('ROLE_SUPER_ADMIN') %}
{# Versionen #} <hr class="border-secondary">
{# ============================= #}
<div class="d-flex flex-wrap gap-2">
<a href="{{ path('admin_document_version_new', {id: document.id}) }}"
class="btn btn-sm btn-outline-info">
Neue Version
</a>
<form method="post"
action="{{ path('admin_document_delete', {id: document.id}) }}"
class="d-inline"
onsubmit="return confirm('Dokument wirklich löschen? Der Inhalt wird per Delete-Job aus dem Index entfernt.');">
<input type="hidden"
name="_token"
value="{{ csrf_token('delete_document_' ~ document.id) }}">
<button class="btn btn-sm btn-outline-danger">
Dokument löschen
</button>
</form>
</div>
{% endif %}
</div>
</div>
</div>
<div class="col-lg-5">
<div class="card bg-dark border-secondary text-light h-100 shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-center mb-3"> <div class="d-flex justify-content-between align-items-center mb-3">
<h2 class="h5 mb-0">Versionen</h2> <h5 class="text-info mb-0">Tags</h5>
<a href="{{ path('admin_document_tags_edit', {id: document.id}) }}"
class="btn btn-sm btn-outline-light">
Bearbeiten
</a>
</div>
{% if document.tags is empty %}
<div class="alert alert-secondary mb-0">
Diesem Dokument sind noch keine Tags zugewiesen.
</div>
{% else %}
<div class="d-flex flex-wrap gap-2">
{% for tag in document.tags %}
<span class="badge px-3 py-2
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.label }}
</span>
{% endfor %}
</div>
<div class="small text-muted mt-3">
Tags steuern die semantische Routing-Ebene. Weise nur fachlich wirklich passende Tags zu.
</div>
{% endif %}
</div>
</div>
</div>
</div>
<div class="d-flex justify-content-between align-items-center mb-3 flex-wrap gap-2">
<div>
<h2 class="h5 mb-1">Versionen</h2>
<div class="small text-muted">
Beim Aktivieren einer Version wird automatisch ein Re-Ingest ausgelöst.
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %} {% if is_granted('ROLE_SUPER_ADMIN') %}
<a href="{{ path('admin_document_version_new', {id: document.id}) }}" <a href="{{ path('admin_document_version_new', {id: document.id}) }}"
@@ -72,48 +182,47 @@
{% if document.versions is empty %} {% if document.versions is empty %}
<div class="alert alert-secondary"> <div class="alert alert-secondary shadow-sm">
Keine Versionen vorhanden. Keine Versionen vorhanden.
</div> </div>
{% else %} {% else %}
<div class="card bg-black border-secondary"> <div class="card bg-black border-secondary shadow-sm">
<div class="card-body"> <div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0"> <table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th>Version</th> <th style="width: 10%">Version</th>
<th>Status</th> <th style="width: 10%">Aktiv</th>
<th>Ingest</th> <th style="width: 14%">Ingest</th>
<th>Checksum</th> <th style="width: 18%">Checksum</th>
<th>Erstellt von</th> <th style="width: 16%">Erstellt von</th>
<th>Datum</th> <th style="width: 14%">Datum</th>
<th class="text-end">Aktionen</th> <th class="text-end" style="width: 18%">Aktionen</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for version in document.versions %} {% for version in document.versions %}
<tr> <tr>
<td> <td>
<strong>v{{ version.versionNumber }}</strong> <strong>v{{ version.versionNumber }}</strong>
{% if document.currentVersion and version.id == document.currentVersion.id %}
<div class="small text-info mt-1">Current</div>
{% endif %}
</td> </td>
{# Aktivstatus #}
<td> <td>
{% if version.isActive %} {% if version.isActive %}
<span class="badge bg-success">Aktiv</span> <span class="badge bg-success">Aktiv</span>
{% else %} {% else %}
<span class="badge bg-dark border border-secondary"> <span class="badge bg-dark border border-secondary">Inaktiv</span>
Inaktiv
</span>
{% endif %} {% endif %}
</td> </td>
{# Ingest Status #}
<td> <td>
{% if version.ingestStatus == 'INDEXED' %} {% if version.ingestStatus == 'INDEXED' %}
<span class="badge bg-success">INDEXED</span> <span class="badge bg-success">INDEXED</span>
@@ -125,99 +234,85 @@
<span class="badge bg-secondary">PENDING</span> <span class="badge bg-secondary">PENDING</span>
{% else %} {% else %}
<span class="badge bg-dark border border-secondary"> <span class="badge bg-dark border border-secondary">
{{ version.ingestStatus }} {{ version.ingestStatus ?: '-' }}
</span> </span>
{% endif %} {% endif %}
</td> </td>
{# Checksum #}
<td class="small text-secondary"> <td class="small text-secondary">
{{ version.checksum ? version.checksum[:10] ~ '…' : '-' }} {% if version.checksum %}
<code>{{ version.checksum[:12] ~ '…' }}</code>
{% else %}
-
{% endif %}
</td> </td>
{# Created by #}
<td> <td>
{{ version.createdBy ? version.createdBy.email : '-' }} {{ version.createdBy ? version.createdBy.email : '-' }}
</td> </td>
{# Date #}
<td class="small"> <td class="small">
{{ version.createdAt|date('d.m.Y H:i') }} {{ version.createdAt|date('d.m.Y H:i:s') }}
</td> </td>
{# Aktionen #}
<td class="text-end"> <td class="text-end">
<div class="d-flex justify-content-end flex-wrap gap-2">
{% if version.isActive %} {% if version.isActive %}
{% if version.ingestStatus in ['PENDING', 'FAILED'] and is_granted('ROLE_SUPER_ADMIN') %} {% if version.ingestStatus in ['PENDING', 'FAILED'] and is_granted('ROLE_SUPER_ADMIN') %}
<form method="post" <form method="post"
action="{{ path('admin_document_version_ingest', {versionId: version.id}) }}" action="{{ path('admin_document_version_ingest', {versionId: version.id}) }}"
class="d-inline" class="d-inline"
onsubmit="return confirm('Ingest erneut starten?');"> onsubmit="return confirm('Ingest erneut starten?');">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('ingest_version_' ~ version.id) }}"> value="{{ csrf_token('ingest_version_' ~ version.id) }}">
<button class="btn btn-sm btn-outline-info"> <button class="btn btn-sm btn-outline-info">
Ingest starten Ingest starten
</button> </button>
</form> </form>
{% else %} {% else %}
<span class="text-success small"> <span class="small text-success align-self-center">
Bereits indexiert Keine Aktion nötig
</span> </span>
{% endif %} {% endif %}
{% else %} {% else %}
{% if is_granted('ROLE_SUPER_ADMIN') %} {% if is_granted('ROLE_SUPER_ADMIN') %}
<form method="post" <form method="post"
action="{{ path('admin_document_version_activate', {versionId: version.id}) }}" action="{{ path('admin_document_version_activate', {versionId: version.id}) }}"
class="d-inline" class="d-inline"
onsubmit="return confirm('Diese Version aktivieren? Es wird ein Re-Ingest ausgelöst.');"> onsubmit="return confirm('Diese Version aktivieren? Es wird ein Re-Ingest ausgelöst.');">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('activate_version_' ~ version.id) }}"> value="{{ csrf_token('activate_version_' ~ version.id) }}">
<button class="btn btn-sm btn-outline-light"> <button class="btn btn-sm btn-outline-light">
Aktivieren Aktivieren
</button> </button>
</form> </form>
{% endif %} {% endif %}
{% endif %} {% endif %}
</div>
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div>
</div> </div>
</div> </div>
{% endif %} {% endif %}
<div class="mt-4 small text-secondary"> <div class="card bg-dark border-secondary text-light mt-4 shadow-sm">
Hinweis: Beim Aktivieren einer Version wird automatisch ein Re-Ingest <div class="card-body">
durchgeführt. Der NDJSON-Index und der FAISS-Index werden deterministisch <h5 class="text-info mb-3">Hinweis zum Lifecycle</h5>
neu aufgebaut. <div class="small text-light">
Beim Aktivieren einer Version wird automatisch ein Re-Ingest durchgeführt.
Der NDJSON-Bestand und der Vektorindex werden deterministisch neu aufgebaut.
Wenn Tags zugewiesen sind, beeinflusst dieses Dokument zusätzlich die semantische Routing-Ebene.
</div>
</div> </div>
{% else %}
<div class="alert alert-danger">
Dokument nicht gefunden.
</div> </div>
{% endif %}
{% endblock %} {% endblock %}

View File

@@ -4,81 +4,87 @@
{% block body %} {% block body %}
{# ============================================= #} <div id="rebuild-status" class="mb-4">
{# Tag-Rebuild Status (Echte Live-Anzeige) #} {% if latestJob %}
{# ============================================= #} <div class="alert alert-secondary shadow-sm mb-0">
Status wird geladen…
<div id="rebuild-status" class="mb-5" style="min-height:54px"></div> </div>
{% endif %}
</div>
<script> <script>
let polling = null; const statusBox = document.getElementById('rebuild-status');
const source = new EventSource("{{ path('admin_tags_rebuild_stream') }}");
function renderStatus(status) { source.onmessage = function (event) {
const el = document.getElementById('rebuild-status'); const data = JSON.parse(event.data);
let html = '';
if (!status) { if (data.status === '{{ statusRunning }}') {
el.innerHTML = ''; html = `
return; <div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
} <div>
<strong>Dokument-Tag-Rebuild läuft</strong><br>
if (status === 'RUNNING') { ${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
el.innerHTML = ` </div>
<div class="alert alert-info d-flex justify-content-between align-items-center">
<div><strong>Dokument-Tag-Rebuild läuft…</strong></div>
<div class="spinner-border spinner-border-sm"></div> <div class="spinner-border spinner-border-sm"></div>
</div> </div>
`; `;
} else if (status === 'QUEUED') { } else if (data.status === '{{ statusQueued }}') {
el.innerHTML = ` html = `
<div class="alert alert-secondary"> <div class="alert alert-secondary shadow-sm mb-0">
Dokument-Tag-Rebuild in Warteschlange <strong>Dokument-Tag-Rebuild in Warteschlange</strong>
</div> </div>
`; `;
} else if (status === 'COMPLETED') { } else if (data.status === '{{ statusCompleted }}') {
el.innerHTML = ` html = `
<div class="alert alert-success fw-bold"> <div class="alert alert-success shadow-sm mb-0">
Dokument-Tag-Rebuild erfolgreich abgeschlossen. <i class="bi bi-check-lg"></i> Dokument-Tag-Rebuild erfolgreich abgeschlossen
</div> </div>
`; `;
stopPolling(); } else if (data.status === '{{ statusFailed }}') {
} else if (status === 'FAILED') { html = `
el.innerHTML = ` <div class="alert alert-danger shadow-sm mb-0">
<div class="alert alert-danger"> <strong>Dokument-Tag-Rebuild fehlgeschlagen</strong><br>
Dokument-Tag-Rebuild fehlgeschlagen. ${data.error ? '<code>' + data.error + '</code>' : ''}
</div> </div>
`; `;
stopPolling();
}
} }
function checkStatus() { statusBox.innerHTML = html;
fetch('{{ path('admin_tags_status') }}') };
.then(r => r.json())
.then(data => renderStatus(data.status))
.catch(() => stopPolling());
}
function startPolling() { source.onerror = function () {
polling = setInterval(checkStatus, 2000); console.warn('SSE Verbindung verloren');
} };
function stopPolling() { window.addEventListener('beforeunload', function () {
if (polling) { source.close();
clearInterval(polling); });
polling = null;
}
}
// Start polling sofort
checkStatus();
startPolling();
</script> </script>
{% for message in app.flashes('success') %}
<div class="alert alert-success shadow-sm">
{{ message }}
</div>
{% endfor %}
{% for message in app.flashes('danger') %}
<div class="alert alert-danger shadow-sm">
{{ message }}
</div>
{% endfor %}
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"> <div>
<h1 class="h3 mb-1">
Tags für Dokument Tags für Dokument
<span class="text-info">{{ document.title }}</span> <span class="text-info">{{ document.title }}</span>
</h1> </h1>
<div class="small text-muted">
Weise nur Tags zu, die den fachlichen Kern des Dokuments wirklich beschreiben.
</div>
</div>
<a href="{{ path('admin_documents') }}" <a href="{{ path('admin_documents') }}"
class="btn btn-sm btn-outline-light"> class="btn btn-sm btn-outline-light">
@@ -86,14 +92,40 @@
</a> </a>
</div> </div>
{# ============================================= #} <div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
{# Bereits zugewiesene Tags #} <div class="card-body row g-4">
{# ============================================= #} <div class="col-lg-7">
<h5 class="text-info mb-3">Hinweis für gutes Tagging</h5>
<div class="card bg-dark border-secondary mb-4"> <ul class="small mb-0">
<li><strong>Präzise statt breit:</strong> lieber produkt- oder themenscharfe Tags als allgemeine Oberbegriffe.</li>
<li><strong>Catalog Entity</strong> nur bei echten Produktfamilien, Katalogbegriffen oder klaren Entitäten.</li>
<li><strong>Generic</strong> nur als unterstützende Zusatzsemantik.</li>
<li><strong>Sales Signal</strong> sparsam und bewusst einsetzen, nicht als Ersatz für Fach-Tags.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="d-flex flex-wrap gap-2">
<span class="badge text-bg-dark border border-secondary">
Zugewiesen: {{ document.tags|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Verfügbar: {{ allTags|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Nicht zugewiesen: {{ allTags|length - document.tags|length }}
</span>
</div>
</div>
</div>
</div>
<div class="card bg-dark border-secondary mb-4 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="mb-3">Bereits zugewiesene Tags</h5>
<h5 class="mb-3">Zugewiesene Tags für: <span class="text-info ">{{ document.title }}</span></h5>
{% if document.tags is empty %} {% if document.tags is empty %}
<div class="alert alert-secondary mb-0"> <div class="alert alert-secondary mb-0">
@@ -101,22 +133,26 @@
</div> </div>
{% else %} {% else %}
<div class="d-flex flex-wrap gap-2"> <div class="d-flex flex-wrap gap-2">
{% for tag in document.tags %} {% for tag in allTags %}
<span class="badge bg-info text-dark px-3 py-2"> {% if tag in document.tags %}
<span class="badge px-3 py-2
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.label }} {{ tag.label }}
</span> </span>
{% endif %}
{% endfor %} {% endfor %}
</div> </div>
{% endif %} {% endif %}
</div> </div>
</div> </div>
{# ============================================= #} <div class="card bg-black border-secondary shadow-sm">
{# Tag-Zuweisung Formular #}
{# ============================================= #}
<div class="card bg-black border-secondary">
<div class="card-body"> <div class="card-body">
<h5 class="text-info mb-3">Tags zuweisen</h5> <h5 class="text-info mb-3">Tags zuweisen</h5>
@@ -128,38 +164,125 @@
name="_token" name="_token"
value="{{ csrf_token('admin_document_tags_save_' ~ document.id) }}"> value="{{ csrf_token('admin_document_tags_save_' ~ document.id) }}">
<div class="row g-4">
<div class="col-lg-6">
<div class="card bg-dark border-secondary h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Zugewiesene Tags
</div>
<div class="card-body">
<div class="row"> <div class="row">
{% set hasAssigned = false %}
{% for tag in allTags %} {% for tag in allTags %}
<div class="col-md-2 mb-2"> {% if tag in document.tags %}
{% set hasAssigned = true %}
<div class="col-md-6 mb-3">
<div class="form-check"> <div class="form-check">
<input <input
class="form-check-input" class="form-check-input"
type="checkbox" type="checkbox"
name="tag_ids[]" name="tag_ids[]"
value="{{ tag.id }}" value="{{ tag.id }}"
id="tag_{{ tag.id }}" id="tag_{{ tag.id }}"
{% if tag in document.tags %}checked{% endif %} checked
> >
<label class="form-check-label w-100" for="tag_{{ tag.id }}">
<label class="form-check-label bg-info text-black badge"{% if tag not in document.tags %} style="opacity: .5;"{% endif %} <span class="badge
for="tag_{{ tag.id }}"> {% if tag.type == 'catalog_entity' %}
{{ tag.label }} text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.type }}
</span>
<span class="ms-2 fw-semibold">{{ tag.label }}</span>
{% if tag.description %}
<div class="small text-muted mt-1">{{ tag.description }}</div>
{% endif %}
</label> </label>
</div> </div>
</div> </div>
{% endif %}
{% endfor %} {% endfor %}
{% if not hasAssigned %}
<div class="col-12">
<div class="text-muted">
Noch keine Tags zugewiesen.
</div>
</div>
{% endif %}
</div>
</div>
</div>
</div>
<div class="col-lg-6">
<div class="card bg-dark border-secondary h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Verfügbare Tags
</div>
<div class="card-body">
<div class="row">
{% set hasAvailable = false %}
{% for tag in allTags %}
{% if tag not in document.tags %}
{% set hasAvailable = true %}
<div class="col-md-6 mb-3">
<div class="form-check">
<input
class="form-check-input"
type="checkbox"
name="tag_ids[]"
value="{{ tag.id }}"
id="tag_{{ tag.id }}"
>
<label class="form-check-label w-100" for="tag_{{ tag.id }}">
<span class="badge
{% if tag.type == 'catalog_entity' %}
text-bg-info
{% elseif tag.type == 'sales_signal' %}
text-bg-warning
{% else %}
text-bg-secondary
{% endif %}">
{{ tag.type }}
</span>
<span class="ms-2">{{ tag.label }}</span>
{% if tag.description %}
<div class="small text-muted mt-1">{{ tag.description }}</div>
{% endif %}
</label>
</div>
</div>
{% endif %}
{% endfor %}
{% if not hasAvailable %}
<div class="col-12">
<div class="text-muted">
Keine weiteren Tags verfügbar.
</div>
</div>
{% endif %}
</div>
</div>
</div>
</div>
</div> </div>
<hr class="border-secondary"> <hr class="border-secondary">
<div class="d-flex justify-content-end">
<button type="submit" <button type="submit"
class="btn btn-sm btn-outline-info"> class="btn btn-sm btn-outline-info">
Speichern Speichern
</button> </button>
</div>
</form> </form>

View File

@@ -4,8 +4,17 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> {% set latestJob = jobs is not empty ? jobs|first : null %}
<h1 class="h3"><i class="bi bi-terminal"></i> Indexierung (Ingest Jobs)</h1>
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-terminal"></i> Indexierung (Ingest Jobs)
</h1>
<div class="small text-muted">
Übersicht über Reindex-, Dokument- und Aktivierungsjobs des Systems.
</div>
</div>
{% if is_granted('ROLE_SUPER_ADMIN') %} {% if is_granted('ROLE_SUPER_ADMIN') %}
<form method="post" <form method="post"
@@ -25,40 +34,143 @@
{% endif %} {% endif %}
</div> </div>
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Was sieht man hier?</h5>
<ul class="small mb-0">
<li><strong>DOCUMENT</strong> verarbeitet ein einzelnes Dokument neu.</li>
<li><strong>DOCUMENT_VERSION_ACTIVATE</strong> zieht eine aktivierte Version deterministisch neu in den Index.</li>
<li><strong>DOCUMENT_DELETE</strong> entfernt Dokumentinhalt wieder sauber aus den Index-Artefakten.</li>
<li><strong>GLOBAL_REINDEX</strong> baut den Wissensindex vollständig neu auf und ist der stärkste Reparaturpfad.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Worauf achten?</h5>
<ul class="small mb-0">
<li><strong>RUNNING</strong> und <strong>QUEUED</strong> bedeuten: keine unnötigen parallelen Rebuilds starten.</li>
<li><strong>FAILED</strong> oder <strong>ABORTED</strong> direkt prüfen.</li>
<li>Bei inkonsistentem Indexzustand ist meist ein <strong>Global Reindex</strong> der richtige Reparaturschritt.</li>
</ul>
</div>
</div>
</div>
{% if latestJob %}
<div class="card bg-black border-secondary text-light mb-4 shadow-sm">
<div class="card-body">
<div class="d-flex justify-content-between align-items-start flex-wrap gap-3">
<div>
<div class="small text-muted mb-1">Letzter Job</div>
<div class="fw-semibold">
<a href="{{ path('admin_job_show', {id: latestJob.id}) }}"
class="text-light text-decoration-none">
{{ latestJob.id }}
</a>
</div>
</div>
<div>
<div class="small text-muted mb-1">Typ</div>
<span class="badge bg-info text-dark">{{ latestJob.type }}</span>
</div>
<div>
<div class="small text-muted mb-1">Status</div>
{% if latestJob.status == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif latestJob.status == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif latestJob.status == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif latestJob.status == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif latestJob.status == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ latestJob.status }}</span>
{% endif %}
</div>
<div>
<div class="small text-muted mb-1">Gestartet</div>
<div class="small">
{{ latestJob.startedAt ? latestJob.startedAt|date('d.m.Y H:i:s') : '-' }}
</div>
</div>
<div>
<div class="small text-muted mb-1">Beendet</div>
<div class="small">
{{ latestJob.finishedAt ? latestJob.finishedAt|date('d.m.Y H:i:s') : 'läuft noch / offen' }}
</div>
</div>
</div>
{% if latestJob.errorMessage %}
<div class="alert alert-danger small mt-3 mb-0">
<strong>Fehler:</strong>
{{ latestJob.errorMessage|slice(0, 250) }}{% if latestJob.errorMessage|length > 250 %}…{% endif %}
</div>
{% endif %}
</div>
</div>
{% endif %}
{% if jobs is empty %} {% if jobs is empty %}
<div class="alert alert-secondary"> <div class="alert alert-secondary shadow-sm">
Keine Ingest Jobs vorhanden. Keine Ingest Jobs vorhanden.
</div> </div>
{% else %} {% else %}
<div class="card bg-black border-secondary"> <div class="card bg-black border-secondary shadow-sm">
<div class="card-body p-0"> <div class="card-body p-0">
<div class="d-flex justify-content-between align-items-center px-3 py-3 border-bottom border-secondary flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Jobs</strong>
<span class="small text-muted ms-2">{{ jobs|length }} Einträge</span>
</div>
<div class="small text-muted">
Neueste Jobs stehen oben.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover align-middle mb-0"> <table class="table table-dark table-striped table-hover align-middle mb-0">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th>Job-ID</th> <th style="width: 18%">Job</th>
<th>Typ</th> <th style="width: 14%">Typ</th>
<th>Status</th> <th style="width: 12%">Status</th>
<th>Dokument</th> <th style="width: 18%">Bezug</th>
<th>Version</th> <th style="width: 12%">Gestartet</th>
<th>Gestartet</th> <th style="width: 12%">Beendet</th>
<th>Beendet</th> <th style="width: 14%">Benutzer</th>
<th>Benutzer</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for job in jobs %} {% for job in jobs %}
<tr> <tr>
<td class="small"> <td class="small">
<div class="fw-semibold">
<a href="{{ path('admin_job_show', {id: job.id}) }}" <a href="{{ path('admin_job_show', {id: job.id}) }}"
class="text-light text-decoration-none"> class="text-light text-decoration-none">
{{ job.id }} {{ job.id }}
</a> </a>
</div>
{% if job.errorMessage %}
<div class="text-danger small mt-1"
title="{{ job.errorMessage }}">
{{ job.errorMessage|slice(0, 120) }}{% if job.errorMessage|length > 120 %}…{% endif %}
</div>
{% endif %}
</td> </td>
<td> <td>
@@ -76,6 +188,8 @@
<span class="badge bg-warning text-dark">RUNNING</span> <span class="badge bg-warning text-dark">RUNNING</span>
{% elseif job.status == 'FAILED' %} {% elseif job.status == 'FAILED' %}
<span class="badge bg-danger">FAILED</span> <span class="badge bg-danger">FAILED</span>
{% elseif job.status == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %} {% else %}
<span class="badge bg-dark border border-secondary"> <span class="badge bg-dark border border-secondary">
{{ job.status }} {{ job.status }}
@@ -83,37 +197,44 @@
{% endif %} {% endif %}
</td> </td>
<td> <td class="small">
{% if job.documentId %} {% if job.documentId %}
<div>
<span class="text-muted">Dokument:</span>
<a href="{{ path('admin_document_show', {id: job.documentId}) }}" <a href="{{ path('admin_document_show', {id: job.documentId}) }}"
class="text-light text-decoration-none"> class="text-light text-decoration-none">
{{ job.documentId }} {{ job.documentId }}
</a> </a>
{% else %} </div>
{% endif %}
{% if job.documentVersionId %}
<div class="mt-1">
<span class="text-muted">Version:</span>
{{ job.documentVersionId }}
</div>
{% endif %}
{% if not job.documentId and not job.documentVersionId %}
- -
{% endif %} {% endif %}
</td> </td>
<td>
{{ job.documentVersionId ?? '-' }}
</td>
<td class="small"> <td class="small">
{{ job.startedAt ? job.startedAt|date('d.m.Y H:i:s') : '-' }} {{ job.startedAt ? job.startedAt|date('d.m.Y H:i:s') : '-' }}
</td> </td>
<td class="small"> <td class="small">
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }} {{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : 'offen' }}
</td> </td>
<td class="small"> <td class="small">
{{ job.startedBy ? job.startedBy.email : '-' }} {{ job.startedBy ? job.startedBy.email : '-' }}
</td> </td>
</tr> </tr>
{% else %} {% else %}
<tr> <tr>
<td colspan="8" class="text-center text-secondary py-4"> <td colspan="7" class="text-center text-secondary py-4">
Keine Jobs gefunden. Keine Jobs gefunden.
</td> </td>
</tr> </tr>
@@ -121,6 +242,7 @@
</tbody> </tbody>
</table> </table>
</div>
</div> </div>
</div> </div>
@@ -128,8 +250,8 @@
{% endif %} {% endif %}
<div class="mt-4 small text-secondary"> <div class="mt-4 small text-secondary">
Hinweis: Während laufender Jobs (Status RUNNING) sollten keine Hinweis: Während laufender Jobs (Status <strong>RUNNING</strong>) oder wartender Jobs (<strong>QUEUED</strong>)
parallelen Reindex-Prozesse gestartet werden. sollten keine unnötigen parallelen Reindex-Prozesse gestartet werden.
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -4,8 +4,18 @@
{% block body %} {% block body %}
<div class="d-flex justify-content-between align-items-center mb-4"> {% set jobStatus = job.status|upper %}
<h1 class="h3 mb-0">Ingest Job</h1> {% set isActiveJob = jobStatus in ['QUEUED', 'RUNNING'] %}
<div class="d-flex justify-content-between align-items-center mb-4 flex-wrap gap-2">
<div>
<h1 class="h3 mb-1">
<i class="bi bi-terminal"></i> Ingest Job
</h1>
<div class="small text-muted">
Detailansicht für einen einzelnen Indexierungs- oder Rebuild-Job.
</div>
</div>
<a href="{{ path('admin_jobs') }}" <a href="{{ path('admin_jobs') }}"
class="btn btn-sm btn-outline-secondary"> class="btn btn-sm btn-outline-secondary">
@@ -13,26 +23,82 @@
</a> </a>
</div> </div>
<div class="card bg-black border-secondary text-light"> <div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row g-4">
<div class="col-lg-7">
<h5 class="text-info mb-3">Einordnung</h5>
<ul class="small mb-0">
<li><strong>DOCUMENT</strong> verarbeitet ein einzelnes Dokument neu.</li>
<li><strong>DOCUMENT_VERSION_ACTIVATE</strong> aktiviert eine Version und zieht sie deterministisch neu in den Index.</li>
<li><strong>DOCUMENT_DELETE</strong> entfernt Dokumentinhalt wieder sauber aus dem Wissensbestand.</li>
<li><strong>GLOBAL_REINDEX</strong> baut den Gesamtindex vollständig neu auf.</li>
</ul>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Zustand</h5>
<div class="d-flex flex-wrap gap-2">
{% if jobStatus == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif jobStatus == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif jobStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif jobStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif jobStatus == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ jobStatus }}</span>
{% endif %}
{% if isActiveJob %}
<span class="badge text-bg-info">Polling aktiv</span>
{% endif %}
</div>
</div>
</div>
</div>
<div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body"> <div class="card-body">
<div class="mb-2"> <div class="row g-4">
<strong>ID:</strong> <div class="col-lg-6">
<span class="small text-light">{{ job.id }}</span> <div class="mb-3">
<div class="small text-muted mb-1">Job-ID</div>
<div class="fw-semibold small text-light">{{ job.id }}</div>
</div> </div>
<div class="mb-2"> <div class="mb-3">
<strong>Typ:</strong> <div class="small text-muted mb-1">Typ</div>
<div>
<span class="badge bg-info text-dark">{{ job.type }}</span> <span class="badge bg-info text-dark">{{ job.type }}</span>
</div> </div>
<div class="mb-2">
<strong>Status:</strong>
<span id="job-status-badge"></span>
</div> </div>
<div class="mb-2"> <div class="mb-3">
<strong>Dokument:</strong> <div class="small text-muted mb-1">Status</div>
<div id="job-status-badge">
{% if jobStatus == 'COMPLETED' %}
<span class="badge bg-success">COMPLETED</span>
{% elseif jobStatus == 'QUEUED' %}
<span class="badge bg-secondary">QUEUED</span>
{% elseif jobStatus == 'RUNNING' %}
<span class="badge bg-warning text-dark">RUNNING</span>
{% elseif jobStatus == 'FAILED' %}
<span class="badge bg-danger">FAILED</span>
{% elseif jobStatus == 'ABORTED' %}
<span class="badge bg-dark border border-danger text-danger">ABORTED</span>
{% else %}
<span class="badge bg-dark border border-secondary">{{ jobStatus }}</span>
{% endif %}
</div>
</div>
<div class="mb-3">
<div class="small text-muted mb-1">Dokument</div>
<div>
{% if job.documentId %} {% if job.documentId %}
<a href="{{ path('admin_document_show', {id: job.documentId}) }}" <a href="{{ path('admin_document_show', {id: job.documentId}) }}"
class="text-light text-decoration-none"> class="text-light text-decoration-none">
@@ -42,32 +108,49 @@
- -
{% endif %} {% endif %}
</div> </div>
<div class="mb-2">
<strong>Version:</strong>
{{ job.documentVersionId ?? '-' }}
</div> </div>
<div class="mb-2"> <div class="mb-0">
<strong>Gestartet:</strong> <div class="small text-muted mb-1">Dokumentversion</div>
{{ job.startedAt|date('d.m.Y H:i:s') }} <div>{{ job.documentVersionId ?? '-' }}</div>
</div>
</div> </div>
<div class="mb-2"> <div class="col-lg-6">
<strong>Beendet:</strong> <div class="mb-3">
<span id="job-finished-at"> <div class="small text-muted mb-1">Gestartet</div>
<div>
{{ job.startedAt ? job.startedAt|date('d.m.Y H:i:s') : '-' }}
</div>
</div>
<div class="mb-3">
<div class="small text-muted mb-1">Beendet</div>
<div id="job-finished-at">
{{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }} {{ job.finishedAt ? job.finishedAt|date('d.m.Y H:i:s') : '-' }}
</span> </div>
</div> </div>
<div class="mb-2"> <div class="mb-3">
<strong>Gestartet von:</strong> <div class="small text-muted mb-1">Gestartet von</div>
{{ job.startedBy ? job.startedBy.email : '-' }} <div>{{ job.startedBy ? job.startedBy.email : '-' }}</div>
</div>
<div class="mb-0">
<div class="small text-muted mb-1">Polling</div>
<div class="small text-light">
{% if isActiveJob %}
Status wird automatisch aktualisiert.
{% else %}
Kein Live-Polling nötig.
{% endif %}
</div>
</div>
</div>
</div> </div>
{# Loader #}
<div id="job-loader" <div id="job-loader"
class="mt-3 d-none"> class="mt-4 {% if not isActiveJob %}d-none{% endif %}">
<div class="d-flex align-items-center gap-2"> <div class="d-flex align-items-center gap-2">
<div class="spinner-border spinner-border-sm text-info" role="status"></div> <div class="spinner-border spinner-border-sm text-info" role="status"></div>
<div> <div>
@@ -79,10 +162,10 @@
</div> </div>
</div> </div>
{# Fehlerbereich #}
<div id="job-error" <div id="job-error"
class="alert alert-danger mt-3 {% if not job.errorMessage %}d-none{% endif %}"> class="alert alert-danger mt-4 {% if not job.errorMessage %}d-none{% endif %}">
{% if job.errorMessage %} {% if job.errorMessage %}
<strong>Fehler:</strong><br>
{{ job.errorMessage }} {{ job.errorMessage }}
{% endif %} {% endif %}
</div> </div>
@@ -91,13 +174,13 @@
</div> </div>
<div class="mt-4 small text-secondary"> <div class="mt-4 small text-secondary">
Hinweis: Bei DOCUMENT_VERSION_ACTIVATE-Jobs wird ein vollständiger Hinweis: Bei <strong>DOCUMENT_VERSION_ACTIVATE</strong>-Jobs wird ein vollständiger
NDJSON-Rebuild und FAISS-Reindex durchgeführt. NDJSON-Rebuild und FAISS-Reindex durchgeführt. Bei <strong>GLOBAL_REINDEX</strong>
wird der gesamte Wissensindex neu aufgebaut.
</div> </div>
<script> <script>
(function () { (function () {
const statusUrl = {{ path('admin_job_status', {id: job.id})|json_encode|raw }}; const statusUrl = {{ path('admin_job_status', {id: job.id})|json_encode|raw }};
const badgeWrap = document.getElementById('job-status-badge'); const badgeWrap = document.getElementById('job-status-badge');
const finishedAtEl = document.getElementById('job-finished-at'); const finishedAtEl = document.getElementById('job-finished-at');
@@ -106,18 +189,26 @@
let timer = null; let timer = null;
/**
 * Escape the five HTML-significant characters (&, <, >, ", ') so that an
 * arbitrary value can be interpolated into an HTML string.
 *
 * The input is coerced with String() first, so non-string values are
 * escaped in their string form.
 *
 * @param {*} value Value to make safe for HTML interpolation.
 * @returns {string} The escaped string.
 */
function escapeHtml(value) {
    // Single pass over the text: each special character is looked up once,
    // so entities produced here are never escaped a second time.
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;'
    };

    return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
function renderBadge(status) { function renderBadge(status) {
const map = { const map = {
COMPLETED: 'bg-success', COMPLETED: 'bg-success',
QUEUED: 'bg-secondary', QUEUED: 'bg-secondary',
RUNNING: 'bg-warning text-dark', RUNNING: 'bg-warning text-dark',
FAILED: 'bg-danger', FAILED: 'bg-danger',
ABORTED: 'bg-dark' ABORTED: 'bg-dark border border-danger text-danger'
}; };
const css = map[status] || 'bg-secondary'; const css = map[status] || 'bg-dark border border-secondary';
badgeWrap.innerHTML = badgeWrap.innerHTML = `<span class="badge ${css}">${escapeHtml(status || 'UNKNOWN')}</span>`;
`<span class="badge ${css}">${status}</span>`;
} }
function stopPolling() { function stopPolling() {
@@ -127,18 +218,37 @@
} }
} }
/**
 * Show or clear the job error box.
 *
 * A falsy message hides the box (adds the `d-none` class) and empties it;
 * any other message reveals the box and renders the HTML-escaped text
 * under a "Fehler:" heading.
 *
 * Relies on `errorEl` and `escapeHtml` from the enclosing script scope.
 *
 * @param {string} message Error text to display, or an empty value to clear.
 */
function renderError(message) {
    const hasMessage = Boolean(message);

    // Visibility first, content second.
    errorEl.classList.toggle('d-none', !hasMessage);
    errorEl.innerHTML = hasMessage
        ? `<strong>Fehler:</strong><br>${escapeHtml(message)}`
        : '';
}
async function poll() { async function poll() {
try { try {
const res = await fetch(statusUrl); const res = await fetch(statusUrl, {
if (!res.ok) return; headers: {
'Accept': 'application/json'
},
cache: 'no-store'
});
if (!res.ok) {
stopPolling();
return;
}
const data = await res.json(); const data = await res.json();
const status = (data.status || '').toUpperCase(); const status = String(data.status || '').toUpperCase();
renderBadge(status); renderBadge(status);
finishedAtEl.textContent = finishedAtEl.textContent = data.finishedAt
data.finishedAt
? new Date(data.finishedAt).toLocaleString('de-DE') ? new Date(data.finishedAt).toLocaleString('de-DE')
: '-'; : '-';
@@ -149,25 +259,21 @@
stopPolling(); stopPolling();
} }
if (status === 'FAILED' && data.errorMessage) { if (status === 'FAILED' || status === 'ABORTED') {
errorEl.classList.remove('d-none'); renderError(data.errorMessage || '');
errorEl.innerHTML = } else {
`<strong>Fehler:</strong><br>${data.errorMessage}`; renderError('');
} }
} catch (e) { } catch (e) {
stopPolling(); stopPolling();
} }
} }
// Initial render from server state renderBadge({{ jobStatus|json_encode|raw }});
renderBadge("{{ job.status|upper }}");
if (["QUEUED", "RUNNING"].includes("{{ job.status|upper }}")) { if ({{ isActiveJob ? 'true' : 'false' }}) {
loaderEl.classList.remove('d-none');
timer = setInterval(poll, 2000); timer = setInterval(poll, 2000);
} }
})(); })();
</script> </script>

View File

@@ -4,27 +4,31 @@
{% block body %} {% block body %}
{# ========================================================= #} <div id="rebuild-status" class="mb-4">
{# LIVE REBUILD STATUS (SSE) #} {% if latestJob %}
{# ========================================================= #} <div class="alert alert-secondary shadow-sm mb-0">
<div id="rebuild-status" class="mb-5">
<div class="alert alert-secondary shadow-sm">
Status wird geladen… Status wird geladen…
</div> </div>
{% endif %}
</div> </div>
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"> <div>
<h1 class="h3 mb-1">
<i class="bi bi-tag-fill"></i> Tag: {{ tag.label }} <i class="bi bi-tag-fill"></i> Tag: {{ tag.label }}
</h1> </h1>
<div class="small text-muted">
Slug: <code>{{ tag.slug }}</code>
</div>
</div>
<a href="{{ path('admin_tags_index') }}" <a href="{{ path('admin_tags_index') }}"
class="btn btn-sm btn-outline-secondary"> class="btn btn-sm btn-outline-secondary">
Zurück Zurück
</a> </a>
</div> </div>
<script> <script>
const statusBox = document.getElementById('rebuild-status'); const statusBox = document.getElementById('rebuild-status');
const source = new EventSource("{{ path('admin_tags_rebuild_stream') }}"); const source = new EventSource("{{ path('admin_tags_rebuild_stream') }}");
@@ -35,9 +39,9 @@
if (data.status === '{{ statusRunning }}') { if (data.status === '{{ statusRunning }}') {
html = ` html = `
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center"> <div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
<div> <div>
Tag-Rebuild läuft<br> <strong>Tag-Rebuild läuft</strong><br>
${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''} ${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
</div> </div>
<div class="spinner-border spinner-border-sm"></div> <div class="spinner-border spinner-border-sm"></div>
@@ -45,20 +49,20 @@
`; `;
} else if (data.status === '{{ statusQueued }}') { } else if (data.status === '{{ statusQueued }}') {
html = ` html = `
<div class="alert alert-secondary shadow-sm"> <div class="alert alert-secondary shadow-sm mb-0">
Tag-Rebuild in Warteschlange <strong>Tag-Rebuild in Warteschlange</strong>
</div> </div>
`; `;
} else if (data.status === '{{ statusCompleted }}') { } else if (data.status === '{{ statusCompleted }}') {
html = ` html = `
<div class="alert alert-success shadow-sm"> <div class="alert alert-success shadow-sm mb-0">
<i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen <i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen
</div> </div>
`; `;
} else if (data.status === '{{ statusFailed }}') { } else if (data.status === '{{ statusFailed }}') {
html = ` html = `
<div class="alert alert-danger shadow-sm"> <div class="alert alert-danger shadow-sm mb-0">
Tag-Rebuild fehlgeschlagen<br> <strong>Tag-Rebuild fehlgeschlagen</strong><br>
${data.error ? '<code>' + data.error + '</code>' : ''} ${data.error ? '<code>' + data.error + '</code>' : ''}
</div> </div>
`; `;
@@ -70,48 +74,96 @@
source.onerror = function () { source.onerror = function () {
console.warn('SSE Verbindung verloren'); console.warn('SSE Verbindung verloren');
}; };
window.addEventListener('beforeunload', function () {
source.close();
});
</script> </script>
{# ============================= #}
{# Flash Messages #}
{# ============================= #}
{% for message in app.flashes('success') %} {% for message in app.flashes('success') %}
<div class="alert alert-success"> <div class="alert alert-success shadow-sm">
{{ message }} {{ message }}
</div> </div>
{% endfor %} {% endfor %}
{% for message in app.flashes('danger') %} {% for message in app.flashes('danger') %}
<div class="alert alert-danger"> <div class="alert alert-danger shadow-sm">
{{ message }} {{ message }}
</div> </div>
{% endfor %} {% endfor %}
{# ============================= #} <div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
{# Tag → Dokumente #} <div class="card-body row g-4">
{# ============================= #} <div class="col-lg-7">
<h5 class="text-info mb-3">Einordnung des Tags</h5>
<div class="mb-2">
{% if tag.type == 'catalog_entity' %}
<span class="badge text-bg-info">Catalog Entity</span>
{% elseif tag.type == 'sales_signal' %}
<span class="badge text-bg-warning">Sales Signal</span>
{% else %}
<span class="badge text-bg-secondary">Generic</span>
{% endif %}
</div>
<p class="small mb-2">
{{ tag.description ?: 'Keine Beschreibung hinterlegt.' }}
</p>
<p class="small text-muted mb-0">
Weise diesen Tag nur Dokumenten zu, die fachlich wirklich denselben Gegenstand,
dieselbe Produktfamilie oder denselben Anwendungsfall abbilden.
Zu breite Zuweisungen machen das Routing weicher.
</p>
</div>
<div class="col-lg-5">
<h5 class="text-info mb-3">Aktueller Stand</h5>
<div class="d-flex flex-wrap gap-2">
<span class="badge text-bg-dark border border-secondary">
Zugewiesen: {{ assignedDocIds|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Verfügbar: {{ documents|length }}
</span>
<span class="badge text-bg-dark border border-secondary">
Nicht zugewiesen: {{ documents|length - assignedDocIds|length }}
</span>
</div>
</div>
</div>
</div>
<form method="post"> <form method="post">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('assign_tag_' ~ tag.id) }}"> value="{{ csrf_token('assign_tag_' ~ tag.id) }}">
<div class="card bg-black border-secondary"> <div class="row g-4">
<div class="card-body p-0 row">
<div class="col-lg-6"> <div class="col-lg-6">
<div class="card bg-black border-secondary shadow-sm h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Zugewiesene Dokumente
</div>
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle"> <table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th style="width:60px;"><i class="bi bi-three-dots"></i></th> <th style="width: 60px;">
<th>Zugewiesene Dokumente</th> <i class="bi bi-check2-square"></i>
</th>
<th>Dokument</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% set hasAssigned = false %}
{% for doc in documents %} {% for doc in documents %}
{% if doc.id in assignedDocIds %} {% if doc.id in assignedDocIds %}
{% set hasAssigned = true %}
<tr> <tr>
<td> <td>
<input type="checkbox" <input type="checkbox"
@@ -119,51 +171,82 @@
value="{{ doc.id }}" value="{{ doc.id }}"
checked> checked>
</td> </td>
<td> <td class="fw-semibold">
{{ doc.title }} {{ doc.title }}
</td> </td>
</tr> </tr>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
{% if not hasAssigned %}
<tr>
<td colspan="2" class="text-center text-muted p-4">
Noch keine Dokumente zugewiesen.
</td>
</tr>
{% endif %}
</tbody> </tbody>
</table> </table>
</div> </div>
</div>
</div>
</div>
<div class="col-lg-6"> <div class="col-lg-6">
<table class="table table-dark table-striped table-hover mb-0 align-middle col-lg-6"> <div class="card bg-black border-secondary shadow-sm h-100">
<div class="card-header bg-secondary-subtle text-dark fw-semibold">
Verfügbare Dokumente
</div>
<div class="card-body p-0">
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th style="width:60px;"><i class="bi bi-three-dots"></i></th> <th style="width: 60px;">
<th>Nicht zugewiesene Dokumente</th> <i class="bi bi-square"></i>
</th>
<th>Dokument</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% set hasUnassigned = false %}
{% for doc in documents %} {% for doc in documents %}
{% if doc.id not in assignedDocIds %} {% if doc.id not in assignedDocIds %}
{% set hasUnassigned = true %}
<tr> <tr>
<td> <td>
<input type="checkbox" <input type="checkbox"
name="documents[]" name="documents[]"
value="{{ doc.id }}" value="{{ doc.id }}">
>
</td> </td>
<td class="opacity-50"> <td class="opacity-75">
{{ doc.title }} {{ doc.title }}
</td> </td>
</tr> </tr>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
{% if not hasUnassigned %}
<tr>
<td colspan="2" class="text-center text-muted p-4">
Keine weiteren aktiven Dokumente verfügbar.
</td>
</tr>
{% endif %}
</tbody> </tbody>
</table> </table>
</div> </div>
</div> </div>
</div> </div>
</div>
</div>
<button class="btn btn-primary mt-3"> <div class="d-flex justify-content-end mt-4">
Speichern <button class="btn btn-primary">
Zuweisungen speichern
</button> </button>
</div>
</form> </form>
{% endblock %} {% endblock %}

View File

@@ -4,77 +4,52 @@
{% block body %} {% block body %}
{# ========================================================= #} <div id="rebuild-status" class="mb-4">
{# LIVE REBUILD STATUS (SSE) #}
{# ========================================================= #}
<div id="rebuild-status" class="mb-5">
{% if latestJob %} {% if latestJob %}
<div class="alert alert-secondary shadow-sm"> <div class="alert alert-secondary shadow-sm mb-0">
Status wird geladen… Status wird geladen…
</div> </div>
{% endif %} {% endif %}
</div> </div>
<div class="d-flex justify-content-between align-items-center mb-4"> <div class="d-flex justify-content-between align-items-center mb-4">
<h1 class="h3 mb-0"><i class="bi bi-tag-fill"></i> Tag-Management</h1> <h1 class="h3 mb-0">
<i class="bi bi-tag-fill"></i> Tag-Management
</h1>
</div> </div>
{# ========================================================= #}
{# TAG SYSTEM DESCRIPTION #}
{# ========================================================= #}
<div class="card bg-dark border-secondary text-light mb-4 shadow-sm"> <div class="card bg-dark border-secondary text-light mb-4 shadow-sm">
<div class="card-body row"> <div class="card-body row g-4">
<div class="col-lg-6"> <div class="col-lg-6">
<h5 class="text-info mb-3">Was machen Tags im System?</h5> <h5 class="text-info mb-3">Was machen Tags im System?</h5>
<p class="small text-light mb-2"> <p class="small text-light mb-2">
Tags dienen als semantische Routing-Ebene innerhalb des RAG-Systems. Tags sind die semantische Routing-Ebene innerhalb des Systems.
Sie strukturieren Dokumente thematisch und beeinflussen, Sie helfen dabei, thematisch passende Dokumenträume schneller zu erkennen
welche Inhalte bei einer Nutzeranfrage priorisiert werden. und gute Retrieval-Kandidaten zu priorisieren.
</p> </p>
<ul class="small text-light mb-3"> <ul class="small text-light mb-0">
<li> <li>Tags werden Dokumenten manuell zugewiesen.</li>
Tags werden Dokumenten manuell zugewiesen. <li>Beim Rebuild wird aus den aktiven Tag-Zuordnungen eine <code>tags.ndjson</code> erzeugt.</li>
</li> <li>Zusätzlich wird ein eigener Tag-Vektorindex (<code>vector_tags.index</code>) gebaut.</li>
<li> <li>Bei Anfragen erfolgt zunächst ein semantisches Tag-Matching, danach das eigentliche Chunk-Retrieval.</li>
Beim Rebuild wird aus allen Tags eine eigene
<code>tags.ndjson</code> erzeugt.
</li>
<li>
Zusätzlich wird ein separater Vektorindex
(<code>vector_tags.index</code>) aufgebaut.
</li>
<li>
Bei einer Anfrage erfolgt zunächst ein Tag-Matching,
danach wird das Chunk-Retrieval entsprechend gewichtet.
</li>
</ul> </ul>
</div> </div>
<div class="col-lg-6"> <div class="col-lg-6">
<h6 class="text-info mt-3">Wie werden Tags bewertet?</h6> <h5 class="text-info mb-3">Was ist gutes Tagging?</h5>
<p class="small text-light mb-2"> <ul class="small text-light mb-3">
Die Bewertung erfolgt über einen eigenen Vektor-Similarity-Score <li><strong>Präzise statt generisch:</strong> lieber <code>Produktnamen</code> als <code>Gerät</code>.</li>
im Tag-Index. Das System berechnet: <li><strong>Fachlich sauber:</strong> Tags sollen echte Produktfamilien, Anwendungsfälle oder Entitäten abbilden.</li>
</p> <li><strong>Wenig Überschneidung:</strong> keine unnötig breiten oder doppeldeutigen Tags.</li>
<li><strong>Bewusst typisieren:</strong> <code>catalog_entity</code> für echte Katalog-/Entity-Tags, <code>generic</code> nur für allgemeine Zusatzsemantik.</li>
<ul class="small text-light">
<li>
Ähnlichkeit zwischen Nutzeranfrage und Tag-Embedding
</li>
<li>
Top-K Treffer im Tag-Index
</li>
<li>
Gewichtete Übergabe an das Chunk-Retrieval
</li>
</ul> </ul>
<p class="small text-light mt-2 mb-0"> <p class="small text-warning mb-0">
Tags wirken somit als semantischer Verstärker. Zu breite Tags wie „Produkt“, „System“ oder „Gerät“ machen das Routing weicher
Sie ersetzen kein Chunk-Retrieval, sondern steuern dessen Priorisierung. und bringen meist weniger Nutzen als präzise fachliche Tags.
</p> </p>
</div> </div>
</div> </div>
@@ -90,9 +65,9 @@
if (data.status === '{{ statusRunning }}') { if (data.status === '{{ statusRunning }}') {
html = ` html = `
<div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center"> <div class="alert alert-info shadow-sm d-flex justify-content-between align-items-center mb-0">
<div> <div>
Tag-Rebuild läuft<br> <strong>Tag-Rebuild läuft</strong><br>
${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''} ${data.startedAt ? 'Gestartet: ' + new Date(data.startedAt).toLocaleString() : ''}
</div> </div>
<div class="spinner-border spinner-border-sm"></div> <div class="spinner-border spinner-border-sm"></div>
@@ -100,20 +75,20 @@
`; `;
} else if (data.status === '{{ statusQueued }}') { } else if (data.status === '{{ statusQueued }}') {
html = ` html = `
<div class="alert alert-secondary shadow-sm"> <div class="alert alert-secondary shadow-sm mb-0">
Tag-Rebuild in Warteschlange <strong>Tag-Rebuild in Warteschlange</strong>
</div> </div>
`; `;
} else if (data.status === '{{ statusCompleted }}') { } else if (data.status === '{{ statusCompleted }}') {
html = ` html = `
<div class="alert alert-success shadow-sm"> <div class="alert alert-success shadow-sm mb-0">
<i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen <i class="bi bi-check-lg"></i> Tag-Rebuild erfolgreich abgeschlossen
</div> </div>
`; `;
} else if (data.status === '{{ statusFailed }}') { } else if (data.status === '{{ statusFailed }}') {
html = ` html = `
<div class="alert alert-danger shadow-sm"> <div class="alert alert-danger shadow-sm mb-0">
Tag-Rebuild fehlgeschlagen<br> <strong>Tag-Rebuild fehlgeschlagen</strong><br>
${data.error ? '<code>' + data.error + '</code>' : ''} ${data.error ? '<code>' + data.error + '</code>' : ''}
</div> </div>
`; `;
@@ -125,11 +100,12 @@
source.onerror = function () { source.onerror = function () {
console.warn('SSE Verbindung verloren'); console.warn('SSE Verbindung verloren');
}; };
window.addEventListener('beforeunload', function () {
source.close();
});
</script> </script>
{# ========================================================= #}
{# Create Tag Card #}
{# ========================================================= #}
<div class="card bg-black border-secondary text-light mb-4 shadow-sm"> <div class="card bg-black border-secondary text-light mb-4 shadow-sm">
<div class="card-body"> <div class="card-body">
<h5 class="text-info mb-3">Neuen Tag hinzufügen</h5> <h5 class="text-info mb-3">Neuen Tag hinzufügen</h5>
@@ -153,24 +129,26 @@
required/> required/>
</div> </div>
<div class="col-md-4"> <div class="col-md-3">
<label class="form-label small text-muted">Beschreibung</label> <label class="form-label small text-muted">Typ</label>
<input class="form-control form-control-sm" <select name="type" class="form-select form-select-sm">
name="description" {% for choiceLabel, choiceValue in tagTypeChoices %}
placeholder="Semantische Beschreibung des Tags" <option value="{{ choiceValue }}"
required/> {% if choiceValue == 'generic' %}selected{% endif %}>
</div> {{ choiceLabel }}
</option>
<div class="mb-3"> {% endfor %}
<label class="form-label">Type</label>
<select name="type" class="form-select">
<option value="generic">Generic</option>
<option value="catalog_entity">Catalog Entity</option>
<option value="sales_signal">Sales Signal</option>
</select> </select>
</div> </div>
<div class="col-md-2 d-grid align-items-end"> <div class="col-md-3">
<label class="form-label small text-muted">Beschreibung</label>
<input class="form-control form-control-sm"
name="description"
placeholder="Optional: fachlicher Kontext des Tags"/>
</div>
<div class="col-12 d-grid d-md-flex justify-content-md-end">
<button class="btn btn-sm btn-outline-info"> <button class="btn btn-sm btn-outline-info">
Anlegen Anlegen
</button> </button>
@@ -179,36 +157,56 @@
</div> </div>
</div> </div>
{# ========================================================= #}
{# Tag Table #}
{# ========================================================= #}
<div class="card bg-black border-secondary text-light shadow-sm"> <div class="card bg-black border-secondary text-light shadow-sm">
<div class="card-body"> <div class="card-body">
<div class="mb-3"> <div class="mb-3 d-flex justify-content-between align-items-center flex-wrap gap-2">
<div>
<strong class="text-info">Vorhandene Tags:</strong> <strong class="text-info">Vorhandene Tags:</strong>
<span class="text-muted small ms-2"> <span class="text-muted small ms-2">
{{ tags|length }} Einträge {{ tags|length }} Einträge
</span> </span>
</div> </div>
<div class="small text-muted">
Dokumentanzahl bezieht sich auf aktive Dokumente.
</div>
</div>
<div class="table-responsive">
<table class="table table-dark table-striped table-hover mb-0 align-middle"> <table class="table table-dark table-striped table-hover mb-0 align-middle">
<thead class="table-secondary text-dark"> <thead class="table-secondary text-dark">
<tr> <tr>
<th style="width: 25%">Label</th> <th style="width: 18%">Label</th>
<th style="width: 25%">Slug</th> <th style="width: 18%">Slug</th>
<th style="width: 35%">Beschreibung</th> <th style="width: 14%">Typ</th>
<th style="width: 10%">Aktive Dokumente</th>
<th style="width: 25%">Beschreibung</th>
<th class="text-end" style="width: 15%">Aktion</th> <th class="text-end" style="width: 15%">Aktion</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for tag in tags %} {% for tag in tags %}
{% set activeDocumentCount = documentCountByTagId[tag.id.toRfc4122] ?? 0 %}
<tr> <tr>
<td class="fw-semibold">{{ tag.label }}</td> <td class="fw-semibold">{{ tag.label }}</td>
<td><code>{{ tag.slug }}</code></td> <td><code>{{ tag.slug }}</code></td>
<td>
{% if tag.type == 'catalog_entity' %}
<span class="badge text-bg-info">Catalog Entity</span>
{% elseif tag.type == 'sales_signal' %}
<span class="badge text-bg-warning">Sales Signal</span>
{% else %}
<span class="badge text-bg-secondary">Generic</span>
{% endif %}
</td>
<td>
<span class="badge text-bg-dark border border-secondary">
{{ activeDocumentCount }}
</span>
</td>
<td>{{ tag.description ?: '-' }}</td> <td>{{ tag.description ?: '-' }}</td>
<td class="text-end"> <td class="text-end">
<a href="{{ path('admin_tags_assign', { id: tag.id }) }}" <a href="{{ path('admin_tags_assign', { id: tag.id }) }}"
class="btn btn-sm btn-outline-info me-2"> class="btn btn-sm btn-outline-info me-2">
Zuweisen Zuweisen
@@ -217,7 +215,6 @@
<form method="post" <form method="post"
action="{{ path('admin_tags_delete', {id: tag.id}) }}" action="{{ path('admin_tags_delete', {id: tag.id}) }}"
style="display:inline-block;"> style="display:inline-block;">
<input type="hidden" <input type="hidden"
name="_token" name="_token"
value="{{ csrf_token('admin_tag_delete_' ~ tag.id) }}"/> value="{{ csrf_token('admin_tag_delete_' ~ tag.id) }}"/>
@@ -227,18 +224,18 @@
Löschen Löschen
</button> </button>
</form> </form>
</td> </td>
</tr> </tr>
{% else %} {% else %}
<tr> <tr>
<td colspan="4" class="p-4 text-center text-muted"> <td colspan="6" class="p-4 text-center text-muted">
Noch keine Tags vorhanden. Noch keine Tags vorhanden.
</td> </td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
</div>
</div> </div>
</div> </div>