add catalog mode

2026-02-28 13:51:54 +01:00
parent 47a3c9cca2
commit d3294464ea
7 changed files with 484 additions and 201 deletions
--- a/config/services.yaml
+++ b/config/services.yaml
@@ -31,7 +31,7 @@ parameters:
  mto.index.chunk_size: 800
  mto.index.chunk_overlap: 100
-  mto.index.embedding_model: 'all-MiniLM-L6-v2'
+  mto.index.embedding_model: 'intfloat/multilingual-e5-base'
  mto.index.embedding_dimension: 768
  mto.index.scoring_version: 1
--- a/python/vector/vector_ingest_tags.py
+++ b/python/vector/vector_ingest_tags.py
@@ -5,24 +5,19 @@ import json
 from pathlib import Path
 # ---------------------------------------------------------
-# Positional args (aligned with PHP builder exec call)
+# Positional args
 # ---------------------------------------------------------
 # 1 tags.ndjson
 # 2 out_index_path (can be .tmp)
 # 3 model
 # Example:
 #   python vector_ingest_tags.py /var/knowledge/tags.ndjson /var/knowledge/vector_tags.index.tmp all-MiniLM-L6-v2
 # ---------------------------------------------------------
-if len(sys.argv) < 4:
+if len(sys.argv) < 3:
-    print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index> <model>", file=sys.stderr)
+    print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index>", file=sys.stderr)
    sys.exit(2)
 tags_path = Path(sys.argv[1]).resolve()
 out_path  = Path(sys.argv[2]).resolve()
 model_name = sys.argv[3]
-meta_path = Path(str(out_path) + ".meta.json")  # vector_tags.index(.tmp).meta.json
+meta_path = Path(str(out_path) + ".meta.json")
 # ---------------------------------------------------------
 # Dependency checks
@@ -43,6 +38,25 @@ import numpy as np
 import faiss
 from sentence_transformers import SentenceTransformer
 # ---------------------------------------------------------
 # Load embedding model from index_meta.json (Single Source of Truth)
 # ---------------------------------------------------------
 BASE_PATH = Path(__file__).resolve().parents[2]
 INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json"
 if not INDEX_META_PATH.exists():
    print("ERROR: index_meta.json not found", file=sys.stderr)
    sys.exit(30)
 meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
 embedding_model = meta.get("embedding_model")
 if not embedding_model:
    print("ERROR: embedding_model missing in index_meta.json", file=sys.stderr)
    sys.exit(31)
 model = SentenceTransformer(embedding_model)
 # ---------------------------------------------------------
 # File checks
 # ---------------------------------------------------------
@@ -50,14 +64,8 @@ if not tags_path.is_file():
    print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr)
    sys.exit(20)
 # Ensure output directory exists
 out_path.parent.mkdir(parents=True, exist_ok=True)
 # ---------------------------------------------------------
 # Load model
 # ---------------------------------------------------------
 model = SentenceTransformer(model_name)
 # ---------------------------------------------------------
 # Streaming read NDJSON
 # ---------------------------------------------------------
@@ -85,13 +93,9 @@ with open(tags_path, "r", encoding="utf-8") as f:
        if len(text) > 4000:
            text = text[:4000]
        # -------------------------------------------------
        # E5 requires "passage:" prefix for indexed texts
        # -------------------------------------------------
        texts.append(f"passage: {text}")
        ids.append(str(tag_id))
 # If empty: remove outputs (tmp) and exit success
 if not texts:
    if out_path.exists():
        out_path.unlink()
@@ -112,17 +116,11 @@ embeddings = model.encode(
 embeddings = np.array(embeddings).astype("float32")
 dim = embeddings.shape[1]
 # ---------------------------------------------------------
 # Build FAISS index
 # ---------------------------------------------------------
 index = faiss.IndexFlatIP(dim)
 index.add(embeddings)
 faiss.write_index(index, str(out_path))
 # ---------------------------------------------------------
 # Write ID mapping meta
 # ---------------------------------------------------------
 with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(ids, f)
--- a/python/vector/vector_service.py
+++ b/python/vector/vector_service.py
@@ -10,11 +10,19 @@ from typing import Any, List, Optional, Dict
 import numpy as np
 import faiss
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from sentence_transformers import SentenceTransformer
 # ============================================================
 # Service Stamp (to verify you are running THIS file)
 # ============================================================
 SERVICE_STAMP = "vector_service.py@2026-02-28T10:20+01:00"
 # ============================================================
 # Paths
 # ============================================================
@@ -42,6 +50,7 @@ INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
 logger = logging.getLogger("vector_service")
 logger.setLevel(logging.INFO)
 def setup_logging() -> None:
    LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -68,6 +77,23 @@ def setup_logging() -> None:
    if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
        logger.addHandler(stream_handler)
    # Capture uvicorn logs in the same file as well (critical for hidden 500s)
    uvicorn_error = logging.getLogger("uvicorn.error")
    uvicorn_access = logging.getLogger("uvicorn.access")
    uvicorn_error.setLevel(logging.INFO)
    uvicorn_access.setLevel(logging.INFO)
    if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers):
        uvicorn_error.addHandler(file_handler)
    if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_error.handlers):
        uvicorn_error.addHandler(stream_handler)
    if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers):
        uvicorn_access.addHandler(file_handler)
    if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_access.handlers):
        uvicorn_access.addHandler(stream_handler)
 # ============================================================
 # FastAPI
@@ -79,9 +105,6 @@ model: Optional[SentenceTransformer] = None
 chunk_index = None
 chunk_ids: Optional[List[Any]] = None
 # Sales-RAG signals derived from NDJSON (loaded on startup and reload):
 # - chunk_doc_map: chunk_id -> document_id
 # - chunk_pos_map: chunk_id -> chunk_index (position within document, if available)
 chunk_doc_map: Dict[str, str] = {}
 chunk_pos_map: Dict[str, int] = {}
@@ -89,7 +112,6 @@ tag_index = None
 tag_ids: Optional[List[Any]] = None
 loaded_embedding_model_name: Optional[str] = None
 current_index_version: Optional[int] = None
 current_runtime_stamp: Optional[str] = None
@@ -107,10 +129,10 @@ class SearchRequest(BaseModel):
 # ============================================================
-# Loader
+# Loader Helpers
 # ============================================================
-def _safe_read_json(path: Path) -> Optional[dict]:
+def _safe_read_json(path: Path) -> Optional[Any]:
    try:
        if not path.exists():
            return None
@@ -121,9 +143,6 @@ def _safe_read_json(path: Path) -> Optional[dict]:
 def _as_key(value: Any) -> Optional[str]:
    """
    Normalize IDs to string keys for maps. Returns None if unusable.
    """
    if value is None:
        return None
    if isinstance(value, str):
@@ -136,12 +155,19 @@ def _as_key(value: Any) -> Optional[str]:
        return None
 def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
    try:
        v = int(limit)
    except Exception:
        return default
    if v <= 0:
        return default
    if v > max_limit:
        return max_limit
    return v
 def load_chunk_maps_from_ndjson() -> None:
    """
    Builds two maps from index.ndjson:
    - chunk_id -> document_id
    - chunk_id -> chunk_index (position inside document, if present)
    """
    global chunk_doc_map, chunk_pos_map
    chunk_doc_map = {}
@@ -156,7 +182,6 @@ def load_chunk_maps_from_ndjson() -> None:
                line = line.strip()
                if not line:
                    continue
                try:
                    row = json.loads(line)
                except Exception:
@@ -166,19 +191,14 @@ def load_chunk_maps_from_ndjson() -> None:
                if not chunk_id_key:
                    continue
-                document_id = row.get("document_id")
+                doc_id_key = _as_key(row.get("document_id"))
                doc_id_key = _as_key(document_id)
                if doc_id_key:
                    chunk_doc_map[chunk_id_key] = doc_id_key
                # chunk_index is optional but very useful for Sales-RAG diversity rules
                # (e.g. min distance within a doc)
                ci = row.get("chunk_index")
                if isinstance(ci, int):
                    chunk_pos_map[chunk_id_key] = ci
-                else:
+                elif isinstance(ci, str):
                    # tolerate numeric strings
                    if isinstance(ci, str):
                    s = ci.strip()
                    if s.isdigit():
                        try:
@@ -190,16 +210,24 @@ def load_chunk_maps_from_ndjson() -> None:
        logger.warning("Failed to load chunk maps from ndjson: %s", str(e))
-def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
+def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
    """
    Accepts:
      - list: ok
      - dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
    Returns None if invalid.
    """
    if isinstance(value, list):
        return value
    if isinstance(value, dict):
        try:
-        v = int(limit)
+            keys = sorted(int(k) for k in value.keys())
            return [value[str(i)] for i in keys]
        except Exception:
-        return default
+            return None
-    if v <= 0:
+
-        return default
+    return None
    if v > max_limit:
        return max_limit
    return v
 def load_all() -> None:
@@ -225,13 +253,14 @@ def load_all() -> None:
            model = SentenceTransformer(embedding_model_name)
            loaded_embedding_model_name = embedding_model_name
        # Chunks
        if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
            logger.info("[Reload] Loading chunk index")
            chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
-            chunk_ids = _safe_read_json(CHUNK_MAP_PATH) or None
+            raw = _safe_read_json(CHUNK_MAP_PATH)
-            if not isinstance(chunk_ids, list):
+            chunk_ids = _normalize_meta_list(raw)
            if chunk_ids is None:
                chunk_index = None
                chunk_ids = None
                logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled")
        else:
            chunk_index = None
@@ -240,13 +269,14 @@ def load_all() -> None:
        logger.info("[Reload] Loading chunk maps (doc_id + chunk_index)")
        load_chunk_maps_from_ndjson()
        # Tags
        if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
            logger.info("[Reload] Loading tag index")
            tag_index = faiss.read_index(str(TAG_INDEX_PATH))
-            tag_ids = _safe_read_json(TAG_MAP_PATH) or None
+            raw = _safe_read_json(TAG_MAP_PATH)
-            if not isinstance(tag_ids, list):
+            tag_ids = _normalize_meta_list(raw)
            if tag_ids is None:
                tag_index = None
                tag_ids = None
                logger.warning("[Reload] tag_ids meta invalid -> tag index disabled")
        else:
            tag_index = None
@@ -262,15 +292,17 @@ def load_all() -> None:
        current_index_version = index_version if isinstance(index_version, int) else None
        logger.info(
-            "[Reload] Completed (index_version=%s runtime=%s embedding_model=%s)",
+            "[Reload] Completed (index_version=%s runtime=%s embedding_model=%s stamp=%s file=%s)",
            str(current_index_version),
            str(current_runtime_stamp),
            str(loaded_embedding_model_name),
            SERVICE_STAMP,
            str(Path(__file__).resolve()),
        )
 # ============================================================
-# Observer (Enterprise Auto Reload)
+# Observer
 # ============================================================
 def observer_loop() -> None:
@@ -294,24 +326,34 @@ def observer_loop() -> None:
                new_runtime = v if isinstance(v, str) else None
            if new_version != current_index_version:
-                logger.info(
+                logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
                    "[Observer] index_version changed (%s -> %s) -> Reload",
                    str(current_index_version),
                    str(new_version),
                )
                load_all()
                continue
            if new_runtime != current_runtime_stamp:
-                logger.info(
+                logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
                    "[Observer] runtime changed (%s -> %s) -> Reload",
                    str(current_runtime_stamp),
                    str(new_runtime),
                )
                load_all()
        except Exception as e:
-            logger.error("[Observer ERROR] %s", str(e))
+            logger.exception("[Observer ERROR] %s", str(e))
 # ============================================================
 # Global Exception Handler (forces JSON + logs)
 # ============================================================
@app.exception_handler(Exception)
 async def unhandled_exception_handler(request: Request, exc: Exception):
    logger.exception("UNHANDLED_EXCEPTION path=%s method=%s", request.url.path, request.method)
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal Server Error",
            "detail": str(exc),
            "path": request.url.path,
            "stamp": SERVICE_STAMP,
        },
    )
 # ============================================================
@@ -321,13 +363,10 @@ def observer_loop() -> None:
@app.on_event("startup")
 def startup_event():
    setup_logging()
-    logger.info("[VectorService] Startup")
+    logger.info("[VectorService] Startup stamp=%s file=%s", SERVICE_STAMP, str(Path(__file__).resolve()))
    load_all()
    t = threading.Thread(target=observer_loop, daemon=True)
    t.start()
    logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
@@ -339,12 +378,18 @@ def startup_event():
 def health():
    return {
        "status": "ok",
        "stamp": SERVICE_STAMP,
        "file": str(Path(__file__).resolve()),
        "chunk_index_loaded": chunk_index is not None,
        "tag_index_loaded": tag_index is not None,
        "model_loaded": model is not None,
        "embedding_model": loaded_embedding_model_name,
        "index_version": current_index_version,
        "runtime_stamp": current_runtime_stamp,
        "tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None,
        "tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None,
        "chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None,
        "chunk_meta_len": len(chunk_ids) if isinstance(chunk_ids, list) else None,
        "log_file": str(LOG_FILE),
    }
@@ -353,8 +398,9 @@ def health():
 def reload():
    try:
        load_all()
-        return {"status": "reloaded"}
+        return {"status": "reloaded", "stamp": SERVICE_STAMP}
    except Exception as e:
        logger.exception("reload failed")
        raise HTTPException(status_code=500, detail=str(e))
@@ -363,38 +409,30 @@ def search_chunks(req: SearchRequest):
    if chunk_index is None or chunk_ids is None or model is None:
        raise HTTPException(status_code=503, detail="Chunk index not available")
-    # Safety: clamp limit to prevent abuse / accidental huge queries
+    try:
        limit = _sanitize_limit(req.limit, default=8, max_limit=200)
        query = (req.query or "").strip()
        if not query:
            raise HTTPException(status_code=400, detail="query must not be empty")
-    query_vec = model.encode(
+        query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
        [f"query: {query}"],
        normalize_embeddings=True
    )
        query_vec = np.array(query_vec).astype("float32")
        effective_limit = limit
        doc_filter: Optional[List[str]] = None
        if req.doc_ids:
        # Normalize incoming doc_ids for reliable matching
            doc_filter = []
            for d in req.doc_ids:
                dk = _as_key(d)
                if dk:
                    doc_filter.append(dk)
        # When doc filtering is enabled, we fetch a wider pool and filter down.
        # Keep it bounded to avoid expensive scans on huge indices.
            effective_limit = max(limit * 5, 50)
            effective_limit = min(effective_limit, 500)
        scores, indices = chunk_index.search(query_vec, effective_limit)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx == -1:
                continue
@@ -406,19 +444,15 @@ def search_chunks(req: SearchRequest):
            if not chunk_id_key:
                continue
        # Apply doc filter if requested
            doc_id = chunk_doc_map.get(chunk_id_key)
            if doc_filter is not None:
                if doc_id is None or doc_id not in doc_filter:
                    continue
        # Sales-RAG signals:
        # - document_id (for doc quotas / diversity rules)
        # - chunk_index (position within doc for distance constraints)
            payload = {
                "chunk_id": raw_chunk_id,
                "score": float(score),
-            "document_id": doc_id,  # may be None if ndjson missing/partial
+                "document_id": doc_id,
            }
            ci = chunk_pos_map.get(chunk_id_key)
@@ -432,37 +466,48 @@ def search_chunks(req: SearchRequest):
        return results
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("search-chunks failure")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/search-tags")
 def search_tags(req: SearchRequest):
    if tag_index is None or tag_ids is None or model is None:
        raise HTTPException(status_code=503, detail="Tag index not available")
    try:
        limit = _sanitize_limit(req.limit, default=8, max_limit=200)
        query = (req.query or "").strip()
        if not query:
            raise HTTPException(status_code=400, detail="query must not be empty")
-    query_vec = model.encode(
+        query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
        [f"query: {query}"],
        normalize_embeddings=True
    )
        query_vec = np.array(query_vec).astype("float32")
        if query_vec.ndim != 2:
            raise RuntimeError(f"Invalid embedding shape: {query_vec.shape}")
        if query_vec.shape[1] != tag_index.d:
            raise RuntimeError(f"Embedding dimension mismatch (vec={query_vec.shape[1]}, index={tag_index.d})")
        scores, indices = tag_index.search(query_vec, limit)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx == -1:
                continue
            if idx < 0 or idx >= len(tag_ids):
                continue
-
+            results.append({"tag_id": tag_ids[idx], "score": float(score)})
        results.append({
            "tag_id": tag_ids[idx],
            "score": float(score),
        })
        return results
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("search-tags failure")
        raise HTTPException(status_code=500, detail=str(e))
--- a/src/Catalog/EntityCatalogService.php
+++ b/src/Catalog/EntityCatalogService.php
@@ -0,0 +1,118 @@
 <?php
 declare(strict_types=1);
 namespace App\Catalog;
 use App\Tag\TagVectorSearchClient;
 use Doctrine\DBAL\Connection;
 use Symfony\Component\Uid\Uuid;
 /**
 * EntityCatalogService
 *
 * Builds deterministic catalog listings for an entity term:
 * - tag vector search (score gate + ambiguity check)
 * - DB query on document_tag + document (status ACTIVE)
 * - returns ONE text block (string), or null so the caller can fall back to regular retrieval
 */
 final class EntityCatalogService
 {
    /** Minimum score the best tag hit must reach before it is trusted. */
    private const MIN_SCORE = 0.55;
    /** Minimum score gap between best and second-best hit; anything closer counts as ambiguous. */
    private const AMBIGUITY_DELTA = 0.05;
    public function __construct(
        private readonly TagVectorSearchClient $tagVectorClient,
        private readonly Connection            $connection,
    ) {}
    /**
     * Resolves the entity term to a single tag and lists the titles of all ACTIVE documents carrying it.
     *
     * @param string $entityTerm Entity term to look up (surrounding whitespace is ignored).
     *
     * @return string|null Text block, or null when no safe catalog can be built
     *                     (empty term, no/weak/ambiguous tag hit, unusable tag id, no titles).
     */
    public function listByTerm(string $entityTerm): ?string
    {
        $entityTerm = trim($entityTerm);
        if ($entityTerm === '') {
            return null;
        }
        // 1) Tag vector search (top 3, needed for the ambiguity check)
        $hits = $this->tagVectorClient->search($entityTerm, 3);
        if ($hits === []) {
            return null;
        }
        $best = $hits[0];
        $bestScore = isset($best['score']) ? (float)$best['score'] : 0.0;
        if ($bestScore < self::MIN_SCORE) {
            return null;
        }
        // 2) Ambiguity: if the runner-up is too close, bail out conservatively
        if (isset($hits[1])) {
            $secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0;
            if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
                return null;
            }
        }
        $tagHex = (string)($best['tag_id'] ?? '');
        if ($tagHex === '') {
            return null;
        }
        // The tag id comes from an external service. A malformed id must lead to the
        // documented null fallback instead of an uncaught exception in the retrieval path.
        try {
            $tagBinary = Uuid::fromString($tagHex)->toBinary();
        } catch (\InvalidArgumentException) {
            return null;
        }
        // 3) DB query: all ACTIVE documents attached to this tag
        $rows = $this->connection->fetchAllAssociative(
            '
            SELECT d.title
            FROM document d
            INNER JOIN document_tag dt ON dt.document_id = d.id
            WHERE dt.tag_id = :tagId
              AND d.status = :status
            ORDER BY d.title ASC
            ',
            [
                'tagId'  => $tagBinary,
                'status' => 'ACTIVE',
            ]
        );
        if ($rows === []) {
            return null;
        }
        // Keep only non-empty titles.
        $titles = [];
        foreach ($rows as $row) {
            $t = trim((string)($row['title'] ?? ''));
            if ($t !== '') {
                $titles[] = $t;
            }
        }
        if ($titles === []) {
            return null;
        }
        return $this->buildTextBlock($entityTerm, $titles);
    }
    /**
     * Renders the headline for the entity term followed by one "- title" bullet per entry.
     *
     * @param string   $entityTerm Entity term; matched case-insensitively against the known headlines.
     * @param string[] $titles     Titles to list, in output order.
     */
    private function buildTextBlock(string $entityTerm, array $titles): string
    {
        // Lowercase first so a differently-cased term still gets its specific headline.
        $headline = match (mb_strtolower($entityTerm)) {
            'geräte' => 'Folgende Geräte sind verfügbar:',
            'indikatoren' => 'Folgende Indikatoren sind verfügbar:',
            'funktionen' => 'Folgende Funktionen sind verfügbar:',
            'zubehör' => 'Folgendes Zubehör ist verfügbar:',
            default => 'Folgende Einträge sind verfügbar:',
        };
        $lines = [];
        foreach ($titles as $title) {
            $lines[] = '- ' . $title;
        }
        return $headline . "\n\n" . implode("\n", $lines);
    }
 }
--- a/src/Intent/CatalogIntentLite.php
+++ b/src/Intent/CatalogIntentLite.php
@@ -0,0 +1,138 @@
 <?php
 declare(strict_types=1);
 namespace App\Intent;
 /**
 * CatalogIntentLite
 *
 * Minimal, deterministic detection of catalog / entity-list requests.
 *
 * Target prompts:
 * - "Liste aller Geräte" / "Welche Indikatoren gibt es?" / "Zeig mir alle Funktionen"
 *
 * Guardrails:
 * - No catalog mode for sales / pricing / comparison / ROI / implementation / objection intents.
 * - No catalog mode without an explicit entity term.
 *
 * IMPORTANT:
 * - Always call with the ORIGINAL prompt.
 * - No LLM, no ML.
 */
 final class CatalogIntentLite
 {
    /**
     * List signals (lightweight) – IntentLite remains responsible for "general" list detection.
     * Matched as plain substrings of the normalized prompt.
     */
    private const LIST_SIGNALS = [
        'liste',
        'auflisten',
        'aufzaehl',
        'aufzähl',
        'übersicht',
        'uebersicht',
        'welche gibt es',
        'welche sind',
        'zeig mir alle',
        'zeige mir alle',
        'alle',
    ];
    /**
     * Entity terms supported as catalog types.
     *
     * Left side: canonical term (used for the tag search)
     * Right side: search synonyms that may appear in the prompt.
     */
    private const ENTITY_TERMS = [
        'geräte' => ['gerät', 'geräte', 'geraet', 'geraete', 'device', 'devices'],
        'indikatoren' => ['indikator', 'indikatoren', 'indicator', 'indicators'],
        'funktionen' => ['funktion', 'funktionen', 'feature', 'features', 'funktionalität', 'funktionalitaet'],
        'zubehör' => ['zubehör', 'zubehoer', 'accessory', 'accessories', 'zubehor'],
    ];
    public function __construct(
        private readonly SalesIntentLite $salesIntentLite,
    ) {}
    /**
     * Detects whether the prompt asks for a catalog listing of a supported entity type.
     *
     * @param string $originalPrompt The unmodified user prompt.
     *
     * @return string|null Canonical entity term (e.g. "geräte"), or null when there is no catalog intent.
     */
    public function detect(string $originalPrompt): ?string
    {
        $p = $this->normalize($originalPrompt);
        // 1) The prompt must contain a list signal
        if (!$this->containsAny($p, self::LIST_SIGNALS)) {
            return null;
        }
        // 2) Guardrail: no catalog mode for sales intents
        $sales = $this->salesIntentLite->detect($originalPrompt);
        $intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
        if ($intent !== SalesIntentLite::DISCOVERY) {
            return null;
        }
        // 3) Extract an explicit entity term (otherwise no catalog)
        foreach (self::ENTITY_TERMS as $canonical => $synonyms) {
            foreach ($synonyms as $syn) {
                if ($this->containsWord($p, $syn)) {
                    return $canonical;
                }
            }
        }
        return null;
    }
    // ------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------
    /**
     * Returns true when at least one non-empty needle occurs as a substring of the haystack.
     *
     * @param string[] $needles
     */
    private function containsAny(string $haystack, array $needles): bool
    {
        foreach ($needles as $needle) {
            if ($needle === '') {
                continue;
            }
            if (str_contains($haystack, $needle)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Returns true when the word occurs in the haystack delimited by word boundaries.
     */
    private function containsWord(string $haystack, string $word): bool
    {
        $word = trim($word);
        if ($word === '') {
            return false;
        }
        return preg_match('/\b' . preg_quote($word, '/') . '\b/u', $haystack) === 1;
    }
    /**
     * Lowercases the prompt and, when it contains umlauts or ß, appends an ASCII-transliterated copy.
     *
     * The original text is kept so that both the umlaut spellings and the
     * ae/oe/ue/ss spellings in the signal and synonym lists can match.
     */
    private function normalize(string $s): string
    {
        $s = mb_strtolower($s);
        // Transliterate ALL umlaut types in one pass. Stopping after the first type found
        // would leave e.g. "zubehör für geräte" only partially transliterated.
        $ascii = strtr($s, [
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
        ]);
        if ($ascii !== $s) {
            $s .= ' ' . $ascii;
        }
        return $s;
    }
 }
--- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
+++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php
@@ -4,7 +4,9 @@ declare(strict_types=1);
 namespace App\Knowledge\Retrieval;
 use App\Catalog\EntityCatalogService;
 use App\Entity\ModelGenerationConfig;
 use App\Intent\CatalogIntentLite;
 use App\Intent\IntentLite;
 use App\Intent\SalesIntentLite;
 use App\Knowledge\QueryCleaner;
@@ -32,7 +34,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        private readonly ModelGenerationConfigRepository $configRepository,
        private readonly QueryCleaner                    $queryCleaner,
        private readonly IntentLite                      $intentLite,
-        private readonly SalesIntentLite                 $salesIntentLite
+        private readonly SalesIntentLite                 $salesIntentLite,
        private readonly CatalogIntentLite               $catalogIntent,
        private readonly EntityCatalogService            $entityCatalogService
    )
    {
    }
@@ -54,6 +58,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
    public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
    {
        // 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
        $entityTerm = $this->catalogIntent->detect($prompt);
        if ($entityTerm !== null) {
            $catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
            if ($catalogBlock !== null) {
                return [$catalogBlock];
            }
        }
        $core = $this->runCore($prompt, $config, false);
        if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
@@ -111,8 +126,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
            return [];
        }
        // 1) Production-like selection: wir selektieren Texte,
        //    aber in Debug brauchen wir die ChunkIds dazu.
        $selectedChunkIds = $core['is_list_query']
            ? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
            : $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
@@ -121,7 +134,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
            return [];
        }
        // 2) Ausgabe inklusive Scores
        $out = [];
        $rank = 0;
@@ -179,7 +191,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
            $cleanQuery = $prompt;
        }
        // Intent-based adjustments (identisch zur Produktionslogik)
        $threshold = self::VECTOR_SCORE_THRESHOLD;
        $topK = $vectorTopKBase;
@@ -216,7 +227,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        $topK = max(1, min($topK, self::HARD_MAX_VECTORK));
        // Tag routing (identisch)
        $candidateDocIds = $this->tagRouting->route($cleanQuery);
        $candidateSet = null;
@@ -224,7 +234,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
            $candidateSet = array_fill_keys($candidateDocIds, true);
        }
        // Dual search (identisch)
        $globalHits = $this->vectorClient->search($cleanQuery, $topK);
        $scopedHits = [];
@@ -249,7 +258,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        $rrfScores = [];
        $rawScores = [];
        // RRF (identisch) + optional raw capture
        $this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores);
        $this->applyRrfWithOptionalRaw(
            $scopedHits,
@@ -292,13 +300,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        ];
    }
    /**
     * Gleiche Logik wie applyRrf(), aber optional mit raw-score capture.
     *
     * @param array<int, array{chunk_id:string, score:float}> $hits
     * @param array<string,float> $rrfScores
     * @param array<string,float> $rawScores
     */
    private function applyRrfWithOptionalRaw(
        array $hits,
        array &$rrfScores,
@@ -322,7 +323,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
            $chunkId = (string)$hit['chunk_id'];
            if ($captureRaw) {
                // wenn global+scoped vorkommt: bestes raw behalten
                if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) {
                    $rawScores[$chunkId] = $raw;
                }
@@ -343,15 +343,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        }
    }
    // =========================================================
    // DEBUG SELECTION HELPERS (identisch zu Produktionsregeln)
    // =========================================================
    /**
     * List-Mode nutzt exakt collectTexts() Regeln, aber gibt ChunkIds zurück.
     *
     * @return string[]
     */
    private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array
    {
        $seen = [];
@@ -384,11 +375,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        return $out;
    }
    /**
     * Normal-Mode nutzt exakt collectSalesOptimized() Regeln, aber gibt ChunkIds zurück.
     *
     * @return string[]
     */
    private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array
    {
        $out = [];
@@ -437,10 +423,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
        return $out;
    }
    // =========================================================
    // ORIGINAL METHODS (UNVERÄNDERT)
    // =========================================================
    private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void
    {
        $rank = 0;
--- a/src/Tag/TagVectorSearchClient.php
+++ b/src/Tag/TagVectorSearchClient.php
@@ -58,6 +58,7 @@ final readonly class TagVectorSearchClient
                    'Tag vector service returned non-200',
                    ['status' => $response->getStatusCode()]
                );
                return [];
            }
@@ -68,6 +69,7 @@ final readonly class TagVectorSearchClient
                'Tag vector service unreachable',
                ['error' => $e->getMessage()]
            );
            return [];
        }