add catalog mode

This commit is contained in:
team2
2026-02-28 13:51:54 +01:00
parent 47a3c9cca2
commit d3294464ea
7 changed files with 484 additions and 201 deletions

View File

@@ -31,7 +31,7 @@ parameters:
mto.index.chunk_size: 800 mto.index.chunk_size: 800
mto.index.chunk_overlap: 100 mto.index.chunk_overlap: 100
mto.index.embedding_model: 'all-MiniLM-L6-v2' mto.index.embedding_model: 'intfloat/multilingual-e5-base'
mto.index.embedding_dimension: 768 mto.index.embedding_dimension: 768
mto.index.scoring_version: 1 mto.index.scoring_version: 1

View File

@@ -5,24 +5,19 @@ import json
from pathlib import Path from pathlib import Path
# --------------------------------------------------------- # ---------------------------------------------------------
# Positional args (aligned with PHP builder exec call) # Positional args
# ---------------------------------------------------------
# 1 tags.ndjson # 1 tags.ndjson
# 2 out_index_path (can be .tmp) # 2 out_index_path (can be .tmp)
# 3 model
# Example:
# python vector_ingest_tags.py /var/knowledge/tags.ndjson /var/knowledge/vector_tags.index.tmp all-MiniLM-L6-v2
# --------------------------------------------------------- # ---------------------------------------------------------
if len(sys.argv) < 4: if len(sys.argv) < 3:
print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index> <model>", file=sys.stderr) print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index>", file=sys.stderr)
sys.exit(2) sys.exit(2)
tags_path = Path(sys.argv[1]).resolve() tags_path = Path(sys.argv[1]).resolve()
out_path = Path(sys.argv[2]).resolve() out_path = Path(sys.argv[2]).resolve()
model_name = sys.argv[3]
meta_path = Path(str(out_path) + ".meta.json") # vector_tags.index(.tmp).meta.json meta_path = Path(str(out_path) + ".meta.json")
# --------------------------------------------------------- # ---------------------------------------------------------
# Dependency checks # Dependency checks
@@ -43,6 +38,25 @@ import numpy as np
import faiss import faiss
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------
# Load embedding model from index_meta.json (Single Source of Truth)
# ---------------------------------------------------------
# The model name is read from index_meta.json instead of a CLI argument.
# Exit codes used by this section:
#   30  index_meta.json does not exist
#   31  "embedding_model" is missing or empty
#   32  index_meta.json cannot be read or is not valid JSON
BASE_PATH = Path(__file__).resolve().parents[2]
INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json"

if not INDEX_META_PATH.exists():
    print("ERROR: index_meta.json not found", file=sys.stderr)
    sys.exit(30)

try:
    meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
except (OSError, ValueError) as exc:
    # json.JSONDecodeError and UnicodeDecodeError are both ValueError
    # subclasses; report them like every other failure in this script
    # instead of dying with an uncaught traceback.
    print(f"ERROR: index_meta.json unreadable or invalid JSON: {exc}", file=sys.stderr)
    sys.exit(32)

# A non-object JSON root (list, string, number, ...) has no keys to look up.
embedding_model = meta.get("embedding_model") if isinstance(meta, dict) else None
if not embedding_model:
    print("ERROR: embedding_model missing in index_meta.json", file=sys.stderr)
    sys.exit(31)

model = SentenceTransformer(embedding_model)
# --------------------------------------------------------- # ---------------------------------------------------------
# File checks # File checks
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -50,14 +64,8 @@ if not tags_path.is_file():
print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr) print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr)
sys.exit(20) sys.exit(20)
# Ensure output directory exists
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------
# Load model
# ---------------------------------------------------------
model = SentenceTransformer(model_name)
# --------------------------------------------------------- # ---------------------------------------------------------
# Streaming read NDJSON # Streaming read NDJSON
# --------------------------------------------------------- # ---------------------------------------------------------
@@ -85,13 +93,9 @@ with open(tags_path, "r", encoding="utf-8") as f:
if len(text) > 4000: if len(text) > 4000:
text = text[:4000] text = text[:4000]
# -------------------------------------------------
# E5 requires "passage:" prefix for indexed texts
# -------------------------------------------------
texts.append(f"passage: {text}") texts.append(f"passage: {text}")
ids.append(str(tag_id)) ids.append(str(tag_id))
# If empty: remove outputs (tmp) and exit success
if not texts: if not texts:
if out_path.exists(): if out_path.exists():
out_path.unlink() out_path.unlink()
@@ -112,17 +116,11 @@ embeddings = model.encode(
embeddings = np.array(embeddings).astype("float32") embeddings = np.array(embeddings).astype("float32")
dim = embeddings.shape[1] dim = embeddings.shape[1]
# ---------------------------------------------------------
# Build FAISS index
# ---------------------------------------------------------
index = faiss.IndexFlatIP(dim) index = faiss.IndexFlatIP(dim)
index.add(embeddings) index.add(embeddings)
faiss.write_index(index, str(out_path)) faiss.write_index(index, str(out_path))
# ---------------------------------------------------------
# Write ID mapping meta
# ---------------------------------------------------------
with open(meta_path, "w", encoding="utf-8") as f: with open(meta_path, "w", encoding="utf-8") as f:
json.dump(ids, f) json.dump(ids, f)

View File

@@ -10,11 +10,19 @@ from typing import Any, List, Optional, Dict
import numpy as np import numpy as np
import faiss import faiss
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel from pydantic import BaseModel
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
# ============================================================
# Service Stamp (to verify you are running THIS file)
# ============================================================
SERVICE_STAMP = "vector_service.py@2026-02-28T10:20+01:00"
# ============================================================ # ============================================================
# Paths # Paths
# ============================================================ # ============================================================
@@ -42,6 +50,7 @@ INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
logger = logging.getLogger("vector_service") logger = logging.getLogger("vector_service")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
def setup_logging() -> None: def setup_logging() -> None:
LOG_DIR.mkdir(parents=True, exist_ok=True) LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -68,6 +77,23 @@ def setup_logging() -> None:
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers): if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
logger.addHandler(stream_handler) logger.addHandler(stream_handler)
# Capture uvicorn logs in the same file as well (critical for hidden 500s)
uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_access = logging.getLogger("uvicorn.access")
uvicorn_error.setLevel(logging.INFO)
uvicorn_access.setLevel(logging.INFO)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(stream_handler)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(stream_handler)
# ============================================================ # ============================================================
# FastAPI # FastAPI
@@ -79,9 +105,6 @@ model: Optional[SentenceTransformer] = None
chunk_index = None chunk_index = None
chunk_ids: Optional[List[Any]] = None chunk_ids: Optional[List[Any]] = None
# Sales-RAG signals derived from NDJSON (loaded on startup and reload):
# - chunk_doc_map: chunk_id -> document_id
# - chunk_pos_map: chunk_id -> chunk_index (position within document, if available)
chunk_doc_map: Dict[str, str] = {} chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {} chunk_pos_map: Dict[str, int] = {}
@@ -89,7 +112,6 @@ tag_index = None
tag_ids: Optional[List[Any]] = None tag_ids: Optional[List[Any]] = None
loaded_embedding_model_name: Optional[str] = None loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None current_index_version: Optional[int] = None
current_runtime_stamp: Optional[str] = None current_runtime_stamp: Optional[str] = None
@@ -107,10 +129,10 @@ class SearchRequest(BaseModel):
# ============================================================ # ============================================================
# Loader # Loader Helpers
# ============================================================ # ============================================================
def _safe_read_json(path: Path) -> Optional[dict]: def _safe_read_json(path: Path) -> Optional[Any]:
try: try:
if not path.exists(): if not path.exists():
return None return None
@@ -121,9 +143,6 @@ def _safe_read_json(path: Path) -> Optional[dict]:
def _as_key(value: Any) -> Optional[str]: def _as_key(value: Any) -> Optional[str]:
"""
Normalize IDs to string keys for maps. Returns None if unusable.
"""
if value is None: if value is None:
return None return None
if isinstance(value, str): if isinstance(value, str):
@@ -136,12 +155,19 @@ def _as_key(value: Any) -> Optional[str]:
return None return None
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
try:
v = int(limit)
except Exception:
return default
if v <= 0:
return default
if v > max_limit:
return max_limit
return v
def load_chunk_maps_from_ndjson() -> None: def load_chunk_maps_from_ndjson() -> None:
"""
Builds two maps from index.ndjson:
- chunk_id -> document_id
- chunk_id -> chunk_index (position inside document, if present)
"""
global chunk_doc_map, chunk_pos_map global chunk_doc_map, chunk_pos_map
chunk_doc_map = {} chunk_doc_map = {}
@@ -156,7 +182,6 @@ def load_chunk_maps_from_ndjson() -> None:
line = line.strip() line = line.strip()
if not line: if not line:
continue continue
try: try:
row = json.loads(line) row = json.loads(line)
except Exception: except Exception:
@@ -166,19 +191,14 @@ def load_chunk_maps_from_ndjson() -> None:
if not chunk_id_key: if not chunk_id_key:
continue continue
document_id = row.get("document_id") doc_id_key = _as_key(row.get("document_id"))
doc_id_key = _as_key(document_id)
if doc_id_key: if doc_id_key:
chunk_doc_map[chunk_id_key] = doc_id_key chunk_doc_map[chunk_id_key] = doc_id_key
# chunk_index is optional but very useful for Sales-RAG diversity rules
# (e.g. min distance within a doc)
ci = row.get("chunk_index") ci = row.get("chunk_index")
if isinstance(ci, int): if isinstance(ci, int):
chunk_pos_map[chunk_id_key] = ci chunk_pos_map[chunk_id_key] = ci
else: elif isinstance(ci, str):
# tolerate numeric strings
if isinstance(ci, str):
s = ci.strip() s = ci.strip()
if s.isdigit(): if s.isdigit():
try: try:
@@ -190,16 +210,24 @@ def load_chunk_maps_from_ndjson() -> None:
logger.warning("Failed to load chunk maps from ndjson: %s", str(e)) logger.warning("Failed to load chunk maps from ndjson: %s", str(e))
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int: def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
"""
Accepts:
- list: ok
- dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
Returns None if invalid.
"""
if isinstance(value, list):
return value
if isinstance(value, dict):
try: try:
v = int(limit) keys = sorted(int(k) for k in value.keys())
return [value[str(i)] for i in keys]
except Exception: except Exception:
return default return None
if v <= 0:
return default return None
if v > max_limit:
return max_limit
return v
def load_all() -> None: def load_all() -> None:
@@ -225,13 +253,14 @@ def load_all() -> None:
model = SentenceTransformer(embedding_model_name) model = SentenceTransformer(embedding_model_name)
loaded_embedding_model_name = embedding_model_name loaded_embedding_model_name = embedding_model_name
# Chunks
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists(): if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
logger.info("[Reload] Loading chunk index") logger.info("[Reload] Loading chunk index")
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH)) chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
chunk_ids = _safe_read_json(CHUNK_MAP_PATH) or None raw = _safe_read_json(CHUNK_MAP_PATH)
if not isinstance(chunk_ids, list): chunk_ids = _normalize_meta_list(raw)
if chunk_ids is None:
chunk_index = None chunk_index = None
chunk_ids = None
logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled") logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled")
else: else:
chunk_index = None chunk_index = None
@@ -240,13 +269,14 @@ def load_all() -> None:
logger.info("[Reload] Loading chunk maps (doc_id + chunk_index)") logger.info("[Reload] Loading chunk maps (doc_id + chunk_index)")
load_chunk_maps_from_ndjson() load_chunk_maps_from_ndjson()
# Tags
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists(): if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
logger.info("[Reload] Loading tag index") logger.info("[Reload] Loading tag index")
tag_index = faiss.read_index(str(TAG_INDEX_PATH)) tag_index = faiss.read_index(str(TAG_INDEX_PATH))
tag_ids = _safe_read_json(TAG_MAP_PATH) or None raw = _safe_read_json(TAG_MAP_PATH)
if not isinstance(tag_ids, list): tag_ids = _normalize_meta_list(raw)
if tag_ids is None:
tag_index = None tag_index = None
tag_ids = None
logger.warning("[Reload] tag_ids meta invalid -> tag index disabled") logger.warning("[Reload] tag_ids meta invalid -> tag index disabled")
else: else:
tag_index = None tag_index = None
@@ -262,15 +292,17 @@ def load_all() -> None:
current_index_version = index_version if isinstance(index_version, int) else None current_index_version = index_version if isinstance(index_version, int) else None
logger.info( logger.info(
"[Reload] Completed (index_version=%s runtime=%s embedding_model=%s)", "[Reload] Completed (index_version=%s runtime=%s embedding_model=%s stamp=%s file=%s)",
str(current_index_version), str(current_index_version),
str(current_runtime_stamp), str(current_runtime_stamp),
str(loaded_embedding_model_name), str(loaded_embedding_model_name),
SERVICE_STAMP,
str(Path(__file__).resolve()),
) )
# ============================================================ # ============================================================
# Observer (Enterprise Auto Reload) # Observer
# ============================================================ # ============================================================
def observer_loop() -> None: def observer_loop() -> None:
@@ -294,24 +326,34 @@ def observer_loop() -> None:
new_runtime = v if isinstance(v, str) else None new_runtime = v if isinstance(v, str) else None
if new_version != current_index_version: if new_version != current_index_version:
logger.info( logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
"[Observer] index_version changed (%s -> %s) -> Reload",
str(current_index_version),
str(new_version),
)
load_all() load_all()
continue continue
if new_runtime != current_runtime_stamp: if new_runtime != current_runtime_stamp:
logger.info( logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
"[Observer] runtime changed (%s -> %s) -> Reload",
str(current_runtime_stamp),
str(new_runtime),
)
load_all() load_all()
except Exception as e: except Exception as e:
logger.error("[Observer ERROR] %s", str(e)) logger.exception("[Observer ERROR] %s", str(e))
# ============================================================
# Global Exception Handler (forces JSON + logs)
# ============================================================
@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception):
    """Log an exception that escaped a route and answer with a JSON 500 body.

    The response carries the exception text, the request path and the
    service stamp.
    """
    path = request.url.path
    logger.exception("UNHANDLED_EXCEPTION path=%s method=%s", path, request.method)

    body = {
        "error": "Internal Server Error",
        "detail": str(exc),
        "path": path,
        "stamp": SERVICE_STAMP,
    }
    return JSONResponse(status_code=500, content=body)
# ============================================================ # ============================================================
@@ -321,13 +363,10 @@ def observer_loop() -> None:
@app.on_event("startup") @app.on_event("startup")
def startup_event(): def startup_event():
setup_logging() setup_logging()
logger.info("[VectorService] Startup") logger.info("[VectorService] Startup stamp=%s file=%s", SERVICE_STAMP, str(Path(__file__).resolve()))
load_all() load_all()
t = threading.Thread(target=observer_loop, daemon=True) t = threading.Thread(target=observer_loop, daemon=True)
t.start() t.start()
logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE)) logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
@@ -339,12 +378,18 @@ def startup_event():
def health(): def health():
return { return {
"status": "ok", "status": "ok",
"stamp": SERVICE_STAMP,
"file": str(Path(__file__).resolve()),
"chunk_index_loaded": chunk_index is not None, "chunk_index_loaded": chunk_index is not None,
"tag_index_loaded": tag_index is not None, "tag_index_loaded": tag_index is not None,
"model_loaded": model is not None, "model_loaded": model is not None,
"embedding_model": loaded_embedding_model_name, "embedding_model": loaded_embedding_model_name,
"index_version": current_index_version, "index_version": current_index_version,
"runtime_stamp": current_runtime_stamp, "runtime_stamp": current_runtime_stamp,
"tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None,
"tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None,
"chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None,
"chunk_meta_len": len(chunk_ids) if isinstance(chunk_ids, list) else None,
"log_file": str(LOG_FILE), "log_file": str(LOG_FILE),
} }
@@ -353,8 +398,9 @@ def health():
def reload(): def reload():
try: try:
load_all() load_all()
return {"status": "reloaded"} return {"status": "reloaded", "stamp": SERVICE_STAMP}
except Exception as e: except Exception as e:
logger.exception("reload failed")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@@ -363,38 +409,30 @@ def search_chunks(req: SearchRequest):
if chunk_index is None or chunk_ids is None or model is None: if chunk_index is None or chunk_ids is None or model is None:
raise HTTPException(status_code=503, detail="Chunk index not available") raise HTTPException(status_code=503, detail="Chunk index not available")
# Safety: clamp limit to prevent abuse / accidental huge queries try:
limit = _sanitize_limit(req.limit, default=8, max_limit=200) limit = _sanitize_limit(req.limit, default=8, max_limit=200)
query = (req.query or "").strip() query = (req.query or "").strip()
if not query: if not query:
raise HTTPException(status_code=400, detail="query must not be empty") raise HTTPException(status_code=400, detail="query must not be empty")
query_vec = model.encode( query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
[f"query: {query}"],
normalize_embeddings=True
)
query_vec = np.array(query_vec).astype("float32") query_vec = np.array(query_vec).astype("float32")
effective_limit = limit effective_limit = limit
doc_filter: Optional[List[str]] = None doc_filter: Optional[List[str]] = None
if req.doc_ids: if req.doc_ids:
# Normalize incoming doc_ids for reliable matching
doc_filter = [] doc_filter = []
for d in req.doc_ids: for d in req.doc_ids:
dk = _as_key(d) dk = _as_key(d)
if dk: if dk:
doc_filter.append(dk) doc_filter.append(dk)
# When doc filtering is enabled, we fetch a wider pool and filter down.
# Keep it bounded to avoid expensive scans on huge indices.
effective_limit = max(limit * 5, 50) effective_limit = max(limit * 5, 50)
effective_limit = min(effective_limit, 500) effective_limit = min(effective_limit, 500)
scores, indices = chunk_index.search(query_vec, effective_limit) scores, indices = chunk_index.search(query_vec, effective_limit)
results = [] results = []
for score, idx in zip(scores[0], indices[0]): for score, idx in zip(scores[0], indices[0]):
if idx == -1: if idx == -1:
continue continue
@@ -406,19 +444,15 @@ def search_chunks(req: SearchRequest):
if not chunk_id_key: if not chunk_id_key:
continue continue
# Apply doc filter if requested
doc_id = chunk_doc_map.get(chunk_id_key) doc_id = chunk_doc_map.get(chunk_id_key)
if doc_filter is not None: if doc_filter is not None:
if doc_id is None or doc_id not in doc_filter: if doc_id is None or doc_id not in doc_filter:
continue continue
# Sales-RAG signals:
# - document_id (for doc quotas / diversity rules)
# - chunk_index (position within doc for distance constraints)
payload = { payload = {
"chunk_id": raw_chunk_id, "chunk_id": raw_chunk_id,
"score": float(score), "score": float(score),
"document_id": doc_id, # may be None if ndjson missing/partial "document_id": doc_id,
} }
ci = chunk_pos_map.get(chunk_id_key) ci = chunk_pos_map.get(chunk_id_key)
@@ -432,37 +466,48 @@ def search_chunks(req: SearchRequest):
return results return results
except HTTPException:
raise
except Exception as e:
logger.exception("search-chunks failure")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/search-tags") @app.post("/search-tags")
def search_tags(req: SearchRequest): def search_tags(req: SearchRequest):
if tag_index is None or tag_ids is None or model is None: if tag_index is None or tag_ids is None or model is None:
raise HTTPException(status_code=503, detail="Tag index not available") raise HTTPException(status_code=503, detail="Tag index not available")
try:
limit = _sanitize_limit(req.limit, default=8, max_limit=200) limit = _sanitize_limit(req.limit, default=8, max_limit=200)
query = (req.query or "").strip() query = (req.query or "").strip()
if not query: if not query:
raise HTTPException(status_code=400, detail="query must not be empty") raise HTTPException(status_code=400, detail="query must not be empty")
query_vec = model.encode( query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
[f"query: {query}"],
normalize_embeddings=True
)
query_vec = np.array(query_vec).astype("float32") query_vec = np.array(query_vec).astype("float32")
if query_vec.ndim != 2:
raise RuntimeError(f"Invalid embedding shape: {query_vec.shape}")
if query_vec.shape[1] != tag_index.d:
raise RuntimeError(f"Embedding dimension mismatch (vec={query_vec.shape[1]}, index={tag_index.d})")
scores, indices = tag_index.search(query_vec, limit) scores, indices = tag_index.search(query_vec, limit)
results = [] results = []
for score, idx in zip(scores[0], indices[0]): for score, idx in zip(scores[0], indices[0]):
if idx == -1: if idx == -1:
continue continue
if idx < 0 or idx >= len(tag_ids): if idx < 0 or idx >= len(tag_ids):
continue continue
results.append({"tag_id": tag_ids[idx], "score": float(score)})
results.append({
"tag_id": tag_ids[idx],
"score": float(score),
})
return results return results
except HTTPException:
raise
except Exception as e:
logger.exception("search-tags failure")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -0,0 +1,118 @@
<?php
declare(strict_types=1);
namespace App\Catalog;
use App\Tag\TagVectorSearchClient;
use Doctrine\DBAL\Connection;
use Symfony\Component\Uid\Uuid;
/**
 * EntityCatalogService
 *
 * Deterministic catalog lists based on an entity term:
 * - tag vector search (score gate + ambiguity check)
 * - DB query on document_tag + document (status ACTIVE)
 * - result is ONE text block (string), or null so the caller can fall back
 *   to the normal retrieval path
 */
final class EntityCatalogService
{
    /** Minimum score the best tag hit must reach. */
    private const MIN_SCORE = 0.55;

    /** If the two best scores are closer than this, the match counts as ambiguous. */
    private const AMBIGUITY_DELTA = 0.05;

    public function __construct(
        private readonly TagVectorSearchClient $tagVectorClient,
        private readonly Connection $connection,
    ) {}

    /**
     * Builds the catalog text block for an entity term.
     *
     * @param string $entityTerm term used for the tag vector search and for the headline
     *
     * @return string|null text block, or null when no safe catalog can be produced
     *                     (empty term, no hit, score too low, ambiguous hit, missing or
     *                     malformed tag id, no ACTIVE documents, only empty titles)
     */
    public function listByTerm(string $entityTerm): ?string
    {
        $entityTerm = trim($entityTerm);
        if ($entityTerm === '') {
            return null;
        }

        // 1) Tag vector search (top 3, needed for the ambiguity check)
        $hits = $this->tagVectorClient->search($entityTerm, 3);
        if ($hits === []) {
            return null;
        }

        $best = $hits[0];
        $bestScore = isset($best['score']) ? (float)$best['score'] : 0.0;

        if ($bestScore < self::MIN_SCORE) {
            return null;
        }

        // 2) Ambiguity: if the runner-up is too close, bail out conservatively
        if (isset($hits[1])) {
            $secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0;
            if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
                return null;
            }
        }

        $tagHex = (string)($best['tag_id'] ?? '');
        if ($tagHex === '') {
            return null;
        }

        // Uuid::fromString() throws on a malformed id. Treat that like "no safe
        // catalog" so the caller falls back instead of failing the whole request.
        try {
            $tagIdBinary = Uuid::fromString($tagHex)->toBinary();
        } catch (\InvalidArgumentException) {
            return null;
        }

        // 3) DB query: all ACTIVE documents attached to this tag
        $rows = $this->connection->fetchAllAssociative(
            '
            SELECT d.title
            FROM document d
            INNER JOIN document_tag dt ON dt.document_id = d.id
            WHERE dt.tag_id = :tagId
            AND d.status = :status
            ORDER BY d.title ASC
            ',
            [
                'tagId' => $tagIdBinary,
                'status' => 'ACTIVE',
            ]
        );

        if ($rows === []) {
            return null;
        }

        // Keep only non-empty titles.
        $titles = [];
        foreach ($rows as $row) {
            $t = trim((string)($row['title'] ?? ''));
            if ($t !== '') {
                $titles[] = $t;
            }
        }

        if ($titles === []) {
            return null;
        }

        return $this->buildTextBlock($entityTerm, $titles);
    }

    /**
     * Renders the headline for the entity term followed by one "- title" line per title.
     *
     * @param string   $entityTerm term selecting the headline; unknown terms get the generic one
     * @param string[] $titles     titles to list, in output order
     */
    private function buildTextBlock(string $entityTerm, array $titles): string
    {
        $headline = match ($entityTerm) {
            'geräte' => 'Folgende Geräte sind verfügbar:',
            'indikatoren' => 'Folgende Indikatoren sind verfügbar:',
            'funktionen' => 'Folgende Funktionen sind verfügbar:',
            'zubehör' => 'Folgendes Zubehör ist verfügbar:',
            default => 'Folgende Einträge sind verfügbar:',
        };

        $lines = [];
        foreach ($titles as $title) {
            $lines[] = '- ' . $title;
        }

        return $headline . "\n\n" . implode("\n", $lines);
    }
}

View File

@@ -0,0 +1,138 @@
<?php
declare(strict_types=1);
namespace App\Intent;
/**
 * CatalogIntentLite
 *
 * Minimal, deterministic detection of catalog / entity list requests.
 *
 * Goal:
 * - "Liste aller Geräte" / "Welche Indikatoren gibt es?" / "Zeig mir alle Funktionen"
 *
 * Guardrails:
 * - No catalog mode for sales / pricing / comparison / ROI / implementation / objection intents.
 * - No catalog mode without an explicit entity term.
 *
 * IMPORTANT:
 * - Always call with the ORIGINAL prompt.
 * - No LLM, no ML.
 */
final class CatalogIntentLite
{
    /**
     * List signals (lightweight). IntentLite remains responsible for "general" list detection.
     *
     * Signals are matched as plain substrings of the normalized prompt.
     */
    private const LIST_SIGNALS = [
        'liste',
        'auflisten',
        'aufzaehl',
        'aufzähl',
        'übersicht',
        'uebersicht',
        'welche gibt es',
        'welche sind',
        'zeig mir alle',
        'zeige mir alle',
        'alle',
    ];

    /**
     * Entity terms supported as catalog types.
     *
     * Key: canonical term (used for the tag search)
     * Value: synonyms that may appear in the prompt.
     */
    private const ENTITY_TERMS = [
        'geräte' => ['gerät', 'geräte', 'geraet', 'geraete', 'device', 'devices'],
        'indikatoren' => ['indikator', 'indikatoren', 'indicator', 'indicators'],
        'funktionen' => ['funktion', 'funktionen', 'feature', 'features', 'funktionalität', 'funktionalitaet'],
        'zubehör' => ['zubehör', 'zubehoer', 'accessory', 'accessories', 'zubehor'],
    ];

    public function __construct(
        private readonly SalesIntentLite $salesIntentLite,
    ) {}

    /**
     * Detects a catalog request and returns the entity type it asks for.
     *
     * @param string $originalPrompt the unmodified user prompt
     *
     * @return string|null canonical entity term (e.g. "geräte"), or null when there is no catalog intent
     */
    public function detect(string $originalPrompt): ?string
    {
        $p = $this->normalize($originalPrompt);

        // 1) The prompt must contain a list signal
        if (!$this->containsAny($p, self::LIST_SIGNALS)) {
            return null;
        }

        // 2) Guardrail: no catalog mode unless the sales intent is DISCOVERY
        $sales = $this->salesIntentLite->detect($originalPrompt);
        $intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);

        if ($intent !== SalesIntentLite::DISCOVERY) {
            return null;
        }

        // 3) Extract an explicit entity term (otherwise no catalog)
        foreach (self::ENTITY_TERMS as $canonical => $synonyms) {
            foreach ($synonyms as $syn) {
                if ($this->containsWord($p, $syn)) {
                    return $canonical;
                }
            }
        }

        return null;
    }

    // ------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------

    /**
     * Returns true if any non-empty needle occurs as a substring of the haystack.
     *
     * @param string[] $needles
     */
    private function containsAny(string $haystack, array $needles): bool
    {
        foreach ($needles as $needle) {
            if ($needle === '') {
                continue;
            }
            if (str_contains($haystack, $needle)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if the word occurs in the haystack delimited by word boundaries.
     */
    private function containsWord(string $haystack, string $word): bool
    {
        $word = trim($word);
        if ($word === '') {
            return false;
        }

        return preg_match('/\b' . preg_quote($word, '/') . '\b/u', $haystack) === 1;
    }

    /**
     * Lower-cases the prompt and, if it contains ä/ö/ü/ß, appends an ASCII
     * transliterated copy so that both spellings can be matched.
     *
     * All four characters are transliterated in the appended copy; previously
     * only the first kind found was replaced, leaving the copy half-converted
     * for prompts mixing several of them.
     */
    private function normalize(string $s): string
    {
        $s = mb_strtolower($s);

        // Safeguard umlauts (analogous to IntentLite/SalesIntentLite)
        $ascii = strtr($s, [
            'ä' => 'ae',
            'ö' => 'oe',
            'ü' => 'ue',
            'ß' => 'ss',
        ]);

        // Nothing to transliterate -> keep the prompt as is.
        if ($ascii === $s) {
            return $s;
        }

        return $s . ' ' . $ascii;
    }
}

View File

@@ -4,7 +4,9 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval; namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Entity\ModelGenerationConfig; use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite; use App\Intent\IntentLite;
use App\Intent\SalesIntentLite; use App\Intent\SalesIntentLite;
use App\Knowledge\QueryCleaner; use App\Knowledge\QueryCleaner;
@@ -32,7 +34,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
private readonly ModelGenerationConfigRepository $configRepository, private readonly ModelGenerationConfigRepository $configRepository,
private readonly QueryCleaner $queryCleaner, private readonly QueryCleaner $queryCleaner,
private readonly IntentLite $intentLite, private readonly IntentLite $intentLite,
private readonly SalesIntentLite $salesIntentLite private readonly SalesIntentLite $salesIntentLite,
private readonly CatalogIntentLite $catalogIntent,
private readonly EntityCatalogService $entityCatalogService
) )
{ {
} }
@@ -54,6 +58,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
{ {
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
$entityTerm = $this->catalogIntent->detect($prompt);
if ($entityTerm !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
if ($catalogBlock !== null) {
return [$catalogBlock];
}
}
$core = $this->runCore($prompt, $config, false); $core = $this->runCore($prompt, $config, false);
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) { if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
@@ -111,8 +126,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return []; return [];
} }
// 1) Production-like selection: wir selektieren Texte,
// aber in Debug brauchen wir die ChunkIds dazu.
$selectedChunkIds = $core['is_list_query'] $selectedChunkIds = $core['is_list_query']
? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']) ? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
: $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']); : $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
@@ -121,7 +134,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return []; return [];
} }
// 2) Ausgabe inklusive Scores
$out = []; $out = [];
$rank = 0; $rank = 0;
@@ -179,7 +191,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$cleanQuery = $prompt; $cleanQuery = $prompt;
} }
// Intent-based adjustments (identisch zur Produktionslogik)
$threshold = self::VECTOR_SCORE_THRESHOLD; $threshold = self::VECTOR_SCORE_THRESHOLD;
$topK = $vectorTopKBase; $topK = $vectorTopKBase;
@@ -216,7 +227,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$topK = max(1, min($topK, self::HARD_MAX_VECTORK)); $topK = max(1, min($topK, self::HARD_MAX_VECTORK));
// Tag routing (identisch)
$candidateDocIds = $this->tagRouting->route($cleanQuery); $candidateDocIds = $this->tagRouting->route($cleanQuery);
$candidateSet = null; $candidateSet = null;
@@ -224,7 +234,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$candidateSet = array_fill_keys($candidateDocIds, true); $candidateSet = array_fill_keys($candidateDocIds, true);
} }
// Dual search (identisch)
$globalHits = $this->vectorClient->search($cleanQuery, $topK); $globalHits = $this->vectorClient->search($cleanQuery, $topK);
$scopedHits = []; $scopedHits = [];
@@ -249,7 +258,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$rrfScores = []; $rrfScores = [];
$rawScores = []; $rawScores = [];
// RRF (identisch) + optional raw capture
$this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores); $this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores);
$this->applyRrfWithOptionalRaw( $this->applyRrfWithOptionalRaw(
$scopedHits, $scopedHits,
@@ -292,13 +300,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
]; ];
} }
/**
* Gleiche Logik wie applyRrf(), aber optional mit raw-score capture.
*
* @param array<int, array{chunk_id:string, score:float}> $hits
* @param array<string,float> $rrfScores
* @param array<string,float> $rawScores
*/
private function applyRrfWithOptionalRaw( private function applyRrfWithOptionalRaw(
array $hits, array $hits,
array &$rrfScores, array &$rrfScores,
@@ -322,7 +323,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$chunkId = (string)$hit['chunk_id']; $chunkId = (string)$hit['chunk_id'];
if ($captureRaw) { if ($captureRaw) {
// wenn global+scoped vorkommt: bestes raw behalten
if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) { if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) {
$rawScores[$chunkId] = $raw; $rawScores[$chunkId] = $raw;
} }
@@ -343,15 +343,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
} }
} }
// =========================================================
// DEBUG SELECTION HELPERS (identisch zu Produktionsregeln)
// =========================================================
/**
* List-Mode nutzt exakt collectTexts() Regeln, aber gibt ChunkIds zurück.
*
* @return string[]
*/
private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array
{ {
$seen = []; $seen = [];
@@ -384,11 +375,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out; return $out;
} }
/**
* Normal-Mode nutzt exakt collectSalesOptimized() Regeln, aber gibt ChunkIds zurück.
*
* @return string[]
*/
private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array
{ {
$out = []; $out = [];
@@ -437,10 +423,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out; return $out;
} }
// =========================================================
// ORIGINAL METHODS (UNVERÄNDERT)
// =========================================================
private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void
{ {
$rank = 0; $rank = 0;

View File

@@ -58,6 +58,7 @@ final readonly class TagVectorSearchClient
'Tag vector service returned non-200', 'Tag vector service returned non-200',
['status' => $response->getStatusCode()] ['status' => $response->getStatusCode()]
); );
return []; return [];
} }
@@ -68,6 +69,7 @@ final readonly class TagVectorSearchClient
'Tag vector service unreachable', 'Tag vector service unreachable',
['error' => $e->getMessage()] ['error' => $e->getMessage()]
); );
return []; return [];
} }