add catalog mode

This commit is contained in:
team2
2026-02-28 13:51:54 +01:00
parent 47a3c9cca2
commit d3294464ea
7 changed files with 484 additions and 201 deletions

View File

@@ -31,7 +31,7 @@ parameters:
mto.index.chunk_size: 800
mto.index.chunk_overlap: 100
mto.index.embedding_model: 'all-MiniLM-L6-v2'
mto.index.embedding_model: 'intfloat/multilingual-e5-base'
mto.index.embedding_dimension: 768
mto.index.scoring_version: 1

View File

@@ -5,24 +5,19 @@ import json
from pathlib import Path
# ---------------------------------------------------------
# Positional args (aligned with PHP builder exec call)
# ---------------------------------------------------------
# Positional args
# 1 tags.ndjson
# 2 out_index_path (can be .tmp)
# 3 model
# Example:
# python vector_ingest_tags.py /var/knowledge/tags.ndjson /var/knowledge/vector_tags.index.tmp all-MiniLM-L6-v2
# ---------------------------------------------------------
if len(sys.argv) < 4:
print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index> <model>", file=sys.stderr)
if len(sys.argv) < 3:
print("ERROR: usage: vector_ingest_tags.py <tags.ndjson> <out.index>", file=sys.stderr)
sys.exit(2)
tags_path = Path(sys.argv[1]).resolve()
out_path = Path(sys.argv[2]).resolve()
model_name = sys.argv[3]
meta_path = Path(str(out_path) + ".meta.json") # vector_tags.index(.tmp).meta.json
meta_path = Path(str(out_path) + ".meta.json")
# ---------------------------------------------------------
# Dependency checks
@@ -43,6 +38,25 @@ import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
# ---------------------------------------------------------
# Load embedding model from index_meta.json (Single Source of Truth)
# ---------------------------------------------------------
BASE_PATH = Path(__file__).resolve().parents[2]
INDEX_META_PATH = BASE_PATH / "var" / "knowledge" / "index_meta.json"
if not INDEX_META_PATH.exists():
print("ERROR: index_meta.json not found", file=sys.stderr)
sys.exit(30)
meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
embedding_model = meta.get("embedding_model")
if not embedding_model:
print("ERROR: embedding_model missing in index_meta.json", file=sys.stderr)
sys.exit(31)
model = SentenceTransformer(embedding_model)
# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
@@ -50,14 +64,8 @@ if not tags_path.is_file():
print(f"ERROR: tags.ndjson not found at {tags_path}", file=sys.stderr)
sys.exit(20)
# Ensure output directory exists
out_path.parent.mkdir(parents=True, exist_ok=True)
# ---------------------------------------------------------
# Load model
# ---------------------------------------------------------
model = SentenceTransformer(model_name)
# ---------------------------------------------------------
# Streaming read NDJSON
# ---------------------------------------------------------
@@ -85,13 +93,9 @@ with open(tags_path, "r", encoding="utf-8") as f:
if len(text) > 4000:
text = text[:4000]
# -------------------------------------------------
# E5 requires "passage:" prefix for indexed texts
# -------------------------------------------------
texts.append(f"passage: {text}")
ids.append(str(tag_id))
# If empty: remove outputs (tmp) and exit success
if not texts:
if out_path.exists():
out_path.unlink()
@@ -112,17 +116,11 @@ embeddings = model.encode(
embeddings = np.array(embeddings).astype("float32")
dim = embeddings.shape[1]
# ---------------------------------------------------------
# Build FAISS index
# ---------------------------------------------------------
index = faiss.IndexFlatIP(dim)
index.add(embeddings)
faiss.write_index(index, str(out_path))
# ---------------------------------------------------------
# Write ID mapping meta
# ---------------------------------------------------------
with open(meta_path, "w", encoding="utf-8") as f:
json.dump(ids, f)

View File

@@ -10,11 +10,19 @@ from typing import Any, List, Optional, Dict
import numpy as np
import faiss
from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
# ============================================================
# Service Stamp (to verify you are running THIS file)
# ============================================================
SERVICE_STAMP = "vector_service.py@2026-02-28T10:20+01:00"
# ============================================================
# Paths
# ============================================================
@@ -42,6 +50,7 @@ INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson"
logger = logging.getLogger("vector_service")
logger.setLevel(logging.INFO)
def setup_logging() -> None:
LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -68,6 +77,23 @@ def setup_logging() -> None:
if not any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
logger.addHandler(stream_handler)
# Capture uvicorn logs in the same file as well (critical for hidden 500s)
uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_access = logging.getLogger("uvicorn.access")
uvicorn_error.setLevel(logging.INFO)
uvicorn_access.setLevel(logging.INFO)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_error.handlers):
uvicorn_error.addHandler(stream_handler)
if not any(isinstance(h, RotatingFileHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(file_handler)
if not any(isinstance(h, logging.StreamHandler) for h in uvicorn_access.handlers):
uvicorn_access.addHandler(stream_handler)
# ============================================================
# FastAPI
@@ -79,9 +105,6 @@ model: Optional[SentenceTransformer] = None
chunk_index = None
chunk_ids: Optional[List[Any]] = None
# Sales-RAG signals derived from NDJSON (loaded on startup and reload):
# - chunk_doc_map: chunk_id -> document_id
# - chunk_pos_map: chunk_id -> chunk_index (position within document, if available)
chunk_doc_map: Dict[str, str] = {}
chunk_pos_map: Dict[str, int] = {}
@@ -89,7 +112,6 @@ tag_index = None
tag_ids: Optional[List[Any]] = None
loaded_embedding_model_name: Optional[str] = None
current_index_version: Optional[int] = None
current_runtime_stamp: Optional[str] = None
@@ -107,10 +129,10 @@ class SearchRequest(BaseModel):
# ============================================================
# Loader
# Loader Helpers
# ============================================================
def _safe_read_json(path: Path) -> Optional[dict]:
def _safe_read_json(path: Path) -> Optional[Any]:
try:
if not path.exists():
return None
@@ -121,9 +143,6 @@ def _safe_read_json(path: Path) -> Optional[dict]:
def _as_key(value: Any) -> Optional[str]:
"""
Normalize IDs to string keys for maps. Returns None if unusable.
"""
if value is None:
return None
if isinstance(value, str):
@@ -136,12 +155,19 @@ def _as_key(value: Any) -> Optional[str]:
return None
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
try:
v = int(limit)
except Exception:
return default
if v <= 0:
return default
if v > max_limit:
return max_limit
return v
def load_chunk_maps_from_ndjson() -> None:
"""
Builds two maps from index.ndjson:
- chunk_id -> document_id
- chunk_id -> chunk_index (position inside document, if present)
"""
global chunk_doc_map, chunk_pos_map
chunk_doc_map = {}
@@ -156,7 +182,6 @@ def load_chunk_maps_from_ndjson() -> None:
line = line.strip()
if not line:
continue
try:
row = json.loads(line)
except Exception:
@@ -166,40 +191,43 @@ def load_chunk_maps_from_ndjson() -> None:
if not chunk_id_key:
continue
document_id = row.get("document_id")
doc_id_key = _as_key(document_id)
doc_id_key = _as_key(row.get("document_id"))
if doc_id_key:
chunk_doc_map[chunk_id_key] = doc_id_key
# chunk_index is optional but very useful for Sales-RAG diversity rules
# (e.g. min distance within a doc)
ci = row.get("chunk_index")
if isinstance(ci, int):
chunk_pos_map[chunk_id_key] = ci
else:
# tolerate numeric strings
if isinstance(ci, str):
s = ci.strip()
if s.isdigit():
try:
chunk_pos_map[chunk_id_key] = int(s)
except Exception:
pass
elif isinstance(ci, str):
s = ci.strip()
if s.isdigit():
try:
chunk_pos_map[chunk_id_key] = int(s)
except Exception:
pass
except Exception as e:
logger.warning("Failed to load chunk maps from ndjson: %s", str(e))
def _sanitize_limit(limit: int, default: int = 8, max_limit: int = 200) -> int:
try:
v = int(limit)
except Exception:
return default
if v <= 0:
return default
if v > max_limit:
return max_limit
return v
def _normalize_meta_list(value: Any) -> Optional[List[Any]]:
"""
Accepts:
- list: ok
- dict like {"0": "...", "1": "..."}: convert to list sorted by numeric key
Returns None if invalid.
"""
if isinstance(value, list):
return value
if isinstance(value, dict):
try:
keys = sorted(int(k) for k in value.keys())
return [value[str(i)] for i in keys]
except Exception:
return None
return None
def load_all() -> None:
@@ -225,13 +253,14 @@ def load_all() -> None:
model = SentenceTransformer(embedding_model_name)
loaded_embedding_model_name = embedding_model_name
# Chunks
if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
logger.info("[Reload] Loading chunk index")
chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
chunk_ids = _safe_read_json(CHUNK_MAP_PATH) or None
if not isinstance(chunk_ids, list):
raw = _safe_read_json(CHUNK_MAP_PATH)
chunk_ids = _normalize_meta_list(raw)
if chunk_ids is None:
chunk_index = None
chunk_ids = None
logger.warning("[Reload] chunk_ids meta invalid -> chunk index disabled")
else:
chunk_index = None
@@ -240,13 +269,14 @@ def load_all() -> None:
logger.info("[Reload] Loading chunk maps (doc_id + chunk_index)")
load_chunk_maps_from_ndjson()
# Tags
if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
logger.info("[Reload] Loading tag index")
tag_index = faiss.read_index(str(TAG_INDEX_PATH))
tag_ids = _safe_read_json(TAG_MAP_PATH) or None
if not isinstance(tag_ids, list):
raw = _safe_read_json(TAG_MAP_PATH)
tag_ids = _normalize_meta_list(raw)
if tag_ids is None:
tag_index = None
tag_ids = None
logger.warning("[Reload] tag_ids meta invalid -> tag index disabled")
else:
tag_index = None
@@ -262,15 +292,17 @@ def load_all() -> None:
current_index_version = index_version if isinstance(index_version, int) else None
logger.info(
"[Reload] Completed (index_version=%s runtime=%s embedding_model=%s)",
"[Reload] Completed (index_version=%s runtime=%s embedding_model=%s stamp=%s file=%s)",
str(current_index_version),
str(current_runtime_stamp),
str(loaded_embedding_model_name),
SERVICE_STAMP,
str(Path(__file__).resolve()),
)
# ============================================================
# Observer (Enterprise Auto Reload)
# Observer
# ============================================================
def observer_loop() -> None:
@@ -294,24 +326,34 @@ def observer_loop() -> None:
new_runtime = v if isinstance(v, str) else None
if new_version != current_index_version:
logger.info(
"[Observer] index_version changed (%s -> %s) -> Reload",
str(current_index_version),
str(new_version),
)
logger.info("[Observer] index_version changed (%s -> %s) -> Reload", str(current_index_version), str(new_version))
load_all()
continue
if new_runtime != current_runtime_stamp:
logger.info(
"[Observer] runtime changed (%s -> %s) -> Reload",
str(current_runtime_stamp),
str(new_runtime),
)
logger.info("[Observer] runtime changed (%s -> %s) -> Reload", str(current_runtime_stamp), str(new_runtime))
load_all()
except Exception as e:
logger.error("[Observer ERROR] %s", str(e))
logger.exception("[Observer ERROR] %s", str(e))
# ============================================================
# Global Exception Handler (forces JSON + logs)
# ============================================================
@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception):
logger.exception("UNHANDLED_EXCEPTION path=%s method=%s", request.url.path, request.method)
return JSONResponse(
status_code=500,
content={
"error": "Internal Server Error",
"detail": str(exc),
"path": request.url.path,
"stamp": SERVICE_STAMP,
},
)
# ============================================================
@@ -321,13 +363,10 @@ def observer_loop() -> None:
@app.on_event("startup")
def startup_event():
setup_logging()
logger.info("[VectorService] Startup")
logger.info("[VectorService] Startup stamp=%s file=%s", SERVICE_STAMP, str(Path(__file__).resolve()))
load_all()
t = threading.Thread(target=observer_loop, daemon=True)
t.start()
logger.info("[VectorService] Ready (log=%s)", str(LOG_FILE))
@@ -339,12 +378,18 @@ def startup_event():
def health():
return {
"status": "ok",
"stamp": SERVICE_STAMP,
"file": str(Path(__file__).resolve()),
"chunk_index_loaded": chunk_index is not None,
"tag_index_loaded": tag_index is not None,
"model_loaded": model is not None,
"embedding_model": loaded_embedding_model_name,
"index_version": current_index_version,
"runtime_stamp": current_runtime_stamp,
"tag_meta_type": type(tag_ids).__name__ if tag_ids is not None else None,
"tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None,
"chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None,
"chunk_meta_len": len(chunk_ids) if isinstance(chunk_ids, list) else None,
"log_file": str(LOG_FILE),
}
@@ -353,8 +398,9 @@ def health():
def reload():
try:
load_all()
return {"status": "reloaded"}
return {"status": "reloaded", "stamp": SERVICE_STAMP}
except Exception as e:
logger.exception("reload failed")
raise HTTPException(status_code=500, detail=str(e))
@@ -363,74 +409,68 @@ def search_chunks(req: SearchRequest):
if chunk_index is None or chunk_ids is None or model is None:
raise HTTPException(status_code=503, detail="Chunk index not available")
# Safety: clamp limit to prevent abuse / accidental huge queries
limit = _sanitize_limit(req.limit, default=8, max_limit=200)
try:
limit = _sanitize_limit(req.limit, default=8, max_limit=200)
query = (req.query or "").strip()
if not query:
raise HTTPException(status_code=400, detail="query must not be empty")
query = (req.query or "").strip()
if not query:
raise HTTPException(status_code=400, detail="query must not be empty")
query_vec = model.encode(
[f"query: {query}"],
normalize_embeddings=True
)
query_vec = np.array(query_vec).astype("float32")
query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
query_vec = np.array(query_vec).astype("float32")
effective_limit = limit
doc_filter: Optional[List[str]] = None
if req.doc_ids:
# Normalize incoming doc_ids for reliable matching
doc_filter = []
for d in req.doc_ids:
dk = _as_key(d)
if dk:
doc_filter.append(dk)
effective_limit = limit
doc_filter: Optional[List[str]] = None
if req.doc_ids:
doc_filter = []
for d in req.doc_ids:
dk = _as_key(d)
if dk:
doc_filter.append(dk)
effective_limit = max(limit * 5, 50)
effective_limit = min(effective_limit, 500)
# When doc filtering is enabled, we fetch a wider pool and filter down.
# Keep it bounded to avoid expensive scans on huge indices.
effective_limit = max(limit * 5, 50)
effective_limit = min(effective_limit, 500)
scores, indices = chunk_index.search(query_vec, effective_limit)
scores, indices = chunk_index.search(query_vec, effective_limit)
results = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if idx < 0 or idx >= len(chunk_ids):
continue
raw_chunk_id = chunk_ids[idx]
chunk_id_key = _as_key(raw_chunk_id)
if not chunk_id_key:
continue
# Apply doc filter if requested
doc_id = chunk_doc_map.get(chunk_id_key)
if doc_filter is not None:
if doc_id is None or doc_id not in doc_filter:
results = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if idx < 0 or idx >= len(chunk_ids):
continue
# Sales-RAG signals:
# - document_id (for doc quotas / diversity rules)
# - chunk_index (position within doc for distance constraints)
payload = {
"chunk_id": raw_chunk_id,
"score": float(score),
"document_id": doc_id, # may be None if ndjson missing/partial
}
raw_chunk_id = chunk_ids[idx]
chunk_id_key = _as_key(raw_chunk_id)
if not chunk_id_key:
continue
ci = chunk_pos_map.get(chunk_id_key)
if isinstance(ci, int):
payload["chunk_index"] = ci
doc_id = chunk_doc_map.get(chunk_id_key)
if doc_filter is not None:
if doc_id is None or doc_id not in doc_filter:
continue
results.append(payload)
payload = {
"chunk_id": raw_chunk_id,
"score": float(score),
"document_id": doc_id,
}
if len(results) >= limit:
break
ci = chunk_pos_map.get(chunk_id_key)
if isinstance(ci, int):
payload["chunk_index"] = ci
return results
results.append(payload)
if len(results) >= limit:
break
return results
except HTTPException:
raise
except Exception as e:
logger.exception("search-chunks failure")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/search-tags")
@@ -438,31 +478,36 @@ def search_tags(req: SearchRequest):
if tag_index is None or tag_ids is None or model is None:
raise HTTPException(status_code=503, detail="Tag index not available")
limit = _sanitize_limit(req.limit, default=8, max_limit=200)
try:
limit = _sanitize_limit(req.limit, default=8, max_limit=200)
query = (req.query or "").strip()
if not query:
raise HTTPException(status_code=400, detail="query must not be empty")
query = (req.query or "").strip()
if not query:
raise HTTPException(status_code=400, detail="query must not be empty")
query_vec = model.encode(
[f"query: {query}"],
normalize_embeddings=True
)
query_vec = np.array(query_vec).astype("float32")
query_vec = model.encode([f"query: {query}"], normalize_embeddings=True)
query_vec = np.array(query_vec).astype("float32")
scores, indices = tag_index.search(query_vec, limit)
if query_vec.ndim != 2:
raise RuntimeError(f"Invalid embedding shape: {query_vec.shape}")
results = []
if query_vec.shape[1] != tag_index.d:
raise RuntimeError(f"Embedding dimension mismatch (vec={query_vec.shape[1]}, index={tag_index.d})")
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if idx < 0 or idx >= len(tag_ids):
continue
scores, indices = tag_index.search(query_vec, limit)
results.append({
"tag_id": tag_ids[idx],
"score": float(score),
})
results = []
for score, idx in zip(scores[0], indices[0]):
if idx == -1:
continue
if idx < 0 or idx >= len(tag_ids):
continue
results.append({"tag_id": tag_ids[idx], "score": float(score)})
return results
return results
except HTTPException:
raise
except Exception as e:
logger.exception("search-tags failure")
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -0,0 +1,118 @@
<?php
declare(strict_types=1);
namespace App\Catalog;
use App\Tag\TagVectorSearchClient;
use Doctrine\DBAL\Connection;
use Symfony\Component\Uid\Uuid;
/**
* EntityCatalogService
*
* Deterministische Katalog-Listen auf Basis eines Entity-Terms:
* - TagVectorSearch (Score-Gate + Ambiguity-Check)
* - DB Query auf document_tag + document (ACTIVE)
* - Rückgabe als EIN Textblock (string) oder null (Fallback auf normalen Retrieval)
*/
final class EntityCatalogService
{
private const MIN_SCORE = 0.55;
private const AMBIGUITY_DELTA = 0.05;
public function __construct(
private readonly TagVectorSearchClient $tagVectorClient,
private readonly Connection $connection,
) {}
/**
* @return string|null Textblock oder null (wenn kein sicherer Catalog möglich ist)
*/
public function listByTerm(string $entityTerm): ?string
{
$entityTerm = trim($entityTerm);
if ($entityTerm === '') {
return null;
}
// 1) Tag-Vektorsuche (Top 3 für Ambiguity-Prüfung)
$hits = $this->tagVectorClient->search($entityTerm, 3);
if ($hits === []) {
return null;
}
$best = $hits[0];
$bestScore = isset($best['score']) ? (float)$best['score'] : 0.0;
if ($bestScore < self::MIN_SCORE) {
return null;
}
// 2) Ambiguity: wenn Top2 zu nah ist → konservativ abbrechen
if (isset($hits[1])) {
$secondScore = isset($hits[1]['score']) ? (float)$hits[1]['score'] : 0.0;
if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) {
return null;
}
}
$tagHex = (string)($best['tag_id'] ?? '');
if ($tagHex === '') {
return null;
}
// 3) DB Query: alle ACTIVE Dokumente zu diesem Tag
$rows = $this->connection->fetchAllAssociative(
'
SELECT d.title
FROM document d
INNER JOIN document_tag dt ON dt.document_id = d.id
WHERE dt.tag_id = :tagId
AND d.status = :status
ORDER BY d.title ASC
',
[
'tagId' => Uuid::fromString($tagHex)->toBinary(),
'status' => 'ACTIVE',
]
);
if ($rows === []) {
return null;
}
$titles = [];
foreach ($rows as $row) {
$t = trim((string)($row['title'] ?? ''));
if ($t !== '') {
$titles[] = $t;
}
}
if ($titles === []) {
return null;
}
return $this->buildTextBlock($entityTerm, $titles);
}
private function buildTextBlock(string $entityTerm, array $titles): string
{
$headline = match ($entityTerm) {
'geräte' => 'Folgende Geräte sind verfügbar:',
'indikatoren' => 'Folgende Indikatoren sind verfügbar:',
'funktionen' => 'Folgende Funktionen sind verfügbar:',
'zubehör' => 'Folgendes Zubehör ist verfügbar:',
default => 'Folgende Einträge sind verfügbar:',
};
$lines = [];
foreach ($titles as $title) {
$lines[] = '- ' . $title;
}
return $headline . "\n\n" . implode("\n", $lines);
}
}

View File

@@ -0,0 +1,138 @@
<?php
declare(strict_types=1);
namespace App\Intent;
/**
* CatalogIntentLite
*
* Minimal, deterministische Erkennung von Katalog-/Entity-Listenanfragen.
*
* Ziel:
* - "Liste aller Geräte" / "Welche Indikatoren gibt es?" / "Zeig mir alle Funktionen"
*
* Guardrails:
* - Kein Catalog-Mode bei Sales-/Pricing-/Comparison-/ROI-/Implementation-/Objection-Intents.
* - Kein Catalog-Mode ohne expliziten Entity-Term.
*
* WICHTIG:
* - Immer mit ORIGINAL-Prompt aufrufen.
* - Kein LLM, kein ML.
*/
final class CatalogIntentLite
{
/**
* Listensignale (leichtgewichtig) IntentLite bleibt weiterhin für "allgemeine" List Detection zuständig.
*/
private const LIST_SIGNALS = [
'liste',
'auflisten',
'aufzaehl',
'aufzähl',
'übersicht',
'uebersicht',
'welche gibt es',
'welche sind',
'zeig mir alle',
'zeige mir alle',
'alle',
];
/**
* Entity-Terms, die wir als Katalogtypen unterstützen.
*
* Left side: canonical term (für Tag-Suche)
* Right side: Such-Synonyme, die im Prompt vorkommen dürfen.
*/
private const ENTITY_TERMS = [
'geräte' => ['gerät', 'geräte', 'geraet', 'geraete', 'device', 'devices'],
'indikatoren' => ['indikator', 'indikatoren', 'indicator', 'indicators'],
'funktionen' => ['funktion', 'funktionen', 'feature', 'features', 'funktionalität', 'funktionalitaet'],
'zubehör' => ['zubehör', 'zubehoer', 'accessory', 'accessories', 'zubehor'],
];
public function __construct(
private readonly SalesIntentLite $salesIntentLite,
) {}
/**
* @return string|null canonical entity term (z. B. "geräte") oder null wenn kein Catalog-Intent.
*/
public function detect(string $originalPrompt): ?string
{
$p = $this->normalize($originalPrompt);
// 1) Muss ein Listen-Signal enthalten
if (!$this->containsAny($p, self::LIST_SIGNALS)) {
return null;
}
// 2) Guardrail: Kein Catalog-Mode bei Sales-Intents
$sales = $this->salesIntentLite->detect($originalPrompt);
$intent = (string)($sales['intent'] ?? SalesIntentLite::DISCOVERY);
if ($intent !== SalesIntentLite::DISCOVERY) {
return null;
}
// 3) Expliziten Entity-Term extrahieren (sonst kein Catalog)
foreach (self::ENTITY_TERMS as $canonical => $synonyms) {
foreach ($synonyms as $syn) {
if ($this->containsWord($p, $syn)) {
return $canonical;
}
}
}
return null;
}
// ------------------------------------------------------------
// Helpers
// ------------------------------------------------------------
private function containsAny(string $haystack, array $needles): bool
{
foreach ($needles as $needle) {
if ($needle === '') {
continue;
}
if (str_contains($haystack, $needle)) {
return true;
}
}
return false;
}
private function containsWord(string $haystack, string $word): bool
{
$word = trim($word);
if ($word === '') {
return false;
}
return preg_match('/\b' . preg_quote($word, '/') . '\b/u', $haystack) === 1;
}
private function normalize(string $s): string
{
$s = mb_strtolower($s);
// Umlaute absichern (analog IntentLite/SalesIntentLite)
$replacements = [
'ä' => 'ae',
'ö' => 'oe',
'ü' => 'ue',
'ß' => 'ss',
];
foreach ($replacements as $umlaut => $alt) {
if (str_contains($s, $umlaut)) {
$s .= ' ' . str_replace($umlaut, $alt, $s);
break;
}
}
return $s;
}
}

View File

@@ -4,7 +4,9 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
use App\Intent\IntentLite;
use App\Intent\SalesIntentLite;
use App\Knowledge\QueryCleaner;
@@ -32,7 +34,9 @@ final class NdjsonHybridRetriever implements RetrieverInterface
private readonly ModelGenerationConfigRepository $configRepository,
private readonly QueryCleaner $queryCleaner,
private readonly IntentLite $intentLite,
private readonly SalesIntentLite $salesIntentLite
private readonly SalesIntentLite $salesIntentLite,
private readonly CatalogIntentLite $catalogIntent,
private readonly EntityCatalogService $entityCatalogService
)
{
}
@@ -54,6 +58,17 @@ final class NdjsonHybridRetriever implements RetrieverInterface
public function retrieveInternal(string $prompt, ModelGenerationConfig $config): array
{
// 🔵 ENTITY CATALOG EARLY EXIT (jetzt auch im Admin-Test aktiv)
$entityTerm = $this->catalogIntent->detect($prompt);
if ($entityTerm !== null) {
$catalogBlock = $this->entityCatalogService->listByTerm($entityTerm);
if ($catalogBlock !== null) {
return [$catalogBlock];
}
}
$core = $this->runCore($prompt, $config, false);
if ($core['ranked_chunk_ids'] === [] || $core['rows'] === []) {
@@ -111,8 +126,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
// 1) Production-like selection: wir selektieren Texte,
// aber in Debug brauchen wir die ChunkIds dazu.
$selectedChunkIds = $core['is_list_query']
? $this->selectChunkIdsListMode($core['ranked_chunk_ids'], $core['rows'], $core['limit'])
: $this->selectChunkIdsSalesMode($core['ranked_chunk_ids'], $core['rows'], $core['limit']);
@@ -121,7 +134,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
// 2) Ausgabe inklusive Scores
$out = [];
$rank = 0;
@@ -179,7 +191,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$cleanQuery = $prompt;
}
// Intent-based adjustments (identisch zur Produktionslogik)
$threshold = self::VECTOR_SCORE_THRESHOLD;
$topK = $vectorTopKBase;
@@ -216,7 +227,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$topK = max(1, min($topK, self::HARD_MAX_VECTORK));
// Tag routing (identisch)
$candidateDocIds = $this->tagRouting->route($cleanQuery);
$candidateSet = null;
@@ -224,7 +234,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$candidateSet = array_fill_keys($candidateDocIds, true);
}
// Dual search (identisch)
$globalHits = $this->vectorClient->search($cleanQuery, $topK);
$scopedHits = [];
@@ -249,7 +258,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$rrfScores = [];
$rawScores = [];
// RRF (identisch) + optional raw capture
$this->applyRrfWithOptionalRaw($globalHits, $rrfScores, $rawScores, $threshold, false, $withScores);
$this->applyRrfWithOptionalRaw(
$scopedHits,
@@ -292,13 +300,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
];
}
/**
* Gleiche Logik wie applyRrf(), aber optional mit raw-score capture.
*
* @param array<int, array{chunk_id:string, score:float}> $hits
* @param array<string,float> $rrfScores
* @param array<string,float> $rawScores
*/
private function applyRrfWithOptionalRaw(
array $hits,
array &$rrfScores,
@@ -322,7 +323,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
$chunkId = (string)$hit['chunk_id'];
if ($captureRaw) {
// wenn global+scoped vorkommt: bestes raw behalten
if (!isset($rawScores[$chunkId]) || $raw > $rawScores[$chunkId]) {
$rawScores[$chunkId] = $raw;
}
@@ -343,15 +343,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
}
}
// =========================================================
// DEBUG SELECTION HELPERS (identisch zu Produktionsregeln)
// =========================================================
/**
* List-Mode nutzt exakt collectTexts() Regeln, aber gibt ChunkIds zurück.
*
* @return string[]
*/
private function selectChunkIdsListMode(array $chunkIds, array $rows, int $limit): array
{
$seen = [];
@@ -384,11 +375,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out;
}
/**
* Normal-Mode nutzt exakt collectSalesOptimized() Regeln, aber gibt ChunkIds zurück.
*
* @return string[]
*/
private function selectChunkIdsSalesMode(array $chunkIds, array $rows, int $limit): array
{
$out = [];
@@ -437,10 +423,6 @@ final class NdjsonHybridRetriever implements RetrieverInterface
return $out;
}
// =========================================================
// ORIGINAL METHODS (UNVERÄNDERT)
// =========================================================
private function applyRrf(array $hits, array &$rrfScores, float $threshold, bool $boost = false): void
{
$rank = 0;

View File

@@ -58,6 +58,7 @@ final readonly class TagVectorSearchClient
'Tag vector service returned non-200',
['status' => $response->getStatusCode()]
);
return [];
}
@@ -68,6 +69,7 @@ final readonly class TagVectorSearchClient
'Tag vector service unreachable',
['error' => $e->getMessage()]
);
return [];
}