From 0d3f6e21d64f3e16d455a5c7ef9332104c60afc2 Mon Sep 17 00:00:00 2001 From: team2 Date: Sat, 28 Feb 2026 16:10:47 +0100 Subject: [PATCH] optimize catalog semantic matches by tags --- migrations/Version20260228000100.php | 32 +++++ python/vector/vector_service.py | 91 +++++++++++- src/Catalog/EntityCatalogService.php | 32 +++-- src/Controller/Admin/TagController.php | 3 +- src/Entity/Tag.php | 20 +++ src/Intent/CatalogIntentLite.php | 131 ++++++------------ .../Retrieval/NdjsonHybridRetriever.php | 29 ++-- src/Service/Admin/TagAdminService.php | 18 ++- src/Tag/TagNdjsonExporter.php | 38 ++--- src/Tag/TagService.php | 17 +-- src/Tag/TagTypes.php | 27 ++++ src/Tag/TagVectorSearchClient.php | 33 ++++- templates/admin/tag/index.html.twig | 9 ++ 13 files changed, 329 insertions(+), 151 deletions(-) create mode 100644 migrations/Version20260228000100.php create mode 100644 src/Tag/TagTypes.php diff --git a/migrations/Version20260228000100.php b/migrations/Version20260228000100.php new file mode 100644 index 0000000..1081e20 --- /dev/null +++ b/migrations/Version20260228000100.php @@ -0,0 +1,32 @@ +addSql(" + ALTER TABLE knowledge_tag + ADD type VARCHAR(50) NOT NULL DEFAULT 'generic' + "); + } + + public function down(Schema $schema): void + { + $this->addSql(" + ALTER TABLE knowledge_tag + DROP type + "); + } +} \ No newline at end of file diff --git a/python/vector/vector_service.py b/python/vector/vector_service.py index 38893ed..4507741 100644 --- a/python/vector/vector_service.py +++ b/python/vector/vector_service.py @@ -42,6 +42,9 @@ INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json" INDEX_RUNTIME_PATH = KNOWLEDGE_DIR / "index_runtime.json" INDEX_NDJSON_PATH = KNOWLEDGE_DIR / "index.ndjson" +# NEW: Tags NDJSON (exported by PHP) used to enrich /search-tags responses +TAGS_NDJSON_PATH = KNOWLEDGE_DIR / "tags.ndjson" + # ============================================================ # Logging @@ -111,6 +114,9 @@ chunk_pos_map: Dict[str, int] = {} tag_index = None 
tag_ids: Optional[List[Any]] = None +# NEW: tag_id -> {"label": "...", "tag_type": "..."} +tag_meta_map: Dict[str, Dict[str, str]] = {} + loaded_embedding_model_name: Optional[str] = None current_index_version: Optional[int] = None current_runtime_stamp: Optional[str] = None @@ -210,6 +216,61 @@ def load_chunk_maps_from_ndjson() -> None: logger.warning("Failed to load chunk maps from ndjson: %s", str(e)) +def load_tag_meta_from_tags_ndjson() -> None: + """ + Loads minimal tag metadata from tags.ndjson to enrich /search-tags results. + Expected line format (from PHP exporter / ingester pipeline): + {"tag_id":"...","text":"LABEL\\nSLUG\\noptional description", ...} + We extract: + label = first line of "text" (fallback: "") + tag_type = "type" if present (preferred), else "generic" + """ + global tag_meta_map + + tag_meta_map = {} + + if not TAGS_NDJSON_PATH.exists(): + logger.info("[Reload] tags.ndjson missing -> tag_meta_map empty (%s)", str(TAGS_NDJSON_PATH)) + return + + try: + with TAGS_NDJSON_PATH.open("r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + row = json.loads(line) + except Exception: + continue + + tag_id = _as_key(row.get("tag_id")) + if not tag_id: + continue + + # Prefer explicit fields if present + ttype = row.get("type") + if isinstance(ttype, str) and ttype.strip(): + tag_type = ttype.strip() + else: + tag_type = "generic" + + label = "" + txt = row.get("text") + if isinstance(txt, str) and txt.strip(): + first = txt.splitlines()[0].strip() if txt.splitlines() else "" + label = first + + if label: + tag_meta_map[tag_id] = {"label": label, "tag_type": tag_type} + else: + tag_meta_map[tag_id] = {"label": "", "tag_type": tag_type} + + except Exception as e: + logger.warning("Failed to load tag meta from tags.ndjson: %s", str(e)) + tag_meta_map = {} + + def _normalize_meta_list(value: Any) -> Optional[List[Any]]: """ Accepts: @@ -282,6 +343,10 @@ def load_all() -> None: tag_index = None tag_ids = 
None + # NEW: load tag meta for enrichment + logger.info("[Reload] Loading tag meta from tags.ndjson") + load_tag_meta_from_tags_ndjson() + runtime = _safe_read_json(INDEX_RUNTIME_PATH) if isinstance(runtime, dict): v = runtime.get("last_rebuild_at") @@ -292,10 +357,11 @@ def load_all() -> None: current_index_version = index_version if isinstance(index_version, int) else None logger.info( - "[Reload] Completed (index_version=%s runtime=%s embedding_model=%s stamp=%s file=%s)", + "[Reload] Completed (index_version=%s runtime=%s embedding_model=%s tag_meta=%s stamp=%s file=%s)", str(current_index_version), str(current_runtime_stamp), str(loaded_embedding_model_name), + str(len(tag_meta_map)), SERVICE_STAMP, str(Path(__file__).resolve()), ) @@ -390,6 +456,8 @@ def health(): "tag_meta_len": len(tag_ids) if isinstance(tag_ids, list) else None, "chunk_meta_type": type(chunk_ids).__name__ if chunk_ids is not None else None, "chunk_meta_len": len(chunk_ids) if isinstance(chunk_ids, list) else None, + "tag_meta_map_len": len(tag_meta_map), + "tags_ndjson_path": str(TAGS_NDJSON_PATH), "log_file": str(LOG_FILE), } @@ -502,7 +570,26 @@ def search_tags(req: SearchRequest): continue if idx < 0 or idx >= len(tag_ids): continue - results.append({"tag_id": tag_ids[idx], "score": float(score)}) + + tag_id = tag_ids[idx] + tag_id_key = _as_key(tag_id) or "" + + payload: Dict[str, Any] = { + "tag_id": tag_id, + "score": float(score), + } + + meta = tag_meta_map.get(tag_id_key) + if isinstance(meta, dict): + label = meta.get("label") + ttype = meta.get("tag_type") + + if isinstance(label, str) and label.strip(): + payload["label"] = label + if isinstance(ttype, str) and ttype.strip(): + payload["tag_type"] = ttype + + results.append(payload) return results diff --git a/src/Catalog/EntityCatalogService.php b/src/Catalog/EntityCatalogService.php index 6ccae31..a3962c8 100644 --- a/src/Catalog/EntityCatalogService.php +++ b/src/Catalog/EntityCatalogService.php @@ -15,6 +15,10 @@ use 
Symfony\Component\Uid\Uuid; * - TagVectorSearch (Score-Gate + Ambiguity-Check) * - DB Query auf document_tag + document (ACTIVE) * - Rückgabe als EIN Textblock (string) oder null (Fallback auf normalen Retrieval) + * + * Schritt-3 Änderung: + * - Headline ist NICHT mehr hardcoded + * - Headline basiert dynamisch auf dem gefundenen Tag */ final class EntityCatalogService { @@ -63,6 +67,10 @@ final class EntityCatalogService return null; } + // OPTIONAL: Falls TagVectorSearchClient künftig tag_label zurückliefert, + // kann das hier direkt verwendet werden. + $tagLabel = isset($best['tag_label']) ? (string)$best['tag_label'] : null; + // 3) DB Query: alle ACTIVE Dokumente zu diesem Tag $rows = $this->connection->fetchAllAssociative( ' @@ -95,18 +103,24 @@ final class EntityCatalogService return null; } - return $this->buildTextBlock($entityTerm, $titles); + return $this->buildTextBlock($tagLabel, $titles); } - private function buildTextBlock(string $entityTerm, array $titles): string + /** + * Dynamische Headline: + * - Wenn Tag-Label vorhanden → verwenden + * - Sonst generischer Fallback + */ + private function buildTextBlock(?string $tagLabel, array $titles): string { - $headline = match ($entityTerm) { - 'geräte' => 'Folgende Geräte sind verfügbar:', - 'indikatoren' => 'Folgende Indikatoren sind verfügbar:', - 'funktionen' => 'Folgende Funktionen sind verfügbar:', - 'zubehör' => 'Folgendes Zubehör ist verfügbar:', - default => 'Folgende Einträge sind verfügbar:', - }; + $headline = 'Folgende Einträge sind verfügbar:'; + + if (\is_string($tagLabel) && \trim($tagLabel) !== '') { + $headline = sprintf( + 'Folgende %s sind verfügbar:', + $tagLabel + ); + } $lines = []; foreach ($titles as $title) { diff --git a/src/Controller/Admin/TagController.php b/src/Controller/Admin/TagController.php index 84dcde1..4fe9763 100644 --- a/src/Controller/Admin/TagController.php +++ b/src/Controller/Admin/TagController.php @@ -46,7 +46,8 @@ final class TagController extends 
AbstractController (string)$request->request->get('label', ''), $request->request->get('description') ? (string)$request->request->get('description') - : null + : null, + (string)$request->request->get('type', 'generic') // NEU ); $this->addFlash('success', 'Tag wurde erstellt.'); diff --git a/src/Entity/Tag.php b/src/Entity/Tag.php index aeb94b0..08b967a 100644 --- a/src/Entity/Tag.php +++ b/src/Entity/Tag.php @@ -24,6 +24,14 @@ class Tag #[ORM\Column(type: 'text', nullable: true)] private ?string $description = null; + /** + * NEU: Governance-Typ des Tags + * - generic + * - catalog_entity + */ + #[ORM\Column(length: 50)] + private string $type = 'generic'; + #[ORM\Column] private \DateTimeImmutable $createdAt; @@ -75,6 +83,18 @@ class Tag return $this; } + public function getType(): string + { + return $this->type; + } + + public function setType(string $type): static + { + $type = trim($type); + $this->type = $type !== '' ? $type : 'generic'; + return $this; + } + public function getCreatedAt(): \DateTimeImmutable { return $this->createdAt; diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index 8d1c17c..59f83f7 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -4,27 +4,10 @@ declare(strict_types=1); namespace App\Intent; -/** - * CatalogIntentLite - * - * Minimal, deterministische Erkennung von Katalog-/Entity-Listenanfragen. - * - * Ziel: - * - "Liste aller Geräte" / "Welche Indikatoren gibt es?" / "Zeig mir alle Funktionen" - * - * Guardrails: - * - Kein Catalog-Mode bei Sales-/Pricing-/Comparison-/ROI-/Implementation-/Objection-Intents. - * - Kein Catalog-Mode ohne expliziten Entity-Term. - * - * WICHTIG: - * - Immer mit ORIGINAL-Prompt aufrufen. - * - Kein LLM, kein ML. - */ +use App\Tag\TagVectorSearchClient; + final class CatalogIntentLite { - /** - * Listensignale (leichtgewichtig) – IntentLite bleibt weiterhin für "allgemeine" List Detection zuständig. 
- */ private const LIST_SIGNALS = [ 'liste', 'auflisten', @@ -39,100 +22,76 @@ final class CatalogIntentLite 'alle', ]; - /** - * Entity-Terms, die wir als Katalogtypen unterstützen. - * - * Left side: canonical term (für Tag-Suche) - * Right side: Such-Synonyme, die im Prompt vorkommen dürfen. - */ - private const ENTITY_TERMS = [ - 'geräte' => ['gerät', 'geräte', 'geraet', 'geraete', 'device', 'devices'], - 'indikatoren' => ['indikator', 'indikatoren', 'indicator', 'indicators'], - 'funktionen' => ['funktion', 'funktionen', 'feature', 'features', 'funktionalität', 'funktionalitaet'], - 'zubehör' => ['zubehör', 'zubehoer', 'accessory', 'accessories', 'zubehor'], - ]; + private const MIN_SCORE = 0.60; + private const AMBIGUITY_DELTA = 0.05; public function __construct( private readonly SalesIntentLite $salesIntentLite, + private readonly TagVectorSearchClient $tagVectorClient, ) {} - /** - * @return string|null canonical entity term (z. B. "geräte") oder null wenn kein Catalog-Intent. - */ - public function detect(string $originalPrompt): ?string + public function detect(string $prompt): ?string { - $p = $this->normalize($originalPrompt); + $normalizedPrompt = mb_strtolower($prompt); - // 1) Muss ein Listen-Signal enthalten - if (!$this->containsAny($p, self::LIST_SIGNALS)) { + // 1) Muss Listen-Signal enthalten + if (!$this->containsAny($normalizedPrompt, self::LIST_SIGNALS)) { return null; } - // 2) Guardrail: Kein Catalog-Mode bei Sales-Intents - $sales = $this->salesIntentLite->detect($originalPrompt); + // 2) Guardrail: Nur DISCOVERY + $sales = $this->salesIntentLite->detect($prompt); $intent = (string)($sales['intent'] ?? 
SalesIntentLite::DISCOVERY); if ($intent !== SalesIntentLite::DISCOVERY) { return null; } - // 3) Expliziten Entity-Term extrahieren (sonst kein Catalog) - foreach (self::ENTITY_TERMS as $canonical => $synonyms) { - foreach ($synonyms as $syn) { - if ($this->containsWord($p, $syn)) { - return $canonical; - } + // 3) Vector-basierte Tag-Suche (Top 3 für Ambiguity-Check) + $hits = $this->tagVectorClient->search($prompt, 3); + + if ($hits === []) { + return null; + } + + $best = $hits[0]; + $bestScore = (float)($best['score'] ?? 0.0); + + if ($bestScore < self::MIN_SCORE) { + return null; + } + + // Ambiguity-Check + if (isset($hits[1])) { + $secondScore = (float)($hits[1]['score'] ?? 0.0); + if (abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA) { + return null; } } - return null; - } + // 4) Nur catalog_entity zulassen + if (($best['tag_type'] ?? null) !== 'catalog_entity') { + return null; + } - // ------------------------------------------------------------ - // Helpers - // ------------------------------------------------------------ + // 5) Canonical Label zurückgeben + $label = (string)($best['label'] ?? ''); + + if ($label === '') { + return null; + } + + return mb_strtolower($label); + } private function containsAny(string $haystack, array $needles): bool { foreach ($needles as $needle) { - if ($needle === '') { - continue; - } - if (str_contains($haystack, $needle)) { + if ($needle !== '' && str_contains($haystack, $needle)) { return true; } } + return false; } - - private function containsWord(string $haystack, string $word): bool - { - $word = trim($word); - if ($word === '') { - return false; - } - return preg_match('/\b' . preg_quote($word, '/') . 
'\b/u', $haystack) === 1; - } - - private function normalize(string $s): string - { - $s = mb_strtolower($s); - - // Umlaute absichern (analog IntentLite/SalesIntentLite) - $replacements = [ - 'ä' => 'ae', - 'ö' => 'oe', - 'ü' => 'ue', - 'ß' => 'ss', - ]; - - foreach ($replacements as $umlaut => $alt) { - if (str_contains($s, $umlaut)) { - $s .= ' ' . str_replace($umlaut, $alt, $s); - break; - } - } - - return $s; - } } \ No newline at end of file diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index 451e68a..16b4219 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -4,7 +4,6 @@ declare(strict_types=1); namespace App\Knowledge\Retrieval; -use App\Catalog\EntityCatalogService; use App\Entity\ModelGenerationConfig; use App\Intent\CatalogIntentLite; use App\Intent\IntentLite; @@ -13,6 +12,9 @@ use App\Knowledge\QueryCleaner; use App\Repository\ModelGenerationConfigRepository; use App\Tag\TagRoutingService; use App\Vector\VectorSearchClient; +use App\Catalog\EntityCatalogService; +use App\Knowledge\Retrieval\NdjsonChunkLookup; +use App\Knowledge\Retrieval\RetrieverInterface; final class NdjsonHybridRetriever implements RetrieverInterface { @@ -146,15 +148,15 @@ final class NdjsonHybridRetriever implements RetrieverInterface $text = trim((string)($core['rows'][$chunkId]['text'] ?? '')); $out[] = [ - 'rank' => $rank, - 'chunk_id' => $chunkId, - 'document_id' => isset($core['rows'][$chunkId]['document_id']) ? (string)$core['rows'][$chunkId]['document_id'] : null, - 'raw_score' => isset($core['raw_scores'][$chunkId]) ? (float)$core['raw_scores'][$chunkId] : null, - 'rrf_score' => isset($core['rrf_scores'][$chunkId]) ? 
(float)$core['rrf_scores'][$chunkId] : null, - 'threshold' => (float)$core['threshold'], - 'intent' => (string)$core['sales_intent'], - 'is_list_query'=> (bool)$core['is_list_query'], - 'text' => $text, + 'rank' => $rank, + 'chunk_id' => $chunkId, + 'document_id' => isset($core['rows'][$chunkId]['document_id']) ? (string)$core['rows'][$chunkId]['document_id'] : null, + 'raw_score' => isset($core['raw_scores'][$chunkId]) ? (float)$core['raw_scores'][$chunkId] : null, + 'rrf_score' => isset($core['rrf_scores'][$chunkId]) ? (float)$core['rrf_scores'][$chunkId] : null, + 'threshold' => (float)$core['threshold'], + 'intent' => (string)$core['sales_intent'], + 'is_list_query' => (bool)$core['is_list_query'], + 'text' => $text, ]; } @@ -305,9 +307,10 @@ final class NdjsonHybridRetriever implements RetrieverInterface array &$rrfScores, array &$rawScores, float $threshold, - bool $boost = false, - bool $captureRaw = false - ): void { + bool $boost = false, + bool $captureRaw = false + ): void + { $rank = 0; foreach ($hits as $hit) { diff --git a/src/Service/Admin/TagAdminService.php b/src/Service/Admin/TagAdminService.php index c954309..b0a701f 100644 --- a/src/Service/Admin/TagAdminService.php +++ b/src/Service/Admin/TagAdminService.php @@ -11,12 +11,12 @@ use App\Service\TagRebuildJobService; use App\Tag\TagService; use Doctrine\ORM\EntityManagerInterface; -final class TagAdminService +final readonly class TagAdminService { public function __construct( - private readonly EntityManagerInterface $em, - private readonly TagService $tagService, - private readonly TagRebuildJobService $jobs, + private EntityManagerInterface $em, + private TagService $tagService, + private TagRebuildJobService $jobs, ) {} public function getIndexData(): array @@ -31,9 +31,13 @@ final class TagAdminService ]; } - public function create(string $slug, string $label, ?string $description): void - { - $this->tagService->create($slug, $label, $description); + public function create( + string $slug, + 
string $label, + ?string $description, + string $type = 'generic' // NEU + ): void { + $this->tagService->create($slug, $label, $description, $type); } public function delete(string $id): void diff --git a/src/Tag/TagNdjsonExporter.php b/src/Tag/TagNdjsonExporter.php index 976f3bb..93cfcd0 100644 --- a/src/Tag/TagNdjsonExporter.php +++ b/src/Tag/TagNdjsonExporter.php @@ -19,7 +19,12 @@ final readonly class TagNdjsonExporter * Export all tags into NDJSON (streaming) with atomic switch (.tmp + rename()). * * Line format: - * {"tag_id":"...","text":"label\nslug\noptional description","document_ids":["...","..."]} + * { + * "tag_id":"...", + * "text":"label\nslug\noptional description", + * "type":"catalog_entity|generic|...", + * "document_ids":["...","..."] + * } * * @return array{tags:int, lines:int, bytes:int, path:string} */ @@ -37,12 +42,7 @@ final readonly class TagNdjsonExporter throw new \RuntimeException('Cannot write tags NDJSON: ' . $tmpPath); } - // ------------------------------------------------------------------ - // Fetch tags (small) + join document ids (can be bigger) efficiently. - // We avoid repositories and keep it DB-agnostic via DQL/QB. - // ------------------------------------------------------------------ - - // 1) Load all tags (id, slug, label, description) + // 1) Load all tags $tags = $this->em->createQueryBuilder() ->select('t') ->from(Tag::class, 't') @@ -52,8 +52,6 @@ final readonly class TagNdjsonExporter if (!\is_array($tags) || $tags === []) { \fclose($fh); - - // Write empty file atomically $this->atomicReplace($tmpPath, $this->tagsNdjsonPath); return [ @@ -64,8 +62,7 @@ final readonly class TagNdjsonExporter ]; } - // 2) Build tagId => docIds map from document_tag - // We query pairs (tag_id, document_id) in one go. 
+ // 2) Build tagId => docIds map $rows = $this->em->createQueryBuilder() ->select('IDENTITY(dt.tag) AS tagId', 'IDENTITY(dt.document) AS docId') ->from(DocumentTag::class, 'dt') @@ -82,7 +79,7 @@ final readonly class TagNdjsonExporter $tagToDocs[$tagId][] = $docId; } - // 3) Stream NDJSON lines + // 3) Stream NDJSON $lines = 0; foreach ($tags as $tag) { @@ -93,13 +90,11 @@ final readonly class TagNdjsonExporter $tagId = (string) $tag->getId(); $docIds = $tagToDocs[$tagId] ?? []; - // de-dupe docIds for safety if ($docIds !== []) { $docIds = \array_values(\array_unique($docIds)); } - // "text" is the embedding source for tag vectors later: - // Keep it short but semantically useful. + // Embedding source $textParts = [ $tag->getLabel(), $tag->getSlug(), @@ -110,15 +105,23 @@ final readonly class TagNdjsonExporter $textParts[] = \trim($desc); } + $type = method_exists($tag, 'getType') + ? (string) $tag->getType() + : 'generic'; + + if ($type === '') { + $type = 'generic'; + } + $line = [ 'tag_id' => $tagId, 'text' => \implode("\n", $textParts), + 'type' => $type, // 🔥 NEW 'document_ids' => $docIds, ]; $json = \json_encode($line, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); if (!\is_string($json)) { - // skip invalid line but keep export running continue; } @@ -127,7 +130,6 @@ final readonly class TagNdjsonExporter } \fclose($fh); - $this->atomicReplace($tmpPath, $this->tagsNdjsonPath); return [ @@ -140,13 +142,11 @@ final readonly class TagNdjsonExporter private function atomicReplace(string $tmpPath, string $finalPath): void { - // Ensure old file can be replaced on Windows-like FS too (best effort) if (\is_file($finalPath)) { @\chmod($finalPath, 0664); } if (!@\rename($tmpPath, $finalPath)) { - // if rename fails, try copy+unlink fallback if (!@\copy($tmpPath, $finalPath)) { @\unlink($tmpPath); throw new \RuntimeException('Atomic replace failed for: ' . 
$finalPath); diff --git a/src/Tag/TagService.php b/src/Tag/TagService.php index 0081c40..65e8117 100644 --- a/src/Tag/TagService.php +++ b/src/Tag/TagService.php @@ -21,8 +21,12 @@ final readonly class TagService // TAG CREATE // ========================================================= - public function create(string $slug, string $label, ?string $description = null): Tag - { + public function create( + string $slug, + string $label, + ?string $description = null, + string $type = 'generic' // NEU + ): Tag { $slug = trim($slug); $label = trim($label); @@ -35,6 +39,7 @@ final readonly class TagService } $tag = new Tag($slug, $label, $description); + $tag->setType($type); // NEU $this->em->persist($tag); $this->em->flush(); @@ -71,10 +76,6 @@ final readonly class TagService // DOCUMENT TAG SYNC // ========================================================= - /** - * Synchronisiert alle Tags eines Dokuments. - * Löst einen Rebuild aus, da document_ids Teil des NDJSON sind. - */ public function syncDocumentTags(Document $document, array $newTagIds): void { $newTagIds = array_unique($newTagIds); @@ -114,10 +115,6 @@ final readonly class TagService // TAG → DOCUMENT SYNC (Bulk Assign) // ========================================================= - /** - * Synchronisiert alle Dokumente eines Tags. - * Löst einen Rebuild aus, da document_ids Teil des NDJSON sind. 
- */ public function syncTagDocuments(Tag $tag, array $newDocumentIds): void { $newDocumentIds = array_unique($newDocumentIds); diff --git a/src/Tag/TagTypes.php b/src/Tag/TagTypes.php new file mode 100644 index 0000000..b27c6f3 --- /dev/null +++ b/src/Tag/TagTypes.php @@ -0,0 +1,27 @@ + self::GENERIC, + 'Catalog Entity' => self::CATALOG_ENTITY, + 'Sales Signal' => self::SALES_SIGNAL, + ]; + } + + private function __construct() {} +} \ No newline at end of file diff --git a/src/Tag/TagVectorSearchClient.php b/src/Tag/TagVectorSearchClient.php index 62ea64e..0c8ada7 100644 --- a/src/Tag/TagVectorSearchClient.php +++ b/src/Tag/TagVectorSearchClient.php @@ -11,7 +11,6 @@ final readonly class TagVectorSearchClient { /** * Minimum similarity score required for a tag to be considered. - * Acts as a confidence gate to avoid noisy routing. */ private const MIN_SCORE = 0.4; @@ -29,7 +28,22 @@ final readonly class TagVectorSearchClient /** * Executes a vector search against the Python tag index. * - * @return array + * Expected response rows: + * [ + * { + * "tag_id": "...", + * "score": 0.73, + * "label": "Geräte", // optional (new) + * "tag_type": "catalog_entity" // optional (new) + * } + * ] + * + * @return array */ public function search(string $query, int $limit = 8): array { @@ -94,15 +108,26 @@ final readonly class TagVectorSearchClient $score = (float) $score; - // 🔥 Confidence Gate if ($score < self::MIN_SCORE) { continue; } - $hits[] = [ + $hit = [ 'tag_id' => $tagId, 'score' => $score, ]; + + // Optional: label + if (isset($row['label']) && is_string($row['label'])) { + $hit['label'] = $row['label']; + } + + // Optional: tag_type + if (isset($row['tag_type']) && is_string($row['tag_type'])) { + $hit['tag_type'] = $row['tag_type']; + } + + $hits[] = $hit; } return $hits; diff --git a/templates/admin/tag/index.html.twig b/templates/admin/tag/index.html.twig index 3ed0c05..6941f38 100644 --- a/templates/admin/tag/index.html.twig +++ 
b/templates/admin/tag/index.html.twig @@ -157,6 +157,15 @@ placeholder="Optional"/> +
+ + +
+