|null * @throws Exception */
    public function route(string $query): ?array
    {
        $query = trim($query);
        if ($query === '') {
            return null;
        }

        $hits = $this->filterRoutingHits(
            $this->tagSearch->search($query, $this->defaultTopK)
        );
        if ($hits === []) {
            return null;
        }

        // Collect the binary IDs for the SQL lookup and index each hit's
        // score/weight by tag ID. The index key is the string form of the parsed
        // UUID, i.e. the same representation that `(string) Uuid::fromBinary()`
        // yields for the DB rows below. Keying by the raw hit string instead would
        // silently drop matches whenever the search backend returns an ID in a
        // different but still parseable spelling (e.g. different letter case).
        $tagBinaryIds = [];
        $tagMetaById = [];
        foreach ($hits as $hit) {
            $rawTagId = (string) ($hit['tag_id'] ?? '');
            if ($rawTagId === '') {
                continue;
            }

            try {
                $tagUuid = Uuid::fromString($rawTagId);
                $tagId = (string) $tagUuid;
                $tagBinaryId = $tagUuid->toBinary();
            } catch (\Throwable) {
                // Best effort: a hit whose ID is not a parseable UUID is ignored.
                continue;
            }

            $score = (float) $hit['score'];

            if (isset($tagMetaById[$tagId])) {
                // Same tag returned more than once: keep its best score and do
                // not add the binary ID to the IN list a second time.
                if ($tagMetaById[$tagId]['score'] >= $score) {
                    continue;
                }
            } else {
                $tagBinaryIds[] = $tagBinaryId;
            }

            $tagMetaById[$tagId] = [
                'score' => $score,
                'weight' => $this->resolveTypeWeight((string) $hit['tag_type']),
            ];
        }

        if ($tagBinaryIds === []) {
            return null;
        }

        // Fetch every (document, tag) pair for the routed tags, restricted to
        // documents whose status equals Document::STATUS_ACTIVE.
        $rows = $this->em->getConnection()->executeQuery(
            'SELECT dt.document_id, dt.tag_id FROM document_tag dt'
            . ' INNER JOIN document d ON d.id = dt.document_id'
            . ' WHERE dt.tag_id IN (:tagIds) AND d.status = :status',
            [
                'tagIds' => $tagBinaryIds,
                'status' => Document::STATUS_ACTIVE,
            ],
            [
                'tagIds' => ArrayParameterType::BINARY,
            ]
        )->fetchAllAssociative();

        if ($rows === []) {
            return null;
        }

        // Accumulate score * weight per document and remember which distinct
        // tags matched each document.
        $documentScores = [];
        $documentMatchedTags = [];
        foreach ($rows as $row) {
            if (!isset($row['document_id'], $row['tag_id'])) {
                continue;
            }

            try {
                $documentId = (string) Uuid::fromBinary($row['document_id']);
                $tagId = (string) Uuid::fromBinary($row['tag_id']);
            } catch (\Throwable) {
                // Best effort: rows whose IDs cannot be decoded are ignored.
                continue;
            }

            if (!isset($tagMetaById[$tagId])) {
                continue;
            }

            $tagMeta = $tagMetaById[$tagId];
            $documentScores[$documentId] = ($documentScores[$documentId] ?? 0.0)
                + ($tagMeta['score'] * $tagMeta['weight']);
            $documentMatchedTags[$documentId][$tagId] = true;
        }

        if ($documentScores === []) {
            return null;
        }

        // Documents matched by several distinct tags get a bonus per extra tag,
        // capped at $this->maxMultiTagBonus.
        foreach ($documentScores as $documentId => $score) {
            $matchedTagCount = count($documentMatchedTags[$documentId] ?? []);
            if ($matchedTagCount > 1) {
                $documentScores[$documentId] += min(
                    $this->maxMultiTagBonus,
                    ($matchedTagCount - 1) * $this->multiTagBonusPerExtraTag
                );
            }
        }

        // Highest score first; return only the document IDs of the top candidates.
        arsort($documentScores, SORT_NUMERIC);

        return array_slice(
            array_keys($documentScores),
            0,
            $this->maxCandidateDocs
        );
    }

    /**
     * Reduces raw tag-search hits to the ones allowed to drive document routing.
     *
     * Returns an empty list when there are no hits or when the first hit's score
     * is below $this->minBestScore. Otherwise keeps, in input order, the hits that
     * have a non-empty tag ID, score at least
     * max(minBestScore, firstHitScore - maxScoreDropFromBest), and are not sales
     * signals; collection stops once $this->maxRoutingTags hits have been kept.
     *
     * NOTE(review): the first hit is treated as the best one, so this presumably
     * expects $hits ordered by descending score - confirm against the tag search.
     *
     * @param array<int, array<string, mixed>> $hits Raw hits; the keys read are
     *                                               'tag_id', 'score' and 'tag_type'.
     *
     * @return list<array{tag_id: string, score: float, tag_type: string}> The
     *         'tag_type' value is whatever TagTypes::normalize() returns
     *         (presumably a string - confirm).
     */
    private function filterRoutingHits(array $hits): array
    {
        if ($hits === []) {
            return [];
        }

        $bestScore = (float) ($hits[0]['score'] ?? 0.0);
        if ($bestScore < $this->minBestScore) {
            return [];
        }

        // A hit must clear the absolute floor and stay within the allowed drop
        // from the best score.
        $minimumAcceptedScore = max(
            $this->minBestScore,
            $bestScore - $this->maxScoreDropFromBest
        );

        $filtered = [];
        foreach ($hits as $hit) {
            $tagId = (string) ($hit['tag_id'] ?? '');
            $score = (float) ($hit['score'] ?? 0.0);
            $tagType = TagTypes::normalize(
                (string) ($hit['tag_type'] ?? TagTypes::GENERIC)
            );

            if ($tagId === '' || $score < $minimumAcceptedScore) {
                continue;
            }

            // Sales signals may still be useful elsewhere, but they should not
            // expand the document scope for semantic retrieval.
            if ($tagType === TagTypes::SALES_SIGNAL) {
                continue;
            }

            $filtered[] = [
                'tag_id' => $tagId,
                'score' => $score,
                'tag_type' => $tagType,
            ];

            if (count($filtered) >= $this->maxRoutingTags) {
                break;
            }
        }

        return $filtered;
    }

    /**
     * Maps a tag type to the multiplier applied to that tag's search score.
     *
     * The type is normalized with TagTypes::normalize() first. Catalog entities
     * are boosted (1.20), sales signals contribute nothing (0.00), and generic or
     * any other type is neutral (1.00).
     */
    private function resolveTypeWeight(string $tagType): float
    {
        return match (TagTypes::normalize($tagType)) {
            TagTypes::CATALOG_ENTITY => 1.20,
            TagTypes::GENERIC => 1.00,
            TagTypes::SALES_SIGNAL => 0.00,
            default => 1.00,
        };
    }
}