From 4761648836d5619e90a5dd06d4007a2afbbf72d4 Mon Sep 17 00:00:00 2001 From: team 1 Date: Fri, 27 Feb 2026 12:03:45 +0100 Subject: [PATCH] new system rebuild command harden IngestService --- .../Ingest/KnowledgeIngestService.php | 26 +++++++++---------- .../Retrieval/NdjsonHybridRetriever.php | 4 +-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Knowledge/Ingest/KnowledgeIngestService.php b/src/Knowledge/Ingest/KnowledgeIngestService.php index adf299f..acf3b14 100644 --- a/src/Knowledge/Ingest/KnowledgeIngestService.php +++ b/src/Knowledge/Ingest/KnowledgeIngestService.php @@ -10,10 +10,11 @@ use App\Repository\DocumentVersionRepository; final class KnowledgeIngestService { public function __construct( - private DocumentLoader $loader, - private SimpleChunker $chunker, + private DocumentLoader $loader, + private SimpleChunker $chunker, private DocumentVersionRepository $versionRepo, - ) { + ) + { } /** @@ -31,17 +32,16 @@ final class KnowledgeIngestService $doc = $version->getDocument(); $documentId = $doc->getId()->toRfc4122(); - $versionId = $version->getId()->toRfc4122(); + $versionId = $version->getId()->toRfc4122(); - $title = trim((string) $doc->getTitle()); + $title = trim((string)$doc->getTitle()); $index = 0; foreach ($chunks as $chunkText) { - // Titel optional weiterhin prefixen (wenn du das behalten willst) if ($title !== '' && !str_starts_with($chunkText, $title)) { - $chunkText = $title . "\n\n" . $chunkText; + $chunkText = "# Produkt Titel: " . $title . "\n\n --- " . $chunkText; } $chunkText = trim($chunkText); @@ -56,13 +56,13 @@ final class KnowledgeIngestService ); yield [ - 'chunk_id' => $chunkId, + 'chunk_id' => $chunkId, 'document_id' => $documentId, - 'version_id' => $versionId, + 'version_id' => $versionId, 'chunk_index' => $index++, - 'text' => $chunkText, - 'checksum' => sha1($chunkText), - 'metadata' => $this->buildMetadata($version), + 'text' => $chunkText, + 'checksum' => sha1($chunkText), + 'metadata' => $this->buildMetadata($version), ]; } } @@ -114,7 +114,7 @@ final class KnowledgeIngestService 'version_number' => method_exists($version, 'getVersionNumber') ? $version->getVersionNumber() : null, - 'file_path' => $version->getFilePath(), + 'file_path' => $version->getFilePath(), ], static fn($v) => $v !== null && $v !== ''); } } \ No newline at end of file diff --git a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php index 64d3244..43e616f 100644 --- a/src/Knowledge/Retrieval/NdjsonHybridRetriever.php +++ b/src/Knowledge/Retrieval/NdjsonHybridRetriever.php @@ -14,7 +14,7 @@ use App\Vector\VectorSearchClient; final class NdjsonHybridRetriever implements RetrieverInterface { - private const VECTOR_SCORE_THRESHOLD = 0.4; + private const VECTOR_SCORE_THRESHOLD = 0.75; private const HARD_MAX_CHUNKS = 200; private const HARD_MAX_VECTORK = 200; @@ -23,7 +23,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface * Tags dürfen nur ein kleiner Bonus sein (kein Gate/Filter). * Enterprise Default: klein halten, sonst dominieren Tags wieder. */ - private const TAG_SCORE_BONUS = 0.08; + private const TAG_SCORE_BONUS = 0.25; public function __construct( private readonly NdjsonChunkLookup $lookup,