new system rebuild command

harden KnowledgeIngestService
This commit is contained in:
team 1
2026-02-27 12:03:45 +01:00
parent 7441273fad
commit 4761648836
2 changed files with 15 additions and 15 deletions

View File

@@ -10,10 +10,11 @@ use App\Repository\DocumentVersionRepository;
final class KnowledgeIngestService final class KnowledgeIngestService
{ {
public function __construct( public function __construct(
private DocumentLoader $loader, private DocumentLoader $loader,
private SimpleChunker $chunker, private SimpleChunker $chunker,
private DocumentVersionRepository $versionRepo, private DocumentVersionRepository $versionRepo,
) { )
{
} }
/** /**
@@ -31,17 +32,16 @@ final class KnowledgeIngestService
$doc = $version->getDocument(); $doc = $version->getDocument();
$documentId = $doc->getId()->toRfc4122(); $documentId = $doc->getId()->toRfc4122();
$versionId = $version->getId()->toRfc4122(); $versionId = $version->getId()->toRfc4122();
$title = trim((string) $doc->getTitle()); $title = trim((string)$doc->getTitle());
$index = 0; $index = 0;
foreach ($chunks as $chunkText) { foreach ($chunks as $chunkText) {
// Titel optional weiterhin prefixen (wenn du das behalten willst)
if ($title !== '' && !str_starts_with($chunkText, $title)) { if ($title !== '' && !str_starts_with($chunkText, $title)) {
$chunkText = $title . "\n\n" . $chunkText; $chunkText = "# Produkt Titel: " . $title . "\n\n --- " . $chunkText;
} }
$chunkText = trim($chunkText); $chunkText = trim($chunkText);
@@ -56,13 +56,13 @@ final class KnowledgeIngestService
); );
yield [ yield [
'chunk_id' => $chunkId, 'chunk_id' => $chunkId,
'document_id' => $documentId, 'document_id' => $documentId,
'version_id' => $versionId, 'version_id' => $versionId,
'chunk_index' => $index++, 'chunk_index' => $index++,
'text' => $chunkText, 'text' => $chunkText,
'checksum' => sha1($chunkText), 'checksum' => sha1($chunkText),
'metadata' => $this->buildMetadata($version), 'metadata' => $this->buildMetadata($version),
]; ];
} }
} }
@@ -114,7 +114,7 @@ final class KnowledgeIngestService
'version_number' => method_exists($version, 'getVersionNumber') 'version_number' => method_exists($version, 'getVersionNumber')
? $version->getVersionNumber() ? $version->getVersionNumber()
: null, : null,
'file_path' => $version->getFilePath(), 'file_path' => $version->getFilePath(),
], static fn($v) => $v !== null && $v !== ''); ], static fn($v) => $v !== null && $v !== '');
} }
} }

View File

@@ -14,7 +14,7 @@ use App\Vector\VectorSearchClient;
final class NdjsonHybridRetriever implements RetrieverInterface final class NdjsonHybridRetriever implements RetrieverInterface
{ {
private const VECTOR_SCORE_THRESHOLD = 0.4; private const VECTOR_SCORE_THRESHOLD = 0.75;
private const HARD_MAX_CHUNKS = 200; private const HARD_MAX_CHUNKS = 200;
private const HARD_MAX_VECTORK = 200; private const HARD_MAX_VECTORK = 200;
@@ -23,7 +23,7 @@ final class NdjsonHybridRetriever implements RetrieverInterface
* Tags dürfen nur ein kleiner Bonus sein (kein Gate/Filter). * Tags dürfen nur ein kleiner Bonus sein (kein Gate/Filter).
* Enterprise Default: klein halten, sonst dominieren Tags wieder. * Enterprise Default: klein halten, sonst dominieren Tags wieder.
*/ */
private const TAG_SCORE_BONUS = 0.08; private const TAG_SCORE_BONUS = 0.25;
public function __construct( public function __construct(
private readonly NdjsonChunkLookup $lookup, private readonly NdjsonChunkLookup $lookup,