optimize code and ingest docs
This commit is contained in:
@@ -29,18 +29,30 @@ final class KnowledgeIngestService
|
||||
|
||||
$chunks = $this->chunker->chunk($text);
|
||||
|
||||
$documentId = $version->getDocument()->getId()->toRfc4122();
|
||||
$doc = $version->getDocument();
|
||||
|
||||
$documentId = $doc->getId()->toRfc4122();
|
||||
$versionId = $version->getId()->toRfc4122();
|
||||
|
||||
// ✅ Regel: Wenn title gefüllt ist, kommt er in jeden Chunk
|
||||
$title = trim((string) $doc->getTitle());
|
||||
|
||||
$index = 0;
|
||||
|
||||
foreach ($chunks as $chunkText) {
|
||||
|
||||
// ✅ Prefix nur wenn title vorhanden; keine Flags, keine Meta-Schalter
|
||||
if ($title !== '' && !str_starts_with($chunkText, $title)) {
|
||||
$chunkText = $title . "\n\n" . $chunkText;
|
||||
}
|
||||
|
||||
yield [
|
||||
'chunk_id' => Uuid::v4()->toRfc4122(),
|
||||
'document_id' => $documentId,
|
||||
'version_id' => $versionId,
|
||||
'chunk_index' => $index++,
|
||||
'text' => $chunkText,
|
||||
// ✅ checksum muss den finalen Text abbilden (inkl. Titel)
|
||||
'checksum' => sha1($chunkText),
|
||||
'metadata' => $this->buildMetadata($version),
|
||||
];
|
||||
@@ -56,7 +68,6 @@ final class KnowledgeIngestService
|
||||
/**
 * Lazily produces the chunk records of every version returned by
 * $this->versionRepo->iterateActiveVersions().
 *
 * Each version is handed to buildChunkRecords() and its records are
 * forwarded one by one via `yield from`, so nothing is collected into an
 * intermediate array here.
 *
 * NOTE(review): `yield from` keeps the keys of the inner generator. If
 * buildChunkRecords() yields with automatic integer keys, those keys will
 * repeat for each version; consume the result with foreach, or with
 * iterator_to_array($records, false) -- confirm against callers.
 *
 * @return iterable the records yielded by buildChunkRecords(), version by version
 */
public function buildAllActiveChunkRecords(): iterable
{
    $activeVersions = $this->versionRepo->iterateActiveVersions();

    foreach ($activeVersions as $activeVersion) {
        // Delegate to the per-version generator; records are forwarded as they are produced.
        yield from $this->buildChunkRecords($activeVersion);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user