From 44be40a24de93a2b136849014dc3e051ba7ebcc2 Mon Sep 17 00:00:00 2001 From: team2 Date: Fri, 27 Feb 2026 15:49:01 +0100 Subject: [PATCH] cleanup code --- src/Ingest/ChunkWriteService.php | 28 -------------------------- src/Knowledge/ChunkManager.php | 5 ----- src/Knowledge/Ingest/SimpleChunker.php | 26 +++++++++--------------- src/Service/IngestOrchestrator.php | 10 ++++----- 4 files changed, 14 insertions(+), 55 deletions(-) diff --git a/src/Ingest/ChunkWriteService.php b/src/Ingest/ChunkWriteService.php index 4d445da..2967418 100644 --- a/src/Ingest/ChunkWriteService.php +++ b/src/Ingest/ChunkWriteService.php @@ -14,11 +14,6 @@ final readonly class ChunkWriteService private ChunkManager $chunkManager, ) {} - public function getIndexPath(): string - { - return $this->chunkManager->getIndexPath(); - } - public function countAllChunks(): int { return $this->chunkManager->countAllChunks(); @@ -37,29 +32,6 @@ final readonly class ChunkWriteService $this->chunkManager->appendChunks($chunks); } - /** - * Lokaler Ingest für eine einzelne DocumentVersion. - * - * Ablauf: - * 1. Entfernt bestehende Chunks dieses Dokuments - * 2. Appendet neue Chunks - * - * @param iterable> $chunks - */ - public function writeForDocumentVersion( - DocumentVersion $version, - iterable $chunks - ): void { - $documentId = $version->getDocument()->getId(); - - if (!$documentId instanceof Uuid) { - throw new \RuntimeException('Document ID must be a Uuid instance'); - } - - $this->chunkManager->compactByDocument($documentId); - $this->chunkManager->appendChunks($chunks); - } - /** * Vollständiger Rewrite des NDJSON-Index (Global Reindex). * diff --git a/src/Knowledge/ChunkManager.php b/src/Knowledge/ChunkManager.php index 5409b57..cc3712f 100644 --- a/src/Knowledge/ChunkManager.php +++ b/src/Knowledge/ChunkManager.php @@ -17,11 +17,6 @@ final class ChunkManager $this->indexPath = rtrim($projectDir, '/') . $relativeIndexPath; } - public function getIndexPath(): string - { - return $this->indexPath; - } - // ============================================================ // COUNT (Streaming, robust) // ============================================================ diff --git a/src/Knowledge/Ingest/SimpleChunker.php b/src/Knowledge/Ingest/SimpleChunker.php index 3445537..043e6f5 100644 --- a/src/Knowledge/Ingest/SimpleChunker.php +++ b/src/Knowledge/Ingest/SimpleChunker.php @@ -6,15 +6,16 @@ declare(strict_types=1); namespace App\Knowledge\Ingest; use App\Index\IndexConfigurationProvider; +use App\Knowledge\Text\TextNormalizer; -final class SimpleChunker +final readonly class SimpleChunker { - private IndexConfigurationProvider $configurationProvider; public function __construct( - IndexConfigurationProvider $configurationProvider - ) { - $this->configurationProvider = $configurationProvider; + private IndexConfigurationProvider $configurationProvider, + private TextNormalizer $textNormalizer + ) + { } /** @return string[] */ @@ -25,7 +26,7 @@ final class SimpleChunker $maxWords = $config->getChunkSize(); $overlapWords = $config->getChunkOverlap(); - $text = $this->normalize($text); + $text = $this->textNormalizer->normalize($text); if ($text === '') { return []; } @@ -60,7 +61,7 @@ final class SimpleChunker $wordEnd = min($wordPos + $maxWords, $totalWords); $tokenStart = $wordTokenIndexes[$wordPos]; - $tokenEnd = $wordTokenIndexes[$wordEnd - 1] + 1; + $tokenEnd = $wordTokenIndexes[$wordEnd - 1] + 1; $tokenEnd = $this->adjustCutToBoundary($tokens, $tokenStart, $tokenEnd); @@ -84,15 +85,6 @@ final class SimpleChunker return $this->dedupe($chunks); } - private function normalize(string $text): string - { - $text = str_replace(["\r\n", "\r"], "\n", $text); - $text = preg_replace("/[ \t]+/u", " ", $text); - $text = preg_replace("/\n{3,}/u", "\n\n", $text); - - return trim((string) $text); - } - private function adjustCutToBoundary(array $tokens, int $start, int $end): int { $startToken = $tokens[$start] ?? ''; @@ -122,7 +114,7 @@ final class SimpleChunker private function dedupe(array $chunks): array { $seen = []; - $out = []; + $out = []; foreach ($chunks as $chunk) { $key = mb_strtolower( diff --git a/src/Service/IngestOrchestrator.php b/src/Service/IngestOrchestrator.php index e4a914d..3fa73d1 100644 --- a/src/Service/IngestOrchestrator.php +++ b/src/Service/IngestOrchestrator.php @@ -9,13 +9,13 @@ use App\Ingest\IngestFlow; use Doctrine\ORM\EntityManagerInterface; use Symfony\Component\Uid\Uuid; -final class IngestOrchestrator +final readonly class IngestOrchestrator { public function __construct( - private readonly LockService $lockService, - private readonly IngestJobService $jobService, - private readonly EntityManagerInterface $em, - private readonly IngestFlow $ingestFlow, + private LockService $lockService, + private IngestJobService $jobService, + private EntityManagerInterface $em, + private IngestFlow $ingestFlow, ) { }