cleanup code

This commit is contained in:
team2
2026-02-27 15:49:01 +01:00
parent a5a6f466f3
commit 44be40a24d
4 changed files with 14 additions and 55 deletions

View File

@@ -14,11 +14,6 @@ final readonly class ChunkWriteService
private ChunkManager $chunkManager,
) {}
/**
 * Returns the index path as reported by the injected chunk manager.
 *
 * @return string Value of ChunkManager::getIndexPath().
 */
public function getIndexPath(): string
{
    $indexPath = $this->chunkManager->getIndexPath();

    return $indexPath;
}
public function countAllChunks(): int
{
return $this->chunkManager->countAllChunks();
@@ -37,29 +32,6 @@ final readonly class ChunkWriteService
$this->chunkManager->appendChunks($chunks);
}
/**
 * Local ingest for a single DocumentVersion.
 *
 * Flow:
 *  1. Removes the existing chunks of this document
 *     (ChunkManager::compactByDocument()).
 *  2. Appends the new chunks (ChunkManager::appendChunks()).
 *
 * @param DocumentVersion               $version Version whose owning document is rewritten.
 * @param iterable<array<string,mixed>> $chunks  Chunks to append after compaction.
 *
 * @throws \RuntimeException When the document ID is not a Uuid instance.
 */
public function writeForDocumentVersion(
    DocumentVersion $version,
    iterable $chunks
): void {
    $id = $version->getDocument()->getId();

    if ($id instanceof Uuid) {
        // Order matters: old chunks are dropped before the new ones are appended.
        $this->chunkManager->compactByDocument($id);
        $this->chunkManager->appendChunks($chunks);

        return;
    }

    throw new \RuntimeException('Document ID must be a Uuid instance');
}
/**
* Vollständiger Rewrite des NDJSON-Index (Global Reindex).
*

View File

@@ -17,11 +17,6 @@ final class ChunkManager
$this->indexPath = rtrim($projectDir, '/') . $relativeIndexPath;
}
/**
 * Returns the index path held in the $indexPath property.
 *
 * @return string The stored index path.
 */
public function getIndexPath(): string
{
    $path = $this->indexPath;

    return $path;
}
// ============================================================
// COUNT (Streaming, robust)
// ============================================================

View File

@@ -6,15 +6,16 @@ declare(strict_types=1);
namespace App\Knowledge\Ingest;
use App\Index\IndexConfigurationProvider;
use App\Knowledge\Text\TextNormalizer;
final class SimpleChunker
final readonly class SimpleChunker
{
private IndexConfigurationProvider $configurationProvider;
public function __construct(
IndexConfigurationProvider $configurationProvider
) {
$this->configurationProvider = $configurationProvider;
private IndexConfigurationProvider $configurationProvider,
private TextNormalizer $textNormalizer
)
{
}
/** @return string[] */
@@ -25,7 +26,7 @@ final class SimpleChunker
$maxWords = $config->getChunkSize();
$overlapWords = $config->getChunkOverlap();
$text = $this->normalize($text);
$text = $this->textNormalizer->normalize($text);
if ($text === '') {
return [];
}
@@ -84,15 +85,6 @@ final class SimpleChunker
return $this->dedupe($chunks);
}
/**
 * Normalizes line endings and whitespace of the given text.
 *
 * Steps, in order:
 *  1. Converts "\r\n" and "\r" line endings to "\n".
 *  2. Collapses each run of spaces/tabs into a single space.
 *  3. Collapses three or more consecutive newlines into exactly two.
 *  4. Trims leading and trailing whitespace.
 *
 * preg_replace() returns null on a PCRE failure (for example when the input
 * is not valid UTF-8 and the "u" modifier is used). When that happens the
 * failing step is skipped and the text is carried forward unchanged, instead
 * of passing null into the next call or casting it to an empty string.
 *
 * @param string $text Raw input text.
 *
 * @return string The normalized text; may be the empty string.
 */
private function normalize(string $text): string
{
    $text = str_replace(["\r\n", "\r"], "\n", $text);

    // Fall back to the previous value if a regex step fails (null result).
    $text = preg_replace("/[ \t]+/u", " ", $text) ?? $text;
    $text = preg_replace("/\n{3,}/u", "\n\n", $text) ?? $text;

    return trim($text);
}
private function adjustCutToBoundary(array $tokens, int $start, int $end): int
{
$startToken = $tokens[$start] ?? '';

View File

@@ -9,13 +9,13 @@ use App\Ingest\IngestFlow;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Uid\Uuid;
final class IngestOrchestrator
final readonly class IngestOrchestrator
{
public function __construct(
private readonly LockService $lockService,
private readonly IngestJobService $jobService,
private readonly EntityManagerInterface $em,
private readonly IngestFlow $ingestFlow,
private LockService $lockService,
private IngestJobService $jobService,
private EntityManagerInterface $em,
private IngestFlow $ingestFlow,
)
{
}