add chunk limiter

This commit is contained in:
team 1
2026-02-16 15:29:31 +01:00
parent b04f972971
commit feff95ffe5
7 changed files with 239 additions and 31 deletions

View File

@@ -20,6 +20,7 @@ parameters:
mto.knowledge.index_meta: '%mto.knowledge.root%/index_meta.json'
mto.knowledge.vector_index: '%mto.knowledge.root%/vector.index'
mto.knowledge.vector_index_meta: '%mto.knowledge.root%/vector.index.meta.json'
mto.runtime.meta: '%mto.knowledge.root%/index_runtime.json'
mto.knowledge.upload: '%mto.knowledge.root%/uploads'
# Backward compatibility alias
@@ -136,6 +137,7 @@ services:
App\Index\IndexMetaManager:
arguments:
$metaPath: '%mto.knowledge.index_meta%'
$runTimePath: '%mto.runtime.meta%'
$provider: '@App\Index\IndexConfigurationProvider'
# ------------------------------------------------------------

View File

@@ -3,6 +3,7 @@
namespace App\Controller\Admin;
use App\Index\IndexMetaManager;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route;
@@ -15,4 +16,15 @@ final class DashboardController extends AbstractController
return $this->render('admin/dashboard/index.html.twig');
}
/**
 * Renders the admin dashboard including the fill level of the knowledge index.
 *
 * The chunk count is read from the runtime statistics kept by the injected
 * IndexMetaManager; the limit is handed to the template as a plain number.
 */
#[Route('/admin/dashboard', name: 'admin_dashboard')]
public function dashboard(IndexMetaManager $metaManager): Response
{
    // NOTE(review): 120000 matches the hard cap enforced during ingest
    // (IngestFlow::CHUNK_LIMIT_HARD) — keep both values in sync.
    $templateContext = [
        'chunkCount' => $metaManager->getRuntimeChunkCount(),
        'chunkLimit' => 120000,
    ];

    return $this->render('admin/dashboard/index.html.twig', $templateContext);
}
}

View File

@@ -7,19 +7,24 @@ namespace App\Index;
final class IndexMetaManager
{
private string $metaPath;
private string $runtimePath;
private IndexConfigurationProvider $provider;
/**
 * @param string                     $metaPath    Path of the index meta file.
 * @param string                     $runTimePath Path of the runtime statistics file.
 *                                                The parameter name is bound by name in the
 *                                                service configuration and must not be renamed.
 * @param IndexConfigurationProvider $provider    Source of the current index configuration.
 */
public function __construct(
    string $metaPath,
    string $runTimePath,
    IndexConfigurationProvider $provider
) {
    // The runtime file is expected to live in the same directory as the meta file;
    // this is a configuration convention and is not enforced here.
    $this->runtimePath = $runTimePath;

    $this->provider = $provider;
    $this->metaPath = $metaPath;
}
// -----------------------------------------------------
// Public API
// -----------------------------------------------------
// =====================================================
// META (Governance unverändert lassen!)
// =====================================================
public function ensureExists(): void
{
@@ -93,4 +98,58 @@ final class IndexMetaManager
)
);
}
// =====================================================
// RUNTIME (Chunk Counter etc.)
// =====================================================
/**
 * Creates the runtime statistics file with zeroed defaults if it does not exist yet.
 *
 * An existing file is left untouched. The parent directory is created on demand.
 *
 * @throws \RuntimeException if the directory or the file cannot be created
 * @throws \JsonException    if the default payload cannot be encoded
 */
private function ensureRuntimeFileExists(): void
{
    if (is_file($this->runtimePath)) {
        return;
    }

    $dir = dirname($this->runtimePath);

    // The trailing is_dir() covers the race where a concurrent request created
    // the directory between our first check and the mkdir() call.
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        throw new \RuntimeException(sprintf('Unable to create runtime directory "%s".', $dir));
    }

    $payload = [
        'chunk_count' => 0,
        'last_rebuild_at' => null,
    ];

    $json = json_encode(
        $payload,
        JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR
    );

    // LOCK_EX keeps concurrent writers from interleaving their output.
    if (file_put_contents($this->runtimePath, $json, LOCK_EX) === false) {
        throw new \RuntimeException(sprintf('Unable to write runtime file "%s".', $this->runtimePath));
    }
}
/**
 * Persists the current chunk count together with the time of this update.
 *
 * The whole runtime file is replaced; `last_rebuild_at` is set to "now" in
 * ATOM (ISO 8601) format.
 *
 * @param int $chunkCount Number of chunks to record.
 *
 * @throws \RuntimeException if the runtime file cannot be written
 * @throws \JsonException    if the payload cannot be encoded
 */
public function updateRuntimeStats(int $chunkCount): void
{
    // Guarantees that the target directory and file exist before the overwrite.
    $this->ensureRuntimeFileExists();

    $payload = [
        'chunk_count' => $chunkCount,
        'last_rebuild_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
    ];

    $json = json_encode(
        $payload,
        JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR
    );

    // LOCK_EX serialises concurrent writers; a failed write must not pass
    // silently, otherwise the dashboard keeps showing a stale counter.
    if (file_put_contents($this->runtimePath, $json, LOCK_EX) === false) {
        throw new \RuntimeException(sprintf('Unable to write runtime file "%s".', $this->runtimePath));
    }
}
/**
 * Returns the chunk count recorded in the runtime statistics file.
 *
 * The file is created with defaults if it is missing. An unreadable,
 * malformed or incomplete file yields 0 instead of an error, and the
 * result is never negative.
 *
 * @return int Recorded chunk count, or 0 when no usable value is stored.
 */
public function getRuntimeChunkCount(): int
{
    $this->ensureRuntimeFileExists();

    $raw = file_get_contents($this->runtimePath);
    if ($raw === false) {
        return 0;
    }

    $data = json_decode($raw, true);
    if (!is_array($data)) {
        return 0;
    }

    $count = $data['chunk_count'] ?? 0;

    // Reject non-numeric values explicitly: a blind (int) cast would turn a
    // non-empty array into 1 and a string like "12abc" into 12.
    if (!is_numeric($count)) {
        return 0;
    }

    return max(0, (int) $count);
}
}

View File

@@ -9,39 +9,86 @@ use App\Index\IndexMetaManager;
use App\Knowledge\ChunkManager;
use App\Knowledge\Ingest\KnowledgeIngestService;
use App\Vector\VectorIndexBuilder;
use Psr\Log\LoggerInterface;
final readonly class IngestFlow
{
/**
* Realistische Betriebsgrenze für dieses Systemdesign (CPU Embedding + FlatIP + Full Rebuild).
* Wird beim lokalen Ingest (Dokumentversion) enforced.
*/
private const CHUNK_LIMIT_HARD = 120000;
/**
* Ab hier nur Warnung (keine Blockade) damit man frühzeitig reagieren kann.
*/
private const CHUNK_LIMIT_WARN = 100000;
public function __construct(
private KnowledgeIngestService $knowledgeIngestService,
private ChunkManager $chunkManager,
private VectorIndexBuilder $vectorBuilder,
private IndexMetaManager $metaManager,
)
{
private LoggerInterface $logger,
) {
}
public function ingestDocumentVersion(
DocumentVersion $version
): void
public function ingestDocumentVersion(DocumentVersion $version): void
{
$this->metaManager->validateAgainstCurrent();
$this->chunkManager->compactByDocument(
$version->getDocument()->getId()
);
$records = $this->knowledgeIngestService
->buildChunkRecords($version);
$this->chunkManager->appendChunks($records);
$this->vectorBuilder->rebuildFromNdjson();
// Entfernt alte Chunks dieses Dokuments -> danach ist "existing" der Basis-Index ohne dieses Dokument.
$this->chunkManager->compactByDocument($version->getDocument()->getId());
// ------------------------------
// Chunk-Limit Guardrail (Hard Cap)
// ------------------------------
$existing = $this->chunkManager->countAllChunks();
// buildChunkRecords() ist generatorbasiert; für einen sauberen Hard-Cap materialisieren wir lokal,
// damit wir vor dem Append abbrechen können (keine Partial Writes).
$recordsIterable = $this->knowledgeIngestService->buildChunkRecords($version);
$records = is_array($recordsIterable)
? $recordsIterable
: iterator_to_array($recordsIterable, false);
$incoming = count($records);
$total = $existing + $incoming;
if ($total >= self::CHUNK_LIMIT_WARN) {
$this->logger->warning('RAG chunk count approaching limit.', [
'existing' => $existing,
'incoming' => $incoming,
'total' => $total,
'warn_at' => self::CHUNK_LIMIT_WARN,
'hard_cap' => self::CHUNK_LIMIT_HARD,
'document_id' => $version->getDocument()->getId()->toRfc4122(),
'version_id' => $version->getId()->toRfc4122(),
]);
}
if ($total > self::CHUNK_LIMIT_HARD) {
throw new \RuntimeException(sprintf(
'Chunk limit reached: %d existing + %d incoming = %d (hard cap: %d). Reduce knowledge base or move to a scaled vector setup (IVF/HNSW/GPU/sharding).',
$existing,
$incoming,
$total,
self::CHUNK_LIMIT_HARD
));
}
$this->chunkManager->appendChunks($records);
$this->vectorBuilder->rebuildFromNdjson();
$chunkCount = $this->chunkManager->countAllChunks();
$this->metaManager->updateRuntimeStats($chunkCount);
}
public function globalReindex(): void
{
$allRecords = $this->knowledgeIngestService
->buildAllActiveChunkRecords();
$allRecords = $this->knowledgeIngestService->buildAllActiveChunkRecords();
// Optional (wenn du willst): Hier könnte man ebenfalls ein Hard-Cap enforce'n (rewriteAll mit Limit).
$this->chunkManager->rewriteAll($allRecords);
$this->vectorBuilder->rebuildFromNdjson();

View File

@@ -22,6 +22,46 @@ final class ChunkManager
return $this->indexPath;
}
// ============================================================
// COUNT (für Guardrails / Limits)
// ============================================================
/**
 * Counts the records (NDJSON lines) in the index file by streaming it line by line.
 *
 * Blank lines and lines that do not decode to an array are ignored.
 * A missing index file counts as zero records.
 *
 * @return int Number of lines that decode to an array.
 *
 * @throws \RuntimeException if the index file exists but cannot be opened
 */
public function countAllChunks(): int
{
    if (!is_file($this->indexPath)) {
        return 0;
    }

    $stream = fopen($this->indexPath, 'rb');
    if ($stream === false) {
        throw new \RuntimeException('Unable to open index.ndjson for counting');
    }

    $total = 0;

    try {
        for ($raw = fgets($stream); $raw !== false; $raw = fgets($stream)) {
            $candidate = trim($raw);

            // Only lines that decode to an array (JSON object or list) are counted.
            if ($candidate !== '' && is_array(json_decode($candidate, true))) {
                ++$total;
            }
        }
    } finally {
        fclose($stream);
    }

    return $total;
}
// ============================================================
// APPEND
// ============================================================

View File

@@ -5,7 +5,10 @@
{% block body %}
<h1 class="h4 mb-3">Dashboard</h1>
<div class="card bg-black text-info border-secondary">
{# ============================= #}
{# USER + RESET CARD #}
{# ============================= #}
<div class="card bg-black text-info border-secondary mb-4">
<div class="card-body">
<div class="mb-2">
<strong>User:</strong> {{ app.user.userIdentifier }}
@@ -19,21 +22,66 @@
<div class="text-light">
<p class="fw-bold">Reset des Systems</p>
<p>Unwiderruflicher Reset des gesamten Systems</p>
{% for label, messages in app.flashes %}
{% for message in messages %}
<div class="alert alert-{{ label }} alert-{{ label}} fade show" role="alert">
<div class="alert alert-{{ label }} fade show" role="alert">
{{ message }}
</div>
{% endfor %}
{% endfor %}
<form method="post" action="/admin/documents/reset" onsubmit="return resetSystem()">
<button type="submit" class="btn btn-outline-danger">Reset System</button>
<form method="post" action="/admin/documents/reset" onsubmit="return resetSystem()">
<button type="submit" class="btn btn-outline-danger">
Reset System
</button>
</form>
</div>
</div>
</div>
{# ============================= #}
{# KNOWLEDGE INDEX STATUS CARD #}
{# ============================= #}
{# Default both statistics to 0 so the card also renders when the calling action #}
{# does not pass them; with strict_variables the undefined names would raise. #}
{% set chunkCount = chunkCount|default(0) %}
{% set chunkLimit = chunkLimit|default(0) %}
{% set percent = chunkLimit > 0 ? (chunkCount / chunkLimit * 100)|round(1) : 0 %}
{# The label may exceed 100 %, but the drawn bar must stay inside its track. #}
{% set barWidth = max(0, min(100, percent)) %}
{# Colour thresholds: red close to the limit, yellow from the warning level on. #}
{% if chunkCount > 115000 %}
    {% set barClass = 'bg-danger' %}
{% elseif chunkCount > 100000 %}
    {% set barClass = 'bg-warning text-dark' %}
{% else %}
    {% set barClass = 'bg-success' %}
{% endif %}
<div class="card bg-black text-light border-secondary">
    <div class="card-body">
        <h5 class="text-info mb-3">Knowledge Index</h5>
        <div class="mb-2">
            <strong>Chunks:</strong>
            {{ chunkCount|number_format(0, ',', '.') }}
            /
            {{ chunkLimit|number_format(0, ',', '.') }}
        </div>
        <div class="progress bg-dark" style="height: 18px;">
            <div
                class="progress-bar {{ barClass }}"
                role="progressbar"
                aria-valuenow="{{ barWidth }}"
                aria-valuemin="0"
                aria-valuemax="100"
                style="width: {{ barWidth }}%;"
            >
                {{ percent }}%
            </div>
        </div>
        <div class="mt-2 small text-light">
            System ist für maximal 120.000 Chunks optimiert.
        </div>
    </div>
</div>
<script>
function resetSystem() {
return confirm('Sind Sie sicher, dass Sie das gesamte System zurücksetzen möchten?');

View File

@@ -33,14 +33,14 @@
</tr>
{% for p in profiles %}
<tr {% if p.active %}class="text-success"{% endif %}>
<td>{{ p.version }}</td>
<tr>
<td {% if p.active %}class="text-success"{% endif %}>{{ p.version }}</td>
<td>{{ p.chunkSize }}</td>
<td>{{ p.chunkOverlap }}</td>
<td>{{ p.embeddingModel }}</td>
<td>{{ p.embeddingDimension }}</td>
<td>{{ p.scoringVersion }}</td>
<td>{{ p.active ? 'Yes' : 'No' }}</td>
<td {% if p.active %}class="text-success"{% endif %}>{{ p.active ? 'Yes' : 'No' }}</td>
<td>{{ p.reindexRequired ? 'Yes' : 'No' }}</td>
<td>
{% if not p.active %}