new ingest and profile settings

This commit is contained in:
team 1
2026-02-16 14:38:02 +01:00
parent ece93e4cb4
commit 8666b05570
15 changed files with 655 additions and 199 deletions

View File

@@ -21,11 +21,12 @@ parameters:
mto.knowledge.vector_index: '%mto.knowledge.root%/vector.index'
mto.knowledge.vector_index_meta: '%mto.knowledge.root%/vector.index.meta.json'
mto.knowledge.upload: '%mto.knowledge.root%/uploads'
# Backward compatibility alias
mto.vector.data.upload.path: '%mto.knowledge.upload%'
# ------------------------------------------------------------
# Index Configuration (Guardrails)
# Index Configuration (Fallback Guardrails)
# ------------------------------------------------------------
mto.index.chunk_size: 800
@@ -39,12 +40,11 @@ parameters:
# ------------------------------------------------------------
mto.vector.python_bin: '/var/www/html/.venv/bin/python3'
mto.vector.ingest_script: '%mto.root%/src/Vector/vector_ingest.py'
mto.vector.search_script: '%mto.root%/src/Vector/vector_search.py'
mto.vector.timeout: 600
# ------------------------------------------------------------
# Services
# ------------------------------------------------------------
@@ -117,12 +117,30 @@ services:
alias: App\Knowledge\Retrieval\CachedRetriever
# ------------------------------------------------------------
# Vector Search (noch unverändert Umbau kommt in Schritt 2)
# Index Configuration Provider (DB + Fallback)
# ------------------------------------------------------------
App\Index\IndexConfigurationProvider:
arguments:
$repository: '@App\Repository\IngestProfileRepository'
$fallbackChunkSize: '%mto.index.chunk_size%'
$fallbackChunkOverlap: '%mto.index.chunk_overlap%'
$fallbackEmbeddingModel: '%mto.index.embedding_model%'
$fallbackEmbeddingDimension: '%mto.index.embedding_dimension%'
$fallbackScoringVersion: '%mto.index.scoring_version%'
# ------------------------------------------------------------
# Index Meta Manager (uses Provider)
# ------------------------------------------------------------
App\Index\IndexMetaManager:
arguments:
$metaPath: '%mto.knowledge.index_meta%'
$config: '@App\Index\IndexConfiguration'
$provider: '@App\Index\IndexConfigurationProvider'
# ------------------------------------------------------------
# Vector Layer
# ------------------------------------------------------------
App\Vector\VectorSearchClient:
arguments:
@@ -141,22 +159,12 @@ services:
$indexMetaPath: '%mto.knowledge.index_meta%'
$vectorIndexPath: '%mto.knowledge.vector_index%'
$timeoutSeconds: '%mto.vector.timeout%'
$indexConfiguration: '@App\Index\IndexConfiguration'
$configurationProvider: '@App\Index\IndexConfigurationProvider'
# ------------------------------------------------------------
# Index Configuration
# Admin Utilities
# ------------------------------------------------------------
App\Index\IndexConfiguration:
arguments:
$chunkSize: '%mto.index.chunk_size%'
$chunkOverlap: '%mto.index.chunk_overlap%'
$embeddingModel: '%mto.index.embedding_model%'
$embeddingDimension: '%mto.index.embedding_dimension%'
$scoringVersion: '%mto.index.scoring_version%'
$indexFormat: 'ndjson'
$vectorBackend: 'faiss'
App\Service\Admin\IndexNdjsonInspector:
arguments:
$ndJsonPath: '%mto.knowledge.ndjson%'

View File

@@ -0,0 +1,38 @@
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Creates the `ingest_profile` table that stores versioned ingest settings
 * (chunking, embedding and scoring parameters) plus their activation state.
 *
 * NOTE(review): the entity maps `id` with the Doctrine type `uuid`, while this
 * migration declares it as `binary` with length 16. Whether both produce the
 * same column depends on the database platform — confirm with
 * `doctrine:schema:validate` on the target database.
 */
final class Version20260216000100 extends AbstractMigration
{
    public function getDescription(): string
    {
        return 'Create ingest_profile table';
    }

    /**
     * Declares the table through the schema API; the columns are listed once
     * as data and added in declaration order.
     */
    public function up(Schema $schema): void
    {
        // column name => [Doctrine type name, column options]
        $columns = [
            'id' => ['binary', ['length' => 16]],
            'version' => ['integer', []],
            'chunk_size' => ['integer', []],
            'chunk_overlap' => ['integer', []],
            'embedding_model' => ['string', ['length' => 255]],
            'embedding_dimension' => ['integer', []],
            'scoring_version' => ['integer', []],
            'active' => ['boolean', []],
            'reindex_required' => ['boolean', []],
            'created_at' => ['datetime_immutable', []],
        ];

        $ingestProfile = $schema->createTable('ingest_profile');

        foreach ($columns as $columnName => [$typeName, $options]) {
            $ingestProfile->addColumn($columnName, $typeName, $options);
        }

        $ingestProfile->setPrimaryKey(['id']);
    }

    /**
     * Reverts the migration by dropping the table again.
     */
    public function down(Schema $schema): void
    {
        $schema->dropTable('ingest_profile');
    }
}

View File

@@ -0,0 +1,102 @@
<?php
declare(strict_types=1);
namespace App\Controller\Admin;
use App\Entity\IngestProfile;
use App\Index\IndexConfigurationProvider;
use App\Index\IndexMetaManager;
use App\Index\IndexStructureComparator;
use App\Repository\IngestProfileRepository;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Attribute\Route;
/**
 * Admin screens for ingest profiles: list them, create a new version and
 * switch the active one.
 */
#[Route('/admin/ingest-profiles')]
class IngestProfileController extends AbstractController
{
    /**
     * Shows all profiles (newest version first) together with a comparison of
     * the stored index meta and the structure of the current configuration.
     */
    #[Route('/', name: 'admin_ingest_profile_list')]
    public function list(
        IngestProfileRepository $repo,
        IndexMetaManager $metaManager,
        IndexConfigurationProvider $provider,
        IndexStructureComparator $comparator
    ): Response {
        $profiles = $repo->findBy([], ['version' => 'DESC']);
        $activeProfile = $repo->findActive();

        $meta = $metaManager->readMeta();
        $currentStructure = $provider->getConfiguration()->toStructureArray();
        $diff = $comparator->compare($meta, $currentStructure);

        // A single unequal row is enough to flag the index as out of sync.
        $structureMismatch = in_array(false, array_column($diff, 'equal'), true);

        return $this->render('admin/ingest_profile/list.html.twig', [
            'profiles' => $profiles,
            'activeProfile' => $activeProfile,
            'indexMeta' => $meta,
            'diff' => $diff,
            'structureMismatch' => $structureMismatch,
        ]);
    }

    /**
     * Renders the creation form (GET) or validates and stores a new profile
     * (POST). The new profile gets the next free version number.
     *
     * Invalid submissions are not persisted; the form is rendered again with
     * HTTP 422 and an `errors` list (array of strings) for the template.
     */
    #[Route('/create', name: 'admin_ingest_profile_create', methods: ['GET', 'POST'])]
    public function create(
        Request $request,
        IngestProfileRepository $repo,
        EntityManagerInterface $em
    ): Response {
        if (!$request->isMethod('POST')) {
            return $this->render('admin/ingest_profile/create.html.twig');
        }

        $form = $request->request;

        // FILTER_VALIDATE_INT yields false for missing or non-numeric input,
        // whereas a plain (int) cast would silently turn it into 0.
        $chunkSize = filter_var($form->get('chunk_size'), FILTER_VALIDATE_INT);
        $chunkOverlap = filter_var($form->get('chunk_overlap'), FILTER_VALIDATE_INT);
        $embeddingDimension = filter_var($form->get('embedding_dimension'), FILTER_VALIDATE_INT);
        $scoringVersion = filter_var($form->get('scoring_version'), FILTER_VALIDATE_INT);
        $embeddingModel = trim((string) $form->get('embedding_model'));

        $errors = [];

        if ($chunkSize === false || $chunkSize < 1) {
            $errors[] = 'chunk_size must be a positive integer.';
        }

        if ($chunkOverlap === false || $chunkOverlap < 0) {
            $errors[] = 'chunk_overlap must be a non-negative integer.';
        } elseif ($chunkSize !== false && $chunkOverlap >= $chunkSize) {
            // An overlap that is not smaller than the chunk size leaves no
            // room for a chunking window to move forward.
            $errors[] = 'chunk_overlap must be smaller than chunk_size.';
        }

        // 255 is the length of the embedding_model column.
        if ($embeddingModel === '' || mb_strlen($embeddingModel) > 255) {
            $errors[] = 'embedding_model must be between 1 and 255 characters long.';
        }

        if ($embeddingDimension === false || $embeddingDimension < 1) {
            $errors[] = 'embedding_dimension must be a positive integer.';
        }

        if ($scoringVersion === false) {
            $errors[] = 'scoring_version must be an integer.';
        }

        if ($errors !== []) {
            return $this->render(
                'admin/ingest_profile/create.html.twig',
                ['errors' => $errors],
                new Response('', Response::HTTP_UNPROCESSABLE_ENTITY)
            );
        }

        $nextVersion = ($repo->findLatestVersion()?->getVersion() ?? 0) + 1;

        $profile = new IngestProfile(
            $nextVersion,
            $chunkSize,
            $chunkOverlap,
            $embeddingModel,
            $embeddingDimension,
            $scoringVersion
        );

        $em->persist($profile);
        $em->flush();

        return $this->redirectToRoute('admin_ingest_profile_list');
    }

    /**
     * Makes the given profile the only active one.
     *
     * NOTE(review): this route changes state but is reachable via GET and has
     * no CSRF protection. Restricting it to POST requires the list template
     * to submit a form instead of a link.
     */
    #[Route('/activate/{id}', name: 'admin_ingest_profile_activate')]
    public function activate(
        IngestProfile $profile,
        IngestProfileRepository $repo,
        EntityManagerInterface $em
    ): Response {
        // Deactivate every other active row, not only the first one found, so
        // an inconsistent table with several active rows is repaired as well.
        foreach ($repo->findBy(['active' => true]) as $current) {
            if ($current !== $profile) {
                $current->deactivate();
            }
        }

        // Re-requesting the URL for an already active profile must not flag
        // it for reindexing again.
        if (!$profile->isActive()) {
            $profile->activate();
        }

        $em->flush();

        return $this->redirectToRoute('admin_ingest_profile_list');
    }
}

View File

@@ -0,0 +1,90 @@
<?php
declare(strict_types=1);
namespace App\Entity;
use App\Repository\IngestProfileRepository;
use Doctrine\ORM\Mapping as ORM;
use Symfony\Component\Uid\Uuid;
/**
 * One versioned set of ingest settings (chunking, embedding, scoring).
 *
 * The settings are fixed at construction time; only the `active` and
 * `reindexRequired` flags change afterwards.
 */
#[ORM\Entity(repositoryClass: IngestProfileRepository::class)]
#[ORM\Table(name: 'ingest_profile')]
class IngestProfile
{
    #[ORM\Id]
    #[ORM\Column(type: 'uuid', unique: true)]
    private Uuid $id;

    #[ORM\Column(type: 'integer')]
    private int $version;

    #[ORM\Column(type: 'integer')]
    private int $chunkSize;

    #[ORM\Column(type: 'integer')]
    private int $chunkOverlap;

    #[ORM\Column(type: 'string', length: 255)]
    private string $embeddingModel;

    #[ORM\Column(type: 'integer')]
    private int $embeddingDimension;

    #[ORM\Column(type: 'integer')]
    private int $scoringVersion;

    // New profiles start inactive ...
    #[ORM\Column(type: 'boolean')]
    private bool $active = false;

    // ... and flagged as needing a reindex.
    #[ORM\Column(type: 'boolean')]
    private bool $reindexRequired = true;

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    /**
     * Creates a profile with a fresh random (v4) UUID and the current time as
     * creation timestamp.
     */
    public function __construct(
        int $version,
        int $chunkSize,
        int $chunkOverlap,
        string $embeddingModel,
        int $embeddingDimension,
        int $scoringVersion
    ) {
        $this->id = Uuid::v4();
        $this->createdAt = new \DateTimeImmutable();

        $this->version = $version;
        $this->chunkSize = $chunkSize;
        $this->chunkOverlap = $chunkOverlap;
        $this->embeddingModel = $embeddingModel;
        $this->embeddingDimension = $embeddingDimension;
        $this->scoringVersion = $scoringVersion;
    }

    public function getId(): Uuid
    {
        return $this->id;
    }

    public function getVersion(): int
    {
        return $this->version;
    }

    public function getChunkSize(): int
    {
        return $this->chunkSize;
    }

    public function getChunkOverlap(): int
    {
        return $this->chunkOverlap;
    }

    public function getEmbeddingModel(): string
    {
        return $this->embeddingModel;
    }

    public function getEmbeddingDimension(): int
    {
        return $this->embeddingDimension;
    }

    public function getScoringVersion(): int
    {
        return $this->scoringVersion;
    }

    public function isActive(): bool
    {
        return $this->active;
    }

    public function isReindexRequired(): bool
    {
        return $this->reindexRequired;
    }

    public function getCreatedAt(): \DateTimeImmutable
    {
        return $this->createdAt;
    }

    /**
     * Marks the profile as active and flags it as requiring a reindex.
     */
    public function activate(): void
    {
        $this->reindexRequired = true;
        $this->active = true;
    }

    /**
     * Marks the profile as inactive; the reindex flag is left untouched.
     */
    public function deactivate(): void
    {
        $this->active = false;
    }

    /**
     * Clears the reindex flag.
     */
    public function markReindexDone(): void
    {
        $this->reindexRequired = false;
    }
}

View File

@@ -0,0 +1,63 @@
<?php
declare(strict_types=1);
namespace App\Index;
use App\Repository\IngestProfileRepository;
/**
 * Resolves the index configuration to use: the active ingest profile when the
 * repository returns one, otherwise the fallback values injected through the
 * constructor.
 */
final class IndexConfigurationProvider
{
    public function __construct(
        private readonly IngestProfileRepository $repository,
        private readonly int $fallbackChunkSize,
        private readonly int $fallbackChunkOverlap,
        private readonly string $fallbackEmbeddingModel,
        private readonly int $fallbackEmbeddingDimension,
        private readonly int $fallbackScoringVersion
    ) {
    }

    /**
     * Builds a fresh IndexConfiguration on every call; the repository is
     * queried each time, nothing is cached here.
     *
     * Index format ('ndjson') and vector backend ('faiss') are fixed.
     */
    public function getConfiguration(): IndexConfiguration
    {
        $profile = $this->repository->findActive();

        // Without an active profile the injected fallback values apply.
        [$chunkSize, $chunkOverlap, $embeddingModel, $embeddingDimension, $scoringVersion] = $profile !== null
            ? [
                $profile->getChunkSize(),
                $profile->getChunkOverlap(),
                $profile->getEmbeddingModel(),
                $profile->getEmbeddingDimension(),
                $profile->getScoringVersion(),
            ]
            : [
                $this->fallbackChunkSize,
                $this->fallbackChunkOverlap,
                $this->fallbackEmbeddingModel,
                $this->fallbackEmbeddingDimension,
                $this->fallbackScoringVersion,
            ];

        return new IndexConfiguration(
            $chunkSize,
            $chunkOverlap,
            $embeddingModel,
            $embeddingDimension,
            $scoringVersion,
            'ndjson',
            'faiss'
        );
    }
}

View File

@@ -7,157 +7,90 @@ namespace App\Index;
final class IndexMetaManager
{
private string $metaPath;
private IndexConfiguration $config;
private IndexConfigurationProvider $provider;
public function __construct(
string $metaPath,
IndexConfiguration $config
IndexConfigurationProvider $provider
) {
$this->metaPath = $metaPath;
$this->config = $config;
$this->provider = $provider;
}
public function getMetaPath(): string
// -----------------------------------------------------
// Public API
// -----------------------------------------------------
public function ensureExists(): void
{
return $this->metaPath;
if (!is_file($this->metaPath)) {
$this->writeMeta(1);
}
}
/**
* @return array<string,mixed>|null
*/
public function readMeta(): ?array
{
if (!is_file($this->metaPath)) {
return null;
}
$raw = file_get_contents($this->metaPath);
if ($raw === false) {
throw new \RuntimeException('Unable to read index_meta.json');
return json_decode(
(string) file_get_contents($this->metaPath),
true
);
}
$data = json_decode($raw, true);
if (!is_array($data)) {
throw new \RuntimeException('index_meta.json is invalid JSON');
}
return $data;
}
/**
* Guardrail:
* - Wenn Meta fehlt → initialisieren
* - Wenn Struktur driftet → Exception
*/
public function validateAgainstCurrent(): void
{
$meta = $this->readMeta();
$current = $this->provider
->getConfiguration()
->toStructureArray();
if ($meta === null) {
$meta = $this->createInitialMeta();
return;
}
$expected = $this->config->toStructureArray();
$diff = $this->diffStructure($meta, $expected);
if ($diff !== []) {
throw new IndexStructureChangedException(
'Index structure changed. Global Reindex required.',
$diff
foreach ($current as $key => $value) {
if (($meta[$key] ?? null) !== $value) {
throw new \RuntimeException(
'Index structure changed. Global Reindex required.'
);
}
}
}
/**
* Wird beim Global Reindex aufgerufen
*/
public function writeMetaForGlobalReindex(): array
public function writeMetaForGlobalReindex(): void
{
$current = $this->readMeta();
$nextVersion = ($current['index_version'] ?? 0) + 1;
$nextVersion = 1;
if (is_array($current) && isset($current['index_version']) && is_int($current['index_version'])) {
$nextVersion = $current['index_version'] + 1;
$this->writeMeta($nextVersion);
}
$meta = $this->buildMetaPayload($nextVersion);
$this->atomicWriteJson($meta);
return $meta;
}
public function getConfig(): IndexConfiguration
public function writeMeta(int $indexVersion): void
{
return $this->config;
}
$config = $this->provider->getConfiguration();
// ---------------------------------------------------------
// Internals
// ---------------------------------------------------------
private function createInitialMeta(): array
{
$meta = $this->buildMetaPayload(1);
$this->atomicWriteJson($meta);
return $meta;
}
private function buildMetaPayload(int $indexVersion): array
{
$structure = $this->config->toStructureArray();
return [
$payload = array_merge(
[
'index_version' => $indexVersion,
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
'embedding_model' => $structure['embedding_model'],
'embedding_dimension' => $structure['embedding_dimension'],
'chunk_size' => $structure['chunk_size'],
'chunk_overlap' => $structure['chunk_overlap'],
'scoring_version' => $structure['scoring_version'],
'index_format' => $structure['index_format'],
'vector_backend' => $structure['vector_backend'],
];
}
],
$config->toStructureArray()
);
private function diffStructure(array $meta, array $expected): array
{
$diff = [];
foreach ($expected as $key => $value) {
$actual = $meta[$key] ?? null;
if ($actual !== $value) {
$diff[$key] = [
'expected' => $value,
'actual' => $actual,
];
}
}
return $diff;
}
private function atomicWriteJson(array $payload): void
{
$dir = dirname($this->metaPath);
if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
throw new \RuntimeException('Unable to create directory: ' . $dir);
if (!is_dir($dir)) {
mkdir($dir, 0777, true);
}
$tmp = $this->metaPath . '.tmp';
$json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($json === false) {
throw new \RuntimeException('Unable to encode index_meta.json');
}
if (file_put_contents($tmp, $json . PHP_EOL) === false) {
throw new \RuntimeException('Unable to write temp meta file');
}
if (!rename($tmp, $this->metaPath)) {
@unlink($tmp);
throw new \RuntimeException('Unable to switch meta file atomically');
}
file_put_contents(
$this->metaPath,
json_encode(
$payload,
JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES
)
);
}
}

View File

@@ -0,0 +1,37 @@
<?php
declare(strict_types=1);
namespace App\Index;
/**
 * Produces a per-key comparison between stored index meta and a profile
 * structure, e.g. for display in a diff table.
 */
final class IndexStructureComparator
{
    /**
     * Compares both arrays key by key using strict (===) equality.
     *
     * Keys of the profile structure come first, followed by keys that only
     * exist in the meta. The keys `index_version` and `created_at` are left
     * out. A key missing on one side is reported as null for that side.
     *
     * @param array<string,mixed>|null $meta             stored meta, or null when there is none
     * @param array<string,mixed>      $profileStructure structure to compare against
     *
     * @return array<string,array{meta:mixed,profile:mixed,equal:bool}>
     */
    public function compare(?array $meta, array $profileStructure): array
    {
        $skipped = ['index_version', 'created_at'];
        $stored = $meta ?? [];
        $rows = [];

        $candidates = [...array_keys($profileStructure), ...array_keys($stored)];

        foreach ($candidates as $field) {
            // Skip keys that were already handled and the excluded ones.
            if (array_key_exists($field, $rows) || in_array($field, $skipped, true)) {
                continue;
            }

            $fromMeta = $stored[$field] ?? null;
            $fromProfile = $profileStructure[$field] ?? null;

            $rows[$field] = [
                'meta' => $fromMeta,
                'profile' => $fromProfile,
                'equal' => $fromMeta === $fromProfile,
            ];
        }

        return $rows;
    }
}

View File

@@ -5,22 +5,31 @@ declare(strict_types=1);
namespace App\Knowledge\Ingest;
use App\Index\IndexConfigurationProvider;
final class SimpleChunker
{
private IndexConfigurationProvider $configurationProvider;
public function __construct(
private int $maxWords = 180,
private int $overlapWords = 30
) {}
IndexConfigurationProvider $configurationProvider
) {
$this->configurationProvider = $configurationProvider;
}
/** @return string[] */
public function chunk(string $text): array
{
$config = $this->configurationProvider->getConfiguration();
$maxWords = $config->getChunkSize();
$overlapWords = $config->getChunkOverlap();
$text = $this->normalize($text);
if ($text === '') {
return [];
}
// Split into tokens: words + whitespace preserved
$tokens = preg_split(
'/(\s+)/u',
$text,
@@ -32,7 +41,6 @@ final class SimpleChunker
return [];
}
// Build word index → token index mapping
$wordTokenIndexes = [];
foreach ($tokens as $i => $token) {
if (!preg_match('/^\s+$/u', $token)) {
@@ -49,12 +57,11 @@ final class SimpleChunker
$wordPos = 0;
while ($wordPos < $totalWords) {
$wordEnd = min($wordPos + $this->maxWords, $totalWords);
$wordEnd = min($wordPos + $maxWords, $totalWords);
$tokenStart = $wordTokenIndexes[$wordPos];
$tokenEnd = $wordTokenIndexes[$wordEnd - 1] + 1;
// Intelligent cut (sentence / paragraph aware)
$tokenEnd = $this->adjustCutToBoundary($tokens, $tokenStart, $tokenEnd);
$chunk = trim(implode('', array_slice(
@@ -71,7 +78,7 @@ final class SimpleChunker
break;
}
$wordPos = max(0, $wordEnd - $this->overlapWords);
$wordPos = max(0, $wordEnd - $overlapWords);
}
return $this->dedupe($chunks);
@@ -86,30 +93,19 @@ final class SimpleChunker
return trim((string) $text);
}
/**
* Move cut backwards to a natural boundary if possible.
* Rules:
* - Never cut inside markdown list items
* - Sentence end only if followed by a line break
* - Paragraph breaks always allowed
*/
private function adjustCutToBoundary(array $tokens, int $start, int $end): int
{
// Detect markdown list context (e.g. "- Foo: Bar")
$startToken = $tokens[$start] ?? '';
if (preg_match('/^- /u', ltrim($startToken))) {
// Keep list blocks intact
return $end;
}
for ($i = $end - 1; $i > $start; $i--) {
// Paragraph boundary
if ($tokens[$i] === "\n\n") {
return $i + 1;
}
// Sentence boundary only if followed by newline
if (
preg_match('/[.!?]\s*$/u', $tokens[$i]) &&
isset($tokens[$i + 1]) &&

View File

@@ -0,0 +1,31 @@
<?php
declare(strict_types=1);
namespace App\Repository;
use App\Entity\IngestProfile;
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
use Doctrine\Persistence\ManagerRegistry;
/**
 * Doctrine repository for IngestProfile entities.
 */
class IngestProfileRepository extends ServiceEntityRepository
{
    public function __construct(ManagerRegistry $registry)
    {
        parent::__construct($registry, IngestProfile::class);
    }

    /**
     * Returns the profile with the highest version number, or null when no
     * profile exists.
     */
    public function findLatestVersion(): ?IngestProfile
    {
        $builder = $this->createQueryBuilder('p');
        $builder->orderBy('p.version', 'DESC');
        $builder->setMaxResults(1);

        $query = $builder->getQuery();

        return $query->getOneOrNullResult();
    }

    /**
     * Returns one profile whose `active` flag is set, or null when there is
     * none.
     */
    public function findActive(): ?IngestProfile
    {
        $criteria = ['active' => true];

        return $this->findOneBy($criteria);
    }
}

View File

@@ -4,7 +4,7 @@ declare(strict_types=1);
namespace App\Vector;
use App\Index\IndexConfiguration;
use App\Index\IndexConfigurationProvider;
use Symfony\Component\Process\Exception\ProcessFailedException;
use Symfony\Component\Process\Process;
@@ -18,7 +18,7 @@ final class VectorIndexBuilder
private string $vectorMetaPath;
private int $timeoutSeconds;
private IndexConfiguration $indexConfiguration;
private IndexConfigurationProvider $configurationProvider;
public function __construct(
string $pythonBin,
@@ -27,7 +27,7 @@ final class VectorIndexBuilder
string $indexMetaPath,
string $vectorIndexPath,
int $timeoutSeconds,
IndexConfiguration $indexConfiguration
IndexConfigurationProvider $configurationProvider
) {
$this->pythonBin = $pythonBin;
$this->scriptPath = $scriptPath;
@@ -36,39 +36,29 @@ final class VectorIndexBuilder
$this->vectorIndexPath = $vectorIndexPath;
$this->vectorMetaPath = $vectorIndexPath . '.meta.json';
$this->timeoutSeconds = $timeoutSeconds;
$this->indexConfiguration = $indexConfiguration;
$this->configurationProvider = $configurationProvider;
}
/**
* Rebuild FAISS Index deterministisch aus index.ndjson.
*/
public function rebuildFromNdjson(?string $logPath = null): void
{
if (!is_file($this->scriptPath)) {
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
}
if (!is_file($this->indexNdjsonPath)) {
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
}
$this->assertPreconditions();
if (!is_file($this->indexMetaPath)) {
$this->initializeIndexMeta();
}
$indexMeta = json_decode((string) file_get_contents($this->indexMetaPath), true);
$indexMeta = $this->readIndexMeta();
if (!is_array($indexMeta) || empty($indexMeta['embedding_model'])) {
throw new \RuntimeException('Invalid index_meta.json');
}
$embeddingModel = (string) $indexMeta['embedding_model'];
$embeddingModel = $indexMeta['embedding_model'];
$tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
// Wichtig: Python erzeugt meta basierend auf endgültigem Namen
$finalMetaPath = $this->vectorMetaPath;
$tmpMetaPath = dirname($this->vectorIndexPath) . '/' . basename($this->vectorIndexPath, '.index') . '.index.meta.json';
// Clean leftovers
@unlink($tmpVectorIndexPath);
@unlink($finalMetaPath);
@unlink($this->vectorMetaPath);
$cmd = [
$this->pythonBin,
@@ -80,23 +70,43 @@ final class VectorIndexBuilder
$process = new Process($cmd);
$process->setTimeout($this->timeoutSeconds);
$process->mustRun();
if (!is_file($tmpVectorIndexPath) || filesize($tmpVectorIndexPath) === 0) {
throw new \RuntimeException('Vector index tmp missing or empty');
$this->runProcess($process, $logPath);
$this->validatePythonOutputs($tmpVectorIndexPath);
$this->atomicSwitch($tmpVectorIndexPath);
}
// Python erzeugt vector.index.meta.json (nicht tmp.meta!)
if (!is_file($this->vectorMetaPath) || filesize($this->vectorMetaPath) === 0) {
throw new \RuntimeException('Vector meta missing or empty');
// -----------------------------------------------------
// Internals
// -----------------------------------------------------
private function assertPreconditions(): void
{
if (!is_file($this->scriptPath)) {
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
}
// Atomarer Switch für Index
if (!rename($tmpVectorIndexPath, $this->vectorIndexPath)) {
throw new \RuntimeException('Atomic switch failed for vector index');
if (!is_file($this->indexNdjsonPath)) {
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
}
}
private function readIndexMeta(): array
{
$meta = json_decode(
(string) file_get_contents($this->indexMetaPath),
true
);
if (!is_array($meta) || empty($meta['embedding_model'])) {
throw new \RuntimeException('Invalid index_meta.json');
}
return $meta;
}
private function initializeIndexMeta(): void
{
$dir = dirname($this->indexMetaPath);
@@ -105,14 +115,16 @@ final class VectorIndexBuilder
throw new \RuntimeException('Cannot create knowledge directory');
}
$config = $this->configurationProvider->getConfiguration();
$data = [
'index_version' => 1,
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
'embedding_model' => $this->indexConfiguration->getEmbeddingModel(),
'embedding_dimension' => $this->indexConfiguration->getEmbeddingDimension(),
'chunk_size' => $this->indexConfiguration->getChunkSize(),
'chunk_overlap' => $this->indexConfiguration->getChunkOverlap(),
'scoring_version' => $this->indexConfiguration->getScoringVersion(),
'embedding_model' => $config->getEmbeddingModel(),
'embedding_dimension' => $config->getEmbeddingDimension(),
'chunk_size' => $config->getChunkSize(),
'chunk_overlap' => $config->getChunkOverlap(),
'scoring_version' => $config->getScoringVersion(),
'index_format' => 'ndjson',
'vector_backend' => 'faiss',
];
@@ -123,6 +135,24 @@ final class VectorIndexBuilder
);
}
private function validatePythonOutputs(string $tmpVectorIndexPath): void
{
if (!is_file($tmpVectorIndexPath) || filesize($tmpVectorIndexPath) === 0) {
throw new \RuntimeException('Vector index tmp missing or empty');
}
if (!is_file($this->vectorMetaPath) || filesize($this->vectorMetaPath) === 0) {
throw new \RuntimeException('Vector meta missing or empty');
}
}
private function atomicSwitch(string $tmpVectorIndexPath): void
{
if (!rename($tmpVectorIndexPath, $this->vectorIndexPath)) {
throw new \RuntimeException('Atomic switch failed for vector index');
}
}
private function runProcess(Process $process, ?string $logPath): void
{
if ($logPath !== null) {

View File

@@ -28,22 +28,35 @@
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_dashboard') }}">Dashboard</a>
</li>
</ul>
<hr>
<h3>Dokumente und Wissen</h3>
<ul class="nav flex-column">
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_documents') }}">Dokumente</a>
</li>
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_jobs') }}">
Indexierung Jobs (Ingest)
Indexierung (Ingest Jobs)
</a>
</li>
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_system_agent') }}">
Wissen (Chunk-Index)
Wissensdaten (Chunk-Index)
</a>
</li>
</ul>
<hr>
<h3>System-Profile</h3>
<ul class="nav flex-column">
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_system_prompt') }}">
System-Prompt-Profil
</a>
</li>
<li class="nav-item">
<a class="nav-link text-light" href="{{ path('admin_system_prompt') }}">
System Prompt Settings
<a class="nav-link text-light" href="{{ path('admin_ingest_profile_list') }}">
Indexierungs-Profil (Ingest Profiles)
</a>
</li>
</ul>

View File

@@ -0,0 +1,26 @@
{% extends 'admin/base.html.twig' %}
{% block title %}Create Ingest Profile{% endblock %}
{% block body %}
{# Creation form for an ingest profile. It posts back to the URL it was served from. #}
<h1>Create Ingest Profile</h1>
{# "errors" is optional: a list of messages, one alert per entry. #}
{% for error in errors|default([]) %}
    <div class="alert alert-danger">{{ error }}</div>
{% endfor %}
<form method="post">
    <label for="chunk_size">Chunk Size:</label>
    <input type="number" id="chunk_size" name="chunk_size" min="1" step="1" required><br>
    <label for="chunk_overlap">Chunk Overlap:</label>
    <input type="number" id="chunk_overlap" name="chunk_overlap" min="0" step="1" required><br>
    <label for="embedding_model">Embedding Model:</label>
    {# maxlength mirrors the 255-character embedding_model column. #}
    <input type="text" id="embedding_model" name="embedding_model" maxlength="255" required><br>
    <label for="embedding_dimension">Embedding Dimension:</label>
    <input type="number" id="embedding_dimension" name="embedding_dimension" min="1" step="1" required><br>
    <label for="scoring_version">Scoring Version:</label>
    <input type="number" id="scoring_version" name="scoring_version" step="1" required><br>
    <button type="submit">Create</button>
</form>
{% endblock %}

View File

@@ -0,0 +1,89 @@
{% extends 'admin/base.html.twig' %}
{% block title %}Ingest Profiles{% endblock %}
{% block body %}
{# Overview of all ingest profiles plus a per-key diff between the stored index meta and the current configuration. #}
<h1>Ingest Profiles</h1>
{# structureMismatch is true as soon as one diff row is not equal. #}
{% if structureMismatch %}
    <div class="alert alert-danger">
        ⚠ Strukturabweichung festgestellt – Globale Neuindizierung erforderlich | <a href="{{ path('admin_jobs') }}">Global Reindex aufrufen</a>
    </div>
{% else %}
    <div class="alert alert-success">
        ✅ Die Indexstruktur entspricht dem aktiven Profil
    </div>
{% endif %}
<p><a class="btn btn-outline-light" href="{{ path('admin_ingest_profile_create') }}">+ Neues Profil anlegen</a></p>
<h2>Profiles</h2>
<table border="1" cellpadding="6" class="table table-sm table-dark align-middle">
    <tr>
        <th>Version</th>
        <th>Chunk Size</th>
        <th>Overlap</th>
        <th>Model</th>
        <th>Dimension</th>
        <th>Scoring</th>
        <th>Active</th>
        <th>Reindex Required</th>
        <th>Actions</th>
    </tr>
    {% for p in profiles %}
        <tr>
            <td>{{ p.version }}</td>
            <td>{{ p.chunkSize }}</td>
            <td>{{ p.chunkOverlap }}</td>
            <td>{{ p.embeddingModel }}</td>
            <td>{{ p.embeddingDimension }}</td>
            <td>{{ p.scoringVersion }}</td>
            <td>{{ p.active ? 'Yes' : 'No' }}</td>
            <td>{{ p.reindexRequired ? 'Yes' : 'No' }}</td>
            <td>
                {# NOTE(review): activation is triggered by a plain link (GET); consider a POST form with a CSRF token. #}
                {% if not p.active %}
                    <a class="btn btn-outline-info btn-sm" href="{{ path('admin_ingest_profile_activate', {id: p.id}) }}">
                        Aktivieren
                    </a>
                {% endif %}
            </td>
        </tr>
    {% endfor %}
</table>
<hr>
<h2>Index-Struktur-Profil Diff</h2>
{% if indexMeta %}
    {# default() keeps the page rendering when the meta has no index_version key. #}
    <p><strong>Index Version:</strong> {{ indexMeta.index_version|default('n/a') }}</p>
{% else %}
    <p>No index_meta.json found.</p>
{% endif %}
<table border="1" cellpadding="6" class="table table-sm table-dark align-middle">
    <tr>
        <th>Parameter</th>
        <th>Index Meta</th>
        <th>Active Profile</th>
        <th>Status</th>
    </tr>
    {% for key, row in diff %}
        <tr>
            <td>{{ key }}</td>
            {# A null side means the key is missing there; show a dash instead of an empty cell. #}
            <td>{{ row.meta ?? '–' }}</td>
            <td>{{ row.profile ?? '–' }}</td>
            <td>
                {% if row.equal %}
                    <span style="color:green;">✓</span>
                {% else %}
                    <span style="color:red;">✗</span>
                {% endif %}
            </td>
        </tr>
    {% endfor %}
</table>
{% endblock %}

View File

@@ -4,7 +4,7 @@
{% block body %}
<h1 class="h4 mb-4">Ingest Jobs</h1>
<h1 class="h4 mb-4">Indexierung (Ingest Jobs-Liste)</h1>
<form method="post"
action="{{ path('admin_global_reindex') }}"

View File

@@ -1,6 +1,6 @@
{% extends 'admin/base.html.twig' %}
{% block title %}Agent System Overview{% endblock %}
{% block title %}Wissensdaten (Chunk-Index){% endblock %}
{% block body %}
@@ -9,7 +9,7 @@
← Zurück
</a>
<h1 class="h4 mb-4">Agent System Overview</h1>
<h1 class="h4 mb-4">Wissensdaten (Chunk-Index)</h1>
{# ============================= #}
{# Index Meta Section #}