new ingest und profile settings
This commit is contained in:
@@ -21,11 +21,12 @@ parameters:
|
|||||||
mto.knowledge.vector_index: '%mto.knowledge.root%/vector.index'
|
mto.knowledge.vector_index: '%mto.knowledge.root%/vector.index'
|
||||||
mto.knowledge.vector_index_meta: '%mto.knowledge.root%/vector.index.meta.json'
|
mto.knowledge.vector_index_meta: '%mto.knowledge.root%/vector.index.meta.json'
|
||||||
mto.knowledge.upload: '%mto.knowledge.root%/uploads'
|
mto.knowledge.upload: '%mto.knowledge.root%/uploads'
|
||||||
|
|
||||||
# Backward compatibility alias
|
# Backward compatibility alias
|
||||||
mto.vector.data.upload.path: '%mto.knowledge.upload%'
|
mto.vector.data.upload.path: '%mto.knowledge.upload%'
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
# Index Configuration (Guardrails)
|
# Index Configuration (Fallback Guardrails)
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
mto.index.chunk_size: 800
|
mto.index.chunk_size: 800
|
||||||
@@ -39,12 +40,11 @@ parameters:
|
|||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
mto.vector.python_bin: '/var/www/html/.venv/bin/python3'
|
mto.vector.python_bin: '/var/www/html/.venv/bin/python3'
|
||||||
|
|
||||||
mto.vector.ingest_script: '%mto.root%/src/Vector/vector_ingest.py'
|
mto.vector.ingest_script: '%mto.root%/src/Vector/vector_ingest.py'
|
||||||
mto.vector.search_script: '%mto.root%/src/Vector/vector_search.py'
|
mto.vector.search_script: '%mto.root%/src/Vector/vector_search.py'
|
||||||
|
|
||||||
mto.vector.timeout: 600
|
mto.vector.timeout: 600
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
# Services
|
# Services
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
@@ -117,12 +117,30 @@ services:
|
|||||||
alias: App\Knowledge\Retrieval\CachedRetriever
|
alias: App\Knowledge\Retrieval\CachedRetriever
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
# Vector Search (noch unverändert – Umbau kommt in Schritt 2)
|
# Index Configuration Provider (DB + Fallback)
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
|
App\Index\IndexConfigurationProvider:
|
||||||
|
arguments:
|
||||||
|
$repository: '@App\Repository\IngestProfileRepository'
|
||||||
|
$fallbackChunkSize: '%mto.index.chunk_size%'
|
||||||
|
$fallbackChunkOverlap: '%mto.index.chunk_overlap%'
|
||||||
|
$fallbackEmbeddingModel: '%mto.index.embedding_model%'
|
||||||
|
$fallbackEmbeddingDimension: '%mto.index.embedding_dimension%'
|
||||||
|
$fallbackScoringVersion: '%mto.index.scoring_version%'
|
||||||
|
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
# Index Meta Manager (uses Provider)
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
App\Index\IndexMetaManager:
|
App\Index\IndexMetaManager:
|
||||||
arguments:
|
arguments:
|
||||||
$metaPath: '%mto.knowledge.index_meta%'
|
$metaPath: '%mto.knowledge.index_meta%'
|
||||||
$config: '@App\Index\IndexConfiguration'
|
$provider: '@App\Index\IndexConfigurationProvider'
|
||||||
|
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
# Vector Layer
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
App\Vector\VectorSearchClient:
|
App\Vector\VectorSearchClient:
|
||||||
arguments:
|
arguments:
|
||||||
@@ -141,22 +159,12 @@ services:
|
|||||||
$indexMetaPath: '%mto.knowledge.index_meta%'
|
$indexMetaPath: '%mto.knowledge.index_meta%'
|
||||||
$vectorIndexPath: '%mto.knowledge.vector_index%'
|
$vectorIndexPath: '%mto.knowledge.vector_index%'
|
||||||
$timeoutSeconds: '%mto.vector.timeout%'
|
$timeoutSeconds: '%mto.vector.timeout%'
|
||||||
$indexConfiguration: '@App\Index\IndexConfiguration'
|
$configurationProvider: '@App\Index\IndexConfigurationProvider'
|
||||||
|
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
# Index Configuration
|
# Admin Utilities
|
||||||
# ------------------------------------------------------------
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
App\Index\IndexConfiguration:
|
|
||||||
arguments:
|
|
||||||
$chunkSize: '%mto.index.chunk_size%'
|
|
||||||
$chunkOverlap: '%mto.index.chunk_overlap%'
|
|
||||||
$embeddingModel: '%mto.index.embedding_model%'
|
|
||||||
$embeddingDimension: '%mto.index.embedding_dimension%'
|
|
||||||
$scoringVersion: '%mto.index.scoring_version%'
|
|
||||||
$indexFormat: 'ndjson'
|
|
||||||
$vectorBackend: 'faiss'
|
|
||||||
|
|
||||||
App\Service\Admin\IndexNdjsonInspector:
|
App\Service\Admin\IndexNdjsonInspector:
|
||||||
arguments:
|
arguments:
|
||||||
$ndJsonPath: '%mto.knowledge.ndjson%'
|
$ndJsonPath: '%mto.knowledge.ndjson%'
|
||||||
|
|||||||
38
migrations/Version20260216000100.php
Normal file
38
migrations/Version20260216000100.php
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace DoctrineMigrations;
|
||||||
|
|
||||||
|
use Doctrine\DBAL\Schema\Schema;
|
||||||
|
use Doctrine\Migrations\AbstractMigration;
|
||||||
|
|
||||||
|
/**
 * Schema migration that introduces the `ingest_profile` table.
 */
final class Version20260216000100 extends AbstractMigration
{
    /** Name of the table managed by this migration. */
    private const TABLE_NAME = 'ingest_profile';

    public function getDescription(): string
    {
        return 'Create ingest_profile table';
    }

    /**
     * Creates the table with a 16-byte binary primary key.
     *
     * Columns are declared as a name => [type, options] map and added in
     * declaration order.
     */
    public function up(Schema $schema): void
    {
        $columns = [
            'id' => ['binary', ['length' => 16]],
            'version' => ['integer', []],
            'chunk_size' => ['integer', []],
            'chunk_overlap' => ['integer', []],
            'embedding_model' => ['string', ['length' => 255]],
            'embedding_dimension' => ['integer', []],
            'scoring_version' => ['integer', []],
            'active' => ['boolean', []],
            'reindex_required' => ['boolean', []],
            'created_at' => ['datetime_immutable', []],
        ];

        $table = $schema->createTable(self::TABLE_NAME);

        foreach ($columns as $columnName => [$typeName, $options]) {
            $table->addColumn($columnName, $typeName, $options);
        }

        $table->setPrimaryKey(['id']);
    }

    /**
     * Reverts the migration by dropping the table again.
     */
    public function down(Schema $schema): void
    {
        $schema->dropTable(self::TABLE_NAME);
    }
}
|
||||||
102
src/Controller/Admin/IngestProfileController.php
Normal file
102
src/Controller/Admin/IngestProfileController.php
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Controller\Admin;
|
||||||
|
|
||||||
|
use App\Entity\IngestProfile;
|
||||||
|
use App\Index\IndexConfigurationProvider;
|
||||||
|
use App\Index\IndexMetaManager;
|
||||||
|
use App\Index\IndexStructureComparator;
|
||||||
|
use App\Repository\IngestProfileRepository;
|
||||||
|
use Doctrine\ORM\EntityManagerInterface;
|
||||||
|
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
|
||||||
|
use Symfony\Component\HttpFoundation\Request;
|
||||||
|
use Symfony\Component\HttpFoundation\Response;
|
||||||
|
use Symfony\Component\Routing\Attribute\Route;
|
||||||
|
|
||||||
|
/**
 * Admin screens for managing ingest profiles (chunking / embedding settings).
 */
#[Route('/admin/ingest-profiles')]
class IngestProfileController extends AbstractController
{
    /** Maximum length of the `embedding_model` column. */
    private const EMBEDDING_MODEL_MAX_LENGTH = 255;

    /**
     * Shows all profiles (newest version first) together with a comparison
     * between the stored index meta and the currently effective configuration.
     */
    #[Route('/', name: 'admin_ingest_profile_list')]
    public function list(
        IngestProfileRepository $repo,
        IndexMetaManager $metaManager,
        IndexConfigurationProvider $provider,
        IndexStructureComparator $comparator
    ): Response {
        $profiles = $repo->findBy([], ['version' => 'DESC']);
        $activeProfile = $repo->findActive();

        $meta = $metaManager->readMeta();
        $currentStructure = $provider->getConfiguration()->toStructureArray();

        $diff = $comparator->compare($meta, $currentStructure);

        // A single differing field is enough to flag the index as out of sync.
        $structureMismatch = false;
        foreach ($diff as $row) {
            if (!$row['equal']) {
                $structureMismatch = true;
                break;
            }
        }

        return $this->render('admin/ingest_profile/list.html.twig', [
            'profiles' => $profiles,
            'activeProfile' => $activeProfile,
            'indexMeta' => $meta,
            'diff' => $diff,
            'structureMismatch' => $structureMismatch,
        ]);
    }

    /**
     * Renders the creation form (GET) or stores a new profile (POST).
     *
     * The new profile receives the next free version number and is persisted
     * inactive; activation is a separate step.
     *
     * @throws \Symfony\Component\HttpKernel\Exception\BadRequestHttpException
     *         when the submitted values cannot form a usable profile
     */
    #[Route('/create', name: 'admin_ingest_profile_create', methods: ['GET', 'POST'])]
    public function create(
        Request $request,
        IngestProfileRepository $repo,
        EntityManagerInterface $em
    ): Response {
        if (!$request->isMethod('POST')) {
            return $this->render('admin/ingest_profile/create.html.twig');
        }

        $chunkSize = (int) $request->request->get('chunk_size');
        $chunkOverlap = (int) $request->request->get('chunk_overlap');
        $embeddingModel = trim((string) $request->request->get('embedding_model'));
        $embeddingDimension = (int) $request->request->get('embedding_dimension');
        $scoringVersion = (int) $request->request->get('scoring_version');

        // Missing fields cast to 0 / '' – reject them here instead of storing
        // a profile that would break chunking once it is activated.
        $this->assertValidProfileInput($chunkSize, $chunkOverlap, $embeddingModel, $embeddingDimension);

        // NOTE(review): version allocation is read-then-write; two concurrent
        // submissions could obtain the same number. No unique constraint on
        // `version` is visible in the migration – confirm whether that matters.
        $latest = $repo->findLatestVersion();
        $nextVersion = $latest !== null ? $latest->getVersion() + 1 : 1;

        $profile = new IngestProfile(
            $nextVersion,
            $chunkSize,
            $chunkOverlap,
            $embeddingModel,
            $embeddingDimension,
            $scoringVersion
        );

        $em->persist($profile);
        $em->flush();

        return $this->redirectToRoute('admin_ingest_profile_list');
    }

    /**
     * Makes the given profile the active one and deactivates the previously
     * active profile, if any.
     *
     * NOTE(review): this route accepts every HTTP method (including GET) and
     * no CSRF check is visible here although it changes state. Restricting it
     * to POST with a token requires a matching template change – confirm.
     */
    #[Route('/activate/{id}', name: 'admin_ingest_profile_activate')]
    public function activate(
        IngestProfile $profile,
        IngestProfileRepository $repo,
        EntityManagerInterface $em
    ): Response {
        $active = $repo->findActive();
        if ($active !== null) {
            $active->deactivate();
        }

        $profile->activate();

        $em->flush();

        return $this->redirectToRoute('admin_ingest_profile_list');
    }

    /**
     * Guards the invariants the chunker relies on: a positive chunk size and
     * an overlap strictly smaller than it (otherwise the chunk window cannot
     * advance), plus a usable embedding model and dimension.
     *
     * @throws \Symfony\Component\HttpKernel\Exception\BadRequestHttpException
     */
    private function assertValidProfileInput(
        int $chunkSize,
        int $chunkOverlap,
        string $embeddingModel,
        int $embeddingDimension
    ): void {
        $errors = [];

        if ($chunkSize <= 0) {
            $errors[] = 'chunk_size must be greater than 0';
        }
        if ($chunkOverlap < 0 || $chunkOverlap >= $chunkSize) {
            $errors[] = 'chunk_overlap must be >= 0 and smaller than chunk_size';
        }
        if ($embeddingModel === '' || mb_strlen($embeddingModel) > self::EMBEDDING_MODEL_MAX_LENGTH) {
            $errors[] = 'embedding_model must be between 1 and 255 characters';
        }
        if ($embeddingDimension <= 0) {
            $errors[] = 'embedding_dimension must be greater than 0';
        }

        if ($errors !== []) {
            throw new \Symfony\Component\HttpKernel\Exception\BadRequestHttpException(
                'Invalid ingest profile: ' . implode('; ', $errors)
            );
        }
    }
}
|
||||||
90
src/Entity/IngestProfile.php
Normal file
90
src/Entity/IngestProfile.php
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Entity;
|
||||||
|
|
||||||
|
use App\Repository\IngestProfileRepository;
|
||||||
|
use Doctrine\ORM\Mapping as ORM;
|
||||||
|
use Symfony\Component\Uid\Uuid;
|
||||||
|
|
||||||
|
/**
 * A versioned set of ingest settings (chunking, embedding model, scoring).
 *
 * New profiles start inactive and flagged as requiring a reindex.
 */
#[ORM\Entity(repositoryClass: IngestProfileRepository::class)]
#[ORM\Table(name: 'ingest_profile')]
class IngestProfile
{
    #[ORM\Id]
    #[ORM\Column(type: 'uuid', unique: true)]
    private Uuid $id;

    #[ORM\Column(type: 'integer')]
    private int $version;

    #[ORM\Column(type: 'integer')]
    private int $chunkSize;

    #[ORM\Column(type: 'integer')]
    private int $chunkOverlap;

    #[ORM\Column(type: 'string', length: 255)]
    private string $embeddingModel;

    #[ORM\Column(type: 'integer')]
    private int $embeddingDimension;

    #[ORM\Column(type: 'integer')]
    private int $scoringVersion;

    #[ORM\Column(type: 'boolean')]
    private bool $active = false;

    #[ORM\Column(type: 'boolean')]
    private bool $reindexRequired = true;

    #[ORM\Column(type: 'datetime_immutable')]
    private \DateTimeImmutable $createdAt;

    /**
     * Builds a new profile with a random (v4) UUID and the current time as
     * creation timestamp.
     */
    public function __construct(
        int $version,
        int $chunkSize,
        int $chunkOverlap,
        string $embeddingModel,
        int $embeddingDimension,
        int $scoringVersion
    ) {
        // Generated values first, caller-supplied values afterwards.
        $this->id = Uuid::v4();
        $this->createdAt = new \DateTimeImmutable();

        $this->version = $version;
        $this->chunkSize = $chunkSize;
        $this->chunkOverlap = $chunkOverlap;
        $this->embeddingModel = $embeddingModel;
        $this->embeddingDimension = $embeddingDimension;
        $this->scoringVersion = $scoringVersion;
    }

    public function getId(): Uuid
    {
        return $this->id;
    }

    public function getVersion(): int
    {
        return $this->version;
    }

    public function getChunkSize(): int
    {
        return $this->chunkSize;
    }

    public function getChunkOverlap(): int
    {
        return $this->chunkOverlap;
    }

    public function getEmbeddingModel(): string
    {
        return $this->embeddingModel;
    }

    public function getEmbeddingDimension(): int
    {
        return $this->embeddingDimension;
    }

    public function getScoringVersion(): int
    {
        return $this->scoringVersion;
    }

    public function isActive(): bool
    {
        return $this->active;
    }

    public function isReindexRequired(): bool
    {
        return $this->reindexRequired;
    }

    public function getCreatedAt(): \DateTimeImmutable
    {
        return $this->createdAt;
    }

    /**
     * Marks the profile as active; activation always flags a pending reindex.
     */
    public function activate(): void
    {
        $this->reindexRequired = true;
        $this->active = true;
    }

    /**
     * Clears the active flag; the reindex flag is left untouched.
     */
    public function deactivate(): void
    {
        $this->active = false;
    }

    /**
     * Clears the pending-reindex flag.
     */
    public function markReindexDone(): void
    {
        $this->reindexRequired = false;
    }
}
|
||||||
63
src/Index/IndexConfigurationProvider.php
Normal file
63
src/Index/IndexConfigurationProvider.php
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Index;
|
||||||
|
|
||||||
|
use App\Repository\IngestProfileRepository;
|
||||||
|
|
||||||
|
/**
 * Resolves the effective index configuration: the active ingest profile when
 * one exists, otherwise the injected fallback values.
 */
final class IndexConfigurationProvider
{
    /** Index format passed to every IndexConfiguration built here. */
    private const INDEX_FORMAT = 'ndjson';

    /** Vector backend passed to every IndexConfiguration built here. */
    private const VECTOR_BACKEND = 'faiss';

    public function __construct(
        private IngestProfileRepository $repository,
        private int $fallbackChunkSize,
        private int $fallbackChunkOverlap,
        private string $fallbackEmbeddingModel,
        private int $fallbackEmbeddingDimension,
        private int $fallbackScoringVersion
    ) {
    }

    /**
     * Builds a fresh IndexConfiguration on every call; the repository is
     * queried each time.
     */
    public function getConfiguration(): IndexConfiguration
    {
        $profile = $this->repository->findActive();

        // Without an active profile the injected fallback values apply.
        [$chunkSize, $chunkOverlap, $embeddingModel, $embeddingDimension, $scoringVersion] = $profile === null
            ? [
                $this->fallbackChunkSize,
                $this->fallbackChunkOverlap,
                $this->fallbackEmbeddingModel,
                $this->fallbackEmbeddingDimension,
                $this->fallbackScoringVersion,
            ]
            : [
                $profile->getChunkSize(),
                $profile->getChunkOverlap(),
                $profile->getEmbeddingModel(),
                $profile->getEmbeddingDimension(),
                $profile->getScoringVersion(),
            ];

        return new IndexConfiguration(
            $chunkSize,
            $chunkOverlap,
            $embeddingModel,
            $embeddingDimension,
            $scoringVersion,
            self::INDEX_FORMAT,
            self::VECTOR_BACKEND
        );
    }
}
|
||||||
@@ -7,157 +7,90 @@ namespace App\Index;
|
|||||||
/**
 * Reads, validates and writes the index meta file that records which
 * structural settings the current index was built with.
 */
final class IndexMetaManager
{
    private string $metaPath;
    private IndexConfigurationProvider $provider;

    public function __construct(
        string $metaPath,
        IndexConfigurationProvider $provider
    ) {
        $this->metaPath = $metaPath;
        $this->provider = $provider;
    }

    // -----------------------------------------------------
    // Public API
    // -----------------------------------------------------

    /**
     * Writes an initial meta file (index version 1) when none exists yet.
     *
     * @throws \RuntimeException when the file cannot be written
     */
    public function ensureExists(): void
    {
        if (!is_file($this->metaPath)) {
            $this->writeMeta(1);
        }
    }

    /**
     * Returns the decoded meta file, or null when the file does not exist.
     *
     * An unreadable or non-object/array JSON file raises an exception instead
     * of being reported as "missing", so a corrupt file cannot silently
     * bypass validateAgainstCurrent().
     *
     * @return array<string,mixed>|null
     *
     * @throws \RuntimeException when the file is unreadable or not valid JSON
     */
    public function readMeta(): ?array
    {
        if (!is_file($this->metaPath)) {
            return null;
        }

        $raw = file_get_contents($this->metaPath);
        if ($raw === false) {
            throw new \RuntimeException('Unable to read index_meta.json');
        }

        $data = json_decode($raw, true);
        if (!is_array($data)) {
            throw new \RuntimeException('index_meta.json is invalid JSON');
        }

        return $data;
    }

    /**
     * Compares the stored meta with the currently effective configuration.
     *
     * A missing meta file is accepted without further checks. Every key of
     * the current structure must be present in the meta with a strictly
     * identical value.
     *
     * NOTE(review): the previous revision threw IndexStructureChangedException
     * carrying the diff; callers catching that type should be checked.
     *
     * @throws \RuntimeException when any structural value differs
     */
    public function validateAgainstCurrent(): void
    {
        $meta = $this->readMeta();
        if ($meta === null) {
            return;
        }

        $current = $this->provider
            ->getConfiguration()
            ->toStructureArray();

        foreach ($current as $key => $value) {
            if (($meta[$key] ?? null) !== $value) {
                throw new \RuntimeException(
                    'Index structure changed. Global Reindex required.'
                );
            }
        }
    }

    /**
     * Writes a meta file for a global reindex with an incremented index
     * version. Starts again at 1 when no usable integer version is stored.
     *
     * @throws \RuntimeException when the meta file cannot be read or written
     */
    public function writeMetaForGlobalReindex(): void
    {
        $current = $this->readMeta();

        $nextVersion = 1;
        if (is_array($current) && isset($current['index_version']) && is_int($current['index_version'])) {
            $nextVersion = $current['index_version'] + 1;
        }

        $this->writeMeta($nextVersion);
    }

    /**
     * Writes the meta file for the given index version using the currently
     * effective configuration.
     *
     * The payload is written to a temporary sibling file first and then
     * renamed over the target so readers never observe a partial file.
     *
     * @throws \RuntimeException when the directory, the encoding or the write fails
     */
    public function writeMeta(int $indexVersion): void
    {
        $config = $this->provider->getConfiguration();

        $payload = array_merge(
            [
                'index_version' => $indexVersion,
                'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
            ],
            $config->toStructureArray()
        );

        $dir = dirname($this->metaPath);
        // The trailing is_dir() tolerates a concurrent process creating the directory.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \RuntimeException('Unable to create directory: ' . $dir);
        }

        $json = json_encode($payload, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            throw new \RuntimeException('Unable to encode index_meta.json');
        }

        $tmp = $this->metaPath . '.tmp';
        if (file_put_contents($tmp, $json) === false) {
            throw new \RuntimeException('Unable to write temp meta file');
        }

        if (!rename($tmp, $this->metaPath)) {
            // Best-effort cleanup of the temporary file before reporting the failure.
            if (is_file($tmp)) {
                unlink($tmp);
            }
            throw new \RuntimeException('Unable to switch meta file atomically');
        }
    }
}
|
||||||
|
|||||||
37
src/Index/IndexStructureComparator.php
Normal file
37
src/Index/IndexStructureComparator.php
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Index;
|
||||||
|
|
||||||
|
/**
 * Produces a field-by-field comparison between stored index meta and a
 * profile structure.
 */
final class IndexStructureComparator
{
    /** Meta keys that are skipped entirely and never appear in the result. */
    private const IGNORED_KEYS = ['index_version', 'created_at'];

    /**
     * Compares every key found in either input (profile keys first, then keys
     * that only exist in the meta), skipping the ignored keys.
     *
     * @param array<string,mixed>|null $meta             stored meta, or null when absent
     * @param array<string,mixed>      $profileStructure structure to compare against
     *
     * @return array<string,array{meta:mixed,profile:mixed,equal:bool}>
     *         one row per key; a key missing on one side is reported as null
     */
    public function compare(?array $meta, array $profileStructure): array
    {
        $stored = $meta ?: [];
        $comparison = [];

        // Array union keeps profile keys first and appends meta-only keys.
        foreach (array_keys($profileStructure + $stored) as $field) {
            if (in_array($field, self::IGNORED_KEYS, true)) {
                continue;
            }

            $fromMeta = $stored[$field] ?? null;
            $fromProfile = $profileStructure[$field] ?? null;

            $comparison[$field] = [
                'meta' => $fromMeta,
                'profile' => $fromProfile,
                'equal' => $fromMeta === $fromProfile,
            ];
        }

        return $comparison;
    }
}
|
||||||
@@ -5,22 +5,31 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Knowledge\Ingest;
|
namespace App\Knowledge\Ingest;
|
||||||
|
|
||||||
|
use App\Index\IndexConfigurationProvider;
|
||||||
|
|
||||||
final class SimpleChunker
|
final class SimpleChunker
|
||||||
{
|
{
|
||||||
|
private IndexConfigurationProvider $configurationProvider;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private int $maxWords = 180,
|
IndexConfigurationProvider $configurationProvider
|
||||||
private int $overlapWords = 30
|
) {
|
||||||
) {}
|
$this->configurationProvider = $configurationProvider;
|
||||||
|
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
public function chunk(string $text): array
|
public function chunk(string $text): array
|
||||||
{
|
{
|
||||||
|
$config = $this->configurationProvider->getConfiguration();
|
||||||
|
|
||||||
|
$maxWords = $config->getChunkSize();
|
||||||
|
$overlapWords = $config->getChunkOverlap();
|
||||||
|
|
||||||
$text = $this->normalize($text);
|
$text = $this->normalize($text);
|
||||||
if ($text === '') {
|
if ($text === '') {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split into tokens: words + whitespace preserved
|
|
||||||
$tokens = preg_split(
|
$tokens = preg_split(
|
||||||
'/(\s+)/u',
|
'/(\s+)/u',
|
||||||
$text,
|
$text,
|
||||||
@@ -32,7 +41,6 @@ final class SimpleChunker
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build word index → token index mapping
|
|
||||||
$wordTokenIndexes = [];
|
$wordTokenIndexes = [];
|
||||||
foreach ($tokens as $i => $token) {
|
foreach ($tokens as $i => $token) {
|
||||||
if (!preg_match('/^\s+$/u', $token)) {
|
if (!preg_match('/^\s+$/u', $token)) {
|
||||||
@@ -49,12 +57,11 @@ final class SimpleChunker
|
|||||||
$wordPos = 0;
|
$wordPos = 0;
|
||||||
|
|
||||||
while ($wordPos < $totalWords) {
|
while ($wordPos < $totalWords) {
|
||||||
$wordEnd = min($wordPos + $this->maxWords, $totalWords);
|
$wordEnd = min($wordPos + $maxWords, $totalWords);
|
||||||
|
|
||||||
$tokenStart = $wordTokenIndexes[$wordPos];
|
$tokenStart = $wordTokenIndexes[$wordPos];
|
||||||
$tokenEnd = $wordTokenIndexes[$wordEnd - 1] + 1;
|
$tokenEnd = $wordTokenIndexes[$wordEnd - 1] + 1;
|
||||||
|
|
||||||
// Intelligent cut (sentence / paragraph aware)
|
|
||||||
$tokenEnd = $this->adjustCutToBoundary($tokens, $tokenStart, $tokenEnd);
|
$tokenEnd = $this->adjustCutToBoundary($tokens, $tokenStart, $tokenEnd);
|
||||||
|
|
||||||
$chunk = trim(implode('', array_slice(
|
$chunk = trim(implode('', array_slice(
|
||||||
@@ -71,7 +78,7 @@ final class SimpleChunker
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
$wordPos = max(0, $wordEnd - $this->overlapWords);
|
$wordPos = max(0, $wordEnd - $overlapWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $this->dedupe($chunks);
|
return $this->dedupe($chunks);
|
||||||
@@ -86,30 +93,19 @@ final class SimpleChunker
|
|||||||
return trim((string) $text);
|
return trim((string) $text);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Move cut backwards to a natural boundary if possible.
|
|
||||||
* Rules:
|
|
||||||
* - Never cut inside markdown list items
|
|
||||||
* - Sentence end only if followed by a line break
|
|
||||||
* - Paragraph breaks always allowed
|
|
||||||
*/
|
|
||||||
private function adjustCutToBoundary(array $tokens, int $start, int $end): int
|
private function adjustCutToBoundary(array $tokens, int $start, int $end): int
|
||||||
{
|
{
|
||||||
// Detect markdown list context (e.g. "- Foo: Bar")
|
|
||||||
$startToken = $tokens[$start] ?? '';
|
$startToken = $tokens[$start] ?? '';
|
||||||
if (preg_match('/^- /u', ltrim($startToken))) {
|
if (preg_match('/^- /u', ltrim($startToken))) {
|
||||||
// Keep list blocks intact
|
|
||||||
return $end;
|
return $end;
|
||||||
}
|
}
|
||||||
|
|
||||||
for ($i = $end - 1; $i > $start; $i--) {
|
for ($i = $end - 1; $i > $start; $i--) {
|
||||||
|
|
||||||
// Paragraph boundary
|
|
||||||
if ($tokens[$i] === "\n\n") {
|
if ($tokens[$i] === "\n\n") {
|
||||||
return $i + 1;
|
return $i + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sentence boundary only if followed by newline
|
|
||||||
if (
|
if (
|
||||||
preg_match('/[.!?]\s*$/u', $tokens[$i]) &&
|
preg_match('/[.!?]\s*$/u', $tokens[$i]) &&
|
||||||
isset($tokens[$i + 1]) &&
|
isset($tokens[$i + 1]) &&
|
||||||
|
|||||||
31
src/Repository/IngestProfileRepository.php
Normal file
31
src/Repository/IngestProfileRepository.php
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Repository;
|
||||||
|
|
||||||
|
use App\Entity\IngestProfile;
|
||||||
|
use Doctrine\Bundle\DoctrineBundle\Repository\ServiceEntityRepository;
|
||||||
|
use Doctrine\Persistence\ManagerRegistry;
|
||||||
|
|
||||||
|
/**
 * Doctrine repository for IngestProfile entities.
 *
 * @extends ServiceEntityRepository<IngestProfile>
 */
class IngestProfileRepository extends ServiceEntityRepository
{
    public function __construct(ManagerRegistry $registry)
    {
        parent::__construct($registry, IngestProfile::class);
    }

    /**
     * Returns the profile with the highest version number, or null when the
     * table is empty.
     */
    public function findLatestVersion(): ?IngestProfile
    {
        $builder = $this->createQueryBuilder('p');
        $builder->orderBy('p.version', 'DESC');
        $builder->setMaxResults(1);

        $query = $builder->getQuery();

        return $query->getOneOrNullResult();
    }

    /**
     * Returns a profile whose active flag is set, or null when there is none.
     */
    public function findActive(): ?IngestProfile
    {
        $criteria = ['active' => true];

        return $this->findOneBy($criteria);
    }
}
|
||||||
@@ -4,7 +4,7 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Vector;
|
namespace App\Vector;
|
||||||
|
|
||||||
use App\Index\IndexConfiguration;
|
use App\Index\IndexConfigurationProvider;
|
||||||
use Symfony\Component\Process\Exception\ProcessFailedException;
|
use Symfony\Component\Process\Exception\ProcessFailedException;
|
||||||
use Symfony\Component\Process\Process;
|
use Symfony\Component\Process\Process;
|
||||||
|
|
||||||
@@ -18,7 +18,7 @@ final class VectorIndexBuilder
|
|||||||
private string $vectorMetaPath;
|
private string $vectorMetaPath;
|
||||||
private int $timeoutSeconds;
|
private int $timeoutSeconds;
|
||||||
|
|
||||||
private IndexConfiguration $indexConfiguration;
|
private IndexConfigurationProvider $configurationProvider;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
string $pythonBin,
|
string $pythonBin,
|
||||||
@@ -27,7 +27,7 @@ final class VectorIndexBuilder
|
|||||||
string $indexMetaPath,
|
string $indexMetaPath,
|
||||||
string $vectorIndexPath,
|
string $vectorIndexPath,
|
||||||
int $timeoutSeconds,
|
int $timeoutSeconds,
|
||||||
IndexConfiguration $indexConfiguration
|
IndexConfigurationProvider $configurationProvider
|
||||||
) {
|
) {
|
||||||
$this->pythonBin = $pythonBin;
|
$this->pythonBin = $pythonBin;
|
||||||
$this->scriptPath = $scriptPath;
|
$this->scriptPath = $scriptPath;
|
||||||
@@ -36,39 +36,29 @@ final class VectorIndexBuilder
|
|||||||
$this->vectorIndexPath = $vectorIndexPath;
|
$this->vectorIndexPath = $vectorIndexPath;
|
||||||
$this->vectorMetaPath = $vectorIndexPath . '.meta.json';
|
$this->vectorMetaPath = $vectorIndexPath . '.meta.json';
|
||||||
$this->timeoutSeconds = $timeoutSeconds;
|
$this->timeoutSeconds = $timeoutSeconds;
|
||||||
$this->indexConfiguration = $indexConfiguration;
|
$this->configurationProvider = $configurationProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rebuild FAISS Index deterministisch aus index.ndjson.
|
||||||
|
*/
|
||||||
public function rebuildFromNdjson(?string $logPath = null): void
|
public function rebuildFromNdjson(?string $logPath = null): void
|
||||||
{
|
{
|
||||||
if (!is_file($this->scriptPath)) {
|
$this->assertPreconditions();
|
||||||
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!is_file($this->indexNdjsonPath)) {
|
|
||||||
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!is_file($this->indexMetaPath)) {
|
if (!is_file($this->indexMetaPath)) {
|
||||||
$this->initializeIndexMeta();
|
$this->initializeIndexMeta();
|
||||||
}
|
}
|
||||||
|
|
||||||
$indexMeta = json_decode((string) file_get_contents($this->indexMetaPath), true);
|
$indexMeta = $this->readIndexMeta();
|
||||||
|
|
||||||
if (!is_array($indexMeta) || empty($indexMeta['embedding_model'])) {
|
$embeddingModel = $indexMeta['embedding_model'];
|
||||||
throw new \RuntimeException('Invalid index_meta.json');
|
|
||||||
}
|
|
||||||
|
|
||||||
$embeddingModel = (string) $indexMeta['embedding_model'];
|
|
||||||
|
|
||||||
$tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
|
$tmpVectorIndexPath = $this->vectorIndexPath . '.tmp';
|
||||||
|
|
||||||
// Wichtig: Python erzeugt meta basierend auf endgültigem Namen
|
// Clean leftovers
|
||||||
$finalMetaPath = $this->vectorMetaPath;
|
|
||||||
$tmpMetaPath = dirname($this->vectorIndexPath) . '/' . basename($this->vectorIndexPath, '.index') . '.index.meta.json';
|
|
||||||
|
|
||||||
@unlink($tmpVectorIndexPath);
|
@unlink($tmpVectorIndexPath);
|
||||||
@unlink($finalMetaPath);
|
@unlink($this->vectorMetaPath);
|
||||||
|
|
||||||
$cmd = [
|
$cmd = [
|
||||||
$this->pythonBin,
|
$this->pythonBin,
|
||||||
@@ -80,21 +70,41 @@ final class VectorIndexBuilder
|
|||||||
|
|
||||||
$process = new Process($cmd);
|
$process = new Process($cmd);
|
||||||
$process->setTimeout($this->timeoutSeconds);
|
$process->setTimeout($this->timeoutSeconds);
|
||||||
$process->mustRun();
|
|
||||||
|
|
||||||
if (!is_file($tmpVectorIndexPath) || filesize($tmpVectorIndexPath) === 0) {
|
$this->runProcess($process, $logPath);
|
||||||
throw new \RuntimeException('Vector index tmp missing or empty');
|
|
||||||
|
$this->validatePythonOutputs($tmpVectorIndexPath);
|
||||||
|
|
||||||
|
$this->atomicSwitch($tmpVectorIndexPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------
|
||||||
|
// Internals
|
||||||
|
// -----------------------------------------------------
|
||||||
|
|
||||||
|
private function assertPreconditions(): void
|
||||||
|
{
|
||||||
|
if (!is_file($this->scriptPath)) {
|
||||||
|
throw new \RuntimeException('vector_ingest.py not found at: ' . $this->scriptPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Python erzeugt vector.index.meta.json (nicht tmp.meta!)
|
if (!is_file($this->indexNdjsonPath)) {
|
||||||
if (!is_file($this->vectorMetaPath) || filesize($this->vectorMetaPath) === 0) {
|
throw new \RuntimeException('index.ndjson not found at: ' . $this->indexNdjsonPath);
|
||||||
throw new \RuntimeException('Vector meta missing or empty');
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private function readIndexMeta(): array
|
||||||
|
{
|
||||||
|
$meta = json_decode(
|
||||||
|
(string) file_get_contents($this->indexMetaPath),
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!is_array($meta) || empty($meta['embedding_model'])) {
|
||||||
|
throw new \RuntimeException('Invalid index_meta.json');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Atomarer Switch für Index
|
return $meta;
|
||||||
if (!rename($tmpVectorIndexPath, $this->vectorIndexPath)) {
|
|
||||||
throw new \RuntimeException('Atomic switch failed for vector index');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private function initializeIndexMeta(): void
|
private function initializeIndexMeta(): void
|
||||||
@@ -105,14 +115,16 @@ final class VectorIndexBuilder
|
|||||||
throw new \RuntimeException('Cannot create knowledge directory');
|
throw new \RuntimeException('Cannot create knowledge directory');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$config = $this->configurationProvider->getConfiguration();
|
||||||
|
|
||||||
$data = [
|
$data = [
|
||||||
'index_version' => 1,
|
'index_version' => 1,
|
||||||
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
'created_at' => (new \DateTimeImmutable())->format(DATE_ATOM),
|
||||||
'embedding_model' => $this->indexConfiguration->getEmbeddingModel(),
|
'embedding_model' => $config->getEmbeddingModel(),
|
||||||
'embedding_dimension' => $this->indexConfiguration->getEmbeddingDimension(),
|
'embedding_dimension' => $config->getEmbeddingDimension(),
|
||||||
'chunk_size' => $this->indexConfiguration->getChunkSize(),
|
'chunk_size' => $config->getChunkSize(),
|
||||||
'chunk_overlap' => $this->indexConfiguration->getChunkOverlap(),
|
'chunk_overlap' => $config->getChunkOverlap(),
|
||||||
'scoring_version' => $this->indexConfiguration->getScoringVersion(),
|
'scoring_version' => $config->getScoringVersion(),
|
||||||
'index_format' => 'ndjson',
|
'index_format' => 'ndjson',
|
||||||
'vector_backend' => 'faiss',
|
'vector_backend' => 'faiss',
|
||||||
];
|
];
|
||||||
@@ -123,6 +135,24 @@ final class VectorIndexBuilder
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function validatePythonOutputs(string $tmpVectorIndexPath): void
|
||||||
|
{
|
||||||
|
if (!is_file($tmpVectorIndexPath) || filesize($tmpVectorIndexPath) === 0) {
|
||||||
|
throw new \RuntimeException('Vector index tmp missing or empty');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_file($this->vectorMetaPath) || filesize($this->vectorMetaPath) === 0) {
|
||||||
|
throw new \RuntimeException('Vector meta missing or empty');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private function atomicSwitch(string $tmpVectorIndexPath): void
|
||||||
|
{
|
||||||
|
if (!rename($tmpVectorIndexPath, $this->vectorIndexPath)) {
|
||||||
|
throw new \RuntimeException('Atomic switch failed for vector index');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private function runProcess(Process $process, ?string $logPath): void
|
private function runProcess(Process $process, ?string $logPath): void
|
||||||
{
|
{
|
||||||
if ($logPath !== null) {
|
if ($logPath !== null) {
|
||||||
|
|||||||
@@ -28,22 +28,35 @@
|
|||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link text-light" href="{{ path('admin_dashboard') }}">Dashboard</a>
|
<a class="nav-link text-light" href="{{ path('admin_dashboard') }}">Dashboard</a>
|
||||||
</li>
|
</li>
|
||||||
|
</ul>
|
||||||
|
<hr>
|
||||||
|
<h3>Dokumente und Wissen</h3>
|
||||||
|
<ul class="nav flex-column">
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link text-light" href="{{ path('admin_documents') }}">Dokumente</a>
|
<a class="nav-link text-light" href="{{ path('admin_documents') }}">Dokumente</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link text-light" href="{{ path('admin_jobs') }}">
|
<a class="nav-link text-light" href="{{ path('admin_jobs') }}">
|
||||||
Indexierung Jobs (Ingest)
|
Indexierung (Ingest Jobs)
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link text-light" href="{{ path('admin_system_agent') }}">
|
<a class="nav-link text-light" href="{{ path('admin_system_agent') }}">
|
||||||
Wissen (Chunk-Index)
|
Wissensdaten (Chunk-Index)
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
<hr>
|
||||||
|
<h3>System-Profile</h3>
|
||||||
|
<ul class="nav flex-column">
|
||||||
|
<li class="nav-item">
|
||||||
|
<a class="nav-link text-light" href="{{ path('admin_system_prompt') }}">
|
||||||
|
System-Prompt-Profil
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li class="nav-item">
|
<li class="nav-item">
|
||||||
<a class="nav-link text-light" href="{{ path('admin_system_prompt') }}">
|
<a class="nav-link text-light" href="{{ path('admin_ingest_profile_list') }}">
|
||||||
System Prompt Settings
|
Indexierungs-Profil (Ingest Profiles)
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
26
templates/admin/ingest_profile/create.html.twig
Normal file
26
templates/admin/ingest_profile/create.html.twig
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
{% extends 'admin/base.html.twig' %}
|
||||||
|
|
||||||
|
{% block title %}System Prompt{% endblock %}
|
||||||
|
|
||||||
|
{% block body %}
|
||||||
|
<h1>Create Ingest Profile</h1>
|
||||||
|
|
||||||
|
<form method="post">
|
||||||
|
<label>Chunk Size:</label>
|
||||||
|
<input type="number" name="chunk_size" required><br>
|
||||||
|
|
||||||
|
<label>Chunk Overlap:</label>
|
||||||
|
<input type="number" name="chunk_overlap" required><br>
|
||||||
|
|
||||||
|
<label>Embedding Model:</label>
|
||||||
|
<input type="text" name="embedding_model" required><br>
|
||||||
|
|
||||||
|
<label>Embedding Dimension:</label>
|
||||||
|
<input type="number" name="embedding_dimension" required><br>
|
||||||
|
|
||||||
|
<label>Scoring Version:</label>
|
||||||
|
<input type="number" name="scoring_version" required><br>
|
||||||
|
|
||||||
|
<button type="submit">Create</button>
|
||||||
|
</form>
|
||||||
|
{% endblock %}
|
||||||
89
templates/admin/ingest_profile/list.html.twig
Normal file
89
templates/admin/ingest_profile/list.html.twig
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{% extends 'admin/base.html.twig' %}
|
||||||
|
|
||||||
|
{% block title %}Ingest Profiles{% endblock %}
|
||||||
|
|
||||||
|
{% block body %}
|
||||||
|
<h1>Ingest Profiles</h1>
|
||||||
|
|
||||||
|
{% if structureMismatch %}
|
||||||
|
<div class="alert alert-danger">
|
||||||
|
⚠ Strukturabweichung festgestellt – Globale Neuindizierung erforderlich | <a href="{{ path('admin_jobs') }}">Global Reindex aufrufen</a>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="alert alert-success">
|
||||||
|
✅ Die Indexstruktur entspricht dem aktiven Profil
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p><a class="btn btn-outline-light" href="{{ path('admin_ingest_profile_create') }}">+ Neues Profil anlegen</a></p>
|
||||||
|
|
||||||
|
<h2>Profiles</h2>
|
||||||
|
|
||||||
|
<table border="1" cellpadding="6" class="table table-sm table-dark align-middle">
|
||||||
|
<tr>
|
||||||
|
<th>Version</th>
|
||||||
|
<th>Chunk Size</th>
|
||||||
|
<th>Overlap</th>
|
||||||
|
<th>Model</th>
|
||||||
|
<th>Dimension</th>
|
||||||
|
<th>Scoring</th>
|
||||||
|
<th>Active</th>
|
||||||
|
<th>Reindex Required</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
{% for p in profiles %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ p.version }}</td>
|
||||||
|
<td>{{ p.chunkSize }}</td>
|
||||||
|
<td>{{ p.chunkOverlap }}</td>
|
||||||
|
<td>{{ p.embeddingModel }}</td>
|
||||||
|
<td>{{ p.embeddingDimension }}</td>
|
||||||
|
<td>{{ p.scoringVersion }}</td>
|
||||||
|
<td>{{ p.active ? 'Yes' : 'No' }}</td>
|
||||||
|
<td>{{ p.reindexRequired ? 'Yes' : 'No' }}</td>
|
||||||
|
<td>
|
||||||
|
{% if not p.active %}
|
||||||
|
<a class="btn btn-outline-info btn-sm" href="{{ path('admin_ingest_profile_activate', {id: p.id}) }}">
|
||||||
|
Aktivieren
|
||||||
|
</a>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
<h2>Index-Struktur-Profil Diff</h2>
|
||||||
|
|
||||||
|
{% if indexMeta %}
|
||||||
|
<p><strong>Index Version:</strong> {{ indexMeta.index_version }}</p>
|
||||||
|
{% else %}
|
||||||
|
<p>No index_meta.json found.</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<table border="1" cellpadding="6" class="table table-sm table-dark align-middle">
|
||||||
|
<tr>
|
||||||
|
<th>Parameter</th>
|
||||||
|
<th>Index Meta</th>
|
||||||
|
<th>Active Profile</th>
|
||||||
|
<th>Status</th>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
{% for key, row in diff %}
|
||||||
|
<tr>
|
||||||
|
<td>{{ key }}</td>
|
||||||
|
<td>{{ row.meta }}</td>
|
||||||
|
<td>{{ row.profile }}</td>
|
||||||
|
<td>
|
||||||
|
{% if row.equal %}
|
||||||
|
<span style="color:green;">✓</span>
|
||||||
|
{% else %}
|
||||||
|
<span style="color:red;">✗</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
<h1 class="h4 mb-4">Ingest Jobs</h1>
|
<h1 class="h4 mb-4">Indexierung (Ingest Jobs-Liste)</h1>
|
||||||
|
|
||||||
<form method="post"
|
<form method="post"
|
||||||
action="{{ path('admin_global_reindex') }}"
|
action="{{ path('admin_global_reindex') }}"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{% extends 'admin/base.html.twig' %}
|
{% extends 'admin/base.html.twig' %}
|
||||||
|
|
||||||
{% block title %}Agent System Overview{% endblock %}
|
{% block title %}Wissensdaten (Chunk-Index){% endblock %}
|
||||||
|
|
||||||
{% block body %}
|
{% block body %}
|
||||||
|
|
||||||
@@ -9,7 +9,7 @@
|
|||||||
← Zurück
|
← Zurück
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
<h1 class="h4 mb-4">Agent System Overview</h1>
|
<h1 class="h4 mb-4">Wissensdaten (Chunk-Index)</h1>
|
||||||
|
|
||||||
{# ============================= #}
|
{# ============================= #}
|
||||||
{# Index Meta Section #}
|
{# Index Meta Section #}
|
||||||
|
|||||||
Reference in New Issue
Block a user