new version ndjson

This commit is contained in:
team 1
2026-02-12 11:22:56 +01:00
parent 0bb0c0b42f
commit 5a52e07edc
10 changed files with 375 additions and 492 deletions

View File

@@ -1,130 +1,118 @@
# This file is the entry point to configure your own services.
# Files in the packages/ subdirectory configure your dependencies.
#
# https://symfony.com/doc/current/best_practices.html
# ------------------------------------------------------------
# Parameters
# ------------------------------------------------------------
# Container parameters. The mto.index.* values are injected into
# App\Index\IndexConfiguration and the mto.vector.* values into
# App\Vector\VectorIndexBuilder through %...% placeholders later in this file.
# NOTE(review): every key below appears twice with an identical value. This
# looks like the removed and added sides of a whitespace-only diff hunk whose
# indentation and +/- markers were lost; duplicate mapping keys are invalid
# YAML, so the real file should keep a single copy.
parameters:
mto.index.chunk_size: 800
mto.index.chunk_overlap: 100
mto.index.embedding_model: 'nomic-embed-text'
mto.index.embedding_dimension: 768
mto.index.scoring_version: 1
# NOTE(review): hard-coded absolute path, whereas other paths in this file are
# built from %kernel.project_dir% - confirm it is valid on every deployment.
mto.vector.python_bin: '/var/www/html/src/Vector/.venv/bin/python'
# NOTE(review): injected as $relativeScriptPath although it starts with '/' -
# confirm how the consumer resolves it.
mto.vector.ingest_script: '/src/Vector/vector_ingest.py'
# Injected as $timeoutSeconds, so presumably a duration in seconds.
mto.vector.timeout: 600
# Second copy of the same eight keys (see the note above `parameters:`).
mto.index.chunk_size: 800
mto.index.chunk_overlap: 100
mto.index.embedding_model: 'nomic-embed-text'
mto.index.embedding_dimension: 768
mto.index.scoring_version: 1
mto.vector.python_bin: '/var/www/html/src/Vector/.venv/bin/python'
mto.vector.ingest_script: '/src/Vector/vector_ingest.py'
mto.vector.timeout: 600
# ------------------------------------------------------------
# Services
# ------------------------------------------------------------
# Service definitions for the application's dependency-injection container.
services:
# ------------------------------------------------------------
# Default service configuration
# ------------------------------------------------------------
# Defaults applied to the services declared in this file.
# NOTE(review): `_defaults` and its `bind` section are each listed twice with
# identical content - presumably the old and new sides of a diff hunk whose
# indentation was lost; the real file should contain one copy.
_defaults:
# Automatically injects dependencies in the services.
autowire: true
# Automatically registers services as commands, event subscribers, etc.
autoconfigure: true
# ------------------------------------------------------------
# Default service configuration
# ------------------------------------------------------------
_defaults:
autowire: true
autoconfigure: true
# Bind the agent-specific Monolog channel explicitly
# Each entry is keyed by "<type> $<argument name>": an argument with that type
# and name receives the given value.
bind:
Psr\Log\LoggerInterface $agentLogger: '@monolog.logger.agent'
string $projectDir: '%kernel.project_dir%'
# Second copy of the same two bindings.
bind:
Psr\Log\LoggerInterface $agentLogger: '@monolog.logger.agent'
string $projectDir: '%kernel.project_dir%'
# ------------------------------------------------------------
# Automatically register all services in src/
# ------------------------------------------------------------
# Makes the classes under ../src/ available as services, except for the
# listed DependencyInjection/, Entity/ and Kernel.php paths.
# NOTE(review): this whole entry is listed twice with identical content -
# presumably the old and new sides of a diff hunk; keep one copy.
App\:
resource: '../src/'
exclude:
- '../src/DependencyInjection/'
- '../src/Entity/'
- '../src/Kernel.php'
# ------------------------------------------------------------
# Automatically register all services in src/
# ------------------------------------------------------------
App\:
resource: '../src/'
exclude:
- '../src/DependencyInjection/'
- '../src/Entity/'
- '../src/Kernel.php'
# ------------------------------------------------------------
# AI Agent Infrastructure
# ------------------------------------------------------------
# Named constructor arguments for the LLM client, all read from environment
# variables; the `int:` prefix casts AI_LLM_TIMEOUT to an integer.
# NOTE(review): this service id is defined a second time further down with
# the same arguments - keep one definition in the real file.
App\Infrastructure\OllamaClient:
arguments:
$apiUrl: '%env(AI_LLM_API_URL)%'
$model: '%env(AI_LLM_MODEL)%'
$timeoutSeconds: '%env(int:AI_LLM_TIMEOUT)%'
# Registers the classes under ../src/Controller/ and tags them with
# `controller.service_arguments`.
# NOTE(review): the same entry appears again further down, differing only in
# the spacing inside the `tags` brackets - keep one copy.
App\Controller\:
resource: '../src/Controller/'
tags: ['controller.service_arguments']
# ------------------------------------------------------------
# AI Agent Context & state
# ------------------------------------------------------------
# $historyDir comes from the AI_HISTORY_DIR environment variable; $projectDir
# is the kernel project directory.
# NOTE(review): `_defaults.bind` already maps `string $projectDir` to the same
# value, so the explicit $projectDir may be redundant - confirm against the
# constructor's type-hint. This service id is also defined a second time
# further down with the same arguments.
App\Context\ContextService:
arguments:
$historyDir: '%env(AI_HISTORY_DIR)%'
$projectDir: '%kernel.project_dir%'
# ------------------------------------------------------------
# AI Agent Infrastructure
# ------------------------------------------------------------
# Named constructor arguments for the LLM client, all read from environment
# variables; the `int:` prefix casts AI_LLM_TIMEOUT to an integer.
# NOTE(review): an identical definition of this service id appears earlier in
# this file - keep one definition in the real file.
App\Infrastructure\OllamaClient:
arguments:
$apiUrl: '%env(AI_LLM_API_URL)%'
$model: '%env(AI_LLM_MODEL)%'
$timeoutSeconds: '%env(int:AI_LLM_TIMEOUT)%'
# ------------------------------------------------------------
# AI Agent Debug & logging flags
# ------------------------------------------------------------
# Three flags read from environment variables; the `bool:` prefix casts each
# value to a boolean.
# NOTE(review): this service id is defined a second time further down with
# the same arguments - keep one definition in the real file.
App\Agent\AgentRunner:
arguments:
$debug: '%env(bool:AI_DEBUG)%'
$logPrompt: '%env(bool:AI_LOG_PROMPT)%'
$logContext: '%env(bool:AI_LOG_CONTEXT)%'
# ------------------------------------------------------------
# AI Agent Context & Runner
# ------------------------------------------------------------
# $historyDir comes from the AI_HISTORY_DIR environment variable; $projectDir
# is the kernel project directory.
# NOTE(review): `_defaults.bind` already maps `string $projectDir` to the same
# value, so the explicit $projectDir may be redundant - confirm against the
# constructor's type-hint. An identical definition of this service id also
# appears earlier in this file.
App\Context\ContextService:
arguments:
$historyDir: '%env(AI_HISTORY_DIR)%'
$projectDir: '%kernel.project_dir%'
# Registers the classes under ../src/Controller/ and tags them with
# `controller.service_arguments`.
# NOTE(review): the same entry appears earlier in this file, differing only in
# the spacing inside the `tags` brackets - keep one copy.
App\Controller\:
resource: '../src/Controller/'
tags: [ 'controller.service_arguments' ]
# Three flags read from environment variables; the `bool:` prefix casts each
# value to a boolean.
# NOTE(review): an identical definition of this service id appears earlier in
# this file - keep one definition in the real file.
App\Agent\AgentRunner:
arguments:
$debug: '%env(bool:AI_DEBUG)%'
$logPrompt: '%env(bool:AI_LOG_PROMPT)%'
$logContext: '%env(bool:AI_LOG_CONTEXT)%'
# ------------------------------------------------------------
# AI Agent Knowledge
# ------------------------------------------------------------
# Points the keyword retriever at var/knowledge/chunks under the project
# directory (the same directory one ChunkWriter definition in this file uses).
App\Knowledge\Retrieval\ChunkKeywordRetriever:
arguments:
$chunksDir: '%kernel.project_dir%/var/knowledge/chunks'
# ------------------------------------------------------------
# NDJSON Retrieval Stack (FINAL ARCHITECTURE)
# ------------------------------------------------------------
# CachedRetriever receives another retriever as $inner, the `cache.app`
# service as $cache and a TTL of 600 (seconds, per the argument name).
# NOTE(review): CachedRetriever is defined twice in this file; here $inner is
# ChunkKeywordRetriever, the other definition uses NdjsonHybridRetriever.
# Only one can be effective - confirm which one the real file keeps.
App\Knowledge\Retrieval\CachedRetriever:
arguments:
$inner: '@App\Knowledge\Retrieval\ChunkKeywordRetriever'
$cache: '@cache.app'
$ttlSeconds: 600
# `~` (YAML null): declared with no explicit configuration of its own.
App\Knowledge\Retrieval\NdjsonChunkLookup: ~
# Alias: the interface id points at the CachedRetriever service.
# NOTE(review): the same alias is declared again further down.
App\Knowledge\Retrieval\RetrieverInterface:
alias: App\Knowledge\Retrieval\CachedRetriever
# `~` (YAML null): declared with no explicit configuration of its own.
App\Knowledge\Retrieval\NdjsonKeywordSearch: ~
# Output locations for the chunk writer: the chunks directory and the
# manifest file, both under var/knowledge in the project directory.
App\Knowledge\Ingest\ChunkWriter:
arguments:
$chunksDir: '%kernel.project_dir%/var/knowledge/chunks'
$manifestPath: '%kernel.project_dir%/var/knowledge/manifest.json'
# Two integer limits for the hybrid retriever.
# NOTE(review): presumably the maximum number of chunks returned and the
# number of vector-search candidates - confirm in the class.
App\Knowledge\Retrieval\NdjsonHybridRetriever:
arguments:
$maxChunks: 3
$vectorTopK: 5
# Index file location; ChunkIndexLoader in this file is given the same path.
App\Knowledge\Ingest\ChunkIndexWriter:
arguments:
$indexPath: '%kernel.project_dir%/var/knowledge/index.json'
# CachedRetriever receives NdjsonHybridRetriever as $inner, the `cache.app`
# service as $cache and a TTL of 600 (seconds, per the argument name).
# NOTE(review): an earlier definition of this id in this file uses
# ChunkKeywordRetriever as $inner; only one can be effective - confirm which
# one the real file keeps.
App\Knowledge\Retrieval\CachedRetriever:
arguments:
$inner: '@App\Knowledge\Retrieval\NdjsonHybridRetriever'
$cache: '@cache.app'
$ttlSeconds: 600
# Index file location; ChunkIndexWriter in this file is given the same path.
App\Knowledge\Retrieval\ChunkIndexLoader:
arguments:
$indexPath: '%kernel.project_dir%/var/knowledge/index.json'
# Alias: the interface id points at the CachedRetriever service.
# NOTE(review): the same alias is also declared earlier in this file.
App\Knowledge\Retrieval\RetrieverInterface:
alias: App\Knowledge\Retrieval\CachedRetriever
# Directory the ingest command is given for uploads.
App\Command\KnowledgeIngestCommand:
arguments:
$uploadsDir: '%kernel.project_dir%/var/knowledge/uploads'
# ------------------------------------------------------------
# Vector Search (FAISS NDJSON-based)
# ------------------------------------------------------------
# NOTE(review): VectorSearchClient is defined twice with conflicting
# $vectorDir values (src/Vector vs. var/knowledge). Presumably these are the
# removed and added sides of the diff - confirm which one the real file keeps,
# because only one can be effective.
App\Vector\VectorSearchClient:
arguments:
$vectorDir: '%kernel.project_dir%/src/Vector'
App\Vector\VectorSearchClient:
arguments:
$vectorDir: '%kernel.project_dir%/var/knowledge'
# Vector ingest command: works on src/Vector under the project directory.
# NOTE(review): `_defaults.bind` already maps `string $projectDir` to the same
# value, so the explicit $projectDir may be redundant - confirm against the
# constructor's type-hint.
App\Command\VectorIngestCommand:
arguments:
$vectorDir: '%kernel.project_dir%/src/Vector'
$projectDir: '%kernel.project_dir%'
# Index builder: interpreter path, script path and timeout all come from the
# mto.vector.* parameters.
# NOTE(review): this service id is defined a second time further down with
# the same arguments - keep one definition in the real file.
App\Vector\VectorIndexBuilder:
arguments:
$pythonBin: '%mto.vector.python_bin%'
$relativeScriptPath: '%mto.vector.ingest_script%'
$timeoutSeconds: '%mto.vector.timeout%'
# Vector install command: works on src/Vector under the project directory.
App\Command\VectorInstallCommand:
arguments:
$vectorDir: '%kernel.project_dir%/src/Vector'
# ------------------------------------------------------------
# Index Configuration (Guardrails)
# ------------------------------------------------------------
# Five arguments come from the mto.index.* parameters; the index format and
# vector backend are fixed literals ('ndjson' and 'faiss').
# NOTE(review): this service id is defined a second time further down with
# the same arguments - keep one definition in the real file.
App\Index\IndexConfiguration:
arguments:
$chunkSize: '%mto.index.chunk_size%'
$chunkOverlap: '%mto.index.chunk_overlap%'
$embeddingModel: '%mto.index.embedding_model%'
$embeddingDimension: '%mto.index.embedding_dimension%'
$scoringVersion: '%mto.index.scoring_version%'
$indexFormat: 'ndjson'
$vectorBackend: 'faiss'
# Index builder: interpreter path, script path and timeout all come from the
# mto.vector.* parameters.
# NOTE(review): an identical definition of this service id appears earlier in
# this file - keep one definition in the real file.
App\Vector\VectorIndexBuilder:
arguments:
$pythonBin: '%mto.vector.python_bin%'
$relativeScriptPath: '%mto.vector.ingest_script%'
$timeoutSeconds: '%mto.vector.timeout%'
# Five arguments come from the mto.index.* parameters; the index format and
# vector backend are fixed literals ('ndjson' and 'faiss').
# NOTE(review): an identical definition of this service id appears earlier in
# this file - keep one definition in the real file.
App\Index\IndexConfiguration:
arguments:
$chunkSize: '%mto.index.chunk_size%'
$chunkOverlap: '%mto.index.chunk_overlap%'
$embeddingModel: '%mto.index.embedding_model%'
$embeddingDimension: '%mto.index.embedding_dimension%'
$scoringVersion: '%mto.index.scoring_version%'
$indexFormat: 'ndjson'
$vectorBackend: 'faiss'