# MtoRagSystem/config/services.yaml

# ------------------------------------------------------------
# Parameters
# ------------------------------------------------------------

parameters:
  # Index settings; each is injected into App\Index\IndexConfiguration
  # in the services section of this file.
  mto.index.chunk_overlap: 100
  mto.index.chunk_size: 800
  mto.index.embedding_dimension: 768
  mto.index.embedding_model: 'nomic-embed-text'
  mto.index.scoring_version: 1

  # Vector ingest settings; each is injected into App\Vector\VectorIndexBuilder.
  # NOTE(review): python_bin is an absolute path tied to one deployment
  # layout; confirm it is valid in every environment using this file.
  mto.vector.ingest_script: '/src/Vector/vector_ingest.py'
  mto.vector.python_bin: '/var/www/html/.venv/bin/python3'
  # Injected as $timeoutSeconds.
  mto.vector.timeout: 600

# ------------------------------------------------------------
# Services
# ------------------------------------------------------------

services:

  # ------------------------------------------------------------
  # Defaults applied to the services declared in this file
  # ------------------------------------------------------------
  _defaults:
    # Constructor arguments matching one of these "type $name" pairs
    # receive the value given here.
    bind:
      string $projectDir: '%kernel.project_dir%'
      Psr\Log\LoggerInterface $agentLogger: '@monolog.logger.agent'

    autoconfigure: true
    autowire: true

  # ------------------------------------------------------------
  # Automatically register all services in src/
  # ------------------------------------------------------------
  # Imports the classes under ../src/ as services in the App\ namespace.
  App\:
    # Paths listed here are left out of the import.
    exclude:
      - '../src/Kernel.php'
      - '../src/Entity/'
      - '../src/DependencyInjection/'
    resource: '../src/'

  # Controllers are imported a second time so that each one carries the
  # controller.service_arguments tag.
  App\Controller\:
    tags:
      - 'controller.service_arguments'
    resource: '../src/Controller/'

  # ------------------------------------------------------------
  # AI Agent – Infrastructure
  # ------------------------------------------------------------
  App\Infrastructure\OllamaClient:
    # All three constructor arguments are read from environment variables.
    arguments:
      # The int: prefix is Symfony's integer-cast env var processor.
      $timeoutSeconds: '%env(int:AI_LLM_TIMEOUT)%'
      $model: '%env(AI_LLM_MODEL)%'
      $apiUrl: '%env(AI_LLM_API_URL)%'

  # ------------------------------------------------------------
  # AI Agent – Context & Runner
  # ------------------------------------------------------------
  App\Context\ContextService:
    arguments:
      # NOTE(review): same value as the `string $projectDir` bind under
      # _defaults; presumably redundant, confirm before removing.
      $projectDir: '%kernel.project_dir%'
      # Read from the AI_HISTORY_DIR environment variable.
      $historyDir: '%env(AI_HISTORY_DIR)%'

  App\Agent\AgentRunner:
    # Three flags, each read from an environment variable through the
    # bool: env var processor.
    arguments:
      $logContext: '%env(bool:AI_LOG_CONTEXT)%'
      $logPrompt: '%env(bool:AI_LOG_PROMPT)%'
      $debug: '%env(bool:AI_DEBUG)%'

  # ------------------------------------------------------------
  # NDJSON Retrieval Stack (FINAL ARCHITECTURE)
  # ------------------------------------------------------------

  # Declared explicitly with no per-service overrides (null definition).
  App\Knowledge\Retrieval\NdjsonKeywordSearch: null

  App\Knowledge\Retrieval\NdjsonChunkLookup: null

  App\Knowledge\Retrieval\NdjsonHybridRetriever:
    # Fixed integer limits handed to the constructor.
    arguments:
      $vectorTopK: 5
      $maxChunks: 3

  App\Knowledge\Retrieval\CachedRetriever:
    arguments:
      $ttlSeconds: 600
      # The cache.app service.
      $cache: '@cache.app'
      # The hybrid retriever service is injected as $inner.
      $inner: '@App\Knowledge\Retrieval\NdjsonHybridRetriever'

  # Alias: the RetrieverInterface id points at the CachedRetriever service
  # (string shorthand for an `alias:` entry).
  App\Knowledge\Retrieval\RetrieverInterface: '@App\Knowledge\Retrieval\CachedRetriever'

  # ------------------------------------------------------------
  # Vector Search (FAISS NDJSON-based)
  # ------------------------------------------------------------

  App\Vector\VectorSearchClient:
    arguments:
      # Path built from the kernel.project_dir parameter plus /var/knowledge.
      $vectorDir: '%kernel.project_dir%/var/knowledge'

  App\Vector\VectorIndexBuilder:
    # Every argument comes from an mto.vector.* entry in the parameters
    # section of this file.
    arguments:
      $timeoutSeconds: '%mto.vector.timeout%'
      $relativeScriptPath: '%mto.vector.ingest_script%'
      $pythonBin: '%mto.vector.python_bin%'

  # ------------------------------------------------------------
  # Index Configuration (Guardrails)
  # ------------------------------------------------------------

  App\Index\IndexConfiguration:
    arguments:
      # Literal values set directly on the service.
      $vectorBackend: 'faiss'
      $indexFormat: 'ndjson'

      # Values taken from the mto.index.* entries in the parameters section.
      $scoringVersion: '%mto.index.scoring_version%'
      $embeddingDimension: '%mto.index.embedding_dimension%'
      $embeddingModel: '%mto.index.embedding_model%'
      $chunkOverlap: '%mto.index.chunk_overlap%'
      $chunkSize: '%mto.index.chunk_size%'