first update to external config values
This commit is contained in:
35
RETRIEX_CONFIG_FIX_README.md
Normal file
35
RETRIEX_CONFIG_FIX_README.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# RetrieX config hardening overlay
|
||||
|
||||
Scope:
|
||||
- Multi-tenant and configuration hardening only.
|
||||
- No retrieval logic rewrite.
|
||||
- No prompt logic rewrite.
|
||||
- No security/secrets cleanup in this round.
|
||||
|
||||
Install:
|
||||
1. Backup the current project.
|
||||
2. Extract this ZIP over the project root.
|
||||
3. Clear Symfony cache.
|
||||
4. Run:
|
||||
bin/console mto:agent:config:dump-effective --summary
|
||||
bin/console mto:agent:config:validate
|
||||
5. Run the existing 1.4.2 regression tests before deployment.
|
||||
|
||||
New config files:
|
||||
- config/retriex/runtime.yaml
|
||||
- config/retriex/index.yaml
|
||||
- config/retriex/vector.yaml
|
||||
- config/retriex/commerce.yaml
|
||||
- config/retriex/model.yaml
|
||||
- config/retriex/prompt.yaml
|
||||
- config/retriex/agent.yaml
|
||||
- config/retriex/retrieval.yaml
|
||||
|
||||
New commands:
|
||||
- mto:agent:config:dump-effective
|
||||
- mto:agent:config:validate
|
||||
|
||||
Notes:
|
||||
- Existing mto.* parameters remain for compatibility.
|
||||
- services.yaml imports config/retriex/*.yaml explicitly in stable order.
|
||||
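- Retrieval constants are exposed as a read-only inventory and validation baseline. The retriever logic is unchanged in this round, so editing config/retriex/retrieval.yaml does not change retrieval behavior; validation only warns when an inventory value differs from the active retriever constant.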
|
||||
8
config/retriex/agent.yaml
Normal file
8
config/retriex/agent.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
# Agent orchestration limits and user-visible source/progress labels.
|
||||
# Values mirror the current 1.4.2 defaults.
|
||||
parameters:
|
||||
retriex.agent.config:
|
||||
commerce_history_budget_chars: 1000
|
||||
product_search_knowledge_chunk_limit: 6
|
||||
advisory_product_search_knowledge_chunk_limit: 9
|
||||
optimized_shop_query_prefix_pattern: '/^(?:keywords?|suchquery|search\s*query|query)\s*:\s*/iu'
|
||||
12
config/retriex/commerce.yaml
Normal file
12
config/retriex/commerce.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# Commerce / Shopware Store API configuration.
|
||||
# The existing Commerce and Shopware services stay unchanged; these values only centralize wiring.
|
||||
parameters:
|
||||
retriex.commerce.enabled: true
|
||||
retriex.commerce.max_shop_results: '%env(SHOPWARE_STORE_API_MAX_RESULT)%'
|
||||
retriex.commerce.shop_timeout: 5
|
||||
retriex.commerce.store_api_base_url: '%env(SHOPWARE_STORE_API_BASE_URL)%'
|
||||
retriex.commerce.sales_channel_access_key: '%env(SHOPWARE_SALES_CHANNEL_ACCESS_KEY)%'
|
||||
|
||||
retriex.commerce.search_repair.enabled: true
|
||||
retriex.commerce.search_repair.max_queries: 3
|
||||
retriex.commerce.search_repair.min_primary_results_without_repair: 2
|
||||
10
config/retriex/index.yaml
Normal file
10
config/retriex/index.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
# Fallback ingest/index configuration.
|
||||
# The DB-based IngestProfile remains authoritative when an active profile exists.
|
||||
parameters:
|
||||
retriex.index.chunk_size: 250
|
||||
retriex.index.chunk_overlap: 50
|
||||
retriex.index.embedding_model: 'intfloat/multilingual-e5-base'
|
||||
retriex.index.embedding_dimension: 768
|
||||
retriex.index.scoring_version: 1
|
||||
retriex.index.index_format: 'ndjson'
|
||||
retriex.index.vector_backend: 'faiss'
|
||||
17
config/retriex/model.yaml
Normal file
17
config/retriex/model.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
# Default model generation values used only when no DB value/user input overrides them.
|
||||
parameters:
|
||||
retriex.model.default_name: 'mto-model'
|
||||
retriex.model.default_stream: false
|
||||
retriex.model.default_temperature: 0.1
|
||||
retriex.model.default_top_k: 20
|
||||
retriex.model.default_top_p: 0.8
|
||||
retriex.model.default_repeat_penalty: 1.05
|
||||
retriex.model.default_num_ctx: 4096
|
||||
retriex.model.default_retrieval_max_chunks: 25
|
||||
retriex.model.default_retrieval_vector_top_k: 25
|
||||
|
||||
retriex.model.guardrail_min_num_ctx: 512
|
||||
retriex.model.guardrail_max_retrieval_chunks: 200
|
||||
retriex.model.guardrail_max_vector_top_k: 200
|
||||
|
||||
retriex.llm.timeout_seconds: 600
|
||||
18
config/retriex/prompt.yaml
Normal file
18
config/retriex/prompt.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
# Prompt budget and prompt rendering limits.
|
||||
# Existing prompt wording/rules remain in PromptBuilderConfig for this minimally invasive round.
|
||||
parameters:
|
||||
retriex.prompt.config:
|
||||
budget:
|
||||
chars_per_token: 4
|
||||
history_padding_chars: 400
|
||||
output_reserve_ratio: 0.25
|
||||
output_reserve_min_tokens: 768
|
||||
output_reserve_max_tokens: 6000
|
||||
safety_reserve_ratio: 0.05
|
||||
safety_reserve_min_tokens: 256
|
||||
safety_reserve_max_tokens: 1024
|
||||
min_prompt_budget_tokens: 1024
|
||||
shop_results:
|
||||
max_results_in_prompt: 24
|
||||
detailed_max_count: 5
|
||||
technical_product_keyword_match_threshold: 2
|
||||
28
config/retriex/retrieval.yaml
Normal file
28
config/retriex/retrieval.yaml
Normal file
@@ -0,0 +1,28 @@
|
||||
# Current 1.4.2 retrieval constants documented as configuration inventory.
|
||||
# In this round these values are exposed by config dump/validation; the retriever logic remains unchanged.
|
||||
parameters:
|
||||
retriex.retrieval.inventory:
|
||||
hard_max_chunks: 6
|
||||
hard_max_vectork: 18
|
||||
hard_max_keywordk: 36
|
||||
vector_score_threshold: 0.83
|
||||
threshold_floor: 0.75
|
||||
threshold_ceil: 0.90
|
||||
list_bonus: 1.35
|
||||
rrf_k: 50
|
||||
keyword_topk_multiplier: 2.0
|
||||
keyword_score_threshold: 0.35
|
||||
keyword_rrf_weight: 1.15
|
||||
scoped_vector_rrf_weight: 1.20
|
||||
scoped_keyword_rrf_weight: 1.30
|
||||
empty_rrf_fallback_topn: 1
|
||||
max_chunks_per_doc: 2
|
||||
min_chunk_distance: 2
|
||||
dominant_doc_window: 6
|
||||
dominant_doc_min_hits: 3
|
||||
dominant_doc_max_chunks: 4
|
||||
exact_document_max_chunks: 6
|
||||
focused_product_window: 8
|
||||
focused_product_min_score: 10.0
|
||||
focused_product_min_gap: 4.0
|
||||
focused_product_max_chunks: 4
|
||||
19
config/retriex/runtime.yaml
Normal file
19
config/retriex/runtime.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# RetrieX runtime paths.
|
||||
# Keep these values customer/project specific and keep service wiring generic.
|
||||
parameters:
|
||||
retriex.root: '%kernel.project_dir%'
|
||||
|
||||
retriex.knowledge.root: '%retriex.root%/var/knowledge'
|
||||
retriex.knowledge.ndjson: '%retriex.knowledge.root%/index.ndjson'
|
||||
retriex.knowledge.index_meta: '%retriex.knowledge.root%/index_meta.json'
|
||||
retriex.knowledge.vector_index: '%retriex.knowledge.root%/vector.index'
|
||||
retriex.knowledge.vector_index_meta: '%retriex.knowledge.root%/vector.index.meta.json'
|
||||
retriex.knowledge.runtime_meta: '%retriex.knowledge.root%/index_runtime.json'
|
||||
retriex.knowledge.upload: '%retriex.knowledge.root%/uploads'
|
||||
|
||||
retriex.knowledge.tags_ndjson: '%retriex.knowledge.root%/tags.ndjson'
|
||||
retriex.knowledge.vector_tags_index: '%retriex.knowledge.root%/vector_tags.index'
|
||||
retriex.knowledge.vector_tags_index_meta: '%retriex.knowledge.root%/vector_tags.index.meta.json'
|
||||
|
||||
retriex.locks.dir: '%retriex.knowledge.root%/locks'
|
||||
retriex.tags.rebuild_lock: '%retriex.locks.dir%/tag_rebuild.lock'
|
||||
31
config/retriex/vector.yaml
Normal file
31
config/retriex/vector.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
# Vector runtime configuration shared by Symfony and vector control commands.
|
||||
parameters:
|
||||
retriex.vector.script_dir: '%retriex.root%/python/vector'
|
||||
retriex.vector.python_bin: '%retriex.root%/.venv/bin/python3'
|
||||
retriex.vector.control_script: '%retriex.vector.script_dir%/vector_control.py'
|
||||
retriex.vector.ingest_script: '%retriex.vector.script_dir%/vector_ingest.py'
|
||||
retriex.vector.search_script: '%retriex.vector.script_dir%/vector_search.py'
|
||||
retriex.vector.ingest_tags_script: '%retriex.vector.script_dir%/vector_ingest_tags.py'
|
||||
retriex.vector.search_tags_script: '%retriex.vector.script_dir%/vector_search_tags.py'
|
||||
|
||||
retriex.vector.host: '0.0.0.0'
|
||||
retriex.vector.port: 8090
|
||||
retriex.vector.service_url: 'http://127.0.0.1:%retriex.vector.port%'
|
||||
retriex.vector.timeout: 600
|
||||
|
||||
retriex.vector.search.min_score: 0.30
|
||||
retriex.vector.search.max_limit: 200
|
||||
retriex.vector.search.http_timeout: 10
|
||||
|
||||
retriex.vector.tags.min_score: 0.72
|
||||
retriex.vector.tags.default_limit: 8
|
||||
retriex.vector.tags.max_limit: 50
|
||||
retriex.vector.tags.http_timeout: 10
|
||||
|
||||
retriex.vector.tag_routing.default_topk: 8
|
||||
retriex.vector.tag_routing.min_best_score: 0.72
|
||||
retriex.vector.tag_routing.max_score_drop_from_best: 0.08
|
||||
retriex.vector.tag_routing.max_routing_tags: 5
|
||||
retriex.vector.tag_routing.max_candidate_docs: 80
|
||||
retriex.vector.tag_routing.multi_tag_bonus_per_extra_tag: 0.05
|
||||
retriex.vector.tag_routing.max_multi_tag_bonus: 0.15
|
||||
BIN
rag-inprogress.zip
Normal file
BIN
rag-inprogress.zip
Normal file
Binary file not shown.
120
src/Command/ConfigDumpEffectiveCommand.php
Normal file
120
src/Command/ConfigDumpEffectiveCommand.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Config\RetriexEffectiveConfigProvider;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
    name: 'mto:agent:config:dump-effective',
    description: 'Dump the effective RetrieX configuration inventory'
)]
/**
 * Console command that prints the configuration returned by
 * RetriexEffectiveConfigProvider::dump(), either as pretty-printed JSON
 * (default) or as a compact human-readable summary (--summary).
 */
final class ConfigDumpEffectiveCommand extends Command
{
    public function __construct(
        private readonly RetriexEffectiveConfigProvider $provider,
    ) {
        parent::__construct();
    }

    /**
     * Registers the --summary flag.
     */
    protected function configure(): void
    {
        $this->addOption('summary', null, InputOption::VALUE_NONE, 'Render a compact summary instead of JSON.');
    }

    /**
     * Dumps the effective configuration.
     *
     * @return int Command::SUCCESS when the configuration was rendered,
     *             Command::FAILURE when it could not be encoded as JSON.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $config = $this->provider->dump();

        if ((bool) $input->getOption('summary')) {
            $this->renderSummary(new SymfonyStyle($input, $output), $config);

            return Command::SUCCESS;
        }

        try {
            $json = json_encode(
                $config,
                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR,
            );
        } catch (\JsonException $e) {
            // An empty "{}" with a success exit code would hide the problem from scripts.
            (new SymfonyStyle($input, $output))->error(
                'Could not encode the effective configuration as JSON: ' . $e->getMessage()
            );

            return Command::FAILURE;
        }

        // OUTPUT_RAW keeps the console formatter from interpreting "<...>" sequences inside values.
        $output->writeln($json, OutputInterface::OUTPUT_RAW);

        return Command::SUCCESS;
    }

    /**
     * Renders a short per-section overview of the dumped configuration.
     *
     * For the model and index sections the regular key is preferred and the
     * "default_" / "fallback_" key is used when the regular key is absent.
     *
     * @param array<string, mixed> $config
     */
    private function renderSummary(SymfonyStyle $io, array $config): void
    {
        $io->title('RetrieX effective configuration');

        $runtime = $this->section($config, 'runtime');
        $model = $this->section($config, 'model_generation');
        $index = $this->section($config, 'index');
        $retrieval = $this->section($config, 'retrieval');
        $vector = $this->section($config, 'vector');
        $commerce = $this->section($config, 'commerce');

        $io->section('Runtime');
        $io->definitionList(
            ['root' => $this->text($runtime, 'root')],
            ['knowledge_root' => $this->text($runtime, 'knowledge_root')],
            ['index_ndjson' => $this->text($runtime, 'index_ndjson')]
        );

        $io->section('Model');
        $io->definitionList(
            ['model_name' => $this->text($model, 'model_name', 'default_model_name')],
            ['num_ctx' => $this->text($model, 'num_ctx', 'default_num_ctx')],
            ['retrieval_max_chunks' => $this->text($model, 'retrieval_max_chunks', 'default_retrieval_max_chunks')],
            ['retrieval_vector_top_k' => $this->text($model, 'retrieval_vector_top_k', 'default_retrieval_vector_top_k')]
        );

        $io->section('Index');
        $io->definitionList(
            ['chunk_size' => $this->text($index, 'chunk_size', 'fallback_chunk_size')],
            ['chunk_overlap' => $this->text($index, 'chunk_overlap', 'fallback_chunk_overlap')],
            ['embedding_model' => $this->text($index, 'embedding_model', 'fallback_embedding_model')],
            ['embedding_dimension' => $this->text($index, 'embedding_dimension', 'fallback_embedding_dimension')]
        );

        $io->section('Retrieval');
        $io->definitionList(
            ['hard_max_chunks' => $this->text($retrieval, 'hard_max_chunks')],
            ['hard_max_vectork' => $this->text($retrieval, 'hard_max_vectork')],
            ['vector_score_threshold' => $this->text($retrieval, 'vector_score_threshold')]
        );

        $io->section('Vector');
        $io->definitionList(
            ['service_url' => $this->text($vector, 'service_url')],
            ['port' => $this->text($vector, 'port')],
            ['timeout' => $this->text($vector, 'timeout')]
        );

        $io->section('Commerce');
        $io->definitionList(
            ['enabled' => $this->formatBool($commerce['enabled'] ?? false)],
            ['max_shop_results' => $this->text($commerce, 'max_shop_results')],
            ['store_api_base_url' => $this->text($commerce, 'store_api_base_url')]
        );
    }

    /**
     * Returns the named sub-array of $data, or an empty array when it is missing or not an array.
     *
     * @param array<string, mixed> $data
     * @return array<string, mixed>
     */
    private function section(array $data, string $key): array
    {
        return isset($data[$key]) && is_array($data[$key]) ? $data[$key] : [];
    }

    /**
     * Returns the first non-null value among $keys as display text.
     *
     * Scalars are cast to string exactly as a plain (string) cast would;
     * non-scalar values are shown as their type name instead of triggering
     * an "Array to string conversion" warning.
     *
     * @param array<string, mixed> $section
     */
    private function text(array $section, string ...$keys): string
    {
        foreach ($keys as $key) {
            if (!isset($section[$key])) {
                continue;
            }

            $value = $section[$key];

            return is_scalar($value) ? (string) $value : get_debug_type($value);
        }

        return '';
    }

    /**
     * Formats a boolean-like value ("1", "true", "on", "yes", true, ...) as "yes" or "no".
     */
    private function formatBool(mixed $value): string
    {
        return filter_var($value, FILTER_VALIDATE_BOOLEAN) ? 'yes' : 'no';
    }
}
|
||||
74
src/Command/ConfigValidateCommand.php
Normal file
74
src/Command/ConfigValidateCommand.php
Normal file
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Config\RetriexEffectiveConfigProvider;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
    name: 'mto:agent:config:validate',
    description: 'Validate the effective RetrieX configuration'
)]
/**
 * Console command that runs RetriexEffectiveConfigProvider::validate() and
 * reports the result as a summary (default) or as JSON (--json).
 *
 * The exit code is Command::SUCCESS only when the validation status is "OK".
 */
final class ConfigValidateCommand extends Command
{
    public function __construct(
        private readonly RetriexEffectiveConfigProvider $provider,
    ) {
        parent::__construct();
    }

    /**
     * Registers the --json flag.
     */
    protected function configure(): void
    {
        $this->addOption('json', null, InputOption::VALUE_NONE, 'Render validation result as JSON.');
    }

    /**
     * Validates the configuration and renders the result.
     *
     * @return int Command::SUCCESS when status is "OK"; Command::FAILURE when
     *             validation reported errors or the JSON output could not be produced.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $result = $this->provider->validate();
        $exitCode = $result['status'] === 'OK' ? Command::SUCCESS : Command::FAILURE;

        if (!(bool) $input->getOption('json')) {
            $this->renderSummary(new SymfonyStyle($input, $output), $result);

            return $exitCode;
        }

        try {
            $json = json_encode(
                $result,
                JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR,
            );
        } catch (\JsonException $e) {
            // Emitting "{}" would make a consumer believe there were no errors or warnings.
            (new SymfonyStyle($input, $output))->error(
                'Could not encode the validation result as JSON: ' . $e->getMessage()
            );

            return Command::FAILURE;
        }

        // OUTPUT_RAW keeps the console formatter from interpreting "<...>" sequences inside messages.
        $output->writeln($json, OutputInterface::OUTPUT_RAW);

        return $exitCode;
    }

    /**
     * Prints a success banner when there is nothing to report, otherwise the
     * error and warning lists as bullet lines.
     *
     * @param array{status:string, errors:list<string>, warnings:list<string>, config:array<string,mixed>} $result
     */
    private function renderSummary(SymfonyStyle $io, array $result): void
    {
        $io->title('RetrieX configuration validation');

        if ($result['errors'] === [] && $result['warnings'] === []) {
            $io->success('Configuration is valid.');

            return;
        }

        if ($result['errors'] !== []) {
            $io->section('Errors');
            foreach ($result['errors'] as $error) {
                $io->writeln('- ' . $error);
            }
        }

        if ($result['warnings'] !== []) {
            $io->section('Warnings');
            foreach ($result['warnings'] as $warning) {
                $io->writeln('- ' . $warning);
            }
        }
    }
}
|
||||
66
src/Config/ModelGenerationDefaultsConfig.php
Normal file
66
src/Config/ModelGenerationDefaultsConfig.php
Normal file
@@ -0,0 +1,66 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
 * Immutable value object holding default model generation settings.
 *
 * Every constructor argument is optional; the built-in defaults are the
 * literals declared in the constructor signature.
 */
final readonly class ModelGenerationDefaultsConfig
{
    public function __construct(
        private string $modelName = 'mto-model',
        private bool $stream = false,
        private float $temperature = 0.1,
        private int $topK = 20,
        private float $topP = 0.8,
        private float $repeatPenalty = 1.05,
        private int $numCtx = 4096,
        private int $retrievalMaxChunks = 25,
        private int $retrievalVectorTopK = 25,
    ) {}

    /** Default model name. */
    public function getModelName(): string
    {
        return $this->modelName;
    }

    /** Default value of the stream flag. */
    public function isStream(): bool
    {
        return $this->stream;
    }

    /** Default temperature. */
    public function getTemperature(): float
    {
        return $this->temperature;
    }

    /** Default top-k value. */
    public function getTopK(): int
    {
        return $this->topK;
    }

    /** Default top-p value. */
    public function getTopP(): float
    {
        return $this->topP;
    }

    /** Default repeat penalty. */
    public function getRepeatPenalty(): float
    {
        return $this->repeatPenalty;
    }

    /** Default num_ctx value. */
    public function getNumCtx(): int
    {
        return $this->numCtx;
    }

    /** Default maximum number of retrieval chunks. */
    public function getRetrievalMaxChunks(): int
    {
        return $this->retrievalMaxChunks;
    }

    /** Default vector top-k used for retrieval. */
    public function getRetrievalVectorTopK(): int
    {
        return $this->retrievalVectorTopK;
    }
}
|
||||
459
src/Config/RetriexEffectiveConfigProvider.php
Normal file
459
src/Config/RetriexEffectiveConfigProvider.php
Normal file
@@ -0,0 +1,459 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
use App\Index\IndexConfigurationProvider;
|
||||
use App\Config\NdjsonHybridRetrieverConfig;
|
||||
use App\Service\ModelGenerationConfigProvider;
|
||||
use Symfony\Component\DependencyInjection\ParameterBag\ParameterBagInterface;
|
||||
|
||||
final readonly class RetriexEffectiveConfigProvider
|
||||
{
|
||||
/**
 * @param ParameterBagInterface         $parameters    container parameters
 * @param ModelGenerationConfigProvider $modelProvider source of the active model generation config
 * @param IndexConfigurationProvider    $indexProvider source of the index configuration
 * @param PromptBuilderConfig           $promptConfig  prompt budget and rendering limits
 */
public function __construct(
    private ParameterBagInterface $parameters,
    private ModelGenerationConfigProvider $modelProvider,
    private IndexConfigurationProvider $indexProvider,
    private PromptBuilderConfig $promptConfig,
) {}
|
||||
|
||||
/**
 * Collects every configuration section into one nested array.
 *
 * Sections are gathered in the same order in which they appear in the result.
 *
 * @return array<string, mixed>
 */
public function dump(): array
{
    $runtime = $this->runtimeConfig();
    $index = $this->indexConfig();
    $modelGeneration = $this->modelConfig();
    $llm = ['timeout_seconds' => $this->param('retriex.llm.timeout_seconds')];
    $retrieval = $this->retrievalConfig();
    $prompt = $this->promptConfig();
    $vector = $this->vectorConfig();
    $commerce = $this->commerceConfig();

    return [
        'runtime' => $runtime,
        'index' => $index,
        'model_generation' => $modelGeneration,
        'llm' => $llm,
        'retrieval' => $retrieval,
        'prompt' => $prompt,
        'vector' => $vector,
        'commerce' => $commerce,
    ];
}
|
||||
|
||||
/**
 * Dumps the effective configuration and runs every section validator on it.
 *
 * The status is "OK" when no validator recorded an error; warnings alone
 * do not change the status.
 *
 * @return array{status:string, errors:list<string>, warnings:list<string>, config:array<string,mixed>}
 */
public function validate(): array
{
    $errors = [];
    $warnings = [];
    $config = $this->dump();

    $this->validateRuntime($config['runtime'], $errors, $warnings);
    $this->validateIndex($config['index'], $errors, $warnings);
    $this->validateModel($config['model_generation'], $errors, $warnings);
    $this->validateRetrieval($config['retrieval'], $errors, $warnings);
    $this->validatePrompt($config['prompt'], $errors, $warnings);
    $this->validateVector($config['vector'], $errors, $warnings);
    $this->validateCommerce($config['commerce'], $errors, $warnings);

    $hasErrors = $errors !== [];

    return [
        'status' => $hasErrors ? 'ERROR' : 'OK',
        'errors' => $errors,
        'warnings' => $warnings,
        'config' => $config,
    ];
}
|
||||
|
||||
/**
 * Reads the runtime path parameters and returns them under short keys.
 *
 * @return array<string, mixed>
 */
private function runtimeConfig(): array
{
    // Output key => container parameter name; iteration order defines the output order.
    $parameterByKey = [
        'root' => 'retriex.root',
        'knowledge_root' => 'retriex.knowledge.root',
        'index_ndjson' => 'retriex.knowledge.ndjson',
        'index_meta' => 'retriex.knowledge.index_meta',
        'runtime_meta' => 'retriex.knowledge.runtime_meta',
        'upload_dir' => 'retriex.knowledge.upload',
        'locks_dir' => 'retriex.locks.dir',
    ];

    $runtime = [];
    foreach ($parameterByKey as $key => $parameterName) {
        $runtime[$key] = $this->param($parameterName);
    }

    return $runtime;
}
|
||||
|
||||
/**
 * Returns the index configuration reported by the index provider.
 *
 * When the provider throws, the result instead carries the exception
 * message under "error" plus the "fallback_*" values taken from the
 * retriex.index.* parameters, so the dump still shows what is configured.
 *
 * @return array<string, mixed>
 */
private function indexConfig(): array
{
    try {
        $index = $this->indexProvider->getConfiguration();

        return [
            'chunk_size' => $index->getChunkSize(),
            'chunk_overlap' => $index->getChunkOverlap(),
            'embedding_model' => $index->getEmbeddingModel(),
            'embedding_dimension' => $index->getEmbeddingDimension(),
            'scoring_version' => $index->getScoringVersion(),
            'index_format' => $index->getIndexFormat(),
            'vector_backend' => $index->getVectorBackend(),
        ];
    } catch (\Throwable $e) {
        // Deliberate best effort: a diagnostic dump must not crash because the provider is unavailable.
        return [
            'error' => $e->getMessage(),
            'fallback_chunk_size' => $this->param('retriex.index.chunk_size'),
            'fallback_chunk_overlap' => $this->param('retriex.index.chunk_overlap'),
            'fallback_embedding_model' => $this->param('retriex.index.embedding_model'),
            'fallback_embedding_dimension' => $this->param('retriex.index.embedding_dimension'),
            'fallback_scoring_version' => $this->param('retriex.index.scoring_version'),
            // Mirror the remaining fields of the success branch so the fallback view is complete.
            'fallback_index_format' => $this->param('retriex.index.index_format'),
            'fallback_vector_backend' => $this->param('retriex.index.vector_backend'),
        ];
    }
}
|
||||
|
||||
/**
 * Returns the active model generation configuration reported by the model provider.
 *
 * When the provider throws, the result instead carries the exception
 * message under "error" plus the "default_*" values taken from the
 * retriex.model.* parameters.
 *
 * @return array<string, mixed>
 */
private function modelConfig(): array
{
    try {
        $model = $this->modelProvider->getActiveForModel();

        return [
            'model_name' => $model->getModelName(),
            'version' => $model->getVersion(),
            'active' => $model->isActive(),
            'stream' => $model->isStream(),
            'temperature' => $model->getTemperature(),
            'top_k' => $model->getTopK(),
            'top_p' => $model->getTopP(),
            'repeat_penalty' => $model->getRepeatPenalty(),
            'num_ctx' => $model->getNumCtx(),
            'retrieval_max_chunks' => $model->getRetrievalMaxChunks(),
            'retrieval_vector_top_k' => $model->getRetrievalVectorTopK(),
        ];
    } catch (\Throwable $e) {
        // Deliberate best effort: a diagnostic dump must not crash because the provider is unavailable.
        return [
            'error' => $e->getMessage(),
            'default_model_name' => $this->param('retriex.model.default_name'),
            'default_num_ctx' => $this->param('retriex.model.default_num_ctx'),
            'default_retrieval_max_chunks' => $this->param('retriex.model.default_retrieval_max_chunks'),
            'default_retrieval_vector_top_k' => $this->param('retriex.model.default_retrieval_vector_top_k'),
            // Remaining generation defaults, mirroring the fields of the success branch.
            'default_stream' => $this->param('retriex.model.default_stream'),
            'default_temperature' => $this->param('retriex.model.default_temperature'),
            'default_top_k' => $this->param('retriex.model.default_top_k'),
            'default_top_p' => $this->param('retriex.model.default_top_p'),
            'default_repeat_penalty' => $this->param('retriex.model.default_repeat_penalty'),
        ];
    }
}
|
||||
|
||||
/**
 * Lists the NdjsonHybridRetrieverConfig constants and, under
 * "inventory_parameter", the retriex.retrieval.inventory parameter
 * (an empty array is passed as its default).
 *
 * @return array<string, mixed>
 */
private function retrievalConfig(): array
{
    $constants = [
        'hard_max_chunks' => NdjsonHybridRetrieverConfig::HARD_MAX_CHUNKS,
        'hard_max_vectork' => NdjsonHybridRetrieverConfig::HARD_MAX_VECTORK,
        'hard_max_keywordk' => NdjsonHybridRetrieverConfig::HARD_MAX_KEYWORDK,
        'vector_score_threshold' => NdjsonHybridRetrieverConfig::VECTOR_SCORE_THRESHOLD,
        'threshold_floor' => NdjsonHybridRetrieverConfig::THRESHOLD_FLOOR,
        'threshold_ceil' => NdjsonHybridRetrieverConfig::THRESHOLD_CEIL,
        'list_bonus' => NdjsonHybridRetrieverConfig::LIST_BONUS,
        'rrf_k' => NdjsonHybridRetrieverConfig::RRF_K,
        'keyword_topk_multiplier' => NdjsonHybridRetrieverConfig::KEYWORD_TOPK_MULTIPLIER,
        'keyword_score_threshold' => NdjsonHybridRetrieverConfig::KEYWORD_SCORE_THRESHOLD,
        'keyword_rrf_weight' => NdjsonHybridRetrieverConfig::KEYWORD_RRF_WEIGHT,
        'scoped_vector_rrf_weight' => NdjsonHybridRetrieverConfig::SCOPED_VECTOR_RRF_WEIGHT,
        'scoped_keyword_rrf_weight' => NdjsonHybridRetrieverConfig::SCOPED_KEYWORD_RRF_WEIGHT,
        'empty_rrf_fallback_topn' => NdjsonHybridRetrieverConfig::EMPTY_RRF_FALLBACK_TOPN,
        'max_chunks_per_doc' => NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC,
        'min_chunk_distance' => NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE,
        'dominant_doc_window' => NdjsonHybridRetrieverConfig::DOMINANT_DOC_WINDOW,
        'dominant_doc_min_hits' => NdjsonHybridRetrieverConfig::DOMINANT_DOC_MIN_HITS,
        'dominant_doc_max_chunks' => NdjsonHybridRetrieverConfig::DOMINANT_DOC_MAX_CHUNKS,
        'exact_document_max_chunks' => NdjsonHybridRetrieverConfig::EXACT_DOCUMENT_MAX_CHUNKS,
        'focused_product_window' => NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_WINDOW,
        'focused_product_min_score' => NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_SCORE,
        'focused_product_min_gap' => NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_GAP,
        'focused_product_max_chunks' => NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MAX_CHUNKS,
    ];

    // Array union appends the new key after the constants, keeping the original key order.
    return $constants + [
        'inventory_parameter' => $this->param('retriex.retrieval.inventory', []),
    ];
}
|
||||
|
||||
/**
 * Flattens the injected PromptBuilderConfig getters into an array.
 *
 * @return array<string, mixed>
 */
private function promptConfig(): array
{
    $prompt = $this->promptConfig;

    return [
        'chars_per_token' => $prompt->getCharsPerToken(),
        'history_padding_chars' => $prompt->getHistoryPaddingChars(),
        'output_reserve_ratio' => $prompt->getOutputReserveRatio(),
        'output_reserve_min_tokens' => $prompt->getOutputReserveMinTokens(),
        'output_reserve_max_tokens' => $prompt->getOutputReserveMaxTokens(),
        'safety_reserve_ratio' => $prompt->getSafetyReserveRatio(),
        'safety_reserve_min_tokens' => $prompt->getSafetyReserveMinTokens(),
        'safety_reserve_max_tokens' => $prompt->getSafetyReserveMaxTokens(),
        'min_prompt_budget_tokens' => $prompt->getMinPromptBudgetTokens(),
        'max_shop_results_in_prompt' => $prompt->getMaxShopResultsInPrompt(),
        'detailed_shop_results_max_count' => $prompt->getDetailedShopResultsMaxCount(),
        'technical_product_keyword_match_threshold' => $prompt->getTechnicalProductKeywordMatchThreshold(),
    ];
}
|
||||
|
||||
/**
 * Reads the retriex.vector.* parameters: top-level service settings plus
 * the nested "search", "tags" and "tag_routing" groups.
 *
 * @return array<string, mixed>
 */
private function vectorConfig(): array
{
    $vector = [
        'service_url' => $this->param('retriex.vector.service_url'),
        'host' => $this->param('retriex.vector.host'),
        'port' => $this->param('retriex.vector.port'),
        'python_bin' => $this->param('retriex.vector.python_bin'),
        'control_script' => $this->param('retriex.vector.control_script'),
        'timeout' => $this->param('retriex.vector.timeout'),
    ];

    $vector['search'] = [
        'min_score' => $this->param('retriex.vector.search.min_score'),
        'max_limit' => $this->param('retriex.vector.search.max_limit'),
        'http_timeout' => $this->param('retriex.vector.search.http_timeout'),
    ];

    $vector['tags'] = [
        'min_score' => $this->param('retriex.vector.tags.min_score'),
        'default_limit' => $this->param('retriex.vector.tags.default_limit'),
        'max_limit' => $this->param('retriex.vector.tags.max_limit'),
        'http_timeout' => $this->param('retriex.vector.tags.http_timeout'),
    ];

    $vector['tag_routing'] = [
        'default_topk' => $this->param('retriex.vector.tag_routing.default_topk'),
        'min_best_score' => $this->param('retriex.vector.tag_routing.min_best_score'),
        'max_score_drop_from_best' => $this->param('retriex.vector.tag_routing.max_score_drop_from_best'),
        'max_routing_tags' => $this->param('retriex.vector.tag_routing.max_routing_tags'),
        'max_candidate_docs' => $this->param('retriex.vector.tag_routing.max_candidate_docs'),
        'multi_tag_bonus_per_extra_tag' => $this->param('retriex.vector.tag_routing.multi_tag_bonus_per_extra_tag'),
        'max_multi_tag_bonus' => $this->param('retriex.vector.tag_routing.max_multi_tag_bonus'),
    ];

    return $vector;
}
|
||||
|
||||
/**
 * Reads the retriex.commerce.* parameters.
 *
 * The sales channel access key itself is never included; only a boolean
 * telling whether a non-blank key is configured.
 *
 * @return array<string, mixed>
 */
private function commerceConfig(): array
{
    $accessKey = $this->param('retriex.commerce.sales_channel_access_key');

    // A plain `!== ''` comparison would report null or whitespace-only values as "configured".
    $accessKeyConfigured = is_string($accessKey) && trim($accessKey) !== '';

    return [
        'enabled' => $this->param('retriex.commerce.enabled'),
        'max_shop_results' => $this->param('retriex.commerce.max_shop_results'),
        'shop_timeout' => $this->param('retriex.commerce.shop_timeout'),
        'store_api_base_url' => $this->param('retriex.commerce.store_api_base_url'),
        'sales_channel_access_key_configured' => $accessKeyConfigured,
        'search_repair' => [
            'enabled' => $this->param('retriex.commerce.search_repair.enabled'),
            'max_queries' => $this->param('retriex.commerce.search_repair.max_queries'),
            'min_primary_results_without_repair' => $this->param('retriex.commerce.search_repair.min_primary_results_without_repair'),
        ],
    ];
}
|
||||
|
||||
/**
 * Records an error for every required runtime path that is missing or blank.
 *
 * $warnings is accepted for signature parity with the other validators;
 * this method does not add to it.
 *
 * @param array<string, mixed> $runtime
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateRuntime(array $runtime, array &$errors, array &$warnings): void
{
    $requiredKeys = ['root', 'knowledge_root', 'index_ndjson', 'index_meta', 'upload_dir'];

    foreach ($requiredKeys as $requiredKey) {
        $value = trim((string) ($runtime[$requiredKey] ?? ''));

        if ($value !== '') {
            continue;
        }

        $errors[] = 'runtime.' . $requiredKey . ' must not be empty.';
    }
}
|
||||
|
||||
/**
 * Validates the index section of the dumped configuration.
 *
 * If the section carries an "error" entry, only a warning is recorded and
 * no further checks run. Otherwise chunk size/overlap, embedding model,
 * embedding dimension and scoring version are checked.
 *
 * @param array<string, mixed> $index
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateIndex(array $index, array &$errors, array &$warnings): void
{
    if (isset($index['error'])) {
        $warnings[] = 'index configuration could not be loaded from DB/provider: ' . (string) $index['error'];

        return;
    }

    $size = $this->asInt($index['chunk_size'] ?? null);
    $overlap = $this->asInt($index['chunk_overlap'] ?? null);

    $sizeIsPositive = $size !== null && $size > 0;
    if (!$sizeIsPositive) {
        $errors[] = 'index.chunk_size must be greater than 0.';
    }

    $overlapIsNonNegative = $overlap !== null && $overlap >= 0;
    if (!$overlapIsNonNegative) {
        $errors[] = 'index.chunk_overlap must be greater than or equal to 0.';
    }

    // Only comparable when both values could be read as integers.
    $bothKnown = $size !== null && $overlap !== null;
    if ($bothKnown && $overlap >= $size) {
        $errors[] = 'index.chunk_overlap must be smaller than index.chunk_size.';
    }

    $embeddingModel = trim((string) ($index['embedding_model'] ?? ''));
    if ($embeddingModel === '') {
        $errors[] = 'index.embedding_model must not be empty.';
    }

    $embeddingDimension = $this->asInt($index['embedding_dimension'] ?? null) ?? 0;
    if ($embeddingDimension <= 0) {
        $errors[] = 'index.embedding_dimension must be greater than 0.';
    }

    $scoringVersion = $this->asInt($index['scoring_version'] ?? null) ?? 0;
    if ($scoringVersion <= 0) {
        $errors[] = 'index.scoring_version must be greater than 0.';
    }
}
|
||||
|
||||
/**
 * Validates the model_generation section of the dumped configuration.
 *
 * If the section carries an "error" entry, only a warning is recorded and
 * no further checks run. The minimum num_ctx comes from the
 * retriex.model.guardrail_min_num_ctx parameter and falls back to 512
 * when that parameter is absent or not an integer.
 *
 * @param array<string, mixed> $model
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateModel(array $model, array &$errors, array &$warnings): void
{
    if (isset($model['error'])) {
        $warnings[] = 'model configuration could not be loaded from DB/provider: ' . (string) $model['error'];

        return;
    }

    if (trim((string) ($model['model_name'] ?? '')) === '') {
        $errors[] = 'model_generation.model_name must not be empty.';
    }

    // Keep the validator in sync with the configured guardrail instead of a second hard-coded 512.
    $minNumCtx = $this->asInt($this->param('retriex.model.guardrail_min_num_ctx', 512)) ?? 512;

    if (($this->asInt($model['num_ctx'] ?? null) ?? 0) < $minNumCtx) {
        $errors[] = 'model_generation.num_ctx must be at least ' . $minNumCtx . '.';
    }

    if (($this->asInt($model['retrieval_max_chunks'] ?? null) ?? 0) < 1) {
        $errors[] = 'model_generation.retrieval_max_chunks must be greater than 0.';
    }

    if (($this->asInt($model['retrieval_vector_top_k'] ?? null) ?? 0) < 1) {
        $errors[] = 'model_generation.retrieval_vector_top_k must be greater than 0.';
    }
}
|
||||
|
||||
/**
 * Checks the retrieval thresholds, the hard chunk limit and the inventory baseline.
 *
 * Threshold values and the chunk limit are read defensively: a missing or
 * non-numeric value is reported as an error instead of being cast to 0, which
 * would raise an "Undefined array key" warning and could let an incomplete
 * configuration pass the ordering check.
 *
 * @param array<string, mixed> $retrieval
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateRetrieval(array $retrieval, array &$errors, array &$warnings): void
{
    $thresholds = [];

    foreach (['threshold_floor', 'vector_score_threshold', 'threshold_ceil'] as $key) {
        $thresholds[$key] = $this->asFloat($retrieval[$key] ?? null);

        if ($thresholds[$key] === null) {
            $errors[] = 'retrieval.' . $key . ' must be a numeric value.';
        }
    }

    // The ordering is only meaningful when all three thresholds are numeric.
    if (!in_array(null, $thresholds, true)) {
        $floor = $thresholds['threshold_floor'];
        $threshold = $thresholds['vector_score_threshold'];
        $ceil = $thresholds['threshold_ceil'];

        if ($floor > $threshold || $threshold > $ceil) {
            $errors[] = 'retrieval threshold must satisfy threshold_floor <= vector_score_threshold <= threshold_ceil.';
        }
    }

    // A missing or non-numeric limit is treated as 0 and therefore rejected.
    if (($this->asInt($retrieval['hard_max_chunks'] ?? null) ?? 0) < 1) {
        $errors[] = 'retrieval.hard_max_chunks must be greater than 0.';
    }

    $inventory = $retrieval['inventory_parameter'] ?? [];
    if (is_array($inventory)) {
        foreach ($inventory as $key => $value) {
            // NOTE(review): the loose comparison is kept from the original so that
            // e.g. '0.5' and 0.5 count as equal - confirm this is intended.
            if (array_key_exists($key, $retrieval) && $retrieval[$key] != $value) {
                $warnings[] = 'retrieval.inventory.' . $key . ' differs from active retriever constant.';
            }
        }
    }
}
|
||||
|
||||
/**
 * Checks the prompt budgeting settings.
 *
 * Values are read defensively: a missing or non-numeric 'chars_per_token' is
 * treated as 0, and a missing or non-numeric reserve ratio is reported as out
 * of range. Direct array access would raise an "Undefined array key" warning
 * and cast a missing ratio to 0.0, which would be accepted as valid.
 *
 * @param array<string, mixed> $prompt
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validatePrompt(array $prompt, array &$errors, array &$warnings): void
{
    if (($this->asInt($prompt['chars_per_token'] ?? null) ?? 0) < 1) {
        $errors[] = 'prompt.chars_per_token must be greater than 0.';
    }

    // Ratio field => message recorded when the value is absent, non-numeric or outside [0, 1].
    $ratioMessages = [
        'output_reserve_ratio' => 'prompt.output_reserve_ratio must be between 0 and 1.',
        'safety_reserve_ratio' => 'prompt.safety_reserve_ratio must be between 0 and 1.',
    ];

    foreach ($ratioMessages as $key => $message) {
        $ratio = $this->asFloat($prompt[$key] ?? null);

        if ($ratio === null || $ratio < 0.0 || $ratio > 1.0) {
            $errors[] = $message;
        }
    }
}
|
||||
|
||||
/**
 * Checks the vector service settings: service URL, port and the minimum
 * scores of the 'search' and 'tags' sections.
 *
 * @param array<string, mixed> $vector
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateVector(array $vector, array &$errors, array &$warnings): void
{
    $serviceUrl = trim((string) ($vector['service_url'] ?? ''));
    if ($serviceUrl === '') {
        $errors[] = 'vector.service_url must not be empty.';
    }

    // A missing or non-numeric port is treated as 0.
    $port = $this->asInt($vector['port'] ?? null) ?? 0;
    if ($port < 1) {
        $errors[] = 'vector.port must be greater than 0.';
    }

    foreach (['search', 'tags'] as $section) {
        // A section that is absent or not an array yields no score at all.
        $settings = $vector[$section] ?? null;
        $rawScore = is_array($settings) ? ($settings['min_score'] ?? null) : null;
        $score = $this->asFloat($rawScore);

        if ($score === null || $score < 0.0 || $score > 1.0) {
            $errors[] = 'vector.' . $section . '.min_score' . ' must be between 0 and 1.';
        }
    }
}
|
||||
|
||||
/**
 * Checks the commerce settings; nothing is checked unless 'enabled' resolves to true.
 *
 * An empty store API base URL is an error, whereas an unresolvable result
 * limit is only recorded as a warning.
 *
 * @param array<string, mixed> $commerce
 * @param list<string> $errors
 * @param list<string> $warnings
 */
private function validateCommerce(array $commerce, array &$errors, array &$warnings): void
{
    $enabled = $this->asBool($commerce['enabled'] ?? false);
    if ($enabled === false) {
        return;
    }

    $baseUrl = trim((string) ($commerce['store_api_base_url'] ?? ''));
    if ($baseUrl === '') {
        $errors[] = 'commerce.store_api_base_url must not be empty when commerce is enabled.';
    }

    // A missing or non-numeric limit is treated as 0.
    $maxResults = $this->asInt($commerce['max_shop_results'] ?? null) ?? 0;
    if ($maxResults < 1) {
        $warnings[] = 'commerce.max_shop_results could not be resolved as a positive integer.';
    }
}
|
||||
|
||||
/**
 * Returns the parameter registered under $name, or $default when
 * $this->parameters reports that no such parameter exists.
 */
private function param(string $name, mixed $default = null): mixed
{
    return $this->parameters->has($name)
        ? $this->parameters->get($name)
        : $default;
}
|
||||
|
||||
/**
 * Casts a numeric value (as judged by is_numeric()) to int; anything else yields null.
 *
 * Fractional input is truncated by the cast, e.g. 7.9 becomes 7.
 */
private function asInt(mixed $value): ?int
{
    if (!is_numeric($value)) {
        return null;
    }

    return (int) $value;
}
|
||||
|
||||
/**
 * Casts a numeric value (as judged by is_numeric()) to float; anything else yields null.
 */
private function asFloat(mixed $value): ?float
{
    if (!is_numeric($value)) {
        return null;
    }

    return (float) $value;
}
|
||||
|
||||
/**
 * Interprets a loosely typed value as a boolean.
 *
 * Booleans are returned unchanged. A string is true only when, ignoring case,
 * it equals '1', 'true', 'yes' or 'on'; the string is not trimmed first.
 * Every other type falls back to PHP's (bool) cast.
 */
private function asBool(mixed $value): bool
{
    return match (true) {
        is_bool($value) => $value,
        is_string($value) => in_array(strtolower($value), ['1', 'true', 'yes', 'on'], true),
        default => (bool) $value,
    };
}
|
||||
}
|
||||
Reference in New Issue
Block a user