diff --git a/config/retriex/intent.yaml b/config/retriex/intent.yaml index 2168108..cea9474 100644 --- a/config/retriex/intent.yaml +++ b/config/retriex/intent.yaml @@ -132,6 +132,14 @@ parameters: - '/\bzubehoer\b/u' - '/\bersatzteil(?:e)?\b/u' + retriex.intent.catalog.config: + min_score: 0.72 + ambiguity_delta: 0.02 + intent_search_limit: 6 + list_search_limit: 3 + min_allowed_score: 0.0 + max_allowed_score: 1.0 + retriex.intent.light.config: quantity_words: - alle diff --git a/config/retriex/runtime.yaml b/config/retriex/runtime.yaml index 5192e16..f604c2b 100644 --- a/config/retriex/runtime.yaml +++ b/config/retriex/runtime.yaml @@ -17,3 +17,7 @@ parameters: retriex.locks.dir: '%retriex.knowledge.root%/locks' retriex.tags.rebuild_lock: '%retriex.locks.dir%/tag_rebuild.lock' + + retriex.context.config: + max_visible_regular_lines: 25 + max_full_lines: 500 diff --git a/config/services.yaml b/config/services.yaml index d475547..83c51e4 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -118,6 +118,14 @@ services: arguments: $config: '%retriex.vocabulary.config%' + App\Config\ContextServiceConfig: + arguments: + $config: '%retriex.context.config%' + + App\Config\CatalogIntentConfig: + arguments: + $config: '%retriex.intent.catalog.config%' + App\Config\PromptBuilderConfig: arguments: $config: '%retriex.prompt.config%' diff --git a/src/Catalog/EntityCatalogService.php b/src/Catalog/EntityCatalogService.php index 4b06ab1..139cb65 100644 --- a/src/Catalog/EntityCatalogService.php +++ b/src/Catalog/EntityCatalogService.php @@ -21,11 +21,10 @@ use Symfony\Component\Uid\Uuid; */ final class EntityCatalogService { - private const SEARCH_LIMIT = 3; - public function __construct( private readonly TagVectorSearchClient $tagVectorClient, private readonly Connection $connection, + private readonly CatalogIntentConfig $config, ) { } @@ -40,7 +39,7 @@ final class EntityCatalogService return null; } - $hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT); + $hits = $this->tagVectorClient->search($entityTerm, $this->config->getListSearchLimit()); if ($hits === []) { return null; @@ -49,7 +48,7 @@ final class EntityCatalogService $best = $hits[0]; $bestScore = (float) ($best['score'] ?? 0.0); - if ($bestScore < CatalogIntentConfig::MIN_SCORE) { + if (!$this->config->isScoreAccepted($bestScore)) { return null; } @@ -60,7 +59,7 @@ final class EntityCatalogService if (isset($hits[1])) { $secondScore = (float) ($hits[1]['score'] ?? 0.0); - if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { + if ($this->config->isAmbiguous($bestScore, $secondScore)) { return null; } } diff --git a/src/Config/CatalogIntentConfig.php b/src/Config/CatalogIntentConfig.php index 7a5b2ea..0b0f1e1 100644 --- a/src/Config/CatalogIntentConfig.php +++ b/src/Config/CatalogIntentConfig.php @@ -5,58 +5,111 @@ declare(strict_types=1); namespace App\Config; /** - * Central thresholds for deterministic catalog-entity detection. + * YAML-backed thresholds for deterministic catalog-entity detection. * - * The values in this class intentionally define a conservative gate: - * - only strong semantic tag hits may open the catalog path - * - small score gaps between the best and second-best hit are treated as ambiguous + * This class intentionally has no PHP fallback values. Missing or invalid + * configuration must be fixed in config/retriex/intent.yaml. */ final class CatalogIntentConfig { /** - * Minimum semantic similarity required before a catalog entity is accepted. + * @param array $config */ - public const MIN_SCORE = 0.72; - - /** - * Required distance between the best and second-best catalog entity hit. - */ - public const AMBIGUITY_DELTA = 0.02; - - /** - * Number of candidate tag hits to inspect during catalog intent detection. - * - * This is intentionally wider than the final accepted set so that strong - * catalog_entity tags are not hidden behind generic tags in the raw result. - */ - public const SEARCH_LIMIT = 6; - - /** - * Conservative lower boundary for score normalization helpers. - */ - public const MIN_ALLOWED_SCORE = 0.0; - - /** - * Conservative upper boundary for score normalization helpers. - */ - public const MAX_ALLOWED_SCORE = 1.0; - - public static function isScoreAccepted(float $score): bool - { - return $score >= self::MIN_SCORE; - } - - public static function isAmbiguous(float $bestScore, float $secondScore): bool - { - return abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA; - } - - public static function clampScore(float $score): float - { - return max(self::MIN_ALLOWED_SCORE, min(self::MAX_ALLOWED_SCORE, $score)); - } - - private function __construct() + public function __construct(private readonly array $config) { } -} \ No newline at end of file + + public function getMinScore(): float + { + return $this->requiredFloatInRange('min_score', 0.0, 1.0); + } + + public function getAmbiguityDelta(): float + { + return $this->requiredFloatInRange('ambiguity_delta', 0.0, 1.0); + } + + public function getIntentSearchLimit(): int + { + return $this->requiredPositiveInt('intent_search_limit'); + } + + public function getListSearchLimit(): int + { + return $this->requiredPositiveInt('list_search_limit'); + } + + public function getMinAllowedScore(): float + { + return $this->requiredFloatInRange('min_allowed_score', 0.0, 1.0); + } + + public function getMaxAllowedScore(): float + { + return $this->requiredFloatInRange('max_allowed_score', 0.0, 1.0); + } + + public function isScoreAccepted(float $score): bool + { + return $score >= $this->getMinScore(); + } + + public function isAmbiguous(float $bestScore, float $secondScore): bool + { + return abs($bestScore - $secondScore) < $this->getAmbiguityDelta(); + } + + public function clampScore(float $score): float + { + return max($this->getMinAllowedScore(), min($this->getMaxAllowedScore(), $score)); + } + + private function requiredPositiveInt(string $key): int + { + if (!array_key_exists($key, $this->config)) { + throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key)); + } + + $value = $this->config[$key]; + + if (is_int($value)) { + $intValue = $value; + } elseif (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) { + $intValue = (int) trim($value); + } else { + throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be an integer.', $key)); + } + + if ($intValue <= 0) { + throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be greater than 0.', $key)); + } + + return $intValue; + } + + private function requiredFloatInRange(string $key, float $min, float $max): float + { + if (!array_key_exists($key, $this->config)) { + throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key)); + } + + $value = $this->config[$key]; + + if (is_int($value) || is_float($value) || (is_string($value) && is_numeric(trim($value)))) { + $floatValue = (float) $value; + } else { + throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be numeric.', $key)); + } + + if ($floatValue < $min || $floatValue > $max) { + throw new \InvalidArgumentException(sprintf( + 'RetrieX catalog intent config key "%s" must be between %s and %s.', + $key, + (string) $min, + (string) $max + )); + } + + return $floatValue; + } +} diff --git a/src/Config/ConfigSourceAuditProvider.php b/src/Config/ConfigSourceAuditProvider.php index 64dd433..65215b9 100644 --- a/src/Config/ConfigSourceAuditProvider.php +++ b/src/Config/ConfigSourceAuditProvider.php @@ -12,6 +12,8 @@ final readonly class ConfigSourceAuditProvider 'AgentRunnerConfig' => 'retriex.agent.config', 'CommerceIntentConfig' => 'retriex.intent.commerce.config', 'CommerceQueryParserConfig' => 'retriex.commerce_query.config', + 'ContextServiceConfig' => 'retriex.context.config', + 'CatalogIntentConfig' => 'retriex.intent.catalog.config', 'DomainVocabularyConfig' => 'retriex.vocabulary.config', 'IntentLightConfig' => 'retriex.intent.light.config', 'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config', diff --git a/src/Config/ContextServiceConfig.php b/src/Config/ContextServiceConfig.php index 379fa16..c574a1c 100644 --- a/src/Config/ContextServiceConfig.php +++ b/src/Config/ContextServiceConfig.php @@ -1,12 +1,55 @@ $config + */ + public function __construct(private readonly array $config) + { + } - //Number of lines included in full context. Intended for exceptional or diagnostic scenarios. - public const MAX_FULL_LINES = 500; -} \ No newline at end of file + public function getMaxVisibleRegularLines(): int + { + return $this->requiredPositiveInt('max_visible_regular_lines'); + } + + public function getMaxFullLines(): int + { + return $this->requiredPositiveInt('max_full_lines'); + } + + private function requiredPositiveInt(string $key): int + { + if (!array_key_exists($key, $this->config)) { + throw new \InvalidArgumentException(sprintf('Missing required RetrieX context config key "%s".', $key)); + } + + $value = $this->config[$key]; + + if (is_int($value)) { + $intValue = $value; + } elseif (is_string($value) && preg_match('/^-?\d+$/', trim($value)) === 1) { + $intValue = (int) trim($value); + } else { + throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be an integer.', $key)); + } + + if ($intValue <= 0) { + throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be greater than 0.', $key)); + } + + return $intValue; + } +} diff --git a/src/Config/RetriexEffectiveConfigProvider.php b/src/Config/RetriexEffectiveConfigProvider.php index 1ec3e18..2bfff5d 100644 --- a/src/Config/RetriexEffectiveConfigProvider.php +++ b/src/Config/RetriexEffectiveConfigProvider.php @@ -26,6 +26,8 @@ final readonly class RetriexEffectiveConfigProvider private ShopServiceConfig $shopServiceConfig, private StopWordsConfig $stopWordsConfig, private QueryEnricherConfig $queryEnricherConfig, + private CatalogIntentConfig $catalogIntentConfig, + private ContextServiceConfig $contextServiceConfig, ) { } @@ -692,11 +694,12 @@ final readonly class RetriexEffectiveConfigProvider private function catalogIntentConfig(): array { return [ - 'min_score' => CatalogIntentConfig::MIN_SCORE, - 'ambiguity_delta' => CatalogIntentConfig::AMBIGUITY_DELTA, - 'search_limit' => CatalogIntentConfig::SEARCH_LIMIT, - 'min_allowed_score' => CatalogIntentConfig::MIN_ALLOWED_SCORE, - 'max_allowed_score' => CatalogIntentConfig::MAX_ALLOWED_SCORE, + 'min_score' => $this->catalogIntentConfig->getMinScore(), + 'ambiguity_delta' => $this->catalogIntentConfig->getAmbiguityDelta(), + 'intent_search_limit' => $this->catalogIntentConfig->getIntentSearchLimit(), + 'list_search_limit' => $this->catalogIntentConfig->getListSearchLimit(), + 'min_allowed_score' => $this->catalogIntentConfig->getMinAllowedScore(), + 'max_allowed_score' => $this->catalogIntentConfig->getMaxAllowedScore(), ]; } @@ -704,8 +707,8 @@ final readonly class RetriexEffectiveConfigProvider private function contextConfig(): array { return [ - 'max_visible_regular_lines' => ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES, - 'max_full_lines' => ContextServiceConfig::MAX_FULL_LINES, + 'max_visible_regular_lines' => $this->contextServiceConfig->getMaxVisibleRegularLines(), + 'max_full_lines' => $this->contextServiceConfig->getMaxFullLines(), ]; } diff --git a/src/Config/StopWordsConfig.php b/src/Config/StopWordsConfig.php index af3b034..fb0dec9 100644 --- a/src/Config/StopWordsConfig.php +++ b/src/Config/StopWordsConfig.php @@ -4,39 +4,18 @@ declare(strict_types=1); namespace App\Config; +/** + * YAML-backed stop-word configuration. + * + * This class intentionally has no PHP fallback list. The complete list lives in + * config/retriex/language.yaml. + */ final class StopWordsConfig { - /** - * Retrieval-optimized stop-word list. - * - * Important: - * - keep negations - * - keep question words - * - keep domain terms - * - remove only structural filler words - */ - private const DEFAULT_STOP_WORDS = [ - 'mit', - 'der', 'die', 'das', - 'ein', 'eine', 'einer', 'eines', - 'den', 'dem', 'des', - 'und', 'oder', 'aber', 'sowie', - 'ich', 'du', 'er', 'sie', 'es', - 'wir', 'ihr', - 'halt', 'eben', 'auch', 'schon', - 'noch', 'mal', 'bitte', 'danke', - 'also', 'nun', 'tja', - 'dann', 'danach', 'davor', - 'hier', 'dort', - 'heute', 'gestern', 'morgen', - 'könnte', 'kannst', 'kann', - 'würde', 'würdest', 'würden', - ]; - /** * @param array $config */ - public function __construct(private array $config = []) + public function __construct(private readonly array $config) { } @@ -45,19 +24,22 @@ final class StopWordsConfig */ public function getStopWords(): array { - return $this->stringList('words', self::DEFAULT_STOP_WORDS); + return $this->requiredStringList('words'); } /** - * @param string[] $default * @return string[] */ - private function stringList(string $key, array $default): array + private function requiredStringList(string $key): array { - $value = $this->config[$key] ?? $default; + if (!array_key_exists($key, $this->config)) { + throw new \InvalidArgumentException(sprintf('Missing required RetrieX stopwords config key "%s".', $key)); + } + + $value = $this->config[$key]; if (!is_array($value)) { - return $default; + throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must be a list.', $key)); } $out = []; @@ -76,6 +58,10 @@ final class StopWordsConfig } } - return $out !== [] ? $out : $default; + if ($out === []) { + throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must not be empty.', $key)); + } + + return $out; } } diff --git a/src/Context/ContextService.php b/src/Context/ContextService.php index d5ce81d..4558469 100644 --- a/src/Context/ContextService.php +++ b/src/Context/ContextService.php @@ -32,6 +32,7 @@ final class ContextService public function __construct( string $historyDir, string $projectDir, + private readonly ContextServiceConfig $config, ) { /** @@ -73,8 +74,8 @@ final class ContextService } $maxLines = $full - ? ContextServiceConfig::MAX_FULL_LINES - : ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES; + ? $this->config->getMaxFullLines() + : $this->config->getMaxVisibleRegularLines(); $selected = array_slice($lines, -$maxLines); diff --git a/src/Intent/CatalogIntentLite.php b/src/Intent/CatalogIntentLite.php index d8c5760..ba4906b 100644 --- a/src/Intent/CatalogIntentLite.php +++ b/src/Intent/CatalogIntentLite.php @@ -21,15 +21,10 @@ use App\Tag\TagVectorSearchClient; */ final readonly class CatalogIntentLite { - /** - * Slightly wider than the old top-3 search so generic tags do not crowd out - * relevant catalog_entity hits too easily. - */ - private const SEARCH_LIMIT = 6; - public function __construct( private TagVectorSearchClient $tagVectorClient, private QueryCleaner $queryCleaner, + private CatalogIntentConfig $config, ) { } @@ -52,7 +47,7 @@ final readonly class CatalogIntentLite } $catalogHits = $this->filterCatalogEntityHits( - $this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT) + $this->tagVectorClient->search($cleanQuery, $this->config->getIntentSearchLimit()) ); if ($catalogHits === []) { @@ -62,14 +57,14 @@ final readonly class CatalogIntentLite $best = $catalogHits[0]; $bestScore = (float) ($best['score'] ?? 0.0); - if ($bestScore < CatalogIntentConfig::MIN_SCORE) { + if (!$this->config->isScoreAccepted($bestScore)) { return null; } if (isset($catalogHits[1])) { $secondScore = (float) ($catalogHits[1]['score'] ?? 0.0); - if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { + if ($this->config->isAmbiguous($bestScore, $secondScore)) { return null; } }