second step

This commit is contained in:
team2
2026-04-29 20:55:21 +02:00
parent a460eee429
commit d099457534
11 changed files with 212 additions and 110 deletions

View File

@@ -132,6 +132,14 @@ parameters:
- '/\bzubehoer\b/u' - '/\bzubehoer\b/u'
- '/\bersatzteil(?:e)?\b/u' - '/\bersatzteil(?:e)?\b/u'
# Thresholds and search limits for deterministic catalog-entity detection.
# Injected as $config into App\Config\CatalogIntentConfig, which validates every key on read.
retriex.intent.catalog.config:
# Minimum semantic similarity required before a catalog entity is accepted (validated to 0.0-1.0).
min_score: 0.72
# Required distance between the best and second-best hit; a smaller gap is treated as ambiguous.
ambiguity_delta: 0.02
# Number of tag hits CatalogIntentLite requests before filtering for catalog_entity hits.
intent_search_limit: 6
# Number of tag hits EntityCatalogService requests for an entity term.
list_search_limit: 3
# Lower boundary used by CatalogIntentConfig::clampScore().
min_allowed_score: 0.0
# Upper boundary used by CatalogIntentConfig::clampScore().
max_allowed_score: 1.0
retriex.intent.light.config: retriex.intent.light.config:
quantity_words: quantity_words:
- alle - alle

View File

@@ -17,3 +17,7 @@ parameters:
retriex.locks.dir: '%retriex.knowledge.root%/locks' retriex.locks.dir: '%retriex.knowledge.root%/locks'
retriex.tags.rebuild_lock: '%retriex.locks.dir%/tag_rebuild.lock' retriex.tags.rebuild_lock: '%retriex.locks.dir%/tag_rebuild.lock'
# Line limits for conversation context.
# Injected as $config into App\Config\ContextServiceConfig; both keys must be integers greater than 0.
retriex.context.config:
# Number of lines included in regular context; intended for normal conversational continuity.
max_visible_regular_lines: 25
# Number of lines included in full context; intended for exceptional or diagnostic scenarios.
max_full_lines: 500

View File

@@ -118,6 +118,14 @@ services:
arguments: arguments:
$config: '%retriex.vocabulary.config%' $config: '%retriex.vocabulary.config%'
# Passes the retriex.context.config parameter array to the context config object.
App\Config\ContextServiceConfig:
arguments:
$config: '%retriex.context.config%'
# Passes the retriex.intent.catalog.config parameter array to the catalog intent config object.
App\Config\CatalogIntentConfig:
arguments:
$config: '%retriex.intent.catalog.config%'
App\Config\PromptBuilderConfig: App\Config\PromptBuilderConfig:
arguments: arguments:
$config: '%retriex.prompt.config%' $config: '%retriex.prompt.config%'

View File

@@ -21,11 +21,10 @@ use Symfony\Component\Uid\Uuid;
*/ */
final class EntityCatalogService final class EntityCatalogService
{ {
private const SEARCH_LIMIT = 3;
public function __construct( public function __construct(
private readonly TagVectorSearchClient $tagVectorClient, private readonly TagVectorSearchClient $tagVectorClient,
private readonly Connection $connection, private readonly Connection $connection,
private readonly CatalogIntentConfig $config,
) { ) {
} }
@@ -40,7 +39,7 @@ final class EntityCatalogService
return null; return null;
} }
$hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT); $hits = $this->tagVectorClient->search($entityTerm, $this->config->getListSearchLimit());
if ($hits === []) { if ($hits === []) {
return null; return null;
@@ -49,7 +48,7 @@ final class EntityCatalogService
$best = $hits[0]; $best = $hits[0];
$bestScore = (float) ($best['score'] ?? 0.0); $bestScore = (float) ($best['score'] ?? 0.0);
if ($bestScore < CatalogIntentConfig::MIN_SCORE) { if (!$this->config->isScoreAccepted($bestScore)) {
return null; return null;
} }
@@ -60,7 +59,7 @@ final class EntityCatalogService
if (isset($hits[1])) { if (isset($hits[1])) {
$secondScore = (float) ($hits[1]['score'] ?? 0.0); $secondScore = (float) ($hits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { if ($this->config->isAmbiguous($bestScore, $secondScore)) {
return null; return null;
} }
} }

View File

@@ -5,58 +5,111 @@ declare(strict_types=1);
namespace App\Config; namespace App\Config;
/** /**
* Central thresholds for deterministic catalog-entity detection. * YAML-backed thresholds for deterministic catalog-entity detection.
* *
* The values in this class intentionally define a conservative gate: * This class intentionally has no PHP fallback values. Missing or invalid
* - only strong semantic tag hits may open the catalog path * configuration must be fixed in config/retriex/intent.yaml.
* - small score gaps between the best and second-best hit are treated as ambiguous
*/ */
final class CatalogIntentConfig final class CatalogIntentConfig
{ {
/** /**
* Minimum semantic similarity required before a catalog entity is accepted. * @param array<string, mixed> $config
*/ */
public const MIN_SCORE = 0.72; public function __construct(private readonly array $config)
/**
* Required distance between the best and second-best catalog entity hit.
*/
public const AMBIGUITY_DELTA = 0.02;
/**
* Number of candidate tag hits to inspect during catalog intent detection.
*
* This is intentionally wider than the final accepted set so that strong
* catalog_entity tags are not hidden behind generic tags in the raw result.
*/
public const SEARCH_LIMIT = 6;
/**
* Conservative lower boundary for score normalization helpers.
*/
public const MIN_ALLOWED_SCORE = 0.0;
/**
* Conservative upper boundary for score normalization helpers.
*/
public const MAX_ALLOWED_SCORE = 1.0;
public static function isScoreAccepted(float $score): bool
{ {
return $score >= self::MIN_SCORE;
} }
public static function isAmbiguous(float $bestScore, float $secondScore): bool public function getMinScore(): float
{ {
return abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA; return $this->requiredFloatInRange('min_score', 0.0, 1.0);
} }
public static function clampScore(float $score): float public function getAmbiguityDelta(): float
{ {
return max(self::MIN_ALLOWED_SCORE, min(self::MAX_ALLOWED_SCORE, $score)); return $this->requiredFloatInRange('ambiguity_delta', 0.0, 1.0);
} }
private function __construct() public function getIntentSearchLimit(): int
{ {
return $this->requiredPositiveInt('intent_search_limit');
}
/**
 * Returns the configured `list_search_limit`.
 *
 * @throws \InvalidArgumentException when the key is missing, not an integer, or not greater than 0
 */
public function getListSearchLimit(): int
{
    $listSearchLimit = $this->requiredPositiveInt('list_search_limit');

    return $listSearchLimit;
}
/**
 * Returns the configured `min_allowed_score`, the lower bound used by clampScore().
 *
 * @throws \InvalidArgumentException when the key is missing, not numeric, or outside 0.0 to 1.0
 */
public function getMinAllowedScore(): float
{
    $lowerBound = $this->requiredFloatInRange('min_allowed_score', 0.0, 1.0);

    return $lowerBound;
}
/**
 * Returns the configured `max_allowed_score`, the upper bound used by clampScore().
 *
 * @throws \InvalidArgumentException when the key is missing, not numeric, or outside 0.0 to 1.0
 */
public function getMaxAllowedScore(): float
{
    $upperBound = $this->requiredFloatInRange('max_allowed_score', 0.0, 1.0);

    return $upperBound;
}
/**
 * Tells whether a hit score reaches the configured `min_score` threshold.
 *
 * A score exactly equal to the threshold is accepted.
 *
 * @throws \InvalidArgumentException when `min_score` is missing or invalid
 */
public function isScoreAccepted(float $score): bool
{
    $threshold = $this->getMinScore();

    return $threshold <= $score;
}
/**
 * Tells whether two hit scores are too close together to pick a winner.
 *
 * The absolute gap between both scores is compared against the configured
 * `ambiguity_delta`; a gap strictly below the delta counts as ambiguous.
 *
 * @throws \InvalidArgumentException when `ambiguity_delta` is missing or invalid
 */
public function isAmbiguous(float $bestScore, float $secondScore): bool
{
    $gap = abs($bestScore - $secondScore);
    $requiredGap = $this->getAmbiguityDelta();

    return $gap < $requiredGap;
}
/**
 * Clamps a score into the configured [min_allowed_score, max_allowed_score] interval.
 *
 * Each bound is validated on its own by its getter, so an inverted pair
 * (lower bound above upper bound) passes those checks. Without the guard
 * below, every score would then silently collapse to the lower bound. That
 * is a configuration error and is reported as one.
 *
 * @throws \InvalidArgumentException when a bound is missing or invalid, or when
 *                                   min_allowed_score is greater than max_allowed_score
 */
public function clampScore(float $score): float
{
    $lower = $this->getMinAllowedScore();
    $upper = $this->getMaxAllowedScore();

    if ($lower > $upper) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX catalog intent config key "min_allowed_score" (%s) must not be greater than "max_allowed_score" (%s).',
            (string) $lower,
            (string) $upper
        ));
    }

    return max($lower, min($upper, $score));
}
/**
 * Reads a mandatory config key and returns it as an integer greater than 0.
 *
 * Accepted inputs are native integers and strings that, once trimmed,
 * consist only of an optional minus sign followed by digits. There is no
 * fallback value.
 *
 * @throws \InvalidArgumentException when the key is missing, is not an integer, or is not greater than 0
 */
private function requiredPositiveInt(string $key): int
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key));
    }

    $raw = $this->config[$key];
    // Only strings are trimmed; every other type keeps $text at null and skips the digit check.
    $text = is_string($raw) ? trim($raw) : null;

    $parsed = match (true) {
        is_int($raw) => $raw,
        $text !== null && preg_match('/^-?\d+$/', $text) === 1 => (int) $text,
        default => throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be an integer.', $key)),
    };

    if ($parsed < 1) {
        throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be greater than 0.', $key));
    }

    return $parsed;
}
/**
 * Reads a mandatory config key and returns it as a float within [$min, $max].
 *
 * Accepted inputs are native integers, native floats, and numeric strings
 * (surrounding whitespace is trimmed first). There is no fallback value.
 *
 * @throws \InvalidArgumentException when the key is missing, is not numeric, is not a
 *                                   finite number, or lies outside the inclusive range
 */
private function requiredFloatInRange(string $key, float $min, float $max): float
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key));
    }

    $value = $this->config[$key];

    if (is_string($value)) {
        // Validate and convert the same trimmed text so both steps cannot disagree
        // about characters that trim() removes but the float cast does not skip.
        $value = trim($value);
    }

    if (!is_int($value) && !is_float($value) && !(is_string($value) && is_numeric($value))) {
        throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be numeric.', $key));
    }

    $floatValue = (float) $value;

    // NAN compares false against both bounds, so the range comparison alone would
    // let it through; reject non-finite values explicitly.
    if (!is_finite($floatValue) || $floatValue < $min || $floatValue > $max) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX catalog intent config key "%s" must be between %s and %s.',
            $key,
            (string) $min,
            (string) $max
        ));
    }

    return $floatValue;
}
} }

View File

@@ -12,6 +12,8 @@ final readonly class ConfigSourceAuditProvider
'AgentRunnerConfig' => 'retriex.agent.config', 'AgentRunnerConfig' => 'retriex.agent.config',
'CommerceIntentConfig' => 'retriex.intent.commerce.config', 'CommerceIntentConfig' => 'retriex.intent.commerce.config',
'CommerceQueryParserConfig' => 'retriex.commerce_query.config', 'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
'ContextServiceConfig' => 'retriex.context.config',
'CatalogIntentConfig' => 'retriex.intent.catalog.config',
'DomainVocabularyConfig' => 'retriex.vocabulary.config', 'DomainVocabularyConfig' => 'retriex.vocabulary.config',
'IntentLightConfig' => 'retriex.intent.light.config', 'IntentLightConfig' => 'retriex.intent.light.config',
'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config', 'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',

View File

@@ -1,12 +1,55 @@
<?php <?php
declare(strict_types=1);
namespace App\Config; namespace App\Config;
class ContextServiceConfig /**
* YAML-backed context configuration.
*
* This class intentionally has no PHP fallback values. Missing or invalid
* configuration must be fixed in config/retriex/*.yaml instead of being hidden
* by application defaults.
*/
final class ContextServiceConfig
{ {
//Number of lines included in regular context. Intended for normal conversational continuity. /**
public const MAX_VISIBLE_REGULAR_LINES = 25; * @param array<string, mixed> $config
*/
public function __construct(private readonly array $config)
{
    // Intentionally empty: the raw parameter array is only stored here.
    // Validation happens in the getters, each time a value is read.
}
//Number of lines included in full context. Intended for exceptional or diagnostic scenarios. public function getMaxVisibleRegularLines(): int
public const MAX_FULL_LINES = 500; {
return $this->requiredPositiveInt('max_visible_regular_lines');
}
/**
 * Returns the configured `max_full_lines` value.
 *
 * @throws \InvalidArgumentException when the key is missing, not an integer, or not greater than 0
 */
public function getMaxFullLines(): int
{
    $maxFullLines = $this->requiredPositiveInt('max_full_lines');

    return $maxFullLines;
}
/**
 * Reads a mandatory context config key and returns it as an integer greater than 0.
 *
 * Native integers are taken as they are. Strings are trimmed and accepted
 * only when they consist of an optional minus sign followed by digits.
 * Every other type is rejected; there is no fallback value.
 *
 * @throws \InvalidArgumentException when the key is missing, is not an integer, or is not greater than 0
 */
private function requiredPositiveInt(string $key): int
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX context config key "%s".', $key));
    }

    $candidate = $this->config[$key];

    if (is_string($candidate)) {
        // A string either becomes its integer value or null, which fails the type check below.
        $digits = trim($candidate);
        $candidate = preg_match('/^-?\d+$/', $digits) === 1 ? (int) $digits : null;
    }

    if (!is_int($candidate)) {
        throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be an integer.', $key));
    }

    if ($candidate < 1) {
        throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be greater than 0.', $key));
    }

    return $candidate;
}
} }

View File

@@ -26,6 +26,8 @@ final readonly class RetriexEffectiveConfigProvider
private ShopServiceConfig $shopServiceConfig, private ShopServiceConfig $shopServiceConfig,
private StopWordsConfig $stopWordsConfig, private StopWordsConfig $stopWordsConfig,
private QueryEnricherConfig $queryEnricherConfig, private QueryEnricherConfig $queryEnricherConfig,
private CatalogIntentConfig $catalogIntentConfig,
private ContextServiceConfig $contextServiceConfig,
) { ) {
} }
@@ -692,11 +694,12 @@ final readonly class RetriexEffectiveConfigProvider
private function catalogIntentConfig(): array private function catalogIntentConfig(): array
{ {
return [ return [
'min_score' => CatalogIntentConfig::MIN_SCORE, 'min_score' => $this->catalogIntentConfig->getMinScore(),
'ambiguity_delta' => CatalogIntentConfig::AMBIGUITY_DELTA, 'ambiguity_delta' => $this->catalogIntentConfig->getAmbiguityDelta(),
'search_limit' => CatalogIntentConfig::SEARCH_LIMIT, 'intent_search_limit' => $this->catalogIntentConfig->getIntentSearchLimit(),
'min_allowed_score' => CatalogIntentConfig::MIN_ALLOWED_SCORE, 'list_search_limit' => $this->catalogIntentConfig->getListSearchLimit(),
'max_allowed_score' => CatalogIntentConfig::MAX_ALLOWED_SCORE, 'min_allowed_score' => $this->catalogIntentConfig->getMinAllowedScore(),
'max_allowed_score' => $this->catalogIntentConfig->getMaxAllowedScore(),
]; ];
} }
@@ -704,8 +707,8 @@ final readonly class RetriexEffectiveConfigProvider
private function contextConfig(): array private function contextConfig(): array
{ {
return [ return [
'max_visible_regular_lines' => ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES, 'max_visible_regular_lines' => $this->contextServiceConfig->getMaxVisibleRegularLines(),
'max_full_lines' => ContextServiceConfig::MAX_FULL_LINES, 'max_full_lines' => $this->contextServiceConfig->getMaxFullLines(),
]; ];
} }

View File

@@ -4,39 +4,18 @@ declare(strict_types=1);
namespace App\Config; namespace App\Config;
/**
* YAML-backed stop-word configuration.
*
* This class intentionally has no PHP fallback list. The complete list lives in
* config/retriex/language.yaml.
*/
final class StopWordsConfig final class StopWordsConfig
{ {
/**
* Retrieval-optimized stop-word list.
*
* Important:
* - keep negations
* - keep question words
* - keep domain terms
* - remove only structural filler words
*/
private const DEFAULT_STOP_WORDS = [
'mit',
'der', 'die', 'das',
'ein', 'eine', 'einer', 'eines',
'den', 'dem', 'des',
'und', 'oder', 'aber', 'sowie',
'ich', 'du', 'er', 'sie', 'es',
'wir', 'ihr',
'halt', 'eben', 'auch', 'schon',
'noch', 'mal', 'bitte', 'danke',
'also', 'nun', 'tja',
'dann', 'danach', 'davor',
'hier', 'dort',
'heute', 'gestern', 'morgen',
'könnte', 'kannst', 'kann',
'würde', 'würdest', 'würden',
];
/** /**
* @param array<string, mixed> $config * @param array<string, mixed> $config
*/ */
public function __construct(private array $config = []) public function __construct(private readonly array $config)
{ {
} }
@@ -45,19 +24,22 @@ final class StopWordsConfig
*/ */
public function getStopWords(): array public function getStopWords(): array
{ {
return $this->stringList('words', self::DEFAULT_STOP_WORDS); return $this->requiredStringList('words');
} }
/** /**
* @param string[] $default
* @return string[] * @return string[]
*/ */
private function stringList(string $key, array $default): array private function requiredStringList(string $key): array
{ {
$value = $this->config[$key] ?? $default; if (!array_key_exists($key, $this->config)) {
throw new \InvalidArgumentException(sprintf('Missing required RetrieX stopwords config key "%s".', $key));
}
$value = $this->config[$key];
if (!is_array($value)) { if (!is_array($value)) {
return $default; throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must be a list.', $key));
} }
$out = []; $out = [];
@@ -76,6 +58,10 @@ final class StopWordsConfig
} }
} }
return $out !== [] ? $out : $default; if ($out === []) {
throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must not be empty.', $key));
}
return $out;
} }
} }

View File

@@ -32,6 +32,7 @@ final class ContextService
public function __construct( public function __construct(
string $historyDir, string $historyDir,
string $projectDir, string $projectDir,
private readonly ContextServiceConfig $config,
) )
{ {
/** /**
@@ -73,8 +74,8 @@ final class ContextService
} }
$maxLines = $full $maxLines = $full
? ContextServiceConfig::MAX_FULL_LINES ? $this->config->getMaxFullLines()
: ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES; : $this->config->getMaxVisibleRegularLines();
$selected = array_slice($lines, -$maxLines); $selected = array_slice($lines, -$maxLines);

View File

@@ -21,15 +21,10 @@ use App\Tag\TagVectorSearchClient;
*/ */
final readonly class CatalogIntentLite final readonly class CatalogIntentLite
{ {
/**
* Slightly wider than the old top-3 search so generic tags do not crowd out
* relevant catalog_entity hits too easily.
*/
private const SEARCH_LIMIT = 6;
public function __construct( public function __construct(
private TagVectorSearchClient $tagVectorClient, private TagVectorSearchClient $tagVectorClient,
private QueryCleaner $queryCleaner, private QueryCleaner $queryCleaner,
private CatalogIntentConfig $config,
) { ) {
} }
@@ -52,7 +47,7 @@ final readonly class CatalogIntentLite
} }
$catalogHits = $this->filterCatalogEntityHits( $catalogHits = $this->filterCatalogEntityHits(
$this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT) $this->tagVectorClient->search($cleanQuery, $this->config->getIntentSearchLimit())
); );
if ($catalogHits === []) { if ($catalogHits === []) {
@@ -62,14 +57,14 @@ final readonly class CatalogIntentLite
$best = $catalogHits[0]; $best = $catalogHits[0];
$bestScore = (float) ($best['score'] ?? 0.0); $bestScore = (float) ($best['score'] ?? 0.0);
if ($bestScore < CatalogIntentConfig::MIN_SCORE) { if (!$this->config->isScoreAccepted($bestScore)) {
return null; return null;
} }
if (isset($catalogHits[1])) { if (isset($catalogHits[1])) {
$secondScore = (float) ($catalogHits[1]['score'] ?? 0.0); $secondScore = (float) ($catalogHits[1]['score'] ?? 0.0);
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) { if ($this->config->isAmbiguous($bestScore, $secondScore)) {
return null; return null;
} }
} }