second step
This commit is contained in:
@@ -132,6 +132,14 @@ parameters:
|
||||
- '/\bzubehoer\b/u'
|
||||
- '/\bersatzteil(?:e)?\b/u'
|
||||
|
||||
retriex.intent.catalog.config:
|
||||
min_score: 0.72
|
||||
ambiguity_delta: 0.02
|
||||
intent_search_limit: 6
|
||||
list_search_limit: 3
|
||||
min_allowed_score: 0.0
|
||||
max_allowed_score: 1.0
|
||||
|
||||
retriex.intent.light.config:
|
||||
quantity_words:
|
||||
- alle
|
||||
|
||||
@@ -17,3 +17,7 @@ parameters:
|
||||
|
||||
retriex.locks.dir: '%retriex.knowledge.root%/locks'
|
||||
retriex.tags.rebuild_lock: '%retriex.locks.dir%/tag_rebuild.lock'
|
||||
|
||||
retriex.context.config:
|
||||
max_visible_regular_lines: 25
|
||||
max_full_lines: 500
|
||||
|
||||
@@ -118,6 +118,14 @@ services:
|
||||
arguments:
|
||||
$config: '%retriex.vocabulary.config%'
|
||||
|
||||
App\Config\ContextServiceConfig:
|
||||
arguments:
|
||||
$config: '%retriex.context.config%'
|
||||
|
||||
App\Config\CatalogIntentConfig:
|
||||
arguments:
|
||||
$config: '%retriex.intent.catalog.config%'
|
||||
|
||||
App\Config\PromptBuilderConfig:
|
||||
arguments:
|
||||
$config: '%retriex.prompt.config%'
|
||||
|
||||
@@ -21,11 +21,10 @@ use Symfony\Component\Uid\Uuid;
|
||||
*/
|
||||
final class EntityCatalogService
|
||||
{
|
||||
private const SEARCH_LIMIT = 3;
|
||||
|
||||
public function __construct(
|
||||
private readonly TagVectorSearchClient $tagVectorClient,
|
||||
private readonly Connection $connection,
|
||||
private readonly CatalogIntentConfig $config,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -40,7 +39,7 @@ final class EntityCatalogService
|
||||
return null;
|
||||
}
|
||||
|
||||
$hits = $this->tagVectorClient->search($entityTerm, self::SEARCH_LIMIT);
|
||||
$hits = $this->tagVectorClient->search($entityTerm, $this->config->getListSearchLimit());
|
||||
|
||||
if ($hits === []) {
|
||||
return null;
|
||||
@@ -49,7 +48,7 @@ final class EntityCatalogService
|
||||
$best = $hits[0];
|
||||
$bestScore = (float) ($best['score'] ?? 0.0);
|
||||
|
||||
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
|
||||
if (!$this->config->isScoreAccepted($bestScore)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -60,7 +59,7 @@ final class EntityCatalogService
|
||||
if (isset($hits[1])) {
|
||||
$secondScore = (float) ($hits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
|
||||
if ($this->config->isAmbiguous($bestScore, $secondScore)) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,58 +5,111 @@ declare(strict_types=1);
|
||||
namespace App\Config;
|
||||
|
||||
/**
|
||||
* Central thresholds for deterministic catalog-entity detection.
|
||||
* YAML-backed thresholds for deterministic catalog-entity detection.
|
||||
*
|
||||
* The values in this class intentionally define a conservative gate:
|
||||
* - only strong semantic tag hits may open the catalog path
|
||||
* - small score gaps between the best and second-best hit are treated as ambiguous
|
||||
* This class intentionally has no PHP fallback values. Missing or invalid
|
||||
* configuration must be fixed in config/retriex/intent.yaml.
|
||||
*/
|
||||
final class CatalogIntentConfig
|
||||
{
|
||||
/**
|
||||
* Minimum semantic similarity required before a catalog entity is accepted.
|
||||
* @param array<string, mixed> $config
|
||||
*/
|
||||
public const MIN_SCORE = 0.72;
|
||||
|
||||
/**
|
||||
* Required distance between the best and second-best catalog entity hit.
|
||||
*/
|
||||
public const AMBIGUITY_DELTA = 0.02;
|
||||
|
||||
/**
|
||||
* Number of candidate tag hits to inspect during catalog intent detection.
|
||||
*
|
||||
* This is intentionally wider than the final accepted set so that strong
|
||||
* catalog_entity tags are not hidden behind generic tags in the raw result.
|
||||
*/
|
||||
public const SEARCH_LIMIT = 6;
|
||||
|
||||
/**
|
||||
* Conservative lower boundary for score normalization helpers.
|
||||
*/
|
||||
public const MIN_ALLOWED_SCORE = 0.0;
|
||||
|
||||
/**
|
||||
* Conservative upper boundary for score normalization helpers.
|
||||
*/
|
||||
public const MAX_ALLOWED_SCORE = 1.0;
|
||||
|
||||
public static function isScoreAccepted(float $score): bool
|
||||
{
|
||||
return $score >= self::MIN_SCORE;
|
||||
}
|
||||
|
||||
public static function isAmbiguous(float $bestScore, float $secondScore): bool
|
||||
{
|
||||
return abs($bestScore - $secondScore) < self::AMBIGUITY_DELTA;
|
||||
}
|
||||
|
||||
public static function clampScore(float $score): float
|
||||
{
|
||||
return max(self::MIN_ALLOWED_SCORE, min(self::MAX_ALLOWED_SCORE, $score));
|
||||
}
|
||||
|
||||
private function __construct()
|
||||
public function __construct(private readonly array $config)
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Reads the "min_score" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present, numeric and
 * lie within the inclusive range 0.0 to 1.0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not numeric or out of range.
 */
public function getMinScore(): float
{
    $minScore = $this->requiredFloatInRange(key: 'min_score', min: 0.0, max: 1.0);

    return $minScore;
}
|
||||
|
||||
/**
 * Reads the "ambiguity_delta" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present, numeric and
 * lie within the inclusive range 0.0 to 1.0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not numeric or out of range.
 */
public function getAmbiguityDelta(): float
{
    $delta = $this->requiredFloatInRange(key: 'ambiguity_delta', min: 0.0, max: 1.0);

    return $delta;
}
|
||||
|
||||
/**
 * Reads the "intent_search_limit" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present and be an
 * integer (or an integer-like string) greater than 0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not an integer or not positive.
 */
public function getIntentSearchLimit(): int
{
    $limit = $this->requiredPositiveInt(key: 'intent_search_limit');

    return $limit;
}
|
||||
|
||||
/**
 * Reads the "list_search_limit" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present and be an
 * integer (or an integer-like string) greater than 0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not an integer or not positive.
 */
public function getListSearchLimit(): int
{
    $limit = $this->requiredPositiveInt(key: 'list_search_limit');

    return $limit;
}
|
||||
|
||||
/**
 * Reads the "min_allowed_score" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present, numeric and
 * lie within the inclusive range 0.0 to 1.0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not numeric or out of range.
 */
public function getMinAllowedScore(): float
{
    $lowerBound = $this->requiredFloatInRange(key: 'min_allowed_score', min: 0.0, max: 1.0);

    return $lowerBound;
}
|
||||
|
||||
/**
 * Reads the "max_allowed_score" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present, numeric and
 * lie within the inclusive range 0.0 to 1.0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not numeric or out of range.
 */
public function getMaxAllowedScore(): float
{
    $upperBound = $this->requiredFloatInRange(key: 'max_allowed_score', min: 0.0, max: 1.0);

    return $upperBound;
}
|
||||
|
||||
/**
 * Tells whether a score reaches the configured "min_score" threshold.
 *
 * @param float $score Score to compare against the threshold.
 *
 * @return bool True when $score is greater than or equal to the threshold.
 *
 * @throws \InvalidArgumentException When "min_score" is missing or invalid.
 */
public function isScoreAccepted(float $score): bool
{
    $threshold = $this->getMinScore();

    return $threshold <= $score;
}
|
||||
|
||||
/**
 * Tells whether two scores are closer together than the configured "ambiguity_delta".
 *
 * @param float $bestScore   Score of the best hit.
 * @param float $secondScore Score of the second-best hit.
 *
 * @return bool True when the absolute difference is strictly below the delta.
 *
 * @throws \InvalidArgumentException When "ambiguity_delta" is missing or invalid.
 */
public function isAmbiguous(float $bestScore, float $secondScore): bool
{
    $gap = abs($bestScore - $secondScore);

    return $gap < $this->getAmbiguityDelta();
}
|
||||
|
||||
/**
 * Clamps a score into the configured [min_allowed_score, max_allowed_score] range.
 *
 * @param float $score Score to clamp.
 *
 * @return float The lower bound when $score is below it, the upper bound when
 *               $score is above it, otherwise $score itself.
 *
 * @throws \InvalidArgumentException When either bound is missing or invalid, or when
 *                                   the configured lower bound exceeds the upper bound.
 */
public function clampScore(float $score): float
{
    $lowerBound = $this->getMinAllowedScore();
    $upperBound = $this->getMaxAllowedScore();

    // Inverted bounds would make every score collapse to the lower bound.
    // This class has no PHP fallbacks, so report the broken configuration
    // instead of silently returning a meaningless value.
    if ($lowerBound > $upperBound) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX catalog intent config key "min_allowed_score" (%s) must not be greater than "max_allowed_score" (%s).',
            (string) $lowerBound,
            (string) $upperBound
        ));
    }

    return max($lowerBound, min($upperBound, $score));
}
|
||||
|
||||
/**
 * Fetches a mandatory configuration entry and returns it as a positive integer.
 *
 * Native integers are accepted as-is. Strings are accepted when, after
 * trimming, they consist only of an optional minus sign followed by digits.
 *
 * @param string $key Name of the configuration entry.
 *
 * @throws \InvalidArgumentException When the entry is missing, is not an integer
 *                                   value, or is not greater than 0.
 */
private function requiredPositiveInt(string $key): int
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key));
    }

    $raw = $this->config[$key];

    // Normalize integer-like strings; any other string becomes null and is
    // rejected by the type check below.
    if (is_string($raw)) {
        $trimmed = trim($raw);
        $raw = preg_match('/^-?\d+$/', $trimmed) === 1 ? (int) $trimmed : null;
    }

    if (!is_int($raw)) {
        throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be an integer.', $key));
    }

    if ($raw < 1) {
        throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be greater than 0.', $key));
    }

    return $raw;
}
|
||||
|
||||
/**
 * Fetches a mandatory configuration entry and returns it as a float within a range.
 *
 * Integers and floats are accepted directly. Strings are accepted when they
 * are numeric after trimming surrounding whitespace.
 *
 * @param string $key Name of the configuration entry.
 * @param float  $min Inclusive lower bound.
 * @param float  $max Inclusive upper bound.
 *
 * @throws \InvalidArgumentException When the entry is missing, is not numeric,
 *                                   is NAN, or lies outside [$min, $max].
 */
private function requiredFloatInRange(string $key, float $min, float $max): float
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX catalog intent config key "%s".', $key));
    }

    $value = $this->config[$key];

    if (is_string($value)) {
        // Validate and convert the same trimmed text.
        $value = trim($value);
        $isNumeric = is_numeric($value);
    } else {
        $isNumeric = is_int($value) || is_float($value);
    }

    if (!$isNumeric) {
        throw new \InvalidArgumentException(sprintf('RetrieX catalog intent config key "%s" must be numeric.', $key));
    }

    $floatValue = (float) $value;

    // NAN compares false against every bound, so the plain range comparison
    // alone would let it through; reject it explicitly.
    if (is_nan($floatValue) || $floatValue < $min || $floatValue > $max) {
        throw new \InvalidArgumentException(sprintf(
            'RetrieX catalog intent config key "%s" must be between %s and %s.',
            $key,
            (string) $min,
            (string) $max
        ));
    }

    return $floatValue;
}
|
||||
}
|
||||
@@ -12,6 +12,8 @@ final readonly class ConfigSourceAuditProvider
|
||||
'AgentRunnerConfig' => 'retriex.agent.config',
|
||||
'CommerceIntentConfig' => 'retriex.intent.commerce.config',
|
||||
'CommerceQueryParserConfig' => 'retriex.commerce_query.config',
|
||||
'ContextServiceConfig' => 'retriex.context.config',
|
||||
'CatalogIntentConfig' => 'retriex.intent.catalog.config',
|
||||
'DomainVocabularyConfig' => 'retriex.vocabulary.config',
|
||||
'IntentLightConfig' => 'retriex.intent.light.config',
|
||||
'NdjsonHybridRetrieverConfig' => 'retriex.retrieval.config',
|
||||
|
||||
@@ -1,12 +1,55 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
class ContextServiceConfig
|
||||
/**
|
||||
* YAML-backed context configuration.
|
||||
*
|
||||
* This class intentionally has no PHP fallback values. Missing or invalid
|
||||
* configuration must be fixed in config/retriex/*.yaml instead of being hidden
|
||||
* by application defaults.
|
||||
*/
|
||||
final class ContextServiceConfig
|
||||
{
|
||||
/**
|
||||
* @param array<string, mixed> $config
|
||||
*/
|
||||
public function __construct(private readonly array $config)
|
||||
{
|
||||
//Number of lines included in regular context. Intended for normal conversational continuity.
|
||||
public const MAX_VISIBLE_REGULAR_LINES = 25;
|
||||
|
||||
//Number of lines included in full context. Intended for exceptional or diagnostic scenarios.
|
||||
public const MAX_FULL_LINES = 500;
|
||||
}
|
||||
|
||||
/**
 * Reads the "max_visible_regular_lines" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present and be an
 * integer (or an integer-like string) greater than 0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not an integer or not positive.
 */
public function getMaxVisibleRegularLines(): int
{
    $lineCount = $this->requiredPositiveInt(key: 'max_visible_regular_lines');

    return $lineCount;
}
|
||||
|
||||
/**
 * Reads the "max_full_lines" entry from the injected configuration.
 *
 * The entry is validated on every call: it must be present and be an
 * integer (or an integer-like string) greater than 0.
 *
 * @throws \InvalidArgumentException When the entry is missing, not an integer or not positive.
 */
public function getMaxFullLines(): int
{
    $lineCount = $this->requiredPositiveInt(key: 'max_full_lines');

    return $lineCount;
}
|
||||
|
||||
/**
 * Fetches a mandatory context configuration entry as a positive integer.
 *
 * Native integers are accepted as-is. Strings are accepted when, after
 * trimming, they consist only of an optional minus sign followed by digits.
 *
 * @param string $key Name of the configuration entry.
 *
 * @throws \InvalidArgumentException When the entry is missing, is not an integer
 *                                   value, or is not greater than 0.
 */
private function requiredPositiveInt(string $key): int
{
    if (!array_key_exists($key, $this->config)) {
        throw new \InvalidArgumentException(sprintf('Missing required RetrieX context config key "%s".', $key));
    }

    $candidate = $this->config[$key];

    // The first matching arm decides how the raw value becomes an integer;
    // anything that is neither an int nor an integer-like string is rejected.
    $number = match (true) {
        is_int($candidate) => $candidate,
        is_string($candidate) && preg_match('/^-?\d+$/', trim($candidate)) === 1 => (int) trim($candidate),
        default => throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be an integer.', $key)),
    };

    if ($number < 1) {
        throw new \InvalidArgumentException(sprintf('RetrieX context config key "%s" must be greater than 0.', $key));
    }

    return $number;
}
|
||||
}
|
||||
@@ -26,6 +26,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
private ShopServiceConfig $shopServiceConfig,
|
||||
private StopWordsConfig $stopWordsConfig,
|
||||
private QueryEnricherConfig $queryEnricherConfig,
|
||||
private CatalogIntentConfig $catalogIntentConfig,
|
||||
private ContextServiceConfig $contextServiceConfig,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -692,11 +694,12 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
private function catalogIntentConfig(): array
|
||||
{
|
||||
return [
|
||||
'min_score' => CatalogIntentConfig::MIN_SCORE,
|
||||
'ambiguity_delta' => CatalogIntentConfig::AMBIGUITY_DELTA,
|
||||
'search_limit' => CatalogIntentConfig::SEARCH_LIMIT,
|
||||
'min_allowed_score' => CatalogIntentConfig::MIN_ALLOWED_SCORE,
|
||||
'max_allowed_score' => CatalogIntentConfig::MAX_ALLOWED_SCORE,
|
||||
'min_score' => $this->catalogIntentConfig->getMinScore(),
|
||||
'ambiguity_delta' => $this->catalogIntentConfig->getAmbiguityDelta(),
|
||||
'intent_search_limit' => $this->catalogIntentConfig->getIntentSearchLimit(),
|
||||
'list_search_limit' => $this->catalogIntentConfig->getListSearchLimit(),
|
||||
'min_allowed_score' => $this->catalogIntentConfig->getMinAllowedScore(),
|
||||
'max_allowed_score' => $this->catalogIntentConfig->getMaxAllowedScore(),
|
||||
];
|
||||
}
|
||||
|
||||
@@ -704,8 +707,8 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
private function contextConfig(): array
|
||||
{
|
||||
return [
|
||||
'max_visible_regular_lines' => ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES,
|
||||
'max_full_lines' => ContextServiceConfig::MAX_FULL_LINES,
|
||||
'max_visible_regular_lines' => $this->contextServiceConfig->getMaxVisibleRegularLines(),
|
||||
'max_full_lines' => $this->contextServiceConfig->getMaxFullLines(),
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
@@ -4,39 +4,18 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
|
||||
* YAML-backed stop-word configuration.
|
||||
*
|
||||
* This class intentionally has no PHP fallback list. The complete list lives in
|
||||
* config/retriex/language.yaml.
|
||||
*/
|
||||
final class StopWordsConfig
|
||||
{
|
||||
/**
|
||||
* Retrieval-optimized stop-word list.
|
||||
*
|
||||
* Important:
|
||||
* - keep negations
|
||||
* - keep question words
|
||||
* - keep domain terms
|
||||
* - remove only structural filler words
|
||||
*/
|
||||
private const DEFAULT_STOP_WORDS = [
|
||||
'mit',
|
||||
'der', 'die', 'das',
|
||||
'ein', 'eine', 'einer', 'eines',
|
||||
'den', 'dem', 'des',
|
||||
'und', 'oder', 'aber', 'sowie',
|
||||
'ich', 'du', 'er', 'sie', 'es',
|
||||
'wir', 'ihr',
|
||||
'halt', 'eben', 'auch', 'schon',
|
||||
'noch', 'mal', 'bitte', 'danke',
|
||||
'also', 'nun', 'tja',
|
||||
'dann', 'danach', 'davor',
|
||||
'hier', 'dort',
|
||||
'heute', 'gestern', 'morgen',
|
||||
'könnte', 'kannst', 'kann',
|
||||
'würde', 'würdest', 'würden',
|
||||
];
|
||||
|
||||
/**
|
||||
* @param array<string, mixed> $config
|
||||
*/
|
||||
public function __construct(private array $config = [])
|
||||
public function __construct(private readonly array $config)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -45,19 +24,22 @@ final class StopWordsConfig
|
||||
*/
|
||||
public function getStopWords(): array
|
||||
{
|
||||
return $this->stringList('words', self::DEFAULT_STOP_WORDS);
|
||||
return $this->requiredStringList('words');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $default
|
||||
* @return string[]
|
||||
*/
|
||||
private function stringList(string $key, array $default): array
|
||||
private function requiredStringList(string $key): array
|
||||
{
|
||||
$value = $this->config[$key] ?? $default;
|
||||
if (!array_key_exists($key, $this->config)) {
|
||||
throw new \InvalidArgumentException(sprintf('Missing required RetrieX stopwords config key "%s".', $key));
|
||||
}
|
||||
|
||||
$value = $this->config[$key];
|
||||
|
||||
if (!is_array($value)) {
|
||||
return $default;
|
||||
throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must be a list.', $key));
|
||||
}
|
||||
|
||||
$out = [];
|
||||
@@ -76,6 +58,10 @@ final class StopWordsConfig
|
||||
}
|
||||
}
|
||||
|
||||
return $out !== [] ? $out : $default;
|
||||
if ($out === []) {
|
||||
throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must not be empty.', $key));
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ final class ContextService
|
||||
public function __construct(
|
||||
string $historyDir,
|
||||
string $projectDir,
|
||||
private readonly ContextServiceConfig $config,
|
||||
)
|
||||
{
|
||||
/**
|
||||
@@ -73,8 +74,8 @@ final class ContextService
|
||||
}
|
||||
|
||||
$maxLines = $full
|
||||
? ContextServiceConfig::MAX_FULL_LINES
|
||||
: ContextServiceConfig::MAX_VISIBLE_REGULAR_LINES;
|
||||
? $this->config->getMaxFullLines()
|
||||
: $this->config->getMaxVisibleRegularLines();
|
||||
|
||||
$selected = array_slice($lines, -$maxLines);
|
||||
|
||||
|
||||
@@ -21,15 +21,10 @@ use App\Tag\TagVectorSearchClient;
|
||||
*/
|
||||
final readonly class CatalogIntentLite
|
||||
{
|
||||
/**
|
||||
* Slightly wider than the old top-3 search so generic tags do not crowd out
|
||||
* relevant catalog_entity hits too easily.
|
||||
*/
|
||||
private const SEARCH_LIMIT = 6;
|
||||
|
||||
public function __construct(
|
||||
private TagVectorSearchClient $tagVectorClient,
|
||||
private QueryCleaner $queryCleaner,
|
||||
private CatalogIntentConfig $config,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -52,7 +47,7 @@ final readonly class CatalogIntentLite
|
||||
}
|
||||
|
||||
$catalogHits = $this->filterCatalogEntityHits(
|
||||
$this->tagVectorClient->search($cleanQuery, self::SEARCH_LIMIT)
|
||||
$this->tagVectorClient->search($cleanQuery, $this->config->getIntentSearchLimit())
|
||||
);
|
||||
|
||||
if ($catalogHits === []) {
|
||||
@@ -62,14 +57,14 @@ final readonly class CatalogIntentLite
|
||||
$best = $catalogHits[0];
|
||||
$bestScore = (float) ($best['score'] ?? 0.0);
|
||||
|
||||
if ($bestScore < CatalogIntentConfig::MIN_SCORE) {
|
||||
if (!$this->config->isScoreAccepted($bestScore)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (isset($catalogHits[1])) {
|
||||
$secondScore = (float) ($catalogHits[1]['score'] ?? 0.0);
|
||||
|
||||
if (abs($bestScore - $secondScore) < CatalogIntentConfig::AMBIGUITY_DELTA) {
|
||||
if ($this->config->isAmbiguous($bestScore, $secondScore)) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user