This commit is contained in:
team 1
2026-05-04 16:33:36 +02:00
parent 33b2b30d99
commit 387506b239
13 changed files with 198 additions and 57 deletions

View File

@@ -260,6 +260,26 @@ final class AgentRunnerConfig
return $this->getRequiredStringList('input_normalization.fuzzy_routing.terms');
}
/**
 * Returns the placeholder outputs configured for input normalization.
 *
 * Delegates to getRequiredStringList() with the config key
 * "input_normalization.placeholder_outputs".
 *
 * @return string[]
 */
public function getInputNormalizationPlaceholderOutputs(): array
{
    $configKey = 'input_normalization.placeholder_outputs';

    return $this->getRequiredStringList($configKey);
}
/**
 * Returns the follow-up actions configured under "followup_actions.commerce".
 *
 * Delegates to getRequiredStringMap(), which trims keys and values and
 * throws \InvalidArgumentException when no valid entry is configured.
 *
 * @return array<string, string>
 */
public function getCommerceFollowUpActions(): array
{
    $configKey = 'followup_actions.commerce';

    return $this->getRequiredStringMap($configKey);
}
/**
 * Returns the follow-up actions configured under "followup_actions.knowledge".
 *
 * Delegates to getRequiredStringMap(), which trims keys and values and
 * throws \InvalidArgumentException when no valid entry is configured.
 *
 * @return array<string, string>
 */
public function getKnowledgeFollowUpActions(): array
{
    $configKey = 'followup_actions.knowledge';

    return $this->getRequiredStringMap($configKey);
}
private function getRequiredInt(string $key): int
{
$value = $this->requiredValue($key);
@@ -384,6 +404,39 @@ final class AgentRunnerConfig
return $out;
}
/**
 * Reads a required config value and normalizes it into a map of trimmed strings.
 *
 * Entries whose key or value is not scalar, or whose key or value is empty
 * after trimming, are skipped rather than reported.
 *
 * @param string $key Config key handed to requiredValue() and echoed in error messages.
 *
 * @return array<string, string>
 *
 * @throws \InvalidArgumentException When the value is not an array, or when no entry survives normalization.
 */
private function getRequiredStringMap(string $key): array
{
    $raw = $this->requiredValue($key);
    if (!is_array($raw)) {
        throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a string map.', $key));
    }

    $entries = [];
    foreach ($raw as $name => $content) {
        if (is_scalar($name) && is_scalar($content)) {
            $name = trim((string) $name);
            $content = trim((string) $content);
            if ($name === '' || $content === '') {
                // Blank keys or values carry no usable mapping.
                continue;
            }
            $entries[$name] = $content;
        }
    }

    if ($entries !== []) {
        return $entries;
    }

    throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must contain at least one valid entry.', $key));
}
/**
* @return array<string, string>
*/

View File

@@ -44,6 +44,27 @@ final class LanguageCleanupConfig
return in_array($term, $this->getProtectedTerms(), true);
}
/**
 * Returns the transliteration map stored at "normalization.ascii_transliteration".
 *
 * The "normalization" section is loaded via requiredMap(); the entry is then
 * normalized by stringMapFromValue() in required mode, so an empty result is
 * rejected there. A missing "ascii_transliteration" key raises the exception
 * built by invalid().
 *
 * @return array<string, string>
 */
public function getAsciiTransliterationMap(): array
{
    $path = 'normalization.ascii_transliteration';
    $section = $this->requiredMap('normalization');

    if (array_key_exists('ascii_transliteration', $section)) {
        return $this->stringMapFromValue($section['ascii_transliteration'], $path, true);
    }

    throw $this->invalid($path, 'is missing');
}
/**
 * Applies the configured ASCII transliteration map to the given string.
 *
 * Replacement is done with strtr() using the map from
 * getAsciiTransliterationMap(); an empty map leaves the value untouched.
 *
 * @param string $value Text to transliterate.
 *
 * @return string The text with every mapped substring replaced.
 */
public function transliterateToAscii(string $value): string
{
    $replacements = $this->getAsciiTransliterationMap();

    return $replacements === [] ? $value : strtr($value, $replacements);
}
/** @return string[] */
public function getCleanupProfileNames(): array
{
@@ -235,6 +256,35 @@ final class LanguageCleanupConfig
return $out;
}
/**
 * Normalizes an arbitrary config value into a map of trimmed, non-empty strings.
 *
 * Non-scalar keys or values and entries that are blank after trimming are
 * skipped. A non-array value, or (in required mode) an empty result, raises
 * the exception built by invalid().
 *
 * @param mixed  $value    Raw config value expected to be an array.
 * @param string $path     Config path used in error messages.
 * @param bool   $required Whether an empty resulting map is an error.
 *
 * @return array<string, string>
 */
private function stringMapFromValue(mixed $value, string $path, bool $required): array
{
    if (!is_array($value)) {
        throw $this->invalid($path, 'must be a map of non-empty strings');
    }

    $map = [];
    foreach ($value as $rawKey => $rawItem) {
        if (is_scalar($rawKey) && is_scalar($rawItem)) {
            $cleanKey = trim((string) $rawKey);
            $cleanItem = trim((string) $rawItem);
            if ($cleanKey !== '' && $cleanItem !== '') {
                $map[$cleanKey] = $cleanItem;
            }
        }
    }

    if ($map === [] && $required) {
        throw $this->invalid($path, 'must contain at least one non-empty map entry');
    }

    return $map;
}
/** @param string[] $terms */
private function removeProtectedTerms(array $terms): array
{

View File

@@ -148,6 +148,12 @@ final class NdjsonHybridRetrieverConfig
return $this->requiredStringListMap('exact_selection_token_variant_prefixes');
}
/**
 * Returns the token variant suffixes configured for exact selection.
 *
 * Delegates to requiredStringList() with the config key
 * "exact_selection_token_variant_suffixes".
 *
 * @return string[]
 */
public function exactSelectionTokenVariantSuffixes(): array
{
    $configKey = 'exact_selection_token_variant_suffixes';

    return $this->requiredStringList($configKey);
}
/** @return string[] */
public function exactSelectionIndicatorQuestionTokens(): array
{
@@ -313,6 +319,7 @@ final class NdjsonHybridRetrieverConfig
'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
'catalog_list_shortcut_patterns' => $this->catalogListShortcutPatterns(),
'exact_selection_token_variant_prefixes' => $this->exactSelectionTokenVariantPrefixes(),
'exact_selection_token_variant_suffixes' => $this->exactSelectionTokenVariantSuffixes(),
'exact_selection_indicator_question_tokens' => $this->exactSelectionIndicatorQuestionTokens(),
'exact_selection_indicator_question_phrases' => $this->exactSelectionIndicatorQuestionPhrases(),
'exact_selection_indicator_table_heading_patterns' => $this->exactSelectionIndicatorTableHeadingPatterns(),

View File

@@ -583,6 +583,7 @@ final readonly class RetriexEffectiveConfigProvider
'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(),
'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(),
'placeholder_outputs' => $this->agentRunnerConfig->getInputNormalizationPlaceholderOutputs(),
'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(),
'prompt' => [
'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(),
@@ -602,6 +603,10 @@ final readonly class RetriexEffectiveConfigProvider
'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(),
],
],
'followup_actions' => [
'commerce' => $this->agentRunnerConfig->getCommerceFollowUpActions(),
'knowledge' => $this->agentRunnerConfig->getKnowledgeFollowUpActions(),
],
'messages' => [
'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(),
'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(),
@@ -929,6 +934,9 @@ final readonly class RetriexEffectiveConfigProvider
return [
'stopwords' => $this->stopWordsConfig->getStopWords(),
'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
'normalization' => [
'ascii_transliteration' => $this->languageCleanupConfig->getAsciiTransliterationMap(),
],
'cleanup_profile_names' => $this->languageCleanupConfig->getCleanupProfileNames(),
'cleanup_profiles' => $profiles,
];
@@ -1200,6 +1208,7 @@ final readonly class RetriexEffectiveConfigProvider
private function validateAgent(array $agent, array &$errors, array &$warnings): void
{
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
$this->validateStringListMap($agent['followup_actions'] ?? [], 'agent.followup_actions', $errors, $warnings);
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
@@ -1217,6 +1226,9 @@ final readonly class RetriexEffectiveConfigProvider
$errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.';
}
$inputNormalization = is_array($agent['input_normalization'] ?? null) ? $agent['input_normalization'] : [];
$this->validateStringList($this->toList($inputNormalization['placeholder_outputs'] ?? []), 'agent.input_normalization.placeholder_outputs', $errors, $warnings);
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
$ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null;
if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') {