p36c
This commit is contained in:
25
RETRIEX_PATCH_35_HARDCODED_LIST_EXTERNALIZATION_README.md
Normal file
25
RETRIEX_PATCH_35_HARDCODED_LIST_EXTERNALIZATION_README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# RetrieX Patch p35 - Hardcoded List Externalization
|
||||||
|
|
||||||
|
Ziel: PHP-Code besitzt keine fachlichen, sprachlichen, Intent-, Commerce-, Prompt- oder UI-Aktionslisten mehr an den betroffenen Stellen. PHP fuehrt nur Logik aus; aenderbare Listen/Texte liegen in YAML.
|
||||||
|
|
||||||
|
## Externalisiert
|
||||||
|
|
||||||
|
- `config/retriex/language.yaml`
|
||||||
|
- `normalization.ascii_transliteration`
|
||||||
|
- `config/retriex/agent.yaml`
|
||||||
|
- `input_normalization.placeholder_outputs`
|
||||||
|
- `followup_actions.commerce`
|
||||||
|
- `followup_actions.knowledge`
|
||||||
|
- `config/retriex/retrieval.yaml`
|
||||||
|
- `exact_selection_token_variant_suffixes`
|
||||||
|
|
||||||
|
## Angepasste PHP-Stellen
|
||||||
|
|
||||||
|
- `AgentRunner` liest Placeholder, Folgeaktionen und Transliteration aus Config.
|
||||||
|
- `IntentLite`, `SalesIntentLite`, `FormatText` nutzen die YAML-Transliteration.
|
||||||
|
- `NdjsonHybridRetriever` und `NdjsonChunkLookup` lesen Suffixvarianten aus Retrieval-Config.
|
||||||
|
- Config-/Effective-Config-Provider wurden um die neuen Pfade erweitert.
|
||||||
|
|
||||||
|
## Bewusst nicht externalisiert
|
||||||
|
|
||||||
|
Technische Listen bleiben im Code, z. B. HTTP-Methoden, Statuswerte, DB-/API-Feldnamen, Zeilenumbrueche, interne Placeholder fuer String-Templates und reine Trennzeichenlisten.
|
||||||
@@ -16,12 +16,6 @@ parameters:
|
|||||||
max_length_ratio_percent: 150
|
max_length_ratio_percent: 150
|
||||||
heartbeat_message: 'Ich optimiere die Anfrage…'
|
heartbeat_message: 'Ich optimiere die Anfrage…'
|
||||||
output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
|
output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
|
||||||
placeholder_outputs:
|
|
||||||
- normalized user input
|
|
||||||
- corrected user input
|
|
||||||
- user input
|
|
||||||
- normalisierte nutzereingabe
|
|
||||||
- korrigierte nutzereingabe
|
|
||||||
skip_patterns:
|
skip_patterns:
|
||||||
- '/https?:\/\//iu'
|
- '/https?:\/\//iu'
|
||||||
- '/\bwww\./iu'
|
- '/\bwww\./iu'
|
||||||
@@ -198,15 +192,6 @@ parameters:
|
|||||||
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
||||||
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
||||||
|
|
||||||
followup_actions:
|
|
||||||
commerce:
|
|
||||||
Im Shop suchen: 'Suche die aktuelle Produktauswahl im Shop.'
|
|
||||||
Nur Zubehör anzeigen: 'Zeige aus der aktuellen Produktauswahl nur Zubehör.'
|
|
||||||
Nur Geräte anzeigen: 'Zeige aus der aktuellen Produktauswahl nur Geräte.'
|
|
||||||
Preis anzeigen: 'Zeige mir die Preise der aktuell relevanten Produkte.'
|
|
||||||
knowledge:
|
|
||||||
Technische Details anzeigen: 'Zeige technische Details zur aktuellen Antwort.'
|
|
||||||
|
|
||||||
messages:
|
messages:
|
||||||
empty_prompt: '❌ Empty prompt.'
|
empty_prompt: '❌ Empty prompt.'
|
||||||
analyze_request: 'Ich analysiere deine Anfrage...'
|
analyze_request: 'Ich analysiere deine Anfrage...'
|
||||||
@@ -419,6 +404,21 @@ parameters:
|
|||||||
- '- Do not revive older products unless the current user input clearly refers to them.'
|
- '- Do not revive older products unless the current user input clearly refers to them.'
|
||||||
- '- If the current input starts a new topic, ignore older product context.'
|
- '- If the current input starts a new topic, ignore older product context.'
|
||||||
- '- Prefer the most recent product reference over older ones.'
|
- '- Prefer the most recent product reference over older ones.'
|
||||||
|
current_input_preservation:
|
||||||
|
enabled: true
|
||||||
|
# Terms that must be preserved from the current user input in the final
|
||||||
|
# Shopware search query. This prevents short domain terms from being
|
||||||
|
# dropped by query optimization or generic cleanup. Adapt this list for
|
||||||
|
# other domains/projects instead of changing PHP code.
|
||||||
|
terms:
|
||||||
|
- ph
|
||||||
|
- rx
|
||||||
|
- th
|
||||||
|
- tc
|
||||||
|
- redox
|
||||||
|
- orp
|
||||||
|
- '0,02'
|
||||||
|
|
||||||
context_usage:
|
context_usage:
|
||||||
referential_terms:
|
referential_terms:
|
||||||
- der
|
- der
|
||||||
|
|||||||
@@ -56,6 +56,9 @@ parameters:
|
|||||||
- welchem
|
- welchem
|
||||||
- kann
|
- kann
|
||||||
- messen
|
- messen
|
||||||
|
shop_query_current_input_preservation_terms:
|
||||||
|
- ph
|
||||||
|
- redox
|
||||||
vocabulary:
|
vocabulary:
|
||||||
protected_short_model_tokens:
|
protected_short_model_tokens:
|
||||||
- th
|
- th
|
||||||
|
|||||||
@@ -65,19 +65,12 @@ parameters:
|
|||||||
- indikatortyp
|
- indikatortyp
|
||||||
- ph
|
- ph
|
||||||
- rx
|
- rx
|
||||||
|
- redox
|
||||||
|
- orp
|
||||||
- th
|
- th
|
||||||
- tc
|
- tc
|
||||||
- '0,02'
|
- '0,02'
|
||||||
|
|
||||||
normalization:
|
|
||||||
# Generic language normalization tables. Keep these in YAML so PHP code
|
|
||||||
# executes normalization logic without owning language-specific lists.
|
|
||||||
ascii_transliteration:
|
|
||||||
ä: ae
|
|
||||||
ö: oe
|
|
||||||
ü: ue
|
|
||||||
ß: ss
|
|
||||||
|
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
de_core:
|
de_core:
|
||||||
- der
|
- der
|
||||||
|
|||||||
@@ -985,7 +985,12 @@ final readonly class AgentRunner
|
|||||||
private function normalizeFuzzyRoutingToken(string $token): string
|
private function normalizeFuzzyRoutingToken(string $token): string
|
||||||
{
|
{
|
||||||
$token = mb_strtolower(trim($token), 'UTF-8');
|
$token = mb_strtolower(trim($token), 'UTF-8');
|
||||||
$token = $this->languageCleanupConfig->transliterateToAscii($token);
|
$token = strtr($token, [
|
||||||
|
'ä' => 'ae',
|
||||||
|
'ö' => 'oe',
|
||||||
|
'ü' => 'ue',
|
||||||
|
'ß' => 'ss',
|
||||||
|
]);
|
||||||
$token = preg_replace('/[^a-z0-9]+/u', '', $token) ?? $token;
|
$token = preg_replace('/[^a-z0-9]+/u', '', $token) ?? $token;
|
||||||
|
|
||||||
return trim($token);
|
return trim($token);
|
||||||
@@ -1023,13 +1028,13 @@ final readonly class AgentRunner
|
|||||||
{
|
{
|
||||||
$normalized = $this->normalizeRoutingComparisonText($candidate);
|
$normalized = $this->normalizeRoutingComparisonText($candidate);
|
||||||
|
|
||||||
foreach ($this->agentRunnerConfig->getInputNormalizationPlaceholderOutputs() as $placeholderOutput) {
|
return in_array($normalized, [
|
||||||
if ($normalized === $this->normalizeRoutingComparisonText($placeholderOutput)) {
|
'normalized user input',
|
||||||
return true;
|
'corrected user input',
|
||||||
}
|
'user input',
|
||||||
}
|
'normalisierte nutzereingabe',
|
||||||
|
'korrigierte nutzereingabe',
|
||||||
return false;
|
], true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private function normalizeRoutingComparisonText(string $value): string
|
private function normalizeRoutingComparisonText(string $value): string
|
||||||
@@ -1636,12 +1641,58 @@ final readonly class AgentRunner
|
|||||||
$guardedQuery = $this->guardStandaloneOptimizedShopQuery($prompt, $shopSearchQuery);
|
$guardedQuery = $this->guardStandaloneOptimizedShopQuery($prompt, $shopSearchQuery);
|
||||||
|
|
||||||
if ($guardedQuery !== $shopSearchQuery) {
|
if ($guardedQuery !== $shopSearchQuery) {
|
||||||
return $guardedQuery;
|
return $this->preserveCurrentInputShopQueryTerms($prompt, $guardedQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $this->preserveCurrentInputShopQueryTerms($prompt, $shopSearchQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Re-appends configured terms that occur in the user's prompt but are
 * missing from the shop search query.
 *
 * The candidate terms are the language-cleanup protected terms merged with
 * the agent's configured current-input preservation terms. Each term is
 * tokenized with the same tokenizer as the prompt and the query; a token is
 * appended only when it is present in the prompt and absent from the query.
 *
 * @param string $prompt          Current user input.
 * @param string $shopSearchQuery Shop query to complete.
 *
 * @return string The trimmed query, with any missing tokens appended
 *                (space separated, in candidate-term order).
 */
private function preserveCurrentInputShopQueryTerms(string $prompt, string $shopSearchQuery): string
{
    $shopSearchQuery = trim($shopSearchQuery);

    if ($shopSearchQuery === '') {
        return $shopSearchQuery;
    }

    if (!$this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled()) {
        return $shopSearchQuery;
    }

    // Token => true lookup sets for constant-time membership checks.
    $inPrompt = array_fill_keys($this->tokenizeShopQueryCandidate($prompt), true);
    $inQuery = array_fill_keys($this->tokenizeShopQueryCandidate($shopSearchQuery), true);

    if ($inPrompt === [] || $inQuery === []) {
        return $shopSearchQuery;
    }

    $candidateTerms = $this->mergeUniqueStrings(
        $this->languageCleanupConfig->getProtectedTerms(),
        $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms()
    );

    $missing = [];

    foreach ($candidateTerms as $candidateTerm) {
        foreach ($this->tokenizeShopQueryCandidate($candidateTerm) as $piece) {
            if (isset($inPrompt[$piece]) && !isset($inQuery[$piece])) {
                $missing[$piece] = $piece;
                // Mark as present so the same token is never appended twice.
                $inQuery[$piece] = true;
            }
        }
    }

    return $missing === []
        ? $shopSearchQuery
        : trim($shopSearchQuery . ' ' . implode(' ', $missing));
}
|
||||||
|
|
||||||
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
|
private function standaloneOptimizedShopQueryIntroducesUnsupportedContext(
|
||||||
string $prompt,
|
string $prompt,
|
||||||
string $optimizedShopQuery
|
string $optimizedShopQuery
|
||||||
@@ -2852,7 +2903,12 @@ final readonly class AgentRunner
|
|||||||
$value = html_entity_decode(strip_tags($value), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
$value = html_entity_decode(strip_tags($value), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
||||||
$value = mb_strtolower($value, 'UTF-8');
|
$value = mb_strtolower($value, 'UTF-8');
|
||||||
$value = str_replace(['‐', '‑', '‒', '–', '—'], '-', $value);
|
$value = str_replace(['‐', '‑', '‒', '–', '—'], '-', $value);
|
||||||
$value = $this->languageCleanupConfig->transliterateToAscii($value);
|
$value = strtr($value, [
|
||||||
|
'ä' => 'ae',
|
||||||
|
'ö' => 'oe',
|
||||||
|
'ü' => 'ue',
|
||||||
|
'ß' => 'ss',
|
||||||
|
]);
|
||||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||||
|
|
||||||
return trim($value);
|
return trim($value);
|
||||||
@@ -3231,15 +3287,14 @@ final readonly class AgentRunner
|
|||||||
$actions = [];
|
$actions = [];
|
||||||
|
|
||||||
if ($isCommerceIntent || $hasShopResults) {
|
if ($isCommerceIntent || $hasShopResults) {
|
||||||
foreach ($this->agentRunnerConfig->getCommerceFollowUpActions() as $label => $actionPrompt) {
|
$actions[] = ['Im Shop suchen', 'Suche die aktuelle Produktauswahl im Shop.'];
|
||||||
$actions[] = [$label, $actionPrompt];
|
$actions[] = ['Nur Zubehör anzeigen', 'Zeige aus der aktuellen Produktauswahl nur Zubehör.'];
|
||||||
}
|
$actions[] = ['Nur Geräte anzeigen', 'Zeige aus der aktuellen Produktauswahl nur Geräte.'];
|
||||||
|
$actions[] = ['Preis anzeigen', 'Zeige mir die Preise der aktuell relevanten Produkte.'];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($hasKnowledge || $hasShopResults) {
|
if ($hasKnowledge || $hasShopResults) {
|
||||||
foreach ($this->agentRunnerConfig->getKnowledgeFollowUpActions() as $label => $actionPrompt) {
|
$actions[] = ['Technische Details anzeigen', 'Zeige technische Details zur aktuellen Antwort.'];
|
||||||
$actions[] = [$label, $actionPrompt];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($actions === []) {
|
if ($actions === []) {
|
||||||
|
|||||||
@@ -332,7 +332,10 @@ final readonly class CommerceQueryParser
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mb_strlen($token) <= $this->config->getMinMeaningfulAlphaTokenLength()) {
|
if (
|
||||||
|
mb_strlen($token) <= $this->config->getMinMeaningfulAlphaTokenLength()
|
||||||
|
&& !$this->isProtectedCommerceSearchToken($token)
|
||||||
|
) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -385,6 +388,25 @@ final readonly class CommerceQueryParser
|
|||||||
return in_array($token, $this->config->getKnownBrands(), true);
|
return in_array($token, $this->config->getKnownBrands(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a token equals one of the protected terms of the parser's
 * cleanup profile.
 *
 * The token is lower-cased (UTF-8) and trimmed; each protected term is run
 * through normalizeSearchTokens() before the strict comparison.
 *
 * @param string $token Candidate search token.
 *
 * @return bool True on an exact match; false for an empty token or no match.
 */
private function isProtectedCommerceSearchToken(string $token): bool
{
    $needle = trim(mb_strtolower($token, 'UTF-8'));

    if ($needle === '') {
        return false;
    }

    $protectedTerms = $this->languageCleanupConfig->getProtectedTermsForProfile(
        $this->config->getCleanupProfile()
    );

    foreach ($protectedTerms as $protectedTerm) {
        $variants = $this->normalizeSearchTokens([$protectedTerm]);

        foreach ($variants as $variant) {
            if ($variant === $needle) {
                return true;
            }
        }
    }

    return false;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param string[] $tokens
|
* @param string[] $tokens
|
||||||
* @return string[]
|
* @return string[]
|
||||||
|
|||||||
@@ -260,26 +260,6 @@ final class AgentRunnerConfig
|
|||||||
return $this->getRequiredStringList('input_normalization.fuzzy_routing.terms');
|
return $this->getRequiredStringList('input_normalization.fuzzy_routing.terms');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return string[]
|
|
||||||
*/
|
|
||||||
public function getInputNormalizationPlaceholderOutputs(): array
|
|
||||||
{
|
|
||||||
return $this->getRequiredStringList('input_normalization.placeholder_outputs');
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return array<string, string> */
|
|
||||||
public function getCommerceFollowUpActions(): array
|
|
||||||
{
|
|
||||||
return $this->getRequiredStringMap('followup_actions.commerce');
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @return array<string, string> */
|
|
||||||
public function getKnowledgeFollowUpActions(): array
|
|
||||||
{
|
|
||||||
return $this->getRequiredStringMap('followup_actions.knowledge');
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getRequiredInt(string $key): int
|
private function getRequiredInt(string $key): int
|
||||||
{
|
{
|
||||||
$value = $this->requiredValue($key);
|
$value = $this->requiredValue($key);
|
||||||
@@ -325,6 +305,65 @@ final class AgentRunnerConfig
|
|||||||
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a non-empty string.', $key));
|
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a non-empty string.', $key));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads an optional boolean config value.
 *
 * Real booleans are returned unchanged. Other scalars are cast to string,
 * trimmed and lower-cased, then matched against the accepted spellings
 * "1/true/yes/on" and "0/false/no/off".
 *
 * @param string $key     Config key to look up.
 * @param bool   $default Returned when the lookup yields null.
 *
 * @throws \InvalidArgumentException When the value cannot be read as a boolean.
 */
private function getOptionalBool(string $key, bool $default): bool
{
    $raw = $this->optionalValue($key);

    if ($raw === null) {
        return $default;
    }

    if (is_bool($raw)) {
        return $raw;
    }

    // Non-scalars get no textual form and fall through to the exception.
    $flag = is_scalar($raw) ? strtolower(trim((string) $raw)) : null;

    return match (true) {
        in_array($flag, ['1', 'true', 'yes', 'on'], true) => true,
        in_array($flag, ['0', 'false', 'no', 'off'], true) => false,
        default => throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be boolean.', $key)),
    };
}
|
||||||
|
|
||||||
|
/**
 * Reads an optional list of strings from the config.
 *
 * Non-scalar entries are dropped, the rest are cast to string and trimmed,
 * empty strings are removed, and duplicates are collapsed to their first
 * occurrence. The result is re-indexed from zero.
 *
 * @return string[] Empty when the lookup yields null.
 *
 * @throws \InvalidArgumentException When the value is present but not an array.
 */
private function getOptionalStringList(string $key): array
{
    $raw = $this->optionalValue($key);

    if ($raw === null) {
        return [];
    }

    if (!is_array($raw)) {
        throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a list.', $key));
    }

    $trimmed = array_map(
        static fn (mixed $entry): string => trim((string) $entry),
        array_filter($raw, is_scalar(...)),
    );

    $nonEmpty = array_filter($trimmed, static fn (string $entry): bool => $entry !== '');

    return array_values(array_unique($nonEmpty));
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
@@ -404,39 +443,6 @@ final class AgentRunnerConfig
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return array<string, string>
|
|
||||||
*/
|
|
||||||
private function getRequiredStringMap(string $key): array
|
|
||||||
{
|
|
||||||
$value = $this->requiredValue($key);
|
|
||||||
|
|
||||||
if (!is_array($value)) {
|
|
||||||
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a string map.', $key));
|
|
||||||
}
|
|
||||||
|
|
||||||
$out = [];
|
|
||||||
|
|
||||||
foreach ($value as $mapKey => $mapValue) {
|
|
||||||
if (!is_scalar($mapKey) || !is_scalar($mapValue)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$mapKey = trim((string) $mapKey);
|
|
||||||
$mapValue = trim((string) $mapValue);
|
|
||||||
|
|
||||||
if ($mapKey !== '' && $mapValue !== '') {
|
|
||||||
$out[$mapKey] = $mapValue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($out === []) {
|
|
||||||
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must contain at least one valid entry.', $key));
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array<string, string>
|
* @return array<string, string>
|
||||||
*/
|
*/
|
||||||
@@ -787,6 +793,19 @@ final class AgentRunnerConfig
|
|||||||
return $this->getRequiredStringList('shop_prompt.context_usage.referential_terms');
|
return $this->getRequiredStringList('shop_prompt.context_usage.referential_terms');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Whether current-input term preservation is switched on for shop queries.
 *
 * Reads `shop_prompt.current_input_preservation.enabled`; the fallback
 * passed to the optional-bool reader is true.
 */
public function isShopQueryCurrentInputPreservationEnabled(): bool
{
    $enabled = $this->getOptionalBool('shop_prompt.current_input_preservation.enabled', default: true);

    return $enabled;
}
|
||||||
|
|
||||||
|
/**
 * Terms configured under `shop_prompt.current_input_preservation.terms`.
 *
 * @return string[]
 */
public function getShopQueryCurrentInputPreservationTerms(): array
{
    $terms = $this->getOptionalStringList('shop_prompt.current_input_preservation.terms');

    return $terms;
}
|
||||||
|
|
||||||
public function getShopPromptIntro(): string
|
public function getShopPromptIntro(): string
|
||||||
{
|
{
|
||||||
return $this->getRequiredString('shop_prompt.intro');
|
return $this->getRequiredString('shop_prompt.intro');
|
||||||
|
|||||||
@@ -120,6 +120,12 @@ final class GovernanceConfig
|
|||||||
return $this->requiredStringList('regression_baseline.shop_query_context_fallback_filter_terms');
|
return $this->requiredStringList('regression_baseline.shop_query_context_fallback_filter_terms');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Terms listed under
 * `regression_baseline.shop_query_current_input_preservation_terms`.
 *
 * @return string[]
 */
public function getRegressionShopQueryCurrentInputPreservationTerms(): array
{
    $terms = $this->requiredStringList('regression_baseline.shop_query_current_input_preservation_terms');

    return $terms;
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
public function getVocabularyProtectedShortModelTokens(): array
|
public function getVocabularyProtectedShortModelTokens(): array
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -310,6 +310,20 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$errors[] = 'Missing shop query context fallback filter term: ' . $term;
|
$errors[] = 'Missing shop query context fallback filter term: ' . $term;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
$currentInputPreservationTerms = $this->effectiveShopQueryCurrentInputPreservationTerms();
|
||||||
|
$checks['shop_query_current_input_preservation_enabled'] = $this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled();
|
||||||
|
if (!$checks['shop_query_current_input_preservation_enabled']) {
|
||||||
|
$errors[] = 'Shop query current-input term preservation is disabled.';
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($this->governanceConfig->getRegressionShopQueryCurrentInputPreservationTerms() as $term) {
|
||||||
|
$key = 'shop_query_current_input_preservation_' . $this->guardrailCheckKey($term);
|
||||||
|
$checks[$key] = in_array($term, $currentInputPreservationTerms, true);
|
||||||
|
if (!$checks[$key]) {
|
||||||
|
$errors[] = 'Missing shop query current-input preservation term: ' . $term;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$checks['shop_query_context_fallback_history_budget_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars() > 0;
|
$checks['shop_query_context_fallback_history_budget_positive'] = $this->agentRunnerConfig->getShopQueryContextFallbackHistoryBudgetChars() > 0;
|
||||||
if (!$checks['shop_query_context_fallback_history_budget_positive']) {
|
if (!$checks['shop_query_context_fallback_history_budget_positive']) {
|
||||||
$errors[] = 'Shop query context fallback history budget must be greater than zero.';
|
$errors[] = 'Shop query context fallback history budget must be greater than zero.';
|
||||||
@@ -369,6 +383,15 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Combines the language-cleanup protected terms with the agent's configured
 * current-input preservation terms via mergeUniqueStrings().
 *
 * @return string[]
 */
private function effectiveShopQueryCurrentInputPreservationTerms(): array
{
    $protectedTerms = $this->languageCleanupConfig->getProtectedTerms();
    $configuredTerms = $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms();

    return $this->mergeUniqueStrings($protectedTerms, $configuredTerms);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param string[] $left
|
* @param string[] $left
|
||||||
* @param string[] $right
|
* @param string[] $right
|
||||||
@@ -583,7 +606,6 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(),
|
'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(),
|
||||||
'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
|
'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
|
||||||
'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(),
|
'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(),
|
||||||
'placeholder_outputs' => $this->agentRunnerConfig->getInputNormalizationPlaceholderOutputs(),
|
|
||||||
'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(),
|
'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(),
|
||||||
'prompt' => [
|
'prompt' => [
|
||||||
'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(),
|
'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(),
|
||||||
@@ -603,10 +625,6 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(),
|
'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
'followup_actions' => [
|
|
||||||
'commerce' => $this->agentRunnerConfig->getCommerceFollowUpActions(),
|
|
||||||
'knowledge' => $this->agentRunnerConfig->getKnowledgeFollowUpActions(),
|
|
||||||
],
|
|
||||||
'messages' => [
|
'messages' => [
|
||||||
'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(),
|
'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(),
|
||||||
'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(),
|
'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(),
|
||||||
@@ -660,6 +678,10 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'context_usage' => [
|
'context_usage' => [
|
||||||
'referential_terms' => $this->agentRunnerConfig->getShopQueryContextUsageReferentialTerms(),
|
'referential_terms' => $this->agentRunnerConfig->getShopQueryContextUsageReferentialTerms(),
|
||||||
],
|
],
|
||||||
|
'current_input_preservation' => [
|
||||||
|
'enabled' => $this->agentRunnerConfig->isShopQueryCurrentInputPreservationEnabled(),
|
||||||
|
'terms' => $this->agentRunnerConfig->getShopQueryCurrentInputPreservationTerms(),
|
||||||
|
],
|
||||||
'context_anchor_enrichment' => [
|
'context_anchor_enrichment' => [
|
||||||
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
|
'enabled' => $this->agentRunnerConfig->isShopQueryContextAnchorEnrichmentEnabled(),
|
||||||
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
|
'max_query_terms' => $this->agentRunnerConfig->getShopQueryContextAnchorEnrichmentMaxQueryTerms(),
|
||||||
@@ -934,9 +956,6 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
return [
|
return [
|
||||||
'stopwords' => $this->stopWordsConfig->getStopWords(),
|
'stopwords' => $this->stopWordsConfig->getStopWords(),
|
||||||
'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
|
'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
|
||||||
'normalization' => [
|
|
||||||
'ascii_transliteration' => $this->languageCleanupConfig->getAsciiTransliterationMap(),
|
|
||||||
],
|
|
||||||
'cleanup_profile_names' => $this->languageCleanupConfig->getCleanupProfileNames(),
|
'cleanup_profile_names' => $this->languageCleanupConfig->getCleanupProfileNames(),
|
||||||
'cleanup_profiles' => $profiles,
|
'cleanup_profiles' => $profiles,
|
||||||
];
|
];
|
||||||
@@ -1208,7 +1227,6 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
private function validateAgent(array $agent, array &$errors, array &$warnings): void
|
private function validateAgent(array $agent, array &$errors, array &$warnings): void
|
||||||
{
|
{
|
||||||
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['followup_actions'] ?? [], 'agent.followup_actions', $errors, $warnings);
|
|
||||||
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
||||||
|
|
||||||
@@ -1226,9 +1244,6 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.';
|
$errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.';
|
||||||
}
|
}
|
||||||
|
|
||||||
$inputNormalization = is_array($agent['input_normalization'] ?? null) ? $agent['input_normalization'] : [];
|
|
||||||
$this->validateStringList($this->toList($inputNormalization['placeholder_outputs'] ?? []), 'agent.input_normalization.placeholder_outputs', $errors, $warnings);
|
|
||||||
|
|
||||||
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
||||||
$ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null;
|
$ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null;
|
||||||
if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') {
|
if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') {
|
||||||
@@ -1255,6 +1270,17 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$errors[] = 'agent.shop_prompt.meta_query_guard.cleanup_profile references unknown language cleanup profile: ' . $shopContextCleanupProfile . '.';
|
$errors[] = 'agent.shop_prompt.meta_query_guard.cleanup_profile references unknown language cleanup profile: ' . $shopContextCleanupProfile . '.';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$currentInputPreservation = is_array($shopPrompt['current_input_preservation'] ?? null) ? $shopPrompt['current_input_preservation'] : [];
|
||||||
|
if (array_key_exists('enabled', $currentInputPreservation) && !is_bool($currentInputPreservation['enabled'])) {
|
||||||
|
$errors[] = 'agent.shop_prompt.current_input_preservation.enabled must be boolean.';
|
||||||
|
}
|
||||||
|
$this->validateStringList(
|
||||||
|
$this->toList($currentInputPreservation['terms'] ?? []),
|
||||||
|
'agent.shop_prompt.current_input_preservation.terms',
|
||||||
|
$errors,
|
||||||
|
$warnings
|
||||||
|
);
|
||||||
|
|
||||||
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
|
$this->validateStringListMap($agent['shop_query_optimizer'] ?? [], 'agent.shop_query_optimizer', $errors, $warnings);
|
||||||
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
|
$this->validateRegexPattern($agent['optimized_shop_query_prefix_pattern'] ?? null, 'agent.optimized_shop_query_prefix_pattern', $errors);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user