p43N
This commit is contained in:
@@ -230,6 +230,17 @@ parameters:
|
|||||||
- gibt es
|
- gibt es
|
||||||
- suche im shop
|
- suche im shop
|
||||||
|
|
||||||
|
# Reusable cleanup group sets keep common profile compositions in one place.
|
||||||
|
# Profiles may still add local group references after these shared sets.
|
||||||
|
stopword_group_sets:
|
||||||
|
de_conversation:
|
||||||
|
- de_core
|
||||||
|
- conversation
|
||||||
|
|
||||||
|
phrase_group_sets:
|
||||||
|
user_instruction:
|
||||||
|
- user_instruction
|
||||||
|
|
||||||
meta_term_groups:
|
meta_term_groups:
|
||||||
presentation:
|
presentation:
|
||||||
- tabelle
|
- tabelle
|
||||||
@@ -256,40 +267,40 @@ parameters:
|
|||||||
|
|
||||||
cleanup_profiles:
|
cleanup_profiles:
|
||||||
commerce_query:
|
commerce_query:
|
||||||
|
stopword_group_sets:
|
||||||
|
- de_conversation
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
- de_core
|
|
||||||
- conversation
|
|
||||||
- pronouns
|
- pronouns
|
||||||
- user_instruction_terms
|
- user_instruction_terms
|
||||||
- response_style
|
- response_style
|
||||||
phrase_groups:
|
phrase_group_sets:
|
||||||
- user_instruction
|
- user_instruction
|
||||||
|
|
||||||
rag_evidence:
|
rag_evidence:
|
||||||
|
stopword_group_sets:
|
||||||
|
- de_conversation
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
- de_core
|
|
||||||
- conversation
|
|
||||||
- user_instruction_terms
|
- user_instruction_terms
|
||||||
|
|
||||||
retrieval_reference_cleanup:
|
retrieval_reference_cleanup:
|
||||||
|
stopword_group_sets:
|
||||||
|
- de_conversation
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
- de_core
|
|
||||||
- conversation
|
|
||||||
- question_terms
|
- question_terms
|
||||||
meta_term_groups:
|
meta_term_groups:
|
||||||
- retrieval_reference
|
- retrieval_reference
|
||||||
|
|
||||||
shop_context_fallback:
|
shop_context_fallback:
|
||||||
|
stopword_group_sets:
|
||||||
|
- de_conversation
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
- de_core
|
|
||||||
- conversation
|
|
||||||
- pronouns
|
- pronouns
|
||||||
- user_instruction_terms
|
- user_instruction_terms
|
||||||
- question_terms
|
- question_terms
|
||||||
- usage_terms
|
- usage_terms
|
||||||
- reference_fillers
|
- reference_fillers
|
||||||
- response_style
|
- response_style
|
||||||
phrase_groups:
|
phrase_group_sets:
|
||||||
- user_instruction
|
- user_instruction
|
||||||
meta_term_groups:
|
meta_term_groups:
|
||||||
- presentation
|
- presentation
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
# RetrieX Patch 43N - Language Cleanup Group Sets
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Reduce repeated cleanup-profile group compositions in `language.yaml` without changing effective cleanup behavior.
|
||||||
|
|
||||||
|
## Changes
|
||||||
|
|
||||||
|
- Added optional `stopword_group_sets` and `phrase_group_sets` to `config/retriex/language.yaml`.
|
||||||
|
- Moved repeated `de_core` + `conversation` profile composition into `stopword_group_sets.de_conversation`.
|
||||||
|
- Moved repeated `user_instruction` phrase composition into `phrase_group_sets.user_instruction`.
|
||||||
|
- Updated `LanguageCleanupConfig` to resolve group sets before local profile groups.
|
||||||
|
- Existing `stopword_groups`, `phrase_groups`, and `meta_term_groups` remain supported.
|
||||||
|
|
||||||
|
## Non-goals
|
||||||
|
|
||||||
|
- No new domain/runtime logic.
|
||||||
|
- No scoring changes.
|
||||||
|
- No prompt-rule changes.
|
||||||
|
- No retrieval changes.
|
||||||
|
- No admin UI changes.
|
||||||
|
- No new hard-coded domain-specific ("fachliche") term lists in the PHP core.
|
||||||
|
|
||||||
|
## Expected effective behavior
|
||||||
|
|
||||||
|
The effective cleanup profile outputs remain identical to p43M:
|
||||||
|
|
||||||
|
- `commerce_query`: same stopwords, phrases, meta terms, protected terms.
|
||||||
|
- `rag_evidence`: same stopwords, phrases, meta terms, protected terms.
|
||||||
|
- `retrieval_reference_cleanup`: same stopwords, phrases, meta terms, protected terms.
|
||||||
|
- `shop_context_fallback`: same stopwords, phrases, meta terms, protected terms.
|
||||||
|
|
||||||
|
## Local checks performed
|
||||||
|
|
||||||
|
```bash
|
||||||
|
php -l src/Config/LanguageCleanupConfig.php
|
||||||
|
php -l src/Config/GovernanceConfig.php
|
||||||
|
php -l src/Config/AgentRunnerConfig.php
|
||||||
|
php -l src/Config/PromptBuilderConfig.php
|
||||||
|
php -l src/Config/SearchRepairConfig.php
|
||||||
|
python3 YAML parse check for config/retriex/*.yaml
|
||||||
|
python3 effective p43M-vs-p43N cleanup profile comparison
|
||||||
|
php LanguageCleanupConfig effective resolver comparison
|
||||||
|
```
|
||||||
|
|
||||||
|
The local `bin/console` checks could not be executed in this artifact environment because the ZIP does not include `vendor/` and Composer dependencies are unavailable here.
|
||||||
@@ -163,10 +163,14 @@ final class LanguageCleanupConfig
|
|||||||
private function resolveGroupedTerms(string $profileName, string $profileKey, string $rootKey): array
|
private function resolveGroupedTerms(string $profileName, string $profileKey, string $rootKey): array
|
||||||
{
|
{
|
||||||
$profile = $this->requiredCleanupProfile($profileName);
|
$profile = $this->requiredCleanupProfile($profileName);
|
||||||
$groupNames = $this->stringListFromValue(
|
$groupNames = $this->resolveProfileGroupSetTerms($profileName, $profile, $profileKey);
|
||||||
$this->profileValue($profile, $profileKey),
|
$groupNames = $this->mergeUnique(
|
||||||
sprintf('cleanup_profiles.%s.%s', $profileName, $profileKey),
|
$groupNames,
|
||||||
false
|
$this->stringListFromValue(
|
||||||
|
$this->profileValue($profile, $profileKey),
|
||||||
|
sprintf('cleanup_profiles.%s.%s', $profileName, $profileKey),
|
||||||
|
false
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
if ($groupNames === []) {
|
if ($groupNames === []) {
|
||||||
@@ -193,6 +197,66 @@ final class LanguageCleanupConfig
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collects the group names contributed by the group sets a cleanup profile references.
 *
 * The set key for the given group key is read from the profile; when it lists no sets,
 * an empty array is returned without consulting the root-level set map. Otherwise each
 * referenced set is looked up in the root-level map and its entries are folded into the
 * result through mergeUnique(), in the order the profile lists the sets.
 *
 * An exception built by invalid() is thrown when a referenced set name is not a key of
 * the root-level map.
 *
 * @param array<string, mixed> $profile
 * @return string[]
 */
private function resolveProfileGroupSetTerms(string $profileName, array $profile, string $profileKey): array
{
    $setKey = $this->profileGroupSetKey($profileKey);
    // Configuration path used for validation and for the unknown-set error below.
    $profilePath = sprintf('cleanup_profiles.%s.%s', $profileName, $setKey);

    $referencedSets = $this->stringListFromValue(
        $this->profileValue($profile, $setKey),
        $profilePath,
        false
    );

    if ($referencedSets === []) {
        return [];
    }

    $rootKey = $this->rootGroupSetKey($profileKey);
    $definedSets = $this->requiredMap($rootKey);
    $groupNames = [];

    foreach ($referencedSets as $referencedSet) {
        if (array_key_exists($referencedSet, $definedSets)) {
            $members = $this->stringListFromValue(
                $definedSets[$referencedSet],
                sprintf('%s.%s', $rootKey, $referencedSet),
                true
            );
            $groupNames = $this->mergeUnique($groupNames, $members);

            continue;
        }

        throw $this->invalid(
            $profilePath,
            sprintf('references unknown group set "%s"', $referencedSet)
        );
    }

    return $groupNames;
}
|
||||||
|
|
||||||
|
/**
 * Maps a profile group key to the key under which its group sets are listed.
 *
 * The three known group keys have explicit set keys; any other key gets "_sets"
 * appended unchanged.
 */
private function profileGroupSetKey(string $profileKey): string
{
    $knownSetKeys = [
        'stopword_groups' => 'stopword_group_sets',
        'phrase_groups' => 'phrase_group_sets',
        'meta_term_groups' => 'meta_term_group_sets',
    ];

    return $knownSetKeys[$profileKey] ?? sprintf('%s_sets', $profileKey);
}
|
||||||
|
|
||||||
|
/**
 * Returns the root-level configuration key that holds the group-set definitions
 * for the given profile group key.
 *
 * This delegates to profileGroupSetKey(), so the root-level key is the same string
 * as the profile-level set key.
 */
private function rootGroupSetKey(string $profileKey): string
{
    $setKey = $this->profileGroupSetKey($profileKey);

    return $setKey;
}
|
||||||
|
|
||||||
/** @return array<string, mixed> */
|
/** @return array<string, mixed> */
|
||||||
private function requiredCleanupProfile(string $profileName): array
|
private function requiredCleanupProfile(string $profileName): array
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user