p43N
This commit is contained in:
@@ -230,6 +230,17 @@ parameters:
|
||||
- gibt es
|
||||
- suche im shop
|
||||
|
||||
# Reusable cleanup group sets keep common profile compositions in one place.
|
||||
# Profiles may still add local group references after these shared sets.
|
||||
stopword_group_sets:
|
||||
de_conversation:
|
||||
- de_core
|
||||
- conversation
|
||||
|
||||
phrase_group_sets:
|
||||
user_instruction:
|
||||
- user_instruction
|
||||
|
||||
meta_term_groups:
|
||||
presentation:
|
||||
- tabelle
|
||||
@@ -256,40 +267,40 @@ parameters:
|
||||
|
||||
cleanup_profiles:
|
||||
commerce_query:
|
||||
stopword_group_sets:
|
||||
- de_conversation
|
||||
stopword_groups:
|
||||
- de_core
|
||||
- conversation
|
||||
- pronouns
|
||||
- user_instruction_terms
|
||||
- response_style
|
||||
phrase_groups:
|
||||
phrase_group_sets:
|
||||
- user_instruction
|
||||
|
||||
rag_evidence:
|
||||
stopword_group_sets:
|
||||
- de_conversation
|
||||
stopword_groups:
|
||||
- de_core
|
||||
- conversation
|
||||
- user_instruction_terms
|
||||
|
||||
retrieval_reference_cleanup:
|
||||
stopword_group_sets:
|
||||
- de_conversation
|
||||
stopword_groups:
|
||||
- de_core
|
||||
- conversation
|
||||
- question_terms
|
||||
meta_term_groups:
|
||||
- retrieval_reference
|
||||
|
||||
shop_context_fallback:
|
||||
stopword_group_sets:
|
||||
- de_conversation
|
||||
stopword_groups:
|
||||
- de_core
|
||||
- conversation
|
||||
- pronouns
|
||||
- user_instruction_terms
|
||||
- question_terms
|
||||
- usage_terms
|
||||
- reference_fillers
|
||||
- response_style
|
||||
phrase_groups:
|
||||
phrase_group_sets:
|
||||
- user_instruction
|
||||
meta_term_groups:
|
||||
- presentation
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# RetrieX Patch 43N - Language Cleanup Group Sets
|
||||
|
||||
## Goal
|
||||
|
||||
Reduce repeated cleanup-profile group compositions in `language.yaml` without changing effective cleanup behavior.
|
||||
|
||||
## Changes
|
||||
|
||||
- Added optional `stopword_group_sets` and `phrase_group_sets` to `config/retriex/language.yaml`.
|
||||
- Moved repeated `de_core` + `conversation` profile composition into `stopword_group_sets.de_conversation`.
|
||||
- Moved repeated `user_instruction` phrase composition into `phrase_group_sets.user_instruction`.
|
||||
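- Updated `LanguageCleanupConfig` to resolve group sets before local profile groups. The resolver also recognizes `meta_term_group_sets`; this patch adds no such sets to `language.yaml`.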
|
||||
- Existing `stopword_groups`, `phrase_groups`, and `meta_term_groups` remain supported.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- No new domain/runtime logic.
|
||||
- No scoring changes.
|
||||
- No prompt-rule changes.
|
||||
- No retrieval changes.
|
||||
- No admin UI changes.
|
||||
- No new hard-coded domain-specific (fachliche) lists in PHP core.
|
||||
|
||||
## Expected effective behavior
|
||||
|
||||
The effective cleanup profile outputs remain identical to p43M:
|
||||
|
||||
- `commerce_query`: same stopwords, phrases, meta terms, protected terms.
|
||||
- `rag_evidence`: same stopwords, phrases, meta terms, protected terms.
|
||||
- `retrieval_reference_cleanup`: same stopwords, phrases, meta terms, protected terms.
|
||||
- `shop_context_fallback`: same stopwords, phrases, meta terms, protected terms.
|
||||
|
||||
## Local checks performed
|
||||
|
||||
```bash
|
||||
php -l src/Config/LanguageCleanupConfig.php
|
||||
php -l src/Config/GovernanceConfig.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/PromptBuilderConfig.php
|
||||
php -l src/Config/SearchRepairConfig.php
|
||||
# python3: YAML parse check for config/retriex/*.yaml
|
||||
# python3: effective p43M-vs-p43N cleanup profile comparison
|
||||
# php: LanguageCleanupConfig effective resolver comparison
|
||||
```
|
||||
|
||||
The local `bin/console` checks could not be executed in this artifact environment because the ZIP does not include `vendor/` and Composer dependencies are unavailable here.
|
||||
@@ -163,10 +163,14 @@ final class LanguageCleanupConfig
|
||||
private function resolveGroupedTerms(string $profileName, string $profileKey, string $rootKey): array
|
||||
{
|
||||
$profile = $this->requiredCleanupProfile($profileName);
|
||||
$groupNames = $this->stringListFromValue(
|
||||
$groupNames = $this->resolveProfileGroupSetTerms($profileName, $profile, $profileKey);
|
||||
$groupNames = $this->mergeUnique(
|
||||
$groupNames,
|
||||
$this->stringListFromValue(
|
||||
$this->profileValue($profile, $profileKey),
|
||||
sprintf('cleanup_profiles.%s.%s', $profileName, $profileKey),
|
||||
false
|
||||
)
|
||||
);
|
||||
|
||||
if ($groupNames === []) {
|
||||
@@ -193,6 +197,66 @@ final class LanguageCleanupConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Collects the group names contributed by the group sets a cleanup profile references.
 *
 * The profile is read under the "*_sets" key derived from $profileKey; every set name
 * found there is looked up in the root-level map of the same kind and its entries are
 * folded into one list via mergeUnique() (presumably order-preserving de-duplication;
 * confirm against that helper).
 *
 * @param string               $profileName Name of the cleanup profile, used for error paths.
 * @param array<string, mixed> $profile     The profile's configuration map.
 * @param string               $profileKey  Local group key such as "stopword_groups".
 *
 * @return string[] Group names from all referenced sets; empty when the profile references no sets.
 *
 * Throws the exception built by invalid() when a referenced set name is not defined at root level.
 */
private function resolveProfileGroupSetTerms(string $profileName, array $profile, string $profileKey): array
{
    $setKeyInProfile = $this->profileGroupSetKey($profileKey);
    $profilePath = sprintf('cleanup_profiles.%s.%s', $profileName, $setKeyInProfile);

    $requestedSets = $this->stringListFromValue(
        $this->profileValue($profile, $setKeyInProfile),
        $profilePath,
        false
    );

    // Nothing referenced: return before touching the root map, so the root
    // "*_sets" section is only required when a profile actually uses it.
    if ($requestedSets === []) {
        return [];
    }

    $setKeyAtRoot = $this->rootGroupSetKey($profileKey);
    $availableSets = $this->requiredMap($setKeyAtRoot);
    $groupNames = [];

    foreach ($requestedSets as $requestedSet) {
        if (!array_key_exists($requestedSet, $availableSets)) {
            throw $this->invalid(
                $profilePath,
                sprintf('references unknown group set "%s"', $requestedSet)
            );
        }

        $members = $this->stringListFromValue(
            $availableSets[$requestedSet],
            sprintf('%s.%s', $setKeyAtRoot, $requestedSet),
            true
        );

        $groupNames = $this->mergeUnique($groupNames, $members);
    }

    return $groupNames;
}
|
||||
|
||||
/**
 * Translates a profile group key into the key used for its group sets.
 *
 * The three known keys map to singular "*_group_sets" names. Any other key
 * falls back to the key with "_sets" appended, which keeps the plural form
 * (e.g. "foo_groups" becomes "foo_groups_sets").
 */
private function profileGroupSetKey(string $profileKey): string
{
    $knownSetKeys = [
        'stopword_groups' => 'stopword_group_sets',
        'phrase_groups' => 'phrase_group_sets',
        'meta_term_groups' => 'meta_term_group_sets',
    ];

    return $knownSetKeys[$profileKey] ?? sprintf('%s_sets', $profileKey);
}
|
||||
|
||||
/**
 * Returns the root-level configuration key that holds the group sets for a profile group key.
 *
 * This delegates to profileGroupSetKey(), so the root section and the
 * profile-level reference list share the same key name.
 */
private function rootGroupSetKey(string $profileKey): string
{
    $sharedKey = $this->profileGroupSetKey($profileKey);

    return $sharedKey;
}
|
||||
|
||||
/** @return array<string, mixed> */
|
||||
private function requiredCleanupProfile(string $profileName): array
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user