This commit is contained in:
team 1
2026-05-05 20:18:47 +02:00
parent 1c61f33097
commit 931af8b098
3 changed files with 135 additions and 14 deletions

View File

@@ -230,6 +230,17 @@ parameters:
- gibt es
- suche im shop
# Reusable cleanup group sets keep common profile compositions in one place.
# Profiles may still add local group references after these shared sets.
stopword_group_sets:
de_conversation:
- de_core
- conversation
phrase_group_sets:
user_instruction:
- user_instruction
meta_term_groups:
presentation:
- tabelle
@@ -256,40 +267,40 @@ parameters:
cleanup_profiles:
commerce_query:
stopword_group_sets:
- de_conversation
stopword_groups:
- de_core
- conversation
- pronouns
- user_instruction_terms
- response_style
phrase_groups:
phrase_group_sets:
- user_instruction
rag_evidence:
stopword_group_sets:
- de_conversation
stopword_groups:
- de_core
- conversation
- user_instruction_terms
retrieval_reference_cleanup:
stopword_group_sets:
- de_conversation
stopword_groups:
- de_core
- conversation
- question_terms
meta_term_groups:
- retrieval_reference
shop_context_fallback:
stopword_group_sets:
- de_conversation
stopword_groups:
- de_core
- conversation
- pronouns
- user_instruction_terms
- question_terms
- usage_terms
- reference_fillers
- response_style
phrase_groups:
phrase_group_sets:
- user_instruction
meta_term_groups:
- presentation

View File

@@ -0,0 +1,46 @@
# RetrieX Patch 43N - Language Cleanup Group Sets
## Goal
Reduce repeated cleanup-profile group compositions in `language.yaml` without changing effective cleanup behavior.
## Changes
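- Added optional `stopword_group_sets` and `phrase_group_sets` to `config/retriex/language.yaml`. The resolver in `LanguageCleanupConfig` likewise recognises an optional `meta_term_group_sets` key for `meta_term_groups`.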
- Moved repeated `de_core` + `conversation` profile composition into `stopword_group_sets.de_conversation`.
- Moved repeated `user_instruction` phrase composition into `phrase_group_sets.user_instruction`.
- Updated `LanguageCleanupConfig` to resolve group sets before local profile groups.
- Existing `stopword_groups`, `phrase_groups`, and `meta_term_groups` remain supported.
## Non-goals
- No new domain/runtime logic.
- No scoring changes.
- No prompt-rule changes.
- No retrieval changes.
- No admin UI changes.
- No new hard-coded domain-specific ("fachliche") lists in PHP core.
## Expected effective behavior
The effective cleanup profile outputs remain identical to p43M:
- `commerce_query`: same stopwords, phrases, meta terms, protected terms.
- `rag_evidence`: same stopwords, phrases, meta terms, protected terms.
- `retrieval_reference_cleanup`: same stopwords, phrases, meta terms, protected terms.
- `shop_context_fallback`: same stopwords, phrases, meta terms, protected terms.
## Local checks performed
```bash
php -l src/Config/LanguageCleanupConfig.php
php -l src/Config/GovernanceConfig.php
php -l src/Config/AgentRunnerConfig.php
php -l src/Config/PromptBuilderConfig.php
php -l src/Config/SearchRepairConfig.php
# python3: YAML parse check for config/retriex/*.yaml
# python3: effective p43M-vs-p43N cleanup profile comparison
# php: LanguageCleanupConfig effective resolver comparison
```
The local `bin/console` checks could not be executed in this artifact environment because the ZIP does not include `vendor/` and Composer dependencies are unavailable here.

View File

@@ -163,10 +163,14 @@ final class LanguageCleanupConfig
private function resolveGroupedTerms(string $profileName, string $profileKey, string $rootKey): array
{
$profile = $this->requiredCleanupProfile($profileName);
$groupNames = $this->stringListFromValue(
$this->profileValue($profile, $profileKey),
sprintf('cleanup_profiles.%s.%s', $profileName, $profileKey),
false
$groupNames = $this->resolveProfileGroupSetTerms($profileName, $profile, $profileKey);
$groupNames = $this->mergeUnique(
$groupNames,
$this->stringListFromValue(
$this->profileValue($profile, $profileKey),
sprintf('cleanup_profiles.%s.%s', $profileName, $profileKey),
false
)
);
if ($groupNames === []) {
@@ -193,6 +197,66 @@ final class LanguageCleanupConfig
return $out;
}
/**
 * Expands the group sets referenced by a cleanup profile into a flat list of group names.
 *
 * The profile is read under the "*_sets" key derived from $profileKey; each referenced
 * set name is then looked up in the root-level map stored under the same derived key.
 * The entries of all referenced sets are combined via mergeUnique(), in the order in
 * which the profile references them.
 *
 * @param string               $profileName Cleanup profile name, used to build error paths.
 * @param array<string, mixed> $profile     The cleanup profile definition.
 * @param string               $profileKey  Group key of the profile (e.g. "stopword_groups").
 *
 * @return string[] Group names contributed by the referenced sets; empty when the profile
 *                  references no set.
 */
private function resolveProfileGroupSetTerms(string $profileName, array $profile, string $profileKey): array
{
    $setKey = $this->profileGroupSetKey($profileKey);
    $profilePath = sprintf('cleanup_profiles.%s.%s', $profileName, $setKey);

    // NOTE(review): the trailing `false` presumably marks the list as optional - confirm
    // against stringListFromValue().
    $referencedSets = $this->stringListFromValue($this->profileValue($profile, $setKey), $profilePath, false);

    if ($referencedSets === []) {
        // No set referenced: return before the root-level set map is requested at all.
        return [];
    }

    $rootKey = $this->rootGroupSetKey($profileKey);
    $definedSets = $this->requiredMap($rootKey);

    $groupNames = [];
    foreach ($referencedSets as $referencedSet) {
        if (array_key_exists($referencedSet, $definedSets)) {
            $members = $this->stringListFromValue(
                $definedSets[$referencedSet],
                sprintf('%s.%s', $rootKey, $referencedSet),
                true
            );
            $groupNames = $this->mergeUnique($groupNames, $members);

            continue;
        }

        // The profile names a set that the root-level map does not define.
        throw $this->invalid(
            $profilePath,
            sprintf('references unknown group set "%s"', $referencedSet)
        );
    }

    return $groupNames;
}
/**
 * Maps a profile group key to the key under which its group sets are configured.
 *
 * The three known group keys have explicit set keys; any other key simply gets
 * "_sets" appended.
 *
 * @param string $profileKey Group key such as "stopword_groups".
 *
 * @return string The corresponding group-set key.
 */
private function profileGroupSetKey(string $profileKey): string
{
    $knownSetKeys = [
        'stopword_groups' => 'stopword_group_sets',
        'phrase_groups' => 'phrase_group_sets',
        'meta_term_groups' => 'meta_term_group_sets',
    ];

    return $knownSetKeys[$profileKey] ?? sprintf('%s_sets', $profileKey);
}
/**
 * Returns the root-level configuration key that holds the group-set definitions
 * for the given profile group key.
 *
 * The root-level key is the same key that profiles use to reference sets, so this
 * delegates to profileGroupSetKey().
 */
private function rootGroupSetKey(string $profileKey): string
{
    $rootSetKey = $this->profileGroupSetKey($profileKey);

    return $rootSetKey;
}
/** @return array<string, mixed> */
private function requiredCleanupProfile(string $profileName): array
{