p43K
This commit is contained in:
@@ -52,75 +52,11 @@ parameters:
|
||||
max_distance_long: 3
|
||||
min_similarity_percent: 72
|
||||
# Canonical routing terms only, not typo variants.
|
||||
# The code fuzzy-matches user tokens against these terms when the LLM leaves
|
||||
# an obvious routing typo unchanged.
|
||||
terms:
|
||||
- shop
|
||||
- suche
|
||||
- suchen
|
||||
- such
|
||||
- finde
|
||||
- finden
|
||||
- kostet
|
||||
- kosten
|
||||
- preis
|
||||
- preise
|
||||
- preisen
|
||||
- preiswert
|
||||
- preiswerte
|
||||
- günstig
|
||||
- guenstig
|
||||
- kaufen
|
||||
- bestellen
|
||||
- produkt
|
||||
- produkte
|
||||
- artikel
|
||||
- sku
|
||||
- online
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- handmessgerät
|
||||
- handmessgeraet
|
||||
- pockettester
|
||||
- analysator
|
||||
- analyzer
|
||||
- indikator
|
||||
- indikatoren
|
||||
- reagenz
|
||||
- reagenzien
|
||||
- verbrauchsmaterial
|
||||
- zubehör
|
||||
- zubehoer
|
||||
- ersatzteil
|
||||
- ersatzteile
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrode
|
||||
- elektrodenkabel
|
||||
- puffer
|
||||
- kalibrierpuffer
|
||||
- kalibrierlösung
|
||||
- kalibrierloesung
|
||||
- kalibrierung
|
||||
- lösung
|
||||
- loesung
|
||||
- messen
|
||||
- messung
|
||||
- überwachen
|
||||
- ueberwachen
|
||||
- kontrollieren
|
||||
- schwimmbad
|
||||
- pool
|
||||
- becken
|
||||
- wasseranalyse
|
||||
- geeignet
|
||||
- passend
|
||||
- empfehlung
|
||||
- empfehlen
|
||||
- empfiehl
|
||||
# Resolved from config/retriex/vocabulary.yaml view
|
||||
# agent.input_normalization_fuzzy_routing_terms.
|
||||
# A local terms list may still be added here as an explicit project override.
|
||||
vocabulary_views:
|
||||
terms: agent.input_normalization_fuzzy_routing_terms
|
||||
|
||||
follow_up_context:
|
||||
strong_reference_patterns:
|
||||
|
||||
@@ -59,14 +59,10 @@ parameters:
|
||||
shop_query_current_input_preservation_terms:
|
||||
- ph
|
||||
- redox
|
||||
vocabulary:
|
||||
protected_short_model_tokens:
|
||||
- th
|
||||
- tc
|
||||
- tp
|
||||
- tm
|
||||
- ph
|
||||
- rx
|
||||
# Protected vocabulary tokens fall back to
|
||||
# regression_baseline.protected_short_model_tokens.
|
||||
# Add vocabulary.protected_short_model_tokens only for an explicit override.
|
||||
vocabulary: {}
|
||||
language:
|
||||
protected_stopword_terms:
|
||||
- nicht
|
||||
@@ -86,28 +82,19 @@ parameters:
|
||||
- rag_evidence
|
||||
- shop_context_fallback
|
||||
- retrieval_reference_cleanup
|
||||
required_profile_terms:
|
||||
commerce_query:
|
||||
required_profile_term_defaults:
|
||||
stopwords:
|
||||
- der
|
||||
- dieser
|
||||
- mit
|
||||
- bitte
|
||||
required_profile_terms:
|
||||
commerce_query:
|
||||
phrases:
|
||||
- ich suche
|
||||
- suche im shop
|
||||
rag_evidence:
|
||||
stopwords:
|
||||
- der
|
||||
- dieser
|
||||
- mit
|
||||
- bitte
|
||||
rag_evidence: {}
|
||||
shop_context_fallback:
|
||||
stopwords:
|
||||
- der
|
||||
- dieser
|
||||
- mit
|
||||
- bitte
|
||||
phrases:
|
||||
- zeige mir
|
||||
- suche im shop
|
||||
|
||||
@@ -100,6 +100,73 @@ parameters:
|
||||
- mehr
|
||||
- weniger
|
||||
- als
|
||||
input_normalization_fuzzy_routing_terms:
|
||||
- shop
|
||||
- suche
|
||||
- suchen
|
||||
- such
|
||||
- finde
|
||||
- finden
|
||||
- kostet
|
||||
- kosten
|
||||
- preis
|
||||
- preise
|
||||
- preisen
|
||||
- preiswert
|
||||
- preiswerte
|
||||
- günstig
|
||||
- guenstig
|
||||
- kaufen
|
||||
- bestellen
|
||||
- produkt
|
||||
- produkte
|
||||
- artikel
|
||||
- sku
|
||||
- online
|
||||
- analysegerät
|
||||
- analysegeraet
|
||||
- messgerät
|
||||
- messgeraet
|
||||
- handmessgerät
|
||||
- handmessgeraet
|
||||
- pockettester
|
||||
- analysator
|
||||
- analyzer
|
||||
- indikator
|
||||
- indikatoren
|
||||
- reagenz
|
||||
- reagenzien
|
||||
- verbrauchsmaterial
|
||||
- zubehör
|
||||
- zubehoer
|
||||
- ersatzteil
|
||||
- ersatzteile
|
||||
- anschlusskabel
|
||||
- kabel
|
||||
- sensorkabel
|
||||
- elektrode
|
||||
- elektrodenkabel
|
||||
- puffer
|
||||
- kalibrierpuffer
|
||||
- kalibrierlösung
|
||||
- kalibrierloesung
|
||||
- kalibrierung
|
||||
- lösung
|
||||
- loesung
|
||||
- messen
|
||||
- messung
|
||||
- überwachen
|
||||
- ueberwachen
|
||||
- kontrollieren
|
||||
- schwimmbad
|
||||
- pool
|
||||
- becken
|
||||
- wasseranalyse
|
||||
- geeignet
|
||||
- passend
|
||||
- empfehlung
|
||||
- empfehlen
|
||||
- empfiehl
|
||||
views:
|
||||
shop:
|
||||
device_query:
|
||||
@@ -666,6 +733,10 @@ parameters:
|
||||
- ph indikator
|
||||
- ph-indikatoren
|
||||
- ph indikatoren
|
||||
agent:
|
||||
input_normalization_fuzzy_routing_terms:
|
||||
include:
|
||||
- input_normalization_fuzzy_routing_terms
|
||||
maps:
|
||||
agent:
|
||||
rag_evidence_guard:
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
# RetrieX Patch p43I - Input Normalization Fuzzy Routing Vocabulary View
|
||||
|
||||
## Ziel
|
||||
|
||||
p43I reduziert eine weitere lokale YAML-Begriffsliste ohne fachliche Runtime-Änderung.
|
||||
Die Canonical Fuzzy-Routing-Terms der Input-Normalisierung werden zentral in `config/retriex/vocabulary.yaml` gepflegt und aus `agent.yaml` nur noch per Vocabulary-View referenziert.
|
||||
|
||||
## Änderungen
|
||||
|
||||
- `config/retriex/agent.yaml`
|
||||
- lokale Liste `input_normalization.fuzzy_routing.terms` entfernt
|
||||
- neue Referenz `input_normalization.fuzzy_routing.vocabulary_views.terms` ergänzt
|
||||
- Kommentar ergänzt, dass lokale `terms` weiterhin als expliziter Projekt-Override möglich sind
|
||||
|
||||
- `config/retriex/vocabulary.yaml`
|
||||
- neue Klasse `input_normalization_fuzzy_routing_terms` ergänzt
|
||||
- neue View `agent.input_normalization_fuzzy_routing_terms` ergänzt
|
||||
- Reihenfolge und Werte entsprechen exakt dem vorherigen p43H-Stand
|
||||
|
||||
- `src/Config/AgentRunnerConfig.php`
|
||||
- `getInputNormalizationFuzzyRoutingTerms()` nutzt nun `getConfiguredStringListOrVocabularyView()`
|
||||
- lokale Override-Liste bleibt möglich
|
||||
|
||||
## Nicht geändert
|
||||
|
||||
- keine neue Fachlogik
|
||||
- keine Scoringänderung
|
||||
- keine Retrievaländerung
|
||||
- keine Prompt-Regeländerung
|
||||
- keine Admin-UI
|
||||
- keine neuen harten Listen im PHP-Core
|
||||
|
||||
## Lokale Prüfungen
|
||||
|
||||
Ausgeführt:
|
||||
|
||||
```bash
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/SearchRepairConfig.php
|
||||
php -l src/Config/PromptBuilderConfig.php
|
||||
python3 YAML parse check for config/retriex/*.yaml
|
||||
python3 effective p43H-vs-p43I fuzzy routing terms comparison
|
||||
```
|
||||
|
||||
Ergebnis: grün.
|
||||
|
||||
Die effektive Liste `input_normalization.fuzzy_routing.terms` ist gegenüber p43H identisch geblieben.
|
||||
|
||||
## Hinweis zu Symfony-Checks
|
||||
|
||||
Die folgenden Checks konnten in dieser Umgebung nicht lokal ausgeführt werden, weil der ZIP-Stand kein `vendor/` enthält und `bin/console` dadurch mit fehlenden Dependencies abbricht:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
Sie sollen nach dem Einspielen im vollständigen Projekt ausgeführt werden.
|
||||
@@ -0,0 +1,70 @@
|
||||
# RetrieX Patch p43J – Governance Protected Short Model Fallback
|
||||
|
||||
## Ziel
|
||||
|
||||
Kleiner Konsolidierungsschritt nach p43I: eine doppelt gepflegte Governance-Liste entfernen, ohne die unabhängige Regression-Baseline zu schwächen.
|
||||
|
||||
## Inhalt
|
||||
|
||||
- `config/retriex/governance.yaml`
|
||||
- Die lokale Duplikatliste `vocabulary.protected_short_model_tokens` wurde entfernt.
|
||||
- `regression_baseline.protected_short_model_tokens` bleibt bewusst als unabhängige Guardrail-Baseline erhalten.
|
||||
- `vocabulary` bleibt als leerer Override-Block erhalten; ein lokaler Override kann später wieder explizit gesetzt werden.
|
||||
|
||||
- `src/Config/GovernanceConfig.php`
|
||||
- `getVocabularyProtectedShortModelTokens()` nutzt nun `vocabulary.protected_short_model_tokens`, falls lokal gesetzt.
|
||||
- Wenn kein lokaler Override vorhanden ist, fällt die Methode auf `getRegressionProtectedShortModelTokens()` zurück.
|
||||
- Neue interne Helper:
|
||||
- `optionalStringList()`
|
||||
- `optionalValue()`
|
||||
|
||||
## Bewusst nicht geändert
|
||||
|
||||
- Keine Runtime-Fachlogik
|
||||
- Keine Prompt-Regeländerung
|
||||
- Keine Retrievaländerung
|
||||
- Keine Scoringänderung
|
||||
- Keine Admin-UI
|
||||
- Keine neuen harten Listen im PHP-Core
|
||||
- Die Regression-Baseline bleibt YAML-owned und unabhängig von der zu prüfenden Retrieval-/Vocabulary-View.
|
||||
|
||||
## Effektive Werte
|
||||
|
||||
Vorher und nachher identisch:
|
||||
|
||||
- `regression_baseline.protected_short_model_tokens`: 6 / 6
|
||||
- effektive `vocabulary.protected_short_model_tokens`: 6 / 6
|
||||
|
||||
Werte:
|
||||
|
||||
```text
|
||||
th, tc, tp, tm, ph, rx
|
||||
```
|
||||
|
||||
## Lokale Prüfungen
|
||||
|
||||
Ausgeführt:
|
||||
|
||||
```bash
|
||||
php -l src/Config/GovernanceConfig.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/SearchRepairConfig.php
|
||||
php -l src/Config/PromptBuilderConfig.php
|
||||
python3 YAML parse check for config/retriex/*.yaml
|
||||
python3 effective governance protected short model token comparison
|
||||
```
|
||||
|
||||
Ergebnis: grün.
|
||||
|
||||
## Nicht lokal ausführbar
|
||||
|
||||
Die Symfony-Console-Checks konnten in der ChatGPT-Arbeitsumgebung nicht ausgeführt werden, weil der ZIP-Stand kein `vendor/` enthält und `bin/console` mit fehlenden Dependencies abbricht.
|
||||
|
||||
Bitte im Projekt ausführen:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
@@ -0,0 +1,93 @@
|
||||
# RetrieX Patch p43K - Governance Required Profile Term Defaults
|
||||
|
||||
## Ziel
|
||||
|
||||
Kleiner Config-/Accessor-Konsolidierungsschritt nach p43J.
|
||||
|
||||
Der Patch reduziert doppelte Governance-Listen innerhalb von `language.required_profile_terms`, ohne fachliche Runtime-Logik, Scoring, Prompt-Regeln, Retrieval oder Admin-UI zu ändern.
|
||||
|
||||
## Änderungen
|
||||
|
||||
### `config/retriex/governance.yaml`
|
||||
|
||||
Neu eingeführt:
|
||||
|
||||
```yaml
|
||||
language:
|
||||
required_profile_term_defaults:
|
||||
stopwords:
|
||||
- der
|
||||
- dieser
|
||||
- mit
|
||||
- bitte
|
||||
```
|
||||
|
||||
Entfernt wurden die identischen lokalen `stopwords`-Listen aus:
|
||||
|
||||
- `language.required_profile_terms.commerce_query.stopwords`
|
||||
- `language.required_profile_terms.rag_evidence.stopwords`
|
||||
- `language.required_profile_terms.shop_context_fallback.stopwords`
|
||||
|
||||
Die Profile behalten ihre spezifischen `phrases` und `meta_terms` unverändert.
|
||||
|
||||
### `src/Config/GovernanceConfig.php`
|
||||
|
||||
`getLanguageRequiredProfileTerms()` verwendet jetzt optionale Defaults aus:
|
||||
|
||||
```text
|
||||
language.required_profile_term_defaults
|
||||
```
|
||||
|
||||
Lokale Profilwerte bleiben weiterhin möglich und überschreiben die Defaults pro Profilfeld.
|
||||
|
||||
Neuer interner Helper:
|
||||
|
||||
```php
|
||||
languageRequiredProfileTermDefaults()
|
||||
```
|
||||
|
||||
## Effektive Werte
|
||||
|
||||
Die effektiven Werte bleiben gegenüber p43J identisch:
|
||||
|
||||
- `commerce_query.stopwords`: 4 / identisch
|
||||
- `commerce_query.phrases`: 2 / identisch
|
||||
- `rag_evidence.stopwords`: 4 / identisch
|
||||
- `shop_context_fallback.stopwords`: 4 / identisch
|
||||
- `shop_context_fallback.phrases`: 2 / identisch
|
||||
- `shop_context_fallback.meta_terms`: 3 / identisch
|
||||
|
||||
## Keine fachlichen Änderungen
|
||||
|
||||
Nicht geändert:
|
||||
|
||||
- keine neue Fachlogik
|
||||
- keine Scoringänderung
|
||||
- keine Retrievaländerung
|
||||
- keine Prompt-Regeländerung
|
||||
- keine Admin-UI
|
||||
- keine neuen harten Listen im PHP-Core
|
||||
|
||||
## Lokale Prüfungen
|
||||
|
||||
Grün:
|
||||
|
||||
```bash
|
||||
php -l src/Config/GovernanceConfig.php
|
||||
php -l src/Config/AgentRunnerConfig.php
|
||||
php -l src/Config/SearchRepairConfig.php
|
||||
php -l src/Config/PromptBuilderConfig.php
|
||||
python3 YAML parse check for config/retriex/*.yaml
|
||||
python3 effective p43J-vs-p43K governance required profile terms comparison
|
||||
```
|
||||
|
||||
Lokal nicht ausführbar wegen fehlendem `vendor/` im ZIP-Stand:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
`bin/console` bricht mit `Dependencies are missing. Try running "composer install".` ab.
|
||||
@@ -281,7 +281,10 @@ final class AgentRunnerConfig
|
||||
*/
|
||||
public function getInputNormalizationFuzzyRoutingTerms(): array
{
    // Single lookup through the shared helper. The first argument is the key of
    // the local terms list, the second is the key that holds the vocabulary view
    // reference. The agent.yaml comment describes a local list as an explicit
    // project override; the helper's exact precedence is defined outside this
    // excerpt -- TODO confirm.
    //
    // The former unconditional getRequiredStringList() return was removed: it ran
    // first, so the vocabulary-view lookup below could never be reached.
    return $this->getConfiguredStringListOrVocabularyView(
        'input_normalization.fuzzy_routing.terms',
        'input_normalization.fuzzy_routing.vocabulary_views.terms'
    );
}
|
||||
|
||||
private function getRequiredInt(string $key): int
|
||||
|
||||
@@ -129,7 +129,10 @@ final class GovernanceConfig
|
||||
/**
 * Returns the protected short model tokens for the vocabulary section.
 *
 * The value at `vocabulary.protected_short_model_tokens` is used when it
 * normalizes to a non-empty list; otherwise the regression baseline tokens
 * from getRegressionProtectedShortModelTokens() are returned.
 *
 * The former unconditional requiredStringList() return was removed: it ran
 * first, so the fallback below was unreachable and an omitted local list
 * could not fall back to the baseline.
 *
 * @return string[]
 */
public function getVocabularyProtectedShortModelTokens(): array
{
    return $this->optionalStringList(
        'vocabulary.protected_short_model_tokens',
        $this->getRegressionProtectedShortModelTokens()
    );
}
|
||||
|
||||
/** @return string[] */
|
||||
@@ -152,6 +155,8 @@ final class GovernanceConfig
|
||||
throw $this->invalid('language.required_profile_terms', 'must be a map of cleanup profile term lists');
|
||||
}
|
||||
|
||||
$defaults = $this->languageRequiredProfileTermDefaults();
|
||||
|
||||
$out = [];
|
||||
foreach ($value as $profileName => $profileTerms) {
|
||||
if (!is_string($profileName) || trim($profileName) === '' || !is_array($profileTerms)) {
|
||||
@@ -160,9 +165,9 @@ final class GovernanceConfig
|
||||
|
||||
$normalizedProfileName = trim($profileName);
|
||||
$out[$normalizedProfileName] = [
|
||||
'stopwords' => $this->normalizeStringList($profileTerms['stopwords'] ?? []),
|
||||
'phrases' => $this->normalizeStringList($profileTerms['phrases'] ?? []),
|
||||
'meta_terms' => $this->normalizeStringList($profileTerms['meta_terms'] ?? []),
|
||||
'stopwords' => $this->normalizeStringList($profileTerms['stopwords'] ?? $defaults['stopwords']),
|
||||
'phrases' => $this->normalizeStringList($profileTerms['phrases'] ?? $defaults['phrases']),
|
||||
'meta_terms' => $this->normalizeStringList($profileTerms['meta_terms'] ?? $defaults['meta_terms']),
|
||||
];
|
||||
|
||||
if ($out[$normalizedProfileName]['stopwords'] === []
|
||||
@@ -180,6 +185,29 @@ final class GovernanceConfig
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Reads the optional shared term lists from
 * `language.required_profile_term_defaults`.
 *
 * getLanguageRequiredProfileTerms() uses these lists for any profile field
 * (`stopwords`, `phrases`, `meta_terms`) that a profile does not set itself.
 * When the defaults key yields null, every field is an empty list.
 *
 * @return array{stopwords:string[], phrases:string[], meta_terms:string[]}
 */
private function languageRequiredProfileTermDefaults(): array
{
    $path = 'language.required_profile_term_defaults';
    $configured = $this->optionalValue($path);

    // A present value must be a map; anything else is a configuration error.
    if ($configured !== null && !is_array($configured)) {
        throw $this->invalid($path, 'must be a map of cleanup profile term lists');
    }

    $defaults = [];
    foreach (['stopwords', 'phrases', 'meta_terms'] as $field) {
        // Without a configured map the field stays a plain empty list and the
        // normalizer is not invoked, exactly as before.
        $defaults[$field] = $configured === null
            ? []
            : $this->normalizeStringList($configured[$field] ?? []);
    }

    return $defaults;
}
|
||||
|
||||
/** @return string[] */
|
||||
public function getCorePatternAuditSourceRoots(): array
|
||||
{
|
||||
@@ -299,6 +327,18 @@ final class GovernanceConfig
|
||||
return $this->nonEmptyStringList($path, $this->requiredValue($path));
|
||||
}
|
||||
|
||||
/**
 * Reads an optional string list and substitutes a fallback when nothing
 * usable is configured.
 *
 * The normalized fallback is returned when optionalValue() yields null and
 * also when the configured value normalizes to an empty list.
 *
 * @param string[] $fallback Used when no non-empty list is configured.
 * @return string[]
 */
private function optionalStringList(string $path, array $fallback = []): array
{
    $configured = $this->optionalValue($path);
    $normalized = $configured === null ? [] : $this->normalizeStringList($configured);

    if ($normalized === []) {
        return $this->normalizeStringList($fallback);
    }

    return $normalized;
}
|
||||
|
||||
/** @return string[] */
|
||||
private function nonEmptyStringList(string $path, mixed $value): array
|
||||
{
|
||||
@@ -337,11 +377,21 @@ final class GovernanceConfig
|
||||
}
|
||||
|
||||
/**
 * Resolves a dotted config path and raises the missing-key error when the
 * lookup yields null.
 */
private function requiredValue(string $path): mixed
{
    // Delegate the traversal; only the null case is handled here.
    return $this->optionalValue($path) ?? throw $this->missing($path);
}
|
||||
|
||||
private function optionalValue(string $path): mixed
|
||||
{
|
||||
$value = $this->config;
|
||||
foreach (explode('.', $path) as $segment) {
|
||||
if (!is_array($value) || !array_key_exists($segment, $value)) {
|
||||
throw $this->missing($path);
|
||||
return null;
|
||||
}
|
||||
|
||||
$value = $value[$segment];
|
||||
|
||||
Reference in New Issue
Block a user