p35
This commit is contained in:
@@ -16,6 +16,12 @@ parameters:
|
|||||||
max_length_ratio_percent: 150
|
max_length_ratio_percent: 150
|
||||||
heartbeat_message: 'Ich optimiere die Anfrage…'
|
heartbeat_message: 'Ich optimiere die Anfrage…'
|
||||||
output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
|
output_prefix_pattern: '/^(?:normalisiert|korrigiert|corrected|normalized)\s*:\s*/iu'
|
||||||
|
placeholder_outputs:
|
||||||
|
- normalized user input
|
||||||
|
- corrected user input
|
||||||
|
- user input
|
||||||
|
- normalisierte nutzereingabe
|
||||||
|
- korrigierte nutzereingabe
|
||||||
skip_patterns:
|
skip_patterns:
|
||||||
- '/https?:\/\//iu'
|
- '/https?:\/\//iu'
|
||||||
- '/\bwww\./iu'
|
- '/\bwww\./iu'
|
||||||
@@ -192,6 +198,15 @@ parameters:
|
|||||||
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
testomat_model_pattern: '/\bTestomat(?:®)?\s+(?:\d{3,4}(?:\s+[A-Z]{2,8})?|EVO(?:\s+[A-Z]{2,6})?|ECO(?:[-\s]?(?:PLUS|C))?|DUO(?:\s+\d{3,4})?|LAB(?:\s+[A-Z]{2,6})?)\b/iu'
|
||||||
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
hardness_value_pattern: '/\b\d+(?:[,.]\d+)?\s*°\s*dH\b/iu'
|
||||||
|
|
||||||
|
followup_actions:
|
||||||
|
commerce:
|
||||||
|
Im Shop suchen: 'Suche die aktuelle Produktauswahl im Shop.'
|
||||||
|
Nur Zubehör anzeigen: 'Zeige aus der aktuellen Produktauswahl nur Zubehör.'
|
||||||
|
Nur Geräte anzeigen: 'Zeige aus der aktuellen Produktauswahl nur Geräte.'
|
||||||
|
Preis anzeigen: 'Zeige mir die Preise der aktuell relevanten Produkte.'
|
||||||
|
knowledge:
|
||||||
|
Technische Details anzeigen: 'Zeige technische Details zur aktuellen Antwort.'
|
||||||
|
|
||||||
messages:
|
messages:
|
||||||
empty_prompt: '❌ Empty prompt.'
|
empty_prompt: '❌ Empty prompt.'
|
||||||
analyze_request: 'Ich analysiere deine Anfrage...'
|
analyze_request: 'Ich analysiere deine Anfrage...'
|
||||||
|
|||||||
@@ -69,6 +69,15 @@ parameters:
|
|||||||
- tc
|
- tc
|
||||||
- '0,02'
|
- '0,02'
|
||||||
|
|
||||||
|
normalization:
|
||||||
|
# Generic language normalization tables. Keep these in YAML so PHP code
|
||||||
|
# executes normalization logic without owning language-specific lists.
|
||||||
|
ascii_transliteration:
|
||||||
|
ä: ae
|
||||||
|
ö: oe
|
||||||
|
ü: ue
|
||||||
|
ß: ss
|
||||||
|
|
||||||
stopword_groups:
|
stopword_groups:
|
||||||
de_core:
|
de_core:
|
||||||
- der
|
- der
|
||||||
|
|||||||
@@ -46,6 +46,17 @@ parameters:
|
|||||||
- messbereich
|
- messbereich
|
||||||
testomat:
|
testomat:
|
||||||
- testomat
|
- testomat
|
||||||
|
exact_selection_token_variant_suffixes:
|
||||||
|
- typen
|
||||||
|
- innen
|
||||||
|
- enen
|
||||||
|
- ern
|
||||||
|
- en
|
||||||
|
- er
|
||||||
|
- es
|
||||||
|
- e
|
||||||
|
- s
|
||||||
|
- n
|
||||||
exact_selection_indicator_question_tokens:
|
exact_selection_indicator_question_tokens:
|
||||||
- indikator
|
- indikator
|
||||||
- indikatortyp
|
- indikatortyp
|
||||||
|
|||||||
@@ -985,12 +985,7 @@ final readonly class AgentRunner
|
|||||||
private function normalizeFuzzyRoutingToken(string $token): string
|
private function normalizeFuzzyRoutingToken(string $token): string
|
||||||
{
|
{
|
||||||
$token = mb_strtolower(trim($token), 'UTF-8');
|
$token = mb_strtolower(trim($token), 'UTF-8');
|
||||||
$token = strtr($token, [
|
$token = $this->languageCleanupConfig->transliterateToAscii($token);
|
||||||
'ä' => 'ae',
|
|
||||||
'ö' => 'oe',
|
|
||||||
'ü' => 'ue',
|
|
||||||
'ß' => 'ss',
|
|
||||||
]);
|
|
||||||
$token = preg_replace('/[^a-z0-9]+/u', '', $token) ?? $token;
|
$token = preg_replace('/[^a-z0-9]+/u', '', $token) ?? $token;
|
||||||
|
|
||||||
return trim($token);
|
return trim($token);
|
||||||
@@ -1028,13 +1023,13 @@ final readonly class AgentRunner
|
|||||||
{
|
{
|
||||||
$normalized = $this->normalizeRoutingComparisonText($candidate);
|
$normalized = $this->normalizeRoutingComparisonText($candidate);
|
||||||
|
|
||||||
return in_array($normalized, [
|
foreach ($this->agentRunnerConfig->getInputNormalizationPlaceholderOutputs() as $placeholderOutput) {
|
||||||
'normalized user input',
|
if ($normalized === $this->normalizeRoutingComparisonText($placeholderOutput)) {
|
||||||
'corrected user input',
|
return true;
|
||||||
'user input',
|
}
|
||||||
'normalisierte nutzereingabe',
|
}
|
||||||
'korrigierte nutzereingabe',
|
|
||||||
], true);
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function normalizeRoutingComparisonText(string $value): string
|
private function normalizeRoutingComparisonText(string $value): string
|
||||||
@@ -2857,12 +2852,7 @@ final readonly class AgentRunner
|
|||||||
$value = html_entity_decode(strip_tags($value), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
$value = html_entity_decode(strip_tags($value), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
|
||||||
$value = mb_strtolower($value, 'UTF-8');
|
$value = mb_strtolower($value, 'UTF-8');
|
||||||
$value = str_replace(['‐', '‑', '‒', '–', '—'], '-', $value);
|
$value = str_replace(['‐', '‑', '‒', '–', '—'], '-', $value);
|
||||||
$value = strtr($value, [
|
$value = $this->languageCleanupConfig->transliterateToAscii($value);
|
||||||
'ä' => 'ae',
|
|
||||||
'ö' => 'oe',
|
|
||||||
'ü' => 'ue',
|
|
||||||
'ß' => 'ss',
|
|
||||||
]);
|
|
||||||
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
$value = preg_replace('/\s+/u', ' ', $value) ?? $value;
|
||||||
|
|
||||||
return trim($value);
|
return trim($value);
|
||||||
@@ -3241,14 +3231,15 @@ final readonly class AgentRunner
|
|||||||
$actions = [];
|
$actions = [];
|
||||||
|
|
||||||
if ($isCommerceIntent || $hasShopResults) {
|
if ($isCommerceIntent || $hasShopResults) {
|
||||||
$actions[] = ['Im Shop suchen', 'Suche die aktuelle Produktauswahl im Shop.'];
|
foreach ($this->agentRunnerConfig->getCommerceFollowUpActions() as $label => $actionPrompt) {
|
||||||
$actions[] = ['Nur Zubehör anzeigen', 'Zeige aus der aktuellen Produktauswahl nur Zubehör.'];
|
$actions[] = [$label, $actionPrompt];
|
||||||
$actions[] = ['Nur Geräte anzeigen', 'Zeige aus der aktuellen Produktauswahl nur Geräte.'];
|
}
|
||||||
$actions[] = ['Preis anzeigen', 'Zeige mir die Preise der aktuell relevanten Produkte.'];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($hasKnowledge || $hasShopResults) {
|
if ($hasKnowledge || $hasShopResults) {
|
||||||
$actions[] = ['Technische Details anzeigen', 'Zeige technische Details zur aktuellen Antwort.'];
|
foreach ($this->agentRunnerConfig->getKnowledgeFollowUpActions() as $label => $actionPrompt) {
|
||||||
|
$actions[] = [$label, $actionPrompt];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($actions === []) {
|
if ($actions === []) {
|
||||||
|
|||||||
@@ -260,6 +260,26 @@ final class AgentRunnerConfig
|
|||||||
return $this->getRequiredStringList('input_normalization.fuzzy_routing.terms');
|
return $this->getRequiredStringList('input_normalization.fuzzy_routing.terms');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return string[]
|
||||||
|
*/
|
||||||
|
public function getInputNormalizationPlaceholderOutputs(): array
|
||||||
|
{
|
||||||
|
return $this->getRequiredStringList('input_normalization.placeholder_outputs');
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return array<string, string> */
|
||||||
|
public function getCommerceFollowUpActions(): array
|
||||||
|
{
|
||||||
|
return $this->getRequiredStringMap('followup_actions.commerce');
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return array<string, string> */
|
||||||
|
public function getKnowledgeFollowUpActions(): array
|
||||||
|
{
|
||||||
|
return $this->getRequiredStringMap('followup_actions.knowledge');
|
||||||
|
}
|
||||||
|
|
||||||
private function getRequiredInt(string $key): int
|
private function getRequiredInt(string $key): int
|
||||||
{
|
{
|
||||||
$value = $this->requiredValue($key);
|
$value = $this->requiredValue($key);
|
||||||
@@ -384,6 +404,39 @@ final class AgentRunnerConfig
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return array<string, string>
|
||||||
|
*/
|
||||||
|
private function getRequiredStringMap(string $key): array
|
||||||
|
{
|
||||||
|
$value = $this->requiredValue($key);
|
||||||
|
|
||||||
|
if (!is_array($value)) {
|
||||||
|
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must be a string map.', $key));
|
||||||
|
}
|
||||||
|
|
||||||
|
$out = [];
|
||||||
|
|
||||||
|
foreach ($value as $mapKey => $mapValue) {
|
||||||
|
if (!is_scalar($mapKey) || !is_scalar($mapValue)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$mapKey = trim((string) $mapKey);
|
||||||
|
$mapValue = trim((string) $mapValue);
|
||||||
|
|
||||||
|
if ($mapKey !== '' && $mapValue !== '') {
|
||||||
|
$out[$mapKey] = $mapValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($out === []) {
|
||||||
|
throw new \InvalidArgumentException(sprintf('RetrieX agent config key "%s" must contain at least one valid entry.', $key));
|
||||||
|
}
|
||||||
|
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array<string, string>
|
* @return array<string, string>
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -44,6 +44,27 @@ final class LanguageCleanupConfig
|
|||||||
return in_array($term, $this->getProtectedTerms(), true);
|
return in_array($term, $this->getProtectedTerms(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return array<string, string> */
|
||||||
|
public function getAsciiTransliterationMap(): array
|
||||||
|
{
|
||||||
|
$normalization = $this->requiredMap('normalization');
|
||||||
|
if (!array_key_exists('ascii_transliteration', $normalization)) {
|
||||||
|
throw $this->invalid('normalization.ascii_transliteration', 'is missing');
|
||||||
|
}
|
||||||
|
|
||||||
|
return $this->stringMapFromValue($normalization['ascii_transliteration'], 'normalization.ascii_transliteration', true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public function transliterateToAscii(string $value): string
|
||||||
|
{
|
||||||
|
$map = $this->getAsciiTransliterationMap();
|
||||||
|
if ($map === []) {
|
||||||
|
return $value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return strtr($value, $map);
|
||||||
|
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
public function getCleanupProfileNames(): array
|
public function getCleanupProfileNames(): array
|
||||||
{
|
{
|
||||||
@@ -235,6 +256,35 @@ final class LanguageCleanupConfig
|
|||||||
return $out;
|
return $out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return array<string, string> */
|
||||||
|
private function stringMapFromValue(mixed $value, string $path, bool $required): array
|
||||||
|
{
|
||||||
|
if (!is_array($value)) {
|
||||||
|
throw $this->invalid($path, 'must be a map of non-empty strings');
|
||||||
|
}
|
||||||
|
|
||||||
|
$out = [];
|
||||||
|
foreach ($value as $key => $item) {
|
||||||
|
if (!is_scalar($key) || !is_scalar($item)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$key = trim((string) $key);
|
||||||
|
$item = trim((string) $item);
|
||||||
|
if ($key === '' || $item === '') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
$out[$key] = $item;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($required && $out === []) {
|
||||||
|
throw $this->invalid($path, 'must contain at least one non-empty map entry');
|
||||||
|
}
|
||||||
|
|
||||||
|
return $out;
|
||||||
|
}
|
||||||
|
|
||||||
/** @param string[] $terms */
|
/** @param string[] $terms */
|
||||||
private function removeProtectedTerms(array $terms): array
|
private function removeProtectedTerms(array $terms): array
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -148,6 +148,12 @@ final class NdjsonHybridRetrieverConfig
|
|||||||
return $this->requiredStringListMap('exact_selection_token_variant_prefixes');
|
return $this->requiredStringListMap('exact_selection_token_variant_prefixes');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return string[] */
|
||||||
|
public function exactSelectionTokenVariantSuffixes(): array
|
||||||
|
{
|
||||||
|
return $this->requiredStringList('exact_selection_token_variant_suffixes');
|
||||||
|
}
|
||||||
|
|
||||||
/** @return string[] */
|
/** @return string[] */
|
||||||
public function exactSelectionIndicatorQuestionTokens(): array
|
public function exactSelectionIndicatorQuestionTokens(): array
|
||||||
{
|
{
|
||||||
@@ -313,6 +319,7 @@ final class NdjsonHybridRetrieverConfig
|
|||||||
'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
|
'focused_product_max_chunks' => $this->focusedProductMaxChunks(),
|
||||||
'catalog_list_shortcut_patterns' => $this->catalogListShortcutPatterns(),
|
'catalog_list_shortcut_patterns' => $this->catalogListShortcutPatterns(),
|
||||||
'exact_selection_token_variant_prefixes' => $this->exactSelectionTokenVariantPrefixes(),
|
'exact_selection_token_variant_prefixes' => $this->exactSelectionTokenVariantPrefixes(),
|
||||||
|
'exact_selection_token_variant_suffixes' => $this->exactSelectionTokenVariantSuffixes(),
|
||||||
'exact_selection_indicator_question_tokens' => $this->exactSelectionIndicatorQuestionTokens(),
|
'exact_selection_indicator_question_tokens' => $this->exactSelectionIndicatorQuestionTokens(),
|
||||||
'exact_selection_indicator_question_phrases' => $this->exactSelectionIndicatorQuestionPhrases(),
|
'exact_selection_indicator_question_phrases' => $this->exactSelectionIndicatorQuestionPhrases(),
|
||||||
'exact_selection_indicator_table_heading_patterns' => $this->exactSelectionIndicatorTableHeadingPatterns(),
|
'exact_selection_indicator_table_heading_patterns' => $this->exactSelectionIndicatorTableHeadingPatterns(),
|
||||||
|
|||||||
@@ -583,6 +583,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(),
|
'max_length_ratio_percent' => $this->agentRunnerConfig->getInputNormalizationMaxLengthRatioPercent(),
|
||||||
'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
|
'heartbeat_message' => $this->agentRunnerConfig->getInputNormalizationHeartbeatMessage(),
|
||||||
'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(),
|
'output_prefix_pattern' => $this->agentRunnerConfig->getInputNormalizationOutputPrefixPattern(),
|
||||||
|
'placeholder_outputs' => $this->agentRunnerConfig->getInputNormalizationPlaceholderOutputs(),
|
||||||
'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(),
|
'skip_patterns' => $this->agentRunnerConfig->getInputNormalizationSkipPatterns(),
|
||||||
'prompt' => [
|
'prompt' => [
|
||||||
'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(),
|
'intro' => $this->agentRunnerConfig->getInputNormalizationIntro(),
|
||||||
@@ -602,6 +603,10 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(),
|
'terms' => $this->agentRunnerConfig->getInputNormalizationFuzzyRoutingTerms(),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
|
'followup_actions' => [
|
||||||
|
'commerce' => $this->agentRunnerConfig->getCommerceFollowUpActions(),
|
||||||
|
'knowledge' => $this->agentRunnerConfig->getKnowledgeFollowUpActions(),
|
||||||
|
],
|
||||||
'messages' => [
|
'messages' => [
|
||||||
'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(),
|
'empty_prompt' => $this->agentRunnerConfig->getEmptyPromptMessage(),
|
||||||
'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(),
|
'analyze_request' => $this->agentRunnerConfig->getAnalyzeRequestMessage(),
|
||||||
@@ -929,6 +934,9 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
return [
|
return [
|
||||||
'stopwords' => $this->stopWordsConfig->getStopWords(),
|
'stopwords' => $this->stopWordsConfig->getStopWords(),
|
||||||
'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
|
'protected_terms' => $this->languageCleanupConfig->getProtectedTerms(),
|
||||||
|
'normalization' => [
|
||||||
|
'ascii_transliteration' => $this->languageCleanupConfig->getAsciiTransliterationMap(),
|
||||||
|
],
|
||||||
'cleanup_profile_names' => $this->languageCleanupConfig->getCleanupProfileNames(),
|
'cleanup_profile_names' => $this->languageCleanupConfig->getCleanupProfileNames(),
|
||||||
'cleanup_profiles' => $profiles,
|
'cleanup_profiles' => $profiles,
|
||||||
];
|
];
|
||||||
@@ -1200,6 +1208,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
private function validateAgent(array $agent, array &$errors, array &$warnings): void
|
private function validateAgent(array $agent, array &$errors, array &$warnings): void
|
||||||
{
|
{
|
||||||
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
$this->validateStringListMap($agent['messages'] ?? [], 'agent.messages', $errors, $warnings);
|
||||||
|
$this->validateStringListMap($agent['followup_actions'] ?? [], 'agent.followup_actions', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
$this->validateStringListMap($agent['source_labels'] ?? [], 'agent.source_labels', $errors, $warnings);
|
||||||
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
$this->validateStringListMap($agent['html_templates'] ?? [], 'agent.html_templates', $errors, $warnings);
|
||||||
|
|
||||||
@@ -1217,6 +1226,9 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.';
|
$errors[] = 'agent.follow_up_context.commercial_table_follow_up.query_template_without_model must not be empty.';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$inputNormalization = is_array($agent['input_normalization'] ?? null) ? $agent['input_normalization'] : [];
|
||||||
|
$this->validateStringList($this->toList($inputNormalization['placeholder_outputs'] ?? []), 'agent.input_normalization.placeholder_outputs', $errors, $warnings);
|
||||||
|
|
||||||
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
$ragEvidence = is_array($agent['rag_evidence_guard'] ?? null) ? $agent['rag_evidence_guard'] : [];
|
||||||
$ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null;
|
$ragEvidenceCleanupProfile = $ragEvidence['cleanup_profile'] ?? null;
|
||||||
if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') {
|
if (!is_string($ragEvidenceCleanupProfile) || trim($ragEvidenceCleanupProfile) === '') {
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ declare(strict_types=1);
|
|||||||
namespace App\Intent;
|
namespace App\Intent;
|
||||||
|
|
||||||
use App\Config\IntentLightConfig;
|
use App\Config\IntentLightConfig;
|
||||||
|
use App\Config\LanguageCleanupConfig;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IntentLite
|
* IntentLite
|
||||||
@@ -20,10 +21,9 @@ final readonly class IntentLite
|
|||||||
{
|
{
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private IntentLightConfig $config
|
private IntentLightConfig $config,
|
||||||
)
|
private LanguageCleanupConfig $languageCleanupConfig
|
||||||
{
|
) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function detectList(string $originalPrompt): array
|
public function detectList(string $originalPrompt): array
|
||||||
@@ -99,16 +99,9 @@ final readonly class IntentLite
|
|||||||
{
|
{
|
||||||
$s = mb_strtolower($s);
|
$s = mb_strtolower($s);
|
||||||
|
|
||||||
// Umlaute zusätzlich absichern (falls QueryCleaner das tut)
|
// Keep the language-specific transliteration table in YAML.
|
||||||
$replacements = [
|
// Only append an ASCII variant; do not replace the original form.
|
||||||
'ä' => 'ae',
|
foreach ($this->languageCleanupConfig->getAsciiTransliterationMap() as $umlaut => $alt) {
|
||||||
'ö' => 'oe',
|
|
||||||
'ü' => 'ue',
|
|
||||||
'ß' => 'ss',
|
|
||||||
];
|
|
||||||
|
|
||||||
// Nur als Zusatzform speichern (nicht ersetzen!)
|
|
||||||
foreach ($replacements as $umlaut => $alt) {
|
|
||||||
if (str_contains($s, $umlaut)) {
|
if (str_contains($s, $umlaut)) {
|
||||||
$s .= ' ' . str_replace($umlaut, $alt, $s);
|
$s .= ' ' . str_replace($umlaut, $alt, $s);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Intent;
|
namespace App\Intent;
|
||||||
|
|
||||||
|
use App\Config\LanguageCleanupConfig;
|
||||||
use App\Config\SalesIntentConfig;
|
use App\Config\SalesIntentConfig;
|
||||||
|
|
||||||
final class SalesIntentLite
|
final class SalesIntentLite
|
||||||
@@ -16,7 +17,8 @@ final class SalesIntentLite
|
|||||||
public const ROI = 'roi';
|
public const ROI = 'roi';
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private readonly SalesIntentConfig $config
|
private readonly SalesIntentConfig $config,
|
||||||
|
private readonly LanguageCleanupConfig $languageCleanupConfig
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,11 +125,6 @@ final class SalesIntentLite
|
|||||||
{
|
{
|
||||||
$s = mb_strtolower($s);
|
$s = mb_strtolower($s);
|
||||||
|
|
||||||
return strtr($s, [
|
return $this->languageCleanupConfig->transliterateToAscii($s);
|
||||||
'ä' => 'ae',
|
|
||||||
'ö' => 'oe',
|
|
||||||
'ü' => 'ue',
|
|
||||||
'ß' => 'ss',
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4,12 +4,14 @@ declare(strict_types=1);
|
|||||||
|
|
||||||
namespace App\Knowledge\Retrieval;
|
namespace App\Knowledge\Retrieval;
|
||||||
|
|
||||||
|
use App\Config\NdjsonHybridRetrieverConfig;
|
||||||
use App\Knowledge\ChunkManager;
|
use App\Knowledge\ChunkManager;
|
||||||
|
|
||||||
final readonly class NdjsonChunkLookup
|
final readonly class NdjsonChunkLookup
|
||||||
{
|
{
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private ChunkManager $chunkManager
|
private ChunkManager $chunkManager,
|
||||||
|
private NdjsonHybridRetrieverConfig $retrieverConfig
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -395,7 +397,7 @@ final readonly class NdjsonChunkLookup
|
|||||||
$length = mb_strlen($token, 'UTF-8');
|
$length = mb_strlen($token, 'UTF-8');
|
||||||
|
|
||||||
if ($length >= 5) {
|
if ($length >= 5) {
|
||||||
foreach (['innen', 'enen', 'ern', 'en', 'er', 'es', 'e', 's', 'n'] as $suffix) {
|
foreach ($this->retrieverConfig->exactSelectionTokenVariantSuffixes() as $suffix) {
|
||||||
if (!str_ends_with($token, $suffix)) {
|
if (!str_ends_with($token, $suffix)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -843,7 +843,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
|||||||
$length = mb_strlen($token, 'UTF-8');
|
$length = mb_strlen($token, 'UTF-8');
|
||||||
|
|
||||||
if ($length >= 5) {
|
if ($length >= 5) {
|
||||||
foreach (['typen', 'innen', 'enen', 'ern', 'en', 'er', 'es', 'e', 's', 'n'] as $suffix) {
|
foreach ($this->retrieverConfig->exactSelectionTokenVariantSuffixes() as $suffix) {
|
||||||
if (!str_ends_with($token, $suffix)) {
|
if (!str_ends_with($token, $suffix)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,19 +2,20 @@
|
|||||||
|
|
||||||
namespace App\Service;
|
namespace App\Service;
|
||||||
|
|
||||||
|
use App\Config\LanguageCleanupConfig;
|
||||||
|
|
||||||
class FormatText
|
class FormatText
|
||||||
{
|
{
|
||||||
|
public function __construct(private readonly LanguageCleanupConfig $languageCleanupConfig)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
function slugify(string $text): string
|
function slugify(string $text): string
|
||||||
{
|
{
|
||||||
$text = mb_strtolower($text, 'UTF-8');
|
$text = mb_strtolower($text, 'UTF-8');
|
||||||
|
|
||||||
// Umlaute ersetzen
|
// Use YAML-backed language normalization instead of a PHP-owned list.
|
||||||
$replacements = [
|
$replacements = $this->languageCleanupConfig->getAsciiTransliterationMap();
|
||||||
'ä' => 'ae',
|
|
||||||
'ö' => 'oe',
|
|
||||||
'ü' => 'ue',
|
|
||||||
'ß' => 'ss'
|
|
||||||
];
|
|
||||||
$text = str_replace(array_keys($replacements), $replacements, $text);
|
$text = str_replace(array_keys($replacements), $replacements, $text);
|
||||||
|
|
||||||
// Nicht erlaubte Zeichen entfernen
|
// Nicht erlaubte Zeichen entfernen
|
||||||
|
|||||||
Reference in New Issue
Block a user