central config part 1

This commit is contained in:
team2
2026-04-25 23:39:41 +02:00
parent 2797834a5f
commit f42022e5f7
11 changed files with 1197 additions and 476 deletions

View File

@@ -6,24 +6,13 @@ namespace App\Config;
final class CommerceQueryParserConfig
{
/**
* @return string[]
*/
public function getKnownBrands(): array
{
return [
private const KNOWN_BRANDS = [
'heyl',
'horiba',
'neomeris',
];
}
/**
* @return string[]
*/
public function getPhrasesToRemove(): array
{
return [
private const PHRASES_TO_REMOVE = [
'ich suche',
'suche',
'habt ihr',
@@ -51,24 +40,8 @@ final class CommerceQueryParserConfig
'verfügbarkeit',
'verfuegbarkeit',
];
}
public function getHistoryContextPattern(): string
{
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
}
public function getHistoryContextValuePattern(): string
{
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
}
/**
* @return string[]
*/
public function getFilterSearchTokens(): array
{
return [
private const FILTER_SEARCH_TOKENS = [
'auch',
'noch',
'nochmal',
@@ -142,14 +115,8 @@ final class CommerceQueryParserConfig
'passen',
'passend',
];
}
/**
* @return array<string, string>
*/
public function getSearchTokenCorrections(): array
{
return [
private const SEARCH_TOKEN_CORRECTIONS = [
'siene' => 'seine',
'sienen' => 'seinen',
'siener' => 'seiner',
@@ -157,14 +124,8 @@ final class CommerceQueryParserConfig
'sienes' => 'seines',
'indicatoren' => 'indikatoren',
];
}
/**
* @return array<string, string>
*/
public function getSearchTokenCanonicalMap(): array
{
return [
private const SEARCH_TOKEN_CANONICAL_MAP = [
'indikatoren' => 'indikator',
'indicators' => 'indikator',
'indicator' => 'indikator',
@@ -173,6 +134,113 @@ final class CommerceQueryParserConfig
'reagent' => 'reagenz',
'produkte' => 'produkt',
];
/**
 * Built-in token list handed to the vocabulary lookup in
 * getSemanticShopSearchTokens() as its fallback.
 *
 * Holds lower-case German and English product-type terms; umlaut spellings
 * are accompanied by ASCII variants (e.g. 'lösung' / 'loesung').
 */
private const SEMANTIC_SHOP_SEARCH_TOKENS = [
'indikator',
'indicator',
'reagenz',
'reagent',
'zubehör',
'zubehor',
'ersatzteil',
'verbrauchsmaterial',
'chemie',
'indikatorchemie',
'reagenzchemie',
'kit',
'set',
'filter',
'pumpe',
'pumpenkopf',
'motorblock',
'lösung',
'loesung',
'solution',
'teststreifen',
'gerät',
'geraet',
'messgerät',
'messgeraet',
'analysegerät',
'analysegeraet',
'analysator',
'monitor',
'controller',
'system',
];
/**
 * @param array<string, mixed> $config Override values; entries found here are read before any fallback is used.
 * @param DomainVocabularyConfig|null $vocabulary Optional vocabulary source; when null, the vocabulary helpers return
 *                                                their fallback argument unchanged.
 */
public function __construct(
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
/**
 * Resolves the known-brand list.
 *
 * The `known_brands` config entry is read through stringList(); its default
 * is the vocabulary view `commerce_query.known_brands`, which in turn falls
 * back to self::KNOWN_BRANDS.
 *
 * @return string[]
 */
public function getKnownBrands(): array
{
    $fallback = $this->vocabularyView('commerce_query.known_brands', self::KNOWN_BRANDS);

    return $this->stringList('known_brands', $fallback);
}
/**
 * Resolves the list of phrases to remove.
 *
 * The `phrases_to_remove` config entry is read through stringList(); its
 * default is the vocabulary view `commerce_query.phrases_to_remove`, which
 * in turn falls back to self::PHRASES_TO_REMOVE.
 *
 * @return string[]
 */
public function getPhrasesToRemove(): array
{
    $fallback = $this->vocabularyView('commerce_query.phrases_to_remove', self::PHRASES_TO_REMOVE);

    return $this->stringList('phrases_to_remove', $fallback);
}
/**
 * Returns the history-context alternation as a bare regex fragment.
 *
 * The string has no delimiters, anchors or outer group, so it is meant to be
 * embedded into a larger pattern. It lists words and phrases such as 'auch',
 * 'nochmal' or 'wie oben' and uses a non-capturing group for the inflected
 * forms of 'gleich'.
 */
public function getHistoryContextPattern(): string
{
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
}
/**
 * Builds the complete, delimited regex around getHistoryContextPattern().
 *
 * The alternation is placed in a single capturing group between word
 * boundaries, and the pattern carries the `u` modifier.
 */
public function getHistoryContextValuePattern(): string
{
    $alternation = $this->getHistoryContextPattern();

    // Single quotes keep "\b" as a literal backslash sequence for PCRE.
    return sprintf('/\b(%s)\b/u', $alternation);
}
/**
 * Resolves the filter search token list.
 *
 * The `filter_search_tokens` config entry is read through stringList(); its
 * default is the vocabulary view `commerce_query.filter_search_tokens`,
 * which in turn falls back to self::FILTER_SEARCH_TOKENS.
 *
 * @return string[]
 */
public function getFilterSearchTokens(): array
{
    $fallback = $this->vocabularyView('commerce_query.filter_search_tokens', self::FILTER_SEARCH_TOKENS);

    return $this->stringList('filter_search_tokens', $fallback);
}
/**
 * Resolves the token correction map (e.g. 'siene' => 'seine').
 *
 * The `search_token_corrections` config entry is read through stringMap();
 * its default is the vocabulary map `commerce_query.search_token_corrections`,
 * which in turn falls back to self::SEARCH_TOKEN_CORRECTIONS.
 *
 * @return array<string, string>
 */
public function getSearchTokenCorrections(): array
{
    $fallback = $this->vocabularyStringMap(
        'commerce_query.search_token_corrections',
        self::SEARCH_TOKEN_CORRECTIONS,
    );

    return $this->stringMap('search_token_corrections', $fallback);
}
/**
 * Resolves the map from token variants to their canonical form
 * (e.g. 'indicators' => 'indikator').
 *
 * The `search_token_canonical_map` config entry is read through stringMap();
 * its default is the vocabulary map `commerce_query.search_token_canonical`,
 * which in turn falls back to self::SEARCH_TOKEN_CANONICAL_MAP.
 *
 * @return array<string, string>
 */
public function getSearchTokenCanonicalMap(): array
{
    $fallback = $this->vocabularyStringMap(
        'commerce_query.search_token_canonical',
        self::SEARCH_TOKEN_CANONICAL_MAP,
    );

    return $this->stringMap('search_token_canonical_map', $fallback);
}
/**
@@ -335,39 +403,86 @@ final class CommerceQueryParserConfig
*/
public function getSemanticShopSearchTokens(): array
{
    // The former inline literal list now lives in self::SEMANTIC_SHOP_SEARCH_TOKENS.
    // Returning it directly here would make the config/vocabulary lookup below
    // unreachable, so the method has exactly one return path.
    $fallback = $this->vocabularyView(
        'commerce_query.semantic_shop_search_tokens',
        self::SEMANTIC_SHOP_SEARCH_TOKENS,
    );

    return $this->stringList('semantic_shop_search_tokens', $fallback);
}
/**
 * Fetches a vocabulary view, or hands back $fallback untouched when no
 * vocabulary object was injected.
 *
 * @param string[] $fallback
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->view($path, $fallback);
}
/**
 * Fetches a string-to-string map from the vocabulary, or hands back
 * $fallback untouched when no vocabulary object was injected.
 *
 * @param array<string, string> $fallback
 * @return array<string, string>
 */
private function vocabularyStringMap(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->stringMap($path, $fallback);
}
/**
 * Reads the value at $path (or $default when the path is absent) and
 * normalises it into a list of unique, trimmed, non-empty strings.
 *
 * Non-scalar entries are ignored. $default is returned as-is when the value
 * is not an array or when normalisation leaves nothing.
 *
 * @param string[] $default
 * @return string[]
 */
private function stringList(string $path, array $default): array
{
    $raw = $this->value($path, $default);
    if (!is_array($raw)) {
        return $default;
    }

    $seen = [];
    $list = [];
    foreach ($raw as $entry) {
        // Non-scalars collapse to '' and are dropped by the emptiness check.
        $text = is_scalar($entry) ? trim((string) $entry) : '';
        if ($text === '' || isset($seen[$text])) {
            continue;
        }

        $seen[$text] = true;
        $list[] = $text;
    }

    return $list === [] ? $default : $list;
}
/**
 * Reads the value at $path (or $default when the path is absent) and
 * normalises it into a map of trimmed, non-empty string keys and values.
 *
 * Pairs with a non-scalar value are ignored; when two keys are equal after
 * trimming, the later pair wins. $default is returned as-is when the value
 * is not an array or when normalisation leaves nothing.
 *
 * @param array<string, string> $default
 * @return array<string, string>
 */
private function stringMap(string $path, array $default): array
{
    $raw = $this->value($path, $default);
    if (!is_array($raw)) {
        return $default;
    }

    $map = [];
    foreach ($raw as $rawKey => $rawValue) {
        if (!(is_scalar($rawKey) && is_scalar($rawValue))) {
            continue;
        }

        $name = trim((string) $rawKey);
        $text = trim((string) $rawValue);
        if ($name === '' || $text === '') {
            continue;
        }

        $map[$name] = $text;
    }

    return $map === [] ? $default : $map;
}
/**
 * Walks $this->config along the dot-separated $path.
 *
 * Returns $default as soon as a segment cannot be resolved, i.e. the current
 * node is not an array or lacks the key. A stored null is returned as null
 * because presence is tested with array_key_exists().
 */
private function value(string $path, mixed $default): mixed
{
    $node = $this->config;
    $segments = explode('.', $path);

    foreach ($segments as $key) {
        $resolvable = is_array($node) && array_key_exists($key, $node);
        if (!$resolvable) {
            return $default;
        }

        $node = $node[$key];
    }

    return $node;
}
public function buildExactTokenRemovalPattern(string $token): string

View File

@@ -0,0 +1,196 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Read-only accessor for a nested vocabulary configuration array.
 *
 * Three top-level sections are consulted: `classes` (named term lists),
 * `views` (definitions that pull in classes via `include` and extra terms via
 * `add`) and `maps` (keyed lookups). Every accessor trims values, drops empty
 * and non-scalar entries, and uses the caller-supplied fallback when the
 * configured data yields nothing usable.
 */
final class DomainVocabularyConfig
{
    /**
     * @param array<string, mixed> $config
     */
    public function __construct(private readonly array $config = [])
    {
    }

    /**
     * Resolves the view stored under `views.<path>` into a flat term list.
     *
     * Terms come from every class named in the definition's `include` list,
     * followed by the literal terms in its `add` list; duplicates are removed
     * while keeping first-seen order. The normalised $fallback is returned
     * when the view is not an array or produces no terms.
     *
     * @return string[]
     */
    public function view(string $path, array $fallback = []): array
    {
        $definition = $this->value('views.' . $path, null);

        if (is_array($definition)) {
            $collected = [];
            foreach ($this->stringListFromValue($definition['include'] ?? []) as $className) {
                $collected = [...$collected, ...$this->domainClass($className)];
            }
            $collected = [...$collected, ...$this->stringListFromValue($definition['add'] ?? [])];

            $terms = $this->uniqueStringList($collected);
            if ($terms !== []) {
                return $terms;
            }
        }

        return $this->uniqueStringList($fallback);
    }

    /**
     * Returns the term list stored under `classes.<name>`, or an empty list.
     *
     * @return string[]
     */
    public function domainClass(string $name): array
    {
        return $this->stringList('classes.' . $name, []);
    }

    /**
     * Resolves `maps.<path>` into a map of key => unique string list.
     *
     * Entries whose trimmed key is empty or whose value is not a non-empty
     * list are skipped. The normalised $fallback is returned when nothing
     * usable is configured.
     *
     * @return array<string, string[]>
     */
    public function map(string $path, array $fallback = []): array
    {
        $configured = $this->value('maps.' . $path, null);

        $result = [];
        if (is_array($configured)) {
            // Array keys are always int|string, so only the value needs checking.
            foreach ($configured as $key => $items) {
                $name = trim((string) $key);
                $list = $this->stringListFromValue($items);
                if ($name === '' || $list === []) {
                    continue;
                }
                $result[$name] = $list;
            }
        }

        return $result === [] ? $this->uniqueStringListMap($fallback) : $result;
    }

    /**
     * Resolves `maps.<path>` into a map of key => single string.
     *
     * A list value is reduced to its first usable entry. Entries with an
     * empty trimmed key or value are skipped. The normalised $fallback is
     * returned when nothing usable is configured.
     *
     * @return array<string, string>
     */
    public function stringMap(string $path, array $fallback = []): array
    {
        $configured = $this->value('maps.' . $path, null);
        if (!is_array($configured)) {
            return $this->uniqueStringMap($fallback);
        }

        $result = [];
        foreach ($configured as $key => $mapped) {
            $name = trim((string) $key);
            if ($name === '') {
                continue;
            }

            if (is_array($mapped)) {
                // An empty list collapses to '' and is dropped further down.
                $mapped = $this->stringListFromValue($mapped)[0] ?? '';
            }
            if (!is_scalar($mapped)) {
                continue;
            }

            $text = trim((string) $mapped);
            if ($text !== '') {
                $result[$name] = $text;
            }
        }

        return $result === [] ? $this->uniqueStringMap($fallback) : $result;
    }

    /**
     * Returns the configuration array exactly as it was passed in.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * Normalised string list at $path, or the normalised $fallback when the
     * path yields no usable entries.
     *
     * @return string[]
     */
    private function stringList(string $path, array $fallback): array
    {
        $items = $this->stringListFromValue($this->value($path, null));

        return $items === [] ? $this->uniqueStringList($fallback) : $items;
    }

    /**
     * Normalises $value into a string list; anything that is not an array
     * yields an empty list.
     *
     * @return string[]
     */
    private function stringListFromValue(mixed $value): array
    {
        return is_array($value) ? $this->uniqueStringList($value) : [];
    }

    /**
     * Casts scalar items to trimmed strings and keeps the first occurrence of
     * each non-empty value, in input order.
     *
     * @return string[]
     */
    private function uniqueStringList(array $items): array
    {
        $seen = [];
        $unique = [];
        foreach ($items as $item) {
            // Non-scalars collapse to '' and are dropped by the emptiness check.
            $text = is_scalar($item) ? trim((string) $item) : '';
            if ($text === '' || isset($seen[$text])) {
                continue;
            }

            $seen[$text] = true;
            $unique[] = $text;
        }

        return $unique;
    }

    /**
     * Normalises a key => list map, dropping empty keys and empty lists.
     *
     * @return array<string, string[]>
     */
    private function uniqueStringListMap(array $map): array
    {
        $result = [];
        foreach ($map as $key => $items) {
            $name = trim((string) $key);
            $list = $this->uniqueStringList(is_array($items) ? $items : []);
            if ($name === '' || $list === []) {
                continue;
            }
            $result[$name] = $list;
        }

        return $result;
    }

    /**
     * Normalises a key => scalar map, dropping non-scalar values and pairs
     * whose trimmed key or value is empty.
     *
     * @return array<string, string>
     */
    private function uniqueStringMap(array $map): array
    {
        $result = [];
        foreach ($map as $key => $value) {
            if (!is_scalar($value)) {
                continue;
            }

            $name = trim((string) $key);
            $text = trim((string) $value);
            if ($name === '' || $text === '') {
                continue;
            }
            $result[$name] = $text;
        }

        return $result;
    }

    /**
     * Walks the config along the dot-separated $path and returns $fallback as
     * soon as a segment cannot be resolved.
     */
    private function value(string $path, mixed $fallback): mixed
    {
        $node = $this->config;
        foreach (explode('.', $path) as $key) {
            $resolvable = is_array($node) && array_key_exists($key, $node);
            if (!$resolvable) {
                return $fallback;
            }
            $node = $node[$key];
        }

        return $node;
    }
}

View File

@@ -176,11 +176,10 @@ final class NdjsonHybridRetrieverConfig
/**
 * @param array<string, mixed> $config
 * @param DomainVocabularyConfig|null $vocabulary Optional vocabulary source; when null, vocabularyView() returns
 *                                                its fallback argument unchanged.
 */
public function __construct(
    private array $config = [],
    // Declared once only: a second promoted `$vocabulary` parameter would be a
    // compile-time "redefinition of parameter" error.
    private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
@@ -307,55 +306,55 @@ final class NdjsonHybridRetrieverConfig
/**
 * Looks up `generic_product_tokens` via stringList(). The default is the
 * vocabulary view `retrieval.generic_product_tokens`, which itself falls back
 * to self::GENERIC_PRODUCT_TOKEN.
 *
 * @return string[]
 */
public function genericProductTokens(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'generic_product_tokens',
        $this->vocabularyView('retrieval.generic_product_tokens', self::GENERIC_PRODUCT_TOKEN),
    );
}
/**
 * Looks up `important_short_model_tokens` via stringList(). The default is
 * the vocabulary view `retrieval.important_short_model_tokens`, which itself
 * falls back to self::IMPORTANT_SHORT_MODEL_TOKEN.
 *
 * @return string[]
 */
public function importantShortModelTokens(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'important_short_model_tokens',
        $this->vocabularyView('retrieval.important_short_model_tokens', self::IMPORTANT_SHORT_MODEL_TOKEN),
    );
}
/**
 * Looks up `family_descriptor_tokens` via stringList(). The default is the
 * vocabulary view `retrieval.family_descriptor_tokens`, which itself falls
 * back to self::FAMILY_DESCRIPTOR_TOKEN.
 *
 * @return string[]
 */
public function familyDescriptorTokens(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'family_descriptor_tokens',
        $this->vocabularyView('retrieval.family_descriptor_tokens', self::FAMILY_DESCRIPTOR_TOKEN),
    );
}
/**
 * Looks up `looks_like_reagent_tokens` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_reagent_tokens`, which itself falls
 * back to self::LOOKS_LIKE_REAGENT_TOKENS.
 *
 * @return string[]
 */
public function looksLikeReagentTokens(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_reagent_tokens',
        $this->vocabularyView('retrieval.looks_like_reagent_tokens', self::LOOKS_LIKE_REAGENT_TOKENS),
    );
}
/**
 * Looks up `looks_like_safety_docs` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_safety_docs`, which itself falls back
 * to self::LOOKS_LIKE_SAFETY_DOCS.
 *
 * @return string[]
 */
public function looksLikeSafetyDocs(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_safety_docs',
        $this->vocabularyView('retrieval.looks_like_safety_docs', self::LOOKS_LIKE_SAFETY_DOCS),
    );
}
/**
 * Looks up `looks_like_reagent_words` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_reagent_words`, which itself falls
 * back to self::LOOKS_LIKE_REAGENT_WORDS.
 *
 * @return string[]
 */
public function looksLikeReagentWords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_reagent_words',
        $this->vocabularyView('retrieval.looks_like_reagent_words', self::LOOKS_LIKE_REAGENT_WORDS),
    );
}
/**
 * Looks up `looks_like_document_words` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_document_words`, which itself falls
 * back to self::LOOKS_LIKE_DOCUMENT_WORDS.
 *
 * @return string[]
 */
public function looksLikeDocumentWords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_document_words',
        $this->vocabularyView('retrieval.looks_like_document_words', self::LOOKS_LIKE_DOCUMENT_WORDS),
    );
}
/**
 * Looks up `looks_like_safety_words` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_safety_words`, which itself falls
 * back to self::LOOKS_LIKE_SAFETY_WORDS.
 *
 * @return string[]
 */
public function looksLikeSafetyWords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_safety_words',
        $this->vocabularyView('retrieval.looks_like_safety_words', self::LOOKS_LIKE_SAFETY_WORDS),
    );
}
/**
 * Looks up `looks_like_device_words` via stringList(). The default is the
 * vocabulary view `retrieval.looks_like_device_words`, which itself falls
 * back to self::LOOKS_LIKE_DEVICE_WORDS.
 *
 * @return string[]
 */
public function looksLikeDeviceWords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'looks_like_device_words',
        $this->vocabularyView('retrieval.looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS),
    );
}
/**
* Effective retrieval vocabulary as a dedicated structure for diagnostics and config dumps.
@@ -459,6 +458,12 @@ final class NdjsonHybridRetrieverConfig
* @param string[] $default
* @return string[]
*/
/**
 * Fetches a vocabulary view, or hands back $fallback untouched when no
 * vocabulary object was injected.
 *
 * NOTE(review): the docblock fragment directly above documents a `$default`
 * parameter that this method does not have; it looks like it belongs to
 * stringList() below — confirm and move it back.
 *
 * @param string[] $fallback
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->view($path, $fallback);
}
private function stringList(string $key, array $default): array
{
$value = $this->raw($key, $default);
@@ -492,10 +497,6 @@ final class NdjsonHybridRetrieverConfig
return $this->config[$key];
}
if (array_key_exists($key, $this->vocabulary)) {
return $this->vocabulary[$key];
}
return $default;
}
}

View File

@@ -6,11 +6,74 @@ namespace App\Config;
final class PromptBuilderConfig
{
/**
 * Built-in keyword list handed to the vocabulary lookup in
 * getTechnicalProductKeywords() as its fallback.
 *
 * Holds lower-case German and English terms, largely as translation pairs
 * (e.g. 'spannung' / 'voltage'); some entries are multi-word phrases.
 */
private const TECHNICAL_PRODUCT_KEYWORDS = [
'technisch',
'technical',
'produkt',
'product',
'gerät',
'device',
'modell',
'model',
'messprinzip',
'measurement principle',
'schnittstelle',
'interface',
'relais',
'relay',
'indikator',
'indicator',
'grenzwert',
'threshold',
'messbereich',
'measurement range',
'minimaler',
'minimum',
'resthärte',
'resthaerte',
'°dh',
'dh',
'spannung',
'voltage',
'strom',
'current',
'druck',
'pressure',
'temperatur',
'temperature',
'schutzart',
'ip',
'fehlercode',
'error code',
'wasserhärte',
'hardness',
'testomat',
'chlor',
'chlormessung',
];
/**
 * Built-in keyword list handed to the vocabulary lookup in
 * getAccessoryRequestKeywords() as its fallback.
 *
 * Holds lower-case German terms; umlaut spellings are accompanied by ASCII
 * variants (e.g. 'ergänzung' / 'ergaenzung').
 */
private const ACCESSORY_REQUEST_KEYWORDS = [
'passend',
'passende',
'passendes',
'zubehör',
'zubehor',
'dazu',
'indikator',
'reagenz',
'kit',
'set',
'zusatz',
'ergänzung',
'ergaenzung',
];
/**
 * @param array<string, mixed> $config
 * @param DomainVocabularyConfig|null $vocabulary Optional vocabulary source; when null, vocabularyView() returns
 *                                                its fallback argument unchanged.
 */
public function __construct(
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
@@ -88,6 +151,42 @@ final class PromptBuilderConfig
return is_numeric($value) ? (float) $value : $default;
}
/**
 * Reads the value getValue() yields for $path and normalises it into a list
 * of unique, trimmed, non-empty strings.
 *
 * Non-scalar entries are ignored. $default is returned as-is when the value
 * is not an array or when normalisation leaves nothing.
 *
 * @param string[] $default
 * @return string[]
 */
private function getStringList(string $path, array $default): array
{
    $raw = $this->getValue($path, $default);
    if (!is_array($raw)) {
        return $default;
    }

    $seen = [];
    $list = [];
    foreach ($raw as $entry) {
        // Non-scalars collapse to '' and are dropped by the emptiness check.
        $text = is_scalar($entry) ? trim((string) $entry) : '';
        if ($text === '' || isset($seen[$text])) {
            continue;
        }

        $seen[$text] = true;
        $list[] = $text;
    }

    return $list === [] ? $default : $list;
}
/**
 * Fetches a vocabulary view, or hands back $fallback untouched when no
 * vocabulary object was injected.
 *
 * @param string[] $fallback
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->view($path, $fallback);
}
private function getValue(string $path, mixed $default): mixed
{
$current = $this->config;
@@ -445,51 +544,10 @@ final class PromptBuilderConfig
*/
public function getTechnicalProductKeywords(): array
{
    // The former inline literal list now lives in self::TECHNICAL_PRODUCT_KEYWORDS.
    // Returning it directly here would make the config/vocabulary lookup below
    // unreachable, so the method has exactly one return path.
    $fallback = $this->vocabularyView(
        'prompt.technical_product_keywords',
        self::TECHNICAL_PRODUCT_KEYWORDS,
    );

    return $this->getStringList('technical_product_keywords', $fallback);
}
/**
@@ -497,21 +555,10 @@ final class PromptBuilderConfig
*/
public function getAccessoryRequestKeywords(): array
{
    // The former inline literal list now lives in self::ACCESSORY_REQUEST_KEYWORDS.
    // Returning it directly here would make the config/vocabulary lookup below
    // unreachable, so the method has exactly one return path.
    $fallback = $this->vocabularyView(
        'prompt.accessory_request_keywords',
        self::ACCESSORY_REQUEST_KEYWORDS,
    );

    return $this->getStringList('accessory_request_keywords', $fallback);
}
public function getTechnicalProductModelPattern(): string

View File

@@ -68,8 +68,10 @@ final class ShopServiceConfig
/**
 * @param array<string, mixed> $config
 * @param DomainVocabularyConfig|null $vocabulary Optional vocabulary source; when null, vocabularyView() and
 *                                                vocabularyMap() return their fallback argument unchanged.
 */
public function __construct(
    private array $config = [],
    // Added with a null default, so existing single-argument wiring keeps working.
    private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
public function getTopProductLogLimit(): int
@@ -80,43 +82,43 @@ final class ShopServiceConfig
/**
 * Looks up `device_focus_keywords` via stringList(). The default is the
 * vocabulary view `shop.device_focus`, which itself falls back to
 * self::DEVICE_FOCUS_KEYWORDS.
 *
 * @return string[]
 */
public function getDeviceFocusKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'device_focus_keywords',
        $this->vocabularyView('shop.device_focus', self::DEVICE_FOCUS_KEYWORDS),
    );
}
/**
 * Looks up `accessory_focus_keywords` via stringList(). The default is the
 * vocabulary view `shop.accessory_focus`, which itself falls back to
 * self::ACCESSORY_FOCUS_KEYWORDS.
 *
 * @return string[]
 */
public function getAccessoryFocusKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'accessory_focus_keywords',
        $this->vocabularyView('shop.accessory_focus', self::ACCESSORY_FOCUS_KEYWORDS),
    );
}
/**
 * Looks up `accessory_focus_variant_map` via stringListMap(). The default is
 * the vocabulary map `shop.accessory_focus_variants`, which itself falls back
 * to self::ACCESSORY_FOCUS_VARIANT_MAP.
 *
 * @return array<string, string[]>
 */
public function getAccessoryFocusVariantMap(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringListMap(
        'accessory_focus_variant_map',
        $this->vocabularyMap('shop.accessory_focus_variants', self::ACCESSORY_FOCUS_VARIANT_MAP),
    );
}
/**
 * Looks up `device_query_keywords` via stringList(). The default is the
 * vocabulary view `shop.device_query`, which itself falls back to
 * self::DEVICE_QUERY_KEYWORDS.
 *
 * @return string[]
 */
public function getDeviceQueryKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'device_query_keywords',
        $this->vocabularyView('shop.device_query', self::DEVICE_QUERY_KEYWORDS),
    );
}
/**
 * Looks up `accessory_query_keywords` via stringList(). The default is the
 * vocabulary view `shop.accessory_query`, which itself falls back to
 * self::ACCESSORY_QUERY_KEYWORDS.
 *
 * @return string[]
 */
public function getAccessoryQueryKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'accessory_query_keywords',
        $this->vocabularyView('shop.accessory_query', self::ACCESSORY_QUERY_KEYWORDS),
    );
}
/**
 * Looks up `accessory_product_keywords` via stringList(). The default is the
 * vocabulary view `shop.accessory_product`, which itself falls back to
 * self::ACCESSORY_PRODUCT_KEYWORDS.
 *
 * @return string[]
 */
public function getAccessoryProductKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'accessory_product_keywords',
        $this->vocabularyView('shop.accessory_product', self::ACCESSORY_PRODUCT_KEYWORDS),
    );
}
/**
 * Looks up `device_product_keywords` via stringList(). The default is the
 * vocabulary view `shop.device_product`, which itself falls back to
 * self::DEVICE_PRODUCT_KEYWORDS.
 *
 * @return string[]
 */
public function getDeviceProductKeywords(): array
{
    // One return only: an earlier constant-only return would shadow the vocabulary lookup.
    return $this->stringList(
        'device_product_keywords',
        $this->vocabularyView('shop.device_product', self::DEVICE_PRODUCT_KEYWORDS),
    );
}
public function getExactProductNumberPhraseScore(): int
@@ -368,6 +370,18 @@ final class ShopServiceConfig
* @param string[]|null $emptySafeDefault
* @return string[]
*/
/**
 * Fetches a vocabulary view, or hands back $fallback untouched when no
 * vocabulary object was injected.
 *
 * NOTE(review): the docblock fragment directly above documents an
 * `$emptySafeDefault` parameter that this method does not have; it looks like
 * it belongs to stringList() further down — confirm and move it back.
 *
 * @param string[] $fallback
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->view($path, $fallback);
}
/**
 * Fetches a key => string-list map from the vocabulary, or hands back
 * $fallback untouched when no vocabulary object was injected.
 *
 * @param array<string, string[]> $fallback
 * @return array<string, string[]>
 */
private function vocabularyMap(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->map($path, $fallback);
}
private function stringList(string $path, array $default, bool $allowEmptyStrings = false, ?array $emptySafeDefault = null): array
{
$value = $this->value($path, $default);