Files
MtoRagSystem/src/Config/PromptBuilderConfig.php
2026-04-30 19:46:11 +02:00

871 lines
28 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Typed, read-only accessors over the nested prompt-builder configuration array.
 *
 * Values are addressed by dot-separated paths (e.g. "budget.chars_per_token").
 * Two families of getters exist:
 *
 *  - "required" getters throw \InvalidArgumentException when the path is missing
 *    or the value has the wrong shape;
 *  - "defaulted" getters silently fall back to a built-in default (one of the
 *    class constants below or an inline literal) when the value is missing or
 *    unusable.
 */
final class PromptBuilderConfig
{
    /**
     * Default for "role_guard.main_device_request_keywords".
     *
     * NOTE(review): 'testoamt' looks like a deliberate common-typo variant of
     * 'testomat' - confirm before "fixing" it.
     */
    private const MAIN_DEVICE_REQUEST_ROLE_KEYWORDS = [
        'messanlage', 'messanlagen', 'anlage', 'anlagen', 'messgerät', 'messgeraet',
        'messgeräte', 'messgeraete', 'analysegerät', 'analysegeraet', 'analysegeräte',
        'analysegeraete', 'analysator', 'analysatoren', 'analyzer', 'gerät', 'geraet',
        'geräte', 'geraete', 'system', 'systeme', 'monitor', 'monitore', 'controller',
        'tester', 'pocket tester', 'pockettester', 'handmessgerät', 'handmessgeraet',
        'überwachungsgerät', 'ueberwachungsgeraet', 'testomat', 'testoamt',
    ];

    /** Default for "role_guard.main_device_product_keywords". */
    private const MAIN_DEVICE_PRODUCT_ROLE_KEYWORDS = [
        'messanlage', 'messanlagen', 'messgerät', 'messgeraet', 'messgeräte', 'messgeraete',
        'analysegerät', 'analysegeraet', 'analysegeräte', 'analysegeraete', 'analysator',
        'analysatoren', 'analyzer', 'online-analysator', 'online analysator',
        'online-analysegerät', 'online analysegeraet', 'gerät', 'geraet', 'geräte',
        'geraete', 'system', 'systeme', 'monitor', 'monitore', 'controller',
        'tester', 'pocket tester', 'pockettester', 'handmessgerät', 'handmessgeraet',
        'labor messgerät', 'labor-messgerät', 'labor messgeraet', 'labor-messgeraet',
        'kombimessgerät', 'kombi-messgerät', 'kombimessgeraet', 'kombi-messgeraet',
        'überwachungsgerät', 'ueberwachungsgeraet', 'messumformer', 'transmitter',
        'regler', 'testomat',
    ];

    /** Default for "role_guard.accessory_product_keywords". */
    private const ACCESSORY_PRODUCT_ROLE_KEYWORDS = [
        'indikator', 'indikatoren', 'indicator', 'reagenz', 'reagenzien', 'reagent',
        'zubehör', 'zubehor', 'ersatzteil', 'ersatzteile', 'kit', 'set',
        'verbrauchsmaterial', 'consumable', 'nachfüll', 'nachfuell', 'refill',
        'lösung', 'loesung', 'solution', 'teststreifen', 'test strip', 'filter',
        'pumpenkopf', 'motorblock', 'service set', 'serviceset', 'service-set',
        'elektrode', 'elektroden', 'electrode', 'electrodes', 'glasschaft-elektrode',
        'kunststoffschaft-elektrode', 'sensor', 'sensoren', 'sensors', 'sonde',
        'sonden', 'probe', 'probes', 'messsonde', 'elektrolyt', 'kabel', 'adapter',
        'ph-indikator', 'ph indikator', 'ph-indikatoren', 'ph indikatoren',
    ];

    /**
     * Last-resort default for "technical_product_keywords" (used when neither
     * the config nor the injected vocabulary supplies a list).
     *
     * NOTE(review): contains very short entries ('dh', 'ip'); how they are
     * matched is decided by the consumer, which is not visible here.
     */
    private const TECHNICAL_PRODUCT_KEYWORDS = [
        'technisch', 'technical',
        'produkt', 'product',
        'gerät', 'device',
        'modell', 'model',
        'messprinzip', 'measurement principle',
        'schnittstelle', 'interface',
        'relais', 'relay',
        'indikator', 'indicator',
        'grenzwert', 'threshold',
        'messbereich', 'measurement range',
        'gemessen', 'measured',
        'minimaler', 'minimum',
        'resthärte', 'resthaerte',
        '°dh', 'dh',
        'spannung', 'voltage',
        'strom', 'current',
        'druck', 'pressure',
        'temperatur', 'temperature',
        'schutzart', 'ip',
        'fehlercode', 'error code',
        'wasserhärte', 'hardness',
        'testomat',
        'chlor', 'chlormessung',
    ];

    /** "positive_context_terms" shared by every built-in measurement parameter. */
    private const MEASUREMENT_POSITIVE_CONTEXT_TERMS = [
        'Messung', 'messen', 'misst', 'Messbereich', 'Messparameter', 'Messgröße', 'Messgroesse',
        'Bestimmung', 'bestimmen', 'Analyse', 'analysiert', 'überwachen', 'ueberwachen',
        'Indikator für', 'Indikator fuer', 'Reagenz für', 'Reagenz fuer', 'Sensor', 'Elektrode',
    ];

    /** "negative_context_terms" shared by the built-in redox and free-chlorine parameters. */
    private const MEASUREMENT_NEGATIVE_CONTEXT_TERMS = [
        'Betriebsbereich', 'Betriebsumgebung', 'Einsatzbedingungen', 'störungsfrei', 'stoerungsfrei',
    ];

    /** Default for "measurement_evidence_guard.parameters". */
    private const MEASUREMENT_EVIDENCE_PARAMETERS = [
        [
            'id' => 'ph',
            'label' => 'pH / pH-Wert',
            'request_terms' => ['ph', 'pH', 'pH-Wert', 'ph-wert', 'ph wert'],
            'positive_terms' => ['pH', 'pH-Wert', 'ph-wert', 'ph wert'],
            'positive_context_terms' => self::MEASUREMENT_POSITIVE_CONTEXT_TERMS,
            'negative_context_terms' => [
                'Betriebsbereich', 'Betriebsumgebung', 'Einsatzbedingungen', 'störungsfrei', 'stoerungsfrei',
                'pH-Wert bei', 'ph wert bei', 'ph-wert bei',
                'bei 20 °C', 'bei 20 °c', 'bei 20°C', 'bei 20°c',
                'Reagenzlösung hat', 'Loesung hat', 'Lösung hat',
            ],
            'non_equivalent_terms' => ['p-Wert', 'p Wert', 'm-Wert', 'minus m-Wert', 'Alkalität', 'Säurekapazität', 'mmol/l'],
            'safe_no_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten Testomat für pH-Messung.',
            'safe_no_accessory_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten pH-Indikator oder ein pH-Reagenz für Messgeräte.',
        ],
        [
            'id' => 'redox',
            'label' => 'Redox / ORP',
            'request_terms' => ['redox', 'orp', 'oxidations-reduktionspotential', 'oxidations reduktionspotential'],
            'positive_terms' => ['Redox', 'ORP', 'Oxidations-Reduktionspotential', 'Oxidations Reduktionspotential'],
            'positive_context_terms' => self::MEASUREMENT_POSITIVE_CONTEXT_TERMS,
            'negative_context_terms' => self::MEASUREMENT_NEGATIVE_CONTEXT_TERMS,
            'non_equivalent_terms' => [],
            'safe_no_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für Redox-/ORP-Messung.',
            'safe_no_accessory_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten Redox-/ORP-Indikator oder ein Redox-/ORP-Reagenz für Messgeräte.',
        ],
        [
            'id' => 'free_chlorine',
            'label' => 'freies Chlor',
            'request_terms' => ['freies chlor', 'freiem chlor', 'freien chlor', 'free chlorine'],
            'positive_terms' => ['freies Chlor', 'freiem Chlor', 'freien Chlor', 'free chlorine'],
            'positive_context_terms' => self::MEASUREMENT_POSITIVE_CONTEXT_TERMS,
            'negative_context_terms' => self::MEASUREMENT_NEGATIVE_CONTEXT_TERMS,
            'non_equivalent_terms' => ['Chlor gesamt', 'Gesamtchlor', 'total chlorine'],
            'safe_no_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von freiem Chlor.',
            'safe_no_accessory_evidence_answer_de' => 'Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von freiem Chlor.',
        ],
    ];

    /**
     * Last-resort default for "accessory_request_keywords" (used when neither
     * the config nor the injected vocabulary supplies a list).
     */
    private const ACCESSORY_REQUEST_KEYWORDS = [
        'passend',
        'passende',
        'passendes',
        'zubehör',
        'zubehor',
        'dazu',
        'indikator',
        'indikatoren',
        'ph-indikator',
        'ph indikator',
        'ph-indikatoren',
        'ph indikatoren',
        'reagenz',
        'kit',
        'set',
        'zusatz',
        'ergänzung',
        'ergaenzung',
    ];

    /** Built-in pattern returned when "technical_product_model_pattern" is absent or unusable. */
    private const DEFAULT_TECHNICAL_PRODUCT_MODEL_PATTERN = '/\b[\p{L}]{2,}\s?\d{2,5}\b/u';

    /**
     * @param array<string, mixed>        $config     nested configuration, addressed by dot-paths
     * @param DomainVocabularyConfig|null $vocabulary optional vocabulary consulted for keyword defaults
     */
    public function __construct(
        private readonly array $config = [],
        private readonly ?DomainVocabularyConfig $vocabulary = null,
    ) {
    }

    // ---------------------------------------------------------------------
    // Budget / limits - all required; each throws \InvalidArgumentException
    // when its path is missing or the value is not numeric.
    // ---------------------------------------------------------------------

    public function getCharsPerToken(): int
    {
        return $this->getRequiredInt('budget.chars_per_token');
    }

    public function getHistoryPaddingChars(): int
    {
        return $this->getRequiredInt('budget.history_padding_chars');
    }

    public function getOutputReserveRatio(): float
    {
        return $this->getRequiredFloat('budget.output_reserve_ratio');
    }

    public function getOutputReserveMinTokens(): int
    {
        return $this->getRequiredInt('budget.output_reserve_min_tokens');
    }

    public function getOutputReserveMaxTokens(): int
    {
        return $this->getRequiredInt('budget.output_reserve_max_tokens');
    }

    public function getSafetyReserveRatio(): float
    {
        return $this->getRequiredFloat('budget.safety_reserve_ratio');
    }

    public function getSafetyReserveMinTokens(): int
    {
        return $this->getRequiredInt('budget.safety_reserve_min_tokens');
    }

    public function getSafetyReserveMaxTokens(): int
    {
        return $this->getRequiredInt('budget.safety_reserve_max_tokens');
    }

    public function getMinPromptBudgetTokens(): int
    {
        return $this->getRequiredInt('budget.min_prompt_budget_tokens');
    }

    public function getMaxShopResultsInPrompt(): int
    {
        return $this->getRequiredInt('shop_results.max_results_in_prompt');
    }

    public function getDetailedShopResultsMaxCount(): int
    {
        return $this->getRequiredInt('shop_results.detailed_max_count');
    }

    public function getTechnicalProductKeywordMatchThreshold(): int
    {
        return $this->getRequiredInt('technical_product_keyword_match_threshold');
    }

    // ---------------------------------------------------------------------
    // Prompt sections, templates and rule lists - all required unless noted.
    // String getters throw when the value is missing, non-scalar or blank;
    // list getters throw when the value is missing, not an array, or contains
    // no usable string.
    // ---------------------------------------------------------------------

    public function getSystemSectionLabel(): string
    {
        return $this->getRequiredString('sections.system_label');
    }

    public function getUserQuestionSectionLabel(): string
    {
        return $this->getRequiredString('sections.user_question_label');
    }

    public function getConversationContextSectionLabel(): string
    {
        return $this->getRequiredString('sections.conversation_context_label');
    }

    /**
     * @return string[]
     */
    public function getConversationContextIntroLines(): array
    {
        return $this->getRequiredStringList('conversation_context.intro_lines');
    }

    public function getShopSearchQuerySectionLabel(): string
    {
        return $this->getRequiredString('sections.shop_search_query_label');
    }

    public function getShopSearchQuerySourceLine(): string
    {
        return $this->getRequiredString('shop_search.source_line');
    }

    /**
     * @return string[]
     */
    public function getLiveShopResultsHeaderLines(): array
    {
        return $this->getRequiredStringList('shop_results.header_lines');
    }

    public function getLiveShopResultsOverflowNoticeTemplate(): string
    {
        return $this->getRequiredString('shop_results.overflow_notice_template');
    }

    public function getShopRecordHeaderTemplate(): string
    {
        return $this->getRequiredString('shop_results.record_header_template');
    }

    public function getShopExactProductNameLabel(): string
    {
        return $this->getRequiredString('shop_results.exact_product_name_label');
    }

    /**
     * @return string[]
     */
    public function getShopAtomicRecordNoteLines(): array
    {
        return $this->getRequiredStringList('shop_results.atomic_record_note_lines');
    }

    public function getOutputPrioritySectionLabel(): string
    {
        return $this->getRequiredString('sections.output_priority_label');
    }

    /**
     * @return string[]
     */
    public function getOutputPriorityRules(): array
    {
        return $this->getRequiredStringList('output_priority.rules');
    }

    /**
     * @return string[]
     */
    public function getOutputPriorityTechnicalRules(): array
    {
        return $this->getRequiredStringList('output_priority.technical_rules');
    }

    public function getFallbackEscalationSectionLabel(): string
    {
        return $this->getRequiredString('sections.fallback_escalation_label');
    }

    public function getFallbackEscalationStateLineTemplate(): string
    {
        return $this->getRequiredString('fallback_escalation.state_line_template');
    }

    /**
     * @return string[]
     */
    public function getFallbackEscalationBaseRules(): array
    {
        return $this->getRequiredStringList('fallback_escalation.base_rules');
    }

    /**
     * Optional per-state rules read from "fallback_escalation.states.<state>".
     *
     * The state name is appended to the path verbatim, so a name containing
     * "." is interpreted as further nesting levels.
     *
     * @return string[] empty when the state has no entry (or the entry is null)
     *
     * @throws \InvalidArgumentException when the entry exists but is not an array
     */
    public function getFallbackEscalationStateRules(string $state): array
    {
        return $this->getOptionalStringList('fallback_escalation.states.' . $state);
    }

    /**
     * @return string[]
     */
    public function getFallbackEscalationWithoutShopCheckRules(): array
    {
        return $this->getRequiredStringList('fallback_escalation.without_shop_check_rules');
    }

    public function getResponseFormatSectionLabel(): string
    {
        return $this->getRequiredString('sections.response_format_label');
    }

    /**
     * @return string[]
     */
    public function getResponseFormatBaseRules(): array
    {
        return $this->getRequiredStringList('response_format.base_rules');
    }

    /**
     * @return string[]
     */
    public function getResponseFormatWithShopRules(): array
    {
        return $this->getRequiredStringList('response_format.with_shop_rules');
    }

    /**
     * @return string[]
     */
    public function getResponseFormatWithoutShopRules(): array
    {
        return $this->getRequiredStringList('response_format.without_shop_rules');
    }

    /**
     * @return string[]
     */
    public function getResponseFormatTechnicalRules(): array
    {
        return $this->getRequiredStringList('response_format.technical_rules');
    }

    /**
     * @return string[]
     */
    public function getResponseFormatAccessoryRules(): array
    {
        return $this->getRequiredStringList('response_format.accessory_rules');
    }

    public function getLanguageRulesSectionLabel(): string
    {
        return $this->getRequiredString('sections.language_rules_label');
    }

    /**
     * @return string[]
     */
    public function getLanguageRules(): array
    {
        return $this->getRequiredStringList('language.rules');
    }

    public function getFactGroundingRulesSectionLabel(): string
    {
        return $this->getRequiredString('sections.fact_grounding_rules_label');
    }

    /**
     * @return string[]
     */
    public function getFactGroundingBaseRules(): array
    {
        return $this->getRequiredStringList('fact_grounding.base_rules');
    }

    /**
     * @return string[]
     */
    public function getFactGroundingWithShopRules(): array
    {
        return $this->getRequiredStringList('fact_grounding.with_shop_rules');
    }

    /**
     * @return string[]
     */
    public function getFactGroundingWithoutShopRules(): array
    {
        return $this->getRequiredStringList('fact_grounding.without_shop_rules');
    }

    /**
     * @return string[]
     */
    public function getFactGroundingTechnicalRules(): array
    {
        return $this->getRequiredStringList('fact_grounding.technical_rules');
    }

    public function getRetrievedKnowledgeSectionLabel(): string
    {
        return $this->getRequiredString('sections.retrieved_knowledge_label');
    }

    public function getRetrievedKnowledgeSourceLine(): string
    {
        return $this->getRequiredString('retrieved_knowledge.source_line');
    }

    public function getUrlContentSectionLabel(): string
    {
        return $this->getRequiredString('sections.url_content_label');
    }

    public function getUrlContentSourceLine(): string
    {
        return $this->getRequiredString('url_content.source_line');
    }

    // ---------------------------------------------------------------------
    // Shop record field labels - all required strings.
    // ---------------------------------------------------------------------

    public function getShopProductNumberLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.product_number_label');
    }

    public function getShopManufacturerLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.manufacturer_label');
    }

    public function getShopPriceLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.price_label');
    }

    public function getShopAvailabilityLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.availability_label');
    }

    public function getShopAvailabilityYesLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.availability_yes_label');
    }

    public function getShopAvailabilityNoLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.availability_no_label');
    }

    public function getShopHighlightPrefix(): string
    {
        return $this->getRequiredString('shop_results.fields.highlight_prefix');
    }

    public function getShopUrlLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.url_label');
    }

    public function getShopProductImageLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.product_image_label');
    }

    public function getShopDescriptionLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.description_label');
    }

    public function getShopMetaInformationLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.meta_information_label');
    }

    public function getShopRequestedRoleLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.requested_role_label');
    }

    public function getShopInferredRoleLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.inferred_role_label');
    }

    public function getShopRoleCompatibilityLabel(): string
    {
        return $this->getRequiredString('shop_results.fields.role_compatibility_label');
    }

    public function getShopRoleIncompatibleCommercialSuppressionNote(): string
    {
        return $this->getRequiredString('shop_results.fields.role_incompatible_commercial_suppression_note');
    }

    // ---------------------------------------------------------------------
    // Keyword lists - defaulted; never throw.
    // ---------------------------------------------------------------------

    /**
     * @return string[]
     */
    public function getMainDeviceRequestRoleKeywords(): array
    {
        return $this->getStringList('role_guard.main_device_request_keywords', self::MAIN_DEVICE_REQUEST_ROLE_KEYWORDS);
    }

    /**
     * @return string[]
     */
    public function getMainDeviceProductRoleKeywords(): array
    {
        return $this->getStringList('role_guard.main_device_product_keywords', self::MAIN_DEVICE_PRODUCT_ROLE_KEYWORDS);
    }

    /**
     * @return string[]
     */
    public function getAccessoryProductRoleKeywords(): array
    {
        return $this->getStringList('role_guard.accessory_product_keywords', self::ACCESSORY_PRODUCT_ROLE_KEYWORDS);
    }

    /**
     * Resolution order: config "technical_product_keywords", then the
     * vocabulary view "prompt.technical_product_keywords", then the constant.
     *
     * @return string[]
     */
    public function getTechnicalProductKeywords(): array
    {
        $fallback = $this->vocabularyView('prompt.technical_product_keywords', self::TECHNICAL_PRODUCT_KEYWORDS);

        return $this->getStringList('technical_product_keywords', $fallback);
    }

    /**
     * Resolution order: config "accessory_request_keywords", then the
     * vocabulary view "prompt.accessory_request_keywords", then the constant.
     *
     * @return string[]
     */
    public function getAccessoryRequestKeywords(): array
    {
        $fallback = $this->vocabularyView('prompt.accessory_request_keywords', self::ACCESSORY_REQUEST_KEYWORDS);

        return $this->getStringList('accessory_request_keywords', $fallback);
    }

    // ---------------------------------------------------------------------
    // Measurement-evidence guard - defaulted; never throws.
    // ---------------------------------------------------------------------

    public function getMeasurementEvidenceSectionLabel(): string
    {
        return $this->getString('sections.measurement_evidence_label', 'MEASUREMENT PARAMETER EVIDENCE CHECK');
    }

    /**
     * @return string[]
     */
    public function getMeasurementEvidenceIntroRules(): array
    {
        return $this->getStringList('measurement_evidence_guard.intro_rules', [
            '- This block is generated from the current user question and is stricter than broad product-selection wording.',
            '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.',
            '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.',
        ]);
    }

    /**
     * Returns the configured measurement parameters in a normalised shape.
     *
     * Entries that are not arrays, or that lack a non-blank scalar "id" or
     * "label", are dropped. Term lists are trimmed and de-duplicated; answer
     * strings default to ''. When the configured value is not an array, or no
     * entry survives, the built-in defaults are returned instead.
     *
     * @return array<int, array<string, mixed>>
     */
    public function getMeasurementEvidenceParameters(): array
    {
        $configured = $this->getValue('measurement_evidence_guard.parameters', self::MEASUREMENT_EVIDENCE_PARAMETERS);
        if (!is_array($configured)) {
            return self::MEASUREMENT_EVIDENCE_PARAMETERS;
        }

        $parameters = [];
        foreach ($configured as $entry) {
            if (!is_array($entry)) {
                continue;
            }

            $id = self::scalarToTrimmedString($entry['id'] ?? null);
            $label = self::scalarToTrimmedString($entry['label'] ?? null);
            if ($id === '' || $label === '') {
                continue;
            }

            $parameters[] = [
                'id' => $id,
                'label' => $label,
                'request_terms' => $this->normalizeMixedStringList($entry['request_terms'] ?? []),
                'positive_terms' => $this->normalizeMixedStringList($entry['positive_terms'] ?? []),
                'positive_context_terms' => $this->normalizeMixedStringList($entry['positive_context_terms'] ?? []),
                'negative_context_terms' => $this->normalizeMixedStringList($entry['negative_context_terms'] ?? []),
                'non_equivalent_terms' => $this->normalizeMixedStringList($entry['non_equivalent_terms'] ?? []),
                'safe_no_evidence_answer_de' => self::scalarToTrimmedString($entry['safe_no_evidence_answer_de'] ?? null),
                'safe_no_accessory_evidence_answer_de' => self::scalarToTrimmedString($entry['safe_no_accessory_evidence_answer_de'] ?? null),
            ];
        }

        return $parameters !== [] ? $parameters : self::MEASUREMENT_EVIDENCE_PARAMETERS;
    }

    /**
     * Returns the PCRE pattern configured under "technical_product_model_pattern".
     *
     * Falls back to the built-in default when the value is missing, non-scalar,
     * empty, or does not compile as a regular expression, so callers never
     * receive a pattern that makes preg_* fail.
     */
    public function getTechnicalProductModelPattern(): string
    {
        $pattern = $this->getString('technical_product_model_pattern', self::DEFAULT_TECHNICAL_PRODUCT_MODEL_PATTERN);

        return self::isCompilableRegex($pattern) ? $pattern : self::DEFAULT_TECHNICAL_PRODUCT_MODEL_PATTERN;
    }

    // ---------------------------------------------------------------------
    // Private helpers
    // ---------------------------------------------------------------------

    /**
     * Defaulted integer lookup: non-numeric or missing values yield $default.
     * Currently not referenced inside this class.
     */
    private function getInt(string $path, int $default): int
    {
        $value = $this->getValue($path, $default);

        return is_numeric($value) ? (int) $value : $default;
    }

    /**
     * Defaulted float lookup: non-numeric or missing values yield $default.
     * Currently not referenced inside this class.
     */
    private function getFloat(string $path, float $default): float
    {
        $value = $this->getValue($path, $default);

        return is_numeric($value) ? (float) $value : $default;
    }

    /**
     * Required integer lookup. Any is_numeric() value is accepted and cast,
     * so fractional values are truncated by the int cast.
     *
     * @throws \InvalidArgumentException when the path is missing or the value is not numeric
     */
    private function getRequiredInt(string $path): int
    {
        return (int) $this->getRequiredNumeric($path);
    }

    /**
     * Required float lookup.
     *
     * @throws \InvalidArgumentException when the path is missing or the value is not numeric
     */
    private function getRequiredFloat(string $path): float
    {
        return (float) $this->getRequiredNumeric($path);
    }

    /**
     * Fetches a required value and verifies it passes is_numeric().
     *
     * @return int|float|string the raw numeric value, not yet cast
     *
     * @throws \InvalidArgumentException when the path is missing or the value is not numeric
     */
    private function getRequiredNumeric(string $path): int|float|string
    {
        $value = $this->getRequiredValue($path);
        if (!is_numeric($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be numeric.', $path));
        }

        return $value;
    }

    /**
     * Defaulted string lookup: missing, non-scalar or empty values yield $default.
     * The value is returned untrimmed.
     */
    private function getString(string $path, string $default): string
    {
        $value = $this->getValue($path, $default);
        if (!is_scalar($value)) {
            return $default;
        }

        $text = (string) $value;

        return $text !== '' ? $text : $default;
    }

    /**
     * Required string lookup. Blankness is judged on the trimmed value, but
     * the original (untrimmed) string is what gets returned.
     *
     * @throws \InvalidArgumentException when the path is missing, or the value is non-scalar or blank
     */
    private function getRequiredString(string $path): string
    {
        $value = $this->getRequiredValue($path);
        if (!is_scalar($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be a scalar string.', $path));
        }

        $text = (string) $value;
        if (trim($text) === '') {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must not be empty.', $path));
        }

        return $text;
    }

    /**
     * Defaulted list lookup: $default is returned when the value is missing,
     * not an array, or normalises to an empty list.
     *
     * @param string[] $default
     *
     * @return string[]
     */
    private function getStringList(string $path, array $default): array
    {
        $value = $this->getValue($path, $default);
        if (!is_array($value)) {
            return $default;
        }

        $items = $this->normalizeMixedStringList($value);

        return $items !== [] ? $items : $default;
    }

    /**
     * Required list lookup.
     *
     * @return string[] non-empty, trimmed, de-duplicated
     *
     * @throws \InvalidArgumentException when the path is missing, the value is not an array,
     *                                   or no usable string remains after normalisation
     */
    private function getRequiredStringList(string $path): array
    {
        $value = $this->getRequiredValue($path);
        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be a string list.', $path));
        }

        $items = $this->normalizeMixedStringList($value);
        if ($items === []) {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must contain at least one string.', $path));
        }

        return $items;
    }

    /**
     * Optional list lookup: a missing path (or an explicit null) yields [].
     *
     * @return string[]
     *
     * @throws \InvalidArgumentException when a value is present but is not an array
     */
    private function getOptionalStringList(string $path): array
    {
        $value = $this->getOptionalValue($path);
        if ($value === null) {
            return [];
        }
        if (!is_array($value)) {
            throw new \InvalidArgumentException(sprintf('RetrieX prompt config value "%s" must be a string list.', $path));
        }

        return $this->normalizeMixedStringList($value);
    }

    /**
     * Asks the injected vocabulary (if any) for a view; returns $fallback when
     * no vocabulary was injected or the view call yields null.
     *
     * NOTE(review): the exact semantics of DomainVocabularyConfig::view() are
     * defined outside this file.
     *
     * @param string[] $fallback
     *
     * @return string[]
     */
    private function vocabularyView(string $path, array $fallback): array
    {
        return $this->vocabulary?->view($path, $fallback) ?? $fallback;
    }

    /**
     * Returns the value at $path, or $default when any path segment is missing.
     * A key that exists with a null value counts as present.
     */
    private function getValue(string $path, mixed $default): mixed
    {
        [$found, $value] = $this->lookup($path);

        return $found ? $value : $default;
    }

    /**
     * Returns the value at $path, or null when any path segment is missing.
     */
    private function getOptionalValue(string $path): mixed
    {
        return $this->getValue($path, null);
    }

    /**
     * Returns the value at $path.
     *
     * @throws \InvalidArgumentException when any path segment is missing
     */
    private function getRequiredValue(string $path): mixed
    {
        [$found, $value] = $this->lookup($path);
        if (!$found) {
            throw new \InvalidArgumentException(sprintf('Missing required RetrieX prompt config path "%s".', $path));
        }

        return $value;
    }

    /**
     * Walks the config array along a dot-separated path.
     *
     * @return array{0: bool, 1: mixed} [found, value]; value is null when not found
     */
    private function lookup(string $path): array
    {
        $node = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($node) || !array_key_exists($segment, $node)) {
                return [false, null];
            }
            $node = $node[$segment];
        }

        return [true, $node];
    }

    /**
     * Reduces an arbitrary value to a list of unique, trimmed, non-empty strings.
     * Non-array input yields []; non-scalar items are skipped; first occurrence wins.
     *
     * @return string[]
     */
    private function normalizeMixedStringList(mixed $value): array
    {
        if (!is_array($value)) {
            return [];
        }

        $unique = [];
        foreach ($value as $candidate) {
            if (!is_scalar($candidate)) {
                continue;
            }
            $text = trim((string) $candidate);
            if ($text !== '' && !in_array($text, $unique, true)) {
                $unique[] = $text;
            }
        }

        return $unique;
    }

    /**
     * Casts a scalar to a trimmed string; anything else (including null) becomes ''.
     */
    private static function scalarToTrimmedString(mixed $value): string
    {
        return is_scalar($value) ? trim((string) $value) : '';
    }

    /**
     * Reports whether $pattern compiles as a PCRE pattern.
     *
     * preg_match() signals a compile failure with a warning plus a false
     * return; the warning is swallowed by a temporary handler so the probe
     * has no visible side effect.
     */
    private static function isCompilableRegex(string $pattern): bool
    {
        set_error_handler(static fn (): bool => true);
        try {
            return preg_match($pattern, '') !== false;
        } finally {
            restore_error_handler();
        }
    }
}