diff --git a/config/retriex/commerce.yaml b/config/retriex/commerce.yaml index cd390db..6c6817d 100644 --- a/config/retriex/commerce.yaml +++ b/config/retriex/commerce.yaml @@ -64,6 +64,9 @@ parameters: - würde - ich - gerne + - welchem + - kann + - mit - mein - größer - zeige @@ -121,6 +124,7 @@ parameters: - kostet - kosten - ua + - ein - also - gut - gute @@ -129,6 +133,7 @@ parameters: - gutes - passen - passend + - was search_control_tokens: - shop diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index 6958552..cab3c88 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -220,6 +220,42 @@ parameters: - '- This block is generated from the current user question and is stricter than broad product-selection wording.' - '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.' - '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.' + product_specific_rules: + - '- Verify every recommended product independently against the requested measurement parameter.' + - '- If a retrieved RAG record mentions several products, only use a product for the requested parameter when that product is named in the same sentence, bullet, table row, or clearly bounded product section as the parameter evidence.' + - '- Do not transfer measurement suitability from one product, variant, indicator, category, tag, heading, or nearby paragraph to another product.' + - '- Generic category, umbrella-topic, device-class, product-family, document-title, tag, or application-area terms are not enough to prove a specific measurement parameter for a specific product.' 
+        generic_request_patterns: # every pattern must capture the requested parameter as named group "parameter"; PromptBuilder reads $matches['parameter'] (falling back to group 1)
+            - '/\b(?:mit|für|fuer|zur|zum)\s+(?:dem\s+)?(?:messparameter|parameter|messwert|messgröße|messgroesse)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
+            - '/\b(?:messparameter|parameter|messwert|messgröße|messgroesse)\s*(?:für|fuer|von|zur|zum|:)\s*(?<parameter>[^?.!,;\n]{2,80})/iu'
+        generic_positive_context_terms:
+            - Messung
+            - messen
+            - misst
+            - Messbereich
+            - Messparameter
+            - Messgröße
+            - Messgroesse
+            - Bestimmung
+            - bestimmen
+            - Analyse
+            - analysiert
+            - überwachen
+            - ueberwachen
+            - Indikator für
+            - Indikator fuer
+            - Reagenz für
+            - Reagenz fuer
+            - Sensor
+            - Elektrode
+        generic_negative_context_terms:
+            - Betriebsbereich
+            - Betriebsumgebung
+            - Einsatzbedingungen
+            - störungsfrei
+            - stoerungsfrei
+        generic_safe_no_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von {label}.
+        generic_safe_no_accessory_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von {label}.
         parameters:
             - id: ph
               label: pH / pH-Wert
diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php
index c6d0e5f..fceb9d1 100644
--- a/src/Agent/PromptBuilder.php
+++ b/src/Agent/PromptBuilder.php
@@ -610,6 +610,7 @@ final readonly class PromptBuilder
         $negativeContextTerms = $this->extractMeasurementGuardStringList($guard, 'negative_context_terms');
         $nonEquivalentTerms = $this->extractMeasurementGuardStringList($guard, 'non_equivalent_terms');
         $label = $this->normalizeBlockText((string) ($guard['label'] ?? 'requested measurement parameter'));
+        $strictNoEvidence = (bool) ($guard['strict_no_evidence'] ?? true);
         $resolvedRequestedRole = $requestedRole ??
$this->resolveRequestedProductRole($prompt); $safeNoEvidenceAnswer = $this->normalizeBlockText((string) ( $resolvedRequestedRole === 'accessory_or_consumable' @@ -650,6 +651,7 @@ final readonly class PromptBuilder } $rules = $this->config->getMeasurementEvidenceIntroRules(); + $rules = array_merge($rules, $this->config->getMeasurementEvidenceProductSpecificRules()); $rules[] = '- User requested measurement parameter: ' . $label . '.'; $rules[] = '- Positive parameter terms for this request: ' . implode(', ', $positiveTerms) . '.'; if ($positiveContextTerms !== []) { @@ -666,7 +668,11 @@ final readonly class PromptBuilder $rules[] = '- RAG/URL evidence scan for this exact parameter: ' . ($knowledgeHasEvidence ? 'explicit positive evidence found.' : 'no explicit positive evidence found.'); $rules = array_merge($rules, $shopEvidenceLines); - if (!$knowledgeHasEvidence && !$shopHasEvidence) { + if (!$strictNoEvidence && !$knowledgeHasEvidence && !$shopHasEvidence) { + $rules[] = '- The deterministic exact-term scan did not find product-specific evidence. The answer may still use a clearly equivalent named measurement parameter from the same source record, but must not infer suitability from generic categories, document titles, tags, search terms, neighbouring products, or broad umbrella-topic wording.'; + } + + if ($strictNoEvidence && !$knowledgeHasEvidence && !$shopHasEvidence) { $rules[] = '- Mandatory answer behavior: do not recommend a product as suitable for this measurement parameter.'; if ($safeNoEvidenceAnswer !== '') { $rules[] = '- Start the answer with this meaning in the user language: ' . 
$safeNoEvidenceAnswer;
@@ -724,14 +730,147 @@ final readonly class PromptBuilder
 
             foreach ($requestTerms as $term) {
                 if ($this->containsMeasurementTerm($normalizedPrompt, $term)) {
+                    $parameter['strict_no_evidence'] = true;
+
                     return $parameter;
                 }
             }
         }
 
+        return $this->resolveGenericRequestedMeasurementGuard($prompt);
+    }
+
+    /**
+     * Fallback guard resolution that derives the guard from the prompt wording itself.
+     *
+     * Tries each configured generic request pattern against the raw prompt and builds a
+     * guard from the first pattern that yields at least one usable parameter term. The
+     * guard is marked `strict_no_evidence => false`.
+     *
+     * @return array<string, mixed>|null Guard definition, or null when no pattern yields a term.
+     */
+    private function resolveGenericRequestedMeasurementGuard(string $prompt): ?array
+    {
+        foreach ($this->config->getMeasurementEvidenceGenericRequestPatterns() as $pattern) {
+            // Patterns come from configuration. "@" keeps an invalid pattern from raising a
+            // warning; preg_match() then returns false and the pattern is skipped.
+            if (@preg_match($pattern, $prompt, $matches) !== 1) {
+                continue;
+            }
+
+            // Prefer the named group "parameter"; fall back to the first capture group.
+            $rawParameter = $matches['parameter'] ?? ($matches[1] ?? '');
+            if (!is_scalar($rawParameter)) {
+                continue;
+            }
+
+            $parameterTerms = $this->buildGenericMeasurementParameterTerms((string) $rawParameter);
+            if ($parameterTerms === []) {
+                continue;
+            }
+
+            $label = implode(' / ', $parameterTerms);
+
+            return [
+                'id' => 'generic_' . substr(sha1($label), 0, 12),
+                'label' => $label,
+                'request_terms' => $parameterTerms,
+                'positive_terms' => $parameterTerms,
+                'positive_context_terms' => $this->config->getMeasurementEvidenceGenericPositiveContextTerms(),
+                'negative_context_terms' => $this->config->getMeasurementEvidenceGenericNegativeContextTerms(),
+                'non_equivalent_terms' => [],
+                'safe_no_evidence_answer_de' => $this->renderMeasurementEvidenceTemplate(
+                    $this->config->getMeasurementEvidenceGenericSafeNoEvidenceAnswerTemplate(),
+                    $label
+                ),
+                'safe_no_accessory_evidence_answer_de' => $this->renderMeasurementEvidenceTemplate(
+                    $this->config->getMeasurementEvidenceGenericSafeNoAccessoryEvidenceAnswerTemplate(),
+                    $label
+                ),
+                'strict_no_evidence' => false,
+            ];
+        }
+
         return null;
     }
 
+    /**
+     * Splits a captured parameter phrase into distinct search terms.
+     *
+     * Parts outside parentheses are collected first, followed by the content of every
+     * parenthetical (up to 40 characters each).
+     *
+     * @return string[]
+     */
+    private function buildGenericMeasurementParameterTerms(string $rawParameter): array
+    {
+        $rawParameter = $this->normalizeBlockText($rawParameter);
+        if ($rawParameter === '') {
+            return [];
+        }
+
+        $terms = [];
+        $withoutParentheses = preg_replace('/\([^)]*\)/u', ' ', $rawParameter) ?? $rawParameter;
+        $this->appendGenericMeasurementParameterParts($terms, $withoutParentheses);
+
+        // preg_match_all() returns the number of matches, so test "> 0": comparing with
+        // "=== 1" would drop every parenthetical as soon as the phrase contains two.
+        if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) > 0) {
+            foreach ($matches[1] as $parenthetical) {
+                $this->appendGenericMeasurementParameterParts($terms, (string) $parenthetical);
+            }
+        }
+
+        return array_values(array_unique($terms));
+    }
+
+    /**
+     * Splits $value on separators (comma, semicolon, slash, oder/und/or/and) and appends parts.
+     *
+     * A part is kept when it contains a letter or digit, is at least two characters long
+     * and is not already in $terms.
+     *
+     * @param string[] $terms Accumulator, extended in place.
+     */
+    private function appendGenericMeasurementParameterParts(array &$terms, string $value): void
+    {
+        $value = $this->normalizeBlockText($value);
+        if ($value === '') {
+            return;
+        }
+
+        $parts = preg_split('/\s*(?:,|;|\/|\boder\b|\bund\b|\bor\b|\band\b)\s*/iu', $value) ?: [$value];
+
+        // trim() works on bytes: multi-byte characters in its list (dashes, typographic
+        // quotes) would be treated as loose bytes and could cut a neighbouring multi-byte
+        // character such as a trailing "Ü" in half. Strip per code point with /u instead.
+        $edgeChars = ' \t\n\r\x00\x0B\-–—:()\[\]{}"\'`“”„';
+        $edgePattern = '/^[' . $edgeChars . ']+|[' . $edgeChars . ']+$/u';
+
+        foreach ($parts as $part) {
+            $part = $this->normalizeBlockText((string) $part);
+            $part = preg_replace($edgePattern, '', $part) ?? $part;
+
+            if ($part === '' || preg_match('/[\p{L}\p{N}]/u', $part) !== 1) {
+                continue;
+            }
+
+            if (mb_strlen($part, 'UTF-8') < 2 || in_array($part, $terms, true)) {
+                continue;
+            }
+
+            $terms[] = $part;
+        }
+    }
+
+    /**
+     * Replaces the `{label}` placeholder of a configured answer template with $label.
+     */
+    private function renderMeasurementEvidenceTemplate(string $template, string $label): string
+    {
+        return strtr($template, ['{label}' => $label]);
+    }
+
     /**
      * @return string[]
      */
diff --git a/src/Config/PromptBuilderConfig.php b/src/Config/PromptBuilderConfig.php
index cb896a4..c539992 100644
--- a/src/Config/PromptBuilderConfig.php
+++ b/src/Config/PromptBuilderConfig.php
@@ -581,6 +581,57 @@ final class PromptBuilderConfig
         return $this->getRequiredStringList('measurement_evidence_guard.intro_rules');
     }
 
+    /**
+     * @return string[]
+     */
+    public function getMeasurementEvidenceProductSpecificRules(): array
+    {
+        return $this->getRequiredStringList('measurement_evidence_guard.product_specific_rules');
+    }
+
+    /**
+     * Complete PCRE patterns (delimiters and flags included) that PromptBuilder runs against
+     * the prompt; the requested parameter is read from the named group "parameter" or group 1.
+     *
+     * @return string[]
+     */
+    public function getMeasurementEvidenceGenericRequestPatterns(): array
+    {
+        return $this->getRequiredStringList('measurement_evidence_guard.generic_request_patterns');
+    }
+
+    /**
+     * @return string[]
+     */
+    public function getMeasurementEvidenceGenericPositiveContextTerms(): array
+    {
+        return $this->getRequiredStringList('measurement_evidence_guard.generic_positive_context_terms');
+    }
+
+    /**
+     * @return string[]
+     */
+    public function getMeasurementEvidenceGenericNegativeContextTerms(): array
+    {
+        return $this->getRequiredStringList('measurement_evidence_guard.generic_negative_context_terms');
+    }
+
+    /**
+     * Answer template for the no-evidence case; PromptBuilder substitutes `{label}` in it.
+     */
+    public function getMeasurementEvidenceGenericSafeNoEvidenceAnswerTemplate(): string
+    {
+        return $this->getRequiredString('measurement_evidence_guard.generic_safe_no_evidence_answer_template_de');
+    }
+
+    /**
+     * Accessory/consumable variant of the no-evidence template; `{label}` is substituted too.
+     */
+    public function getMeasurementEvidenceGenericSafeNoAccessoryEvidenceAnswerTemplate(): string
+    {
+        return $this->getRequiredString('measurement_evidence_guard.generic_safe_no_accessory_evidence_answer_template_de');
+    }
+
     /**
      * @return array>
      */