optimize commerce keyword filterlist
This commit is contained in:
@@ -64,6 +64,9 @@ parameters:
|
|||||||
- würde
|
- würde
|
||||||
- ich
|
- ich
|
||||||
- gerne
|
- gerne
|
||||||
|
- welchem
|
||||||
|
- kann
|
||||||
|
- mit
|
||||||
- mein
|
- mein
|
||||||
- größer
|
- größer
|
||||||
- zeige
|
- zeige
|
||||||
@@ -121,6 +124,7 @@ parameters:
|
|||||||
- kostet
|
- kostet
|
||||||
- kosten
|
- kosten
|
||||||
- ua
|
- ua
|
||||||
|
- ein
|
||||||
- also
|
- also
|
||||||
- gut
|
- gut
|
||||||
- gute
|
- gute
|
||||||
@@ -129,6 +133,7 @@ parameters:
|
|||||||
- gutes
|
- gutes
|
||||||
- passen
|
- passen
|
||||||
- passend
|
- passend
|
||||||
|
- was
|
||||||
|
|
||||||
search_control_tokens:
|
search_control_tokens:
|
||||||
- shop
|
- shop
|
||||||
|
|||||||
@@ -220,6 +220,42 @@ parameters:
|
|||||||
- '- This block is generated from the current user question and is stricter than broad product-selection wording.'
|
- '- This block is generated from the current user question and is stricter than broad product-selection wording.'
|
||||||
- '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.'
|
- '- For measurement-parameter questions, technical suitability requires explicit positive evidence for the requested parameter in the same source record.'
|
||||||
- '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.'
|
- '- Similar water-treatment parameters, abbreviations, units, product families, search queries, or ranking positions are not enough.'
|
||||||
|
product_specific_rules:
|
||||||
|
- '- Verify every recommended product independently against the requested measurement parameter.'
|
||||||
|
- '- If a retrieved RAG record mentions several products, only use a product for the requested parameter when that product is named in the same sentence, bullet, table row, or clearly bounded product section as the parameter evidence.'
|
||||||
|
- '- Do not transfer measurement suitability from one product, variant, indicator, category, tag, heading, or nearby paragraph to another product.'
|
||||||
|
- '- Generic category, umbrella-topic, device-class, product-family, document-title, tag, or application-area terms are not enough to prove a specific measurement parameter for a specific product.'
|
||||||
|
generic_request_patterns:
|
||||||
|
- '/\b(?:mit|für|fuer|zur|zum)\s+(?:dem\s+)?(?:messparameter|parameter|messwert|messgröße|messgroesse)\s+(?<parameter>[^?.!,;\n]{2,80})/iu'
|
||||||
|
- '/\b(?:messparameter|parameter|messwert|messgröße|messgroesse)\s*(?:für|fuer|von|zur|zum|:)\s*(?<parameter>[^?.!,;\n]{2,80})/iu'
|
||||||
|
generic_positive_context_terms:
|
||||||
|
- Messung
|
||||||
|
- messen
|
||||||
|
- misst
|
||||||
|
- Messbereich
|
||||||
|
- Messparameter
|
||||||
|
- Messgröße
|
||||||
|
- Messgroesse
|
||||||
|
- Bestimmung
|
||||||
|
- bestimmen
|
||||||
|
- Analyse
|
||||||
|
- analysiert
|
||||||
|
- überwachen
|
||||||
|
- ueberwachen
|
||||||
|
- Indikator für
|
||||||
|
- Indikator fuer
|
||||||
|
- Reagenz für
|
||||||
|
- Reagenz fuer
|
||||||
|
- Sensor
|
||||||
|
- Elektrode
|
||||||
|
generic_negative_context_terms:
|
||||||
|
- Betriebsbereich
|
||||||
|
- Betriebsumgebung
|
||||||
|
- Einsatzbedingungen
|
||||||
|
- störungsfrei
|
||||||
|
- stoerungsfrei
|
||||||
|
generic_safe_no_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von {label}.
|
||||||
|
generic_safe_no_accessory_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von {label}.
|
||||||
parameters:
|
parameters:
|
||||||
- id: ph
|
- id: ph
|
||||||
label: pH / pH-Wert
|
label: pH / pH-Wert
|
||||||
|
|||||||
@@ -610,6 +610,7 @@ final readonly class PromptBuilder
|
|||||||
$negativeContextTerms = $this->extractMeasurementGuardStringList($guard, 'negative_context_terms');
|
$negativeContextTerms = $this->extractMeasurementGuardStringList($guard, 'negative_context_terms');
|
||||||
$nonEquivalentTerms = $this->extractMeasurementGuardStringList($guard, 'non_equivalent_terms');
|
$nonEquivalentTerms = $this->extractMeasurementGuardStringList($guard, 'non_equivalent_terms');
|
||||||
$label = $this->normalizeBlockText((string) ($guard['label'] ?? 'requested measurement parameter'));
|
$label = $this->normalizeBlockText((string) ($guard['label'] ?? 'requested measurement parameter'));
|
||||||
|
$strictNoEvidence = (bool) ($guard['strict_no_evidence'] ?? true);
|
||||||
$resolvedRequestedRole = $requestedRole ?? $this->resolveRequestedProductRole($prompt);
|
$resolvedRequestedRole = $requestedRole ?? $this->resolveRequestedProductRole($prompt);
|
||||||
$safeNoEvidenceAnswer = $this->normalizeBlockText((string) (
|
$safeNoEvidenceAnswer = $this->normalizeBlockText((string) (
|
||||||
$resolvedRequestedRole === 'accessory_or_consumable'
|
$resolvedRequestedRole === 'accessory_or_consumable'
|
||||||
@@ -650,6 +651,7 @@ final readonly class PromptBuilder
|
|||||||
}
|
}
|
||||||
|
|
||||||
$rules = $this->config->getMeasurementEvidenceIntroRules();
|
$rules = $this->config->getMeasurementEvidenceIntroRules();
|
||||||
|
$rules = array_merge($rules, $this->config->getMeasurementEvidenceProductSpecificRules());
|
||||||
$rules[] = '- User requested measurement parameter: ' . $label . '.';
|
$rules[] = '- User requested measurement parameter: ' . $label . '.';
|
||||||
$rules[] = '- Positive parameter terms for this request: ' . implode(', ', $positiveTerms) . '.';
|
$rules[] = '- Positive parameter terms for this request: ' . implode(', ', $positiveTerms) . '.';
|
||||||
if ($positiveContextTerms !== []) {
|
if ($positiveContextTerms !== []) {
|
||||||
@@ -666,7 +668,11 @@ final readonly class PromptBuilder
|
|||||||
$rules[] = '- RAG/URL evidence scan for this exact parameter: ' . ($knowledgeHasEvidence ? 'explicit positive evidence found.' : 'no explicit positive evidence found.');
|
$rules[] = '- RAG/URL evidence scan for this exact parameter: ' . ($knowledgeHasEvidence ? 'explicit positive evidence found.' : 'no explicit positive evidence found.');
|
||||||
$rules = array_merge($rules, $shopEvidenceLines);
|
$rules = array_merge($rules, $shopEvidenceLines);
|
||||||
|
|
||||||
if (!$knowledgeHasEvidence && !$shopHasEvidence) {
|
if (!$strictNoEvidence && !$knowledgeHasEvidence && !$shopHasEvidence) {
|
||||||
|
$rules[] = '- The deterministic exact-term scan did not find product-specific evidence. The answer may still use a clearly equivalent named measurement parameter from the same source record, but must not infer suitability from generic categories, document titles, tags, search terms, neighbouring products, or broad umbrella-topic wording.';
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($strictNoEvidence && !$knowledgeHasEvidence && !$shopHasEvidence) {
|
||||||
$rules[] = '- Mandatory answer behavior: do not recommend a product as suitable for this measurement parameter.';
|
$rules[] = '- Mandatory answer behavior: do not recommend a product as suitable for this measurement parameter.';
|
||||||
if ($safeNoEvidenceAnswer !== '') {
|
if ($safeNoEvidenceAnswer !== '') {
|
||||||
$rules[] = '- Start the answer with this meaning in the user language: ' . $safeNoEvidenceAnswer;
|
$rules[] = '- Start the answer with this meaning in the user language: ' . $safeNoEvidenceAnswer;
|
||||||
@@ -724,14 +730,114 @@ final readonly class PromptBuilder
|
|||||||
|
|
||||||
foreach ($requestTerms as $term) {
|
foreach ($requestTerms as $term) {
|
||||||
if ($this->containsMeasurementTerm($normalizedPrompt, $term)) {
|
if ($this->containsMeasurementTerm($normalizedPrompt, $term)) {
|
||||||
|
$parameter['strict_no_evidence'] = true;
|
||||||
|
|
||||||
return $parameter;
|
return $parameter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return $this->resolveGenericRequestedMeasurementGuard($prompt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds an ad-hoc measurement guard from the first configured generic
 * request pattern that matches the prompt and yields at least one term.
 *
 * The parameter text is taken from the named capture group "parameter",
 * falling back to the first numbered group. The returned guard carries
 * 'strict_no_evidence' => false.
 *
 * @param string $prompt User prompt to scan.
 *
 * @return array<string, mixed>|null Guard definition, or null when no pattern produced usable terms.
 */
private function resolveGenericRequestedMeasurementGuard(string $prompt): ?array
{
    $requestPatterns = $this->config->getMeasurementEvidenceGenericRequestPatterns();

    foreach ($requestPatterns as $requestPattern) {
        $captures = [];

        // Patterns come from configuration; a pattern that fails to compile
        // or does not match is skipped without emitting a warning.
        if (@preg_match($requestPattern, $prompt, $captures) === 1) {
            $candidate = $captures['parameter'] ?? ($captures[1] ?? '');

            if (is_scalar($candidate)) {
                $terms = $this->buildGenericMeasurementParameterTerms((string) $candidate);

                if ($terms !== []) {
                    $label = implode(' / ', $terms);
                    $guardId = 'generic_' . substr(sha1($label), 0, 12);

                    // Config lookups stay inside the match branch and keep
                    // their original order.
                    $positiveContext = $this->config->getMeasurementEvidenceGenericPositiveContextTerms();
                    $negativeContext = $this->config->getMeasurementEvidenceGenericNegativeContextTerms();
                    $noEvidenceAnswer = $this->renderMeasurementEvidenceTemplate(
                        $this->config->getMeasurementEvidenceGenericSafeNoEvidenceAnswerTemplate(),
                        $label
                    );
                    $noAccessoryEvidenceAnswer = $this->renderMeasurementEvidenceTemplate(
                        $this->config->getMeasurementEvidenceGenericSafeNoAccessoryEvidenceAnswerTemplate(),
                        $label
                    );

                    return [
                        'id' => $guardId,
                        'label' => $label,
                        'request_terms' => $terms,
                        'positive_terms' => $terms,
                        'positive_context_terms' => $positiveContext,
                        'negative_context_terms' => $negativeContext,
                        'non_equivalent_terms' => [],
                        'safe_no_evidence_answer_de' => $noEvidenceAnswer,
                        'safe_no_accessory_evidence_answer_de' => $noAccessoryEvidenceAnswer,
                        'strict_no_evidence' => false,
                    ];
                }
            }
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Splits a raw parameter phrase into a de-duplicated list of parameter terms.
 *
 * The phrase is normalized first. The text outside parentheses is handed to
 * appendGenericMeasurementParameterParts(), followed by the content of every
 * parenthetical that is 1 to 40 characters long.
 *
 * @param string $rawParameter Parameter phrase to split.
 *
 * @return string[] Unique terms, re-indexed from 0; empty when the normalized phrase is empty.
 */
private function buildGenericMeasurementParameterTerms(string $rawParameter): array
{
    $rawParameter = $this->normalizeBlockText($rawParameter);
    if ($rawParameter === '') {
        return [];
    }

    $terms = [];

    // Fall back to the unmodified phrase if the replacement fails (null).
    $withoutParentheses = preg_replace('/\([^)]*\)/u', ' ', $rawParameter) ?? $rawParameter;
    $this->appendGenericMeasurementParameterParts($terms, $withoutParentheses);

    // preg_match_all() returns the NUMBER of matches (or false on error).
    // Comparing with "=== 1" skipped every phrase that contains two or more
    // parentheticals; "> 0" handles any positive count and still rejects
    // both 0 and false.
    if (preg_match_all('/\(([^)]{1,40})\)/u', $rawParameter, $matches) > 0) {
        foreach ($matches[1] as $parenthetical) {
            $this->appendGenericMeasurementParameterParts($terms, (string) $parenthetical);
        }
    }

    return array_values(array_unique($terms));
}
|
||||||
|
|
||||||
|
/**
 * Splits a value on list separators and appends each usable part to $terms.
 *
 * Separators are ",", ";", "/" and the whole words "oder", "und", "or" and
 * "and" (case-insensitive). A part is appended only when, after trimming
 * edge punctuation, it contains at least one letter or digit, is at least
 * two characters long and is not already present in $terms.
 *
 * @param string[] $terms Collected terms; extended in place.
 * @param string   $value Text to split into parts.
 */
private function appendGenericMeasurementParameterParts(array &$terms, string $value): void
{
    $value = $this->normalizeBlockText($value);
    if ($value === '') {
        return;
    }

    // preg_split() yields false on error; treat the whole value as one part then.
    $parts = preg_split('/\s*(?:,|;|\/|\boder\b|\bund\b|\bor\b|\band\b)\s*/iu', $value) ?: [$value];

    // Characters stripped from both ends of a part. trim() interprets its
    // mask byte by byte, so a mask containing multibyte characters (dashes,
    // typographic quotes) can cut single bytes out of unrelated UTF-8
    // characters such as "Ü" and leave an invalid string behind. A /u regex
    // strips whole code points instead.
    $edgeClass = '[ \t\n\r\x00\x0B\-–—:()\[\]{}"\'`“”„]';
    $edgePattern = '/\A' . $edgeClass . '+|' . $edgeClass . '+\z/u';

    foreach ($parts as $part) {
        $part = $this->normalizeBlockText((string) $part);
        // Keep the untrimmed part if the replacement fails (null).
        $part = preg_replace($edgePattern, '', $part) ?? $part;

        // Skip parts that are empty or consist only of punctuation/symbols.
        if ($part === '' || preg_match('/[\p{L}\p{N}]/u', $part) !== 1) {
            continue;
        }

        // Skip single-character parts and exact duplicates.
        if (mb_strlen($part, 'UTF-8') < 2 || in_array($part, $terms, true)) {
            continue;
        }

        $terms[] = $part;
    }
}
|
||||||
|
|
||||||
|
/**
 * Fills the "{label}" placeholder of an answer template.
 *
 * @param string $template Template text that may contain "{label}".
 * @param string $label    Text substituted for every "{label}" occurrence.
 *
 * @return string The template with the placeholder replaced.
 */
private function renderMeasurementEvidenceTemplate(string $template, string $label): string
{
    $rendered = str_replace('{label}', $label, $template);

    return $rendered;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return string[]
|
* @return string[]
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -581,6 +581,48 @@ final class PromptBuilderConfig
|
|||||||
return $this->getRequiredStringList('measurement_evidence_guard.intro_rules');
|
return $this->getRequiredStringList('measurement_evidence_guard.intro_rules');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under
 * "measurement_evidence_guard.product_specific_rules".
 *
 * @return string[]
 */
public function getMeasurementEvidenceProductSpecificRules(): array
{
    $configPath = 'measurement_evidence_guard.product_specific_rules';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under
 * "measurement_evidence_guard.generic_request_patterns".
 *
 * @return string[]
 */
public function getMeasurementEvidenceGenericRequestPatterns(): array
{
    $configPath = 'measurement_evidence_guard.generic_request_patterns';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under
 * "measurement_evidence_guard.generic_positive_context_terms".
 *
 * @return string[]
 */
public function getMeasurementEvidenceGenericPositiveContextTerms(): array
{
    $configPath = 'measurement_evidence_guard.generic_positive_context_terms';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string list stored under
 * "measurement_evidence_guard.generic_negative_context_terms".
 *
 * @return string[]
 */
public function getMeasurementEvidenceGenericNegativeContextTerms(): array
{
    $configPath = 'measurement_evidence_guard.generic_negative_context_terms';

    return $this->getRequiredStringList($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string stored under
 * "measurement_evidence_guard.generic_safe_no_evidence_answer_template_de".
 *
 * @return string
 */
public function getMeasurementEvidenceGenericSafeNoEvidenceAnswerTemplate(): string
{
    $configPath = 'measurement_evidence_guard.generic_safe_no_evidence_answer_template_de';

    return $this->getRequiredString($configPath);
}
|
||||||
|
|
||||||
|
/**
 * Reads the required string stored under
 * "measurement_evidence_guard.generic_safe_no_accessory_evidence_answer_template_de".
 *
 * @return string
 */
public function getMeasurementEvidenceGenericSafeNoAccessoryEvidenceAnswerTemplate(): string
{
    $configPath = 'measurement_evidence_guard.generic_safe_no_accessory_evidence_answer_template_de';

    return $this->getRequiredString($configPath);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return array<int, array<string, mixed>>
|
* @return array<int, array<string, mixed>>
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user