diff --git a/config/retriex/agent.yaml b/config/retriex/agent.yaml index b9a2905..b2a8db8 100644 --- a/config/retriex/agent.yaml +++ b/config/retriex/agent.yaml @@ -565,6 +565,71 @@ parameters: - orp - '0,02' + product_attribute_query_cleanup: + enabled: true + # For direct product/accessory lookups with comparative attribute + # constraints, keep the concrete product type and application terms but + # do not send range words/numeric thresholds to the plain text shop + # search. Example: "Anschlusskabel pH/Redox länger 20m" becomes + # "anschlusskabel redox" so the shop can return 25m/50m/100m cables. + min_query_tokens_after_cleanup: 2 + product_type_terms: + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrodenanschlusskabel + - messkabel + - verbindungskabel + - steckerkabel + - elektrode + - sensor + - puffer + - kalibrierpuffer + stop_terms: + - zeige + - zeig + - suche + - such + - mir + - bitte + - für + - fuer + - nach + - mit + - ohne + - von + - zum + - zur + - der + - die + - das + - ein + - eine + - einen + - länger + - laenger + - lang + - kürzer + - kuerzer + - größer + - groesser + - kleiner + - über + - ueber + - unter + - mindestens + - maximal + - maximum + - minimum + - ab + - bis + - mehr + - weniger + - als + comparative_constraint_patterns: + - '/\b(?:länger|laenger|kürzer|kuerzer|größer|groesser|kleiner|über|ueber|unter|mindestens|maximal|maximum|minimum|ab|bis|mehr\s+als|weniger\s+als)\s+(?P<value>\d+(?:[,.]\d+)?\s*[\p{L}µ°%]*)\b/iu' + context_usage: referential_terms: - der diff --git a/config/retriex/prompt.yaml b/config/retriex/prompt.yaml index 2cab632..3d4bed5 100644 --- a/config/retriex/prompt.yaml +++ b/config/retriex/prompt.yaml @@ -118,6 +118,15 @@ parameters: - zusatz - ergänzung - ergaenzung + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrode + - puffer + - kalibrierpuffer + - kalibrierlösung + - kalibrierloesung sections: system_label: SYSTEM user_question_label: USER QUESTION @@ -199,6 
+208,15 @@ parameters: - loesung - solution - teststreifen + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrode + - puffer + - kalibrierpuffer + - kalibrierlösung + - kalibrierloesung - filter - pumpenkopf - motorblock @@ -261,6 +279,24 @@ parameters: - stoerungsfrei generic_safe_no_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Treffer für die Messung von {label}. generic_safe_no_accessory_evidence_answer_template_de: Ich finde in den bereitgestellten Quellen keinen sicher belegten Indikator oder ein Reagenz für die Messung von {label}. + accessory_lookup_guard_terms: + - indikator + - indikatoren + - indicator + - reagenz + - reagenzien + - reagent + - teststreifen + accessory_lookup_passthrough_terms: + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrode + - puffer + - kalibrierpuffer + - kalibrierlösung + - kalibrierloesung rule_templates: shop_positive_evidence: '- Shop record {index} ({product}): explicit positive evidence for {label} is present in this same record.' shop_no_evidence: '- No shop product record shown to the model contains explicit positive evidence for {label} in the same record.' @@ -537,8 +573,11 @@ parameters: - '- For product-selection answers, keep the answer minimal: suitable product if explicitly supported, exact evidence, current shop fields if same product identity is clear. Do not add sections for Vorteile, Einsatzbereiche, Messprinzip, or Hinweise unless directly asked and explicitly sourced.' - '- For product-selection answers tied to a numeric value/range, do not include an indicator field unless the same retrieved record explicitly connects the selected product, numeric value/range, and indicator code.' accessory_rules: - - '- If the user asks for a matching accessory, separate the answer into: main device and matching accessory.' - - '- The main device must come first. The accessory must not replace the main device.' 
+ - '- If the user directly asks for accessories, cables, electrodes, buffers, kits, sets, indicators, reagents, or consumables, answer the accessory request first instead of reframing it as a request for a measuring device.' + - '- For direct accessory shop searches, do not introduce Testomat, measuring-device, or main-device caveats unless the user asks for a device or the provided sources explicitly require a device context.' + - '- If the shop product name itself explicitly contains the requested accessory type and parameter, such as pH/Redox, treat it as a commercial accessory match and list the exact shop fields. Do not demand separate proof that the accessory itself measures the parameter.' + - '- If the user asks for a matching accessory for a named main device, separate the answer into: main device and matching accessory.' + - '- The main device must come first only when a main device is explicitly requested or named.' - '- Only name an accessory as matching if compatibility is explicitly grounded in the provided sources.' - '- Do not call accessories, indicators, reagents, kits, sets, or consumables a device, measuring device, or main product unless the source explicitly says so.' diff --git a/config/retriex/search_repair.yaml b/config/retriex/search_repair.yaml index bdf2fb8..3848b65 100644 --- a/config/retriex/search_repair.yaml +++ b/config/retriex/search_repair.yaml @@ -5,6 +5,56 @@ parameters: strict_requested_accessory_code_repair: true prefer_prompt_anchored_model_for_requested_accessory_code: true + direct_product_attribute_lookup: + enabled: true + min_query_tokens_after_cleanup: 2 + # Query repair must stay on the requested product/accessory type for + # direct attribute lookups. It may relax comparative constraints, but it + # must not expand to unrelated RAG model/device candidates. 
+ stop_terms: + - zeige + - zeig + - suche + - such + - mir + - bitte + - für + - fuer + - nach + - mit + - ohne + - von + - zum + - zur + - der + - die + - das + - ein + - eine + - einen + - länger + - laenger + - lang + - kürzer + - kuerzer + - größer + - groesser + - kleiner + - über + - ueber + - unter + - mindestens + - maximal + - maximum + - minimum + - ab + - bis + - mehr + - weniger + - als + comparative_constraint_patterns: + - '/\b(?:länger|laenger|kürzer|kuerzer|größer|groesser|kleiner|über|ueber|unter|mindestens|maximal|maximum|minimum|ab|bis|mehr\s+als|weniger\s+als)\s+(?P<value>\d+(?:[,.]\d+)?\s*[\p{L}µ°%]*)\b/iu' + requested_accessory_code_fallback_query_templates: - '{term} {code}' requested_accessory_code_fallback_terms: diff --git a/config/retriex/vocabulary.yaml b/config/retriex/vocabulary.yaml index a9ebb22..fa16980 100644 --- a/config/retriex/vocabulary.yaml +++ b/config/retriex/vocabulary.yaml @@ -362,6 +362,20 @@ parameters: - preis - infos - wissen + direct_product_type_terms: + add: + - anschlusskabel + - kabel + - sensorkabel + - elektrodenkabel + - elektrodenanschlusskabel + - messkabel + - verbindungskabel + - steckerkabel + - elektrode + - sensor + - puffer + - kalibrierpuffer accessory_candidate_terms: add: - indikator diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 4558072..800a322 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -1651,12 +1651,122 @@ final readonly class AgentRunner } $guardedQuery = $this->guardStandaloneOptimizedShopQuery($prompt, $shopSearchQuery); + $query = $guardedQuery !== $shopSearchQuery + ? 
/**
 * Removes comparative attribute constraints and configured stop terms from a shop search query.
 *
 * The cleanup is applied only when it is enabled in the agent runner config, the prompt
 * combined with the query contains a configured product type term, and at least one
 * configured constraint pattern yields tokens. In every other case, and whenever fewer
 * tokens than the configured minimum (never less than one) would remain, the trimmed
 * input query is returned as is.
 *
 * @param string $prompt          Current user input; used for detection only.
 * @param string $shopSearchQuery Query that is about to be sent to the shop search.
 *
 * @return string Remaining unique tokens in first-seen order joined by single spaces,
 *                or the trimmed input query when no cleanup applies.
 */
private function cleanupDirectProductAttributeShopQuery(string $prompt, string $shopSearchQuery): string
{
    $shopSearchQuery = trim($shopSearchQuery);
    if ($shopSearchQuery === '') {
        return $shopSearchQuery;
    }

    $config = $this->agentRunnerConfig;
    if (!$config->isShopQueryProductAttributeCleanupEnabled()) {
        return $shopSearchQuery;
    }

    // Detection looks at prompt and query together; only the query itself is rewritten below.
    $haystack = trim($prompt . ' ' . $shopSearchQuery);

    $productTypeTerms = $config->getShopQueryProductAttributeCleanupProductTypeTerms();
    if (!$this->containsAnyShopQueryTerm($haystack, $productTypeTerms)) {
        return $shopSearchQuery;
    }

    $constraintPatterns = $config->getShopQueryProductAttributeCleanupComparativeConstraintPatterns();
    $constraintTokens = $this->extractConfiguredShopQueryConstraintTokens($haystack, $constraintPatterns);
    if ($constraintTokens === []) {
        return $shopSearchQuery;
    }

    // Token => true lookup of everything that must not reach the shop search.
    $blocked = array_fill_keys($constraintTokens, true);
    foreach ($config->getShopQueryProductAttributeCleanupStopTerms() as $stopTerm) {
        foreach ($this->tokenizeShopQueryCandidate($stopTerm) as $stopToken) {
            $blocked[$stopToken] = true;
        }
    }

    // Keyed by token so duplicates collapse while the first-seen order is preserved.
    $survivors = [];
    foreach ($this->tokenizeShopQueryCandidate($shopSearchQuery) as $queryToken) {
        if (!isset($blocked[$queryToken]) && !isset($survivors[$queryToken])) {
            $survivors[$queryToken] = $queryToken;
        }
    }

    $minimumTokens = max(1, $config->getShopQueryProductAttributeCleanupMinTokens());
    if (count($survivors) < $minimumTokens) {
        return $shopSearchQuery;
    }

    $cleaned = implode(' ', $survivors);
    if ($cleaned === '') {
        return $shopSearchQuery;
    }

    return $cleaned;
}

/**
 * Tells whether the text contains every token of at least one of the given terms.
 *
 * Text and terms are tokenized with tokenizeShopQueryCandidate(); terms that produce
 * no tokens are ignored.
 *
 * @param string[] $terms
 */
private function containsAnyShopQueryTerm(string $text, array $terms): bool
{
    $available = array_fill_keys($this->tokenizeShopQueryCandidate($text), true);
    if ($available === []) {
        return false;
    }

    foreach ($terms as $term) {
        $required = $this->tokenizeShopQueryCandidate($term);
        if ($required === []) {
            continue;
        }

        foreach ($required as $requiredToken) {
            if (!isset($available[$requiredToken])) {
                // One token of this term is missing: move on to the next term.
                continue 2;
            }
        }

        return true;
    }

    return false;
}

/**
 * Collects the unique tokens captured by the configured constraint patterns.
 *
 * For every match the named group "value" is preferred and the first capture group
 * is the fallback. A pattern for which preg_match_all() reports failure is skipped.
 *
 * @param string[] $patterns
 * @return string[]
 */
private function extractConfiguredShopQueryConstraintTokens(string $text, array $patterns): array
{
    $collected = [];

    foreach ($patterns as $pattern) {
        $matchSets = [];
        // Suppression keeps a failing pattern from raising a warning; it is skipped instead.
        $matchCount = @preg_match_all($pattern, $text, $matchSets, PREG_SET_ORDER);
        if ($matchCount === false) {
            continue;
        }

        foreach ($matchSets as $matchSet) {
            $captured = $matchSet['value'] ?? $matchSet[1] ?? '';
            if (!is_scalar($captured)) {
                continue;
            }

            foreach ($this->tokenizeShopQueryCandidate((string) $captured) as $capturedToken) {
                $collected[$capturedToken] = $capturedToken;
            }
        }
    }

    return array_values($collected);
}
$this->resolveRequestedProductRole($prompt); $measurementGuard = $this->resolveRequestedMeasurementGuard($prompt); + if ($measurementGuard !== null && $this->shouldSkipMeasurementEvidenceForAccessoryLookup($prompt, $requestedRole)) { + $measurementGuard = null; + } $lines = []; foreach ($limitedShopResults as $i => $product) { @@ -779,13 +782,17 @@ final readonly class PromptBuilder return ''; } + $resolvedRequestedRole = $requestedRole ?? $this->resolveRequestedProductRole($prompt); + if ($this->shouldSkipMeasurementEvidenceForAccessoryLookup($prompt, $resolvedRequestedRole)) { + return ''; + } + $positiveTerms = $this->extractMeasurementGuardStringList($guard, 'positive_terms'); $positiveContextTerms = $this->extractMeasurementGuardStringList($guard, 'positive_context_terms'); $negativeContextTerms = $this->extractMeasurementGuardStringList($guard, 'negative_context_terms'); $nonEquivalentTerms = $this->extractMeasurementGuardStringList($guard, 'non_equivalent_terms'); $label = $this->normalizeBlockText((string) ($guard['label'] ?? $this->config->getMeasurementEvidenceRuleTemplate('default_requested_parameter_label'))); $strictNoEvidence = (bool) ($guard['strict_no_evidence'] ?? true); - $resolvedRequestedRole = $requestedRole ?? $this->resolveRequestedProductRole($prompt); $safeNoEvidenceAnswer = $this->normalizeBlockText((string) ( $resolvedRequestedRole === 'accessory_or_consumable' ? ($guard['safe_no_accessory_evidence_answer_de'] ?? $guard['safe_no_evidence_answer_de'] ?? 
/**
 * Decides whether measurement-evidence handling is skipped for an accessory lookup.
 *
 * Returns true only when the requested role is "accessory_or_consumable", the normalized
 * prompt contains none of the configured guard terms, and it contains at least one of
 * the configured passthrough terms.
 *
 * @param string $prompt        Current user input.
 * @param string $requestedRole Resolved requested product role.
 */
private function shouldSkipMeasurementEvidenceForAccessoryLookup(string $prompt, string $requestedRole): bool
{
    $isAccessoryRequest = $requestedRole === 'accessory_or_consumable';
    if (!$isAccessoryRequest) {
        return false;
    }

    $needle = $this->normalizeForMeasurementMatching($prompt);

    $guardTerms = $this->config->getMeasurementEvidenceAccessoryLookupGuardTerms();
    $mentionsGuardedAccessory = $this->containsAnyPromptKeyword($needle, $guardTerms);

    // Short-circuit: passthrough terms are consulted only when no guard term matched.
    return !$mentionsGuardedAccessory
        && $this->containsAnyPromptKeyword(
            $needle,
            $this->config->getMeasurementEvidenceAccessoryLookupPassthroughTerms()
        );
}
/**
 * Builds repair queries for a direct product attribute lookup.
 *
 * The primary query is cleaned first, then the prompt; empty results are dropped and
 * duplicates are removed while the first occurrence keeps its position.
 *
 * @return string[]
 */
private function buildDirectProductAttributeRepairQueries(string $prompt, string $primaryQuery): array
{
    $candidates = [
        $this->cleanupDirectProductAttributeRepairQuery($primaryQuery),
        $this->cleanupDirectProductAttributeRepairQuery($prompt),
    ];

    $nonEmpty = array_filter(
        $candidates,
        static fn (string $candidate): bool => $candidate !== ''
    );

    return array_values(array_unique($nonEmpty));
}

/**
 * Reduces a source text to a repair query without constraint tokens and stop terms.
 *
 * Returns an empty string when the trimmed source is empty, when no constraint pattern
 * yields tokens, when fewer tokens than the configured minimum remain, or when the
 * result no longer contains a configured product type term.
 */
private function cleanupDirectProductAttributeRepairQuery(string $source): string
{
    $source = trim($source);
    if ($source === '') {
        return '';
    }

    $constraintTokens = $this->extractDirectProductAttributeConstraintTokens($source);
    if ($constraintTokens === []) {
        return '';
    }

    // Token => true lookup of everything that must be dropped from the repair query.
    $blocked = array_fill_keys($constraintTokens, true);
    foreach ($this->config->getDirectProductAttributeLookupStopTerms() as $stopTerm) {
        foreach ($this->tokenize($stopTerm) as $stopToken) {
            $blocked[$stopToken] = true;
        }
    }

    // Keyed by token so duplicates collapse while the first-seen order is preserved.
    $survivors = [];
    foreach ($this->tokenize($source) as $sourceToken) {
        if (!isset($blocked[$sourceToken]) && !isset($survivors[$sourceToken])) {
            $survivors[$sourceToken] = $sourceToken;
        }
    }

    if (count($survivors) < $this->config->getDirectProductAttributeLookupMinTokens()) {
        return '';
    }

    $candidate = implode(' ', $survivors);
    if (!$this->containsDirectProductTypeTerm($candidate)) {
        return '';
    }

    return $candidate;
}

/**
 * Collects the unique tokens captured by the configured comparative constraint patterns.
 *
 * For every match the named group "value" is preferred and the first capture group
 * is the fallback. A pattern for which preg_match_all() reports failure is skipped.
 *
 * @return string[]
 */
private function extractDirectProductAttributeConstraintTokens(string $text): array
{
    $collected = [];

    foreach ($this->config->getDirectProductAttributeLookupComparativeConstraintPatterns() as $pattern) {
        $matchSets = [];
        // Suppression keeps a failing pattern from raising a warning; it is skipped instead.
        $matchCount = @preg_match_all($pattern, $text, $matchSets, PREG_SET_ORDER);
        if ($matchCount === false) {
            continue;
        }

        foreach ($matchSets as $matchSet) {
            $captured = $matchSet['value'] ?? $matchSet[1] ?? '';
            if (!is_scalar($captured)) {
                continue;
            }

            foreach ($this->tokenize((string) $captured) as $capturedToken) {
                $collected[$capturedToken] = $capturedToken;
            }
        }
    }

    return array_values($collected);
}

/**
 * Tells whether the text is a direct product attribute lookup: the repair is enabled,
 * a configured product type term is present, and a constraint pattern yields tokens.
 */
private function isDirectProductAttributeLookup(string $text): bool
{
    if (!$this->config->isDirectProductAttributeLookupRepairEnabled()) {
        return false;
    }

    if (!$this->containsDirectProductTypeTerm($text)) {
        return false;
    }

    return $this->extractDirectProductAttributeConstraintTokens($text) !== [];
}

/**
 * Tells whether the text contains every token of at least one configured product type term.
 *
 * Terms that produce no tokens are ignored.
 */
private function containsDirectProductTypeTerm(string $text): bool
{
    $available = array_fill_keys($this->tokenize($text), true);
    if ($available === []) {
        return false;
    }

    foreach ($this->config->getDirectProductAttributeLookupProductTypeTerms() as $term) {
        $required = $this->tokenize($term);
        if ($required === []) {
            continue;
        }

        foreach ($required as $requiredToken) {
            if (!isset($available[$requiredToken])) {
                // One token of this term is missing: move on to the next term.
                continue 2;
            }
        }

        return true;
    }

    return false;
}

// AgentRunnerConfig accessors that share these wrapped lines with the methods above.

/**
 * Reads the required flag "shop_prompt.product_attribute_query_cleanup.enabled".
 */
public function isShopQueryProductAttributeCleanupEnabled(): bool
{
    $enabled = $this->getRequiredBool('shop_prompt.product_attribute_query_cleanup.enabled');

    return $enabled;
}

/**
 * Reads the required integer "shop_prompt.product_attribute_query_cleanup.min_query_tokens_after_cleanup".
 */
public function getShopQueryProductAttributeCleanupMinTokens(): int
{
    $minTokens = $this->getRequiredInt('shop_prompt.product_attribute_query_cleanup.min_query_tokens_after_cleanup');

    return $minTokens;
}

/**
 * Reads the required list "shop_prompt.product_attribute_query_cleanup.product_type_terms".
 *
 * @return string[]
 */
public function getShopQueryProductAttributeCleanupProductTypeTerms(): array
{
    $terms = $this->getRequiredStringList('shop_prompt.product_attribute_query_cleanup.product_type_terms');

    return $terms;
}
/**
 * Reads the required list "shop_prompt.product_attribute_query_cleanup.stop_terms".
 *
 * @return string[]
 */
public function getShopQueryProductAttributeCleanupStopTerms(): array
{
    $stopTerms = $this->getRequiredStringList('shop_prompt.product_attribute_query_cleanup.stop_terms');

    return $stopTerms;
}

/**
 * Reads the required list "shop_prompt.product_attribute_query_cleanup.comparative_constraint_patterns".
 *
 * @return string[]
 */
public function getShopQueryProductAttributeCleanupComparativeConstraintPatterns(): array
{
    $patterns = $this->getRequiredStringList('shop_prompt.product_attribute_query_cleanup.comparative_constraint_patterns');

    return $patterns;
}

/**
 * Reads the required string "shop_prompt.intro".
 */
public function getShopPromptIntro(): string
{
    $intro = $this->getRequiredString('shop_prompt.intro');

    return $intro;
}

// PromptBuilderConfig accessors that share this wrapped line with the methods above.

/**
 * Reads the required list "measurement_evidence_guard.accessory_lookup_guard_terms".
 *
 * @return string[]
 */
public function getMeasurementEvidenceAccessoryLookupGuardTerms(): array
{
    $guardTerms = $this->getRequiredStringList('measurement_evidence_guard.accessory_lookup_guard_terms');

    return $guardTerms;
}

/**
 * Reads the required list "measurement_evidence_guard.accessory_lookup_passthrough_terms".
 *
 * @return string[]
 */
public function getMeasurementEvidenceAccessoryLookupPassthroughTerms(): array
{
    $passthroughTerms = $this->getRequiredStringList('measurement_evidence_guard.accessory_lookup_passthrough_terms');

    return $passthroughTerms;
}
/**
 * Reads the required flag "direct_product_attribute_lookup.enabled".
 */
public function isDirectProductAttributeLookupRepairEnabled(): bool
{
    $enabled = $this->requiredBool('direct_product_attribute_lookup.enabled');

    return $enabled;
}

/**
 * Reads "direct_product_attribute_lookup.min_query_tokens_after_cleanup" through requiredPositiveInt().
 */
public function getDirectProductAttributeLookupMinTokens(): int
{
    $minTokens = $this->requiredPositiveInt('direct_product_attribute_lookup.min_query_tokens_after_cleanup');

    return $minTokens;
}

/**
 * Resolves the product type terms through configOrVocabularyStringList(), passing the
 * config key first and the vocabulary key second.
 *
 * @return string[]
 */
public function getDirectProductAttributeLookupProductTypeTerms(): array
{
    $configKey = 'direct_product_attribute_lookup.product_type_terms';
    $vocabularyKey = 'search_repair.direct_product_type_terms';

    return $this->configOrVocabularyStringList($configKey, $vocabularyKey);
}

/**
 * Reads the required list "direct_product_attribute_lookup.stop_terms".
 *
 * @return string[]
 */
public function getDirectProductAttributeLookupStopTerms(): array
{
    $stopTerms = $this->requiredStringList('direct_product_attribute_lookup.stop_terms');

    return $stopTerms;
}

/**
 * Reads the required list "direct_product_attribute_lookup.comparative_constraint_patterns".
 *
 * @return string[]
 */
public function getDirectProductAttributeLookupComparativeConstraintPatterns(): array
{
    $patterns = $this->requiredStringList('direct_product_attribute_lookup.comparative_constraint_patterns');

    return $patterns;
}