diff --git a/RETRIEX_P29_SHOP_RESULT_DEDUPE_HARDENING_README.md b/RETRIEX_P29_SHOP_RESULT_DEDUPE_HARDENING_README.md
new file mode 100644
index 0000000..c482f1b
--- /dev/null
+++ b/RETRIEX_P29_SHOP_RESULT_DEDUPE_HARDENING_README.md
@@ -0,0 +1,18 @@
+# RetrieX p29 - Shop Result Dedupe Hardening
+
+Purpose:
+- Prevent repeated shop product records from being passed into prompts or rendered in fallback output.
+- Treat product number as the strongest deduplication key, so SKU variants with slightly different display names do not appear repeatedly.
+- Keep the existing shop ranking and technical suitability logic unchanged.
+
+Changed files:
+- `src/Commerce/ShopSearchService.php`
+- `src/Agent/PromptBuilder.php`
+
+Validation:
+```bash
+bin/console mto:agent:config:validate
+bin/console mto:agent:regression:test
+bin/console mto:agent:config:audit-source --details
+bin/console mto:agent:config:audit-patterns --details
+```
diff --git a/src/Agent/PromptBuilder.php b/src/Agent/PromptBuilder.php
index fceb9d1..9d964c8 100644
--- a/src/Agent/PromptBuilder.php
+++ b/src/Agent/PromptBuilder.php
@@ -202,10 +202,10 @@ final readonly class PromptBuilder
             ]);
         }
 
-        $normalizedShopResults = array_values(array_filter(
+        $normalizedShopResults = $this->deduplicateShopResultsForPrompt(array_values(array_filter(
             $shopResults,
             static fn(mixed $product): bool => $product instanceof ShopProductResult
-        ));
+        )));
 
         if ($normalizedShopResults === []) {
             return $this->implodeBlocks($parts);
@@ -1174,6 +1174,96 @@ final readonly class PromptBuilder
         return false;
     }
 
+    /**
+     * Removes repeated shop products before they are rendered into the prompt.
+     *
+     * The first occurrence of a key wins, so the incoming order is preserved.
+     * Products that yield no key cannot be compared and are always kept.
+     *
+     * @param ShopProductResult[] $shopResults
+     * @return ShopProductResult[]
+     */
+    private function deduplicateShopResultsForPrompt(array $shopResults): array
+    {
+        $unique = [];
+        $seen = [];
+
+        foreach ($shopResults as $product) {
+            $keys = $this->buildShopResultPromptDeduplicationKeys($product);
+
+            if ($keys === []) {
+                $unique[] = $product;
+                continue;
+            }
+
+            $isDuplicate = false;
+            foreach ($keys as $key) {
+                if (isset($seen[$key])) {
+                    $isDuplicate = true;
+                    break;
+                }
+            }
+
+            if ($isDuplicate) {
+                continue;
+            }
+
+            foreach ($keys as $key) {
+                $seen[$key] = true;
+            }
+
+            $unique[] = $product;
+        }
+
+        return $unique;
+    }
+
+    /**
+     * Builds the deduplication key for one product from its strongest identifier.
+     *
+     * Precedence: product number, then id, then URL, then name. Only the first
+     * non-empty identifier is used, so the result holds at most one key.
+     *
+     * @return string[]
+     */
+    private function buildShopResultPromptDeduplicationKeys(ShopProductResult $product): array
+    {
+        $productNumber = $this->normalizeShopResultDeduplicationValue((string) ($product->productNumber ?? ''));
+        if ($productNumber !== '') {
+            return ['number|' . $productNumber];
+        }
+
+        // Cast explicitly: the normalizer only accepts strings.
+        $id = $this->normalizeShopResultDeduplicationValue((string) $product->id);
+        if ($id !== '') {
+            return ['id|' . $id];
+        }
+
+        $url = $this->normalizeShopResultDeduplicationValue((string) ($product->url ?? ''));
+        if ($url !== '') {
+            return ['url|' . $url];
+        }
+
+        $name = $this->normalizeShopResultDeduplicationValue((string) $product->name);
+        if ($name !== '') {
+            return ['name|' . $name];
+        }
+
+        return [];
+    }
+
+    /**
+     * Normalizes a value for comparison: lower-cased, whitespace runs collapsed, trimmed.
+     */
+    private function normalizeShopResultDeduplicationValue(string $value): string
+    {
+        $value = mb_strtolower(trim($value), 'UTF-8');
+        // preg_replace() returns null on failure; keep the unreplaced value then.
+        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;
+
+        return trim($value);
+    }
+
     private function normalizeNullableBlockText(?string $value): ?string
     {
         if ($value === null) {
@@ -1233,4 +1323,4 @@ final readonly class PromptBuilder
     {
         return max($min, min($max, $value));
     }
-}
\ No newline at end of file
+}
diff --git a/src/Commerce/ShopSearchService.php b/src/Commerce/ShopSearchService.php
index b5baa08..43b1016 100644
--- a/src/Commerce/ShopSearchService.php
+++ b/src/Commerce/ShopSearchService.php
@@ -1665,21 +1665,80 @@ final class ShopSearchService
         $seen = [];
 
         foreach ($products as $product) {
-            $key = mb_strtolower(trim(implode($this->shopConfig->getDeduplicationSeparator(), [
-                $product->id,
-                $product->productNumber ?? '',
-                $product->name,
-                $product->url ?? '',
-            ])), 'UTF-8');
+            $keys = $this->buildProductDeduplicationKeys($product);
 
-            if (isset($seen[$key])) {
+            if ($keys === []) {
+                $unique[] = $product;
                 continue;
             }
 
-            $seen[$key] = true;
+            $isDuplicate = false;
+            foreach ($keys as $key) {
+                if (isset($seen[$key])) {
+                    $isDuplicate = true;
+                    break;
+                }
+            }
+
+            if ($isDuplicate) {
+                continue;
+            }
+
+            foreach ($keys as $key) {
+                $seen[$key] = true;
+            }
+
             $unique[] = $product;
         }
 
         return $unique;
     }
-}
\ No newline at end of file
+
+    /**
+     * Builds the deduplication key for one product from its strongest identifier.
+     *
+     * Precedence: product number, then id, then URL, then name. Only the first
+     * non-empty identifier is used, so the result holds at most one key.
+     *
+     * @return string[]
+     */
+    private function buildProductDeduplicationKeys(ShopProductResult $product): array
+    {
+        $separator = $this->shopConfig->getDeduplicationSeparator();
+
+        $productNumber = $this->normalizeDeduplicationValue((string) ($product->productNumber ?? ''));
+        if ($productNumber !== '') {
+            return ['number' . $separator . $productNumber];
+        }
+
+        $id = $this->normalizeDeduplicationValue((string) $product->id);
+        if ($id !== '') {
+            return ['id' . $separator . $id];
+        }
+
+        $url = $this->normalizeDeduplicationValue((string) ($product->url ?? ''));
+        if ($url !== '') {
+            return ['url' . $separator . $url];
+        }
+
+        $name = $this->normalizeDeduplicationValue((string) $product->name);
+        if ($name !== '') {
+            return ['name' . $separator . $name];
+        }
+
+        return [];
+    }
+
+    /**
+     * Normalizes a value for comparison: lower-cased, every match of the configured
+     * whitespace-collapse pattern replaced by a single space, then trimmed.
+     */
+    private function normalizeDeduplicationValue(string $value): string
+    {
+        $value = mb_strtolower(trim($value), 'UTF-8');
+        // preg_replace() returns null on failure; keep the unreplaced value then.
+        $value = preg_replace($this->shopConfig->getWhitespaceCollapsePattern(), ' ', $value) ?? $value;
+
+        return trim($value);
+    }
+}