p29
This commit is contained in:
18
RETRIEX_P29_SHOP_RESULT_DEDUPE_HARDENING_README.md
Normal file
18
RETRIEX_P29_SHOP_RESULT_DEDUPE_HARDENING_README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# RetrieX p29 - Shop Result Dedupe Hardening
|
||||
|
||||
Purpose:
|
||||
- Prevent repeated shop product records from being passed into prompts or rendered in fallback output.
|
||||
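- Treat product number as the strongest deduplication key, so SKU variants that share a product number but have slightly different display names do not appear repeatedly. When a record has no product number, deduplication falls back to its id, then its URL, then its name.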
|
||||
- Keep the existing shop ranking and technical suitability logic unchanged.
|
||||
|
||||
Changed files:
|
||||
- `src/Commerce/ShopSearchService.php`
|
||||
- `src/Agent/PromptBuilder.php`
|
||||
|
||||
Validation:
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
@@ -202,10 +202,10 @@ final readonly class PromptBuilder
|
||||
]);
|
||||
}
|
||||
|
||||
$normalizedShopResults = array_values(array_filter(
|
||||
$normalizedShopResults = $this->deduplicateShopResultsForPrompt(array_values(array_filter(
|
||||
$shopResults,
|
||||
static fn(mixed $product): bool => $product instanceof ShopProductResult
|
||||
));
|
||||
)));
|
||||
|
||||
if ($normalizedShopResults === []) {
|
||||
return $this->implodeBlocks($parts);
|
||||
@@ -1174,6 +1174,81 @@ final readonly class PromptBuilder
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Removes repeated shop results before they are handed to the prompt.
 *
 * The first result carrying a given deduplication key is kept; later results
 * that share any already-registered key are dropped. Results for which no key
 * can be built are always kept.
 *
 * @param ShopProductResult[] $shopResults
 * @return ShopProductResult[]
 */
private function deduplicateShopResultsForPrompt(array $shopResults): array
{
    $knownKeys = [];
    $kept = [];

    foreach ($shopResults as $candidate) {
        // Turn the candidate's keys into a set so the lookup is a single key intersection.
        $candidateKeyMap = array_fill_keys(
            $this->buildShopResultPromptDeduplicationKeys($candidate),
            true
        );

        // An empty key set never intersects, so key-less results fall through and are kept.
        if (array_intersect_key($candidateKeyMap, $knownKeys) !== []) {
            continue;
        }

        $knownKeys += $candidateKeyMap;
        $kept[] = $candidate;
    }

    return $kept;
}
|
||||
|
||||
/**
 * Builds the deduplication key list for a single shop result.
 *
 * The fields are tried in the order product number, id, URL, name. The first
 * one that is non-empty after normalization yields the only key, formatted as
 * "<field>|<normalized value>". An empty list is returned when every field
 * normalizes to an empty string.
 *
 * @return string[]
 */
private function buildShopResultPromptDeduplicationKeys(ShopProductResult $product): array
{
    // Readers are lazy so a field is only touched when every stronger field was empty.
    $fieldReaders = [
        'number' => static fn() => (string) ($product->productNumber ?? ''),
        'id' => static fn() => $product->id,
        'url' => static fn() => (string) ($product->url ?? ''),
        'name' => static fn() => $product->name,
    ];

    foreach ($fieldReaders as $prefix => $readField) {
        $normalized = $this->normalizeShopResultDeduplicationValue($readField());

        if ($normalized !== '') {
            return [$prefix . '|' . $normalized];
        }
    }

    return [];
}
|
||||
|
||||
/**
 * Normalizes a raw field value for use inside a deduplication key.
 *
 * The value is trimmed, lower-cased as UTF-8, has every whitespace run
 * collapsed to a single space, and is trimmed once more.
 */
private function normalizeShopResultDeduplicationValue(string $value): string
{
    $lowered = mb_strtolower(trim($value), 'UTF-8');

    $collapsed = preg_replace('/\s+/u', ' ', $lowered);
    if ($collapsed === null) {
        // preg_replace() reports failure with null; keep the lower-cased text in that case.
        $collapsed = $lowered;
    }

    return trim($collapsed);
}
|
||||
|
||||
private function normalizeNullableBlockText(?string $value): ?string
|
||||
{
|
||||
if ($value === null) {
|
||||
@@ -1233,4 +1308,4 @@ final readonly class PromptBuilder
|
||||
{
|
||||
return max($min, min($max, $value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1665,21 +1665,70 @@ final class ShopSearchService
|
||||
$seen = [];
|
||||
|
||||
foreach ($products as $product) {
|
||||
$key = mb_strtolower(trim(implode($this->shopConfig->getDeduplicationSeparator(), [
|
||||
$product->id,
|
||||
$product->productNumber ?? '',
|
||||
$product->name,
|
||||
$product->url ?? '',
|
||||
])), 'UTF-8');
|
||||
$keys = $this->buildProductDeduplicationKeys($product);
|
||||
|
||||
if (isset($seen[$key])) {
|
||||
if ($keys === []) {
|
||||
$unique[] = $product;
|
||||
continue;
|
||||
}
|
||||
|
||||
$seen[$key] = true;
|
||||
$isDuplicate = false;
|
||||
foreach ($keys as $key) {
|
||||
if (isset($seen[$key])) {
|
||||
$isDuplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($isDuplicate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
foreach ($keys as $key) {
|
||||
$seen[$key] = true;
|
||||
}
|
||||
|
||||
$unique[] = $product;
|
||||
}
|
||||
|
||||
return $unique;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the deduplication key list for a single product.
 *
 * The fields are tried in the order product number, id, URL, name. The first
 * one that is non-empty after normalization yields the only key, formatted as
 * "<field><separator><normalized value>" with the separator taken from the
 * shop configuration. An empty list is returned when every field normalizes
 * to an empty string.
 *
 * @return string[]
 */
private function buildProductDeduplicationKeys(ShopProductResult $product): array
{
    $separator = $this->shopConfig->getDeduplicationSeparator();

    // Readers are lazy so a field is only touched when every stronger field was empty.
    $fieldReaders = [
        'number' => static fn() => $product->productNumber ?? '',
        'id' => static fn() => $product->id,
        'url' => static fn() => (string) ($product->url ?? ''),
        'name' => static fn() => $product->name,
    ];

    foreach ($fieldReaders as $prefix => $readField) {
        $normalized = $this->normalizeDeduplicationValue($readField());

        if ($normalized !== '') {
            return [$prefix . $separator . $normalized];
        }
    }

    return [];
}
|
||||
|
||||
/**
 * Normalizes a raw field value for use inside a deduplication key.
 *
 * The value is trimmed, lower-cased as UTF-8, has every match of the
 * configured whitespace-collapse pattern replaced by a single space, and is
 * trimmed once more.
 */
private function normalizeDeduplicationValue(string $value): string
{
    $lowered = mb_strtolower(trim($value), 'UTF-8');

    $collapsed = preg_replace($this->shopConfig->getWhitespaceCollapsePattern(), ' ', $lowered);
    if ($collapsed === null) {
        // preg_replace() reports failure with null; keep the lower-cased text in that case.
        $collapsed = $lowered;
    }

    return trim($collapsed);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user