88 lines
2.1 KiB
PHP
88 lines
2.1 KiB
PHP
<?php
|
||
|
||
declare(strict_types=1);
|
||
|
||
namespace App\Knowledge;
|
||
|
||
/**
 * KeywordSimilarity
 *
 * Deterministic and fault-tolerant comparison of two keywords.
 * Returns a similarity score between 0.0 and 1.0.
 *
 * Design goals:
 * - index.json remains unchanged
 * - comparison logic is intelligent (typos, phonetics)
 * - no alias or synonym lists
 * - no LLM dependency
 */
|
||
final class KeywordSimilarity
{
    /** Normalized tokens shorter than this are never matched. */
    private const MIN_TOKEN_LENGTH = 3;

    /** Both normalized tokens must be at least this long before edit distance is consulted. */
    private const MIN_EDIT_DISTANCE_LENGTH = 6;

    private const SCORE_NONE = 0.0;
    private const SCORE_EXACT = 1.0;
    private const SCORE_PHONETIC = 0.85;
    private const SCORE_ONE_EDIT = 0.9;
    private const SCORE_TWO_EDITS = 0.8;

    /** Replacements applied to lower-cased input to fold German umlauts and sharp s. */
    private const UMLAUT_MAP = [
        'ä' => 'ae',
        'ö' => 'oe',
        'ü' => 'ue',
        'ß' => 'ss',
    ];

    /**
     * Compare a query token with an index keyword.
     *
     * Both inputs are normalized first. The checks then run in order and the
     * first one that matches decides the score:
     *
     *  1. identical normalized strings         -> 1.0
     *  2. identical, non-empty metaphone keys  -> 0.85
     *  3. Levenshtein distance of 1 / 2        -> 0.9 / 0.8
     *     (only when both tokens have at least 6 characters)
     *
     * Because of that order, a phonetic match returns 0.85 even when the
     * edit distance between the tokens is 1.
     *
     * @param string $queryToken   Token from user input
     * @param string $indexKeyword Keyword from index.json
     *
     * @return float Similarity score: one of 0.0, 0.8, 0.85, 0.9 or 1.0
     */
    public static function compare(string $queryToken, string $indexKeyword): float
    {
        $query = self::normalize($queryToken);
        $keyword = self::normalize($indexKeyword);

        $queryLength = mb_strlen($query);
        $keywordLength = mb_strlen($keyword);

        // Guard: empty or very short tokens are too ambiguous to match.
        if ($queryLength < self::MIN_TOKEN_LENGTH || $keywordLength < self::MIN_TOKEN_LENGTH) {
            return self::SCORE_NONE;
        }

        // 1. Exact match
        if ($query === $keyword) {
            return self::SCORE_EXACT;
        }

        // 2. Phonetic comparison (metaphone)
        // metaphone() only encodes ASCII letters, so a token made solely of
        // digits or non-Latin letters yields an empty key. An empty key carries
        // no phonetic information; without this check any two such tokens
        // (e.g. "12345" and "67890") would be reported as a phonetic match.
        $queryKey = metaphone($query);
        if ($queryKey !== '' && $queryKey === metaphone($keyword)) {
            return self::SCORE_PHONETIC;
        }

        // 3. Edit distance comparison (only for longer words, where one or two
        //    edits are a small fraction of the token)
        // NOTE(review): levenshtein() compares bytes, so a differing non-ASCII
        // letter may count as more than one edit - confirm this is acceptable.
        if (
            $queryLength >= self::MIN_EDIT_DISTANCE_LENGTH
            && $keywordLength >= self::MIN_EDIT_DISTANCE_LENGTH
        ) {
            $distance = levenshtein($query, $keyword);

            if ($distance === 1) {
                return self::SCORE_ONE_EDIT;
            }

            if ($distance === 2) {
                return self::SCORE_TWO_EDITS;
            }
        }

        // No relevant match
        return self::SCORE_NONE;
    }

    /**
     * Normalize a keyword to ensure stable comparison.
     *
     * Steps: trim and lower-case, drop every character that is not a Unicode
     * letter or digit, then expand German umlauts and sharp s (ä -> ae,
     * ö -> oe, ü -> ue, ß -> ss).
     *
     * @param string $value Raw token or keyword
     *
     * @return string Normalized value; empty when nothing usable remains or
     *                when preg_replace() fails and returns null
     */
    private static function normalize(string $value): string
    {
        $value = mb_strtolower(trim($value));

        // Remove non-alphanumeric characters; preg_replace() returns null on
        // failure, which is mapped to an empty string.
        $value = preg_replace('/[^\p{L}\p{N}]/u', '', $value) ?? '';

        // Normalize German umlauts
        return strtr($value, self::UMLAUT_MAP);
    }
}
|