lexical logic

This commit is contained in:
team2
2026-04-20 21:46:42 +02:00
parent 2587ac8b4b
commit 065f59c090
9 changed files with 2576 additions and 326 deletions

View File

@@ -1,22 +1,180 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Holds the query-enrichment vocabulary and exposes it as a normalized,
 * deduplicated "term => replacement term" map.
 *
 * The class is stateless: everything is derived from ENRICH_QUERY_LIST on
 * each call.
 */
final readonly class QueryEnricherConfig
{
    /**
     * Keep the enrichment vocabulary in the class for now.
     *
     * Important:
     * - This is intentionally NOT externalized yet.
     * - Add or maintain the current project-specific mappings here.
     * - The later move to external config/files can happen separately.
     *
     * Supported shapes (both may be mixed in the same list):
     *
     * 1) Simple mapping (string key => string value):
     *    [
     *        'water hardness' => 'residual hardness',
     *        'device' => 'instrument',
     *    ]
     *
     * 2) Small synonym groups (nested list of strings):
     *    [
     *        ['water hardness', 'residual hardness', 'hardness'],
     *        ['device', 'instrument', 'meter'],
     *    ]
     *
     * The public API stays intentionally simple:
     * - getEnrichQueryList(): array<string, string>
     *
     * This keeps QueryEnricher generic while the domain vocabulary
     * deliberately remains inside this class for now.
     *
     * Replace the example entries below with your real project mappings.
     *
     * @var array<int|string, mixed>
     */
    private const ENRICH_QUERY_LIST = [
        // -----------------------------------------------------------------
        // Example mappings.
        // Replace / extend these with your current real project mappings.
        // -----------------------------------------------------------------
        'water hardness' => 'residual hardness',
        'device' => 'instrument',
        'gerät' => 'produkt',
        'rebuild' => 'reindex',
        ['measuring device', 'meter', 'instrument'],
    ];

    /**
     * Returns a normalized, deduplicated mapping for the QueryEnricher.
     *
     * Output format:
     *    [
     *        'term a' => 'term b',
     *        'term c' => 'term d',
     *    ]
     *
     * Rules:
     * - ignore empty / invalid (non-string) values
     * - trim, lowercase and collapse whitespace
     * - ignore self-mappings
     * - preserve the first valid rule if duplicates normalize to the same key
     *
     * @return array<string, string>
     */
    public function getEnrichQueryList(): array
    {
        $rules = [];

        foreach (self::ENRICH_QUERY_LIST as $key => $value) {
            // A nested array is a synonym group; anything else is treated as
            // a single "key => value" candidate. Non-string parts collapse to
            // '' and are rejected by isValidPair() below.
            $pairs = is_array($value)
                ? $this->expandGroup($value)
                : [[
                    $this->normalizePhrase(is_string($key) ? $key : ''),
                    $this->normalizePhrase(is_string($value) ? $value : ''),
                ]];

            foreach ($pairs as [$left, $right]) {
                if (!$this->isValidPair($left, $right)) {
                    continue;
                }

                // First valid rule wins; later duplicates are ignored.
                $rules[$left] ??= $right;
            }
        }

        return $rules;
    }

    /**
     * Returns true when at least one valid enrichment rule exists.
     */
    public function hasRules(): bool
    {
        return $this->getEnrichQueryList() !== [];
    }

    /**
     * Turns a synonym group into a conservative chain of normalized pairs:
     * ['a', 'b', 'c'] => [['a', 'b'], ['b', 'c']]
     *
     * Non-string and empty items are skipped, and items that normalize to the
     * same phrase are collapsed (first occurrence keeps its position). Groups
     * with fewer than two distinct phrases yield no pairs.
     *
     * The chain is kept deliberately small; per the original design note,
     * QueryEnricher is expected to build the bidirectional lookup itself.
     *
     * @param array<int|string, mixed> $group
     *
     * @return list<array{0: string, 1: string}>
     */
    private function expandGroup(array $group): array
    {
        $unique = [];

        foreach ($group as $item) {
            if (!is_string($item)) {
                continue;
            }

            $phrase = $this->normalizePhrase($item);

            if ($phrase !== '') {
                // Keyed by phrase to deduplicate while preserving order.
                $unique[$phrase] = $phrase;
            }
        }

        $phrases = array_values($unique);
        $pairs = [];

        for ($i = 1, $count = count($phrases); $i < $count; $i++) {
            $pairs[] = [$phrases[$i - 1], $phrases[$i]];
        }

        return $pairs;
    }

    /**
     * A pair is usable when both sides are non-empty and differ from each
     * other (self-mappings carry no information).
     */
    private function isValidPair(string $left, string $right): bool
    {
        return $left !== '' && $right !== '' && $left !== $right;
    }

    /**
     * Normalizes a phrase: trims it, lowercases it (UTF-8 aware) and collapses
     * every run of whitespace into a single space.
     *
     * Returns '' for empty or whitespace-only input.
     */
    private function normalizePhrase(string $value): string
    {
        $value = trim($value);

        if ($value === '') {
            return '';
        }

        $value = mb_strtolower($value, 'UTF-8');

        // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8);
        // fall back to the lowercased value instead of losing the phrase.
        $value = preg_replace('/\s+/u', ' ', $value) ?? $value;

        return trim($value);
    }
}