add comments

This commit is contained in:
team 1
2026-04-16 20:26:25 +02:00
parent e5f035a961
commit eeebdfa21a
7 changed files with 246 additions and 81 deletions

View File

@@ -4,11 +4,25 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
final class QueryEnricher
use App\Config\QueryEnricherConfig;
final readonly class QueryEnricher
{
/**
 * Creates the enricher with the configuration it reads its term mapping from.
 *
 * @param QueryEnricherConfig $config Source of the enrichment mapping
 *                                    (queried via getEnrichQueryList()).
 */
public function __construct(private QueryEnricherConfig $config)
{
}
/**
* Enriches the query with mapped counterpart terms.
*
* Example:
* - input: "water hardness device"
* - output: "water hardness device | Synonyms: residual hardness, model"
*/
public function enrichPrompt(string $query): string
{
// Return early if the input is empty or contains only whitespace.
if (trim($query) === '') {
return '';
}
@@ -19,19 +33,19 @@ final class QueryEnricher
// Normalize the query for case-insensitive matching.
$normalizedQuery = $this->normalize($query);
// Expect an associative array like:
// Expected format:
// [
// 'hose' => 'jeans',
// 'jacke' => 'mantel',
// 'trousers' => 'jeans',
// 'jacket' => 'coat',
// ]
$mapping = $this->enrichQueryList();
$mapping = $this->config->getEnrichQueryList();
// Build a bidirectional lookup table:
// key -> value
// value -> key
$lookup = $this->buildBidirectionalLookup($mapping);
// Split the query into searchable words/tokens.
// Split the query into searchable tokens.
$tokens = $this->tokenize($normalizedQuery);
$matches = [];
@@ -46,17 +60,17 @@ final class QueryEnricher
// Remove duplicates while preserving order.
$matches = array_values(array_unique($matches));
// If nothing was found, return the original query unchanged.
// If no matches were found, return the original query unchanged.
if ($matches === []) {
return $originalQuery;
}
// Append the matched counterpart terms to the original prompt.
return $originalQuery . " | Pseudonyme: " . implode(', ', $matches);
// Append the matched counterpart terms to the original query.
return $originalQuery . ' | Synonyms: ' . implode(', ', $matches);
}
/**
* Normalize a string for case-insensitive comparison.
* Normalizes a string for case-insensitive comparison.
*/
private function normalize(string $value): string
{
@@ -64,8 +78,9 @@ final class QueryEnricher
}
/**
* Tokenize the query into words.
* Splits on everything that is not a letter or number.
* Tokenizes the query into words.
*
* Splits on every character that is not a letter or number.
*/
private function tokenize(string $value): array
{
@@ -73,20 +88,20 @@ final class QueryEnricher
}
/**
* Build a lookup table that works in both directions.
* Builds a lookup table that works in both directions.
*
* Example:
* [
* 'hose' => 'jeans',
* 'jacke' => 'mantel',
* 'trousers' => 'jeans',
* 'jacket' => 'coat',
* ]
*
* becomes:
* [
* 'hose' => 'jeans',
* 'jeans' => 'hose',
* 'jacke' => 'mantel',
* 'mantel' => 'jacke',
* 'trousers' => 'jeans',
* 'jeans' => 'trousers',
* 'jacket' => 'coat',
* 'coat' => 'jacket',
* ]
*/
private function buildBidirectionalLookup(array $mapping): array
@@ -94,8 +109,8 @@ final class QueryEnricher
$lookup = [];
foreach ($mapping as $key => $value) {
$key = trim((string)$key);
$value = trim((string)$value);
$key = trim((string) $key);
$value = trim((string) $value);
// Skip incomplete pairs.
if ($key === '' || $value === '') {
@@ -114,18 +129,4 @@ final class QueryEnricher
return $lookup;
}
/**
 * Returns the built-in term pairs used for query enrichment.
 *
 * Each key is a term and each value is its mapped counterpart; the pairs
 * are returned in the order they are declared below.
 *
 * NOTE(review): some terms occur in more than one pair (e.g. 'Seminar',
 * 'Indikator', 'Resthärte'). If the consumer inverts this map, such entries
 * may overwrite each other — confirm against the lookup builder.
 *
 * @return array<string, string>
 */
public function enrichQueryList(): array
{
    $pairs = [];

    $pairs['Wasserhärte'] = 'Resthärte';
    $pairs['Gerät'] = 'Modell';
    $pairs['Indikator'] = 'Chemie';
    $pairs['Seminar'] = 'Webinar';
    $pairs['Schulung'] = 'Seminar';
    $pairs['Indikatoren'] = 'Indikator';
    $pairs['Wasserhärte-Grenzwert'] = 'Resthärte';
    $pairs['Resthärte-Grenzwert'] = 'Wasserhärte';

    return $pairs;
}
}