patch 15
This commit is contained in:
@@ -474,25 +474,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return false;
|
||||
}
|
||||
|
||||
$patterns = [
|
||||
'/\balle\b/u',
|
||||
'/\bliste\b/u',
|
||||
'/\bauflistung\b/u',
|
||||
'/\buebersicht\b/u',
|
||||
'/\bübersicht\b/u',
|
||||
'/\bsortiment\b/u',
|
||||
'/\bwelche\b.*\b(gibt|verfügbar|verfuegbar|existieren)\b/u',
|
||||
'/\bzeige\b.*\b(produkte|geraete|geräte|modelle|artikel)\b/u',
|
||||
'/\bwas\b.*\b(gibt es|verfügbar|verfuegbar)\b/u',
|
||||
];
|
||||
|
||||
foreach ($patterns as $pattern) {
|
||||
if (preg_match($pattern, $normalized) === 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return $this->matchesAnyPattern($normalized, $this->retrieverConfig->catalogListShortcutPatterns());
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -872,21 +854,14 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
}
|
||||
}
|
||||
|
||||
if (str_starts_with($token, 'indikator')) {
|
||||
$variants[] = 'indikator';
|
||||
$variants[] = 'indikatortyp';
|
||||
}
|
||||
foreach ($this->retrieverConfig->exactSelectionTokenVariantPrefixes() as $prefix => $configuredVariants) {
|
||||
if (!str_starts_with($token, $prefix)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (str_starts_with($token, 'grenzwert')) {
|
||||
$variants[] = 'grenzwert';
|
||||
}
|
||||
|
||||
if (str_starts_with($token, 'messbereich')) {
|
||||
$variants[] = 'messbereich';
|
||||
}
|
||||
|
||||
if (str_starts_with($token, 'testomat')) {
|
||||
$variants[] = 'testomat';
|
||||
foreach ($configuredVariants as $variant) {
|
||||
$variants[] = $variant;
|
||||
}
|
||||
}
|
||||
|
||||
return array_values(array_unique($variants));
|
||||
@@ -903,12 +878,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
true
|
||||
);
|
||||
|
||||
$asksIndicator = isset($tokens['indikator'])
|
||||
|| isset($tokens['indikatortyp'])
|
||||
|| isset($tokens['reagenz'])
|
||||
|| isset($tokens['reagens'])
|
||||
|| str_contains($normalized, 'mit welchem')
|
||||
|| str_contains($normalized, 'womit');
|
||||
$asksIndicator = $this->containsAnyConfiguredToken(
|
||||
$tokens,
|
||||
$this->retrieverConfig->exactSelectionIndicatorQuestionTokens()
|
||||
) || $this->containsAnyConfiguredPhrase(
|
||||
$normalized,
|
||||
$this->retrieverConfig->exactSelectionIndicatorQuestionPhrases()
|
||||
);
|
||||
|
||||
return [
|
||||
'asks_indicator' => $asksIndicator,
|
||||
@@ -931,24 +907,26 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return $score;
|
||||
}
|
||||
|
||||
if (preg_match('/verf(?:ü|ue)gbare\s+indikatortypen|indikatortypen|indikatorvarianten/iu', $rawText) === 1) {
|
||||
if ($this->matchesAnyPattern($rawText, $this->retrieverConfig->exactSelectionIndicatorTableHeadingPatterns())) {
|
||||
$score += 14.0;
|
||||
}
|
||||
|
||||
if (preg_match('/\|\s*(?:typ|indikator)\s*\|\s*(?:grenzwert|messbereich|bereich)/iu', $rawText) === 1) {
|
||||
if ($this->matchesAnyPattern($rawText, $this->retrieverConfig->exactSelectionIndicatorTableHeaderPatterns())) {
|
||||
$score += 10.0;
|
||||
}
|
||||
|
||||
if (preg_match('/\|\s*[A-Z]{0,4}\s*\d{2,4}\s*[A-Z]?\s*\|\s*\d/iu', $rawText) === 1) {
|
||||
if ($this->matchesAnyPattern($rawText, $this->retrieverConfig->exactSelectionIndicatorTableRowPatterns())) {
|
||||
$score += 8.0;
|
||||
}
|
||||
|
||||
if (
|
||||
str_contains($normalizedHaystack, 'indikator')
|
||||
&& (
|
||||
str_contains($normalizedHaystack, 'grenzwert')
|
||||
|| str_contains($normalizedHaystack, 'messbereich')
|
||||
|| str_contains($normalizedHaystack, 'bereich')
|
||||
$this->containsAnyConfiguredPhrase(
|
||||
$normalizedHaystack,
|
||||
$this->retrieverConfig->exactSelectionIndicatorTableRequiredPrimaryTerms()
|
||||
)
|
||||
&& $this->containsAnyConfiguredPhrase(
|
||||
$normalizedHaystack,
|
||||
$this->retrieverConfig->exactSelectionIndicatorTableRequiredContextTerms()
|
||||
)
|
||||
) {
|
||||
$score += 5.0;
|
||||
@@ -959,27 +937,55 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
/**
 * Reports whether $token is a member of a token list, using a strict in_array() comparison.
 *
 * NOTE(review): two return statements follow one another below — one against an inline
 * literal list, one against $this->retrieverConfig->exactDetailTokens(). As written, only
 * the first can execute. This looks like the removed and added sides of a diff rendered
 * without +/- markers; presumably the config-backed lookup is the intended final body —
 * confirm against the actual file.
 */
private function isExactDetailToken(string $token): bool
|
||||
{
|
||||
return in_array($token, [
|
||||
'indikator', 'indikatoren', 'indikatortyp', 'indikatortypen', 'reagenz', 'reagens', 'grenzwert',
|
||||
'messbereich', 'bereich', 'wasserhaerte', 'wasserhärte',
|
||||
'resthaerte', 'resthärte', 'haerte', 'härte', 'aufloesung',
|
||||
'auflösung', 'schnittstelle', 'relais', 'fehlercode', 'code',
|
||||
'wert', 'werte',
|
||||
|
||||
], true);
|
||||
return in_array($token, $this->retrieverConfig->exactDetailTokens(), true);
|
||||
}
|
||||
|
||||
/**
 * Reports whether $token is a member of a token list, using a strict in_array() comparison.
 *
 * NOTE(review): two return statements follow one another below — one against an inline
 * literal list, one against $this->retrieverConfig->genericExactSelectionTokens(). As
 * written, only the first can execute. This looks like the removed and added sides of a
 * diff rendered without +/- markers; presumably the config-backed lookup is the intended
 * final body — confirm against the actual file.
 */
private function isGenericExactSelectionToken(string $token): bool
|
||||
{
|
||||
return in_array($token, [
|
||||
'vorherige', 'vorheriger', 'nutzerfrage', 'aktuelle',
|
||||
'folgefrage', 'frage', 'antwort', 'technische', 'referenzanker',
|
||||
'referenzaufloesung', 'referenzauflösung', 'faktenquelle', 'keine',
|
||||
'welche', 'welcher', 'welches',
|
||||
'welchem', 'welchen', 'wird', 'werden', 'wurde', 'kann', 'koennen',
|
||||
'können', 'mit', 'der', 'die', 'das', 'den', 'dem', 'ein', 'eine',
|
||||
'einer', 'eines', 'ist', 'sind', 'was', 'wie', 'wo', 'zum', 'zur',
|
||||
'fuer', 'für', 'durch', 'von', 'vom', 'und', 'oder', 'auch',
|
||||
], true);
|
||||
return in_array($token, $this->retrieverConfig->genericExactSelectionTokens(), true);
|
||||
}
|
||||
|
||||
/**
 * Tells whether at least one of the given PCRE patterns matches the value.
 *
 * Patterns are tried in iteration order and the search stops at the first hit.
 * Empty pattern strings are skipped: preg_match() would raise an "Empty regular
 * expression" warning for them and they could never match anyway.
 *
 * @param string   $value    Subject string handed to preg_match().
 * @param string[] $patterns Complete PCRE patterns, delimiters and modifiers included.
 *
 * @return bool True as soon as one pattern matches; false when none match or the list is empty.
 */
private function matchesAnyPattern(string $value, array $patterns): bool
{
    foreach ($patterns as $pattern) {
        if ($pattern === '') {
            continue;
        }

        // preg_match() yields 0 for "no match" and false on a PCRE error; only 1 counts as a hit.
        if (preg_match($pattern, $value) === 1) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Checks whether any of the configured needles is present as a key in the token map.
 *
 * Presence is decided with isset(), so a key whose value is null does not count.
 *
 * @param array<string, bool> $tokens  Token map; lookups are done by key.
 * @param string[]            $needles Candidate keys, tested in iteration order.
 *
 * @return bool True if at least one needle is a set key of $tokens, false otherwise.
 */
private function containsAnyConfiguredToken(array $tokens, array $needles): bool
{
    $found = false;

    foreach ($needles as $candidate) {
        if (!isset($tokens[$candidate])) {
            continue;
        }

        $found = true;
        break;
    }

    return $found;
}
|
||||
|
||||
/**
 * Checks whether the haystack contains at least one of the configured phrases.
 *
 * Empty phrases are ignored; str_contains() would report them as contained in
 * every haystack.
 *
 * @param string   $haystack Text to search in.
 * @param string[] $phrases  Substrings to look for, tested in iteration order.
 *
 * @return bool True on the first non-empty phrase found in $haystack, false otherwise.
 */
private function containsAnyConfiguredPhrase(string $haystack, array $phrases): bool
{
    foreach ($phrases as $candidate) {
        if ($candidate === '') {
            continue;
        }

        if (str_contains($haystack, $candidate)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user