This commit is contained in:
team 1
2026-05-04 15:37:33 +02:00
parent c4bf41cd47
commit 16e3d0a0ae
8 changed files with 173 additions and 131 deletions

View File

@@ -196,6 +196,11 @@ final class NdjsonHybridRetrieverConfig
return $this->requiredStringList('exact_detail_tokens');
}
/**
 * Reads the `generic_exact_selection_cleanup_profile` setting.
 *
 * The value is fetched through requiredString(), so it comes back trimmed
 * and is never empty.
 *
 * @return string Configured cleanup profile name.
 */
public function genericExactSelectionCleanupProfile(): string
{
    $profileName = $this->requiredString('generic_exact_selection_cleanup_profile');

    return $profileName;
}
/** @return string[] */
public function genericExactSelectionTokens(): array
{
@@ -316,6 +321,7 @@ final class NdjsonHybridRetrieverConfig
'exact_selection_indicator_table_required_primary_terms' => $this->exactSelectionIndicatorTableRequiredPrimaryTerms(),
'exact_selection_indicator_table_required_context_terms' => $this->exactSelectionIndicatorTableRequiredContextTerms(),
'exact_detail_tokens' => $this->exactDetailTokens(),
'generic_exact_selection_cleanup_profile' => $this->genericExactSelectionCleanupProfile(),
'generic_exact_selection_tokens' => $this->genericExactSelectionTokens(),
'generic_product_tokens' => $this->genericProductTokens(),
'important_short_model_tokens' => $this->importantShortModelTokens(),
@@ -369,6 +375,22 @@ final class NdjsonHybridRetrieverConfig
return $value;
}
/**
 * Fetches a mandatory config value and normalises it to a trimmed string.
 *
 * Any scalar is accepted and cast to string; non-scalar values and values
 * that are empty after trimming are rejected with the exception produced
 * by invalid().
 *
 * @param string $key Config key to read.
 *
 * @return string Trimmed, non-empty value.
 */
private function requiredString(string $key): string
{
    $raw = $this->requiredValue($key);

    // Non-scalars collapse to '' so both failure modes share one throw site.
    $normalized = is_scalar($raw) ? trim((string) $raw) : '';

    if ($normalized === '') {
        throw $this->invalid($key, 'must be a non-empty string');
    }

    return $normalized;
}
/** @return string[] */
private function requiredStringList(string $key): array
{

View File

@@ -1059,6 +1059,13 @@ final readonly class RetriexEffectiveConfigProvider
$errors[] = 'retrieval.hard_max_chunks must be greater than 0.';
}
// Validate retrieval.generic_exact_selection_cleanup_profile: it must be a
// non-empty string (a missing key is treated as null and fails the same check) ...
$cleanupProfile = $retrieval['generic_exact_selection_cleanup_profile'] ?? null;
if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
$errors[] = 'retrieval.generic_exact_selection_cleanup_profile must be a non-empty string.';
// ... and, once trimmed, it must match one of the profile names reported by
// the language cleanup config (strict comparison).
} elseif (!in_array(trim($cleanupProfile), $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
$errors[] = 'retrieval.generic_exact_selection_cleanup_profile references unknown language cleanup profile: ' . trim($cleanupProfile) . '.';
}
$this->validateStringListMap($retrieval['vocabulary'] ?? [], 'retrieval.vocabulary', $errors, $warnings);
$inventory = $retrieval['inventory_parameter'] ?? [];

View File

@@ -5,6 +5,7 @@ declare(strict_types=1);
namespace App\Knowledge\Retrieval;
use App\Catalog\EntityCatalogService;
use App\Config\LanguageCleanupConfig;
use App\Config\NdjsonHybridRetrieverConfig;
use App\Entity\ModelGenerationConfig;
use App\Intent\CatalogIntentLite;
@@ -46,6 +47,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
private EntityCatalogService $entityCatalogService,
private QueryEnricher $queryEnricher,
private NdjsonHybridRetrieverConfig $retrieverConfig,
private LanguageCleanupConfig $languageCleanupConfig,
)
{
}
@@ -942,7 +944,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
/**
 * Tells whether a token belongs to the generic exact-selection token set.
 *
 * The set is the merged list built by genericExactSelectionCleanupTokens():
 * stop words and meta terms of the configured cleanup profile plus the
 * explicitly configured generic exact-selection tokens.
 *
 * @param string $token Token to look up; compared strictly, as given.
 *
 * @return bool True when the token is part of the merged set.
 */
private function isGenericExactSelectionToken(string $token): bool
{
    // A stale early return that checked only the configured token list used
    // to sit here and made the merged lookup below unreachable.
    return in_array($token, $this->genericExactSelectionCleanupTokens(), true);
}
/**
 * Builds the de-duplicated token list used for generic exact-selection checks.
 *
 * Sources, in merge order: stop words of the configured cleanup profile,
 * meta terms of the same profile, then the retriever's own generic
 * exact-selection tokens.
 *
 * @return string[]
 */
private function genericExactSelectionCleanupTokens(): array
{
    $profile = $this->retrieverConfig->genericExactSelectionCleanupProfile();

    $stopWords = $this->languageCleanupConfig->getStopWordsForProfile($profile);
    $metaTerms = $this->languageCleanupConfig->getMetaTermsForProfile($profile);
    $configuredTokens = $this->retrieverConfig->genericExactSelectionTokens();

    $merged = array_merge($stopWords, $metaTerms, $configuredTokens);

    // array_unique keeps the original keys; reindex so callers get a plain list.
    return array_values(array_unique($merged));
}
/**