p33b
This commit is contained in:
@@ -196,6 +196,11 @@ final class NdjsonHybridRetrieverConfig
|
||||
return $this->requiredStringList('exact_detail_tokens');
|
||||
}
|
||||
|
||||
/**
 * Returns the name of the language cleanup profile configured for
 * generic exact-selection handling.
 *
 * The value comes from the required `generic_exact_selection_cleanup_profile`
 * entry and is read through requiredString().
 */
public function genericExactSelectionCleanupProfile(): string
{
    $profileName = $this->requiredString('generic_exact_selection_cleanup_profile');

    return $profileName;
}
|
||||
|
||||
/** @return string[] */
|
||||
public function genericExactSelectionTokens(): array
|
||||
{
|
||||
@@ -316,6 +321,7 @@ final class NdjsonHybridRetrieverConfig
|
||||
'exact_selection_indicator_table_required_primary_terms' => $this->exactSelectionIndicatorTableRequiredPrimaryTerms(),
|
||||
'exact_selection_indicator_table_required_context_terms' => $this->exactSelectionIndicatorTableRequiredContextTerms(),
|
||||
'exact_detail_tokens' => $this->exactDetailTokens(),
|
||||
'generic_exact_selection_cleanup_profile' => $this->genericExactSelectionCleanupProfile(),
|
||||
'generic_exact_selection_tokens' => $this->genericExactSelectionTokens(),
|
||||
'generic_product_tokens' => $this->genericProductTokens(),
|
||||
'important_short_model_tokens' => $this->importantShortModelTokens(),
|
||||
@@ -369,6 +375,22 @@ final class NdjsonHybridRetrieverConfig
|
||||
return $value;
|
||||
}
|
||||
|
||||
/**
 * Reads a required config entry and returns it as a trimmed, non-empty string.
 *
 * Only genuine strings are accepted. Booleans and numbers are rejected rather
 * than cast, because a cast would silently turn e.g. `true` into "1" and let
 * it pass as a valid value even though the error text promises a string.
 *
 * Throws the exception built by invalid() when the value is not a string or
 * is empty after trimming.
 */
private function requiredString(string $key): string
{
    $value = $this->requiredValue($key);

    if (!is_string($value)) {
        throw $this->invalid($key, 'must be a non-empty string');
    }

    $value = trim($value);

    if ($value === '') {
        throw $this->invalid($key, 'must be a non-empty string');
    }

    return $value;
}
|
||||
|
||||
/** @return string[] */
|
||||
private function requiredStringList(string $key): array
|
||||
{
|
||||
|
||||
@@ -1059,6 +1059,13 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$errors[] = 'retrieval.hard_max_chunks must be greater than 0.';
|
||||
}
|
||||
|
||||
$cleanupProfile = $retrieval['generic_exact_selection_cleanup_profile'] ?? null;
|
||||
if (!is_string($cleanupProfile) || trim($cleanupProfile) === '') {
|
||||
$errors[] = 'retrieval.generic_exact_selection_cleanup_profile must be a non-empty string.';
|
||||
} elseif (!in_array(trim($cleanupProfile), $this->languageCleanupConfig->getCleanupProfileNames(), true)) {
|
||||
$errors[] = 'retrieval.generic_exact_selection_cleanup_profile references unknown language cleanup profile: ' . trim($cleanupProfile) . '.';
|
||||
}
|
||||
|
||||
$this->validateStringListMap($retrieval['vocabulary'] ?? [], 'retrieval.vocabulary', $errors, $warnings);
|
||||
|
||||
$inventory = $retrieval['inventory_parameter'] ?? [];
|
||||
|
||||
@@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Catalog\EntityCatalogService;
|
||||
use App\Config\LanguageCleanupConfig;
|
||||
use App\Config\NdjsonHybridRetrieverConfig;
|
||||
use App\Entity\ModelGenerationConfig;
|
||||
use App\Intent\CatalogIntentLite;
|
||||
@@ -46,6 +47,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
private EntityCatalogService $entityCatalogService,
|
||||
private QueryEnricher $queryEnricher,
|
||||
private NdjsonHybridRetrieverConfig $retrieverConfig,
|
||||
private LanguageCleanupConfig $languageCleanupConfig,
|
||||
)
|
||||
{
|
||||
}
|
||||
@@ -942,7 +944,19 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
/**
 * Tells whether the given token counts as a generic exact-selection token.
 *
 * Membership is tested strictly against the list produced by
 * genericExactSelectionCleanupTokens(). The earlier lookup against only
 * the retriever config's token list is removed: it returned first and made
 * this check unreachable.
 */
private function isGenericExactSelectionToken(string $token): bool
{
    return in_array($token, $this->genericExactSelectionCleanupTokens(), true);
}
|
||||
|
||||
/**
 * Builds the de-duplicated, re-indexed token list used for generic
 * exact-selection checks.
 *
 * The list concatenates, in this order, the stop words and meta terms of the
 * configured cleanup profile and the retriever config's own generic
 * exact-selection tokens.
 *
 * @return string[]
 */
private function genericExactSelectionCleanupTokens(): array
{
    $profile = $this->retrieverConfig->genericExactSelectionCleanupProfile();

    $stopWords = $this->languageCleanupConfig->getStopWordsForProfile($profile);
    $metaTerms = $this->languageCleanupConfig->getMetaTermsForProfile($profile);
    $configuredTokens = $this->retrieverConfig->genericExactSelectionTokens();

    $combined = array_merge($stopWords, $metaTerms, $configuredTokens);

    // array_unique keeps the first occurrence's key, so re-index to get a list.
    return array_values(array_unique($combined));
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user