central config part 1

This commit is contained in:
team2
2026-04-25 23:39:41 +02:00
parent 2797834a5f
commit f42022e5f7
11 changed files with 1197 additions and 476 deletions

View File

@@ -6,24 +6,13 @@ namespace App\Config;
final class CommerceQueryParserConfig
{
/**
* @return string[]
*/
public function getKnownBrands(): array
{
return [
private const KNOWN_BRANDS = [
'heyl',
'horiba',
'neomeris',
];
}
/**
* @return string[]
*/
public function getPhrasesToRemove(): array
{
return [
private const PHRASES_TO_REMOVE = [
'ich suche',
'suche',
'habt ihr',
@@ -51,24 +40,8 @@ final class CommerceQueryParserConfig
'verfügbarkeit',
'verfuegbarkeit',
];
}
public function getHistoryContextPattern(): string
{
return 'chat|auch|noch|nochmal|zusätzlich|dazu|davon|stattdessen|alternative|alternativen|größer|groesser|kleiner|gleich(?:e|en|er|es)?|derselbe|dieselbe|dasselbe|wie oben|wie zuvor|wie gehabt';
}
public function getHistoryContextValuePattern(): string
{
return '/\b(' . $this->getHistoryContextPattern() . ')\b/u';
}
/**
* @return string[]
*/
public function getFilterSearchTokens(): array
{
return [
private const FILTER_SEARCH_TOKENS = [
'auch',
'noch',
'nochmal',
@@ -142,14 +115,8 @@ final class CommerceQueryParserConfig
'passen',
'passend',
];
}
/**
* @return array<string, string>
*/
public function getSearchTokenCorrections(): array
{
return [
private const SEARCH_TOKEN_CORRECTIONS = [
'siene' => 'seine',
'sienen' => 'seinen',
'siener' => 'seiner',
@@ -157,14 +124,8 @@ final class CommerceQueryParserConfig
'sienes' => 'seines',
'indicatoren' => 'indikatoren',
];
}
/**
* @return array<string, string>
*/
public function getSearchTokenCanonicalMap(): array
{
return [
private const SEARCH_TOKEN_CANONICAL_MAP = [
'indikatoren' => 'indikator',
'indicators' => 'indikator',
'indicator' => 'indikator',
@@ -173,6 +134,113 @@ final class CommerceQueryParserConfig
'reagent' => 'reagenz',
'produkte' => 'produkt',
];
private const SEMANTIC_SHOP_SEARCH_TOKENS = [
'indikator',
'indicator',
'reagenz',
'reagent',
'zubehör',
'zubehor',
'ersatzteil',
'verbrauchsmaterial',
'chemie',
'indikatorchemie',
'reagenzchemie',
'kit',
'set',
'filter',
'pumpe',
'pumpenkopf',
'motorblock',
'lösung',
'loesung',
'solution',
'teststreifen',
'gerät',
'geraet',
'messgerät',
'messgeraet',
'analysegerät',
'analysegeraet',
'analysator',
'monitor',
'controller',
'system',
];
/**
* Both arguments are optional; with neither supplied every getter resolves to
* its built-in constant list.
*
* @param array<string, mixed> $config Local overrides, looked up by dot-separated path in value().
* @param DomainVocabularyConfig|null $vocabulary Shared vocabulary consulted by the vocabulary*() helpers;
*                                                when null those helpers return their fallback unchanged.
*/
public function __construct(
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
/**
 * Resolves the effective list of known brands.
 *
 * The built-in constant is first passed through the vocabulary view
 * "commerce_query.known_brands"; that result is then used as the default
 * for the local "known_brands" config entry.
 *
 * @return string[]
 */
public function getKnownBrands(): array
{
    $vocabularyDefault = $this->vocabularyView('commerce_query.known_brands', self::KNOWN_BRANDS);

    return $this->stringList('known_brands', $vocabularyDefault);
}
/**
 * Resolves the effective list of phrases to remove.
 *
 * Lookup order: local config key "phrases_to_remove", then the vocabulary
 * view "commerce_query.phrases_to_remove", then the built-in constant.
 *
 * @return string[]
 */
public function getPhrasesToRemove(): array
{
    $vocabularyDefault = $this->vocabularyView(
        'commerce_query.phrases_to_remove',
        self::PHRASES_TO_REMOVE,
    );

    return $this->stringList('phrases_to_remove', $vocabularyDefault);
}
/**
 * Returns the undelimited regex alternation used by getHistoryContextValuePattern().
 *
 * The alternatives are listed one per entry and joined with "|" so the
 * resulting string is identical to a single hand-written alternation.
 */
public function getHistoryContextPattern(): string
{
    $alternatives = [
        'chat',
        'auch',
        'noch',
        'nochmal',
        'zusätzlich',
        'dazu',
        'davon',
        'stattdessen',
        'alternative',
        'alternativen',
        'größer',
        'groesser',
        'kleiner',
        'gleich(?:e|en|er|es)?',
        'derselbe',
        'dieselbe',
        'dasselbe',
        'wie oben',
        'wie zuvor',
        'wie gehabt',
    ];

    return implode('|', $alternatives);
}
/**
 * Wraps the history-context alternation in a capturing group between word
 * boundaries and adds the "/" delimiters and the "u" modifier.
 */
public function getHistoryContextValuePattern(): string
{
    $alternation = $this->getHistoryContextPattern();

    return sprintf('/\b(%s)\b/u', $alternation);
}
/**
 * Resolves the effective list of filter search tokens.
 *
 * Lookup order: local config key "filter_search_tokens", then the vocabulary
 * view "commerce_query.filter_search_tokens", then the built-in constant.
 *
 * @return string[]
 */
public function getFilterSearchTokens(): array
{
    $vocabularyDefault = $this->vocabularyView(
        'commerce_query.filter_search_tokens',
        self::FILTER_SEARCH_TOKENS,
    );

    return $this->stringList('filter_search_tokens', $vocabularyDefault);
}
/**
 * Resolves the effective token-correction map.
 *
 * Lookup order: local config key "search_token_corrections", then the
 * vocabulary map "commerce_query.search_token_corrections", then the
 * built-in constant.
 *
 * @return array<string, string>
 */
public function getSearchTokenCorrections(): array
{
    $vocabularyDefault = $this->vocabularyStringMap(
        'commerce_query.search_token_corrections',
        self::SEARCH_TOKEN_CORRECTIONS,
    );

    return $this->stringMap('search_token_corrections', $vocabularyDefault);
}
/**
 * Resolves the effective canonical-token map.
 *
 * Lookup order: local config key "search_token_canonical_map", then the
 * vocabulary map "commerce_query.search_token_canonical", then the built-in
 * constant. Note that the local key and the vocabulary path are named
 * differently.
 *
 * @return array<string, string>
 */
public function getSearchTokenCanonicalMap(): array
{
    $vocabularyDefault = $this->vocabularyStringMap(
        'commerce_query.search_token_canonical',
        self::SEARCH_TOKEN_CANONICAL_MAP,
    );

    return $this->stringMap('search_token_canonical_map', $vocabularyDefault);
}
/**
@@ -335,39 +403,86 @@ final class CommerceQueryParserConfig
*/
public function getSemanticShopSearchTokens(): array
{
return [
'indikator',
'indicator',
'reagenz',
'reagent',
'zubehör',
'zubehor',
'ersatzteil',
'verbrauchsmaterial',
'chemie',
'indikatorchemie',
'reagenzchemie',
'kit',
'set',
'filter',
'pumpe',
'pumpenkopf',
'motorblock',
'lösung',
'loesung',
'solution',
'teststreifen',
'gerät',
'geraet',
'messgerät',
'messgeraet',
'analysegerät',
'analysegeraet',
'analysator',
'monitor',
'controller',
'system',
];
return $this->stringList(
'semantic_shop_search_tokens',
$this->vocabularyView('commerce_query.semantic_shop_search_tokens', self::SEMANTIC_SHOP_SEARCH_TOKENS)
);
}
/**
 * Resolves a vocabulary view, or returns $fallback untouched when no
 * vocabulary object was injected.
 *
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary === null) {
        return $fallback;
    }

    return $this->vocabulary->view($path, $fallback);
}
/**
 * Resolves a vocabulary string map, or returns $fallback untouched when no
 * vocabulary object was injected.
 *
 * @return array<string, string>
 */
private function vocabularyStringMap(string $path, array $fallback): array
{
    $vocabulary = $this->vocabulary;

    return $vocabulary === null
        ? $fallback
        : $vocabulary->stringMap($path, $fallback);
}
/**
 * Reads a list from the local config and normalises it.
 *
 * Non-scalar entries are dropped, scalars are cast to string and trimmed,
 * blanks and repeats are discarded. $default is returned when the configured
 * value is not an array or when nothing survives normalisation.
 *
 * @return string[]
 */
private function stringList(string $path, array $default): array
{
    $configured = $this->value($path, $default);
    if (!is_array($configured)) {
        return $default;
    }

    $unique = [];
    foreach ($configured as $entry) {
        // Non-scalars collapse to '' so they share the "blank" rejection below.
        $text = is_scalar($entry) ? trim((string) $entry) : '';
        if ($text !== '' && !in_array($text, $unique, true)) {
            $unique[] = $text;
        }
    }

    return $unique === [] ? $default : $unique;
}
/**
 * Reads a key/value map from the local config and normalises it.
 *
 * Pairs with a non-scalar value are dropped; keys and values are cast to
 * string and trimmed, and pairs where either side ends up blank are skipped.
 * $default is returned when the configured value is not an array or when no
 * pair survives.
 *
 * @return array<string, string>
 */
private function stringMap(string $path, array $default): array
{
    $configured = $this->value($path, $default);
    if (!is_array($configured)) {
        return $default;
    }

    $pairs = [];
    foreach ($configured as $rawKey => $rawValue) {
        if (is_scalar($rawKey) && is_scalar($rawValue)) {
            $name = trim((string) $rawKey);
            $target = trim((string) $rawValue);
            if ($name === '' || $target === '') {
                continue;
            }
            $pairs[$name] = $target;
        }
    }

    return $pairs === [] ? $default : $pairs;
}
/**
 * Walks the local config along a dot-separated path.
 *
 * Returns $default as soon as a segment is missing or the current node is
 * not an array; otherwise returns whatever sits at the end of the path.
 */
private function value(string $path, mixed $default): mixed
{
    $node = $this->config;
    $segments = explode('.', $path);

    foreach ($segments as $segment) {
        $descendable = is_array($node) && array_key_exists($segment, $node);
        if (!$descendable) {
            return $default;
        }
        $node = $node[$segment];
    }

    return $node;
}
public function buildExactTokenRemovalPattern(string $token): string

View File

@@ -0,0 +1,196 @@
<?php
declare(strict_types=1);
namespace App\Config;
/**
 * Read-only accessor for a shared vocabulary configuration tree.
 *
 * The tree is navigated with dot-separated paths. Three top-level sections are
 * read by this class:
 *  - "classes": named term lists, read by domainClass();
 *  - "views":   definitions with an "include" list of class names and an "add"
 *               list of extra terms, read by view();
 *  - "maps":    keyed lookups, read by map() and stringMap().
 *
 * Every accessor normalises what it returns: non-scalar entries are dropped,
 * scalars are cast to string and trimmed, blanks are discarded and lists are
 * de-duplicated in first-seen order. Because booleans are scalars, `true`
 * becomes the term "1" and `false` becomes "" (and is therefore dropped).
 */
final class DomainVocabularyConfig
{
    /**
     * @param array<string, mixed> $config Raw vocabulary configuration tree.
     */
    public function __construct(private readonly array $config = [])
    {
    }

    /**
     * Builds the term list for the view stored at "views.<path>".
     *
     * The terms of every class named in the definition's "include" list come
     * first, followed by the literal terms in its "add" list. The normalised
     * $fallback is returned when the view is not defined as an array or when it
     * yields no terms.
     *
     * @return string[]
     */
    public function view(string $path, array $fallback = []): array
    {
        $definition = $this->value('views.' . $path, null);
        if (!is_array($definition)) {
            return $this->uniqueStringList($fallback);
        }

        $terms = [];
        foreach ($this->stringListFromValue($definition['include'] ?? []) as $className) {
            foreach ($this->domainClass($className) as $term) {
                $terms[] = $term;
            }
        }
        foreach ($this->stringListFromValue($definition['add'] ?? []) as $term) {
            $terms[] = $term;
        }

        $terms = $this->uniqueStringList($terms);

        return $terms !== [] ? $terms : $this->uniqueStringList($fallback);
    }

    /**
     * Returns the normalised term list stored at "classes.<name>", or an empty
     * list when the class is missing or not an array.
     *
     * @return string[]
     */
    public function domainClass(string $name): array
    {
        return $this->stringList('classes.' . $name, []);
    }

    /**
     * Returns the key => term-list map stored at "maps.<path>".
     *
     * Entries whose trimmed key is blank or whose value is not a non-empty list
     * are skipped. The normalised $fallback is returned when the configured
     * value is not an array or when no entry survives.
     *
     * @return array<string, string[]>
     */
    public function map(string $path, array $fallback = []): array
    {
        $value = $this->value('maps.' . $path, null);
        // Configured data and fallback go through the same normaliser.
        $out = is_array($value) ? $this->uniqueStringListMap($value) : [];

        return $out !== [] ? $out : $this->uniqueStringListMap($fallback);
    }

    /**
     * Returns the key => string map stored at "maps.<path>".
     *
     * A list value is reduced to its first normalised entry; an empty or
     * unusable list drops the key. The normalised $fallback is returned when
     * the configured value is not an array or when no entry survives.
     *
     * @return array<string, string>
     */
    public function stringMap(string $path, array $fallback = []): array
    {
        $value = $this->value('maps.' . $path, null);
        if (!is_array($value)) {
            return $this->uniqueStringMap($fallback);
        }

        // Collapse list values to a single scalar first so the shared map
        // normaliser can handle trimming and blank rejection for every entry.
        $flattened = [];
        foreach ($value as $key => $mappedValue) {
            $flattened[$key] = is_array($mappedValue)
                ? ($this->uniqueStringList($mappedValue)[0] ?? '')
                : $mappedValue;
        }

        $out = $this->uniqueStringMap($flattened);

        return $out !== [] ? $out : $this->uniqueStringMap($fallback);
    }

    /**
     * Returns the raw configuration tree exactly as it was passed in.
     *
     * @return array<string, mixed>
     */
    public function toArray(): array
    {
        return $this->config;
    }

    /**
     * Reads the list at $path, using the normalised $fallback when it is
     * missing, not an array, or empty after normalisation.
     *
     * @return string[]
     */
    private function stringList(string $path, array $fallback): array
    {
        $items = $this->stringListFromValue($this->value($path, null));

        return $items !== [] ? $items : $this->uniqueStringList($fallback);
    }

    /**
     * Normalises an arbitrary value into a term list; anything that is not an
     * array yields an empty list.
     *
     * @return string[]
     */
    private function stringListFromValue(mixed $value): array
    {
        return is_array($value) ? $this->uniqueStringList($value) : [];
    }

    /**
     * Casts scalars to trimmed strings, drops blanks and non-scalars, and
     * removes duplicates while keeping first-seen order.
     *
     * @return string[]
     */
    private function uniqueStringList(array $items): array
    {
        $out = [];
        // Membership set keyed by term: O(1) duplicate checks instead of
        // scanning $out with in_array() for every item.
        $seen = [];
        foreach ($items as $item) {
            if (!is_scalar($item)) {
                continue;
            }
            $term = trim((string) $item);
            if ($term === '' || isset($seen[$term])) {
                continue;
            }
            $seen[$term] = true;
            $out[] = $term;
        }

        return $out;
    }

    /**
     * Normalises a key => list map: keys are trimmed, lists are normalised, and
     * entries with a blank key or an empty list are dropped.
     *
     * @return array<string, string[]>
     */
    private function uniqueStringListMap(array $map): array
    {
        $out = [];
        foreach ($map as $key => $items) {
            // PHP array keys are always int or string, so no scalar check is needed.
            $cleanKey = trim((string) $key);
            $cleanItems = $this->stringListFromValue($items);
            if ($cleanKey !== '' && $cleanItems !== []) {
                $out[$cleanKey] = $cleanItems;
            }
        }

        return $out;
    }

    /**
     * Normalises a key => scalar map: both sides are cast to string and
     * trimmed; entries with a non-scalar value or a blank side are dropped.
     *
     * @return array<string, string>
     */
    private function uniqueStringMap(array $map): array
    {
        $out = [];
        foreach ($map as $key => $value) {
            if (!is_scalar($value)) {
                continue;
            }
            $cleanKey = trim((string) $key);
            $cleanValue = trim((string) $value);
            if ($cleanKey !== '' && $cleanValue !== '') {
                $out[$cleanKey] = $cleanValue;
            }
        }

        return $out;
    }

    /**
     * Walks the configuration along a dot-separated path and returns $fallback
     * as soon as a segment is missing or the current node is not an array.
     */
    private function value(string $path, mixed $fallback): mixed
    {
        $current = $this->config;
        foreach (explode('.', $path) as $segment) {
            if (!is_array($current) || !array_key_exists($segment, $current)) {
                return $fallback;
            }
            $current = $current[$segment];
        }

        return $current;
    }
}

View File

@@ -176,11 +176,10 @@ final class NdjsonHybridRetrieverConfig
/**
* @param array<string, mixed> $config
* @param array<string, mixed> $vocabulary Kept for backwards-compatible service wiring.
*/
public function __construct(
private array $config = [],
private array $vocabulary = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
@@ -307,55 +306,55 @@ final class NdjsonHybridRetrieverConfig
/** @return string[] */
public function genericProductTokens(): array
{
return $this->stringList('generic_product_tokens', self::GENERIC_PRODUCT_TOKEN);
return $this->stringList('generic_product_tokens', $this->vocabularyView('retrieval.generic_product_tokens', self::GENERIC_PRODUCT_TOKEN));
}
/** @return string[] */
public function importantShortModelTokens(): array
{
return $this->stringList('important_short_model_tokens', self::IMPORTANT_SHORT_MODEL_TOKEN);
return $this->stringList('important_short_model_tokens', $this->vocabularyView('retrieval.important_short_model_tokens', self::IMPORTANT_SHORT_MODEL_TOKEN));
}
/** @return string[] */
public function familyDescriptorTokens(): array
{
return $this->stringList('family_descriptor_tokens', self::FAMILY_DESCRIPTOR_TOKEN);
return $this->stringList('family_descriptor_tokens', $this->vocabularyView('retrieval.family_descriptor_tokens', self::FAMILY_DESCRIPTOR_TOKEN));
}
/** @return string[] */
public function looksLikeReagentTokens(): array
{
return $this->stringList('looks_like_reagent_tokens', self::LOOKS_LIKE_REAGENT_TOKENS);
return $this->stringList('looks_like_reagent_tokens', $this->vocabularyView('retrieval.looks_like_reagent_tokens', self::LOOKS_LIKE_REAGENT_TOKENS));
}
/** @return string[] */
public function looksLikeSafetyDocs(): array
{
return $this->stringList('looks_like_safety_docs', self::LOOKS_LIKE_SAFETY_DOCS);
return $this->stringList('looks_like_safety_docs', $this->vocabularyView('retrieval.looks_like_safety_docs', self::LOOKS_LIKE_SAFETY_DOCS));
}
/** @return string[] */
public function looksLikeReagentWords(): array
{
return $this->stringList('looks_like_reagent_words', self::LOOKS_LIKE_REAGENT_WORDS);
return $this->stringList('looks_like_reagent_words', $this->vocabularyView('retrieval.looks_like_reagent_words', self::LOOKS_LIKE_REAGENT_WORDS));
}
/** @return string[] */
public function looksLikeDocumentWords(): array
{
return $this->stringList('looks_like_document_words', self::LOOKS_LIKE_DOCUMENT_WORDS);
return $this->stringList('looks_like_document_words', $this->vocabularyView('retrieval.looks_like_document_words', self::LOOKS_LIKE_DOCUMENT_WORDS));
}
/** @return string[] */
public function looksLikeSafetyWords(): array
{
return $this->stringList('looks_like_safety_words', self::LOOKS_LIKE_SAFETY_WORDS);
return $this->stringList('looks_like_safety_words', $this->vocabularyView('retrieval.looks_like_safety_words', self::LOOKS_LIKE_SAFETY_WORDS));
}
/** @return string[] */
public function looksLikeDeviceWords(): array
{
return $this->stringList('looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS);
return $this->stringList('looks_like_device_words', $this->vocabularyView('retrieval.looks_like_device_words', self::LOOKS_LIKE_DEVICE_WORDS));
}
/**
* Effective retrieval vocabulary as a dedicated structure for diagnostics and config dumps.
@@ -459,6 +458,12 @@ final class NdjsonHybridRetrieverConfig
* @param string[] $default
* @return string[]
*/
/**
 * Looks up a vocabulary view; without an injected vocabulary the caller's
 * $fallback is returned as-is.
 *
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    $vocabulary = $this->vocabulary;
    if ($vocabulary === null) {
        return $fallback;
    }

    return $vocabulary->view($path, $fallback);
}
private function stringList(string $key, array $default): array
{
$value = $this->raw($key, $default);
@@ -492,10 +497,6 @@ final class NdjsonHybridRetrieverConfig
return $this->config[$key];
}
if (array_key_exists($key, $this->vocabulary)) {
return $this->vocabulary[$key];
}
return $default;
}
}

View File

@@ -6,11 +6,74 @@ namespace App\Config;
final class PromptBuilderConfig
{
private const TECHNICAL_PRODUCT_KEYWORDS = [
'technisch',
'technical',
'produkt',
'product',
'gerät',
'device',
'modell',
'model',
'messprinzip',
'measurement principle',
'schnittstelle',
'interface',
'relais',
'relay',
'indikator',
'indicator',
'grenzwert',
'threshold',
'messbereich',
'measurement range',
'minimaler',
'minimum',
'resthärte',
'resthaerte',
'°dh',
'dh',
'spannung',
'voltage',
'strom',
'current',
'druck',
'pressure',
'temperatur',
'temperature',
'schutzart',
'ip',
'fehlercode',
'error code',
'wasserhärte',
'hardness',
'testomat',
'chlor',
'chlormessung',
];
private const ACCESSORY_REQUEST_KEYWORDS = [
'passend',
'passende',
'passendes',
'zubehör',
'zubehor',
'dazu',
'indikator',
'reagenz',
'kit',
'set',
'zusatz',
'ergänzung',
'ergaenzung',
];
/**
* @param array<string, mixed> $config
*/
public function __construct(
private readonly array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
@@ -88,6 +151,42 @@ final class PromptBuilderConfig
return is_numeric($value) ? (float) $value : $default;
}
/**
 * Reads a list from the config and normalises it to unique, trimmed,
 * non-empty strings.
 *
 * $default is returned when the configured value is not an array or when no
 * entry survives normalisation.
 *
 * @return string[]
 */
private function getStringList(string $path, array $default): array
{
    $raw = $this->getValue($path, $default);
    if (!is_array($raw)) {
        return $default;
    }

    $list = [];
    foreach ($raw as $candidate) {
        if (!is_scalar($candidate)) {
            continue;
        }
        $candidate = trim((string) $candidate);
        $isNew = $candidate !== '' && !in_array($candidate, $list, true);
        if ($isNew) {
            $list[] = $candidate;
        }
    }

    return $list === [] ? $default : $list;
}
/**
 * Delegates to the injected vocabulary's view(); when none was injected the
 * supplied $fallback is returned unchanged.
 *
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    return $this->vocabulary === null
        ? $fallback
        : $this->vocabulary->view($path, $fallback);
}
private function getValue(string $path, mixed $default): mixed
{
$current = $this->config;
@@ -445,51 +544,10 @@ final class PromptBuilderConfig
*/
public function getTechnicalProductKeywords(): array
{
return [
'technisch',
'technical',
'produkt',
'product',
'gerät',
'device',
'modell',
'model',
'messprinzip',
'measurement principle',
'schnittstelle',
'interface',
'relais',
'relay',
'indikator',
'indicator',
'grenzwert',
'threshold',
'messbereich',
'measurement range',
'minimaler',
'minimum',
'resthärte',
'resthaerte',
'°dh',
'dh',
'spannung',
'voltage',
'strom',
'current',
'druck',
'pressure',
'temperatur',
'temperature',
'schutzart',
'ip',
'fehlercode',
'error code',
'wasserhärte',
'hardness',
'testomat',
'chlor',
'chlormessung',
];
return $this->getStringList(
'technical_product_keywords',
$this->vocabularyView('prompt.technical_product_keywords', self::TECHNICAL_PRODUCT_KEYWORDS)
);
}
/**
@@ -497,21 +555,10 @@ final class PromptBuilderConfig
*/
public function getAccessoryRequestKeywords(): array
{
return [
'passend',
'passende',
'passendes',
'zubehör',
'zubehor',
'dazu',
'indikator',
'reagenz',
'kit',
'set',
'zusatz',
'ergänzung',
'ergaenzung',
];
return $this->getStringList(
'accessory_request_keywords',
$this->vocabularyView('prompt.accessory_request_keywords', self::ACCESSORY_REQUEST_KEYWORDS)
);
}
public function getTechnicalProductModelPattern(): string

View File

@@ -68,8 +68,10 @@ final class ShopServiceConfig
/**
* @param array<string, mixed> $config
*/
public function __construct(private array $config = [])
{
public function __construct(
private array $config = [],
private readonly ?DomainVocabularyConfig $vocabulary = null,
) {
}
public function getTopProductLogLimit(): int
@@ -80,43 +82,43 @@ final class ShopServiceConfig
/** @return string[] */
public function getDeviceFocusKeywords(): array
{
return $this->stringList('device_focus_keywords', self::DEVICE_FOCUS_KEYWORDS);
return $this->stringList('device_focus_keywords', $this->vocabularyView('shop.device_focus', self::DEVICE_FOCUS_KEYWORDS));
}
/** @return string[] */
public function getAccessoryFocusKeywords(): array
{
return $this->stringList('accessory_focus_keywords', self::ACCESSORY_FOCUS_KEYWORDS);
return $this->stringList('accessory_focus_keywords', $this->vocabularyView('shop.accessory_focus', self::ACCESSORY_FOCUS_KEYWORDS));
}
/** @return array<string, string[]> */
public function getAccessoryFocusVariantMap(): array
{
return $this->stringListMap('accessory_focus_variant_map', self::ACCESSORY_FOCUS_VARIANT_MAP);
return $this->stringListMap('accessory_focus_variant_map', $this->vocabularyMap('shop.accessory_focus_variants', self::ACCESSORY_FOCUS_VARIANT_MAP));
}
/** @return string[] */
public function getDeviceQueryKeywords(): array
{
return $this->stringList('device_query_keywords', self::DEVICE_QUERY_KEYWORDS);
return $this->stringList('device_query_keywords', $this->vocabularyView('shop.device_query', self::DEVICE_QUERY_KEYWORDS));
}
/** @return string[] */
public function getAccessoryQueryKeywords(): array
{
return $this->stringList('accessory_query_keywords', self::ACCESSORY_QUERY_KEYWORDS);
return $this->stringList('accessory_query_keywords', $this->vocabularyView('shop.accessory_query', self::ACCESSORY_QUERY_KEYWORDS));
}
/** @return string[] */
public function getAccessoryProductKeywords(): array
{
return $this->stringList('accessory_product_keywords', self::ACCESSORY_PRODUCT_KEYWORDS);
return $this->stringList('accessory_product_keywords', $this->vocabularyView('shop.accessory_product', self::ACCESSORY_PRODUCT_KEYWORDS));
}
/** @return string[] */
public function getDeviceProductKeywords(): array
{
return $this->stringList('device_product_keywords', self::DEVICE_PRODUCT_KEYWORDS);
return $this->stringList('device_product_keywords', $this->vocabularyView('shop.device_product', self::DEVICE_PRODUCT_KEYWORDS));
}
public function getExactProductNumberPhraseScore(): int
@@ -368,6 +370,18 @@ final class ShopServiceConfig
* @param string[]|null $emptySafeDefault
* @return string[]
*/
/**
 * Resolves a vocabulary view, returning $fallback unchanged when no
 * vocabulary was injected.
 *
 * @return string[]
 */
private function vocabularyView(string $path, array $fallback): array
{
    if ($this->vocabulary !== null) {
        return $this->vocabulary->view($path, $fallback);
    }

    return $fallback;
}
/**
 * Resolves a vocabulary key => list map, returning $fallback unchanged when
 * no vocabulary was injected.
 *
 * @return array<string, string[]>
 */
private function vocabularyMap(string $path, array $fallback): array
{
    if ($this->vocabulary !== null) {
        return $this->vocabulary->map($path, $fallback);
    }

    return $fallback;
}
private function stringList(string $path, array $default, bool $allowEmptyStrings = false, ?array $emptySafeDefault = null): array
{
$value = $this->value($path, $default);

View File

@@ -1125,7 +1125,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$candidates = [];
$seenDocs = [];
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_WINDOW) as $rank => $chunkId) {
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->focusedProductWindow()) as $rank => $chunkId) {
$row = $rows[$chunkId] ?? null;
if (!is_array($row)) {
continue;
@@ -1171,7 +1171,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$bestScore = (float)$best['score'];
$gap = $bestScore - $runnerUpScore;
if ($bestScore < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_SCORE || $gap < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_GAP) {
if ($bestScore < $this->retrieverConfig->focusedProductMinScore() || $gap < $this->retrieverConfig->focusedProductMinGap()) {
return null;
}
@@ -1199,10 +1199,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$normalized = $this->normalizeText($prompt);
$tokens = $this->tokenizeText($normalized);
$reagentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_WORDS;
$documentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DOCUMENT_WORDS;
$safetyWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_WORDS;
$deviceWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DEVICE_WORDS;
$reagentWords = $this->retrieverConfig->looksLikeReagentWords();
$documentWords = $this->retrieverConfig->looksLikeDocumentWords();
$safetyWords = $this->retrieverConfig->looksLikeSafetyWords();
$deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
$asksReagent = $this->containsAnyToken($tokens, $reagentWords);
$asksDocument = $this->containsAnyToken($tokens, $documentWords);
@@ -1343,7 +1343,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$documentId,
$chunkIds,
$rows,
min($limit, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MAX_CHUNKS)
min($limit, $this->retrieverConfig->focusedProductMaxChunks())
);
}
@@ -1358,7 +1358,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
{
$docWindow = [];
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::DOMINANT_DOC_WINDOW) as $chunkId) {
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->dominantDocWindow()) as $chunkId) {
if (!isset($rows[$chunkId]['text'])) {
continue;
}
@@ -1388,7 +1388,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$dominantCount = (int)($counts[$dominantDocId] ?? 0);
if ($dominantCount >= NdjsonHybridRetrieverConfig::DOMINANT_DOC_MIN_HITS) {
if ($dominantCount >= $this->retrieverConfig->dominantDocMinHits()) {
return $dominantDocId;
}
@@ -1450,7 +1450,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
$maxFromDoc = min($limit, NdjsonHybridRetrieverConfig::DOMINANT_DOC_MAX_CHUNKS);
$maxFromDoc = min($limit, $this->retrieverConfig->dominantDocMaxChunks());
if ($anchorChunkIndex !== null) {
usort($docHits, static function (array $a, array $b) use ($anchorChunkIndex): int {
@@ -1550,13 +1550,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
continue;
}
if (is_int($chunkIndex)) {
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
continue 2;
}
}
@@ -1609,13 +1609,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
continue;
}
if (is_int($chunkIndex)) {
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
continue 2;
}
}
@@ -1715,7 +1715,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isGenericProductToken(string $token): bool
{
static $generic = NdjsonHybridRetrieverConfig::GENERIC_PRODUCT_TOKEN;
$generic = $this->retrieverConfig->genericProductTokens();
return isset(array_fill_keys($generic, true)[$token]);
}
@@ -1724,7 +1725,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isImportantShortModelToken(string $token): bool
{
static $allowed = NdjsonHybridRetrieverConfig::IMPORTANT_SHORT_MODEL_TOKEN;
$allowed = $this->retrieverConfig->importantShortModelTokens();
return in_array($token, $allowed, true);
}
@@ -1734,7 +1735,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isFamilyDescriptorToken(string $token): bool
{
static $familyDescriptors = NdjsonHybridRetrieverConfig::FAMILY_DESCRIPTOR_TOKEN;
$familyDescriptors = $this->retrieverConfig->familyDescriptorTokens();
return in_array($token, $familyDescriptors, true)
|| $this->isImportantShortModelToken($token)
@@ -1752,7 +1753,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return false;
}
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_TOKENS;
$needles = $this->retrieverConfig->looksLikeReagentTokens();
foreach ($needles as $needle) {
if (str_contains($haystack, $needle)) {
@@ -1774,7 +1775,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return false;
}
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_DOCS;
$needles = $this->retrieverConfig->looksLikeSafetyDocs();
foreach ($needles as $needle) {
if (str_contains($haystack, $needle)) {