central config part 1
This commit is contained in:
@@ -1125,7 +1125,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$candidates = [];
|
||||
$seenDocs = [];
|
||||
|
||||
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_WINDOW) as $rank => $chunkId) {
|
||||
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->focusedProductWindow()) as $rank => $chunkId) {
|
||||
$row = $rows[$chunkId] ?? null;
|
||||
if (!is_array($row)) {
|
||||
continue;
|
||||
@@ -1171,7 +1171,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$bestScore = (float)$best['score'];
|
||||
$gap = $bestScore - $runnerUpScore;
|
||||
|
||||
if ($bestScore < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_SCORE || $gap < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_GAP) {
|
||||
if ($bestScore < $this->retrieverConfig->focusedProductMinScore() || $gap < $this->retrieverConfig->focusedProductMinGap()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1199,10 +1199,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$normalized = $this->normalizeText($prompt);
|
||||
$tokens = $this->tokenizeText($normalized);
|
||||
|
||||
$reagentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_WORDS;
|
||||
$documentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DOCUMENT_WORDS;
|
||||
$safetyWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_WORDS;
|
||||
$deviceWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DEVICE_WORDS;
|
||||
$reagentWords = $this->retrieverConfig->looksLikeReagentWords();
|
||||
$documentWords = $this->retrieverConfig->looksLikeDocumentWords();
|
||||
$safetyWords = $this->retrieverConfig->looksLikeSafetyWords();
|
||||
$deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
|
||||
|
||||
$asksReagent = $this->containsAnyToken($tokens, $reagentWords);
|
||||
$asksDocument = $this->containsAnyToken($tokens, $documentWords);
|
||||
@@ -1343,7 +1343,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
$documentId,
|
||||
$chunkIds,
|
||||
$rows,
|
||||
min($limit, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MAX_CHUNKS)
|
||||
min($limit, $this->retrieverConfig->focusedProductMaxChunks())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1358,7 +1358,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
{
|
||||
$docWindow = [];
|
||||
|
||||
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::DOMINANT_DOC_WINDOW) as $chunkId) {
|
||||
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->dominantDocWindow()) as $chunkId) {
|
||||
if (!isset($rows[$chunkId]['text'])) {
|
||||
continue;
|
||||
}
|
||||
@@ -1388,7 +1388,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
|
||||
$dominantCount = (int)($counts[$dominantDocId] ?? 0);
|
||||
|
||||
if ($dominantCount >= NdjsonHybridRetrieverConfig::DOMINANT_DOC_MIN_HITS) {
|
||||
if ($dominantCount >= $this->retrieverConfig->dominantDocMinHits()) {
|
||||
return $dominantDocId;
|
||||
}
|
||||
|
||||
@@ -1450,7 +1450,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return [];
|
||||
}
|
||||
|
||||
$maxFromDoc = min($limit, NdjsonHybridRetrieverConfig::DOMINANT_DOC_MAX_CHUNKS);
|
||||
$maxFromDoc = min($limit, $this->retrieverConfig->dominantDocMaxChunks());
|
||||
|
||||
if ($anchorChunkIndex !== null) {
|
||||
usort($docHits, static function (array $a, array $b) use ($anchorChunkIndex): int {
|
||||
@@ -1550,13 +1550,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
continue;
|
||||
}
|
||||
|
||||
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
|
||||
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_int($chunkIndex)) {
|
||||
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
|
||||
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
|
||||
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
@@ -1609,13 +1609,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
continue;
|
||||
}
|
||||
|
||||
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
|
||||
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_int($chunkIndex)) {
|
||||
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
|
||||
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
|
||||
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
|
||||
continue 2;
|
||||
}
|
||||
}
|
||||
@@ -1715,7 +1715,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
*/
|
||||
private function isGenericProductToken(string $token): bool
|
||||
{
|
||||
static $generic = NdjsonHybridRetrieverConfig::GENERIC_PRODUCT_TOKEN;
|
||||
$generic = $this->retrieverConfig->genericProductTokens();
|
||||
|
||||
return isset(array_fill_keys($generic, true)[$token]);
|
||||
}
|
||||
|
||||
@@ -1724,7 +1725,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
*/
|
||||
private function isImportantShortModelToken(string $token): bool
|
||||
{
|
||||
static $allowed = NdjsonHybridRetrieverConfig::IMPORTANT_SHORT_MODEL_TOKEN;
|
||||
$allowed = $this->retrieverConfig->importantShortModelTokens();
|
||||
|
||||
return in_array($token, $allowed, true);
|
||||
}
|
||||
@@ -1734,7 +1735,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
*/
|
||||
private function isFamilyDescriptorToken(string $token): bool
|
||||
{
|
||||
static $familyDescriptors = NdjsonHybridRetrieverConfig::FAMILY_DESCRIPTOR_TOKEN;
|
||||
$familyDescriptors = $this->retrieverConfig->familyDescriptorTokens();
|
||||
|
||||
return in_array($token, $familyDescriptors, true)
|
||||
|| $this->isImportantShortModelToken($token)
|
||||
@@ -1752,7 +1753,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return false;
|
||||
}
|
||||
|
||||
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_TOKENS;
|
||||
$needles = $this->retrieverConfig->looksLikeReagentTokens();
|
||||
|
||||
foreach ($needles as $needle) {
|
||||
if (str_contains($haystack, $needle)) {
|
||||
@@ -1774,7 +1775,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
|
||||
return false;
|
||||
}
|
||||
|
||||
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_DOCS;
|
||||
$needles = $this->retrieverConfig->looksLikeSafetyDocs();
|
||||
|
||||
foreach ($needles as $needle) {
|
||||
if (str_contains($haystack, $needle)) {
|
||||
|
||||
Reference in New Issue
Block a user