central config part 1

This commit is contained in:
team2
2026-04-25 23:39:41 +02:00
parent 2797834a5f
commit f42022e5f7
11 changed files with 1197 additions and 476 deletions

View File

@@ -1125,7 +1125,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$candidates = [];
$seenDocs = [];
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_WINDOW) as $rank => $chunkId) {
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->focusedProductWindow()) as $rank => $chunkId) {
$row = $rows[$chunkId] ?? null;
if (!is_array($row)) {
continue;
@@ -1171,7 +1171,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$bestScore = (float)$best['score'];
$gap = $bestScore - $runnerUpScore;
if ($bestScore < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_SCORE || $gap < NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MIN_GAP) {
if ($bestScore < $this->retrieverConfig->focusedProductMinScore() || $gap < $this->retrieverConfig->focusedProductMinGap()) {
return null;
}
@@ -1199,10 +1199,10 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$normalized = $this->normalizeText($prompt);
$tokens = $this->tokenizeText($normalized);
$reagentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_WORDS;
$documentWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DOCUMENT_WORDS;
$safetyWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_WORDS;
$deviceWords = NdjsonHybridRetrieverConfig::LOOKS_LIKE_DEVICE_WORDS;
$reagentWords = $this->retrieverConfig->looksLikeReagentWords();
$documentWords = $this->retrieverConfig->looksLikeDocumentWords();
$safetyWords = $this->retrieverConfig->looksLikeSafetyWords();
$deviceWords = $this->retrieverConfig->looksLikeDeviceWords();
$asksReagent = $this->containsAnyToken($tokens, $reagentWords);
$asksDocument = $this->containsAnyToken($tokens, $documentWords);
@@ -1343,7 +1343,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$documentId,
$chunkIds,
$rows,
min($limit, NdjsonHybridRetrieverConfig::FOCUSED_PRODUCT_MAX_CHUNKS)
min($limit, $this->retrieverConfig->focusedProductMaxChunks())
);
}
@@ -1358,7 +1358,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
{
$docWindow = [];
foreach (array_slice($chunkIds, 0, NdjsonHybridRetrieverConfig::DOMINANT_DOC_WINDOW) as $chunkId) {
foreach (array_slice($chunkIds, 0, $this->retrieverConfig->dominantDocWindow()) as $chunkId) {
if (!isset($rows[$chunkId]['text'])) {
continue;
}
@@ -1388,7 +1388,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
$dominantCount = (int)($counts[$dominantDocId] ?? 0);
if ($dominantCount >= NdjsonHybridRetrieverConfig::DOMINANT_DOC_MIN_HITS) {
if ($dominantCount >= $this->retrieverConfig->dominantDocMinHits()) {
return $dominantDocId;
}
@@ -1450,7 +1450,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return [];
}
$maxFromDoc = min($limit, NdjsonHybridRetrieverConfig::DOMINANT_DOC_MAX_CHUNKS);
$maxFromDoc = min($limit, $this->retrieverConfig->dominantDocMaxChunks());
if ($anchorChunkIndex !== null) {
usort($docHits, static function (array $a, array $b) use ($anchorChunkIndex): int {
@@ -1550,13 +1550,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
continue;
}
if (is_int($chunkIndex)) {
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
continue 2;
}
}
@@ -1609,13 +1609,13 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
continue;
}
if (($docCounter[$docId] ?? 0) >= NdjsonHybridRetrieverConfig::MAX_CHUNKS_PER_DOC) {
if (($docCounter[$docId] ?? 0) >= $this->retrieverConfig->maxChunksPerDoc()) {
continue;
}
if (is_int($chunkIndex)) {
foreach ($docChunkPositions[$docId] ?? [] as $prevIdx) {
if (abs($prevIdx - $chunkIndex) < NdjsonHybridRetrieverConfig::MIN_CHUNK_DISTANCE) {
if (abs($prevIdx - $chunkIndex) < $this->retrieverConfig->minChunkDistance()) {
continue 2;
}
}
@@ -1715,7 +1715,8 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isGenericProductToken(string $token): bool
{
    // The token list is read from the injected retriever config. The earlier
    // `static $generic = NdjsonHybridRetrieverConfig::GENERIC_PRODUCT_TOKEN;`
    // initialiser was overwritten before ever being read, so it is dropped.
    $generic = $this->retrieverConfig->genericProductTokens();

    // Membership is tested as an array-key lookup on a token => true map,
    // so it follows PHP array-key rules (an integer-like string such as "12"
    // matches an integer 12 in the list). Kept as-is to preserve behaviour.
    return isset(array_fill_keys($generic, true)[$token]);
}
@@ -1724,7 +1725,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isImportantShortModelToken(string $token): bool
{
    // The allow-list is read from the injected retriever config. The earlier
    // `static $allowed = NdjsonHybridRetrieverConfig::IMPORTANT_SHORT_MODEL_TOKEN;`
    // initialiser was overwritten before ever being read, so it is dropped.
    $allowed = $this->retrieverConfig->importantShortModelTokens();

    // Strict comparison: the token must match an entry by value and type.
    return in_array($token, $allowed, true);
}
@@ -1734,7 +1735,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
*/
private function isFamilyDescriptorToken(string $token): bool
{
static $familyDescriptors = NdjsonHybridRetrieverConfig::FAMILY_DESCRIPTOR_TOKEN;
$familyDescriptors = $this->retrieverConfig->familyDescriptorTokens();
return in_array($token, $familyDescriptors, true)
|| $this->isImportantShortModelToken($token)
@@ -1752,7 +1753,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return false;
}
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_REAGENT_TOKENS;
$needles = $this->retrieverConfig->looksLikeReagentTokens();
foreach ($needles as $needle) {
if (str_contains($haystack, $needle)) {
@@ -1774,7 +1775,7 @@ final readonly class NdjsonHybridRetriever implements RetrieverInterface
return false;
}
$needles = NdjsonHybridRetrieverConfig::LOOKS_LIKE_SAFETY_DOCS;
$needles = $this->retrieverConfig->looksLikeSafetyDocs();
foreach ($needles as $needle) {
if (str_contains($haystack, $needle)) {