Harden retrieval logic
This commit is contained in:
@@ -1,21 +1,68 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
class NdjsonHybridRetrieverConfig
|
||||
final class NdjsonHybridRetrieverConfig
|
||||
{
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.75;
|
||||
/**
|
||||
* Default semantic similarity threshold for vector hits.
|
||||
*
|
||||
* Chosen to stay selective enough for product-family-heavy data
|
||||
* while not cutting off too many useful fallback hits.
|
||||
*/
|
||||
public const VECTOR_SCORE_THRESHOLD = 0.80;
|
||||
|
||||
public const HARD_MAX_CHUNKS = 90;
|
||||
public const HARD_MAX_VECTORK = 250;
|
||||
/**
|
||||
* Absolute safety caps.
|
||||
*
|
||||
* These limits protect the retriever from overly large candidate sets
|
||||
* even if runtime config values are set too high.
|
||||
*/
|
||||
public const HARD_MAX_CHUNKS = 72;
|
||||
public const HARD_MAX_VECTORK = 180;
|
||||
|
||||
public const LIST_BONUS = 1.25;
|
||||
/**
|
||||
* List-style queries benefit from a slightly wider candidate pool
|
||||
* before de-duplication and final selection.
|
||||
*/
|
||||
public const LIST_BONUS = 1.40;
|
||||
|
||||
public const MAX_CHUNKS_PER_DOC = 2;
|
||||
public const MIN_CHUNK_DISTANCE = 2.5;
|
||||
public const RRF_K = 60;
|
||||
/**
|
||||
* Selection rules for cross-document semantic retrieval.
|
||||
*
|
||||
* MAX_CHUNKS_PER_DOC:
|
||||
* Keeps one document from dominating the final result in normal
|
||||
* semantic retrieval mode.
|
||||
*
|
||||
* MIN_CHUNK_DISTANCE:
|
||||
* Allows nearby chunks to be selected when they are still meaningfully
|
||||
* distinct, which is important for compact product sheets.
|
||||
*/
|
||||
public const MAX_CHUNKS_PER_DOC = 3;
|
||||
public const MIN_CHUNK_DISTANCE = 1.0;
|
||||
|
||||
public const THRESHOLD_FLOOR = 0.83;
|
||||
public const THRESHOLD_CEIL = 0.92;
|
||||
public const EMPTY_RRF_FALLBACK_TOPN = 1;
|
||||
/**
|
||||
* Reciprocal Rank Fusion constant.
|
||||
*
|
||||
* Slightly lower than the classic default of 60 so top-ranked hits matter more.
|
||||
*/
|
||||
public const RRF_K = 50;
|
||||
|
||||
/**
|
||||
* Dynamic threshold clamp boundaries.
|
||||
*
|
||||
* The floor must stay below the default threshold, otherwise the
|
||||
* configured base threshold becomes ineffective.
|
||||
*/
|
||||
public const THRESHOLD_FLOOR = 0.78;
|
||||
public const THRESHOLD_CEIL = 0.90;
|
||||
|
||||
/**
|
||||
* Fallback breadth when strict thresholding removes all fused hits.
|
||||
*
|
||||
* More than one fallback result makes the retriever less brittle.
|
||||
*/
|
||||
public const EMPTY_RRF_FALLBACK_TOPN = 3;
|
||||
}
|
||||
Reference in New Issue
Block a user