addArgument(
'prompt',
InputArgument::REQUIRED,
'Prompt to test against the real hybrid retrieval pipeline'
)
->addOption(
'json',
null,
InputOption::VALUE_NONE,
'Return the raw retrieval debug result as JSON'
)
->addOption(
'show-text',
null,
InputOption::VALUE_NONE,
'Show full chunk text instead of a shortened preview'
);
}
/**
 * Runs the given prompt through the retriever's debug entry point and prints the outcome.
 *
 * With --json the prompt, the measured duration, the result count and the result rows are
 * emitted as one pretty-printed JSON document. Otherwise a sectioned report is printed:
 * pipeline summary, scope candidates, hit counts, boosts, and one entry per returned row.
 *
 * @param InputInterface  $input  Provides the "prompt" argument and the "json" / "show-text" options.
 * @param OutputInterface $output Console output the report or JSON document is written to.
 *
 * @return int Command::SUCCESS when the retrieval ran (even with zero results); Command::FAILURE
 *             when the prompt is empty, the retriever throws, or JSON encoding fails.
 */
protected function execute(InputInterface $input, OutputInterface $output): int
{
    $io = new SymfonyStyle($input, $output);
    $prompt = trim((string) $input->getArgument('prompt'));
    $asJson = (bool) $input->getOption('json');
    $showText = (bool) $input->getOption('show-text');

    if ($prompt === '') {
        $io->error('Prompt must not be empty.');

        return Command::FAILURE;
    }

    // Only the retriever call is timed; option parsing and rendering are excluded.
    $start = microtime(true);
    try {
        $results = $this->retriever->retrieveDebug($prompt);
    } catch (\Throwable $e) {
        $io->error($e->getMessage());

        return Command::FAILURE;
    }
    $durationMs = round((microtime(true) - $start) * 1000, 2);

    if ($asJson) {
        $payload = [
            'prompt' => $prompt,
            'duration_ms' => $durationMs,
            'result_count' => count($results),
            'results' => $results,
        ];
        $json = json_encode(
            $payload,
            JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
        );
        if (!is_string($json)) {
            // Surface the encoder's own reason (e.g. malformed UTF-8 in a chunk).
            $io->error(sprintf('json_encode failed: %s', json_last_error_msg()));

            return Command::FAILURE;
        }

        // OUTPUT_RAW bypasses the console formatter: retrieved text may contain
        // "<info>"-style sequences that would otherwise be interpreted as style
        // tags and silently removed, corrupting the JSON document.
        $output->writeln($json, OutputInterface::OUTPUT_RAW);

        return Command::SUCCESS;
    }

    $io->title('Hybrid Retrieval Test');
    $io->definitionList(
        ['prompt' => $prompt],
        ['duration_ms' => (string) $durationMs],
        ['result_count' => (string) count($results)]
    );

    if ($results === []) {
        $io->warning('No retrieval results returned.');

        return Command::SUCCESS;
    }

    // The summary sections below are read from the first row only.
    // NOTE(review): presumably every row repeats the same pipeline-level metadata — confirm
    // against the retriever.
    $first = $results[0];

    $io->section('Pipeline Summary');
    $io->definitionList(
        ['scope_mode' => $this->stringValue($first, 'scope_mode')],
        ['selection_mode' => $this->stringValue($first, 'selection_mode')],
        ['intent' => $this->stringValue($first, 'intent')],
        ['route' => $this->stringValue($first, 'route')],
        ['entity_label' => $this->stringValue($first, 'entity_label')],
        ['is_list_query' => $this->boolishValue($first, 'is_list_query')],
        ['clean_query' => $this->stringValue($first, 'clean_query')],
        ['semantic_query' => $this->stringValue($first, 'semantic_query')],
        ['secondary_vector_query' => $this->stringValue($first, 'secondary_vector_query')],
        ['lexical_query' => $this->stringValue($first, 'lexical_query')],
        ['threshold' => $this->scalarValue($first, 'threshold')],
        ['lexical_threshold' => $this->scalarValue($first, 'lexical_threshold')]
    );

    $io->section('Scope Candidates');
    $io->definitionList(
        ['tag_candidate_doc_ids' => $this->jsonValue($first, 'tag_candidate_doc_ids')],
        ['soft_document_candidate_doc_ids' => $this->jsonValue($first, 'soft_document_candidate_doc_ids')],
        ['pseudo_scope_doc_ids' => $this->jsonValue($first, 'pseudo_scope_doc_ids')],
        ['title_metadata_doc_boosts' => $this->jsonObjectValue($first, 'title_metadata_doc_boosts')]
    );

    $io->section('Hit Counts');
    $io->definitionList(
        ['global_hit_count' => $this->scalarValue($first, 'global_hit_count')],
        ['scoped_hit_count' => $this->scalarValue($first, 'scoped_hit_count')],
        ['global_vector_hit_count' => $this->scalarValue($first, 'global_vector_hit_count')],
        ['global_primary_vector_hit_count' => $this->scalarValue($first, 'global_primary_vector_hit_count')],
        ['global_secondary_vector_hit_count' => $this->scalarValue($first, 'global_secondary_vector_hit_count')],
        ['global_keyword_hit_count' => $this->scalarValue($first, 'global_keyword_hit_count')],
        ['scoped_vector_hit_count' => $this->scalarValue($first, 'scoped_vector_hit_count')],
        ['scoped_primary_vector_hit_count' => $this->scalarValue($first, 'scoped_primary_vector_hit_count')],
        ['scoped_secondary_vector_hit_count' => $this->scalarValue($first, 'scoped_secondary_vector_hit_count')],
        ['scoped_keyword_hit_count' => $this->scalarValue($first, 'scoped_keyword_hit_count')]
    );

    $io->section('Boosts');
    $io->definitionList(
        ['scoped_boost_factor' => $this->scalarValue($first, 'scoped_boost_factor')],
        ['scoped_vector_boost_factor' => $this->scalarValue($first, 'scoped_vector_boost_factor')],
        ['secondary_scoped_vector_boost_factor' => $this->scalarValue($first, 'secondary_scoped_vector_boost_factor')],
        ['scoped_keyword_boost_factor' => $this->scalarValue($first, 'scoped_keyword_boost_factor')]
    );

    $io->section('Selected Chunks');
    foreach ($results as $row) {
        $rank = $this->scalarValue($row, 'rank');
        $chunkId = $this->stringValue($row, 'chunk_id');
        $documentId = $this->stringValue($row, 'document_id');
        $chunkIndex = $this->scalarValue($row, 'chunk_index');
        $rrfScore = $this->scalarValue($row, 'rrf_score');
        $rawVectorScore = $this->scalarValue($row, 'raw_vector_score');
        $rawKeywordScore = $this->scalarValue($row, 'raw_keyword_score');
        $titleMetadataBoost = $this->scalarValue($row, 'title_metadata_boost');

        // A non-scalar "text" value has no string form; show it as empty instead of
        // triggering an "Array to string conversion" warning.
        $rawText = $row['text'] ?? '';
        $text = is_scalar($rawText) ? (string) $rawText : '';
        if (!$showText) {
            $text = $this->shortenText($text, 500);
        }

        // Empty values are shown as "-" (rank and chunk id are printed as-is).
        $io->writeln(sprintf(
            '#%s chunk=%s doc=%s idx=%s rrf=%s vector=%s keyword=%s title_meta=%s',
            $rank,
            $chunkId,
            $documentId !== '' ? $documentId : '-',
            $chunkIndex !== '' ? $chunkIndex : '-',
            $rrfScore !== '' ? $rrfScore : '-',
            $rawVectorScore !== '' ? $rawVectorScore : '-',
            $rawKeywordScore !== '' ? $rawKeywordScore : '-',
            $titleMetadataBoost !== '' ? $titleMetadataBoost : '-'
        ));

        // Chunk text is document content, not console markup: write it raw so that
        // tag-like sequences in the text are displayed verbatim.
        $io->writeln($text, OutputInterface::OUTPUT_RAW);
        $io->writeln('');
    }

    return Command::SUCCESS;
}
/**
 * Reads a row value for display as a trimmed string.
 *
 * Missing keys, null, and values that have no string form (arrays, objects without
 * __toString(), resources) yield an empty string instead of the literal "Array" plus a
 * warning, or a thrown Error. Scalars keep PHP's native string cast, so true becomes '1'
 * and false becomes ''.
 *
 * @param array<array-key, mixed> $row Row to read from.
 * @param string                  $key Key to look up in the row.
 *
 * @return string Trimmed string form of the value, or '' when there is none.
 */
private function stringValue(array $row, string $key): string
{
    $value = $row[$key] ?? null;

    if (is_scalar($value)) {
        return trim((string) $value);
    }

    // Objects are only cast when they declare their own string conversion.
    if (is_object($value) && method_exists($value, '__toString')) {
        return trim((string) $value);
    }

    return '';
}
/**
 * Renders a scalar row value as text without trimming it.
 *
 * Booleans are spelled out as 'true' / 'false'; other scalars use PHP's string cast.
 * Missing keys, null and non-scalar values all produce an empty string.
 *
 * @param array<array-key, mixed> $row Row to read from.
 * @param string                  $key Key to look up in the row.
 *
 * @return string Text form of the value, or '' when it is absent or not scalar.
 */
private function scalarValue(array $row, string $key): string
{
    $raw = $row[$key] ?? null;

    // is_scalar() is false for null, so this guard also covers missing keys.
    if (!is_scalar($raw)) {
        return '';
    }

    if ($raw === true) {
        return 'true';
    }

    if ($raw === false) {
        return 'false';
    }

    return (string) $raw;
}
/**
 * Renders a flag-like row value as text.
 *
 * Real booleans become 'true' / 'false'; any other scalar is cast to string unchanged.
 * Missing keys, null and non-scalar values produce an empty string.
 *
 * @param array<array-key, mixed> $row Row to read from.
 * @param string                  $key Key to look up in the row.
 *
 * @return string Text form of the flag, or '' when it is absent or not scalar.
 */
private function boolishValue(array $row, string $key): string
{
    // isset() is false for both a missing key and a null value.
    if (!isset($row[$key]) || !is_scalar($row[$key])) {
        return '';
    }

    $flag = $row[$key];
    if (!is_bool($flag)) {
        return (string) $flag;
    }

    return $flag ? 'true' : 'false';
}
/**
 * Encodes an array-valued row entry as a compact JSON list.
 *
 * The array is re-indexed with array_values() before encoding, so its original keys are
 * discarded. Slashes and Unicode characters are left unescaped. A missing key, a
 * non-array value, or an encoding failure all produce '[]'.
 *
 * @param array<array-key, mixed> $row Row to read from.
 * @param string                  $key Key to look up in the row.
 *
 * @return string JSON array text, or '[]' as the fallback.
 */
private function jsonValue(array $row, string $key): string
{
    $fallback = '[]';
    $items = $row[$key] ?? null;

    if (!is_array($items)) {
        return $fallback;
    }

    $encoded = json_encode(
        array_values($items),
        JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
    );

    // json_encode() yields false on failure.
    return $encoded === false ? $fallback : $encoded;
}
/**
 * Encodes an array-valued row entry as a compact JSON object.
 *
 * The top level is always emitted as an object, so the key => value shape survives even
 * when the array is empty or happens to have sequential integer keys. Encoded directly,
 * such arrays come out as a JSON list and lose their keys. Nested arrays keep
 * json_encode()'s default encoding. Slashes and Unicode characters are left unescaped.
 * A missing key, a non-array value, or an encoding failure all produce '{}'.
 *
 * @param array<array-key, mixed> $row Row to read from.
 * @param string                  $key Key to look up in the row.
 *
 * @return string JSON object text, or '{}' as the fallback.
 */
private function jsonObjectValue(array $row, string $key): string
{
    $value = $row[$key] ?? null;
    if (!is_array($value)) {
        return '{}';
    }

    // Casting to object affects only the top level, unlike JSON_FORCE_OBJECT which would
    // also turn nested lists into objects.
    $json = json_encode(
        (object) $value,
        JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
    );

    return is_string($json) ? $json : '{}';
}
/**
 * Produces a single-line preview of a text.
 *
 * Every run of whitespace is collapsed to one space and the result is trimmed. If the
 * whitespace replacement fails (preg_replace() returns null), the original text is
 * trimmed instead. When the result is longer than $maxLength characters, measured as
 * UTF-8, it is cut to that many characters and ' …' is appended.
 *
 * @param string $text      Text to shorten.
 * @param int    $maxLength Maximum number of characters kept before the ' …' suffix.
 *
 * @return string The normalized text, shortened if necessary.
 */
private function shortenText(string $text, int $maxLength): string
{
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed === null) {
        $collapsed = $text;
    }
    $collapsed = trim($collapsed);

    if (mb_strlen($collapsed, 'UTF-8') > $maxLength) {
        return mb_substr($collapsed, 0, $maxLength, 'UTF-8') . ' …';
    }

    return $collapsed;
}
}