fix vector python embedding
This commit is contained in:
@@ -6,11 +6,12 @@ namespace App\Tag;
|
||||
|
||||
use App\Index\IndexMetaManager;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\Process\Exception\ProcessFailedException;
|
||||
use Symfony\Component\Process\Exception\ProcessTimedOutException;
|
||||
use Symfony\Component\Process\Process;
|
||||
|
||||
final readonly class TagVectorIndexBuilder
|
||||
{
|
||||
private const GRACEFUL_TERMINATION_SECONDS = 2;
|
||||
|
||||
public function __construct(
|
||||
private string $pythonBin,
|
||||
private string $scriptPath,
|
||||
@@ -44,25 +45,46 @@ final readonly class TagVectorIndexBuilder
|
||||
return;
|
||||
}
|
||||
|
||||
$cmd = $this->buildCommand($tmpIndex);
|
||||
$cmd = [
|
||||
$this->pythonBin,
|
||||
$this->scriptPath,
|
||||
$this->tagsNdjsonPath,
|
||||
$tmpIndex,
|
||||
];
|
||||
|
||||
$this->agentLogger->info('[tags] build tag vector index', [
|
||||
'cmd' => $cmd,
|
||||
'timeout' => $this->timeoutSeconds,
|
||||
'embedding_model' => $this->embeddingModel,
|
||||
'model_path_override' => getenv('RETRIEX_EMBEDDING_MODEL_PATH') ?: null,
|
||||
]);
|
||||
|
||||
try {
|
||||
$result = $this->runCommand($cmd);
|
||||
$process = new Process($cmd);
|
||||
$process->setTimeout($this->timeoutSeconds);
|
||||
$process->setIdleTimeout($this->timeoutSeconds);
|
||||
|
||||
if ($result['exit'] !== 0) {
|
||||
$process->run(function (string $type, string $buffer): void {
|
||||
$message = trim($buffer);
|
||||
|
||||
if ($message === '') {
|
||||
return;
|
||||
}
|
||||
|
||||
$this->agentLogger->info('[tags] vector ingest output', [
|
||||
'type' => $type,
|
||||
'output' => $message,
|
||||
]);
|
||||
});
|
||||
|
||||
if (!$process->isSuccessful()) {
|
||||
$this->agentLogger->error('[tags] tag vector ingest failed', [
|
||||
'exit' => $result['exit'],
|
||||
'stdout' => $result['stdout'],
|
||||
'stderr' => $result['stderr'],
|
||||
'exit' => $process->getExitCode(),
|
||||
'stdout' => trim($process->getOutput()),
|
||||
'stderr' => trim($process->getErrorOutput()),
|
||||
]);
|
||||
|
||||
throw new \RuntimeException('Tag vector ingest failed (exit=' . $result['exit'] . ')');
|
||||
throw new ProcessFailedException($process);
|
||||
}
|
||||
|
||||
if (!$this->isUsableArtifact($tmpIndex) || !$this->isUsableArtifact($tmpMeta)) {
|
||||
@@ -77,6 +99,21 @@ final readonly class TagVectorIndexBuilder
|
||||
'index' => $finalIndex,
|
||||
'meta' => $finalMeta,
|
||||
]);
|
||||
} catch (ProcessTimedOutException $e) {
|
||||
$this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);
|
||||
|
||||
$this->agentLogger->error('[tags] tag vector ingest timed out', [
|
||||
'timeout' => $this->timeoutSeconds,
|
||||
'message' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
throw new \RuntimeException(
|
||||
'Tag vector ingest timed out after ' . $this->timeoutSeconds . ' seconds. '
|
||||
. 'Most likely the embedding model cannot be loaded. '
|
||||
. 'Set RETRIEX_EMBEDDING_MODEL_PATH to a local model directory or check the HuggingFace cache.',
|
||||
0,
|
||||
$e,
|
||||
);
|
||||
} catch (\Throwable $e) {
|
||||
$this->cleanupTemporaryArtifacts($tmpIndex, $tmpMeta);
|
||||
throw $e;
|
||||
@@ -102,17 +139,6 @@ final readonly class TagVectorIndexBuilder
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Assembles the shell command line used to run the ingest script.
 *
 * The interpreter path, the script path, the tags NDJSON path and the
 * temporary index path are each shell-escaped and joined with single
 * spaces. The trailing "2>&1" makes the shell merge the child's stderr
 * into its stdout.
 *
 * @param string $tmpIndex Path of the temporary index file passed as the last argument.
 *
 * @return string The complete command line, ready to be handed to a shell.
 */
private function buildCommand(string $tmpIndex): string
{
    $rawArguments = [
        $this->pythonBin,
        $this->scriptPath,
        $this->tagsNdjsonPath,
        $tmpIndex,
    ];

    $escapedArguments = [];

    foreach ($rawArguments as $rawArgument) {
        $escapedArguments[] = escapeshellarg($rawArgument);
    }

    return implode(' ', $escapedArguments) . ' 2>&1';
}
|
||||
|
||||
private function ensureTargetDirectoryExists(string $finalIndexPath): void
|
||||
{
|
||||
$dir = dirname($finalIndexPath);
|
||||
@@ -162,85 +188,6 @@ final readonly class TagVectorIndexBuilder
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Runs a shell command via proc_open() and collects its output.
 *
 * stdin is closed immediately; stdout and stderr are switched to
 * non-blocking mode and drained every 100 ms while the child is running.
 * When the run exceeds $this->timeoutSeconds the child is sent the default
 * termination signal, given GRACEFUL_TERMINATION_SECONDS to exit, and then
 * sent signal 9 if it is still alive.
 *
 * @param string $cmd Complete command line to execute.
 *
 * @return array{exit:int, stdout:string, stderr:string} Exit code plus trimmed output streams.
 *
 * @throws \RuntimeException When the process cannot be started or when it times out.
 */
private function runCommand(string $cmd): array
{
    $descriptorSpec = [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ];

    $process = @proc_open($cmd, $descriptorSpec, $pipes);

    if (!is_resource($process)) {
        throw new \RuntimeException('Could not start tag vector ingest process.');
    }

    // The child gets no input; closing stdin lets it see EOF right away.
    fclose($pipes[0]);
    stream_set_blocking($pipes[1], false);
    stream_set_blocking($pipes[2], false);

    $stdout = '';
    $stderr = '';
    $startedAt = microtime(true);
    $timedOut = false;

    // Exit code as reported by proc_get_status() once the child has stopped.
    // Before PHP 8.3 only the first status call after termination carries the
    // real exit code and a later proc_close() returns -1, so it has to be
    // captured here instead of relying on proc_close() alone.
    $observedExitCode = null;

    try {
        while (true) {
            $stdout .= stream_get_contents($pipes[1]) ?: '';
            $stderr .= stream_get_contents($pipes[2]) ?: '';

            $status = proc_get_status($process);

            if (!is_array($status) || ($status['running'] ?? false) !== true) {
                $reported = is_array($status) ? ($status['exitcode'] ?? null) : null;

                // A negative value means "unknown" (e.g. killed by a signal).
                if (is_int($reported) && $reported >= 0) {
                    $observedExitCode = $reported;
                }

                break;
            }

            if ((microtime(true) - $startedAt) > $this->timeoutSeconds) {
                $timedOut = true;
                proc_terminate($process);
                usleep(self::GRACEFUL_TERMINATION_SECONDS * 1000000);

                $status = proc_get_status($process);

                if (is_array($status) && ($status['running'] ?? false) === true) {
                    // Still alive after the grace period: force-kill.
                    proc_terminate($process, 9);
                }

                break;
            }

            usleep(100000);
        }

        // Pick up whatever was written between the last poll and the exit.
        $stdout .= stream_get_contents($pipes[1]) ?: '';
        $stderr .= stream_get_contents($pipes[2]) ?: '';
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
    }

    $closeExitCode = proc_close($process);

    if ($timedOut) {
        $this->agentLogger->error('[tags] tag vector ingest timed out', [
            'timeout' => $this->timeoutSeconds,
            'stdout' => $stdout,
            'stderr' => $stderr,
        ]);

        throw new \RuntimeException('Tag vector ingest timed out after ' . $this->timeoutSeconds . ' seconds.');
    }

    // Prefer the exit code seen while polling; fall back to proc_close().
    $exitCode = $observedExitCode ?? (is_int($closeExitCode) ? $closeExitCode : 1);

    return [
        'exit' => $exitCode,
        'stdout' => trim($stdout),
        'stderr' => trim($stderr),
    ];
}
|
||||
|
||||
private function isUsableArtifact(string $path): bool
|
||||
{
|
||||
return is_file($path) && filesize($path) > 0;
|
||||
@@ -281,4 +228,4 @@ final readonly class TagVectorIndexBuilder
|
||||
|
||||
@chmod($final, 0664);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user