Add uvicorn as Python server for faster communication
This commit is contained in:
79
src/Command/TestVectorCommand.php
Normal file
79
src/Command/TestVectorCommand.php
Normal file
@@ -0,0 +1,79 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Vector\VectorSearchClient;
|
||||
use App\Tag\TagVectorSearchClient;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputArgument;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
|
||||
/**
 * Diagnostic console command that runs one prompt through both retrieval
 * stages (tag routing, then chunk retrieval) and prints the per-stage
 * latency together with the raw hits returned by each client.
 */
#[AsCommand(
    name: 'mto:agent:test-vector',
    description: 'Runs a prompt through tag routing and chunk retrieval and prints timings and hits.',
)]
final class TestVectorCommand extends Command
{
    /** Number of hits requested from each search client. */
    private const RESULT_LIMIT = 8;

    /**
     * Flags used when dumping results:
     *  - JSON_THROW_ON_ERROR: json_encode() never returns false, which would
     *    otherwise be a TypeError in writeln() under strict_types.
     *  - JSON_INVALID_UTF8_SUBSTITUTE: a broken byte in a hit must not abort
     *    a diagnostics run.
     *  - JSON_UNESCAPED_*: keep the dump human-readable.
     */
    private const JSON_FLAGS = JSON_PRETTY_PRINT
        | JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES
        | JSON_INVALID_UTF8_SUBSTITUTE
        | JSON_THROW_ON_ERROR;

    public function __construct(
        private readonly VectorSearchClient $vectorSearchClient,
        private readonly TagVectorSearchClient $tagVectorSearchClient,
    ) {
        parent::__construct();
    }

    protected function configure(): void
    {
        $this->addArgument(
            'prompt',
            InputArgument::REQUIRED,
            'User prompt (realistic retrieval test)'
        );
    }

    /**
     * Executes both searches sequentially and reports timings and results.
     *
     * @throws \JsonException if a result set cannot be serialised
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $prompt = (string) $input->getArgument('prompt');

        $output->writeln('');
        // Escape the prompt: it is free text and may contain "<...>" sequences
        // that the console formatter would otherwise interpret as style tags.
        $output->writeln(
            '<info>Prompt:</info> ' . \Symfony\Component\Console\Formatter\OutputFormatter::escape($prompt)
        );
        $output->writeln('');

        // hrtime() is monotonic, so measurements are immune to wall-clock adjustments.
        $totalStart = hrtime(true);

        // ----------------------------
        // 1) Tag routing phase
        // ----------------------------
        $tagStart = hrtime(true);
        $tagResults = $this->tagVectorSearchClient->search($prompt, self::RESULT_LIMIT);
        $tagDuration = $this->elapsedMs($tagStart);

        // ----------------------------
        // 2) Chunk retrieval phase
        // ----------------------------
        $chunkStart = hrtime(true);
        $chunkResults = $this->vectorSearchClient->search($prompt, self::RESULT_LIMIT);
        $chunkDuration = $this->elapsedMs($chunkStart);

        $totalDuration = $this->elapsedMs($totalStart);

        // ----------------------------
        // Output
        // ----------------------------
        $output->writeln('<comment>Tag Routing Time:</comment> ' . round($tagDuration, 2) . ' ms');
        $output->writeln('<comment>Chunk Retrieval Time:</comment> ' . round($chunkDuration, 2) . ' ms');
        $output->writeln('<comment>Total Retrieval Time:</comment> ' . round($totalDuration, 2) . ' ms');
        $output->writeln('');

        $this->dumpResults($output, '--- Tag Results ---', $tagResults);
        $this->dumpResults($output, '--- Chunk Results ---', $chunkResults);

        return Command::SUCCESS;
    }

    /**
     * Milliseconds elapsed since a previous hrtime(true) reading.
     */
    private function elapsedMs(int|float $startNs): float
    {
        return (hrtime(true) - $startNs) / 1_000_000;
    }

    /**
     * Prints a heading followed by the pretty-printed JSON of a result set.
     *
     * The JSON is written with OUTPUT_RAW so that any "<tag>"-like text inside
     * the hits is shown verbatim instead of being parsed by the formatter.
     *
     * @param array<mixed> $results
     *
     * @throws \JsonException
     */
    private function dumpResults(OutputInterface $output, string $heading, array $results): void
    {
        $output->writeln($heading);
        $output->writeln(json_encode($results, self::JSON_FLAGS), OutputInterface::OUTPUT_RAW);
        $output->writeln('');
    }
}
|
||||
@@ -5,16 +5,16 @@ declare(strict_types=1);
|
||||
namespace App\Tag;
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
final readonly class TagVectorSearchClient
|
||||
{
|
||||
private const MIN_SCORE = 0.4; // 🔥 Tag Confidence Gate
|
||||
|
||||
public function __construct(
|
||||
private string $pythonBin,
|
||||
private string $scriptPath,
|
||||
private string $vectorTagsIndexPath,
|
||||
private string $vectorTagsMetaPath,
|
||||
private string $embeddingModel,
|
||||
private LoggerInterface $agentLogger,
|
||||
private HttpClientInterface $http,
|
||||
private string $serviceUrl,
|
||||
private LoggerInterface $agentLogger,
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -22,42 +22,32 @@ final readonly class TagVectorSearchClient
|
||||
*/
|
||||
public function search(string $query, int $limit = 8): array
|
||||
{
|
||||
if (!is_file($this->scriptPath)) {
|
||||
$this->agentLogger->warning('Tag vector search script missing: ' . $this->scriptPath);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!is_file($this->vectorTagsIndexPath) || !is_file($this->vectorTagsMetaPath)) {
|
||||
// no tag index available yet => no routing
|
||||
return [];
|
||||
}
|
||||
|
||||
$limit = max(1, min($limit, 50));
|
||||
|
||||
// Positional args, aligned with existing VectorSearchClient approach:
|
||||
// python vector_search_tags.py <query> <limit> <index> <meta> <model>
|
||||
$cmd = sprintf(
|
||||
'%s %s %s %d %s %s %s 2>&1',
|
||||
escapeshellarg($this->pythonBin),
|
||||
escapeshellarg($this->scriptPath),
|
||||
escapeshellarg($query),
|
||||
$limit,
|
||||
escapeshellarg($this->vectorTagsIndexPath),
|
||||
escapeshellarg($this->vectorTagsMetaPath),
|
||||
escapeshellarg($this->embeddingModel),
|
||||
);
|
||||
|
||||
exec($cmd, $out, $exitCode);
|
||||
|
||||
if ($exitCode !== 0 || empty($out)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$json = implode("\n", $out);
|
||||
|
||||
try {
|
||||
$data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
|
||||
} catch (\Throwable) {
|
||||
$response = $this->http->request(
|
||||
'POST',
|
||||
rtrim($this->serviceUrl, '/') . '/search-tags',
|
||||
[
|
||||
'json' => [
|
||||
'query' => $query,
|
||||
'limit' => $limit,
|
||||
],
|
||||
'timeout' => 10,
|
||||
]
|
||||
);
|
||||
|
||||
if ($response->getStatusCode() !== 200) {
|
||||
$this->agentLogger->warning('Tag vector service returned non-200');
|
||||
return [];
|
||||
}
|
||||
|
||||
$data = $response->toArray(false);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
$this->agentLogger->warning(
|
||||
'Tag vector service unreachable: ' . $e->getMessage()
|
||||
);
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -66,20 +56,29 @@ final readonly class TagVectorSearchClient
|
||||
}
|
||||
|
||||
$hits = [];
|
||||
|
||||
foreach ($data as $row) {
|
||||
if (!is_array($row)) {
|
||||
continue;
|
||||
}
|
||||
$tagId = (string)($row['tag_id'] ?? '');
|
||||
|
||||
$tagId = (string)($row['chunk_id'] ?? '');
|
||||
$score = $row['score'] ?? null;
|
||||
|
||||
if ($tagId === '' || !is_numeric($score)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$score = (float) $score;
|
||||
|
||||
// 🔥 Confidence Gate
|
||||
if ($score < self::MIN_SCORE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$hits[] = [
|
||||
'tag_id' => $tagId,
|
||||
'score' => (float)$score,
|
||||
'score' => $score,
|
||||
];
|
||||
}
|
||||
|
||||
|
||||
@@ -5,94 +5,86 @@ declare(strict_types=1);
|
||||
namespace App\Vector;
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
|
||||
final class VectorSearchClient
|
||||
{
|
||||
private string $pythonBin;
|
||||
private string $scriptPath;
|
||||
private string $vectorIndexPath;
|
||||
private string $vectorMetaPath;
|
||||
private string $indexMetaPath;
|
||||
private const MIN_SCORE = 0.30; // 🔥 weicher als Tag-Gate
|
||||
|
||||
private HttpClientInterface $http;
|
||||
private string $serviceUrl;
|
||||
private LoggerInterface $agentLogger;
|
||||
|
||||
public function __construct(
|
||||
string $pythonBin,
|
||||
string $scriptPath,
|
||||
string $vectorIndexPath,
|
||||
string $vectorMetaPath,
|
||||
string $indexMetaPath,
|
||||
HttpClientInterface $http,
|
||||
string $serviceUrl,
|
||||
LoggerInterface $agentLogger
|
||||
) {
|
||||
$this->pythonBin = $pythonBin;
|
||||
$this->scriptPath = $scriptPath;
|
||||
$this->vectorIndexPath = $vectorIndexPath;
|
||||
$this->vectorMetaPath = $vectorMetaPath;
|
||||
$this->indexMetaPath = $indexMetaPath;
|
||||
$this->http = $http;
|
||||
$this->serviceUrl = rtrim($serviceUrl, '/');
|
||||
$this->agentLogger = $agentLogger;
|
||||
}
|
||||
|
||||
public function search(string $query, int $limit = 5): array
|
||||
{
|
||||
if (!is_file($this->scriptPath)) {
|
||||
$this->agentLogger->error('vector_search.py not found: ' . $this->scriptPath);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!is_file($this->vectorIndexPath)) {
|
||||
$this->agentLogger->warning('vector.index not found.');
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!is_file($this->vectorMetaPath)) {
|
||||
$this->agentLogger->warning('vector.index.meta.json not found.');
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!is_file($this->indexMetaPath)) {
|
||||
$this->agentLogger->warning('index_meta.json not found.');
|
||||
return [];
|
||||
}
|
||||
|
||||
$indexMeta = json_decode((string) file_get_contents($this->indexMetaPath), true);
|
||||
|
||||
if (!is_array($indexMeta) || empty($indexMeta['embedding_model'])) {
|
||||
$this->agentLogger->error('Invalid index_meta.json.');
|
||||
return [];
|
||||
}
|
||||
|
||||
$embeddingModel = $indexMeta['embedding_model'];
|
||||
|
||||
$cmd = [
|
||||
$this->pythonBin,
|
||||
$this->scriptPath,
|
||||
'--query', $query,
|
||||
'--limit', (string)$limit,
|
||||
'--index', $this->vectorIndexPath,
|
||||
'--meta', $this->vectorMetaPath,
|
||||
'--model', $embeddingModel,
|
||||
];
|
||||
|
||||
$process = new \Symfony\Component\Process\Process($cmd);
|
||||
$process->setTimeout(30);
|
||||
$process->run();
|
||||
|
||||
if (!$process->isSuccessful()) {
|
||||
$this->agentLogger->error('Vector search failed: ' . $process->getErrorOutput());
|
||||
return [];
|
||||
}
|
||||
|
||||
$output = $process->getOutput();
|
||||
|
||||
if (trim($output) === '') {
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
$this->agentLogger->info('vector_search.py is done: ' . $this->scriptPath);
|
||||
return json_decode($output, true, 512, JSON_THROW_ON_ERROR);
|
||||
$response = $this->http->request(
|
||||
'POST',
|
||||
$this->serviceUrl . '/search-chunks',
|
||||
[
|
||||
'json' => [
|
||||
'query' => $query,
|
||||
'limit' => $limit,
|
||||
],
|
||||
'timeout' => 10,
|
||||
]
|
||||
);
|
||||
|
||||
if ($response->getStatusCode() !== 200) {
|
||||
$this->agentLogger->error('Vector service returned non-200 (chunks)');
|
||||
return [];
|
||||
}
|
||||
|
||||
$data = $response->toArray(false);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
$this->agentLogger->error('Invalid JSON from vector_search.py');
|
||||
$this->agentLogger->error(
|
||||
'Vector service unreachable (chunks): ' . $e->getMessage()
|
||||
);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!is_array($data)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$filtered = [];
|
||||
|
||||
foreach ($data as $row) {
|
||||
if (!is_array($row)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$chunkId = (string)($row['chunk_id'] ?? '');
|
||||
$score = $row['score'] ?? null;
|
||||
|
||||
if ($chunkId === '' || !is_numeric($score)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$score = (float)$score;
|
||||
|
||||
// 🔥 Soft Confidence Gate
|
||||
if ($score < self::MIN_SCORE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$filtered[] = [
|
||||
'chunk_id' => $chunkId,
|
||||
'score' => $score,
|
||||
];
|
||||
}
|
||||
|
||||
return $filtered;
|
||||
}
|
||||
}
|
||||
}
|
||||
176
src/Vector/vector_service.py
Normal file
176
src/Vector/vector_service.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import faiss
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Paths
|
||||
# ============================================================
|
||||
|
||||
BASE_PATH = Path(__file__).resolve().parents[2]
|
||||
KNOWLEDGE_DIR = BASE_PATH / "var" / "knowledge"
|
||||
|
||||
CHUNK_INDEX_PATH = KNOWLEDGE_DIR / "vector.index"
|
||||
CHUNK_MAP_PATH = KNOWLEDGE_DIR / "vector.index.meta.json"
|
||||
|
||||
TAG_INDEX_PATH = KNOWLEDGE_DIR / "vector_tags.index"
|
||||
TAG_MAP_PATH = KNOWLEDGE_DIR / "vector_tags.index.meta.json"
|
||||
|
||||
INDEX_META_PATH = KNOWLEDGE_DIR / "index_meta.json"
|
||||
|
||||
|
||||
# ============================================================
|
||||
# FastAPI
|
||||
# ============================================================
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
model: Optional[SentenceTransformer] = None
|
||||
chunk_index = None
|
||||
chunk_ids: Optional[List[Any]] = None
|
||||
tag_index = None
|
||||
tag_ids: Optional[List[Any]] = None
|
||||
loaded_embedding_model_name: Optional[str] = None
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Models
|
||||
# ============================================================
|
||||
|
||||
class SearchRequest(BaseModel):
    """Request body shared by the /search-chunks and /search-tags endpoints."""

    # Free-text query that gets embedded and searched against the index.
    query: str
    # Number of nearest neighbours requested; defaults to 8 when omitted.
    limit: int = 8
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Loader
|
||||
# ============================================================
|
||||
|
||||
def _read_index_pair(index_path: Path, map_path: Path, label: str):
    """Load a FAISS index and its JSON id map, or (None, None) if either file is missing."""
    if not (index_path.exists() and map_path.exists()):
        return None, None

    print(f"[Reload] Loading {label} index")
    index = faiss.read_index(str(index_path))
    ids = json.loads(map_path.read_text(encoding="utf-8"))
    return index, ids


def load_all() -> None:
    """(Re)load the embedding model and both FAISS indexes into the module globals.

    The embedding model name is read from index_meta.json; the model is only
    re-instantiated when it has not been loaded yet or the configured name
    changed. The chunk and tag indexes are re-read on every call; a missing
    index/map pair results in that index being set to None.

    Everything is loaded into locals first and published to the globals in one
    step at the end, so a failure part-way through (corrupt index, invalid
    JSON, model download error) leaves the previously loaded state untouched
    instead of pairing a new index with stale ids or a stale model.

    Raises:
        RuntimeError: index_meta.json is missing, is not a JSON object, or
            does not name an embedding model.
        Exception: whatever json, faiss or SentenceTransformer raise while
            reading their inputs.
    """
    global model, chunk_index, chunk_ids, tag_index, tag_ids, loaded_embedding_model_name

    if not INDEX_META_PATH.exists():
        raise RuntimeError("index_meta.json not found")

    # Explicit encoding: the locale default is not guaranteed to be UTF-8.
    meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
    embedding_model_name = meta.get("embedding_model") if isinstance(meta, dict) else None

    if not embedding_model_name:
        raise RuntimeError("embedding_model missing in index_meta.json")

    # Reload model only if changed
    new_model = model
    if new_model is None or embedding_model_name != loaded_embedding_model_name:
        print(f"[Reload] Loading embedding model: {embedding_model_name}")
        new_model = SentenceTransformer(embedding_model_name)

    new_chunk_index, new_chunk_ids = _read_index_pair(CHUNK_INDEX_PATH, CHUNK_MAP_PATH, "chunk")
    new_tag_index, new_tag_ids = _read_index_pair(TAG_INDEX_PATH, TAG_MAP_PATH, "tag")

    # Publish only after every load succeeded.
    model = new_model
    loaded_embedding_model_name = embedding_model_name
    chunk_index, chunk_ids = new_chunk_index, new_chunk_ids
    tag_index, tag_ids = new_tag_index, new_tag_ids

    print("[Reload] Completed")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Startup
|
||||
# ============================================================
|
||||
|
||||
@app.on_event("startup")
def startup_event():
    """Startup hook: load the model and indexes, then print a readiness line.

    Any exception raised by load_all() is not caught here and propagates to
    the caller of this hook.
    """
    load_all()
    print("[VectorService] Ready")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Endpoints
|
||||
# ============================================================
|
||||
|
||||
@app.get("/health")
def health():
    """Report a static "ok" status plus which in-memory resources are loaded."""
    resources = {
        "chunk_index_loaded": chunk_index,
        "tag_index_loaded": tag_index,
        "model_loaded": model,
    }

    payload = {"status": "ok"}
    for flag, resource in resources.items():
        # A resource counts as loaded as soon as its global is not None.
        payload[flag] = resource is not None
    return payload
|
||||
|
||||
|
||||
@app.post("/reload")
def reload():
    """Re-run load_all() on demand.

    Returns {"status": "reloaded"} on success. Any exception raised while
    loading is converted into an HTTP 500 whose detail is the exception text.
    """
    try:
        load_all()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return {"status": "reloaded"}
|
||||
|
||||
|
||||
@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
    """Embed the query and return the nearest chunk ids with their scores.

    Returns a list of {"chunk_id": ..., "score": float} dicts in the order
    FAISS returned them. The list is empty when the requested limit is below 1
    or the index contains no vectors.

    Raises:
        HTTPException(503): the chunk index, its id map, or the embedding
            model is not loaded.
    """
    # Snapshot the globals once so a concurrent /reload cannot swap them
    # between the availability check and their use.
    index, ids, encoder = chunk_index, chunk_ids, model

    if index is None or ids is None:
        raise HTTPException(status_code=503, detail="Chunk index not available")
    if encoder is None:
        raise HTTPException(status_code=503, detail="Embedding model not loaded")

    # FAISS rejects k <= 0, and asking for more neighbours than stored vectors
    # only yields -1 padding, so cap k at the index size and bail out early.
    k = min(req.limit, index.ntotal)
    if k < 1:
        return []

    # NOTE(review): normalised embeddings suggest an inner-product/cosine index,
    # i.e. higher score = closer match; confirm against the index builder.
    query_vec = encoder.encode([req.query], normalize_embeddings=True)
    query_vec = np.ascontiguousarray(query_vec, dtype="float32")

    scores, indices = index.search(query_vec, k)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        idx = int(idx)
        # Negative ids are FAISS padding; ids past the map mean index and map
        # are out of sync. Skip both instead of failing the request.
        if idx < 0 or idx >= len(ids):
            continue

        results.append({
            "chunk_id": ids[idx],
            "score": float(score),
        })

    return results
|
||||
|
||||
|
||||
@app.post("/search-tags")
def search_tags(req: SearchRequest):
    """Embed the query and return the nearest tag ids with their scores.

    Returns a list of {"chunk_id": ..., "score": float} dicts in the order
    FAISS returned them. The tag id is deliberately kept under the
    "chunk_id" key, because that is the response shape this endpoint has
    always produced and existing consumers read that key. The list is empty
    when the requested limit is below 1 or the index contains no vectors.

    Raises:
        HTTPException(503): the tag index, its id map, or the embedding model
            is not loaded.
    """
    # Snapshot the globals once so a concurrent /reload cannot swap them
    # between the availability check and their use.
    index, ids, encoder = tag_index, tag_ids, model

    if index is None or ids is None:
        raise HTTPException(status_code=503, detail="Tag index not available")
    if encoder is None:
        raise HTTPException(status_code=503, detail="Embedding model not loaded")

    # FAISS rejects k <= 0, and asking for more neighbours than stored vectors
    # only yields -1 padding, so cap k at the index size and bail out early.
    k = min(req.limit, index.ntotal)
    if k < 1:
        return []

    # NOTE(review): normalised embeddings suggest an inner-product/cosine index,
    # i.e. higher score = closer match; confirm against the index builder.
    query_vec = encoder.encode([req.query], normalize_embeddings=True)
    query_vec = np.ascontiguousarray(query_vec, dtype="float32")

    scores, indices = index.search(query_vec, k)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        idx = int(idx)
        # Negative ids are FAISS padding; ids past the map mean index and map
        # are out of sync. Skip both instead of failing the request.
        if idx < 0 or idx >= len(ids):
            continue

        results.append({
            "chunk_id": ids[idx],
            "score": float(score),
        })

    return results
|
||||
Reference in New Issue
Block a user