Add uvicorn as Python server for faster communication

This commit is contained in:
team2
2026-02-22 08:35:13 +01:00
parent f62d102d61
commit 2629774dcd
8 changed files with 547 additions and 127 deletions

View File

@@ -16,6 +16,7 @@
"symfony/dotenv": "^7.4",
"symfony/flex": "^2",
"symfony/framework-bundle": "^7.4",
"symfony/http-client": "7.4.*",
"symfony/messenger": "7.4.*",
"symfony/monolog-bundle": "^4.0",
"symfony/runtime": "^7.4",

183
composer.lock generated
View File

@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "6e61c35db778f34002e60f6186e53e4a",
"content-hash": "ffc8b627f07f3eb413d757a4d80af3f2",
"packages": [
{
"name": "doctrine/collections",
@@ -2912,6 +2912,185 @@
],
"time": "2025-12-29T09:31:36+00:00"
},
{
"name": "symfony/http-client",
"version": "v7.4.5",
"source": {
"type": "git",
"url": "https://github.com/symfony/http-client.git",
"reference": "84bb634857a893cc146cceb467e31b3f02c5fe9f"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/http-client/zipball/84bb634857a893cc146cceb467e31b3f02c5fe9f",
"reference": "84bb634857a893cc146cceb467e31b3f02c5fe9f",
"shasum": ""
},
"require": {
"php": ">=8.2",
"psr/log": "^1|^2|^3",
"symfony/deprecation-contracts": "^2.5|^3",
"symfony/http-client-contracts": "~3.4.4|^3.5.2",
"symfony/polyfill-php83": "^1.29",
"symfony/service-contracts": "^2.5|^3"
},
"conflict": {
"amphp/amp": "<2.5",
"amphp/socket": "<1.1",
"php-http/discovery": "<1.15",
"symfony/http-foundation": "<6.4"
},
"provide": {
"php-http/async-client-implementation": "*",
"php-http/client-implementation": "*",
"psr/http-client-implementation": "1.0",
"symfony/http-client-implementation": "3.0"
},
"require-dev": {
"amphp/http-client": "^4.2.1|^5.0",
"amphp/http-tunnel": "^1.0|^2.0",
"guzzlehttp/promises": "^1.4|^2.0",
"nyholm/psr7": "^1.0",
"php-http/httplug": "^1.0|^2.0",
"psr/http-client": "^1.0",
"symfony/amphp-http-client-meta": "^1.0|^2.0",
"symfony/cache": "^6.4|^7.0|^8.0",
"symfony/dependency-injection": "^6.4|^7.0|^8.0",
"symfony/http-kernel": "^6.4|^7.0|^8.0",
"symfony/messenger": "^6.4|^7.0|^8.0",
"symfony/process": "^6.4|^7.0|^8.0",
"symfony/rate-limiter": "^6.4|^7.0|^8.0",
"symfony/stopwatch": "^6.4|^7.0|^8.0"
},
"type": "library",
"autoload": {
"psr-4": {
"Symfony\\Component\\HttpClient\\": ""
},
"exclude-from-classmap": [
"/Tests/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Provides powerful methods to fetch HTTP resources synchronously or asynchronously",
"homepage": "https://symfony.com",
"keywords": [
"http"
],
"support": {
"source": "https://github.com/symfony/http-client/tree/v7.4.5"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://github.com/nicolas-grekas",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2026-01-27T16:16:02+00:00"
},
{
"name": "symfony/http-client-contracts",
"version": "v3.6.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/http-client-contracts.git",
"reference": "75d7043853a42837e68111812f4d964b01e5101c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/http-client-contracts/zipball/75d7043853a42837e68111812f4d964b01e5101c",
"reference": "75d7043853a42837e68111812f4d964b01e5101c",
"shasum": ""
},
"require": {
"php": ">=8.1"
},
"type": "library",
"extra": {
"thanks": {
"url": "https://github.com/symfony/contracts",
"name": "symfony/contracts"
},
"branch-alias": {
"dev-main": "3.6-dev"
}
},
"autoload": {
"psr-4": {
"Symfony\\Contracts\\HttpClient\\": ""
},
"exclude-from-classmap": [
"/Test/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Generic abstractions related to HTTP clients",
"homepage": "https://symfony.com",
"keywords": [
"abstractions",
"contracts",
"decoupling",
"interfaces",
"interoperability",
"standards"
],
"support": {
"source": "https://github.com/symfony/http-client-contracts/tree/v3.6.0"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"time": "2025-04-29T11:18:49+00:00"
},
{
"name": "symfony/http-foundation",
"version": "v7.4.3",
@@ -6062,5 +6241,5 @@
"ext-iconv": "*"
},
"platform-dev": {},
"plugin-api-version": "2.9.0"
"plugin-api-version": "2.6.0"
}

View File

@@ -472,7 +472,7 @@ use Symfony\Component\Config\Loader\ParamConfigurator as Param;
* },
* disallow_search_engine_index?: bool|Param, // Enabled by default when debug is enabled. // Default: true
* http_client?: bool|array{ // HTTP Client configuration
* enabled?: bool|Param, // Default: false
* enabled?: bool|Param, // Default: true
* max_host_connections?: int|Param, // The maximum number of connections to a single host.
* default_options?: array{
* headers?: array<string, mixed>,

View File

@@ -57,6 +57,8 @@ parameters:
mto.vector.search_script: '%mto.root%/src/Vector/vector_search.py'
mto.vector.timeout: 600
mto.vector.service_url: 'http://127.0.0.1:8090'
# ------------------------------------------------------------
# Services
@@ -153,11 +155,7 @@ services:
App\Vector\VectorSearchClient:
arguments:
$pythonBin: '%mto.vector.python_bin%'
$scriptPath: '%mto.vector.search_script%'
$vectorIndexPath: '%mto.knowledge.vector_index%'
$vectorMetaPath: '%mto.knowledge.vector_index_meta%'
$indexMetaPath: '%mto.knowledge.index_meta%'
$serviceUrl: '%mto.vector.service_url%'
$agentLogger: '@monolog.logger.agent'
App\Vector\VectorIndexBuilder:
@@ -194,11 +192,7 @@ services:
App\Tag\TagVectorSearchClient:
arguments:
$pythonBin: '%mto.vector.python_bin%'
$scriptPath: '%mto.vector.search_tags_script%'
$vectorTagsIndexPath: '%mto.knowledge.vector_tags_index%'
$vectorTagsMetaPath: '%mto.knowledge.vector_tags_index_meta%'
$embeddingModel: '%mto.index.embedding_model%'
$serviceUrl: '%mto.vector.service_url%'
$agentLogger: '@monolog.logger.agent'
App\Tag\TagRoutingService: ~

View File

@@ -0,0 +1,79 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Vector\VectorSearchClient;
use App\Tag\TagVectorSearchClient;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
/**
 * Console command that runs one prompt through both vector search clients and
 * prints the duration of each phase together with the hits that came back.
 *
 * Phase 1 queries the tag vector search, phase 2 the chunk vector search; both
 * receive the same prompt and the same result limit.
 */
#[AsCommand(name: 'mto:agent:test-vector')]
final class TestVectorCommand extends Command
{
    /** Number of hits requested from each search client. */
    private const RESULT_LIMIT = 8;

    public function __construct(
        private readonly VectorSearchClient $vectorSearchClient,
        private readonly TagVectorSearchClient $tagVectorSearchClient,
    ) {
        parent::__construct();
    }

    /**
     * Declares the single required "prompt" argument.
     */
    protected function configure(): void
    {
        $this->addArgument(
            'prompt',
            InputArgument::REQUIRED,
            'User prompt (realistic retrieval test)'
        );
    }

    /**
     * Runs both searches, measures them and prints timings plus results.
     *
     * @return int Always Command::SUCCESS; the search clients return an array
     *             from search(), so there is no failure branch in this command.
     *
     * @throws \JsonException If the results cannot be encoded as JSON.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $prompt = (string) $input->getArgument('prompt');

        $output->writeln('');
        // Escape the prompt: it is free text and may contain "<...>" sequences that
        // the console formatter would otherwise interpret as style tags.
        $output->writeln(
            '<info>Prompt:</info> ' . \Symfony\Component\Console\Formatter\OutputFormatter::escape($prompt)
        );
        $output->writeln('');

        $totalStart = microtime(true);

        // ----------------------------
        // 1) Tag routing phase
        // ----------------------------
        $tagStart = microtime(true);
        $tagResults = $this->tagVectorSearchClient->search($prompt, self::RESULT_LIMIT);
        $tagDuration = (microtime(true) - $tagStart) * 1000;

        // ----------------------------
        // 2) Chunk retrieval phase
        // ----------------------------
        $chunkStart = microtime(true);
        $chunkResults = $this->vectorSearchClient->search($prompt, self::RESULT_LIMIT);
        $chunkDuration = (microtime(true) - $chunkStart) * 1000;

        // Total spans both phases, so it is measured from the very first timestamp.
        $totalDuration = (microtime(true) - $totalStart) * 1000;

        // ----------------------------
        // Output (durations are in milliseconds)
        // ----------------------------
        $output->writeln('<comment>Tag Routing Time:</comment> ' . round($tagDuration, 2) . ' ms');
        $output->writeln('<comment>Chunk Retrieval Time:</comment> ' . round($chunkDuration, 2) . ' ms');
        $output->writeln('<comment>Total Retrieval Time:</comment> ' . round($totalDuration, 2) . ' ms');
        $output->writeln('');

        $output->writeln('--- Tag Results ---');
        $this->writeJson($output, $tagResults);
        $output->writeln('');

        $output->writeln('--- Chunk Results ---');
        $this->writeJson($output, $chunkResults);
        $output->writeln('');

        return Command::SUCCESS;
    }

    /**
     * Pretty-prints search results as JSON without console tag interpretation.
     *
     * JSON_THROW_ON_ERROR guarantees a string is passed to writeln() (json_encode()
     * would otherwise return false on failure, which is a TypeError under
     * strict_types), and JSON_INVALID_UTF8_SUBSTITUTE keeps malformed bytes in an
     * id from aborting the dump.
     *
     * @param array $results Hits as returned by one of the search clients.
     *
     * @throws \JsonException If encoding fails for a reason other than invalid UTF-8.
     */
    private function writeJson(OutputInterface $output, array $results): void
    {
        $output->writeln(
            json_encode($results, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE | JSON_THROW_ON_ERROR),
            OutputInterface::OUTPUT_RAW
        );
    }
}

View File

@@ -5,16 +5,16 @@ declare(strict_types=1);
namespace App\Tag;
use Psr\Log\LoggerInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
final readonly class TagVectorSearchClient
{
private const MIN_SCORE = 0.4; // 🔥 Tag Confidence Gate
public function __construct(
private string $pythonBin,
private string $scriptPath,
private string $vectorTagsIndexPath,
private string $vectorTagsMetaPath,
private string $embeddingModel,
private LoggerInterface $agentLogger,
private HttpClientInterface $http,
private string $serviceUrl,
private LoggerInterface $agentLogger,
) {}
/**
@@ -22,42 +22,32 @@ final readonly class TagVectorSearchClient
*/
public function search(string $query, int $limit = 8): array
{
if (!is_file($this->scriptPath)) {
$this->agentLogger->warning('Tag vector search script missing: ' . $this->scriptPath);
return [];
}
if (!is_file($this->vectorTagsIndexPath) || !is_file($this->vectorTagsMetaPath)) {
// no tag index available yet => no routing
return [];
}
$limit = max(1, min($limit, 50));
// Positional args, aligned with existing VectorSearchClient approach:
// python vector_search_tags.py <query> <limit> <index> <meta> <model>
$cmd = sprintf(
'%s %s %s %d %s %s %s 2>&1',
escapeshellarg($this->pythonBin),
escapeshellarg($this->scriptPath),
escapeshellarg($query),
$limit,
escapeshellarg($this->vectorTagsIndexPath),
escapeshellarg($this->vectorTagsMetaPath),
escapeshellarg($this->embeddingModel),
);
exec($cmd, $out, $exitCode);
if ($exitCode !== 0 || empty($out)) {
return [];
}
$json = implode("\n", $out);
try {
$data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
} catch (\Throwable) {
$response = $this->http->request(
'POST',
rtrim($this->serviceUrl, '/') . '/search-tags',
[
'json' => [
'query' => $query,
'limit' => $limit,
],
'timeout' => 10,
]
);
if ($response->getStatusCode() !== 200) {
$this->agentLogger->warning('Tag vector service returned non-200');
return [];
}
$data = $response->toArray(false);
} catch (\Throwable $e) {
$this->agentLogger->warning(
'Tag vector service unreachable: ' . $e->getMessage()
);
return [];
}
@@ -66,20 +56,29 @@ final readonly class TagVectorSearchClient
}
$hits = [];
foreach ($data as $row) {
if (!is_array($row)) {
continue;
}
$tagId = (string)($row['tag_id'] ?? '');
$tagId = (string)($row['chunk_id'] ?? '');
$score = $row['score'] ?? null;
if ($tagId === '' || !is_numeric($score)) {
continue;
}
$score = (float) $score;
// 🔥 Confidence Gate
if ($score < self::MIN_SCORE) {
continue;
}
$hits[] = [
'tag_id' => $tagId,
'score' => (float)$score,
'score' => $score,
];
}

View File

@@ -5,94 +5,86 @@ declare(strict_types=1);
namespace App\Vector;
use Psr\Log\LoggerInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
final class VectorSearchClient
{
private string $pythonBin;
private string $scriptPath;
private string $vectorIndexPath;
private string $vectorMetaPath;
private string $indexMetaPath;
private const MIN_SCORE = 0.30; // 🔥 weicher als Tag-Gate
private HttpClientInterface $http;
private string $serviceUrl;
private LoggerInterface $agentLogger;
public function __construct(
string $pythonBin,
string $scriptPath,
string $vectorIndexPath,
string $vectorMetaPath,
string $indexMetaPath,
HttpClientInterface $http,
string $serviceUrl,
LoggerInterface $agentLogger
) {
$this->pythonBin = $pythonBin;
$this->scriptPath = $scriptPath;
$this->vectorIndexPath = $vectorIndexPath;
$this->vectorMetaPath = $vectorMetaPath;
$this->indexMetaPath = $indexMetaPath;
$this->http = $http;
$this->serviceUrl = rtrim($serviceUrl, '/');
$this->agentLogger = $agentLogger;
}
public function search(string $query, int $limit = 5): array
{
if (!is_file($this->scriptPath)) {
$this->agentLogger->error('vector_search.py not found: ' . $this->scriptPath);
return [];
}
if (!is_file($this->vectorIndexPath)) {
$this->agentLogger->warning('vector.index not found.');
return [];
}
if (!is_file($this->vectorMetaPath)) {
$this->agentLogger->warning('vector.index.meta.json not found.');
return [];
}
if (!is_file($this->indexMetaPath)) {
$this->agentLogger->warning('index_meta.json not found.');
return [];
}
$indexMeta = json_decode((string) file_get_contents($this->indexMetaPath), true);
if (!is_array($indexMeta) || empty($indexMeta['embedding_model'])) {
$this->agentLogger->error('Invalid index_meta.json.');
return [];
}
$embeddingModel = $indexMeta['embedding_model'];
$cmd = [
$this->pythonBin,
$this->scriptPath,
'--query', $query,
'--limit', (string)$limit,
'--index', $this->vectorIndexPath,
'--meta', $this->vectorMetaPath,
'--model', $embeddingModel,
];
$process = new \Symfony\Component\Process\Process($cmd);
$process->setTimeout(30);
$process->run();
if (!$process->isSuccessful()) {
$this->agentLogger->error('Vector search failed: ' . $process->getErrorOutput());
return [];
}
$output = $process->getOutput();
if (trim($output) === '') {
return [];
}
try {
$this->agentLogger->info('vector_search.py is done: ' . $this->scriptPath);
return json_decode($output, true, 512, JSON_THROW_ON_ERROR);
$response = $this->http->request(
'POST',
$this->serviceUrl . '/search-chunks',
[
'json' => [
'query' => $query,
'limit' => $limit,
],
'timeout' => 10,
]
);
if ($response->getStatusCode() !== 200) {
$this->agentLogger->error('Vector service returned non-200 (chunks)');
return [];
}
$data = $response->toArray(false);
} catch (\Throwable $e) {
$this->agentLogger->error('Invalid JSON from vector_search.py');
$this->agentLogger->error(
'Vector service unreachable (chunks): ' . $e->getMessage()
);
return [];
}
if (!is_array($data)) {
return [];
}
$filtered = [];
foreach ($data as $row) {
if (!is_array($row)) {
continue;
}
$chunkId = (string)($row['chunk_id'] ?? '');
$score = $row['score'] ?? null;
if ($chunkId === '' || !is_numeric($score)) {
continue;
}
$score = (float)$score;
// 🔥 Soft Confidence Gate
if ($score < self::MIN_SCORE) {
continue;
}
$filtered[] = [
'chunk_id' => $chunkId,
'score' => $score,
];
}
return $filtered;
}
}
}

View File

@@ -0,0 +1,176 @@
#!/usr/bin/env python3
import json
from pathlib import Path
from typing import Any, List, Optional
import numpy as np
import faiss
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
# ============================================================
# Paths
# ============================================================
# Directory two levels above the directory that contains this file.
BASE_PATH = Path(__file__).resolve().parents[2]

# All index artefacts read by this service live under <BASE_PATH>/var/knowledge.
KNOWLEDGE_DIR = BASE_PATH.joinpath("var", "knowledge")

# Chunk index and the JSON file loaded alongside it.
CHUNK_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector.index")
CHUNK_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector.index.meta.json")

# Tag index and the JSON file loaded alongside it.
TAG_INDEX_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index")
TAG_MAP_PATH = KNOWLEDGE_DIR.joinpath("vector_tags.index.meta.json")

# JSON file whose "embedding_model" key names the model to load.
INDEX_META_PATH = KNOWLEDGE_DIR.joinpath("index_meta.json")
# ============================================================
# FastAPI
# ============================================================
# Application object; the startup hook and all routes in this file attach to it.
app = FastAPI()
# Module-level state below is assigned by load_all() and read by the endpoints.
# Everything is None until load_all() has run.
# Embedding model used to encode incoming queries.
model: Optional[SentenceTransformer] = None
# Chunk index as returned by faiss.read_index(), or None when its files are absent.
chunk_index = None
# Parsed content of the chunk map JSON; indexed by the positions faiss returns.
chunk_ids: Optional[List[Any]] = None
# Tag index as returned by faiss.read_index(), or None when its files are absent.
tag_index = None
# Parsed content of the tag map JSON; indexed by the positions faiss returns.
tag_ids: Optional[List[Any]] = None
# Name of the model currently held in `model`; lets load_all() skip a reload
# when index_meta.json still names the same model.
loaded_embedding_model_name: Optional[str] = None
# ============================================================
# Models
# ============================================================
class SearchRequest(BaseModel):
    """JSON body accepted by the /search-chunks and /search-tags endpoints."""

    # Free-text query; it is embedded with the loaded model before searching.
    query: str
    # Number of neighbours requested from the index; defaults to 8 when omitted.
    limit: int = 8
# ============================================================
# Loader
# ============================================================
def load_all():
    """(Re)load the embedding model and both FAISS indexes from disk.

    Reads ``index_meta.json`` to find the embedding model name, loads the model
    only when none is loaded yet or the name changed, and reloads the chunk and
    tag indexes together with their JSON id maps. An index whose files are
    missing is set to ``None`` rather than treated as an error.

    Everything is staged in local variables and published to the module
    globals only after every step succeeded, so a failure halfway through
    (corrupt map file, unreadable index, model download error) leaves the
    previously loaded state untouched instead of pairing a new index with an
    old id map.

    Raises:
        RuntimeError: if ``index_meta.json`` is missing, is not a JSON object,
            or has no ``embedding_model`` entry.
        Exception: whatever ``json.loads``, ``faiss.read_index`` or
            ``SentenceTransformer`` raise is propagated unchanged.
    """
    global model, chunk_index, chunk_ids, tag_index, tag_ids, loaded_embedding_model_name

    if not INDEX_META_PATH.exists():
        raise RuntimeError("index_meta.json not found")

    # Decode explicitly as UTF-8 instead of relying on the platform default.
    meta = json.loads(INDEX_META_PATH.read_text(encoding="utf-8"))
    if not isinstance(meta, dict):
        raise RuntimeError("index_meta.json must contain a JSON object")

    embedding_model_name = meta.get("embedding_model")
    if not embedding_model_name:
        raise RuntimeError("embedding_model missing in index_meta.json")

    # Reload model only if changed
    new_model = model
    if new_model is None or embedding_model_name != loaded_embedding_model_name:
        print(f"[Reload] Loading embedding model: {embedding_model_name}")
        new_model = SentenceTransformer(embedding_model_name)

    # Reload chunk index (needs both the index file and its id map)
    new_chunk_index, new_chunk_ids = None, None
    if CHUNK_INDEX_PATH.exists() and CHUNK_MAP_PATH.exists():
        print("[Reload] Loading chunk index")
        new_chunk_index = faiss.read_index(str(CHUNK_INDEX_PATH))
        new_chunk_ids = json.loads(CHUNK_MAP_PATH.read_text(encoding="utf-8"))

    # Reload tag index (needs both the index file and its id map)
    new_tag_index, new_tag_ids = None, None
    if TAG_INDEX_PATH.exists() and TAG_MAP_PATH.exists():
        print("[Reload] Loading tag index")
        new_tag_index = faiss.read_index(str(TAG_INDEX_PATH))
        new_tag_ids = json.loads(TAG_MAP_PATH.read_text(encoding="utf-8"))

    # Publish the fully loaded state in one go.
    model = new_model
    loaded_embedding_model_name = embedding_model_name
    chunk_index, chunk_ids = new_chunk_index, new_chunk_ids
    tag_index, tag_ids = new_tag_index, new_tag_ids

    print("[Reload] Completed")
# ============================================================
# Startup
# ============================================================
# NOTE(review): recent FastAPI releases document `on_event` as deprecated in
# favour of lifespan handlers — confirm against the installed version.
@app.on_event("startup")
def startup_event():
    """Load the model and indexes when the application starts.

    Any exception raised by load_all() propagates out of this handler.
    """
    load_all()
    print("[VectorService] Ready")
# ============================================================
# Endpoints
# ============================================================
@app.get("/health")
def health():
    """Report which pieces of module state are currently loaded.

    ``status`` is always ``"ok"``; the three flags are True when the
    corresponding global is not ``None``.
    """
    components = (
        ("chunk_index_loaded", chunk_index),
        ("tag_index_loaded", tag_index),
        ("model_loaded", model),
    )
    report = {"status": "ok"}
    for flag_name, component in components:
        report[flag_name] = component is not None
    return report
@app.post("/reload")
def reload():
    """Re-run load_all() on demand and report the outcome.

    Returns:
        ``{"status": "reloaded"}`` when load_all() completed.

    Raises:
        HTTPException: status 500 with the error text as detail when
            load_all() raised; the original exception is kept as the cause
            so the traceback shows what failed.
    """
    try:
        load_all()
    except Exception as e:
        # Chain explicitly so the root cause is not reported as an error that
        # happened "during handling" of another one.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"status": "reloaded"}
def _search_index(index, ids, query: str, limit: int, unavailable_detail: str) -> List[Any]:
    """Embed ``query`` and return up to ``limit`` nearest entries of ``index``.

    Args:
        index: FAISS index to search, or None when it is not loaded.
        ids: id map indexed by the positions FAISS returns, or None.
        query: text to embed with the module-level ``model``.
        limit: requested number of neighbours.
        unavailable_detail: detail text for the 503 raised when the index or
            its id map is not loaded.

    Returns:
        A list of ``{"chunk_id": ..., "score": float}`` dicts in the order
        FAISS returned them. Empty when ``limit`` is below 1 or the index
        holds no vectors.

    Raises:
        HTTPException: status 503 when the index, its id map or the embedding
            model is not loaded.
    """
    if index is None or ids is None:
        raise HTTPException(status_code=503, detail=unavailable_detail)
    if model is None:
        raise HTTPException(status_code=503, detail="Embedding model not available")

    # A non-positive k makes the FAISS search fail and would surface as a 500.
    if limit < 1:
        return []

    # Asking for more neighbours than the index holds only yields -1 padding,
    # so cap k at the index size; this also bounds the result arrays FAISS
    # allocates for very large limits.
    k = min(limit, int(index.ntotal))
    if k < 1:
        return []

    query_vec = np.ascontiguousarray(
        model.encode([query], normalize_embeddings=True), dtype="float32"
    )
    scores, indices = index.search(query_vec, k)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        position = int(idx)
        # Skip FAISS' -1 "no neighbour" marker and any position the id map
        # does not cover.
        if position < 0 or position >= len(ids):
            continue
        results.append({
            "chunk_id": ids[position],
            "score": float(score),
        })
    return results


@app.post("/search-chunks")
def search_chunks(req: SearchRequest):
    """Return the nearest chunk ids for ``req.query`` with their scores.

    Responds with 503 while the chunk index is not loaded.
    """
    return _search_index(
        chunk_index, chunk_ids, req.query, req.limit, "Chunk index not available"
    )


@app.post("/search-tags")
def search_tags(req: SearchRequest):
    """Return the nearest tag ids for ``req.query`` with their scores.

    The id is emitted under the key ``chunk_id`` (not ``tag_id``); the PHP
    TagVectorSearchClient added in this commit reads that key, so it is kept
    as is. Responds with 503 while the tag index is not loaded.
    """
    return _search_index(
        tag_index, tag_ids, req.query, req.limit, "Tag index not available"
    )