first commit
This commit is contained in:
55
src/Vector/VectorSearchClient.php
Normal file
55
src/Vector/VectorSearchClient.php
Normal file
@@ -0,0 +1,55 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Vector;
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Client for the `vector_search.py` helper script that lives in the vector directory.
 *
 * The script is started as a child process. Its standard output is expected to be a
 * JSON document, which is decoded and returned to the caller. Every failure mode
 * (missing script, non-zero exit code, empty or invalid output) yields an empty array.
 */
final class VectorSearchClient
{
    /**
     * @param string          $vectorDir   Directory containing `vector_search.py` and, optionally, a `.venv`.
     * @param LoggerInterface $agentLogger Receives the script path, the raw script output and failure diagnostics.
     */
    public function __construct(
        private readonly string $vectorDir,
        private LoggerInterface $agentLogger,
    ) {
    }

    /**
     * Runs the vector search script for the given query.
     *
     * @param string $query Search text, passed to the script as its first argument.
     * @param int    $limit Maximum number of hits, passed to the script as its second argument.
     *
     * @return array Decoded JSON output of the script, or an empty array on any failure.
     */
    public function search(string $query, int $limit = 5): array
    {
        $baseDir = rtrim($this->vectorDir, '/');
        $script = $baseDir . '/vector_search.py';
        $this->agentLogger->info("Run vector search script $script");

        if (!is_file($script)) {
            return [];
        }

        // A non-positive limit cannot produce hits; skip the expensive process start.
        if ($limit < 1) {
            return [];
        }

        // Prefer the interpreter of a local virtualenv, otherwise rely on PATH.
        $venvPython = $baseDir . '/.venv/bin/python';
        $pythonBin = is_file($venvPython) ? $venvPython : 'python3';

        [$exitCode, $stdout, $stderr] = $this->runProcess([$pythonBin, $script, $query, (string) $limit]);
        $json = trim($stdout);

        if ($exitCode !== 0 || $json === '') {
            // The script prints its own "ERROR: ..." messages on stdout, so log both streams.
            $this->agentLogger->warning('Vector search script failed', [
                'exitCode' => $exitCode,
                'stdout' => $json,
                'stderr' => $stderr,
            ]);

            return [];
        }

        $this->agentLogger->info($json);

        try {
            $decoded = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            $this->agentLogger->warning('Vector search script returned invalid JSON', [
                'error' => $e->getMessage(),
                'stderr' => $stderr,
            ]);

            return [];
        }

        // Valid JSON may still be a scalar; only an array satisfies the return type.
        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Executes a command without a shell and collects its exit code, stdout and stderr.
     *
     * stderr is kept apart from stdout so that diagnostics written by the child
     * cannot corrupt the JSON payload. It is redirected into a temporary file rather
     * than a pipe so that a chatty child cannot block on a full, unread stderr pipe.
     *
     * @param list<string> $command Program followed by its arguments.
     *
     * @return array{0: int, 1: string, 2: string} Exit code (-1 if the process could not be started), stdout, stderr.
     */
    private function runProcess(array $command): array
    {
        $stderrFile = tmpfile();
        $descriptors = [1 => ['pipe', 'w']];
        if ($stderrFile !== false) {
            $descriptors[2] = $stderrFile;
        }

        $process = proc_open($command, $descriptors, $pipes);
        if ($process === false) {
            if ($stderrFile !== false) {
                fclose($stderrFile);
            }

            return [-1, '', 'Unable to start process'];
        }

        $stdout = stream_get_contents($pipes[1]);
        fclose($pipes[1]);
        $exitCode = proc_close($process);

        $stderr = '';
        if ($stderrFile !== false) {
            // The child advanced the shared file offset while writing; go back to the start.
            rewind($stderrFile);
            $stderr = trim((string) stream_get_contents($stderrFile));
            fclose($stderrFile);
        }

        return [$exitCode, $stdout === false ? '' : $stdout, $stderr];
    }
}
|
||||
89
src/Vector/vector_ingest.py
Normal file
89
src/Vector/vector_ingest.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
"""Build the vector index for the knowledge chunks.

Usage: vector_ingest.py <vectorDir> <knowledgeDir>

Reads <knowledgeDir>/index.json, loads every chunk file it references from
<knowledgeDir>/chunks/, embeds the chunk texts with the "all-MiniLM-L6-v2"
sentence-transformers model and writes two files into <vectorDir>:

* vector.index      - the FAISS inner-product index
* vector_meta.json  - JSON list of chunk file names, in index order

Messages are printed on stdout. Exit codes:
    2   missing command line arguments
    10  faiss is not importable
    11  sentence-transformers is not importable
    20  index.json does not exist
    21  no usable chunk was found
    22  index.json could not be read or parsed
"""

import sys
import json
from pathlib import Path

# ---------------------------------------------------------
# Argument handling
# ---------------------------------------------------------
if len(sys.argv) < 3:
    print("ERROR: Missing arguments (vectorDir, knowledgeDir)")
    sys.exit(2)

vector_dir = Path(sys.argv[1]).resolve()
knowledge_dir = Path(sys.argv[2]).resolve()

index_json = knowledge_dir / "index.json"
index_out = vector_dir / "vector.index"
meta_out = vector_dir / "vector_meta.json"

# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
# Any failure while importing is reported with a dedicated exit code
# instead of a traceback.
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.")
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.")
    sys.exit(11)

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_json.is_file():
    print(f"ERROR: index.json not found at {index_json}")
    sys.exit(20)

# ---------------------------------------------------------
# Load chunks from index.json
# ---------------------------------------------------------
try:
    with open(index_json, "r", encoding="utf-8") as f:
        data = json.load(f)
except (OSError, ValueError) as exc:
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    print(f"ERROR: Could not read index.json: {exc}")
    sys.exit(22)

# Only a JSON array can contain usable entries; anything else yields no chunks.
entries = data if isinstance(data, list) else []

texts = []
ids = []

for entry in entries:
    # Entries that are not objects cannot carry a "file" key; skip them
    # instead of failing on the subscript below.
    if not isinstance(entry, dict):
        continue

    file_name = entry.get("file")
    if not isinstance(file_name, str):
        continue

    chunk_path = knowledge_dir / "chunks" / file_name
    if not chunk_path.is_file():
        continue

    text = chunk_path.read_text(encoding="utf-8").strip()
    if not text:
        continue

    texts.append(text)
    ids.append(file_name)

if not texts:
    print("ERROR: No chunks loaded from index.json")
    sys.exit(21)

# ---------------------------------------------------------
# Build vector index
# ---------------------------------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
# Embeddings are normalized before being added to the inner-product index.
embeddings = model.encode(texts, normalize_embeddings=True)

dim = embeddings.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(embeddings)

# Make sure the output directory exists before writing into it.
vector_dir.mkdir(parents=True, exist_ok=True)

faiss.write_index(index, str(index_out))

# Position i in this list names the chunk stored at index position i.
with open(meta_out, "w", encoding="utf-8") as f:
    json.dump(ids, f)

print(f"Indexed {len(ids)} chunks.")
|
||||
72
src/Vector/vector_search.py
Normal file
72
src/Vector/vector_search.py
Normal file
@@ -0,0 +1,72 @@
|
||||
#!/usr/bin/env python3
"""Query the vector index and print the best matching chunks as JSON.

Usage: vector_search.py <query> <limit>

Loads vector.index and vector_meta.json from the directory this script lives
in, embeds the query with the "all-MiniLM-L6-v2" sentence-transformers model
and prints a JSON list of {"chunk_id": ..., "score": ...} objects on stdout.

Error messages are printed on stdout as well. Exit codes:
    2   missing or invalid command line arguments
    10  faiss is not importable
    11  sentence-transformers is not importable
    20  vector.index or vector_meta.json is missing
"""

import sys
import json
from pathlib import Path

# ---------------------------------------------------------
# Argument handling
# ---------------------------------------------------------
if len(sys.argv) < 3:
    print("ERROR: Missing arguments (query, limit)")
    sys.exit(2)

query = sys.argv[1]

try:
    limit = int(sys.argv[2])
except ValueError:
    print("ERROR: limit must be an integer")
    sys.exit(2)

if limit < 1:
    print("ERROR: limit must be a positive integer")
    sys.exit(2)

vector_dir = Path(__file__).resolve().parent
index_path = vector_dir / "vector.index"
meta_path = vector_dir / "vector_meta.json"

# ---------------------------------------------------------
# Dependency checks (controlled)
# ---------------------------------------------------------
# Any failure while importing is reported with a dedicated exit code
# instead of a traceback.
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.")
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.")
    sys.exit(11)

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_path.is_file() or not meta_path.is_file():
    print("ERROR: Vector index not found. Run vector ingest first.")
    sys.exit(20)

# ---------------------------------------------------------
# Load model and index
# ---------------------------------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
query_vec = model.encode([query], normalize_embeddings=True)

index = faiss.read_index(str(index_path))

with open(meta_path, "r", encoding="utf-8") as f:
    ids = json.load(f)

# ---------------------------------------------------------
# Search
# ---------------------------------------------------------
scores, indices = index.search(query_vec, limit)

results = []
for score, idx in zip(scores[0], indices[0]):
    position = int(idx)

    # Negative positions mark "no neighbour"; positions past the end of the
    # metadata list mean vector_meta.json does not match vector.index.
    if position < 0 or position >= len(ids):
        continue

    results.append({
        "chunk_id": ids[position],
        "score": float(score)
    })

print(json.dumps(results))
|
||||
Reference in New Issue
Block a user