first commit

This commit is contained in:
team 1
2026-02-11 14:15:08 +01:00
parent a4742c2c38
commit aa7d362bc3
58 changed files with 9999 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
<?php
declare(strict_types=1);
namespace App\Vector;
use Psr\Log\LoggerInterface;
/**
 * Thin client around the `vector_search.py` helper script.
 *
 * The script is looked up inside the configured vector directory and executed
 * through the shell; its output is decoded as JSON and returned to the caller.
 * Every failure mode results in an empty array so callers never have to deal
 * with exceptions from this class.
 */
final class VectorSearchClient
{
    private const SCRIPT_NAME = 'vector_search.py';
    private const VENV_PYTHON = '.venv/bin/python';
    private const FALLBACK_PYTHON = 'python3';

    public function __construct(
        private readonly string $vectorDir,
        private LoggerInterface $agentLogger,
    ) {
    }

    /**
     * Runs the vector search script for the given query.
     *
     * @param string $query Query text, passed to the script as its first argument.
     * @param int    $limit Maximum number of results, passed as the second argument.
     *                      Values below 1 yield an empty result without running the script.
     *
     * @return array Decoded JSON emitted by the script, or an empty array when the
     *               script is missing, exits non-zero, prints nothing, or prints no
     *               decodable JSON array/object. The element shape is defined by the
     *               script and is not validated here.
     */
    public function search(string $query, int $limit = 5): array
    {
        // Normalise once so the script path and the venv path are built the same way.
        $baseDir = rtrim($this->vectorDir, '/');
        $script = $baseDir . '/' . self::SCRIPT_NAME;
        $this->agentLogger->info("Run vector search script $script");

        if (!is_file($script)) {
            $this->agentLogger->warning("Vector search script not found: $script");

            return [];
        }

        // A non-positive limit cannot produce results; skip the costly process start.
        if ($limit < 1) {
            return [];
        }

        // Prefer the interpreter of a virtualenv inside the vector directory.
        $venvPython = $baseDir . '/' . self::VENV_PYTHON;
        $pythonBin = is_file($venvPython) ? $venvPython : self::FALLBACK_PYTHON;

        // stderr is merged into stdout so diagnostics can be logged on failure;
        // decodeOutput() tolerates the extra non-JSON lines this may introduce.
        $cmd = sprintf(
            '%s %s %s %d 2>&1',
            escapeshellarg($pythonBin),
            escapeshellarg($script),
            escapeshellarg($query),
            $limit,
        );

        $out = [];
        $exitCode = -1;
        $lastLine = exec($cmd, $out, $exitCode);

        if ($lastLine === false || $exitCode !== 0 || $out === []) {
            $this->agentLogger->warning(
                sprintf('Vector search failed (exit code %d): %s', $exitCode, implode("\n", $out)),
            );

            return [];
        }

        $this->agentLogger->info(implode("\n", $out));

        $results = $this->decodeOutput($out);
        if ($results === null) {
            $this->agentLogger->warning('Vector search output contained no decodable JSON.');

            return [];
        }

        return $results;
    }

    /**
     * Extracts the JSON payload from the captured output lines.
     *
     * The complete output is tried first. If that fails (for example because
     * warnings were printed alongside the payload), each line is tried on its
     * own, last line first.
     *
     * @param list<string> $lines Output lines as collected by exec().
     *
     * @return array|null The decoded array, or null when nothing decodes to an array.
     */
    private function decodeOutput(array $lines): ?array
    {
        $candidates = [implode("\n", $lines), ...array_reverse($lines)];

        foreach ($candidates as $candidate) {
            try {
                $decoded = json_decode($candidate, true, 512, JSON_THROW_ON_ERROR);
            } catch (\JsonException) {
                continue;
            }

            // Scalars are valid JSON but never a valid search result.
            if (is_array($decoded)) {
                return $decoded;
            }
        }

        return null;
    }
}

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import sys
import json
from pathlib import Path
# ---------------------------------------------------------
# Argument handling
# ---------------------------------------------------------
# Two positional arguments are required: the directory that receives the index
# files, and the knowledge directory containing index.json and chunks/.
if len(sys.argv) < 3:
    print("ERROR: Missing arguments (vectorDir, knowledgeDir)")
    sys.exit(2)

vector_dir = Path(sys.argv[1]).resolve()
knowledge_dir = Path(sys.argv[2]).resolve()
index_json = knowledge_dir / "index.json"
index_out = vector_dir / "vector.index"
meta_out = vector_dir / "vector_meta.json"

# ---------------------------------------------------------
# Dependency checks
# ---------------------------------------------------------
# The guarded imports bind the names used below, so no second import is needed.
# Each missing dependency gets its own exit code (10, 11).
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.")
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.")
    sys.exit(11)

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_json.is_file():
    print(f"ERROR: index.json not found at {index_json}")
    sys.exit(20)

# ---------------------------------------------------------
# Load chunks from index.json
# ---------------------------------------------------------
# json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses;
# report them in the same controlled style as the other errors.
try:
    with open(index_json, "r", encoding="utf-8") as f:
        data = json.load(f)
except (OSError, ValueError) as exc:
    print(f"ERROR: index.json could not be read: {exc}")
    sys.exit(22)

# Anything other than a list of entries falls through to the
# "No chunks loaded" error below.
entries = data if isinstance(data, list) else []

texts = []
ids = []
for entry in entries:
    # Only dict entries carrying a string "file" value can name a chunk file.
    if not isinstance(entry, dict):
        continue
    file_name = entry.get("file")
    if not isinstance(file_name, str):
        continue
    chunk_path = knowledge_dir / "chunks" / file_name
    if not chunk_path.is_file():
        continue
    text = chunk_path.read_text(encoding="utf-8").strip()
    if not text:
        continue
    texts.append(text)
    ids.append(file_name)

if not texts:
    print("ERROR: No chunks loaded from index.json")
    sys.exit(21)

# ---------------------------------------------------------
# Build vector index
# ---------------------------------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(texts, normalize_embeddings=True)
dim = embeddings.shape[1]
# Inner-product index over embeddings requested as normalized from the encoder.
index = faiss.IndexFlatIP(dim)
index.add(embeddings)
faiss.write_index(index, str(index_out))

# Position i in this list is the chunk file whose embedding was added i-th.
with open(meta_out, "w", encoding="utf-8") as f:
    json.dump(ids, f)

print(f"Indexed {len(ids)} chunks.")

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env python3
import sys
import json
from pathlib import Path
# ---------------------------------------------------------
# Argument handling
# ---------------------------------------------------------
# Two positional arguments are required: the query text and the result limit.
if len(sys.argv) < 3:
    print("ERROR: Missing arguments (query, limit)")
    sys.exit(2)

query = sys.argv[1]
try:
    limit = int(sys.argv[2])
except ValueError:
    # Report a non-numeric limit like every other argument problem.
    print("ERROR: limit must be an integer")
    sys.exit(2)

# Index and metadata are expected next to this script.
vector_dir = Path(__file__).resolve().parent
index_path = vector_dir / "vector.index"
meta_path = vector_dir / "vector_meta.json"

# ---------------------------------------------------------
# Dependency checks (controlled)
# ---------------------------------------------------------
# The guarded imports bind the names used below, so no second import is needed.
try:
    import faiss
except Exception:
    print("ERROR: Python module 'faiss' not found.")
    sys.exit(10)

try:
    from sentence_transformers import SentenceTransformer
except Exception:
    print("ERROR: Python module 'sentence-transformers' not found.")
    sys.exit(11)

# ---------------------------------------------------------
# File checks
# ---------------------------------------------------------
if not index_path.is_file() or not meta_path.is_file():
    print("ERROR: Vector index not found. Run vector ingest first.")
    sys.exit(20)

# A non-positive limit cannot return anything; answer with an empty result
# instead of loading the model and failing inside the index search.
if limit < 1:
    print(json.dumps([]))
    sys.exit(0)

# ---------------------------------------------------------
# Load model and index
# ---------------------------------------------------------
model = SentenceTransformer("all-MiniLM-L6-v2")
query_vec = model.encode([query], normalize_embeddings=True)
index = faiss.read_index(str(index_path))
with open(meta_path, "r", encoding="utf-8") as f:
    ids = json.load(f)

# ---------------------------------------------------------
# Search
# ---------------------------------------------------------
scores, indices = index.search(query_vec, limit)

results = []
for score, idx in zip(scores[0], indices[0]):
    position = int(idx)
    # -1 marks an unfilled result slot; positions beyond the metadata list
    # mean vector.index and vector_meta.json are out of sync.
    if position < 0 or position >= len(ids):
        continue
    results.append({
        "chunk_id": ids[position],
        "score": float(score),
    })

# The JSON result is printed as a single line on stdout.
print(json.dumps(results))