optimize as sales rag

This commit is contained in:
team2
2026-02-27 21:03:59 +01:00
parent efa9b17c2f
commit 3a5804e44c
6 changed files with 541 additions and 213 deletions

View File

@@ -9,7 +9,16 @@ use Symfony\Contracts\HttpClient\HttpClientInterface;
final class VectorSearchClient
{
/**
 * Soft minimum similarity threshold.
 * Deliberately softer (lower) than the tag gate to allow broader recall.
 */
private const MIN_SCORE = 0.30;
/**
* Hard limit clamp to avoid abusive queries.
*/
private const MAX_LIMIT = 200;
private HttpClientInterface $http;
private string $serviceUrl;
@@ -26,18 +35,34 @@ final class VectorSearchClient
}
/**
 * Standard global search.
 *
 * Trims the query, clamps the limit through clampLimit(), and hands the
 * resulting payload to executeSearch().
 *
 * @param string $query Search text; surrounding whitespace is trimmed before sending.
 * @param int    $limit Requested result limit; passed through clampLimit().
 *
 * @return array<int, array{
 *     chunk_id:string,
 *     score:float,
 *     document_id:?string,
 *     chunk_index:?int
 * }>
 */
public function search(string $query, int $limit = 5): array
{
    // Each payload key appears exactly once: a repeated key in an array
    // literal is silently overwritten by the later entry.
    return $this->executeSearch([
        'query' => trim($query),
        'limit' => $this->clampLimit($limit),
    ]);
}
/**
* Scoped search: only within specific documents
* Scoped search: only inside specific documents.
*
* @param array<int,string> $docIds
*
* @return array<int, array{
* chunk_id:string,
* score:float,
* document_id:?string,
* chunk_index:?int
* }>
*/
public function searchScoped(
string $query,
@@ -49,14 +74,23 @@ final class VectorSearchClient
}
return $this->executeSearch([
'query' => $query,
'limit' => $limit,
'query' => trim($query),
'limit' => $this->clampLimit($limit),
'doc_ids' => array_values($docIds),
]);
}
/**
* Shared HTTP logic (no duplication)
* Shared HTTP logic.
*
* @param array<string,mixed> $payload
*
* @return array<int, array{
* chunk_id:string,
* score:float,
* document_id:?string,
* chunk_index:?int
* }>
*/
private function executeSearch(array $payload): array
{
@@ -71,7 +105,10 @@ final class VectorSearchClient
);
if ($response->getStatusCode() !== 200) {
$this->agentLogger->error('Vector service returned non-200 (chunks)');
$this->agentLogger->error(
'Vector service returned non-200 (chunks)',
['status' => $response->getStatusCode()]
);
return [];
}
@@ -79,12 +116,14 @@ final class VectorSearchClient
} catch (\Throwable $e) {
$this->agentLogger->error(
'Vector service unreachable (chunks): ' . $e->getMessage()
'Vector service unreachable (chunks)',
['error' => $e->getMessage()]
);
return [];
}
if (!is_array($data)) {
$this->agentLogger->warning('Vector service returned invalid payload (chunks)');
return [];
}
@@ -109,12 +148,41 @@ final class VectorSearchClient
continue;
}
$documentId = null;
if (isset($row['document_id']) && is_string($row['document_id']) && $row['document_id'] !== '') {
$documentId = $row['document_id'];
}
$chunkIndex = null;
if (isset($row['chunk_index'])) {
if (is_int($row['chunk_index'])) {
$chunkIndex = $row['chunk_index'];
} elseif (is_string($row['chunk_index']) && ctype_digit($row['chunk_index'])) {
$chunkIndex = (int)$row['chunk_index'];
}
}
$filtered[] = [
'chunk_id' => $chunkId,
'score' => $score,
'chunk_id' => $chunkId,
'score' => $score,
'document_id' => $documentId,
'chunk_index' => $chunkIndex,
];
}
return $filtered;
}
/**
 * Restricts a requested limit to the inclusive range 1..self::MAX_LIMIT.
 *
 * @param int $limit Requested limit (may be zero, negative, or oversized).
 *
 * @return int The limit, raised to 1 or capped at self::MAX_LIMIT when out of range.
 */
private function clampLimit(int $limit): int
{
    // Cap at the upper bound first, then lift anything below 1 up to 1.
    $capped = min($limit, self::MAX_LIMIT);

    return max(1, $capped);
}
}