add multi model
This commit is contained in:
@@ -18,3 +18,10 @@ parameters:
|
|||||||
# Hard upper bound for generated output tokens. Prevents runaway repeated answers
|
# Hard upper bound for generated output tokens. Prevents runaway repeated answers
|
||||||
# before they can become very long streamed or blocking responses.
|
# before they can become very long streamed or blocking responses.
|
||||||
retriex.llm.num_predict: 2048
|
retriex.llm.num_predict: 2048
|
||||||
|
|
||||||
|
# Optional model override per existing LLM call.
|
||||||
|
# null or an empty value keeps the active default model from ModelGenerationConfig.
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: null
|
||||||
|
shop_query_optimization: null
|
||||||
|
final_answer: null
|
||||||
|
|||||||
@@ -200,6 +200,7 @@ services:
|
|||||||
|
|
||||||
App\Agent\AgentRunner:
|
App\Agent\AgentRunner:
|
||||||
arguments:
|
arguments:
|
||||||
|
$llmCallModels: '%retriex.llm.call_models%'
|
||||||
$debug: '%env(bool:AI_DEBUG)%'
|
$debug: '%env(bool:AI_DEBUG)%'
|
||||||
$logPrompt: '%env(bool:AI_LOG_PROMPT)%'
|
$logPrompt: '%env(bool:AI_LOG_PROMPT)%'
|
||||||
$logContext: '%env(bool:AI_LOG_CONTEXT)%'
|
$logContext: '%env(bool:AI_LOG_CONTEXT)%'
|
||||||
|
|||||||
@@ -0,0 +1,182 @@
|
|||||||
|
# RetrieX Patch 97 - Per-LLM-Call Model Override
|
||||||
|
|
||||||
|
## Ziel
|
||||||
|
|
||||||
|
Dieser Patch ergaenzt eine minimale YAML-Konfiguration, mit der fuer jeden bestehenden LLM-Call optional ein anderes Ollama-Modell angegeben werden kann.
|
||||||
|
|
||||||
|
Der Patch aendert bewusst keine fachliche Logik:
|
||||||
|
|
||||||
|
- keine Aenderung an Retrieval, Scoring oder Ranking
|
||||||
|
- keine Aenderung an Shopquery-Guards oder Shop-Matching
|
||||||
|
- keine Aenderung an PromptBuilder-Regeln
|
||||||
|
- keine neue Admin-Pflege fuer Modellprofile
|
||||||
|
- keine neue Routing- oder Multi-Agent-Logik
|
||||||
|
|
||||||
|
Wenn fuer einen Call kein Modell gesetzt ist, nutzt der Call wie bisher das aktive Standardmodell aus `ModelGenerationConfig`.
|
||||||
|
|
||||||
|
## Neue YAML-Konfiguration
|
||||||
|
|
||||||
|
Datei: `config/retriex/model.yaml`
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: null
|
||||||
|
shop_query_optimization: null
|
||||||
|
final_answer: null
|
||||||
|
```
|
||||||
|
|
||||||
|
Beispiel, um Normalisierung und Shopquery-Optimierung ueber ein schnelleres Modell laufen zu lassen:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: 'llama3.1'
|
||||||
|
shop_query_optimization: 'llama3.1'
|
||||||
|
final_answer: null
|
||||||
|
```
|
||||||
|
|
||||||
|
`null` oder ein leerer Wert bedeutet: Standardmodell verwenden.
|
||||||
|
|
||||||
|
## Geaenderte Dateien
|
||||||
|
|
||||||
|
- `config/retriex/model.yaml`
|
||||||
|
- `config/services.yaml`
|
||||||
|
- `src/Agent/AgentRunner.php`
|
||||||
|
- `src/Infrastructure/OllamaClient.php`
|
||||||
|
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||||
|
|
||||||
|
## Technische Umsetzung
|
||||||
|
|
||||||
|
### 1. OllamaClient unterstuetzt optionalen Modellnamen pro Call
|
||||||
|
|
||||||
|
Die bestehenden Methoden akzeptieren nun optional einen Modellnamen:
|
||||||
|
|
||||||
|
```php
|
||||||
|
public function stream(string $prompt, ?string $modelName = null): Generator
|
||||||
|
public function generate(string $prompt, ?string $modelName = null): string
|
||||||
|
```
|
||||||
|
|
||||||
|
Der Request-Payload nutzt dann:
|
||||||
|
|
||||||
|
- den uebergebenen Modellnamen, wenn er nicht leer ist
|
||||||
|
- sonst das bestehende Standardmodell aus `ModelGenerationConfig`
|
||||||
|
|
||||||
|
Alle sonstigen Optionen wie Temperatur, `top_k`, `top_p`, `repeat_penalty`, `num_ctx` und `num_predict` bleiben unveraendert.
|
||||||
|
|
||||||
|
### 2. AgentRunner reicht nur den konfigurierten Override weiter
|
||||||
|
|
||||||
|
Die drei bestehenden LLM-Calls wurden minimal erweitert:
|
||||||
|
|
||||||
|
- `input_normalization`
|
||||||
|
- `shop_query_optimization`
|
||||||
|
- `final_answer`
|
||||||
|
|
||||||
|
Die fachliche Verarbeitung vor und nach diesen Calls bleibt unveraendert.
|
||||||
|
|
||||||
|
### 3. Effective Config Dump zeigt die Werte an
|
||||||
|
|
||||||
|
`RetriexEffectiveConfigProvider` gibt die neue Konfiguration unter `llm.call_models` aus und validiert sie grob:
|
||||||
|
|
||||||
|
- `llm.call_models` muss eine Map sein
|
||||||
|
- Werte muessen `null` oder String sein; ein leerer String erzeugt nur eine Warnung und nutzt das Standardmodell
|
||||||
|
- unbekannte Call-Namen erzeugen nur eine Warnung, keinen Fehler
|
||||||
|
|
||||||
|
## Bewusst nicht geaendert
|
||||||
|
|
||||||
|
- Keine separaten Modellparameter pro Call.
|
||||||
|
- Keine getrennten Timeouts pro Call.
|
||||||
|
- Kein JSON-Schema-Output fuer Shopquery.
|
||||||
|
- Keine Aenderung daran, wann welcher LLM-Call ausgefuehrt wird.
|
||||||
|
- Keine Aenderung an bestehenden Follow-up-, Accessory-, Code- oder Product-Identity-Guards.
|
||||||
|
|
||||||
|
## Lokale Checks
|
||||||
|
|
||||||
|
Im Patch-Arbeitsverzeichnis ausgefuehrt:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
php -l src/Infrastructure/OllamaClient.php
|
||||||
|
php -l src/Agent/AgentRunner.php
|
||||||
|
php -l src/Config/RetriexEffectiveConfigProvider.php
|
||||||
|
python3 - <<'PY'
|
||||||
|
import yaml, pathlib
|
||||||
|
for path in pathlib.Path('config/retriex').glob('*.yaml'):
|
||||||
|
with path.open(encoding='utf-8') as f:
|
||||||
|
yaml.safe_load(f)
|
||||||
|
print('all retriex yaml OK')
|
||||||
|
PY
|
||||||
|
```
|
||||||
|
|
||||||
|
Nicht lokal ausfuehrbar, weil `vendor/` im ZIP nicht enthalten ist:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
bin/console mto:agent:config:validate
|
||||||
|
bin/console mto:agent:regression:test
|
||||||
|
bin/console mto:agent:config:audit-source --details
|
||||||
|
bin/console mto:agent:config:audit-patterns --details
|
||||||
|
```
|
||||||
|
|
||||||
|
## Test nach Einspielen
|
||||||
|
|
||||||
|
### Default-Verhalten
|
||||||
|
|
||||||
|
Mit der ausgelieferten Konfiguration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: null
|
||||||
|
shop_query_optimization: null
|
||||||
|
final_answer: null
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung:
|
||||||
|
|
||||||
|
- Alle LLM-Calls nutzen weiterhin das aktive Standardmodell.
|
||||||
|
- Bestehende Regressionen muessen unveraendert gruen bleiben.
|
||||||
|
|
||||||
|
### Schnelles Modell fuer Shopquery testen
|
||||||
|
|
||||||
|
Konfiguration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: null
|
||||||
|
shop_query_optimization: 'llama3.1'
|
||||||
|
final_answer: null
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung:
|
||||||
|
|
||||||
|
- Nur die Shopquery-Optimierung sendet `model: llama3.1` an Ollama.
|
||||||
|
- Die finale Antwort nutzt weiterhin das aktive Standardmodell.
|
||||||
|
- Shopquery-Guards und Shop-Ergebnislogik bleiben identisch.
|
||||||
|
|
||||||
|
### Schnelles Modell fuer Normalisierung und Shopquery testen
|
||||||
|
|
||||||
|
Konfiguration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: 'llama3.1'
|
||||||
|
shop_query_optimization: 'llama3.1'
|
||||||
|
final_answer: null
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung:
|
||||||
|
|
||||||
|
- Input-Normalisierung und Shopquery-Optimierung verwenden `llama3.1`.
|
||||||
|
- Die finale Antwort verwendet weiterhin das aktive Standardmodell.
|
||||||
|
|
||||||
|
### Finalantwort testweise umstellen
|
||||||
|
|
||||||
|
Konfiguration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
retriex.llm.call_models:
|
||||||
|
input_normalization: null
|
||||||
|
shop_query_optimization: null
|
||||||
|
final_answer: 'llama3.1'
|
||||||
|
```
|
||||||
|
|
||||||
|
Erwartung:
|
||||||
|
|
||||||
|
- Nur der finale Antwortstream nutzt `llama3.1`.
|
||||||
|
- Die restliche Pipeline bleibt unveraendert.
|
||||||
@@ -39,6 +39,7 @@ final readonly class AgentRunner
|
|||||||
private LoggerInterface $agentLogger,
|
private LoggerInterface $agentLogger,
|
||||||
private AgentRunnerConfig $agentRunnerConfig,
|
private AgentRunnerConfig $agentRunnerConfig,
|
||||||
private LanguageCleanupConfig $languageCleanupConfig,
|
private LanguageCleanupConfig $languageCleanupConfig,
|
||||||
|
private array $llmCallModels,
|
||||||
private bool $debug,
|
private bool $debug,
|
||||||
private bool $logPrompt,
|
private bool $logPrompt,
|
||||||
private bool $logContext,
|
private bool $logContext,
|
||||||
@@ -46,6 +47,18 @@ final readonly class AgentRunner
|
|||||||
$this->systemMsgOn = true;
|
$this->systemMsgOn = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the optional model override configured for a single LLM call.
 *
 * The call name is looked up in the injected call-model map. A missing entry,
 * a non-string value, or a string that is blank after trimming all produce
 * null, which signals "no override" to the caller.
 *
 * @param string $callName Key of the LLM call, e.g. 'final_answer'.
 *
 * @return string|null Trimmed model name, or null when no override applies.
 */
private function llmCallModel(string $callName): ?string
{
    $configured = $this->llmCallModels[$callName] ?? null;

    // Non-strings are treated exactly like a blank string: no override.
    $trimmed = is_string($configured) ? trim($configured) : '';

    if ($trimmed === '') {
        return null;
    }

    return $trimmed;
}
|
||||||
|
|
||||||
public function run(string $prompt, string $userId, bool $forceFullContext = false, string $requestContextHint = ''): Generator
|
public function run(string $prompt, string $userId, bool $forceFullContext = false, string $requestContextHint = ''): Generator
|
||||||
{
|
{
|
||||||
$originalPrompt = trim($prompt);
|
$originalPrompt = trim($prompt);
|
||||||
@@ -973,7 +986,7 @@ final readonly class AgentRunner
|
|||||||
$this->thinkSuppressor->reset();
|
$this->thinkSuppressor->reset();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
foreach ($this->ollamaClient->stream($normalizationPrompt) as $token) {
|
foreach ($this->ollamaClient->stream($normalizationPrompt, $this->llmCallModel('input_normalization')) as $token) {
|
||||||
if (!is_string($token)) {
|
if (!is_string($token)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1539,7 +1552,7 @@ final readonly class AgentRunner
|
|||||||
$this->thinkSuppressor->reset();
|
$this->thinkSuppressor->reset();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
foreach ($this->ollamaClient->stream($shopPrompt) as $token) {
|
foreach ($this->ollamaClient->stream($shopPrompt, $this->llmCallModel('shop_query_optimization')) as $token) {
|
||||||
if (!is_string($token)) {
|
if (!is_string($token)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -4655,7 +4668,7 @@ final readonly class AgentRunner
|
|||||||
$thinkingNoticeShown = true;
|
$thinkingNoticeShown = true;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
foreach ($this->ollamaClient->stream($finalPrompt) as $token) {
|
foreach ($this->ollamaClient->stream($finalPrompt, $this->llmCallModel('final_answer')) as $token) {
|
||||||
if (!is_string($token)) {
|
if (!is_string($token)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
'llm' => [
|
'llm' => [
|
||||||
'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'),
|
'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'),
|
||||||
'num_predict' => $this->param('retriex.llm.num_predict'),
|
'num_predict' => $this->param('retriex.llm.num_predict'),
|
||||||
|
'call_models' => $this->param('retriex.llm.call_models'),
|
||||||
],
|
],
|
||||||
'retrieval' => $this->retrievalConfig(),
|
'retrieval' => $this->retrievalConfig(),
|
||||||
'prompt' => $this->promptConfig(),
|
'prompt' => $this->promptConfig(),
|
||||||
@@ -85,6 +86,7 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
$this->validateRuntime($config['runtime'], $errors, $warnings);
|
$this->validateRuntime($config['runtime'], $errors, $warnings);
|
||||||
$this->validateIndex($config['index'], $errors, $warnings);
|
$this->validateIndex($config['index'], $errors, $warnings);
|
||||||
$this->validateModel($config['model_generation'], $errors, $warnings);
|
$this->validateModel($config['model_generation'], $errors, $warnings);
|
||||||
|
$this->validateLlm($config['llm'], $errors, $warnings);
|
||||||
$this->validateRetrieval($config['retrieval'], $errors, $warnings);
|
$this->validateRetrieval($config['retrieval'], $errors, $warnings);
|
||||||
$this->validatePrompt($config['prompt'], $errors, $warnings);
|
$this->validatePrompt($config['prompt'], $errors, $warnings);
|
||||||
$this->validateAgent($config['agent'], $errors, $warnings);
|
$this->validateAgent($config['agent'], $errors, $warnings);
|
||||||
@@ -1714,6 +1716,46 @@ final readonly class RetriexEffectiveConfigProvider
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Validates the per-call model override map of the LLM config section.
 *
 * Findings are appended to the by-reference collectors: unusable entries go
 * to $errors, tolerated-but-suspicious entries go to $warnings. Only the
 * 'call_models' key of $llm is inspected; a missing key counts as an empty map.
 *
 * @param array<string, mixed> $llm      LLM section of the effective config.
 * @param list<string>         $errors   Collector for error messages.
 * @param list<string>         $warnings Collector for warning messages.
 */
private function validateLlm(array $llm, array &$errors, array &$warnings): void
{
    $overrides = $llm['call_models'] ?? [];
    if (is_array($overrides) === false) {
        $errors[] = 'llm.call_models must be a map.';

        return;
    }

    $supportedCalls = ['input_normalization', 'shop_query_optimization', 'final_answer'];

    foreach ($overrides as $callName => $modelName) {
        // Integer keys (list-style config) and blank keys cannot name a call.
        $nameIsUsable = is_string($callName) && trim($callName) !== '';
        if ($nameIsUsable === false) {
            $errors[] = 'llm.call_models contains an invalid call name.';
            continue;
        }

        // Unknown names are reported but still have their value checked below.
        if (in_array($callName, $supportedCalls, true) === false) {
            $warnings[] = 'llm.call_models contains an unknown call name: ' . $callName . '.';
        }

        // null means "use the default model" and needs no further checks.
        if ($modelName === null) {
            continue;
        }

        if (is_string($modelName) === false) {
            $errors[] = 'llm.call_models.' . $callName . ' must be null or a string model name.';
            continue;
        }

        if (trim($modelName) === '') {
            $warnings[] = 'llm.call_models.' . $callName . ' is empty and will use the default model.';
        }
    }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param array<string, mixed> $retrieval
|
* @param array<string, mixed> $retrieval
|
||||||
* @param list<string> $errors
|
* @param list<string> $errors
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ final class OllamaClient
|
|||||||
private const LOW_SPEED_LIMIT_BYTES = 1;
|
private const LOW_SPEED_LIMIT_BYTES = 1;
|
||||||
private const LOW_SPEED_TIME_SECONDS = 45;
|
private const LOW_SPEED_TIME_SECONDS = 45;
|
||||||
private ?ModelGenerationConfig $cachedConfig = null;
|
private ?ModelGenerationConfig $cachedConfig = null;
|
||||||
private $config = null;
|
private ?ModelGenerationConfig $config = null;
|
||||||
|
|
||||||
public function __construct(
|
public function __construct(
|
||||||
private string $apiUrl,
|
private string $apiUrl,
|
||||||
@@ -29,33 +29,35 @@ final class OllamaClient
|
|||||||
/**
|
/**
|
||||||
* Public Streaming API
|
* Public Streaming API
|
||||||
*/
|
*/
|
||||||
public function stream(string $prompt): Generator
|
public function stream(string $prompt, ?string $modelName = null): Generator
|
||||||
{
|
{
|
||||||
$this->config = $this->getConfig();
|
$this->config = $this->getConfig();
|
||||||
|
|
||||||
if ($this->config->isStream()) {
|
if ($this->config->isStream()) {
|
||||||
yield from $this->streamInternal($prompt);
|
yield from $this->streamInternal($prompt, $modelName);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: Blocking generate → Generator-kompatibel ausgeben
|
// Fallback: Blocking generate with Generator-compatible output
|
||||||
yield $this->generateInternal($prompt);
|
yield $this->generateInternal($prompt, $modelName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Public Blocking API
|
* Public Blocking API
|
||||||
*/
|
*/
|
||||||
public function generate(string $prompt): string
|
public function generate(string $prompt, ?string $modelName = null): string
|
||||||
{
|
{
|
||||||
return $this->generateInternal($prompt);
|
$this->config = $this->getConfig();
|
||||||
|
|
||||||
|
return $this->generateInternal($prompt, $modelName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal streaming transport
|
* Internal streaming transport
|
||||||
*/
|
*/
|
||||||
private function streamInternal(string $prompt): Generator
|
private function streamInternal(string $prompt, ?string $modelName = null): Generator
|
||||||
{
|
{
|
||||||
$payload = $this->buildPayload($prompt, true);
|
$payload = $this->buildPayload($prompt, true, $modelName);
|
||||||
|
|
||||||
$buffer = '';
|
$buffer = '';
|
||||||
$done = false;
|
$done = false;
|
||||||
@@ -137,9 +139,9 @@ final class OllamaClient
|
|||||||
/**
|
/**
|
||||||
* Internal blocking transport
|
* Internal blocking transport
|
||||||
*/
|
*/
|
||||||
private function generateInternal(string $prompt): string
|
private function generateInternal(string $prompt, ?string $modelName = null): string
|
||||||
{
|
{
|
||||||
$payload = $this->buildPayload($prompt, false);
|
$payload = $this->buildPayload($prompt, false, $modelName);
|
||||||
|
|
||||||
$ch = curl_init($this->apiUrl);
|
$ch = curl_init($this->apiUrl);
|
||||||
if ($ch === false) {
|
if ($ch === false) {
|
||||||
@@ -173,10 +175,18 @@ final class OllamaClient
|
|||||||
/**
|
/**
|
||||||
* Central Payload Builder (DRY)
|
* Central Payload Builder (DRY)
|
||||||
*/
|
*/
|
||||||
private function buildPayload(string $prompt, bool $stream): string
|
private function buildPayload(string $prompt, bool $stream, ?string $modelName = null): string
|
||||||
{
|
{
|
||||||
|
$config = $this->getConfig();
|
||||||
|
$this->config = $config;
|
||||||
|
|
||||||
|
$effectiveModelName = trim((string) $modelName);
|
||||||
|
if ($effectiveModelName === '') {
|
||||||
|
$effectiveModelName = $config->getModelName();
|
||||||
|
}
|
||||||
|
|
||||||
return json_encode([
|
return json_encode([
|
||||||
'model' => $this->config->getModelName(),
|
'model' => $effectiveModelName,
|
||||||
'prompt' => $prompt,
|
'prompt' => $prompt,
|
||||||
'stream' => $stream,
|
'stream' => $stream,
|
||||||
'options' => $this->buildOptions()
|
'options' => $this->buildOptions()
|
||||||
|
|||||||
Reference in New Issue
Block a user