add multi model
This commit is contained in:
@@ -18,3 +18,10 @@ parameters:
|
||||
# Hard upper bound for generated output tokens. Prevents runaway repeated answers
|
||||
# before they can become very long streamed or blocking responses.
|
||||
retriex.llm.num_predict: 2048
|
||||
|
||||
# Optional model override per existing LLM call.
|
||||
# null or an empty value keeps the active default model from ModelGenerationConfig.
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: null
|
||||
    shop_query_optimization: null
|
||||
    final_answer: null
|
||||
|
||||
@@ -200,6 +200,7 @@ services:
|
||||
|
||||
App\Agent\AgentRunner:
|
||||
    arguments:
|
||||
        $llmCallModels: '%retriex.llm.call_models%'
|
||||
        $debug: '%env(bool:AI_DEBUG)%'
|
||||
        $logPrompt: '%env(bool:AI_LOG_PROMPT)%'
|
||||
        $logContext: '%env(bool:AI_LOG_CONTEXT)%'
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
# RetrieX Patch 97 - Per-LLM-Call Model Override
|
||||
|
||||
## Ziel
|
||||
|
||||
Dieser Patch ergaenzt eine minimale YAML-Konfiguration, mit der fuer jeden bestehenden LLM-Call optional ein anderes Ollama-Modell angegeben werden kann.
|
||||
|
||||
Der Patch aendert bewusst keine fachliche Logik:
|
||||
|
||||
- keine Aenderung an Retrieval, Scoring oder Ranking
|
||||
- keine Aenderung an Shopquery-Guards oder Shop-Matching
|
||||
- keine Aenderung an PromptBuilder-Regeln
|
||||
- keine neue Admin-Pflege fuer Modellprofile
|
||||
- keine neue Routing- oder Multi-Agent-Logik
|
||||
|
||||
Wenn fuer einen Call kein Modell gesetzt ist, nutzt der Call wie bisher das aktive Standardmodell aus `ModelGenerationConfig`.
|
||||
|
||||
## Neue YAML-Konfiguration
|
||||
|
||||
Datei: `config/retriex/model.yaml`
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: null
|
||||
    shop_query_optimization: null
|
||||
    final_answer: null
|
||||
```
|
||||
|
||||
Beispiel, um Normalisierung und Shopquery-Optimierung ueber ein schnelleres Modell laufen zu lassen:
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: 'llama3.1'
|
||||
    shop_query_optimization: 'llama3.1'
|
||||
    final_answer: null
|
||||
```
|
||||
|
||||
`null` oder ein leerer Wert bedeutet: Standardmodell verwenden.
|
||||
|
||||
## Geaenderte Dateien
|
||||
|
||||
- `config/retriex/model.yaml`
|
||||
- `config/services.yaml`
|
||||
- `src/Agent/AgentRunner.php`
|
||||
- `src/Infrastructure/OllamaClient.php`
|
||||
- `src/Config/RetriexEffectiveConfigProvider.php`
|
||||
|
||||
## Technische Umsetzung
|
||||
|
||||
### 1. OllamaClient unterstuetzt optionalen Modellnamen pro Call
|
||||
|
||||
Die bestehenden Methoden akzeptieren nun optional einen Modellnamen:
|
||||
|
||||
```php
|
||||
public function stream(string $prompt, ?string $modelName = null): Generator
|
||||
public function generate(string $prompt, ?string $modelName = null): string
|
||||
```
|
||||
|
||||
Der Request-Payload nutzt dann:
|
||||
|
||||
- den uebergebenen Modellnamen, wenn er nicht leer ist
|
||||
- sonst das bestehende Standardmodell aus `ModelGenerationConfig`
|
||||
|
||||
Alle sonstigen Optionen wie Temperatur, `top_k`, `top_p`, `repeat_penalty`, `num_ctx` und `num_predict` bleiben unveraendert.
|
||||
|
||||
### 2. AgentRunner reicht nur den konfigurierten Override weiter
|
||||
|
||||
Die drei bestehenden LLM-Calls wurden minimal erweitert:
|
||||
|
||||
- `input_normalization`
|
||||
- `shop_query_optimization`
|
||||
- `final_answer`
|
||||
|
||||
Die fachliche Verarbeitung vor und nach diesen Calls bleibt unveraendert.
|
||||
|
||||
### 3. Effective Config Dump zeigt die Werte an
|
||||
|
||||
`RetriexEffectiveConfigProvider` gibt die neue Konfiguration unter `llm.call_models` aus und validiert sie grob:
|
||||
|
||||
- `llm.call_models` muss eine Map sein
|
||||
- Werte muessen `null` oder String sein
|
||||
- unbekannte Call-Namen erzeugen nur eine Warnung, keinen Fehler
|
||||
|
||||
## Bewusst nicht geaendert
|
||||
|
||||
- Keine separaten Modellparameter pro Call.
|
||||
- Keine getrennten Timeouts pro Call.
|
||||
- Kein JSON-Schema-Output fuer Shopquery.
|
||||
- Keine Aenderung daran, wann welcher LLM-Call ausgefuehrt wird.
|
||||
- Keine Aenderung an bestehenden Follow-up-, Accessory-, Code- oder Product-Identity-Guards.
|
||||
|
||||
## Lokale Checks
|
||||
|
||||
Im Patch-Arbeitsverzeichnis ausgefuehrt:
|
||||
|
||||
```bash
|
||||
php -l src/Infrastructure/OllamaClient.php
|
||||
php -l src/Agent/AgentRunner.php
|
||||
php -l src/Config/RetriexEffectiveConfigProvider.php
|
||||
python3 - <<'PY'
|
||||
import yaml, pathlib
|
||||
for path in pathlib.Path('config/retriex').glob('*.yaml'):
|
||||
    with path.open(encoding='utf-8') as f:
|
||||
        yaml.safe_load(f)
|
||||
print('all retriex yaml OK')
|
||||
PY
|
||||
```
|
||||
|
||||
Nicht lokal ausfuehrbar, weil `vendor/` im ZIP nicht enthalten ist:
|
||||
|
||||
```bash
|
||||
bin/console mto:agent:config:validate
|
||||
bin/console mto:agent:regression:test
|
||||
bin/console mto:agent:config:audit-source --details
|
||||
bin/console mto:agent:config:audit-patterns --details
|
||||
```
|
||||
|
||||
## Test nach Einspielen
|
||||
|
||||
### Default-Verhalten
|
||||
|
||||
Mit der ausgelieferten Konfiguration:
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: null
|
||||
    shop_query_optimization: null
|
||||
    final_answer: null
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Alle LLM-Calls nutzen weiterhin das aktive Standardmodell.
|
||||
- Bestehende Regressionen muessen unveraendert gruen bleiben.
|
||||
|
||||
### Schnelles Modell fuer Shopquery testen
|
||||
|
||||
Konfiguration:
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: null
|
||||
    shop_query_optimization: 'llama3.1'
|
||||
    final_answer: null
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Nur die Shopquery-Optimierung sendet `model: llama3.1` an Ollama.
|
||||
- Die finale Antwort nutzt weiterhin das aktive Standardmodell.
|
||||
- Shopquery-Guards und Shop-Ergebnislogik bleiben identisch.
|
||||
|
||||
### Schnelles Modell fuer Normalisierung und Shopquery testen
|
||||
|
||||
Konfiguration:
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: 'llama3.1'
|
||||
    shop_query_optimization: 'llama3.1'
|
||||
    final_answer: null
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Input-Normalisierung und Shopquery-Optimierung verwenden `llama3.1`.
|
||||
- Die finale Antwort verwendet weiterhin das aktive Standardmodell.
|
||||
|
||||
### Finalantwort testweise umstellen
|
||||
|
||||
Konfiguration:
|
||||
|
||||
```yaml
|
||||
retriex.llm.call_models:
|
||||
    input_normalization: null
|
||||
    shop_query_optimization: null
|
||||
    final_answer: 'llama3.1'
|
||||
```
|
||||
|
||||
Erwartung:
|
||||
|
||||
- Nur der finale Antwortstream nutzt `llama3.1`.
|
||||
- Die restliche Pipeline bleibt unveraendert.
|
||||
@@ -39,6 +39,7 @@ final readonly class AgentRunner
|
||||
private LoggerInterface $agentLogger,
|
||||
private AgentRunnerConfig $agentRunnerConfig,
|
||||
private LanguageCleanupConfig $languageCleanupConfig,
|
||||
private array $llmCallModels,
|
||||
private bool $debug,
|
||||
private bool $logPrompt,
|
||||
private bool $logContext,
|
||||
@@ -46,6 +47,18 @@ final readonly class AgentRunner
|
||||
$this->systemMsgOn = true;
|
||||
}
|
||||
|
||||
/**
 * Resolves the optional model override configured for a single LLM call.
 *
 * @param string $callName Key looked up in the injected call-model map.
 *
 * @return string|null The trimmed model name, or null when the entry is
 *                     missing, is not a string, or is blank after trimming.
 */
private function llmCallModel(string $callName): ?string
{
    $configured = $this->llmCallModels[$callName] ?? null;

    // Anything that is not a string is treated the same as "no override".
    $candidate = is_string($configured) ? trim($configured) : '';

    return $candidate === '' ? null : $candidate;
}
|
||||
|
||||
public function run(string $prompt, string $userId, bool $forceFullContext = false, string $requestContextHint = ''): Generator
|
||||
{
|
||||
$originalPrompt = trim($prompt);
|
||||
@@ -973,7 +986,7 @@ final readonly class AgentRunner
|
||||
$this->thinkSuppressor->reset();
|
||||
|
||||
try {
|
||||
foreach ($this->ollamaClient->stream($normalizationPrompt) as $token) {
|
||||
foreach ($this->ollamaClient->stream($normalizationPrompt, $this->llmCallModel('input_normalization')) as $token) {
|
||||
if (!is_string($token)) {
|
||||
continue;
|
||||
}
|
||||
@@ -1539,7 +1552,7 @@ final readonly class AgentRunner
|
||||
$this->thinkSuppressor->reset();
|
||||
|
||||
try {
|
||||
foreach ($this->ollamaClient->stream($shopPrompt) as $token) {
|
||||
foreach ($this->ollamaClient->stream($shopPrompt, $this->llmCallModel('shop_query_optimization')) as $token) {
|
||||
if (!is_string($token)) {
|
||||
continue;
|
||||
}
|
||||
@@ -4655,7 +4668,7 @@ final readonly class AgentRunner
|
||||
$thinkingNoticeShown = true;
|
||||
|
||||
try {
|
||||
foreach ($this->ollamaClient->stream($finalPrompt) as $token) {
|
||||
foreach ($this->ollamaClient->stream($finalPrompt, $this->llmCallModel('final_answer')) as $token) {
|
||||
if (!is_string($token)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
'llm' => [
|
||||
'timeout_seconds' => $this->param('retriex.llm.timeout_seconds'),
|
||||
'num_predict' => $this->param('retriex.llm.num_predict'),
|
||||
'call_models' => $this->param('retriex.llm.call_models'),
|
||||
],
|
||||
'retrieval' => $this->retrievalConfig(),
|
||||
'prompt' => $this->promptConfig(),
|
||||
@@ -85,6 +86,7 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
$this->validateRuntime($config['runtime'], $errors, $warnings);
|
||||
$this->validateIndex($config['index'], $errors, $warnings);
|
||||
$this->validateModel($config['model_generation'], $errors, $warnings);
|
||||
$this->validateLlm($config['llm'], $errors, $warnings);
|
||||
$this->validateRetrieval($config['retrieval'], $errors, $warnings);
|
||||
$this->validatePrompt($config['prompt'], $errors, $warnings);
|
||||
$this->validateAgent($config['agent'], $errors, $warnings);
|
||||
@@ -1714,6 +1716,46 @@ final readonly class RetriexEffectiveConfigProvider
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks the `call_models` entry of the `llm` config section.
 *
 * A missing or null `call_models` entry is treated as an empty map. A
 * non-array value is reported as an error and stops validation. For each
 * entry: a non-string or blank key is an error; a key outside the known
 * call list only produces a warning; a value that is neither null nor a
 * string is an error; a blank string value produces a warning.
 *
 * @param array<string, mixed> $llm      The `llm` section of the effective config.
 * @param list<string>         $errors   Collected error messages (appended to).
 * @param list<string>         $warnings Collected warning messages (appended to).
 */
private function validateLlm(array $llm, array &$errors, array &$warnings): void
{
    $overrides = $llm['call_models'] ?? [];
    if (is_array($overrides) === false) {
        $errors[] = 'llm.call_models must be a map.';

        return;
    }

    $supportedCalls = ['input_normalization', 'shop_query_optimization', 'final_answer'];

    foreach ($overrides as $call => $model) {
        $hasUsableName = is_string($call) && trim($call) !== '';
        if ($hasUsableName === false) {
            $errors[] = 'llm.call_models contains an invalid call name.';
            continue;
        }

        // Unknown names are tolerated: they are reported but still value-checked below.
        if (in_array($call, $supportedCalls, true) === false) {
            $warnings[] = 'llm.call_models contains an unknown call name: ' . $call . '.';
        }

        if ($model === null) {
            continue;
        }

        if (is_string($model) === false) {
            $errors[] = 'llm.call_models.' . $call . ' must be null or a string model name.';
            continue;
        }

        if (trim($model) === '') {
            $warnings[] = 'llm.call_models.' . $call . ' is empty and will use the default model.';
        }
    }
}
|
||||
|
||||
/**
|
||||
* @param array<string, mixed> $retrieval
|
||||
* @param list<string> $errors
|
||||
|
||||
@@ -17,7 +17,7 @@ final class OllamaClient
|
||||
private const LOW_SPEED_LIMIT_BYTES = 1;
|
||||
private const LOW_SPEED_TIME_SECONDS = 45;
|
||||
private ?ModelGenerationConfig $cachedConfig = null;
|
||||
private $config = null;
|
||||
private ?ModelGenerationConfig $config = null;
|
||||
|
||||
public function __construct(
|
||||
private string $apiUrl,
|
||||
@@ -29,33 +29,35 @@ final class OllamaClient
|
||||
/**
|
||||
* Public Streaming API
|
||||
*/
|
||||
public function stream(string $prompt): Generator
|
||||
public function stream(string $prompt, ?string $modelName = null): Generator
|
||||
{
|
||||
$this->config = $this->getConfig();
|
||||
|
||||
if ($this->config->isStream()) {
|
||||
yield from $this->streamInternal($prompt);
|
||||
yield from $this->streamInternal($prompt, $modelName);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fallback: Blocking generate → Generator-kompatibel ausgeben
|
||||
yield $this->generateInternal($prompt);
|
||||
// Fallback: Blocking generate with Generator-compatible output
|
||||
yield $this->generateInternal($prompt, $modelName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Public Blocking API
|
||||
*/
|
||||
public function generate(string $prompt): string
|
||||
public function generate(string $prompt, ?string $modelName = null): string
|
||||
{
|
||||
return $this->generateInternal($prompt);
|
||||
$this->config = $this->getConfig();
|
||||
|
||||
return $this->generateInternal($prompt, $modelName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal streaming transport
|
||||
*/
|
||||
private function streamInternal(string $prompt): Generator
|
||||
private function streamInternal(string $prompt, ?string $modelName = null): Generator
|
||||
{
|
||||
$payload = $this->buildPayload($prompt, true);
|
||||
$payload = $this->buildPayload($prompt, true, $modelName);
|
||||
|
||||
$buffer = '';
|
||||
$done = false;
|
||||
@@ -137,9 +139,9 @@ final class OllamaClient
|
||||
/**
|
||||
* Internal blocking transport
|
||||
*/
|
||||
private function generateInternal(string $prompt): string
|
||||
private function generateInternal(string $prompt, ?string $modelName = null): string
|
||||
{
|
||||
$payload = $this->buildPayload($prompt, false);
|
||||
$payload = $this->buildPayload($prompt, false, $modelName);
|
||||
|
||||
$ch = curl_init($this->apiUrl);
|
||||
if ($ch === false) {
|
||||
@@ -173,10 +175,18 @@ final class OllamaClient
|
||||
/**
|
||||
* Central Payload Builder (DRY)
|
||||
*/
|
||||
private function buildPayload(string $prompt, bool $stream): string
|
||||
private function buildPayload(string $prompt, bool $stream, ?string $modelName = null): string
|
||||
{
|
||||
$config = $this->getConfig();
|
||||
$this->config = $config;
|
||||
|
||||
$effectiveModelName = trim((string) $modelName);
|
||||
if ($effectiveModelName === '') {
|
||||
$effectiveModelName = $config->getModelName();
|
||||
}
|
||||
|
||||
return json_encode([
|
||||
'model' => $this->config->getModelName(),
|
||||
'model' => $effectiveModelName,
|
||||
'prompt' => $prompt,
|
||||
'stream' => $stream,
|
||||
'options' => $this->buildOptions()
|
||||
|
||||
Reference in New Issue
Block a user