optimize text retrieval

This commit is contained in:
team2
2026-03-01 20:17:23 +01:00
parent e1c3993c96
commit 4a619cd6a8
3 changed files with 36 additions and 8 deletions

View File

@@ -4,6 +4,7 @@ declare(strict_types=1);
namespace App\Intent; namespace App\Intent;
use App\Knowledge\Retrieval\QueryCleaner;
use App\Tag\TagVectorSearchClient; use App\Tag\TagVectorSearchClient;
use App\Tag\TagTypes; use App\Tag\TagTypes;
@@ -29,7 +30,7 @@ final class CatalogIntentLite
* Minimaler Similarity-Score. * Minimaler Similarity-Score.
* Verhindert Rauschen. * Verhindert Rauschen.
*/ */
private const MIN_SCORE = 0.75; private const MIN_SCORE = 0.72;
/** /**
* Differenz zwischen Top1 und Top2, * Differenz zwischen Top1 und Top2,
@@ -39,6 +40,7 @@ final class CatalogIntentLite
public function __construct( public function __construct(
private readonly TagVectorSearchClient $tagVectorClient, private readonly TagVectorSearchClient $tagVectorClient,
private readonly QueryCleaner $queryCleaner,
) {} ) {}
/** /**
@@ -52,8 +54,10 @@ final class CatalogIntentLite
return null; return null;
} }
// 1) Vector-Suche $promptTag = $this->queryCleaner->clean($prompt);
$hits = $this->tagVectorClient->search($prompt, 3);
// 1) Tag-Vector-Suche
$hits = $this->tagVectorClient->search($promptTag, 3);
if ($hits === []) { if ($hits === []) {
return null; return null;
@@ -62,7 +66,7 @@ final class CatalogIntentLite
$best = $hits[0]; $best = $hits[0];
$bestScore = (float)($best['score'] ?? 0.0); $bestScore = (float)($best['score'] ?? 0.0);
// 2) Score-Gate // 2) Score-Tags
if ($bestScore < self::MIN_SCORE) { if ($bestScore < self::MIN_SCORE) {
return null; return null;
} }

View File

@@ -74,7 +74,7 @@ final class ModelGenerationConfigAdminService
return []; return [];
} }
return $this->retriever->retrieveInternal($prompt, $config); return $this->retriever->retrieveDebug($prompt);
} }
private function requireString(mixed $value, string $field): string private function requireString(mixed $value, string $field): string

View File

@@ -77,13 +77,37 @@
</h5> </h5>
<div style="max-height: 500px; overflow-y: auto;"> <div style="max-height: 500px; overflow-y: auto;">
{# Render one bordered card per entry in `results`: a compact meta line with the retrieval diagnostics, followed by the chunk text itself. #}
{% for chunk in results %}
    <div class="border border-secondary p-3 mb-3 small">
        {# ================= META LINE ================= #}
        <div class="mb-2 text-warning" style="font-size: 11px; line-height: 1.4;">
            <span class="text-info"><strong>rank:</strong> {{ chunk.rank }}</span> |
            <span class="text-info"><strong>chunk_id:</strong> {{ chunk.chunk_id }}</span> |
            <span class="text-info"><strong>document_id:</strong> <a
                        class="text-info"
                        href="{{ path('admin_document_show', { id: chunk.document_id }) }}">
                    {{ chunk.document_id }}
                </a></span> |
            <span><strong>raw_score:</strong> {{ chunk.raw_score|number_format(6, '.', '') }}</span>
            |
            <span><strong>rrf_score:</strong> {{ chunk.rrf_score|number_format(6, '.', '') }}</span>
            |
            <span><strong>threshold:</strong> {{ chunk.threshold }}</span> |
            <span><strong>intent:</strong> {{ chunk.intent }}</span> |
            <span>
                <strong>is_list_query:</strong>
                {{ chunk.is_list_query ? 'true' : 'false' }}
            </span>
        </div>
        {# ================= CHUNK TEXT ================= #}
        <div>
            {# Keep the line breaks of multi-line chunks visible, as the previous plain-string rendering did; nl2br escapes the text before inserting <br /> tags. #}
            {{ chunk.text|nl2br }}
        </div>
    </div>
{% endfor %}
</div> </div>
</div> </div>