move intent and config values into config files

This commit is contained in:
team2
2026-04-23 21:49:54 +02:00
parent 87417febf4
commit fce44e971d
17 changed files with 1937 additions and 1133 deletions

View File

@@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever
/**
 * Stores the retriever's collaborators as private promoted properties.
 *
 * @param string          $projectDir  Presumably the project root path; its use is not visible here (TODO confirm).
 * @param LoggerInterface $agentLogger Logger kept on the instance.
 * @param StopWords       $stopWords   Stop-word service; this class calls isStopWord() on it when filtering tokens.
 */
public function __construct(
private string $projectDir,
private LoggerInterface $agentLogger,
private StopWords $stopWords,
) {
}
@@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever
return true;
}
return StopWords::isStopWord($token);
return $this->stopWords->isStopWord($token);
}
private function normalizeText(string $value): string
@@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever
* token:string,
* chunk_id:string,
* document_id:string,
* chunk_index:?int,
* chunk_index:?int,
* tf:int,
* title_tf:int,
* df:int

View File

@@ -1,6 +1,5 @@
<?php
declare(strict_types=1);
namespace App\Knowledge\Retrieval;
@@ -21,10 +20,10 @@ final readonly class NdjsonLexicalIndexBuilder
private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
public function __construct(
private string $projectDir,
private string $projectDir,
private LoggerInterface $agentLogger,
)
{
private StopWords $stopWords,
) {
}
/**
@@ -345,7 +344,7 @@ final readonly class NdjsonLexicalIndexBuilder
return true;
}
return StopWords::isStopWord($token);
return $this->stopWords->isStopWord($token);
}
private function normalizeText(string $value): string

View File

@@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval;
use App\Knowledge\StopWords;
final class QueryCleaner
final readonly class QueryCleaner
{
/**
 * Stores the stop-word service as a private promoted property.
 *
 * @param StopWords $stopWords Consulted via isStopWord() to skip stop-word tokens while cleaning a query.
 */
public function __construct(
private StopWords $stopWords
) {
}
/**
* Cleans a query strictly for retrieval purposes.
*
@@ -66,7 +71,7 @@ final class QueryCleaner
}
// Remove stop words
if (StopWords::isStopWord($token)) {
if ($this->stopWords->isStopWord($token)) {
continue;
}

View File

@@ -4,62 +4,25 @@ declare(strict_types=1);
namespace App\Knowledge;
final class StopWords
use App\Config\StopWordsConfig;
final readonly class StopWords
{
/**
* Retrieval-optimierte Stopwortliste (Deutsch).
*
* WICHTIG:
* - Keine Negationen entfernen
* - Keine Fragewörter entfernen
* - Keine fachlichen Begriffe entfernen
* - Nur echte Füll- und Strukturwörter
*/
private const STOP_WORDS = [
'mit',
// Artikel
'der', 'die', 'das',
'ein', 'eine', 'einer', 'eines',
'den', 'dem', 'des',
// Konjunktionen
'und', 'oder', 'aber', 'sowie',
// Schwache Pronomen
'ich', 'du', 'er', 'sie', 'es',
'wir', 'ihr',
// Füllwörter
'halt', 'eben', 'auch', 'schon',
'noch', 'mal', 'bitte', 'danke',
// Strukturwörter
'also', 'nun', 'tja',
'dann', 'danach', 'davor',
'hier', 'dort',
// Zeit-Füller (kontextarm)
'heute', 'gestern', 'morgen',
// Höflichkeits-/Modalformen
'könnte', 'kannst', 'kann',
'würde', 'würdest', 'würden',
];
/**
* Gibt die vollständige Stopwortliste zurück.
*/
public static function getStopWords(): array
{
return self::STOP_WORDS;
public function __construct(
private StopWordsConfig $config
) {
}
/**
* Prüft, ob ein Wort ein Stopwort ist.
* @return string[]
*/
public static function isStopWord(string $word): bool
public function getStopWords(): array
{
return in_array($word, self::STOP_WORDS, true);
return $this->config->getStopWords();
}
/**
 * Checks whether a word is an entry of the configured stop-word list.
 *
 * The lookup uses in_array() in strict mode, so the word must match a list
 * entry exactly; no trimming or case folding is applied in this method.
 *
 * @param string $word Word to look up.
 *
 * @return bool True when $word is contained in the list returned by the injected config.
 */
public function isStopWord(string $word): bool
{
return in_array($word, $this->config->getStopWords(), true);
}
}