Move intent and config values into config files
This commit is contained in:
@@ -17,6 +17,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
public function __construct(
|
||||
private string $projectDir,
|
||||
private LoggerInterface $agentLogger,
|
||||
private StopWords $stopWords,
|
||||
) {
|
||||
}
|
||||
|
||||
@@ -170,7 +171,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
return true;
|
||||
}
|
||||
|
||||
return StopWords::isStopWord($token);
|
||||
return $this->stopWords->isStopWord($token);
|
||||
}
|
||||
|
||||
private function normalizeText(string $value): string
|
||||
@@ -348,7 +349,7 @@ final readonly class NdjsonKeywordRetriever
|
||||
* token:string,
|
||||
* chunk_id:string,
|
||||
* document_id:string,
|
||||
* chunk_index:?int,
|
||||
* chunk_index:?int,
|
||||
* tf:int,
|
||||
* title_tf:int,
|
||||
* df:int
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<?php
|
||||
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Retrieval;
|
||||
@@ -21,10 +20,10 @@ final readonly class NdjsonLexicalIndexBuilder
|
||||
private const MAX_UNIQUE_TOKENS_PER_CHUNK = 256;
|
||||
|
||||
public function __construct(
|
||||
private string $projectDir,
|
||||
private string $projectDir,
|
||||
private LoggerInterface $agentLogger,
|
||||
)
|
||||
{
|
||||
private StopWords $stopWords,
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -345,7 +344,7 @@ final readonly class NdjsonLexicalIndexBuilder
|
||||
return true;
|
||||
}
|
||||
|
||||
return StopWords::isStopWord($token);
|
||||
return $this->stopWords->isStopWord($token);
|
||||
}
|
||||
|
||||
private function normalizeText(string $value): string
|
||||
|
||||
@@ -6,8 +6,13 @@ namespace App\Knowledge\Retrieval;
|
||||
|
||||
use App\Knowledge\StopWords;
|
||||
|
||||
final class QueryCleaner
|
||||
final readonly class QueryCleaner
|
||||
{
|
||||
public function __construct(
|
||||
private StopWords $stopWords
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans a query strictly for retrieval purposes.
|
||||
*
|
||||
@@ -66,7 +71,7 @@ final class QueryCleaner
|
||||
}
|
||||
|
||||
// Remove stop words
|
||||
if (StopWords::isStopWord($token)) {
|
||||
if ($this->stopWords->isStopWord($token)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,62 +4,25 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge;
|
||||
|
||||
final class StopWords
|
||||
use App\Config\StopWordsConfig;
|
||||
|
||||
final readonly class StopWords
|
||||
{
|
||||
/**
|
||||
* Retrieval-optimierte Stopwortliste (Deutsch).
|
||||
*
|
||||
* WICHTIG:
|
||||
* - Keine Negationen entfernen
|
||||
* - Keine Fragewörter entfernen
|
||||
* - Keine fachlichen Begriffe entfernen
|
||||
* - Nur echte Füll- und Strukturwörter
|
||||
*/
|
||||
private const STOP_WORDS = [
|
||||
|
||||
'mit',
|
||||
// Artikel
|
||||
'der', 'die', 'das',
|
||||
'ein', 'eine', 'einer', 'eines',
|
||||
'den', 'dem', 'des',
|
||||
|
||||
// Konjunktionen
|
||||
'und', 'oder', 'aber', 'sowie',
|
||||
|
||||
// Schwache Pronomen
|
||||
'ich', 'du', 'er', 'sie', 'es',
|
||||
'wir', 'ihr',
|
||||
|
||||
// Füllwörter
|
||||
'halt', 'eben', 'auch', 'schon',
|
||||
'noch', 'mal', 'bitte', 'danke',
|
||||
|
||||
// Strukturwörter
|
||||
'also', 'nun', 'tja',
|
||||
'dann', 'danach', 'davor',
|
||||
'hier', 'dort',
|
||||
|
||||
// Zeit-Füller (kontextarm)
|
||||
'heute', 'gestern', 'morgen',
|
||||
|
||||
// Höflichkeits-/Modalformen
|
||||
'könnte', 'kannst', 'kann',
|
||||
'würde', 'würdest', 'würden',
|
||||
];
|
||||
|
||||
/**
|
||||
* Gibt die vollständige Stopwortliste zurück.
|
||||
*/
|
||||
public static function getStopWords(): array
|
||||
{
|
||||
return self::STOP_WORDS;
|
||||
public function __construct(
|
||||
private StopWordsConfig $config
|
||||
) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Prüft, ob ein Wort ein Stopwort ist.
|
||||
* @return string[]
|
||||
*/
|
||||
public static function isStopWord(string $word): bool
|
||||
public function getStopWords(): array
|
||||
{
|
||||
return in_array($word, self::STOP_WORDS, true);
|
||||
return $this->config->getStopWords();
|
||||
}
|
||||
|
||||
public function isStopWord(string $word): bool
|
||||
{
|
||||
return in_array($word, $this->config->getStopWords(), true);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user