second step
This commit is contained in:
@@ -4,39 +4,18 @@ declare(strict_types=1);
|
||||
|
||||
namespace App\Config;
|
||||
|
||||
/**
|
||||
* YAML-backed stop-word configuration.
|
||||
*
|
||||
* This class intentionally has no PHP fallback list. The complete list lives in
|
||||
* config/retriex/language.yaml.
|
||||
*/
|
||||
final class StopWordsConfig
|
||||
{
|
||||
/**
|
||||
* Retrieval-optimized stop-word list.
|
||||
*
|
||||
* Important:
|
||||
* - keep negations
|
||||
* - keep question words
|
||||
* - keep domain terms
|
||||
* - remove only structural filler words
|
||||
*/
|
||||
private const DEFAULT_STOP_WORDS = [
|
||||
'mit',
|
||||
'der', 'die', 'das',
|
||||
'ein', 'eine', 'einer', 'eines',
|
||||
'den', 'dem', 'des',
|
||||
'und', 'oder', 'aber', 'sowie',
|
||||
'ich', 'du', 'er', 'sie', 'es',
|
||||
'wir', 'ihr',
|
||||
'halt', 'eben', 'auch', 'schon',
|
||||
'noch', 'mal', 'bitte', 'danke',
|
||||
'also', 'nun', 'tja',
|
||||
'dann', 'danach', 'davor',
|
||||
'hier', 'dort',
|
||||
'heute', 'gestern', 'morgen',
|
||||
'könnte', 'kannst', 'kann',
|
||||
'würde', 'würdest', 'würden',
|
||||
];
|
||||
|
||||
/**
|
||||
* @param array<string, mixed> $config
|
||||
*/
|
||||
public function __construct(private array $config = [])
|
||||
public function __construct(private readonly array $config)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -45,19 +24,22 @@ final class StopWordsConfig
|
||||
*/
|
||||
public function getStopWords(): array
|
||||
{
|
||||
return $this->stringList('words', self::DEFAULT_STOP_WORDS);
|
||||
return $this->requiredStringList('words');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[] $default
|
||||
* @return string[]
|
||||
*/
|
||||
private function stringList(string $key, array $default): array
|
||||
private function requiredStringList(string $key): array
|
||||
{
|
||||
$value = $this->config[$key] ?? $default;
|
||||
if (!array_key_exists($key, $this->config)) {
|
||||
throw new \InvalidArgumentException(sprintf('Missing required RetrieX stopwords config key "%s".', $key));
|
||||
}
|
||||
|
||||
$value = $this->config[$key];
|
||||
|
||||
if (!is_array($value)) {
|
||||
return $default;
|
||||
throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must be a list.', $key));
|
||||
}
|
||||
|
||||
$out = [];
|
||||
@@ -76,6 +58,10 @@ final class StopWordsConfig
|
||||
}
|
||||
}
|
||||
|
||||
return $out !== [] ? $out : $default;
|
||||
if ($out === []) {
|
||||
throw new \InvalidArgumentException(sprintf('RetrieX stopwords config key "%s" must not be empty.', $key));
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user