Add PDF reader module
This commit is contained in:
25
src/Knowledge/Text/TextNormalizer.php
Normal file
25
src/Knowledge/Text/TextNormalizer.php
Normal file
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Knowledge\Text;
|
||||
|
||||
/**
 * Normalizes line breaks and whitespace in a text string.
 */
final class TextNormalizer
{
    /**
     * Returns a cleaned-up copy of the given text.
     *
     * Steps, in order:
     *  1. Windows line endings ("\r\n") are converted to "\n".
     *  2. A hyphen directly followed by a line break is removed together
     *     with the break, re-joining words hyphenated across lines.
     *  3. Runs of spaces/tabs are collapsed into a single space.
     *  4. Three or more consecutive newlines are reduced to two.
     *  5. Leading and trailing whitespace is trimmed.
     *
     * @param string $text Text to normalize.
     *
     * @return string The normalized text.
     *
     * @throws \RuntimeException If one of the regular-expression replacements fails.
     */
    public function normalize(string $text): string
    {
        // Unify Windows line endings first, so the hyphenation rule below
        // also matches words that were split across a "\r\n" break.
        $text = str_replace("\r\n", "\n", $text);

        // Remove end-of-line hyphenation.
        $text = $this->replacePattern('/-\n/', '', $text);

        // Collapse runs of spaces and tabs into a single space.
        $text = $this->replacePattern('/[ \t]+/', ' ', $text);

        // Reduce multiple blank lines to a single blank line.
        $text = $this->replacePattern('/\n{3,}/', "\n\n", $text);

        return trim($text);
    }

    /**
     * Runs preg_replace() and turns its null-on-error result into an exception,
     * so a PCRE failure is reported explicitly instead of leaking null onward.
     */
    private function replacePattern(string $pattern, string $replacement, string $subject): string
    {
        $result = preg_replace($pattern, $replacement, $subject);

        if ($result === null) {
            throw new \RuntimeException(sprintf(
                'Text normalization failed for pattern %s (PCRE error code %d).',
                $pattern,
                preg_last_error()
            ));
        }

        return $result;
    }
}
|
||||
Reference in New Issue
Block a user