Add PDF reader module

This commit is contained in:
team2
2026-02-12 20:57:54 +01:00
parent 14d7f3b2b9
commit a625468a9a
9 changed files with 229 additions and 6 deletions

View File

@@ -0,0 +1,25 @@
<?php
declare(strict_types=1);
namespace App\Knowledge\Text;
/**
 * Normalizes raw text: unifies line endings, joins words hyphenated across
 * line breaks, and collapses redundant whitespace and blank lines.
 */
final class TextNormalizer
{
    /**
     * Returns a cleaned-up copy of the given text.
     *
     * Steps, in order:
     *  1. Convert "\r\n" and lone "\r" line endings to "\n".
     *  2. Remove end-of-line hyphenation ("-\n") so split words are rejoined.
     *  3. Collapse runs of spaces/tabs into a single space.
     *  4. Collapse three or more consecutive newlines into exactly two.
     *  5. Trim leading and trailing whitespace.
     *
     * If a regex step fails (preg_replace returns null), that step is skipped
     * and the text is passed on unchanged instead of raising a TypeError.
     *
     * @param string $text Text to normalize.
     *
     * @return string The normalized text.
     */
    public function normalize(string $text): string
    {
        // Unify line endings FIRST: every later rule matches on "\n" only, so
        // "-\r\n" hyphenation would otherwise be missed. str_replace applies
        // the search entries in order, so "\r\n" becomes one "\n", not two.
        $text = str_replace(["\r\n", "\r"], "\n", $text);

        // Remove hyphenation at line ends (joins "hyphen-\nated" -> "hyphenated").
        $text = preg_replace('/-\n/', '', $text) ?? $text;

        // Reduce runs of spaces/tabs to a single space (newlines are untouched).
        $text = preg_replace('/[ \t]+/', ' ', $text) ?? $text;

        // Reduce three or more consecutive newlines to a single blank line.
        $text = preg_replace('/\n{3,}/', "\n\n", $text) ?? $text;

        return trim($text);
    }
}