[ 'timeout' => $this->timeoutSeconds, 'user_agent' => 'mithoAgent/1.0', 'ignore_errors' => true, ], ]); $html = @file_get_contents($url, false, $context); if ($html === false || $html === '') { return ''; } $config = new Configuration(); $config->setFixRelativeURLs(true); $config->setOriginalURL($url); $readability = new Readability($config); try { $readability->parse($html); $content = $readability->getContent() ?? ''; } catch (ParseException) { return ''; } if ($content === '') { return ''; } $text = strip_tags($content); $text = preg_replace('/\s+/u', ' ', $text) ?? $text; return mb_substr(trim($text), 0, $this->maxChars); }

    /**
     * Determines whether a URL belongs to a trusted internal domain.
     *
     * The URL's host is lower-cased and stripped of a trailing root dot, then
     * compared with every entry of $this->internalDomains, each normalised the
     * same way. A host matches when it equals an entry or is a subdomain of it
     * (i.e. ends with "." followed by the entry). Blank entries are ignored.
     *
     * @param string $url URL to inspect.
     * @return bool True when the host matches a configured internal domain;
     *              false otherwise, including when the URL cannot be parsed
     *              or has no host component.
     */
    public function isInternalDomainUrl(string $url): bool
    {
        $parts = parse_url($url);
        if ($parts === false || empty($parts['host'])) {
            return false;
        }

        // "example.com." (fully-qualified form) names the same host as
        // "example.com", so the trailing root dot is dropped before comparing.
        $host = rtrim(mb_strtolower($parts['host']), '.');
        if ($host === '') {
            return false;
        }

        foreach ($this->internalDomains as $domain) {
            // Normalise the configured entry exactly like the host so that case
            // or stray whitespace in configuration cannot prevent a match.
            // NOTE(review): entries are assumed to be strings — confirm against
            // wherever $this->internalDomains is populated.
            $domain = rtrim(mb_strtolower(trim((string) $domain)), '.');
            if ($domain === '') {
                // A blank entry names no domain; without this guard the suffix
                // test below would degrade to "host ends with a dot".
                continue;
            }

            if ($host === $domain || str_ends_with($host, '.' . $domain)) {
                return true;
            }
        }

        return false;
    }
}