first commit
This commit is contained in:
126
src/Context/ContextService.php
Normal file
126
src/Context/ContextService.php
Normal file
@@ -0,0 +1,126 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Context;
|
||||
|
||||
/**
 * ContextService
 *
 * Manages conversational history persistence and retrieval.
 *
 * Responsibilities:
 * - Persist completed conversation turns (append-only)
 * - Provide recent or extended conversation context
 * - Resolve history storage paths safely
 *
 * Non-responsibilities:
 * - No follow-up detection
 * - No prompt semantics
 * - No interpretation of user intent
 *
 * Context levels:
 * - Regular context: last N lines (default)
 * - Full context: extended history for special cases
 *
 * Storage layout: one plain-text file per user inside the history directory.
 */
final class ContextService
{
    /**
     * Absolute path of the directory holding the per-user history files
     * (no trailing slash).
     */
    private string $historyDir;

    /**
     * Number of lines included in regular context.
     * Intended for normal conversational continuity.
     */
    private int $maxRegularLines = 20;

    /**
     * Number of lines included in full context.
     * Intended for exceptional or diagnostic scenarios.
     */
    private int $maxFullLines = 500;

    /**
     * @param string $historyDir History directory; absolute, or relative to $projectDir
     *                           (e.g. "var/agent-history")
     * @param string $projectDir Project root used to resolve a relative $historyDir
     *
     * @throws \RuntimeException When the history directory does not exist and cannot be created
     */
    public function __construct(
        string $historyDir,
        string $projectDir,
    ) {
        // Normalize the history directory: relative paths are allowed in the
        // configuration but are always resolved against the project root.
        $historyDir = rtrim($historyDir, '/');

        if (!str_starts_with($historyDir, '/')) {
            $historyDir = rtrim($projectDir, '/') . '/' . ltrim($historyDir, '/');
        }

        $this->historyDir = $historyDir;

        // Ensure the directory exists. The second is_dir() check covers the case
        // where another process created the directory between our check and
        // mkdir(); the warning is suppressed because the outcome is checked
        // explicitly right here.
        if (
            !is_dir($this->historyDir)
            && !@mkdir($this->historyDir, 0777, true)
            && !is_dir($this->historyDir)
        ) {
            throw new \RuntimeException(
                sprintf('Unable to create history directory "%s".', $this->historyDir)
            );
        }
    }

    /**
     * Returns the conversation context for a given user.
     *
     * The context is the tail of the user's history file: the last 20 lines by
     * default, or the last 500 lines when $full is true. Limits are counted in
     * lines, not turns, so a multi-line response occupies several lines.
     *
     * @param string $userId Stable client identifier
     * @param bool   $full   Whether to load extended history
     *
     * @return string Selected lines joined with "\n", or an empty string when
     *                no history exists or the file cannot be read
     */
    public function buildUserContext(string $userId, bool $full = false): string
    {
        $path = $this->getHistoryPath($userId);

        if (!is_file($path)) {
            return '';
        }

        $lines = file($path, FILE_IGNORE_NEW_LINES);
        if ($lines === false) {
            return '';
        }

        $maxLines = $full ? $this->maxFullLines : $this->maxRegularLines;

        // A negative offset keeps only the trailing $maxLines entries.
        return implode("\n", array_slice($lines, -$maxLines));
    }

    /**
     * Appends a completed interaction to the user's history.
     *
     * Format (append-only):
     *   Question: <user prompt>
     *   <assistant response>
     *
     * The write takes an exclusive lock so concurrent appends do not interleave.
     * The result of the write is not checked; a failed write is not reported
     * to the caller.
     */
    public function appendHistory(string $userId, string $prompt, string $response): void
    {
        $path = $this->getHistoryPath($userId);

        $entry = "Question: {$prompt}\n{$response}\n";
        file_put_contents($path, $entry, FILE_APPEND | LOCK_EX);
    }

    /**
     * Deletes the complete conversation history for a user.
     *
     * Does nothing when the user has no history file.
     */
    public function deleteHistory(string $userId): void
    {
        $path = $this->getHistoryPath($userId);

        if (is_file($path)) {
            unlink($path);
        }
    }

    /**
     * Resolves the absolute history file path for a user.
     *
     * Characters outside [a-zA-Z0-9_-] are replaced with "_" so the identifier
     * can never escape the history directory. Because that replacement is lossy
     * ("a.b", "a/b" and "a_b" would all become "a_b"), identifiers that had to
     * be altered additionally receive a short hash of the original value. The
     * hash is joined with ".", a character that can never occur in an unaltered
     * identifier, so an altered identifier cannot collide with a clean one.
     * Identifiers that were already safe keep the plain "<id>.txt" name.
     */
    private function getHistoryPath(string $userId): string
    {
        $safeUserId = preg_replace('/[^a-zA-Z0-9_-]/', '_', $userId) ?? '';

        if ($safeUserId !== $userId) {
            $safeUserId .= '.' . substr(hash('sha256', $userId), 0, 16);
        }

        return $this->historyDir . '/' . $safeUserId . '.txt';
    }
}
|
||||
120
src/Context/UrlAnalyzer.php
Normal file
120
src/Context/UrlAnalyzer.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Context;
|
||||
|
||||
use RuntimeException;
|
||||
|
||||
/**
 * UrlAnalyzer
 *
 * Extracts and analyzes URL content from user prompts in a production-safe way.
 *
 * Responsibilities:
 * - Detect the first URL inside a prompt
 * - Fetch remote content with strict limits
 * - Clean and normalize readable text
 * - Identify trusted internal domains based on URL host
 *
 * Design constraints:
 * - No framework dependencies
 * - No prompt or agent logic
 * - Defensive against slow or large responses
 */
final class UrlAnalyzer
{
    /**
     * Timeout (seconds) passed to the HTTP stream context.
     */
    private int $timeoutSeconds = 20;

    /**
     * Maximum number of characters returned to the caller. The raw download
     * is capped at twice this value in bytes.
     */
    private int $maxChars = 5000;

    /**
     * List of trusted internal domains.
     * Used for marking content as authoritative.
     *
     * @var list<string>
     */
    private array $internalDomains = [
        'mitho-media.de',
    ];

    /**
     * Extracts readable text from the first URL found in a prompt.
     *
     * Steps: locate the first "http(s)://..." or "www...." token, drop sentence
     * punctuation glued to its end, fetch at most 2 * maxChars bytes, remove
     * script/style blocks and tags, collapse whitespace, and truncate to
     * maxChars characters.
     *
     * NOTE(review): the URL comes straight from the prompt and is fetched
     * without any restriction on the target host, so loopback / private
     * addresses are reachable from here. Confirm whether callers need an
     * allow-list or an address check before this runs on untrusted input.
     *
     * @param string $prompt
     * @return string Cleaned page text or empty string on failure
     */
    public function extractContentFromPrompt(string $prompt): string
    {
        if (!preg_match('~https?://\S+|www\.\S+~i', $prompt, $matches)) {
            return '';
        }

        $url = $this->normalizeUrl($this->stripTrailingPunctuation($matches[0]));

        $parts = parse_url($url);
        if ($parts === false || empty($parts['host'])) {
            return '';
        }

        $context = stream_context_create([
            'http' => [
                'timeout' => $this->timeoutSeconds,
                'user_agent' => 'mithoAgent/1.0',
                // Error pages (4xx/5xx) are still read instead of failing the open.
                'ignore_errors' => true,
            ],
        ]);

        // Best effort: an unreachable URL simply yields no content.
        $handle = @fopen($url, 'rb', false, $context);
        if ($handle === false) {
            return '';
        }

        try {
            // One bounded read. A manual feof()/fread() loop can keep waiting
            // when a read times out without the stream reaching EOF.
            $html = stream_get_contents($handle, $this->maxChars * 2);
        } finally {
            fclose($handle);
        }

        if ($html === false || $html === '') {
            return '';
        }

        // Remove script and style blocks. Because the download is cut off at a
        // byte limit, a block may be missing its closing tag; in that case
        // everything up to the end of the input belongs to the block.
        $html = preg_replace('~<script[^>]*>.*?(?:</script>|\z)~is', '', $html) ?? $html;
        $html = preg_replace('~<style[^>]*>.*?(?:</style>|\z)~is', '', $html) ?? $html;

        // Strip remaining HTML.
        $text = strip_tags($html);

        // The byte cut-off can split a multi-byte character, and pages are not
        // guaranteed to be UTF-8. The /u pattern below returns null on invalid
        // UTF-8, which would silently skip normalization, so repair first.
        $text = mb_scrub($text, 'UTF-8');

        // Normalize whitespace.
        $text = preg_replace('/\s+/u', ' ', $text) ?? $text;

        return mb_substr(trim($text), 0, $this->maxChars);
    }

    /**
     * Determines whether a URL belongs to a trusted internal domain.
     *
     * A URL is internal when its host equals a configured domain or is a
     * subdomain of one. The comparison is case-insensitive and ignores a
     * trailing root dot on the host. Scheme-less "www." URLs are accepted in
     * the same form that extractContentFromPrompt() recognizes.
     *
     * @param string $url
     * @return bool
     */
    public function isInternalDomainUrl(string $url): bool
    {
        $parts = parse_url($this->normalizeUrl($url));
        if ($parts === false || empty($parts['host'])) {
            return false;
        }

        $host = rtrim(mb_strtolower($parts['host']), '.');

        foreach ($this->internalDomains as $domain) {
            // The leading dot prevents "evil-mitho-media.de" style look-alikes.
            if ($host === $domain || str_ends_with($host, '.' . $domain)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Brings a detected URL into a canonical fetchable form.
     *
     * - "www.example.com" (any letter case) gets an "https://" prefix
     * - "HTTP://..." / "Https://..." get a lower-cased scheme
     * - anything else is returned unchanged
     */
    private function normalizeUrl(string $url): string
    {
        if (preg_match('~^www\.~i', $url) === 1) {
            return 'https://' . $url;
        }

        if (preg_match('~^(https?)(://.*)$~is', $url, $m) === 1) {
            return strtolower($m[1]) . $m[2];
        }

        return $url;
    }

    /**
     * Removes sentence punctuation that the whitespace-delimited URL match
     * picked up from the surrounding text (e.g. "see https://example.com/page.").
     *
     * A closing parenthesis is only removed when it has no matching opening
     * parenthesis inside the URL, so ".../wiki/PHP_(language)" stays intact.
     */
    private function stripTrailingPunctuation(string $url): string
    {
        $punctuation = ".,;:!?'\">";
        $trimmed = rtrim($url, $punctuation);

        while (
            str_ends_with($trimmed, ')')
            && substr_count($trimmed, ')') > substr_count($trimmed, '(')
        ) {
            $trimmed = rtrim(substr($trimmed, 0, -1), $punctuation);
        }

        return $trimmed;
    }
}
|
||||
Reference in New Issue
Block a user