diff --git a/composer.json b/composer.json index e7bc8e4..f155307 100644 --- a/composer.json +++ b/composer.json @@ -7,10 +7,13 @@ "php": "^8.2", "ext-ctype": "*", "ext-curl": "*", + "ext-dom": "*", "ext-iconv": "*", + "ext-libxml": "*", "doctrine/doctrine-bundle": "^2.18", "doctrine/doctrine-migrations-bundle": "^3.7", "doctrine/orm": "^3.6", + "fivefilters/readability.php": ">=3.0", "league/commonmark": "^2.8", "smalot/pdfparser": "^2.12", "symfony/asset": "7.4.*", diff --git a/composer.lock b/composer.lock index cea8f7a..4321cd1 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "bf516574b65f7c2abdc053c964f769aa", + "content-hash": "6b5cec5df97930b08d52d1e9599d125b", "packages": [ { "name": "dflydev/dot-access-data", @@ -1194,6 +1194,71 @@ }, "time": "2026-02-08T16:21:46+00:00" }, + { + "name": "fivefilters/readability.php", + "version": "v3.3.3", + "source": { + "type": "git", + "url": "https://github.com/fivefilters/readability.php.git", + "reference": "e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/fivefilters/readability.php/zipball/e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8", + "reference": "e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-mbstring": "*", + "ext-xml": "*", + "league/uri": "^7.0", + "masterminds/html5": "^2.0", + "php": ">=8.1", + "psr/log": "^1.0 || ^2.0 || ^3.0" + }, + "require-dev": { + "monolog/monolog": "^3.0", + "phpunit/phpunit": "^10.0 || ^11.0" + }, + "suggest": { + "monolog/monolog": "Allow logging debug information" + }, + "type": "library", + "autoload": { + "psr-4": { + "fivefilters\\Readability\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Andres Rey", + "email": "andreskrey@gmail.com", + "role": "Original Developer" + }, + { + "name": "Keyvan Minoukadeh", + "email": "keyvan@fivefilters.org", + "homepage": "https://www.fivefilters.org", + "role": "Developer/Maintainer" + } + ], + "description": "A PHP port of Readability.js", + "homepage": "https://github.com/fivefilters/readability.php", + "keywords": [ + "html", + "readability" + ], + "support": { + "issues": "https://github.com/fivefilters/readability.php/issues", + "source": "https://github.com/fivefilters/readability.php/tree/v3.3.3" + }, + "time": "2025-04-26T23:45:37+00:00" + }, { "name": "league/commonmark", "version": "2.8.0", @@ -1383,6 +1448,255 @@ ], "time": "2022-12-11T20:36:23+00:00" }, + { + "name": "league/uri", + "version": "7.8.1", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/uri.git", + "reference": "08cf38e3924d4f56238125547b5720496fac8fd4" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/uri/zipball/08cf38e3924d4f56238125547b5720496fac8fd4", + "reference": "08cf38e3924d4f56238125547b5720496fac8fd4", + "shasum": "" + }, + "require": { + "league/uri-interfaces": "^7.8.1", + "php": "^8.1", + "psr/http-factory": "^1" + }, + "conflict": { + "league/uri-schemes": "^1.0" + }, + "suggest": { + "ext-bcmath": "to improve IPV4 host parsing", + "ext-dom": "to convert the URI into an HTML anchor tag", + "ext-fileinfo": "to create Data URI from file contennts", + "ext-gmp": "to improve IPV4 host parsing", + "ext-intl": "to handle IDN host with the best performance", + "ext-uri": "to use the PHP native URI class", + "jeremykendall/php-domain-parser": "to further parse the URI host and resolve its Public Suffix and Top Level Domain", + "league/uri-components": "to provide additional tools to manipulate URI objects components", + "league/uri-polyfill": "to backport the PHP URI extension for older versions of PHP", + "php-64bit": "to improve IPV4 host parsing", + "rowbot/url": "to handle URLs using the WHATWG URL Living Standard specification", + "symfony/polyfill-intl-idn": "to handle IDN host via the Symfony polyfill if ext-intl is not present" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "7.x-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Uri\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://nyamsprod.com" + } + ], + "description": "URI manipulation library", + "homepage": "https://uri.thephpleague.com", + "keywords": [ + "URN", + "data-uri", + "file-uri", + "ftp", + "hostname", + "http", + "https", + "middleware", + "parse_str", + "parse_url", + "psr-7", + "query-string", + "querystring", + "rfc2141", + "rfc3986", + "rfc3987", + "rfc6570", + "rfc8141", + "uri", + "uri-template", + "url", + "ws" + ], + "support": { + "docs": "https://uri.thephpleague.com", + "forum": "https://thephpleague.slack.com", + "issues": "https://github.com/thephpleague/uri-src/issues", + "source": "https://github.com/thephpleague/uri/tree/7.8.1" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2026-03-15T20:22:25+00:00" + }, + { + "name": "league/uri-interfaces", + "version": "7.8.1", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/uri-interfaces.git", + "reference": "85d5c77c5d6d3af6c54db4a78246364908f3c928" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/85d5c77c5d6d3af6c54db4a78246364908f3c928", + "reference": "85d5c77c5d6d3af6c54db4a78246364908f3c928", + "shasum": "" + }, + "require": { + "ext-filter": "*", + "php": "^8.1", + "psr/http-message": "^1.1 || ^2.0" + }, + "suggest": { + "ext-bcmath": "to improve IPV4 host parsing", + "ext-gmp": "to improve IPV4 host parsing", + "ext-intl": "to handle IDN host with the best performance", + "php-64bit": "to improve IPV4 host parsing", + "rowbot/url": "to handle URLs using the WHATWG URL Living Standard specification", + "symfony/polyfill-intl-idn": "to handle IDN host via the Symfony polyfill if ext-intl is not present" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "7.x-dev" + } + }, + "autoload": { + "psr-4": { + "League\\Uri\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://nyamsprod.com" + } + ], + "description": "Common tools for parsing and resolving RFC3987/RFC3986 URI", + "homepage": "https://uri.thephpleague.com", + "keywords": [ + "data-uri", + "file-uri", + "ftp", + "hostname", + "http", + "https", + "parse_str", + "parse_url", + "psr-7", + "query-string", + "querystring", + "rfc3986", + "rfc3987", + "rfc6570", + "uri", + "url", + "ws" + ], + "support": { + "docs": "https://uri.thephpleague.com", + "forum": "https://thephpleague.slack.com", + "issues": "https://github.com/thephpleague/uri-src/issues", + "source": "https://github.com/thephpleague/uri-interfaces/tree/7.8.1" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2026-03-08T20:05:35+00:00" + }, + { + "name": "masterminds/html5", + "version": "2.10.0", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "fcf91eb64359852f00d921887b219479b4f21251" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/fcf91eb64359852f00d921887b219479b4f21251", + "reference": "fcf91eb64359852f00d921887b219479b4f21251", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7 || ^8 || ^9" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.7-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "support": { + "issues": "https://github.com/Masterminds/html5-php/issues", + "source": "https://github.com/Masterminds/html5-php/tree/2.10.0" + }, + "time": "2025-07-25T09:04:22+00:00" + }, { "name": "monolog/monolog", "version": "3.10.0", @@ -1844,6 +2158,114 @@ }, "time": "2019-01-08T18:20:26+00:00" }, + { + "name": "psr/http-factory", + "version": "1.1.0", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-factory.git", + "reference": "2b4765fddfe3b508ac62f829e852b1501d3f6e8a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-factory/zipball/2b4765fddfe3b508ac62f829e852b1501d3f6e8a", + "reference": "2b4765fddfe3b508ac62f829e852b1501d3f6e8a", + "shasum": "" + }, + "require": { + "php": ">=7.1", + "psr/http-message": "^1.0 || ^2.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Message\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "PSR-17: Common interfaces for PSR-7 HTTP message factories", + "keywords": [ + "factory", + "http", + "message", + "psr", + "psr-17", + "psr-7", + "request", + "response" + ], + "support": { + "source": "https://github.com/php-fig/http-factory" + }, + "time": "2024-04-15T12:06:14+00:00" + }, + { + "name": "psr/http-message", + "version": "2.0", + "source": { + "type": "git", + "url": "https://github.com/php-fig/http-message.git", + "reference": "402d35bcb92c70c026d1a6a9883f06b2ead23d71" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/http-message/zipball/402d35bcb92c70c026d1a6a9883f06b2ead23d71", + "reference": "402d35bcb92c70c026d1a6a9883f06b2ead23d71", + "shasum": "" + }, + "require": { + "php": "^7.2 || ^8.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Http\\Message\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "Common interface for HTTP messages", + "homepage": "https://github.com/php-fig/http-message", + "keywords": [ + "http", + "http-message", + "psr", + "psr-7", + "request", + "response" + ], + "support": { + "source": "https://github.com/php-fig/http-message/tree/2.0" + }, + "time": "2023-04-04T09:54:51+00:00" + }, { "name": "psr/log", "version": "3.0.2", @@ -6817,8 +7239,10 @@ "php": "^8.2", "ext-ctype": "*", "ext-curl": "*", - "ext-iconv": "*" + "ext-dom": "*", + "ext-iconv": "*", + "ext-libxml": "*" }, "platform-dev": {}, - "plugin-api-version": "2.6.0" + "plugin-api-version": "2.9.0" } diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php index 31e323b..be95105 100644 --- a/src/Agent/AgentRunner.php +++ b/src/Agent/AgentRunner.php @@ -40,6 +40,8 @@ final readonly class AgentRunner { $prompt = trim($prompt); $swagFullOutPut = ''; + $firstThinkLoop = true; + $shopResults = []; if ($prompt === '') { yield '❌ Empty prompt.'; @@ -61,7 +63,28 @@ final readonly class AgentRunner yield $this->systemMsg("Ich analysiere deine Anfrage...", "think"); - $promptSwagSearch = ' + // --------------------------------------------------------- + // 2) Extract URL content (if present) + // --------------------------------------------------------- + yield $this->systemMsg("Ich prüfe auf Internet Quellen...", "think"); + $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt); + + // --------------------------------------------------------- + // 3) Retrieve RAG knowledge + // --------------------------------------------------------- + yield $this->systemMsg("Ich hole relevante Daten aus meinem RAG Wissen...", "think"); + $knowledgeChunks = $this->retriever->retrieve($prompt); + + // --------------------------------------------------------- + // 4) commerce/shop search + // --------------------------------------------------------- + + $commerceMeta = $this->commerceIntentLite->detect($prompt); + $commerceIntent = (string)($commerceMeta['intent'] ?? CommerceIntentLite::NONE); + + if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH || $commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) { + //PreOptimize swag search query + $promptSwagSearch = ' Erzeuge aus dem folgenden Nutzereingabetext einen kurzen Suchtext für die Shopware-6-Suche. Regeln: @@ -78,63 +101,43 @@ final readonly class AgentRunner Ausgabeformat: Keyword1 Keyword2 Keyword3 - Text: ' . $prompt . ' + Nutzereingabetext: ' . $prompt . ' '; - $this->thinkSuppressor->reset(); + //Reset thinkSuppressor + $this->thinkSuppressor->reset(); - foreach ($this->ollamaClient->stream($promptSwagSearch) as $swagToken) { + yield $this->systemMsg("Ich optimere die Shopanfrage...", "think"); - if (!is_string($swagToken)) { - continue; + //Call ai for optimized swag query + foreach ($this->ollamaClient->stream($promptSwagSearch) as $swagToken) { + + if (!is_string($swagToken)) { + continue; + } + + $swagCleanToken = $this->thinkSuppressor->filter($swagToken); + + if ($swagCleanToken === '') { + continue; + } + + $swagFullOutPut .= $swagCleanToken; } - $swagCleanToken = $this->thinkSuppressor->filter($swagToken); + yield $this->systemMsg("Ich rufe Shopdaten ab (type: " . $commerceIntent . ")", "think"); - if ($swagCleanToken === '') { - continue; - } - - $swagFullOutPut .= $swagCleanToken; + //Search in swag by ai optimized query + $shopResults = $swagFullOutPut ? $this->shopSearchService->search($swagFullOutPut, $commerceIntent) : ''; } - yield $this->systemMsg("Ich habe folgende Keywords an die Shopsuche geschickt: " . $swagFullOutPut, "think"); - - // --------------------------------------------------------- - // 2) Extract URL content (if present) - // --------------------------------------------------------- - $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt); - - // --------------------------------------------------------- - // 3) Retrieve RAG knowledge - // --------------------------------------------------------- - yield $this->systemMsg("Ich hole relevante Daten aus meinem RAG Wissen...", "think"); - - $knowledgeChunks = $this->retriever->retrieve($prompt); - - // --------------------------------------------------------- - // 4) commerce/shop search - // --------------------------------------------------------- - - $commerceMeta = $this->commerceIntentLite->detect($prompt); - $commerceIntent = (string)($commerceMeta['intent'] ?? CommerceIntentLite::NONE); - - yield $this->systemMsg("Ich rufe Shopdaten ab (type: " . $commerceIntent . ")", "think"); - $shopResults = $swagFullOutPut ? $this->shopSearchService->search($swagFullOutPut, $commerceIntent) : ''; - if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH) { $knowledgeChunks = array_slice($knowledgeChunks, 0, 2); } elseif ($commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) { $knowledgeChunks = array_slice($knowledgeChunks, 0, 3); } - if ($shopResults) { - yield $this->systemMsg("Ich verarbeite Shopdaten...", "think"); - } else { - yield $this->systemMsg("Ich habe keine releveanten Shopdaten gefunden...", "think"); - } - - yield $this->systemMsg("Ich analysiere gefundene Informationen...", "think"); + yield $this->systemMsg("Ich analysiere alle Informationen...", "think"); // --------------------------------------------------------- // 5) Build final prompt @@ -179,8 +182,10 @@ final readonly class AgentRunner $cleanToken = $this->thinkSuppressor->filter((string)$token); if ($cleanToken === '') { - yield $this->systemMsg("Denke nach...", "think"); - usleep(500); + if ($firstThinkLoop) { + yield $this->systemMsg("Denke nach...", "think"); + $firstThinkLoop = false; + } continue; } diff --git a/src/Context/UrlAnalyzer.php b/src/Context/UrlAnalyzer.php index b31194c..caabcca 100644 --- a/src/Context/UrlAnalyzer.php +++ b/src/Context/UrlAnalyzer.php @@ -4,7 +4,9 @@ declare(strict_types=1); namespace App\Context; -use RuntimeException; +use fivefilters\Readability\Configuration; +use fivefilters\Readability\ParseException; +use fivefilters\Readability\Readability; /** * UrlAnalyzer @@ -65,30 +67,29 @@ final class UrlAnalyzer ], ]); - $handle = @fopen($url, 'rb', false, $context); - if ($handle === false) { + $html = @file_get_contents($url, false, $context); + if ($html === false || $html === '') { return ''; } + $config = new Configuration(); + $config->setFixRelativeURLs(true); + $config->setOriginalURL($url); + + $readability = new Readability($config); + try { - $html = ''; - while (!feof($handle) && strlen($html) < $this->maxChars * 2) { - $html .= fread($handle, 1024); - } - } finally { - fclose($handle); - } - - if ($html === '') { + $readability->parse($html); + $content = $readability->getContent() ?? ''; + } catch (ParseException) { return ''; } - // Remove script and style blocks - $html = preg_replace('~]*>.*?~is', '', $html) ?? $html; - $html = preg_replace('~]*>.*?~is', '', $html) ?? $html; + if ($content === '') { + return ''; + } - // Strip remaining HTML and normalize whitespace - $text = strip_tags($html); + $text = strip_tags($content); $text = preg_replace('/\s+/u', ' ', $text) ?? $text; return mb_substr(trim($text), 0, $this->maxChars);