diff --git a/composer.json b/composer.json
index e7bc8e4..f155307 100644
--- a/composer.json
+++ b/composer.json
@@ -7,10 +7,13 @@
"php": "^8.2",
"ext-ctype": "*",
"ext-curl": "*",
+ "ext-dom": "*",
"ext-iconv": "*",
+ "ext-libxml": "*",
"doctrine/doctrine-bundle": "^2.18",
"doctrine/doctrine-migrations-bundle": "^3.7",
"doctrine/orm": "^3.6",
+ "fivefilters/readability.php": ">=3.0",
"league/commonmark": "^2.8",
"smalot/pdfparser": "^2.12",
"symfony/asset": "7.4.*",
diff --git a/composer.lock b/composer.lock
index cea8f7a..4321cd1 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
- "content-hash": "bf516574b65f7c2abdc053c964f769aa",
+ "content-hash": "6b5cec5df97930b08d52d1e9599d125b",
"packages": [
{
"name": "dflydev/dot-access-data",
@@ -1194,6 +1194,71 @@
},
"time": "2026-02-08T16:21:46+00:00"
},
+ {
+ "name": "fivefilters/readability.php",
+ "version": "v3.3.3",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/fivefilters/readability.php.git",
+ "reference": "e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/fivefilters/readability.php/zipball/e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8",
+ "reference": "e2ee7b9e49eae89ac7ed2c74b15718100a73b4c8",
+ "shasum": ""
+ },
+ "require": {
+ "ext-dom": "*",
+ "ext-mbstring": "*",
+ "ext-xml": "*",
+ "league/uri": "^7.0",
+ "masterminds/html5": "^2.0",
+ "php": ">=8.1",
+ "psr/log": "^1.0 || ^2.0 || ^3.0"
+ },
+ "require-dev": {
+ "monolog/monolog": "^3.0",
+ "phpunit/phpunit": "^10.0 || ^11.0"
+ },
+ "suggest": {
+ "monolog/monolog": "Allow logging debug information"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "fivefilters\\Readability\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "Apache-2.0"
+ ],
+ "authors": [
+ {
+ "name": "Andres Rey",
+ "email": "andreskrey@gmail.com",
+ "role": "Original Developer"
+ },
+ {
+ "name": "Keyvan Minoukadeh",
+ "email": "keyvan@fivefilters.org",
+ "homepage": "https://www.fivefilters.org",
+ "role": "Developer/Maintainer"
+ }
+ ],
+ "description": "A PHP port of Readability.js",
+ "homepage": "https://github.com/fivefilters/readability.php",
+ "keywords": [
+ "html",
+ "readability"
+ ],
+ "support": {
+ "issues": "https://github.com/fivefilters/readability.php/issues",
+ "source": "https://github.com/fivefilters/readability.php/tree/v3.3.3"
+ },
+ "time": "2025-04-26T23:45:37+00:00"
+ },
{
"name": "league/commonmark",
"version": "2.8.0",
@@ -1383,6 +1448,255 @@
],
"time": "2022-12-11T20:36:23+00:00"
},
+ {
+ "name": "league/uri",
+ "version": "7.8.1",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/thephpleague/uri.git",
+ "reference": "08cf38e3924d4f56238125547b5720496fac8fd4"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/thephpleague/uri/zipball/08cf38e3924d4f56238125547b5720496fac8fd4",
+ "reference": "08cf38e3924d4f56238125547b5720496fac8fd4",
+ "shasum": ""
+ },
+ "require": {
+ "league/uri-interfaces": "^7.8.1",
+ "php": "^8.1",
+ "psr/http-factory": "^1"
+ },
+ "conflict": {
+ "league/uri-schemes": "^1.0"
+ },
+ "suggest": {
+ "ext-bcmath": "to improve IPV4 host parsing",
+ "ext-dom": "to convert the URI into an HTML anchor tag",
+ "ext-fileinfo": "to create Data URI from file contennts",
+ "ext-gmp": "to improve IPV4 host parsing",
+ "ext-intl": "to handle IDN host with the best performance",
+ "ext-uri": "to use the PHP native URI class",
+ "jeremykendall/php-domain-parser": "to further parse the URI host and resolve its Public Suffix and Top Level Domain",
+ "league/uri-components": "to provide additional tools to manipulate URI objects components",
+ "league/uri-polyfill": "to backport the PHP URI extension for older versions of PHP",
+ "php-64bit": "to improve IPV4 host parsing",
+ "rowbot/url": "to handle URLs using the WHATWG URL Living Standard specification",
+ "symfony/polyfill-intl-idn": "to handle IDN host via the Symfony polyfill if ext-intl is not present"
+ },
+ "type": "library",
+ "extra": {
+ "branch-alias": {
+ "dev-master": "7.x-dev"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "League\\Uri\\": ""
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Ignace Nyamagana Butera",
+ "email": "nyamsprod@gmail.com",
+ "homepage": "https://nyamsprod.com"
+ }
+ ],
+ "description": "URI manipulation library",
+ "homepage": "https://uri.thephpleague.com",
+ "keywords": [
+ "URN",
+ "data-uri",
+ "file-uri",
+ "ftp",
+ "hostname",
+ "http",
+ "https",
+ "middleware",
+ "parse_str",
+ "parse_url",
+ "psr-7",
+ "query-string",
+ "querystring",
+ "rfc2141",
+ "rfc3986",
+ "rfc3987",
+ "rfc6570",
+ "rfc8141",
+ "uri",
+ "uri-template",
+ "url",
+ "ws"
+ ],
+ "support": {
+ "docs": "https://uri.thephpleague.com",
+ "forum": "https://thephpleague.slack.com",
+ "issues": "https://github.com/thephpleague/uri-src/issues",
+ "source": "https://github.com/thephpleague/uri/tree/7.8.1"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/sponsors/nyamsprod",
+ "type": "github"
+ }
+ ],
+ "time": "2026-03-15T20:22:25+00:00"
+ },
+ {
+ "name": "league/uri-interfaces",
+ "version": "7.8.1",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/thephpleague/uri-interfaces.git",
+ "reference": "85d5c77c5d6d3af6c54db4a78246364908f3c928"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/thephpleague/uri-interfaces/zipball/85d5c77c5d6d3af6c54db4a78246364908f3c928",
+ "reference": "85d5c77c5d6d3af6c54db4a78246364908f3c928",
+ "shasum": ""
+ },
+ "require": {
+ "ext-filter": "*",
+ "php": "^8.1",
+ "psr/http-message": "^1.1 || ^2.0"
+ },
+ "suggest": {
+ "ext-bcmath": "to improve IPV4 host parsing",
+ "ext-gmp": "to improve IPV4 host parsing",
+ "ext-intl": "to handle IDN host with the best performance",
+ "php-64bit": "to improve IPV4 host parsing",
+ "rowbot/url": "to handle URLs using the WHATWG URL Living Standard specification",
+ "symfony/polyfill-intl-idn": "to handle IDN host via the Symfony polyfill if ext-intl is not present"
+ },
+ "type": "library",
+ "extra": {
+ "branch-alias": {
+ "dev-master": "7.x-dev"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "League\\Uri\\": ""
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Ignace Nyamagana Butera",
+ "email": "nyamsprod@gmail.com",
+ "homepage": "https://nyamsprod.com"
+ }
+ ],
+ "description": "Common tools for parsing and resolving RFC3987/RFC3986 URI",
+ "homepage": "https://uri.thephpleague.com",
+ "keywords": [
+ "data-uri",
+ "file-uri",
+ "ftp",
+ "hostname",
+ "http",
+ "https",
+ "parse_str",
+ "parse_url",
+ "psr-7",
+ "query-string",
+ "querystring",
+ "rfc3986",
+ "rfc3987",
+ "rfc6570",
+ "uri",
+ "url",
+ "ws"
+ ],
+ "support": {
+ "docs": "https://uri.thephpleague.com",
+ "forum": "https://thephpleague.slack.com",
+ "issues": "https://github.com/thephpleague/uri-src/issues",
+ "source": "https://github.com/thephpleague/uri-interfaces/tree/7.8.1"
+ },
+ "funding": [
+ {
+ "url": "https://github.com/sponsors/nyamsprod",
+ "type": "github"
+ }
+ ],
+ "time": "2026-03-08T20:05:35+00:00"
+ },
+ {
+ "name": "masterminds/html5",
+ "version": "2.10.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/Masterminds/html5-php.git",
+ "reference": "fcf91eb64359852f00d921887b219479b4f21251"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/fcf91eb64359852f00d921887b219479b4f21251",
+ "reference": "fcf91eb64359852f00d921887b219479b4f21251",
+ "shasum": ""
+ },
+ "require": {
+ "ext-dom": "*",
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7 || ^8 || ^9"
+ },
+ "type": "library",
+ "extra": {
+ "branch-alias": {
+ "dev-master": "2.7-dev"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "Masterminds\\": "src"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Matt Butcher",
+ "email": "technosophos@gmail.com"
+ },
+ {
+ "name": "Matt Farina",
+ "email": "matt@mattfarina.com"
+ },
+ {
+ "name": "Asmir Mustafic",
+ "email": "goetas@gmail.com"
+ }
+ ],
+ "description": "An HTML5 parser and serializer.",
+ "homepage": "http://masterminds.github.io/html5-php",
+ "keywords": [
+ "HTML5",
+ "dom",
+ "html",
+ "parser",
+ "querypath",
+ "serializer",
+ "xml"
+ ],
+ "support": {
+ "issues": "https://github.com/Masterminds/html5-php/issues",
+ "source": "https://github.com/Masterminds/html5-php/tree/2.10.0"
+ },
+ "time": "2025-07-25T09:04:22+00:00"
+ },
{
"name": "monolog/monolog",
"version": "3.10.0",
@@ -1844,6 +2158,114 @@
},
"time": "2019-01-08T18:20:26+00:00"
},
+ {
+ "name": "psr/http-factory",
+ "version": "1.1.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/php-fig/http-factory.git",
+ "reference": "2b4765fddfe3b508ac62f829e852b1501d3f6e8a"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/php-fig/http-factory/zipball/2b4765fddfe3b508ac62f829e852b1501d3f6e8a",
+ "reference": "2b4765fddfe3b508ac62f829e852b1501d3f6e8a",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=7.1",
+ "psr/http-message": "^1.0 || ^2.0"
+ },
+ "type": "library",
+ "extra": {
+ "branch-alias": {
+ "dev-master": "1.0.x-dev"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "Psr\\Http\\Message\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "PHP-FIG",
+ "homepage": "https://www.php-fig.org/"
+ }
+ ],
+ "description": "PSR-17: Common interfaces for PSR-7 HTTP message factories",
+ "keywords": [
+ "factory",
+ "http",
+ "message",
+ "psr",
+ "psr-17",
+ "psr-7",
+ "request",
+ "response"
+ ],
+ "support": {
+ "source": "https://github.com/php-fig/http-factory"
+ },
+ "time": "2024-04-15T12:06:14+00:00"
+ },
+ {
+ "name": "psr/http-message",
+ "version": "2.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/php-fig/http-message.git",
+ "reference": "402d35bcb92c70c026d1a6a9883f06b2ead23d71"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/php-fig/http-message/zipball/402d35bcb92c70c026d1a6a9883f06b2ead23d71",
+ "reference": "402d35bcb92c70c026d1a6a9883f06b2ead23d71",
+ "shasum": ""
+ },
+ "require": {
+ "php": "^7.2 || ^8.0"
+ },
+ "type": "library",
+ "extra": {
+ "branch-alias": {
+ "dev-master": "2.0.x-dev"
+ }
+ },
+ "autoload": {
+ "psr-4": {
+ "Psr\\Http\\Message\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "PHP-FIG",
+ "homepage": "https://www.php-fig.org/"
+ }
+ ],
+ "description": "Common interface for HTTP messages",
+ "homepage": "https://github.com/php-fig/http-message",
+ "keywords": [
+ "http",
+ "http-message",
+ "psr",
+ "psr-7",
+ "request",
+ "response"
+ ],
+ "support": {
+ "source": "https://github.com/php-fig/http-message/tree/2.0"
+ },
+ "time": "2023-04-04T09:54:51+00:00"
+ },
{
"name": "psr/log",
"version": "3.0.2",
@@ -6817,8 +7239,10 @@
"php": "^8.2",
"ext-ctype": "*",
"ext-curl": "*",
- "ext-iconv": "*"
+ "ext-dom": "*",
+ "ext-iconv": "*",
+ "ext-libxml": "*"
},
"platform-dev": {},
- "plugin-api-version": "2.6.0"
+ "plugin-api-version": "2.9.0"
}
diff --git a/src/Agent/AgentRunner.php b/src/Agent/AgentRunner.php
index 31e323b..be95105 100644
--- a/src/Agent/AgentRunner.php
+++ b/src/Agent/AgentRunner.php
@@ -40,6 +40,8 @@ final readonly class AgentRunner
{
$prompt = trim($prompt);
$swagFullOutPut = '';
+ $firstThinkLoop = true;
+ $shopResults = [];
if ($prompt === '') {
yield '❌ Empty prompt.';
@@ -61,7 +63,28 @@ final readonly class AgentRunner
yield $this->systemMsg("Ich analysiere deine Anfrage...", "think");
- $promptSwagSearch = '
+ // ---------------------------------------------------------
+ // 2) Extract URL content (if present)
+ // ---------------------------------------------------------
+ yield $this->systemMsg("Ich prüfe auf Internet Quellen...", "think");
+ $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
+
+ // ---------------------------------------------------------
+ // 3) Retrieve RAG knowledge
+ // ---------------------------------------------------------
+ yield $this->systemMsg("Ich hole relevante Daten aus meinem RAG Wissen...", "think");
+ $knowledgeChunks = $this->retriever->retrieve($prompt);
+
+ // ---------------------------------------------------------
+ // 4) commerce/shop search
+ // ---------------------------------------------------------
+
+ $commerceMeta = $this->commerceIntentLite->detect($prompt);
+ $commerceIntent = (string)($commerceMeta['intent'] ?? CommerceIntentLite::NONE);
+
+ if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH || $commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) {
+ // Build the prompt that condenses the user input into a short Shopware search query
+ $promptSwagSearch = '
Erzeuge aus dem folgenden Nutzereingabetext einen kurzen Suchtext für die Shopware-6-Suche.
Regeln:
@@ -78,63 +101,43 @@ final readonly class AgentRunner
Ausgabeformat:
Keyword1 Keyword2 Keyword3
- Text: ' . $prompt . '
+ Nutzereingabetext: ' . $prompt . '
';
- $this->thinkSuppressor->reset();
+ // Reset the think suppressor before streaming the search-query response
+ $this->thinkSuppressor->reset();
- foreach ($this->ollamaClient->stream($promptSwagSearch) as $swagToken) {
+ yield $this->systemMsg("Ich optimere die Shopanfrage...", "think");
- if (!is_string($swagToken)) {
- continue;
+ // Stream the model response and accumulate the filtered tokens into the search query
+ foreach ($this->ollamaClient->stream($promptSwagSearch) as $swagToken) {
+
+ if (!is_string($swagToken)) {
+ continue;
+ }
+
+ $swagCleanToken = $this->thinkSuppressor->filter($swagToken);
+
+ if ($swagCleanToken === '') {
+ continue;
+ }
+
+ $swagFullOutPut .= $swagCleanToken;
}
- $swagCleanToken = $this->thinkSuppressor->filter($swagToken);
+ yield $this->systemMsg("Ich rufe Shopdaten ab (type: " . $commerceIntent . ")", "think");
- if ($swagCleanToken === '') {
- continue;
- }
-
- $swagFullOutPut .= $swagCleanToken;
+ // Query the shop search with the AI-optimized keywords
+ $shopResults = $swagFullOutPut ? $this->shopSearchService->search($swagFullOutPut, $commerceIntent) : '';
}
- yield $this->systemMsg("Ich habe folgende Keywords an die Shopsuche geschickt: " . $swagFullOutPut, "think");
-
- // ---------------------------------------------------------
- // 2) Extract URL content (if present)
- // ---------------------------------------------------------
- $urlContent = $this->urlAnalyzer->extractContentFromPrompt($prompt);
-
- // ---------------------------------------------------------
- // 3) Retrieve RAG knowledge
- // ---------------------------------------------------------
- yield $this->systemMsg("Ich hole relevante Daten aus meinem RAG Wissen...", "think");
-
- $knowledgeChunks = $this->retriever->retrieve($prompt);
-
- // ---------------------------------------------------------
- // 4) commerce/shop search
- // ---------------------------------------------------------
-
- $commerceMeta = $this->commerceIntentLite->detect($prompt);
- $commerceIntent = (string)($commerceMeta['intent'] ?? CommerceIntentLite::NONE);
-
- yield $this->systemMsg("Ich rufe Shopdaten ab (type: " . $commerceIntent . ")", "think");
- $shopResults = $swagFullOutPut ? $this->shopSearchService->search($swagFullOutPut, $commerceIntent) : '';
-
if ($commerceIntent === CommerceIntentLite::PRODUCT_SEARCH) {
$knowledgeChunks = array_slice($knowledgeChunks, 0, 2);
} elseif ($commerceIntent === CommerceIntentLite::ADVISORY_PRODUCT_SEARCH) {
$knowledgeChunks = array_slice($knowledgeChunks, 0, 3);
}
- if ($shopResults) {
- yield $this->systemMsg("Ich verarbeite Shopdaten...", "think");
- } else {
- yield $this->systemMsg("Ich habe keine releveanten Shopdaten gefunden...", "think");
- }
-
- yield $this->systemMsg("Ich analysiere gefundene Informationen...", "think");
+ yield $this->systemMsg("Ich analysiere alle Informationen...", "think");
// ---------------------------------------------------------
// 5) Build final prompt
@@ -179,8 +182,10 @@ final readonly class AgentRunner
$cleanToken = $this->thinkSuppressor->filter((string)$token);
if ($cleanToken === '') {
- yield $this->systemMsg("Denke nach...", "think");
- usleep(500);
+ if ($firstThinkLoop) {
+ yield $this->systemMsg("Denke nach...", "think");
+ $firstThinkLoop = false;
+ }
continue;
}
diff --git a/src/Context/UrlAnalyzer.php b/src/Context/UrlAnalyzer.php
index b31194c..caabcca 100644
--- a/src/Context/UrlAnalyzer.php
+++ b/src/Context/UrlAnalyzer.php
@@ -4,7 +4,9 @@ declare(strict_types=1);
namespace App\Context;
-use RuntimeException;
+use fivefilters\Readability\Configuration;
+use fivefilters\Readability\ParseException;
+use fivefilters\Readability\Readability;
/**
* UrlAnalyzer
@@ -65,30 +67,29 @@ final class UrlAnalyzer
],
]);
- $handle = @fopen($url, 'rb', false, $context);
- if ($handle === false) {
+ $html = @file_get_contents($url, false, $context);
+ if ($html === false || $html === '') {
return '';
}
+ $config = new Configuration();
+ $config->setFixRelativeURLs(true);
+ $config->setOriginalURL($url);
+
+ $readability = new Readability($config);
+
try {
- $html = '';
- while (!feof($handle) && strlen($html) < $this->maxChars * 2) {
- $html .= fread($handle, 1024);
- }
- } finally {
- fclose($handle);
- }
-
- if ($html === '') {
+ $readability->parse($html);
+ $content = $readability->getContent() ?? '';
+ } catch (ParseException) {
return '';
}
- // Remove script and style blocks
- $html = preg_replace('~~is', '', $html) ?? $html;
- $html = preg_replace('~~is', '', $html) ?? $html;
+ if ($content === '') {
+ return '';
+ }
- // Strip remaining HTML and normalize whitespace
- $text = strip_tags($html);
+ $text = strip_tags($content);
$text = preg_replace('/\s+/u', ' ', $text) ?? $text;
return mb_substr(trim($text), 0, $this->maxChars);