From: Dan Brown Date: Wed, 2 Jun 2021 19:25:20 +0000 (+0100) Subject: Merge branch 'master' of https://github.com/awarre/BookStack into awarre-master X-Git-Tag: v21.05.1~1^2~10 X-Git-Url: http://source.bookstackapp.com/bookstack/commitdiff_plain/40ca50e44f3ca0f648d915dc73ff205309730f07?ds=inline;hp=-c Merge branch 'master' of https://github.com/awarre/BookStack into awarre-master --- 40ca50e44f3ca0f648d915dc73ff205309730f07 diff --combined app/Entities/Tools/PageContent.php index ff502d164,b0c1f856f..2c9586265 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@@ -1,10 -1,13 +1,14 @@@ saveBase64Images($this->page, $html); $this->page->html = $this->formatHtml($html); $this->page->text = $this->toPlainText(); $this->page->markdown = ''; @@@ -61,6 -65,60 +66,60 @@@ return $converter->convertToHtml($markdown); } + /** + * Convert all base64 image data to saved images + */ + public function saveBase64Images(Page $page, string $htmlText): string + { + if ($htmlText == '') { + return $htmlText; + } + + libxml_use_internal_errors(true); + $doc = new DOMDocument(); + $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8')); + $container = $doc->documentElement; + $body = $container->childNodes->item(0); + $childNodes = $body->childNodes; + $xPath = new DOMXPath($doc); + + // Get all img elements with image data blobs + $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]'); + foreach($imageNodes as $imageNode) { + $imageSrc = $imageNode->getAttribute('src'); + + # Parse base64 data + $result = preg_match('"data:image/[a-zA-Z]*(;base64,[a-zA-Z0-9+/\\= ]*)"', $imageSrc, $matches); + + if($result === 1) { + $base64ImageData = $matches[1]; + + $image = new Image(); + $imageService = app()->make(ImageService::class); + $permissionService = app(PermissionService::class); + $imageRepo = new ImageRepo(new Image(), $imageService,
$permissionService, $page); + + # Use existing saveDrawing method used for Drawio diagrams + $image = $imageRepo->saveDrawing($base64ImageData, $page->id); + + // Create a new img element with the saved image URI + $newNode = $doc->createElement('img'); + $newNode->setAttribute('src', $image->path); + + // Replace the old img element + $imageNode->parentNode->replaceChild($newNode, $imageNode); + } + } + + // Generate inner html as a string + $html = ''; + foreach ($childNodes as $childNode) { + $html .= $doc->saveHTML($childNode); + } + + return $html; + } + /** * Formats a page's html to be tagged correctly within the system. */ @@@ -170,7 -228,7 +229,7 @@@ $content = $this->page->html; if (!config('app.allow_content_scripts')) { - $content = $this->escapeScripts($content); + $content = HtmlContentFilter::removeScripts($content); } if ($blankIncludes) { @@@ -309,4 -367,65 +368,4 @@@ return $innerContent; } - - /** - * Escape script tags within HTML content. - */ - protected function escapeScripts(string $html) : string - { - if (empty($html)) { - return $html; - } - - libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); - $xPath = new DOMXPath($doc); - - // Remove standard script tags - $scriptElems = $xPath->query('//script'); - foreach ($scriptElems as $scriptElem) { - $scriptElem->parentNode->removeChild($scriptElem); - } - - // Remove clickable links to JavaScript URI - $badLinks = $xPath->query('//*[contains(@href, \'javascript:\')]'); - foreach ($badLinks as $badLink) { - $badLink->parentNode->removeChild($badLink); - } - - // Remove forms with calls to JavaScript URI - $badForms = $xPath->query('//*[contains(@action, \'javascript:\')] | //*[contains(@formaction, \'javascript:\')]'); - foreach ($badForms as $badForm) { - $badForm->parentNode->removeChild($badForm); - } - - // Remove meta tag to prevent external redirects - $metaTags =
$xPath->query('//meta[contains(@content, \'url\')]'); - foreach ($metaTags as $metaTag) { - $metaTag->parentNode->removeChild($metaTag); - } - - // Remove data or JavaScript iFrames - $badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]'); - foreach ($badIframes as $badIframe) { - $badIframe->parentNode->removeChild($badIframe); - } - - // Remove 'on*' attributes - $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]'); - foreach ($onAttributes as $attr) { - /** @var \DOMAttr $attr*/ - $attrName = $attr->nodeName; - $attr->parentNode->removeAttribute($attrName); - } - - $html = ''; - $topElems = $doc->documentElement->childNodes->item(0)->childNodes; - foreach ($topElems as $child) { - $html .= $doc->saveHTML($child); - } - - return $html; - } }