]> BookStack Code Mirror - bookstack/commitdiff
Reviewed base64 image upload support
authorDan Brown <redacted>
Wed, 2 Jun 2021 20:34:34 +0000 (21:34 +0100)
committerDan Brown <redacted>
Wed, 2 Jun 2021 20:34:34 +0000 (21:34 +0100)
- Added test cases to cover.
- Altered parsing logic to be a little less reliant on regex.
- Added new iamge repo method for creating from data.
- Added extension validation and additional type support.
- Done some cleanup of common operations within PageContent.
- Added message to API docs/method to mention image usage.

For #2700 and #2631.

app/Entities/Tools/PageContent.php
app/Http/Controllers/Api/PageApiController.php
app/Uploads/ImageRepo.php
tests/Entity/PageContentTest.php

index 2c95862657b3c5741a8fbdc75c8dc363836e6c36..fbee2ccb64385fb324f5d21a7a047a6120d4394c 100644 (file)
@@ -1,17 +1,16 @@
 <?php namespace BookStack\Entities\Tools;
 
-use BookStack\Auth\Permissions\PermissionService;
 use BookStack\Entities\Models\Page;
 use BookStack\Entities\Tools\Markdown\CustomStrikeThroughExtension;
+use BookStack\Exceptions\ImageUploadException;
 use BookStack\Facades\Theme;
 use BookStack\Theming\ThemeEvents;
 use BookStack\Util\HtmlContentFilter;
-use BookStack\Uploads\Image;
 use BookStack\Uploads\ImageRepo;
-use BookStack\Uploads\ImageService;
 use DOMDocument;
 use DOMNodeList;
 use DOMXPath;
+use Illuminate\Support\Str;
 use League\CommonMark\CommonMarkConverter;
 use League\CommonMark\Environment;
 use League\CommonMark\Extension\Table\TableExtension;
@@ -35,7 +34,7 @@ class PageContent
      */
     public function setNewHTML(string $html)
     {
-        $html = $this->saveBase64Images($this->page, $html);
+        $html = $this->extractBase64Images($this->page, $html);
         $this->page->html = $this->formatHtml($html);
         $this->page->text = $this->toPlainText();
         $this->page->markdown = '';
@@ -69,45 +68,40 @@ class PageContent
     /**
      * Convert all base64 image data to saved images
      */
-    public function saveBase64Images(Page $page, string $htmlText): string
+    public function extractBase64Images(Page $page, string $htmlText): string
     {
-        if ($htmlText == '') {
+        if (empty($htmlText) || strpos($htmlText, 'data:image') === false) {
             return $htmlText;
         }
 
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8'));
+        $doc = $this->loadDocumentFromHtml($htmlText);
         $container = $doc->documentElement;
         $body = $container->childNodes->item(0);
         $childNodes = $body->childNodes;
         $xPath = new DOMXPath($doc);
+        $imageRepo = app()->make(ImageRepo::class);
+        $allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
 
         // Get all img elements with image data blobs
         $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
-        foreach($imageNodes as $imageNode) {
+        foreach ($imageNodes as $imageNode) {
             $imageSrc = $imageNode->getAttribute('src');
+            [$dataDefinition, $base64ImageData] = explode(',', $imageSrc, 2);
+            $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
 
-            # Parse base64 data
-            $result = preg_match('"data:image/[a-zA-Z]*(;base64,[a-zA-Z0-9+/\\= ]*)"', $imageSrc, $matches);
-
-            if($result === 1) {
-                $base64ImageData = $matches[1];
-
-                $image = new Image();
-                $imageService = app()->make(ImageService::class);
-                $permissionService = app(PermissionService::class);
-                $imageRepo = new ImageRepo(new Image(), $imageService, $permissionService, $page);
-
-                # Use existing saveDrawing method used for Drawio diagrams
-                $image = $imageRepo->saveDrawing($base64ImageData, $page->id);
-                
-                // Create a new img element with the saved image URI
-                $newNode = $doc->createElement('img');
-                $newNode->setAttribute('src', $image->path);
+            // Validate extension
+            if (!in_array($extension, $allowedExtensions)) {
+                $imageNode->setAttribute('src', '');
+                continue;
+            }
 
-                // Replace the old img element
-                $imageNode->parentNode->replaceChild($newNode, $imageNode);
+            // Save image from data with a random name
+            $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
+            try {
+                $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $page->id);
+                $imageNode->setAttribute('src', $image->path);
+            } catch (ImageUploadException $exception) {
+                $imageNode->setAttribute('src', '');
             }
         }
 
@@ -125,14 +119,11 @@ class PageContent
      */
     protected function formatHtml(string $htmlText): string
     {
-        if ($htmlText == '') {
+        if (empty($htmlText)) {
             return $htmlText;
         }
 
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8'));
-
+        $doc = $this->loadDocumentFromHtml($htmlText);
         $container = $doc->documentElement;
         $body = $container->childNodes->item(0);
         $childNodes = $body->childNodes;
@@ -171,7 +162,7 @@ class PageContent
     protected function updateLinks(DOMXPath $xpath, string $old, string $new)
     {
         $old = str_replace('"', '', $old);
-        $matchingLinks = $xpath->query('//body//*//*[@href="'.$old.'"]');
+        $matchingLinks = $xpath->query('//body//*//*[@href="' . $old . '"]');
         foreach ($matchingLinks as $domElem) {
             $domElem->setAttribute('href', $new);
         }
@@ -224,7 +215,7 @@ class PageContent
     /**
      * Render the page for viewing
      */
-    public function render(bool $blankIncludes = false) : string
+    public function render(bool $blankIncludes = false): string
     {
         $content = $this->page->html;
 
@@ -250,9 +241,7 @@ class PageContent
             return [];
         }
 
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($htmlContent, 'HTML-ENTITIES', 'UTF-8'));
+        $doc = $this->loadDocumentFromHtml($htmlContent);
         $xPath = new DOMXPath($doc);
         $headers = $xPath->query("//h1|//h2|//h3|//h4|//h5|//h6");
 
@@ -292,7 +281,7 @@ class PageContent
     /**
      * Remove any page include tags within the given HTML.
      */
-    protected function blankPageIncludes(string $html) : string
+    protected function blankPageIncludes(string $html): string
     {
         return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
     }
@@ -300,7 +289,7 @@ class PageContent
     /**
      * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
      */
-    protected function parsePageIncludes(string $html) : string
+    protected function parsePageIncludes(string $html): string
     {
         $matches = [];
         preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
@@ -343,9 +332,7 @@ class PageContent
     protected function fetchSectionOfPage(Page $page, string $sectionId): string
     {
         $topLevelTags = ['table', 'ul', 'ol'];
-        $doc = new DOMDocument();
-        libxml_use_internal_errors(true);
-        $doc->loadHTML(mb_convert_encoding('<body>'.$page->html.'</body>', 'HTML-ENTITIES', 'UTF-8'));
+        $doc = $this->loadDocumentFromHtml('<body>' . $page->html . '</body>');
 
         // Search included content for the id given and blank out if not exists.
         $matchingElem = $doc->getElementById($sectionId);
@@ -368,4 +355,15 @@ class PageContent
 
         return $innerContent;
     }
+
+    /**
+     * Create and load a DOMDocument from the given html content.
+     */
+    protected function loadDocumentFromHtml(string $html): DOMDocument
+    {
+        libxml_use_internal_errors(true);
+        $doc = new DOMDocument();
+        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
+        return $doc;
+    }
 }
index a6db0583380e610626cef1794e8e5d8a2e501ea1..fd4a16effeb56dce464f9ebeef2a427eb6ecce3e 100644 (file)
@@ -60,6 +60,8 @@ class PageApiController extends ApiController
      *
      * Any HTML content provided should be kept to a single-block depth of plain HTML
      * elements to remain compatible with the BookStack front-end and editors.
+     * Any images included via base64 data URIs will be extracted and saved as gallery
+     * images against the page during upload.
      */
     public function create(Request $request)
     {
index e6f7668241dafa4dcfad7a0b852bfc6d214904c8..ef249c58b02b59710133d25e71d871b4ff255b9c 100644 (file)
@@ -130,6 +130,17 @@ class ImageRepo
         return $image;
     }
 
+    /**
+     * Save a new image from an existing image data string.
+     * @throws ImageUploadException
+     */
+    public function saveNewFromData(string $imageName, string $imageData, string $type, int $uploadedTo = 0)
+    {
+        $image = $this->imageService->saveNew($imageName, $imageData, $type, $uploadedTo);
+        $this->loadThumbs($image);
+        return $image;
+    }
+
     /**
      * Save a drawing the the database.
      * @throws ImageUploadException
index 6d5200794bc79354bd8243d80711867323f9f0c6..670557b0c8f841f67c0ca82281e0cc92ae45e431 100644 (file)
@@ -3,9 +3,13 @@
 use BookStack\Entities\Tools\PageContent;
 use BookStack\Entities\Models\Page;
 use Tests\TestCase;
+use Tests\Uploads\UsesImages;
 
 class PageContentTest extends TestCase
 {
+    use UsesImages;
+
+    protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
 
     public function test_page_includes()
     {
@@ -479,4 +483,64 @@ class PageContentTest extends TestCase
         $pageView = $this->get($page->getUrl());
         $pageView->assertElementExists('.page-content p > s');
     }
+
+    public function test_base64_images_get_extracted_from_page_content()
+    {
+        $this->asEditor();
+        $page = Page::query()->first();
+
+        $this->put($page->getUrl(), [
+            'name' => $page->name, 'summary' => '',
+            'html' => '<p>test<img src="data:image/jpeg;base64,'.$this->base64Jpeg.'"/></p>',
+        ]);
+
+        $page->refresh();
+        $this->assertStringMatchesFormat('%A<p%A>test<img src="/uploads/images/gallery/%A.jpeg">%A</p>%A', $page->html);
+
+        $matches = [];
+        preg_match('/src="(.*?)"/', $page->html, $matches);
+        $imagePath = $matches[1];
+        $imageFile = public_path($imagePath);
+        $this->assertEquals(base64_decode($this->base64Jpeg), file_get_contents($imageFile));
+
+        $this->deleteImage($imagePath);
+    }
+
+    public function test_base64_images_get_extracted_when_containing_whitespace()
+    {
+        $this->asEditor();
+        $page = Page::query()->first();
+
+        $base64PngWithWhitespace = "iVBORw0KGg\noAAAANSUhE\tUgAAAAEAAAA BCA   YAAAAfFcSJAAA\n\t ACklEQVR4nGMAAQAABQAB";
+        $base64PngWithoutWhitespace = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQAB';
+        $this->put($page->getUrl(), [
+            'name' => $page->name, 'summary' => '',
+            'html' => '<p>test<img src="data:image/png;base64,'.$base64PngWithWhitespace.'"/></p>',
+        ]);
+
+        $page->refresh();
+        $this->assertStringMatchesFormat('%A<p%A>test<img src="/uploads/images/gallery/%A.png">%A</p>%A', $page->html);
+
+        $matches = [];
+        preg_match('/src="(.*?)"/', $page->html, $matches);
+        $imagePath = $matches[1];
+        $imageFile = public_path($imagePath);
+        $this->assertEquals(base64_decode($base64PngWithoutWhitespace), file_get_contents($imageFile));
+
+        $this->deleteImage($imagePath);
+    }
+
+    public function test_base64_images_blanked_if_not_supported_extension_for_extract()
+    {
+        $this->asEditor();
+        $page = Page::query()->first();
+
+        $this->put($page->getUrl(), [
+            'name' => $page->name, 'summary' => '',
+            'html' => '<p>test<img src="data:image/jiff;base64,'.$this->base64Jpeg.'"/></p>',
+        ]);
+
+        $page->refresh();
+        $this->assertStringContainsString('<img src=""', $page->html);
+    }
 }