BookStack Code Mirror - bookstack/commitdiff
Added base64 image extraction to markdown page content
author Dan Brown <redacted>
Mon, 18 Oct 2021 10:42:50 +0000 (11:42 +0100)
committer Dan Brown <redacted>
Mon, 18 Oct 2021 10:42:50 +0000 (11:42 +0100)
- Included tests to cover.
- Manually tested via API update and interface page update.

Closes #2898

app/Entities/Models/Page.php
app/Entities/Tools/PageContent.php
app/Uploads/ImageRepo.php
tests/Entity/PageContentTest.php

index b8467c38cf58ce6cc8334b287959824bf8f5fd16..601e9630db07eec6fef204c7a6b9fac7f52c7e48 100644 (file)
@@ -28,7 +28,7 @@ class Page extends BookChild
     public static $listAttributes = ['name', 'id', 'slug', 'book_id', 'chapter_id', 'draft', 'template', 'text', 'created_at', 'updated_at', 'priority'];
     public static $contentAttributes = ['name', 'id', 'slug', 'book_id', 'chapter_id', 'draft', 'template', 'html', 'text', 'created_at', 'updated_at', 'priority'];
 
-    protected $fillable = ['name', 'priority', 'markdown'];
+    protected $fillable = ['name', 'priority'];
 
     public $textField = 'text';
 
index 661c554da4809c799d38f2aba8321756ca992019..9f4ac2893f7fe0857acbf3ea476035f39b887cec 100644 (file)
@@ -37,7 +37,7 @@ class PageContent
      */
     public function setNewHTML(string $html)
     {
-        $html = $this->extractBase64Images($this->page, $html);
+        $html = $this->extractBase64ImagesFromHtml($html);
         $this->page->html = $this->formatHtml($html);
         $this->page->text = $this->toPlainText();
         $this->page->markdown = '';
@@ -48,6 +48,7 @@ class PageContent
      */
     public function setNewMarkdown(string $markdown)
     {
+        $markdown = $this->extractBase64ImagesFromMarkdown($markdown);
         $this->page->markdown = $markdown;
         $html = $this->markdownToHtml($markdown);
         $this->page->html = $this->formatHtml($html);
@@ -74,7 +75,7 @@ class PageContent
     /**
      * Convert all base64 image data to saved images.
      */
-    public function extractBase64Images(Page $page, string $htmlText): string
+    protected function extractBase64ImagesFromHtml(string $htmlText): string
     {
         if (empty($htmlText) || strpos($htmlText, 'data:image') === false) {
             return $htmlText;
@@ -86,7 +87,6 @@ class PageContent
         $childNodes = $body->childNodes;
         $xPath = new DOMXPath($doc);
         $imageRepo = app()->make(ImageRepo::class);
-        $allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
 
         // Get all img elements with image data blobs
         $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
@@ -96,7 +96,7 @@ class PageContent
             $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
 
             // Validate extension
-            if (!in_array($extension, $allowedExtensions)) {
+            if (!$imageRepo->imageExtensionSupported($extension)) {
                 $imageNode->setAttribute('src', '');
                 continue;
             }
@@ -105,7 +105,7 @@ class PageContent
             $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
 
             try {
-                $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $page->id);
+                $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
                 $imageNode->setAttribute('src', $image->url);
             } catch (ImageUploadException $exception) {
                 $imageNode->setAttribute('src', '');
@@ -121,6 +121,39 @@ class PageContent
         return $html;
     }
 
+    /**
+     * Convert all inline base64 content to uploaded image files.
+     *
+     * Looks for markdown image references whose target is a base64
+     * "data:image/..." URI, stores each payload through the image repo as a
+     * 'gallery' image against the current page's id, and swaps the data URI
+     * for the URL of the stored image. Data URIs that carry no payload, use
+     * an extension the image repo does not support, or fail to upload are
+     * replaced with an empty string.
+     *
+     * @param string $markdown Markdown content that may embed base64 images.
+     *
+     * @return string The markdown with every matched data URI replaced.
+     */
+    protected function extractBase64ImagesFromMarkdown(string $markdown): string
+    {
+        // Nothing embedded: skip the container lookup and the regex scan.
+        if (strpos($markdown, 'data:image') === false) {
+            return $markdown;
+        }
+
+        $imageRepo = app()->make(ImageRepo::class);
+        $matches = [];
+        preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
+
+        // str_replace() below swaps every occurrence of a data URI at once, so
+        // a repeated URI only needs to be handled (and uploaded) a single time.
+        foreach (array_unique($matches[1] ?? []) as $base64Match) {
+            // A data URI without a comma has no payload part; avoid destructuring
+            // a one-element array, which would raise an undefined key warning.
+            $parts = explode(',', $base64Match, 2);
+            $dataDefinition = $parts[0];
+            $base64ImageData = $parts[1] ?? null;
+            $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
+
+            // Blank out references with no payload or an unsupported extension
+            if ($base64ImageData === null || !$imageRepo->imageExtensionSupported($extension)) {
+                $markdown = str_replace($base64Match, '', $markdown);
+                continue;
+            }
+
+            // Save image from data with a random name
+            $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
+
+            try {
+                $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
+                $markdown = str_replace($base64Match, $image->url, $markdown);
+            } catch (ImageUploadException $exception) {
+                // Upload rejected: drop the inline data rather than keep it in the page.
+                $markdown = str_replace($base64Match, '', $markdown);
+            }
+        }
+
+        return $markdown;
+    }
+
     /**
      * Formats a page's html to be tagged correctly within the system.
      */
index 11507856140a0a7c330991cb8c488a1167257170..c4205e35740d0fce777ac8fd0c358b241dedfd49 100644 (file)
@@ -16,6 +16,8 @@ class ImageRepo
     protected $restrictionService;
     protected $page;
 
+    protected static $supportedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'webp'];
+
     /**
      * ImageRepo constructor.
      */
@@ -31,6 +33,14 @@ class ImageRepo
         $this->page = $page;
     }
 
+    /**
+     * Check if the given image extension is supported by BookStack.
+     *
+     * Dots and whitespace at either end of the extension are ignored, so
+     * ".png" and "png" are treated alike. The comparison is an exact,
+     * case-sensitive match against the entries of $supportedExtensions.
+     *
+     * @param string $extension Extension to check, with or without a leading dot.
+     *
+     * @return bool True when the cleaned extension is in the supported list.
+     */
+    public function imageExtensionSupported(string $extension): bool
+    {
+        // The character list has to be double-quoted: inside single quotes the
+        // sequences \t, \n, \r, \0 and \x0B are literal backslash/letter pairs,
+        // so trim() would strip letters such as "t", "n", "r" and "x" from the
+        // ends of the extension instead of whitespace.
+        return in_array(trim($extension, ". \t\n\r\0\x0B"), static::$supportedExtensions, true);
+    }
+
     /**
      * Get an image with the given id.
      */
index 45c27c9f9545cb4cec5b7a919a8e56d8df5a1ec4..60fa6fd7760d24f6d9eebeed77e4ff3bf759c4a1 100644 (file)
@@ -594,7 +594,7 @@ class PageContentTest extends TestCase
         $this->deleteImage($imagePath);
     }
 
-    public function test_base64_images_blanked_if_not_supported_extension_for_extract()
+    public function test_base64_images_within_html_blanked_if_not_supported_extension_for_extract()
     {
         $this->asEditor();
         $page = Page::query()->first();
@@ -607,4 +607,40 @@ class PageContentTest extends TestCase
         $page->refresh();
         $this->assertStringContainsString('<img src=""', $page->html);
     }
+
+    /**
+     * A base64 JPEG embedded in markdown content should be written out as a
+     * gallery image file and referenced by URL in the rendered page HTML.
+     *
+     * NOTE(review): the expected URLs assume the test application URL is
+     * http://localhost; the copy of this diff had them rewritten to a proxy
+     * address (p.rizon.top), which the application would not generate - confirm.
+     */
+    public function test_base64_images_get_extracted_from_markdown_page_content()
+    {
+        $this->asEditor();
+        $page = Page::query()->first();
+
+        $this->put($page->getUrl(), [
+            'name' => $page->name, 'summary' => '',
+            'markdown' => 'test ![test](data:image/jpeg;base64,' . $this->base64Jpeg . ')',
+        ]);
+
+        $page->refresh();
+        $this->assertStringMatchesFormat('%A<p%A>test <img src="http://localhost/uploads/images/gallery/%A.jpeg" alt="test">%A</p>%A', $page->html);
+
+        // Fail with a clear assertion, rather than an undefined-index error,
+        // if the rendered HTML holds no local image URL.
+        $matches = [];
+        $this->assertSame(1, preg_match('/src="http:\/\/localhost(.*?)"/', $page->html, $matches));
+        $imagePath = $matches[1];
+        $imageFile = public_path($imagePath);
+        $this->assertEquals(base64_decode($this->base64Jpeg), file_get_contents($imageFile));
+
+        $this->deleteImage($imagePath);
+    }
+
+    /**
+     * Markdown embedding base64 image data under an unsupported type ("jiff")
+     * should end up rendered with an empty image source.
+     */
+    public function test_base64_images_within_markdown_blanked_if_not_supported_extension_for_extract()
+    {
+        $this->asEditor();
+        $targetPage = Page::query()->first();
+
+        // Build the update payload up-front so the request call stays short.
+        $markdownContent = 'test ![test](data:image/jiff;base64,' . $this->base64Jpeg . ')';
+        $updateData = [
+            'name'     => $targetPage->name,
+            'summary'  => '',
+            'markdown' => $markdownContent,
+        ];
+        $this->put($targetPage->getUrl(), $updateData);
+
+        $targetPage->refresh();
+        $renderedHtml = $targetPage->html;
+        $this->assertStringContainsString('<img src=""', $renderedHtml);
+    }
 }