BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/ExportFormatter.php
MD Exports: Added HTML description conversion
[bookstack] / app / Entities / Tools / ExportFormatter.php
index b462abec5671666ebb806d953ab1dd34a7c3b558..0af68b8db3cfcd3f4f9957c0a949259a2144593b 100644 (file)
-<?php namespace BookStack\Entities\Tools;
+<?php
+
+namespace BookStack\Entities\Tools;
 
 use BookStack\Entities\Models\Book;
 use BookStack\Entities\Models\Chapter;
 use BookStack\Entities\Models\Page;
+use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
 use BookStack\Uploads\ImageService;
-use DomPDF;
+use BookStack\Util\CspService;
+use BookStack\Util\HtmlDocument;
+use DOMElement;
 use Exception;
-use SnappyPDF;
-use League\HTMLToMarkdown\HtmlConverter;
 use Throwable;
-use ZipArchive;
 
 class ExportFormatter
 {
-
-    protected $imageService;
-
-    /**
-     * ExportService constructor.
-     */
-    public function __construct(ImageService $imageService)
-    {
-        $this->imageService = $imageService;
+    /**
+     * Build the formatter from the services it relies upon. Each argument
+     * is stored as a protected property through constructor promotion.
+     */
+    public function __construct(
+        protected ImageService $imageService,
+        protected PdfGenerator $pdfGenerator,
+        protected CspService $cspService,
+    ) {
     }
 
     /**
      * Convert a page to a self-contained HTML file.
      * Includes required CSS & image content. Images are base64 encoded into the HTML.
+     * The page's html property is overwritten with its rendered content, the
+     * page is placed into the 'exports.page' view together with the CSP meta
+     * tag value and the current user's locale, and the resulting markup is
+     * then passed through containHtml().
+     *
      * @throws Throwable
      */
-    public function pageToContainedHtml(Page $page)
+    public function pageToContainedHtml(Page $page): string
     {
         $page->html = (new PageContent($page))->render();
-        $pageHtml = view('pages.export', [
-            'page' => $page,
-            'format' => 'html',
+        $pageHtml = view('exports.page', [
+            'page'       => $page,
+            'format'     => 'html',
+            'cspContent' => $this->cspService->getCspMetaTagValue(),
+            'locale'     => user()->getLocale(),
         ])->render();
+
         return $this->containHtml($pageHtml);
     }
 
     /**
      * Convert a chapter to a self-contained HTML file.
+     * Each visible page of the chapter has its html property overwritten
+     * with its rendered content before the 'exports.chapter' view is built.
+     * The view also receives the CSP meta tag value and the current user's
+     * locale, and its output is passed through containHtml().
+     *
      * @throws Throwable
      */
-    public function chapterToContainedHtml(Chapter $chapter)
+    public function chapterToContainedHtml(Chapter $chapter): string
     {
         $pages = $chapter->getVisiblePages();
         $pages->each(function ($page) {
             $page->html = (new PageContent($page))->render();
         });
-        $html = view('chapters.export', [
-            'chapter' => $chapter,
-            'pages' => $pages,
-            'format' => 'html',
+        $html = view('exports.chapter', [
+            'chapter'    => $chapter,
+            'pages'      => $pages,
+            'format'     => 'html',
+            'cspContent' => $this->cspService->getCspMetaTagValue(),
+            'locale'     => user()->getLocale(),
         ])->render();
+
         return $this->containHtml($html);
     }
 
     /**
      * Convert a book to a self-contained HTML file.
+     * The book tree is fetched via BookContents::getTree(false, true) and
+     * given to the 'exports.book' view as 'bookChildren', along with the
+     * CSP meta tag value and the current user's locale. The view output is
+     * then passed through containHtml().
+     *
      * @throws Throwable
      */
-    public function bookToContainedHtml(Book $book)
+    public function bookToContainedHtml(Book $book): string
     {
         $bookTree = (new BookContents($book))->getTree(false, true);
-        $html = view('books.export', [
-            'book' => $book,
+        $html = view('exports.book', [
+            'book'         => $book,
             'bookChildren' => $bookTree,
-            'format' => 'html',
+            'format'       => 'html',
+            'cspContent'   => $this->cspService->getCspMetaTagValue(),
+            'locale'       => user()->getLocale(),
         ])->render();
+
         return $this->containHtml($html);
     }
 
     /**
      * Convert a page to a PDF file.
+     * The page's html property is overwritten with its rendered content, the
+     * page is placed into the 'exports.page' view in 'pdf' format together
+     * with the active PDF engine and the current user's locale, and the
+     * resulting markup is converted via htmlToPdf().
+     *
      * @throws Throwable
      */
-    public function pageToPdf(Page $page)
+    public function pageToPdf(Page $page): string
     {
         $page->html = (new PageContent($page))->render();
-        $html = view('pages.export', [
-            'page' => $page,
+        $html = view('exports.page', [
+            'page'   => $page,
             'format' => 'pdf',
+            'engine' => $this->pdfGenerator->getActiveEngine(),
+            'locale' => user()->getLocale(),
         ])->render();
+
         return $this->htmlToPdf($html);
     }
 
     /**
      * Convert a chapter to a PDF file.
+     *
      * @throws Throwable
      */
-    public function chapterToPdf(Chapter $chapter)
+    public function chapterToPdf(Chapter $chapter): string
     {
         $pages = $chapter->getVisiblePages();
         $pages->each(function ($page) {
             $page->html = (new PageContent($page))->render();
         });
 
-        $html = view('chapters.export', [
+        $html = view('exports.chapter', [
             'chapter' => $chapter,
-            'pages' => $pages,
-            'format' => 'pdf',
+            'pages'   => $pages,
+            'format'  => 'pdf',
+            'engine'  => $this->pdfGenerator->getActiveEngine(),
+            'locale'  => user()->getLocale(),
         ])->render();
 
         return $this->htmlToPdf($html);
@@ -108,38 +125,79 @@ class ExportFormatter
 
     /**
      * Convert a book to a PDF file.
+     * The book tree is fetched via BookContents::getTree(false, true) and
+     * given to the 'exports.book' view as 'bookChildren' in 'pdf' format,
+     * along with the active PDF engine and the current user's locale. The
+     * view output is converted via htmlToPdf().
+     *
      * @throws Throwable
      */
-    public function bookToPdf(Book $book)
+    public function bookToPdf(Book $book): string
     {
         $bookTree = (new BookContents($book))->getTree(false, true);
-        $html = view('books.export', [
-            'book' => $book,
+        $html = view('exports.book', [
+            'book'         => $book,
             'bookChildren' => $bookTree,
-            'format' => 'pdf',
+            'format'       => 'pdf',
+            'engine'       => $this->pdfGenerator->getActiveEngine(),
+            'locale'       => user()->getLocale(),
         ])->render();
+
         return $this->htmlToPdf($html);
     }
 
     /**
      * Convert normal web-page HTML to a PDF.
+     * The HTML is first made self-contained via containHtml() and loaded
+     * into an HtmlDocument. Iframes are then replaced with links and
+     * details elements are opened, before the cleaned markup is handed
+     * to the PDF generator.
+     *
      * @throws Exception
      */
     protected function htmlToPdf(string $html): string
     {
-        $containedHtml = $this->containHtml($html);
-        $useWKHTML = config('snappy.pdf.binary') !== false;
-        if ($useWKHTML) {
-            $pdf = SnappyPDF::loadHTML($containedHtml);
-            $pdf->setOption('print-media-type', true);
-        } else {
-            $pdf = DomPDF::loadHTML($containedHtml);
+        $html = $this->containHtml($html);
+        $doc = new HtmlDocument();
+        $doc->loadCompleteHtml($html);
+
+        $this->replaceIframesWithLinks($doc);
+        $this->openDetailElements($doc);
+        $cleanedHtml = $doc->getHtml();
+
+        return $this->pdfGenerator->fromHtml($cleanedHtml);
+    }
+
+    /**
+     * Mark every details element found in the given HTML document
+     * as expanded by setting its "open" attribute.
+     */
+    protected function openDetailElements(HtmlDocument $doc): void
+    {
+        /** @var DOMElement $element */
+        foreach ($doc->queryXPath('//details') as $element) {
+            $element->setAttribute('open', 'open');
+        }
+    }
+
+    /**
+     * Within the given HTML document, replace any iframe elements
+     * with anchor links within paragraph blocks.
+     *
+     * Each iframe's "src" is used as both the link target and the link
+     * text, with protocol-relative sources ("//...") given a "https:"
+     * prefix. The link text is assigned through textContent instead of
+     * being passed as a createElement() value, since the native DOM API
+     * treats "&" in that value as the start of an entity reference, which
+     * would warn on (and truncate) typical embed URLs with query strings.
+     */
+    protected function replaceIframesWithLinks(HtmlDocument $doc): void
+    {
+        $iframes = $doc->queryXPath('//iframe');
+
+        /** @var DOMElement $iframe */
+        foreach ($iframes as $iframe) {
+            $link = $iframe->getAttribute('src');
+            if (str_starts_with($link, '//')) {
+                $link = 'https:' . $link;
+            }
+
+            $anchor = $doc->createElement('a');
+            $anchor->textContent = $link;
+            $anchor->setAttribute('href', $link);
+            $paragraph = $doc->createElement('p');
+            $paragraph->appendChild($anchor);
+            $iframe->parentNode->replaceChild($paragraph, $iframe);
         }
-        return $pdf->output();
     }
 
     /**
      * Bundle the contents of an HTML file to be self-contained.
+     *
      * @throws Exception
      */
     protected function containHtml(string $htmlContent): string
@@ -152,7 +210,7 @@ class ExportFormatter
             foreach ($imageTagsOutput[0] as $index => $imgMatch) {
                 $oldImgTagString = $imgMatch;
                 $srcString = $imageTagsOutput[2][$index];
-                $imageEncoded = $this->imageService->imageUriToBase64($srcString);
+                $imageEncoded = $this->imageService->imageUrlToBase64($srcString);
                 if ($imageEncoded === null) {
                     $imageEncoded = $srcString;
                 }
@@ -164,12 +222,12 @@ class ExportFormatter
         $linksOutput = [];
         preg_match_all("/\<a.*href\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $linksOutput);
 
-        // Replace image src with base64 encoded image strings
+        // Update relative links to be absolute, with instance url
         if (isset($linksOutput[0]) && count($linksOutput[0]) > 0) {
             foreach ($linksOutput[0] as $index => $linkMatch) {
                 $oldLinkString = $linkMatch;
                 $srcString = $linksOutput[2][$index];
-                if (strpos(trim($srcString), 'http') !== 0) {
+                if (!str_starts_with(trim($srcString), 'http')) {
                     $newSrcString = url($srcString);
                     $newLinkString = str_replace($srcString, $newSrcString, $oldLinkString);
                     $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
@@ -177,7 +235,6 @@ class ExportFormatter
             }
         }
 
-        // Replace any relative links with system domain
         return $htmlContent;
     }
 
@@ -185,17 +242,21 @@ class ExportFormatter
      * Converts the page contents into simple plain text.
      * This method filters any bad looking content to provide a nice final output.
      */
-    public function pageToPlainText(Page $page): string
+    public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string
     {
-        $html = (new PageContent($page))->render();
-        $text = strip_tags($html);
+        $html = $pageRendered ? $page->html : (new PageContent($page))->render();
+        // Add a space before each tag so that text from adjacent
+        // elements stays separated after stripping tags.
+        $html = str_replace('<', " <", $html);
+        $text = trim(strip_tags($html));
         // Replace multiple spaces with single spaces
-        $text = preg_replace('/\ {2,}/', ' ', $text);
+        $text = preg_replace('/ {2,}/', ' ', $text);
         // Reduce multiple horrid whitespace characters.
         $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text);
         $text = html_entity_decode($text);
         // Add title
-        $text = $page->name . "\n\n" . $text;
+        $text = $page->name . ($fromParent ? "\n" : "\n\n") . $text;
+
         return $text;
     }
 
@@ -204,12 +265,15 @@ class ExportFormatter
      */
     public function chapterToPlainText(Chapter $chapter): string
     {
-        $text = $chapter->name . "\n\n";
-        $text .= $chapter->description . "\n\n";
+        $text = $chapter->name . "\n" . $chapter->description;
+        $text = trim($text) . "\n\n";
+
+        $parts = [];
         foreach ($chapter->getVisiblePages() as $page) {
-            $text .= $this->pageToPlainText($page);
+            $parts[] = $this->pageToPlainText($page, false, true);
         }
-        return $text;
+
+        return $text . implode("\n\n", $parts);
     }
 
     /**
@@ -217,44 +281,51 @@ class ExportFormatter
      */
     public function bookToPlainText(Book $book): string
     {
-        $bookTree = (new BookContents($book))->getTree(false, false);
-        $text = $book->name . "\n\n";
+        $bookTree = (new BookContents($book))->getTree(false, true);
+        $text = $book->name . "\n" . $book->description;
+        $text = rtrim($text) . "\n\n";
+
+        $parts = [];
         foreach ($bookTree as $bookChild) {
             if ($bookChild->isA('chapter')) {
-                $text .= $this->chapterToPlainText($bookChild);
+                $parts[] = $this->chapterToPlainText($bookChild);
             } else {
-                $text .= $this->pageToPlainText($bookChild);
+                $parts[] = $this->pageToPlainText($bookChild, true, true);
             }
         }
-        return $text;
+
+        return $text . implode("\n\n", $parts);
     }
 
     /**
      * Convert a page to a Markdown file.
-     * @throws Throwable
+     * The page name is emitted as a level-one heading, followed by a blank
+     * line and the body. The page's stored markdown is used as the body when
+     * it is truthy; otherwise the body is produced by converting the page's
+     * html property via HtmlToMarkdown.
+     * NOTE(review): the html property is used as-is here, whereas the HTML
+     * and PDF page exports first render it via PageContent - confirm intended.
      */
-    public function pageToMarkdown(Page $page)
+    public function pageToMarkdown(Page $page): string
     {
-        if (property_exists($page, 'markdown') && $page->markdown != '') {
-            return "# " . $page->name . "\n\n" . $page->markdown;
-        } else {
-            $converter = new HtmlConverter();
-            return "# " . $page->name . "\n\n" . $converter->convert($page->html);
+        if ($page->markdown) {
+            return '# ' . $page->name . "\n\n" . $page->markdown;
         }
+
+        return '# ' . $page->name . "\n\n" . (new HtmlToMarkdown($page->html))->convert();
     }
 
     /**
      * Convert a chapter to a Markdown file.
-     * @throws Throwable
+     * Output is the chapter name as a level-one heading, then the chapter
+     * description converted from HTML to markdown (skipped when the result
+     * is empty), then the markdown of each page separated by blank lines.
+     * Surrounding whitespace is trimmed from the final text.
+     * NOTE(review): pages are read from $chapter->pages, whereas the HTML,
+     * PDF and plain-text chapter exports use getVisiblePages() - confirm
+     * callers only pass chapters whose pages are already filtered.
      */
-    public function chapterToMarkdown(Chapter $chapter)
+    public function chapterToMarkdown(Chapter $chapter): string
     {
-        $text = "# " . $chapter->name . "\n\n";
-        $text .= $chapter->description . "\n\n";
+        $text = '# ' . $chapter->name . "\n\n";
+
+        $description = (new HtmlToMarkdown($chapter->descriptionHtml()))->convert();
+        if ($description) {
+            $text .= $description . "\n\n";
+        }
+
         foreach ($chapter->pages as $page) {
-            $text .= $this->pageToMarkdown($page);
+            $text .= $this->pageToMarkdown($page) . "\n\n";
         }
-        return $text;
+
+        return trim($text);
     }
 
     /**
@@ -263,37 +334,21 @@ class ExportFormatter
     public function bookToMarkdown(Book $book): string
     {
         $bookTree = (new BookContents($book))->getTree(false, true);
-        $text = "# " . $book->name . "\n\n";
-        foreach ($bookTree as $bookChild) {
-            if ($bookChild->isA('chapter')) {
-                $text .= $this->chapterToMarkdown($bookChild);
-            } else {
-                $text .= $this->pageToMarkdown($bookChild);
-            }
+        $text = '# ' . $book->name . "\n\n";
+
+        $description = (new HtmlToMarkdown($book->descriptionHtml()))->convert();
+        if ($description) {
+            $text .= $description . "\n\n";
         }
-        return $text;
-    }
 
-    /**
-     * Convert a book into a zip file.
-     */
-    public function bookToZip(Book $book): string
-    {
-        // TODO: Is not unlinking the file a security risk?
-        $z = new ZipArchive();
-        $z->open("book.zip", \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
-        $bookTree = (new BookContents($book))->getTree(false, true);
         foreach ($bookTree as $bookChild) {
-            if ($bookChild->isA('chapter')) {
-                $z->addEmptyDir($bookChild->name);
-                foreach ($bookChild->pages as $page) {
-                    $filename = $bookChild->name . "/" . $page->name . ".md";
-                    $z->addFromString($filename, $this->pageToMarkdown($page));
-                }
+            if ($bookChild instanceof Chapter) {
+                $text .= $this->chapterToMarkdown($bookChild) . "\n\n";
             } else {
-                $z->addFromString($bookChild->name . ".md", $this->pageToMarkdown($bookChild));
+                $text .= $this->pageToMarkdown($bookChild) . "\n\n";
             }
         }
-        return "book.zip";
+
+        return trim($text);
     }
 }