]> BookStack Code Mirror - bookstack/blob - app/Entities/Tools/ExportFormatter.php
Added a "skip to content" link.
[bookstack] / app / Entities / Tools / ExportFormatter.php
1 <?php namespace BookStack\Entities\Tools;
2
3 use BookStack\Entities\Models\Book;
4 use BookStack\Entities\Models\Chapter;
5 use BookStack\Entities\Models\Page;
6 use BookStack\Uploads\ImageService;
7 use DomPDF;
8 use Exception;
9 use SnappyPDF;
10 use Throwable;
11
class ExportFormatter
{
    /**
     * Service used to turn image URIs into base64 strings for embedding.
     *
     * @var ImageService
     */
    protected $imageService;

    /**
     * ExportFormatter constructor.
     */
    public function __construct(ImageService $imageService)
    {
        $this->imageService = $imageService;
    }

    /**
     * Convert a page to a self-contained HTML file.
     * Includes required CSS & image content. Images are base64 encoded into the HTML.
     * Note: overwrites $page->html with the freshly rendered page content.
     *
     * @return string
     * @throws Throwable
     */
    public function pageToContainedHtml(Page $page)
    {
        return $this->containHtml($this->renderPageView($page, 'html'));
    }

    /**
     * Convert a chapter to a self-contained HTML file.
     * Note: overwrites the html property of each visible page with its rendered content.
     *
     * @return string
     * @throws Throwable
     */
    public function chapterToContainedHtml(Chapter $chapter)
    {
        return $this->containHtml($this->renderChapterView($chapter, 'html'));
    }

    /**
     * Convert a book to a self-contained HTML file.
     *
     * @return string
     * @throws Throwable
     */
    public function bookToContainedHtml(Book $book)
    {
        return $this->containHtml($this->renderBookView($book, 'html'));
    }

    /**
     * Convert a page to a PDF file.
     * Note: overwrites $page->html with the freshly rendered page content.
     *
     * @return string The PDF output produced by the PDF library.
     * @throws Throwable
     */
    public function pageToPdf(Page $page)
    {
        return $this->htmlToPdf($this->renderPageView($page, 'pdf'));
    }

    /**
     * Convert a chapter to a PDF file.
     * Note: overwrites the html property of each visible page with its rendered content.
     *
     * @return string The PDF output produced by the PDF library.
     * @throws Throwable
     */
    public function chapterToPdf(Chapter $chapter)
    {
        return $this->htmlToPdf($this->renderChapterView($chapter, 'pdf'));
    }

    /**
     * Convert a book to a PDF file.
     *
     * @return string The PDF output produced by the PDF library.
     * @throws Throwable
     */
    public function bookToPdf(Book $book)
    {
        return $this->htmlToPdf($this->renderBookView($book, 'pdf'));
    }

    /**
     * Render the 'pages.export' view for the given page in the given format ('html' or 'pdf').
     * @throws Throwable
     */
    protected function renderPageView(Page $page, string $format): string
    {
        $page->html = (new PageContent($page))->render();

        return view('pages.export', [
            'page' => $page,
            'format' => $format,
        ])->render();
    }

    /**
     * Render the 'chapters.export' view for the given chapter and its visible pages.
     * @throws Throwable
     */
    protected function renderChapterView(Chapter $chapter, string $format): string
    {
        $pages = $chapter->getVisiblePages();
        $pages->each(function ($page) {
            $page->html = (new PageContent($page))->render();
        });

        return view('chapters.export', [
            'chapter' => $chapter,
            'pages' => $pages,
            'format' => $format,
        ])->render();
    }

    /**
     * Render the 'books.export' view for the given book and its content tree.
     * @throws Throwable
     */
    protected function renderBookView(Book $book, string $format): string
    {
        // NOTE(review): the second getTree() argument presumably asks for rendered page
        // content, since pages are not rendered here - confirm against BookContents::getTree().
        $bookTree = (new BookContents($book))->getTree(false, true);

        return view('books.export', [
            'book' => $book,
            'bookChildren' => $bookTree,
            'format' => $format,
        ])->render();
    }

    /**
     * Convert normal web-page HTML to a PDF.
     * Uses SnappyPDF when the 'snappy.pdf.binary' config value is anything
     * other than false, and DomPDF otherwise.
     * @throws Exception
     */
    protected function htmlToPdf(string $html): string
    {
        $containedHtml = $this->containHtml($html);

        if (config('snappy.pdf.binary') !== false) {
            $pdf = SnappyPDF::loadHTML($containedHtml);
            $pdf->setOption('print-media-type', true);
        } else {
            $pdf = DomPDF::loadHTML($containedHtml);
        }

        return $pdf->output();
    }

    /**
     * Bundle of the contents of a html file to be self-contained.
     * Image sources are swapped for base64 encoded content and non-http(s)
     * link targets are expanded via url().
     * @throws Exception
     */
    protected function containHtml(string $htmlContent): string
    {
        // Replace image src with base64 encoded image strings.
        // Results are cached per src so a repeated image is only encoded once.
        $encodedBySrc = [];
        $htmlContent = $this->replaceTagAttribute($htmlContent, 'img', 'src', function (string $src) use (&$encodedBySrc) {
            if (!isset($encodedBySrc[$src])) {
                // Keep the original src when the image could not be encoded.
                $encodedBySrc[$src] = $this->imageService->imageUriToBase64($src) ?? $src;
            }

            return $encodedBySrc[$src];
        });

        // Replace any relative links with system domain.
        return $this->replaceTagAttribute($htmlContent, 'a', 'href', function (string $href) {
            $trimmed = trim($href);
            if ($trimmed === '' || strpos($trimmed, 'http') === 0) {
                return $href;
            }

            return url($href);
        });
    }

    /**
     * Rewrite the value of one attribute on every occurrence of one tag.
     * Only the quoted attribute value itself is replaced, so other parts of the
     * tag that happen to contain the same text are left untouched.
     *
     * @param callable $resolver Receives the current attribute value, returns the new value.
     */
    protected function replaceTagAttribute(string $html, string $tag, string $attribute, callable $resolver): string
    {
        // Group 1: tag start up to the "=", group 2: quoted value, group 3: rest of the tag.
        // [^>] keeps the match inside a single tag, \b stops "<a" matching "<abbr", and
        // the required whitespace stops "src" matching "data-src".
        $pattern = '/(<' . preg_quote($tag, '/') . '\b[^>]*?\s' . preg_quote($attribute, '/')
            . '\s*=\s*)("[^"]*"|\'[^\']*\')([^>]*>)/i';

        $result = preg_replace_callback($pattern, function (array $match) use ($resolver) {
            $quote = $match[2][0];
            $value = substr($match[2], 1, -1);

            return $match[1] . $quote . $resolver($value) . $quote . $match[3];
        }, $html);

        // preg_replace_callback() returns null on a PCRE failure; keep the input in that case.
        return $result ?? $html;
    }

    /**
     * Converts the page contents into simple plain text.
     * This method filters any bad looking content to provide a nice final output.
     */
    public function pageToPlainText(Page $page): string
    {
        $html = (new PageContent($page))->render();
        $text = strip_tags($html);
        // Replace multiple spaces with single spaces.
        $text = preg_replace('/\ {2,}/', ' ', $text) ?? $text;
        // Collapse runs of newlines, carriage returns and non-breaking spaces into one blank line.
        // preg_replace() returns null on failure (for example invalid UTF-8 with the "u"
        // modifier); fall back to the uncollapsed text rather than losing the content.
        $text = preg_replace('/[\r\n\x{A0}]{2,}/u', "\n\n", $text) ?? $text;
        $text = html_entity_decode($text);

        // Add title
        return $page->name . "\n\n" . $text;
    }

    /**
     * Convert a chapter into a plain text string.
     * Output is the chapter name, its description, then each visible page
     * separated by a blank line.
     */
    public function chapterToPlainText(Chapter $chapter): string
    {
        $header = $chapter->name . "\n\n" . $chapter->description . "\n\n";

        $pageTexts = [];
        foreach ($chapter->getVisiblePages() as $page) {
            $pageTexts[] = $this->pageToPlainText($page);
        }

        // Separate pages so one page's last line does not run into the next page's title.
        return $header . implode("\n\n", $pageTexts);
    }

    /**
     * Convert a book into a plain text string.
     * Output is the book name followed by each chapter or page in the book
     * tree, separated by a blank line.
     */
    public function bookToPlainText(Book $book): string
    {
        $bookTree = (new BookContents($book))->getTree(false, false);

        $childTexts = [];
        foreach ($bookTree as $bookChild) {
            $childTexts[] = $bookChild->isA('chapter')
                ? $this->chapterToPlainText($bookChild)
                : $this->pageToPlainText($bookChild);
        }

        return $book->name . "\n\n" . implode("\n\n", $childTexts);
    }
}