]> BookStack Code Mirror - bookstack/blob - app/Entities/ExportService.php
d07c093f1e32f5a1dbd9e41c8b97036be5556fb4
[bookstack] / app / Entities / ExportService.php
1 <?php namespace BookStack\Entities;
2
3 use BookStack\Entities\Repos\EntityRepo;
4 use BookStack\Uploads\ImageService;
5
class ExportService
{

    // In the iframe patterns below the lead-in to "src" is "[^>]*?" rather than a
    // greedy ".*" so that a match can never start in one tag and pick up the src of
    // a later tag on the same line. Capture group 2 is always the src value.
    const VIDEO_REGEX = "/\<video.*?\>\<source.*?\ src\=(\")(.*?)(\").*?><\/video>/";
    const YOUTUBE_REGEX = "/\<iframe[^>]*?src\=(\'|\")(\/\/www\.youtube\.com.*?)(\'|\").*?><\/iframe>/";
    const VIMEO_REGEX = "/\<iframe[^>]*?src\=(\'|\")(\/\/player\.vimeo\.com.*?)(\'|\").*?><\/iframe>/";
    const GOOGLE_MAP_REGEX = "/\<iframe[^>]*?src\=(\'|\")(\/\/maps\.google\.com.*?)(\'|\").*?><\/iframe>/";
    const DAILYMOTION_REGEX = "/\<iframe[^>]*?src\=(\'|\")(\/\/www\.dailymotion\.com.*?)(\'|\").*?><\/iframe>/";

    protected $entityRepo;
    protected $imageService;

    /**
     * ExportService constructor.
     * @param EntityRepo $entityRepo
     * @param ImageService $imageService
     */
    public function __construct(EntityRepo $entityRepo, ImageService $imageService)
    {
        $this->entityRepo = $entityRepo;
        $this->imageService = $imageService;
    }

    /**
     * Convert a page to a self-contained HTML file.
     * Includes required CSS & image content. Images are base64 encoded into the HTML.
     * @param \BookStack\Entities\Page $page
     * @return mixed|string
     * @throws \Throwable
     */
    public function pageToContainedHtml(Page $page)
    {
        // The return value is unused here; the view is handed the page itself.
        // NOTE(review): presumably renderPage() stores the rendered HTML on $page - confirm.
        $this->entityRepo->renderPage($page);
        $pageHtml = view('pages/export', [
            'page' => $page
        ])->render();
        return $this->containHtml($pageHtml);
    }

    /**
     * Convert a chapter to a self-contained HTML file.
     * @param \BookStack\Entities\Chapter $chapter
     * @return mixed|string
     * @throws \Throwable
     */
    public function chapterToContainedHtml(Chapter $chapter)
    {
        return $this->containHtml($this->renderChapterExportView($chapter));
    }

    /**
     * Convert a book to a self-contained HTML file.
     * @param Book $book
     * @return mixed|string
     * @throws \Throwable
     */
    public function bookToContainedHtml(Book $book)
    {
        return $this->containHtml($this->renderBookExportView($book));
    }

    /**
     * Convert a page to a PDF file.
     * @param Page $page
     * @return mixed|string
     * @throws \Throwable
     */
    public function pageToPdf(Page $page)
    {
        // See pageToContainedHtml(): renderPage() is called for its effect on $page.
        $this->entityRepo->renderPage($page);
        $html = view('pages/pdf', [
            'page' => $page
        ])->render();
        return $this->htmlToPdf($html);
    }

    /**
     * Convert a chapter to a PDF file.
     * @param \BookStack\Entities\Chapter $chapter
     * @return mixed|string
     * @throws \Throwable
     */
    public function chapterToPdf(Chapter $chapter)
    {
        return $this->htmlToPdf($this->renderChapterExportView($chapter));
    }

    /**
     * Convert a book to a PDF file
     * @param \BookStack\Entities\Book $book
     * @return string
     * @throws \Throwable
     */
    public function bookToPdf(Book $book)
    {
        return $this->htmlToPdf($this->renderBookExportView($book));
    }

    /**
     * Render the shared chapter export view, with the html of every child page
     * rendered and assigned to the page beforehand.
     * @param \BookStack\Entities\Chapter $chapter
     * @return string
     * @throws \Throwable
     */
    protected function renderChapterExportView(Chapter $chapter)
    {
        $pages = $this->entityRepo->getChapterChildren($chapter);
        $pages->each(function ($page) {
            $page->html = $this->entityRepo->renderPage($page);
        });
        return view('chapters/export', [
            'chapter' => $chapter,
            'pages' => $pages
        ])->render();
    }

    /**
     * Render the shared book export view for the given book and its children.
     * @param \BookStack\Entities\Book $book
     * @return string
     * @throws \Throwable
     */
    protected function renderBookExportView(Book $book)
    {
        $bookTree = $this->entityRepo->getBookChildren($book, true, true);
        return view('books/export', [
            'book' => $book,
            'bookChildren' => $bookTree
        ])->render();
    }

    /**
     * Convert normal webpage HTML to a PDF.
     * WKHTMLtoPDF (via Snappy) is used unless the 'snappy.pdf.binary' config value
     * is strictly false, in which case DomPDF is used instead.
     * @param $html
     * @return string
     * @throws \Exception
     */
    protected function htmlToPdf($html)
    {
        $containedHtml = $this->containHtml($html, true);
        $useWKHTML = config('snappy.pdf.binary') !== false;
        if ($useWKHTML) {
            $pdf = \SnappyPDF::loadHTML($containedHtml);
            $pdf->setOption('print-media-type', true);
        } else {
            $pdf = \DomPDF::loadHTML($containedHtml);
        }
        return $pdf->output();
    }

    /**
     * Bundle of the contents of a html file to be self-contained.
     * Images are inlined as base64, links not starting with "http" are passed through
     * url(), and embedded media is either given an explicit https protocol (HTML)
     * or swapped for a labelled link (PDF).
     * @param $htmlContent
     * @param bool $isPDF
     * @return mixed|string
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     */
    protected function containHtml($htmlContent, $isPDF = false)
    {
        $htmlContent = $this->embedImages($htmlContent);
        $htmlContent = $this->makeLinksAbsolute($htmlContent);

        // Replace problems caused by TinyMCE removing the protocol for YouTube, Google Maps, DailyMotion and Vimeo.
        // For PDF output <video> tags are replaced as well; HTML output leaves them untouched.
        if ($isPDF) {
            return $this->replaceEmbeds($htmlContent, [$this, 'replaceContentPDF'], true);
        }
        return $this->replaceEmbeds($htmlContent, [$this, 'replaceContentHtml'], false);
    }

    /**
     * Replace the src of each image tag with the base64 string provided by the image service.
     * Images for which the image service returns null are left as they are.
     * @param string $htmlContent
     * @return string
     * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
     */
    protected function embedImages($htmlContent)
    {
        $imageTags = [];
        // "[^>]*?" keeps each match within one tag so several images on a single line
        // are all found; "\1" requires the closing quote to match the opening quote.
        preg_match_all('/<img[^>]*?src=(\'|")(.*?)\1.*?>/i', $htmlContent, $imageTags);
        if (empty($imageTags[0])) {
            return $htmlContent;
        }

        foreach ($imageTags[0] as $index => $oldImgTag) {
            $src = $imageTags[2][$index];
            $encoded = $this->imageService->imageUriToBase64($src);
            if ($encoded === null) {
                continue;
            }
            // Swap the src inside the tag first, then swap the whole tag in the document,
            // so that other occurrences of the same URL outside of image tags are not touched.
            $newImgTag = str_replace($src, $encoded, $oldImgTag);
            $htmlContent = str_replace($oldImgTag, $newImgTag, $htmlContent);
        }

        return $htmlContent;
    }

    /**
     * Pass the href of every anchor that does not already start with "http"
     * through url() and write the result back into the anchor.
     * @param string $htmlContent
     * @return string
     */
    protected function makeLinksAbsolute($htmlContent)
    {
        $links = [];
        // Whitespace is required after "<a" so that tags such as <abbr> or <article> do not match.
        preg_match_all('/<a\s[^>]*?href=(\'|")(.*?)\1.*?>/i', $htmlContent, $links);
        if (empty($links[0])) {
            return $htmlContent;
        }

        foreach ($links[0] as $index => $oldLink) {
            $href = $links[2][$index];
            // An empty href cannot be substituted by str_replace, so there is nothing to do for it.
            if ($href === '' || strpos(trim($href), 'http') === 0) {
                continue;
            }
            $newLink = str_replace($href, url($href), $oldLink);
            $htmlContent = str_replace($oldLink, $newLink, $htmlContent);
        }

        return $htmlContent;
    }

    /**
     * Run the given replacement callback over every known embed type.
     * Order is: video tags (optional), YouTube, Google Maps, DailyMotion, Vimeo.
     * @param string $htmlContent
     * @param array $callback - One of the replaceContent* methods of this class.
     * @param bool $includeVideoTags - Whether <video> tags should be replaced too.
     * @return string
     */
    protected function replaceEmbeds($htmlContent, $callback, $includeVideoTags)
    {
        if ($includeVideoTags) {
            $htmlContent = $this->replaceLinkedTags(self::VIDEO_REGEX, $htmlContent, $callback, 'Video');
        }
        $htmlContent = $this->replaceLinkedTags(self::YOUTUBE_REGEX, $htmlContent, $callback, 'Video');
        $htmlContent = $this->replaceLinkedTags(self::GOOGLE_MAP_REGEX, $htmlContent, $callback, 'Map');
        $htmlContent = $this->replaceLinkedTags(self::DAILYMOTION_REGEX, $htmlContent, $callback, 'Video');
        $htmlContent = $this->replaceLinkedTags(self::VIMEO_REGEX, $htmlContent, $callback, 'Video');
        return $htmlContent;
    }

    /**
     * Converts the page contents into simple plain text.
     * This method filters any bad looking content to provide a nice final output.
     * @param Page $page
     * @return mixed
     */
    public function pageToPlainText(Page $page)
    {
        $html = $this->entityRepo->renderPage($page);

        // Swap video tags and embeds for "Label: url" text so the URL survives strip_tags().
        $html = $this->replaceEmbeds($html, [$this, 'replaceContentText'], true);

        $text = strip_tags($html);
        // Replace multiple spaces with single spaces
        $text = preg_replace('/\ {2,}/', ' ', $text);
        // Reduce multiple horrid whitespace characters.
        $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text);
        $text = html_entity_decode($text);
        // Add title
        $text = $page->name . "\n\n" . $text;
        return $text;
    }

    /**
     * Convert a chapter into a plain text string.
     * Output is the chapter name and description followed by the plain text of each page.
     * @param \BookStack\Entities\Chapter $chapter
     * @return string
     */
    public function chapterToPlainText(Chapter $chapter)
    {
        $text = $chapter->name . "\n\n";
        $text .= $chapter->description . "\n\n";
        // NOTE(review): the HTML/PDF exports load pages via getChapterChildren() while this
        // reads $chapter->pages directly - confirm both apply the same visibility rules.
        foreach ($chapter->pages as $page) {
            $text .= $this->pageToPlainText($page);
        }
        return $text;
    }

    /**
     * Convert a book into a plain text string.
     * @param Book $book
     * @return string
     */
    public function bookToPlainText(Book $book)
    {
        $bookTree = $this->entityRepo->getBookChildren($book, true, true);
        $text = $book->name . "\n\n";
        foreach ($bookTree as $bookChild) {
            if ($bookChild->isA('chapter')) {
                $text .= $this->chapterToPlainText($bookChild);
            } else {
                $text .= $this->pageToPlainText($bookChild);
            }
        }
        return $text;
    }

    /**
     * Can be used to replace certain tags that cause problems such as the TinyMCE video tag
     * modification that have to be undone.
     * See - https://github.com/tinymce/tinymce/blob/0f7a0f12667bde6eae9377b50b797f4479aa1ac7/src/plugins/media/main/ts/core/UrlPatterns.ts#L22
     * The callback is invoked once per match with the current html, the complete
     * preg_match_all output, the index of the match and the content label.
     * @param string $regex
     * @param string $htmlContent
     * @param array $callback
     * @param string $contentLabel
     * @return string $htmlContent - Modified html content
     */
    protected function replaceLinkedTags($regex, $htmlContent, $callback, $contentLabel = '')
    {
        $tagMatches = [];
        preg_match_all($regex, $htmlContent, $tagMatches);
        if (empty($tagMatches[0])) {
            return $htmlContent;
        }

        foreach (array_keys($tagMatches[0]) as $index) {
            $htmlContent = call_user_func($callback, $htmlContent, $tagMatches, $index, $contentLabel);
        }
        return $htmlContent;
    }

    /**
     * Give a protocol-relative ("//host/path") source an explicit https protocol.
     * Any other value is returned untouched, so a relative path such as
     * "/uploads/clip.mp4" is no longer turned into the invalid "https:/uploads/clip.mp4".
     * @param string $srcString
     * @return string
     */
    protected function toAbsoluteEmbedUrl($srcString)
    {
        if (strpos($srcString, '//') === 0) {
            return 'https:' . $srcString;
        }
        return $srcString;
    }

    /**
     * HTML export replacement: keep the matched tag but give its src an explicit protocol.
     * @param string $htmlContent
     * @param array $iframeOutput - Output of preg_match_all; [0] holds full tags, [2] the src values.
     * @param int $index - Index of the match to process.
     * @param string $contentLabel - Unused for HTML output.
     * @return string
     */
    protected function replaceContentHtml($htmlContent, $iframeOutput, $index, $contentLabel)
    {
        $oldTag = $iframeOutput[0][$index];
        $srcString = $iframeOutput[2][$index];
        // The src is only rewritten inside the matched tag. Replacing the bare src across the
        // whole document would hit already-converted copies of the same embed a second time
        // ("https:https://...") as well as any other text containing that URL.
        $newTag = str_replace($srcString, $this->toAbsoluteEmbedUrl($srcString), $oldTag);
        return str_replace($oldTag, $newTag, $htmlContent);
    }

    /**
     * PDF export replacement: swap the matched tag for a labelled link to its source.
     * @param string $htmlContent
     * @param array $iframeOutput - Output of preg_match_all; [0] holds full tags, [2] the src values.
     * @param int $index - Index of the match to process.
     * @param string $contentLabel - Label placed before the link, such as 'Video' or 'Map'.
     * @return string
     */
    protected function replaceContentPDF($htmlContent, $iframeOutput, $index, $contentLabel)
    {
        $newSrcString = $this->toAbsoluteEmbedUrl($iframeOutput[2][$index]);
        $finalHtmlString = "$contentLabel: <a href='$newSrcString'>$newSrcString</a>";
        return str_replace($iframeOutput[0][$index], $finalHtmlString, $htmlContent);
    }

    /**
     * Plain text export replacement: swap the matched tag for "Label: url" text.
     * @param string $htmlContent
     * @param array $iframeOutput - Output of preg_match_all; [0] holds full tags, [2] the src values.
     * @param int $index - Index of the match to process.
     * @param string $contentLabel - Label placed before the url, such as 'Video' or 'Map'.
     * @return string
     */
    protected function replaceContentText($htmlContent, $iframeOutput, $index, $contentLabel)
    {
        $newSrcString = $this->toAbsoluteEmbedUrl($iframeOutput[2][$index]);
        $finalHtmlString = "$contentLabel: $newSrcString";
        return str_replace($iframeOutput[0][$index], $finalHtmlString, $htmlContent);
    }
}