1 <?php namespace BookStack\Entities;
3 use BookStack\Entities\Repos\EntityRepo;
4 use BookStack\Uploads\ImageService;
// Patterns used to locate embedded media in rendered HTML. In every pattern capture
// group 2 is the src URL (the replaceContent* callbacks read index 2 of the match array);
// groups 1 and 3 are the quotes around it.
// Matches a <video> wrapping a <source>; only double-quoted src values are recognised.
9 const VIDEO_REGEX = "/\<video.*?\>\<source.*?\ src\=(\")(.*?)(\").*?><\/video>/";
// The four iframe patterns below only match protocol-less sources ("//host/...").
// NOTE(review): the ".*" before "src" is greedy and "." does not cross newlines, so two
// iframes on the same line yield a single match whose group 2 is the LAST src on that line.
// YouTube embeds.
10 const YOUTUBE_REGEX = "/\<iframe.*src\=(\'|\")(\/\/www\.youtube\.com.*?)(\'|\").*?><\/iframe>/";
// Vimeo player embeds.
11 const VIMEO_REGEX = "/\<iframe.*src\=(\'|\")(\/\/player\.vimeo\.com.*?)(\'|\").*?><\/iframe>/";
// Google Maps embeds.
12 const GOOGLE_MAP_REGEX = "/\<iframe.*src\=(\'|\")(\/\/maps\.google\.com.*?)(\'|\").*?><\/iframe>/";
// DailyMotion embeds.
13 const DAILYMOTION_REGEX = "/\<iframe.*src\=(\'|\")(\/\/www\.dailymotion\.com.*?)(\'|\").*?><\/iframe>/";
// Repository used below to render pages and to fetch chapter/book children.
15 protected $entityRepo;
// Service used by containHtml() to turn image URIs into base64 strings.
16 protected $imageService;
/**
 * ExportService constructor.
 *
 * Keeps hold of the two injected collaborators: the repository that renders pages and
 * lists book/chapter children, and the image service used when bundling HTML.
 *
 * @param EntityRepo $entityRepo
 * @param ImageService $imageService
 */
public function __construct(EntityRepo $entityRepo, ImageService $imageService)
{
    $this->imageService = $imageService;
    $this->entityRepo = $entityRepo;
}
30 * Convert a page to a self-contained HTML file.
31 * Includes required CSS & image content. Images are base64 encoded into the HTML.
32 * @param \BookStack\Entities\Page $page
33 * @return mixed|string
// Renders the page through the entity repo, builds the 'pages/export' view and passes
// the result to containHtml() in its default (non-PDF) mode.
36 public function pageToContainedHtml(Page $page)
// The return value is discarded here; presumably renderPage() also stores the rendered
// HTML on $page - TODO confirm.
38 $this->entityRepo->renderPage($page);
// NOTE(review): confirm which data keys the 'pages/export' view expects.
39 $pageHtml = view('pages/export', [
42 return $this->containHtml($pageHtml);
46 * Convert a chapter to a self-contained HTML file.
47 * @param \BookStack\Entities\Chapter $chapter
48 * @return mixed|string
// Renders every child page of the chapter, builds the 'chapters/export' view and passes
// the result to containHtml() in its default (non-PDF) mode.
51 public function chapterToContainedHtml(Chapter $chapter)
53 $pages = $this->entityRepo->getChapterChildren($chapter);
// Store the rendered HTML on each page object so the view can print it.
54 $pages->each(function ($page) {
55 $page->html = $this->entityRepo->renderPage($page);
// NOTE(review): the trailing comma after the 'chapter' entry suggests further view data
// follows (presumably the rendered $pages) - confirm against the view.
57 $html = view('chapters/export', [
58 'chapter' => $chapter,
61 return $this->containHtml($html);
65 * Convert a book to a self-contained HTML file.
67 * @return mixed|string
// Fetches the book's children, builds the 'books/export' view and passes the result to
// containHtml() in its default (non-PDF) mode.
70 public function bookToContainedHtml(Book $book)
// NOTE(review): the meaning of the two boolean flags is defined by
// EntityRepo::getBookChildren() - confirm there.
72 $bookTree = $this->entityRepo->getBookChildren($book, true, true);
73 $html = view('books/export', [
// The child tree is exposed to the view under the 'bookChildren' key.
75 'bookChildren' => $bookTree
77 return $this->containHtml($html);
81 * Convert a page to a PDF file.
83 * @return mixed|string
// Renders the page, builds the dedicated 'pages/pdf' view and converts it with htmlToPdf().
86 public function pageToPdf(Page $page)
// The return value is discarded here; presumably renderPage() also stores the rendered
// HTML on $page - TODO confirm.
88 $this->entityRepo->renderPage($page);
// Unlike the chapter and book PDF exports, the page export uses a PDF-specific view.
89 $html = view('pages/pdf', [
92 return $this->htmlToPdf($html);
96 * Convert a chapter to a PDF file.
97 * @param \BookStack\Entities\Chapter $chapter
98 * @return mixed|string
// Renders every child page of the chapter, builds the view and converts it with htmlToPdf().
101 public function chapterToPdf(Chapter $chapter)
103 $pages = $this->entityRepo->getChapterChildren($chapter);
// Store the rendered HTML on each page object so the view can print it.
104 $pages->each(function ($page) {
105 $page->html = $this->entityRepo->renderPage($page);
// Same 'chapters/export' view as the contained-HTML export; there is no PDF-specific
// chapter view referenced here.
107 $html = view('chapters/export', [
108 'chapter' => $chapter,
111 return $this->htmlToPdf($html);
115 * Convert a book to a PDF file
116 * @param \BookStack\Entities\Book $book
// Fetches the book's children, builds the view and converts it with htmlToPdf().
120 public function bookToPdf(Book $book)
// NOTE(review): the meaning of the two boolean flags is defined by
// EntityRepo::getBookChildren() - confirm there.
122 $bookTree = $this->entityRepo->getBookChildren($book, true, true);
// Same 'books/export' view as the contained-HTML export.
123 $html = view('books/export', [
125 'bookChildren' => $bookTree
127 return $this->htmlToPdf($html);
/**
 * Convert normal webpage HTML to a PDF.
 *
 * The markup is first made self-contained in PDF mode. Snappy (WKHTML) performs the
 * conversion whenever the 'snappy.pdf.binary' config value is anything other than
 * false; DomPDF is used otherwise.
 *
 * @param $html
 * @return mixed Result of the PDF wrapper's output() call (presumably the raw PDF data).
 */
protected function htmlToPdf($html)
{
    $selfContained = $this->containHtml($html, true);

    if (config('snappy.pdf.binary') === false) {
        return \DomPDF::loadHTML($selfContained)->output();
    }

    $snappy = \SnappyPDF::loadHTML($selfContained);
    $snappy->setOption('print-media-type', true);

    return $snappy->output();
}
/**
 * Bundle the contents of an HTML document so that it is self-contained.
 *
 * Three passes are made over the markup:
 *  - every <img> src is swapped for the value returned by the image service's
 *    imageUriToBase64() (left untouched when that returns null);
 *  - every <a> href that does not start with "http" is passed through url();
 *  - embedded media is normalised. For PDF output <video> blocks and the known iframe
 *    embeds are replaced via replaceContentPDF(); for HTML output the iframe embeds are
 *    handled by replaceContentHtml().
 *
 * @param $htmlContent
 * @param bool $isPDF
 * @return mixed|string
 * @throws \Illuminate\Contracts\Filesystem\FileNotFoundException
 */
protected function containHtml($htmlContent, $isPDF = false)
{
    // Replace image src with base64 encoded image strings
    $htmlContent = $this->rewriteAttributeValues(
        '/<img\b[^>]*?\ssrc=(["\'])(.*?)\1[^>]*>/i',
        $htmlContent,
        function ($src) {
            $encoded = $this->imageService->imageUriToBase64($src);
            return $encoded === null ? $src : $encoded;
        }
    );

    // Make non-absolute link targets absolute
    $htmlContent = $this->rewriteAttributeValues(
        '/<a\b[^>]*?\shref=(["\'])(.*?)\1[^>]*>/i',
        $htmlContent,
        function ($href) {
            return strpos(trim($href), 'http') === 0 ? $href : url($href);
        }
    );

    // Replace problems caused by TinyMCE removing the protocol for YouTube, Google Maps, DailyMotion and Vimeo
    if ($isPDF) {
        $callback = [$this, 'replaceContentPDF'];
        $htmlContent = $this->replaceLinkedTags(self::VIDEO_REGEX, $htmlContent, $callback, 'Video');
    } else {
        $callback = [$this, 'replaceContentHtml'];
    }
    $htmlContent = $this->replaceLinkedTags(self::YOUTUBE_REGEX, $htmlContent, $callback, 'Video');
    $htmlContent = $this->replaceLinkedTags(self::GOOGLE_MAP_REGEX, $htmlContent, $callback, 'Map');
    $htmlContent = $this->replaceLinkedTags(self::DAILYMOTION_REGEX, $htmlContent, $callback, 'Video');
    $htmlContent = $this->replaceLinkedTags(self::VIMEO_REGEX, $htmlContent, $callback, 'Video');

    return $htmlContent;
}

/**
 * Pass every non-empty value captured by group 2 of $pattern through $transform and
 * rebuild $html in a single left-to-right pass.
 *
 * Working from byte offsets means only the captured attribute value is replaced, so a
 * value that also happens to occur elsewhere in the tag (or document) is never touched,
 * and already-rewritten text is never scanned again.
 *
 * @param string $pattern PCRE pattern whose second capture group is the attribute value.
 * @param string $html
 * @param callable $transform Receives the old value, returns the new one.
 * @return string
 */
protected function rewriteAttributeValues($pattern, $html, callable $transform)
{
    $matches = [];
    // Covers both "no matches" (0) and a PCRE failure (false): leave the markup as it is.
    if (!preg_match_all($pattern, $html, $matches, PREG_OFFSET_CAPTURE)) {
        return $html;
    }

    $rebuilt = '';
    $cursor = 0;
    foreach ($matches[2] as $capture) {
        list($value, $offset) = $capture;
        if ($value === '') {
            // Empty attributes are left exactly as they were.
            continue;
        }
        $rebuilt .= substr($html, $cursor, $offset - $cursor) . $transform($value);
        $cursor = $offset + strlen($value);
    }

    return $rebuilt . substr($html, $cursor);
}
/**
 * Converts the page contents into simple plain text.
 * This method filters any bad looking content to provide a nice final output.
 *
 * Embedded videos and maps are first swapped for "<label>: <url>" text, then the tags
 * are stripped, whitespace is collapsed and HTML entities are decoded. The page name is
 * prepended as a title.
 *
 * @param Page $page
 * @return string
 */
public function pageToPlainText(Page $page)
{
    $html = $this->entityRepo->renderPage($page);

    $callback = [$this, 'replaceContentText'];
    // Replace video tags with their labelled source URL
    $html = $this->replaceLinkedTags(self::VIDEO_REGEX, $html, $callback, 'Video');
    // Replace problems caused by TinyMCE removing the protocol for YouTube, Google Maps, DailyMotion and Vimeo
    $html = $this->replaceLinkedTags(self::YOUTUBE_REGEX, $html, $callback, 'Video');
    $html = $this->replaceLinkedTags(self::GOOGLE_MAP_REGEX, $html, $callback, 'Map');
    $html = $this->replaceLinkedTags(self::DAILYMOTION_REGEX, $html, $callback, 'Video');
    $html = $this->replaceLinkedTags(self::VIMEO_REGEX, $html, $callback, 'Video');

    $text = strip_tags($html);
    // Replace multiple spaces with single spaces
    $text = preg_replace('/\ {2,}/', ' ', $text);
    // Reduce multiple horrid whitespace characters.
    // With the "u" modifier preg_replace() returns null when the subject is not valid
    // UTF-8; keep the uncollapsed text in that case instead of losing the whole page.
    $collapsed = preg_replace('/[\r\n\xA0]{2,}/u', "\n\n", $text);
    if ($collapsed !== null) {
        $text = $collapsed;
    }
    // Explicit flags so quotes are decoded the same way on every PHP version.
    $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Add the page name as a title
    return $page->name . "\n\n" . $text;
}
/**
 * Convert a chapter into a plain text string.
 *
 * The output is the chapter name, then its description (each followed by a blank line),
 * then the plain text of every page in the chapter, concatenated in order.
 *
 * @param \BookStack\Entities\Chapter $chapter
 * @return string
 */
public function chapterToPlainText(Chapter $chapter)
{
    $segments = [
        $chapter->name . "\n\n",
        $chapter->description . "\n\n",
    ];

    foreach ($chapter->pages as $childPage) {
        $segments[] = $this->pageToPlainText($childPage);
    }

    return implode('', $segments);
}
/**
 * Convert a book into a plain text string.
 *
 * Starts with the book name, then appends the plain text of each direct child:
 * chapters go through chapterToPlainText(), anything else through pageToPlainText().
 *
 * @param Book $book
 * @return string
 */
public function bookToPlainText(Book $book)
{
    $children = $this->entityRepo->getBookChildren($book, true, true);
    $output = $book->name . "\n\n";

    foreach ($children as $child) {
        $output .= $child->isA('chapter')
            ? $this->chapterToPlainText($child)
            : $this->pageToPlainText($child);
    }

    return $output;
}
/**
 * Apply a replacement callback to every match of a pattern in the given HTML.
 * Used to undo tag modifications that cause problems on export, such as TinyMCE
 * stripping the protocol from embedded media URLs.
 * See - https://github.com/tinymce/tinymce/blob/0f7a0f12667bde6eae9377b50b797f4479aa1ac7/src/plugins/media/main/ts/core/UrlPatterns.ts#L22
 *
 * The callback is invoked once per match as
 * callback($htmlContent, $allMatches, $matchIndex, $contentLabel) and must return the
 * updated HTML, which is then fed to the next invocation.
 *
 * @param String $regex
 * @param String $htmlContent
 * @param array $callback
 * @param String $contentLabel
 * @return String $htmlContent - Modified html content
 */
protected function replaceLinkedTags($regex, $htmlContent, $callback, $contentLabel = '')
{
    $found = [];
    preg_match_all($regex, $htmlContent, $found);

    // Nothing matched (or matching failed): hand the markup back untouched.
    if (empty($found[0])) {
        return $htmlContent;
    }

    foreach (array_keys($found[0]) as $position) {
        $htmlContent = call_user_func($callback, $htmlContent, $found, $position, $contentLabel);
    }

    return $htmlContent;
}
/**
 * HTML-export callback for replaceLinkedTags(): gives the src of one matched embed an
 * explicit "https:" scheme when it does not already start with "http".
 *
 * @param string $htmlContent Markup being processed.
 * @param array $iframeOutput preg_match_all() result; [0] holds the full tags, [2] the src values.
 * @param int $index Index of the match to process.
 * @param string $contentLabel Unused here; present so all callbacks share one signature.
 * @return string The updated markup.
 */
protected function replaceContentHtml($htmlContent, $iframeOutput, $index, $contentLabel)
{
    $srcString = $iframeOutput[2][$index];
    if (strpos($srcString, 'http') === 0) {
        return $htmlContent;
    }

    // Rewrite the src inside the matched tag only. Replacing the bare src across the
    // whole document would also hit copies that already carry a scheme - including the
    // ones fixed by an earlier call for an identical embed - producing "https:https://".
    $oldTag = $iframeOutput[0][$index];
    $newTag = str_replace($srcString, 'https:' . $srcString, $oldTag);

    return str_replace($oldTag, $newTag, $htmlContent);
}
/**
 * PDF-export callback for replaceLinkedTags(): replaces one matched embed with a
 * labelled link to its source, e.g. "Video: <a href='...'>...</a>".
 *
 * @param string $htmlContent Markup being processed.
 * @param array $iframeOutput preg_match_all() result; [0] holds the full tags, [2] the src values.
 * @param int $index Index of the match to process.
 * @param string $contentLabel Text placed before the link.
 * @return string The updated markup.
 */
protected function replaceContentPDF($htmlContent, $iframeOutput, $index, $contentLabel)
{
    $srcString = $iframeOutput[2][$index];
    $newSrcString = $srcString;
    if (strpos($srcString, '//') === 0) {
        // Protocol-relative URL (TinyMCE strips the scheme from known embeds).
        $newSrcString = 'https:' . $srcString;
    } elseif (strpos($srcString, 'http') !== 0) {
        // Any other relative source (e.g. a <video> pointing at "/uploads/a.mp4") is made
        // absolute with url(); prefixing it with "https:" would give a broken address.
        $newSrcString = url($srcString);
    }

    $finalHtmlString = "$contentLabel: <a href='$newSrcString'>$newSrcString</a>";

    return str_replace($iframeOutput[0][$index], $finalHtmlString, $htmlContent);
}
/**
 * Plain-text callback for replaceLinkedTags(): replaces one matched embed with
 * "<label>: <url>".
 *
 * @param string $htmlContent Markup being processed.
 * @param array $iframeOutput preg_match_all() result; [0] holds the full tags, [2] the src values.
 * @param int $index Index of the match to process.
 * @param string $contentLabel Text placed before the URL.
 * @return string The updated markup.
 */
protected function replaceContentText($htmlContent, $iframeOutput, $index, $contentLabel)
{
    $srcString = $iframeOutput[2][$index];
    $newSrcString = $srcString;
    if (strpos($srcString, '//') === 0) {
        // Protocol-relative URL (TinyMCE strips the scheme from known embeds).
        $newSrcString = 'https:' . $srcString;
    } elseif (strpos($srcString, 'http') !== 0) {
        // Any other relative source (e.g. a <video> pointing at "/uploads/a.mp4") is made
        // absolute with url(); prefixing it with "https:" would give a broken address.
        $newSrcString = url($srcString);
    }

    $finalHtmlString = "$contentLabel: $newSrcString";

    return str_replace($iframeOutput[0][$index], $finalHtmlString, $htmlContent);
}