3 namespace BookStack\Entities\Tools;
5 use BookStack\Entities\Models\Book;
6 use BookStack\Entities\Models\Chapter;
7 use BookStack\Entities\Models\Page;
8 use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
9 use BookStack\Uploads\ImageService;
10 use BookStack\Util\CspService;
/**
 * Used to turn image URIs into base64 data when bundling HTML.
 */
protected ImageService $imageService;

/**
 * Used to produce PDF output from HTML and to report the active PDF engine.
 */
protected PdfGenerator $pdfGenerator;

/**
 * Used to provide the CSP meta tag value passed to the HTML export views.
 */
protected CspService $cspService;

/**
 * Store the services that the export methods of this class rely upon.
 */
public function __construct(ImageService $imageService, PdfGenerator $pdfGenerator, CspService $cspService)
{
    $this->cspService = $cspService;
    $this->pdfGenerator = $pdfGenerator;
    $this->imageService = $imageService;
}
/**
 * Export a page as a self-contained HTML document.
 * The page content is rendered, placed into the 'pages.export' view and
 * then passed through containHtml() so that image content is embedded
 * in the document as base64 data.
 *
 * @throws \Throwable
 */
public function pageToContainedHtml(Page $page)
{
    $page->html = (new PageContent($page))->render();

    $viewData = [
        'page'       => $page,
        'format'     => 'html',
        'cspContent' => $this->cspService->getCspMetaTagValue(),
    ];
    $rendered = view('pages.export', $viewData)->render();

    return $this->containHtml($rendered);
}
/**
 * Export a chapter as a self-contained HTML document.
 * Only the pages returned by getVisiblePages() are included, each having
 * its content rendered before the 'chapters.export' view is built.
 *
 * @throws \Throwable
 */
public function chapterToContainedHtml(Chapter $chapter)
{
    $visiblePages = $chapter->getVisiblePages();
    foreach ($visiblePages as $visiblePage) {
        $visiblePage->html = (new PageContent($visiblePage))->render();
    }

    $viewData = [
        'chapter'    => $chapter,
        'pages'      => $visiblePages,
        'format'     => 'html',
        'cspContent' => $this->cspService->getCspMetaTagValue(),
    ];
    $rendered = view('chapters.export', $viewData)->render();

    return $this->containHtml($rendered);
}
/**
 * Export a book as a self-contained HTML document.
 * The book's content tree is handed to the 'books.export' view and the
 * rendered result is bundled via containHtml().
 *
 * @throws \Throwable
 */
public function bookToContainedHtml(Book $book)
{
    $contents = new BookContents($book);

    $viewData = [
        'book'         => $book,
        'bookChildren' => $contents->getTree(false, true),
        'format'       => 'html',
        'cspContent'   => $this->cspService->getCspMetaTagValue(),
    ];
    $rendered = view('books.export', $viewData)->render();

    return $this->containHtml($rendered);
}
/**
 * Export a page as a PDF.
 * The page is rendered through the 'pages.export' view, which is told
 * which PDF engine is active, before being converted by htmlToPdf().
 *
 * @throws \Throwable
 */
public function pageToPdf(Page $page)
{
    $page->html = (new PageContent($page))->render();

    $viewData = [
        'page'   => $page,
        'format' => 'pdf',
        'engine' => $this->pdfGenerator->getActiveEngine(),
    ];
    $rendered = view('pages.export', $viewData)->render();

    return $this->htmlToPdf($rendered);
}
/**
 * Export a chapter as a PDF.
 * Only the pages returned by getVisiblePages() are included, each having
 * its content rendered before the 'chapters.export' view is built.
 *
 * @throws \Throwable
 */
public function chapterToPdf(Chapter $chapter)
{
    $visiblePages = $chapter->getVisiblePages();
    foreach ($visiblePages as $visiblePage) {
        $visiblePage->html = (new PageContent($visiblePage))->render();
    }

    $viewData = [
        'chapter' => $chapter,
        'pages'   => $visiblePages,
        'format'  => 'pdf',
        'engine'  => $this->pdfGenerator->getActiveEngine(),
    ];
    $rendered = view('chapters.export', $viewData)->render();

    return $this->htmlToPdf($rendered);
}
/**
 * Export a book as a PDF.
 * The book's content tree is handed to the 'books.export' view, which is
 * told which PDF engine is active, before conversion by htmlToPdf().
 *
 * @throws \Throwable
 */
public function bookToPdf(Book $book)
{
    $contents = new BookContents($book);

    $viewData = [
        'book'         => $book,
        'bookChildren' => $contents->getTree(false, true),
        'format'       => 'pdf',
        'engine'       => $this->pdfGenerator->getActiveEngine(),
    ];
    $rendered = view('books.export', $viewData)->render();

    return $this->htmlToPdf($rendered);
}
/**
 * Turn standard web-page HTML into PDF data.
 * The HTML is first bundled to be self-contained, then iframes are swapped
 * for links and details blocks are opened, before the result is handed
 * to the PDF generator.
 */
protected function htmlToPdf(string $html): string
{
    $contained = $this->containHtml($html);
    $withoutIframes = $this->replaceIframesWithLinks($contained);
    $detailsOpened = $this->openDetailElements($withoutIframes);

    return $this->pdfGenerator->fromHtml($detailsOpened);
}
/**
 * Within the given HTML content, open any details blocks by setting
 * the "open" attribute on every <details> element.
 *
 * Empty input is returned unchanged since DOMDocument::loadHTML() does not
 * accept an empty source. The libxml error handling mode that was active
 * before the call is restored, and parse errors gathered while loading the
 * document are cleared, so this method does not leak global libxml state.
 *
 * @param string $html UTF-8 encoded HTML content.
 *
 * @return string The HTML as re-serialised by DOMDocument::saveHTML().
 */
protected function openDetailElements(string $html): string
{
    if ($html === '') {
        return $html;
    }

    // Collect parser warnings internally instead of emitting PHP warnings.
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        $doc = new DOMDocument();
        // Characters outside of ASCII are passed to the parser as numeric entities.
        // This replaces conversion to the 'HTML-ENTITIES' mbstring encoding,
        // which is deprecated as of PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
        $xPath = new DOMXPath($doc);

        /** @var DOMElement $detail */
        foreach ($xPath->query('//details') as $detail) {
            $detail->setAttribute('open', 'open');
        }

        return $doc->saveHTML();
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}
/**
 * Within the given HTML content, replace any iframe elements
 * with anchor links within paragraph blocks.
 *
 * Each iframe's "src" value is used as both the link target and the link
 * text. Sources starting with "//" are prefixed with "https:".
 *
 * Empty input is returned unchanged since DOMDocument::loadHTML() does not
 * accept an empty source. The libxml error handling mode that was active
 * before the call is restored, and parse errors gathered while loading the
 * document are cleared, so this method does not leak global libxml state.
 *
 * @param string $html UTF-8 encoded HTML content.
 *
 * @return string The HTML as re-serialised by DOMDocument::saveHTML().
 */
protected function replaceIframesWithLinks(string $html): string
{
    if ($html === '') {
        return $html;
    }

    // Collect parser warnings internally instead of emitting PHP warnings.
    $previousErrorMode = libxml_use_internal_errors(true);

    try {
        $doc = new DOMDocument();
        // Characters outside of ASCII are passed to the parser as numeric entities.
        // This replaces conversion to the 'HTML-ENTITIES' mbstring encoding,
        // which is deprecated as of PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
        $xPath = new DOMXPath($doc);

        /** @var DOMElement $iframe */
        foreach ($xPath->query('//iframe') as $iframe) {
            $link = $iframe->getAttribute('src');
            if (strpos($link, '//') === 0) {
                $link = 'https:' . $link;
            }

            // The link text is added as a text node since the value argument of
            // createElement() is not escaped, meaning an "&" within a URL query
            // string would raise a warning and cut the text short.
            $anchor = $doc->createElement('a');
            $anchor->appendChild($doc->createTextNode($link));
            $anchor->setAttribute('href', $link);

            $paragraph = $doc->createElement('p');
            $paragraph->appendChild($anchor);
            $iframe->parentNode->replaceChild($paragraph, $iframe);
        }

        return $doc->saveHTML();
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }
}
/**
 * Bundle up the contents of an HTML file so that it is self-contained.
 *
 * Three passes are made over the content:
 *  - embed tags are rewritten as img tags.
 *  - img "src" values are swapped for base64 data provided by the image
 *    service, keeping the original source where no data is returned.
 *  - anchor "href" values that do not start with "http" are passed
 *    through url() to make them absolute.
 *
 * @param string $htmlContent The HTML to bundle.
 *
 * @return string The bundled HTML.
 */
protected function containHtml(string $htmlContent): string
{
    // Replace embed tags with images.
    // preg_replace() returns null upon error, in which case the content is kept as it was.
    $htmlContent = preg_replace("/<embed (.*?)>/i", '<img $1>', $htmlContent) ?? $htmlContent;

    // Replace image & embed src attributes with base64 encoded data strings
    $imageTagsOutput = [];
    preg_match_all("/<img .*?src=['\"](.*?)['\"].*?>/i", $htmlContent, $imageTagsOutput);
    foreach ($imageTagsOutput[0] ?? [] as $index => $oldImgTagString) {
        $srcString = $imageTagsOutput[1][$index];
        // Fall back to the original source if the image could not be encoded
        $imageEncoded = $this->imageService->imageUriToBase64($srcString) ?? $srcString;
        $newImgTagString = str_replace($srcString, $imageEncoded, $oldImgTagString);
        $htmlContent = str_replace($oldImgTagString, $newImgTagString, $htmlContent);
    }

    // Replace any relative links with full system URL.
    // The part before "href" is matched lazily so that each anchor on a line is
    // matched by itself. A greedy match would span from the first anchor through
    // to the last "href" on the line, leaving earlier links relative.
    $linksOutput = [];
    preg_match_all("/<a .*?href=['\"](.*?)['\"].*?>/i", $htmlContent, $linksOutput);
    foreach ($linksOutput[0] ?? [] as $index => $oldLinkString) {
        $srcString = $linksOutput[1][$index];
        if (strpos(trim($srcString), 'http') === 0) {
            // Treated as already being an absolute link
            continue;
        }

        $newSrcString = url($srcString);
        $newLinkString = str_replace($srcString, $newSrcString, $oldLinkString);
        $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
    }

    return $htmlContent;
}
/**
 * Export a page as simple plain text.
 * Tags are stripped from the rendered page content, runs of spaces and of
 * line-break style whitespace are reduced, and HTML entities are decoded.
 * The page name is placed above the content.
 */
public function pageToPlainText(Page $page): string
{
    $rendered = (new PageContent($page))->render();

    // Patterns are applied in order: multiple spaces first, then multiple whitespace characters.
    $patterns = ['/\ {2,}/', '/(\x0A|\xA0|\x0A|\r|\n){2,}/su'];
    $replacements = [' ', "\n\n"];
    $cleaned = preg_replace($patterns, $replacements, strip_tags($rendered));

    $body = html_entity_decode($cleaned);

    return $page->name . "\n\n" . $body;
}
/**
 * Export a chapter as a plain text string.
 * Output starts with the chapter name and description, followed by the
 * plain text of each page returned by getVisiblePages().
 */
public function chapterToPlainText(Chapter $chapter): string
{
    $output = "{$chapter->name}\n\n{$chapter->description}\n\n";

    foreach ($chapter->getVisiblePages() as $visiblePage) {
        $output .= $this->pageToPlainText($visiblePage) . "\n\n";
    }

    return $output;
}
/**
 * Export a book as a plain text string.
 * Output starts with the book name, followed by the plain text of each
 * item in the book's content tree, whether chapter or page.
 */
public function bookToPlainText(Book $book): string
{
    $output = $book->name . "\n\n";
    $tree = (new BookContents($book))->getTree(false, false);

    foreach ($tree as $child) {
        $output .= $child->isA('chapter')
            ? $this->chapterToPlainText($child)
            : $this->pageToPlainText($child);
        $output .= "\n\n";
    }

    return $output;
}
/**
 * Export a page as Markdown.
 * The page name is used as a top-level heading. The page's stored markdown
 * is used where there is any, otherwise the page HTML is converted.
 */
public function pageToMarkdown(Page $page): string
{
    $body = $page->markdown ?: (new HtmlToMarkdown($page->html))->convert();

    return '# ' . $page->name . "\n\n" . $body;
}
/**
 * Convert a chapter to a Markdown file.
 * Output starts with the chapter name as a heading and the chapter
 * description, followed by the markdown of each page in the chapter.
 *
 * Pages are fetched via getVisiblePages(), as done by the other chapter
 * exports in this class, instead of via the "pages" relation so that the
 * markdown export covers the same set of pages as the other formats.
 */
public function chapterToMarkdown(Chapter $chapter): string
{
    $text = '# ' . $chapter->name . "\n\n";
    $text .= $chapter->description . "\n\n";

    foreach ($chapter->getVisiblePages() as $page) {
        $text .= $this->pageToMarkdown($page) . "\n\n";
    }

    return $text;
}
/**
 * Export a book as Markdown.
 * Output starts with the book name as a heading, followed by the markdown
 * of each item in the book's content tree, whether chapter or page.
 */
public function bookToMarkdown(Book $book): string
{
    $output = '# ' . $book->name . "\n\n";
    $tree = (new BookContents($book))->getTree(false, true);

    foreach ($tree as $child) {
        $childMarkdown = ($child instanceof Chapter)
            ? $this->chapterToMarkdown($child)
            : $this->pageToMarkdown($child);
        $output .= $childMarkdown . "\n\n";
    }

    return $output;
}