3 namespace BookStack\Exports;
5 use BookStack\Entities\Models\Book;
6 use BookStack\Entities\Models\Chapter;
7 use BookStack\Entities\Models\Page;
8 use BookStack\Entities\Tools\BookContents;
9 use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
10 use BookStack\Entities\Tools\PageContent;
11 use BookStack\Uploads\ImageService;
12 use BookStack\Util\CspService;
13 use BookStack\Util\HtmlDocument;
20 public function __construct(
21 protected ImageService $imageService,
22 protected PdfGenerator $pdfGenerator,
23 protected CspService $cspService
28 * Convert a page to a self-contained HTML file.
29 * Includes required CSS & image content. Images are base64 encoded into the HTML.
33 public function pageToContainedHtml(Page $page): string
35 $page->html = (new PageContent($page))->render();
36 $pageHtml = view('exports.page', [
39 'cspContent' => $this->cspService->getCspMetaTagValue(),
40 'locale' => user()->getLocale(),
43 return $this->containHtml($pageHtml);
47 * Convert a chapter to a self-contained HTML file.
51 public function chapterToContainedHtml(Chapter $chapter): string
53 $pages = $chapter->getVisiblePages();
54 $pages->each(function ($page) {
55 $page->html = (new PageContent($page))->render();
57 $html = view('exports.chapter', [
58 'chapter' => $chapter,
61 'cspContent' => $this->cspService->getCspMetaTagValue(),
62 'locale' => user()->getLocale(),
65 return $this->containHtml($html);
69 * Convert a book to a self-contained HTML file.
73 public function bookToContainedHtml(Book $book): string
75 $bookTree = (new BookContents($book))->getTree(false, true);
76 $html = view('exports.book', [
78 'bookChildren' => $bookTree,
80 'cspContent' => $this->cspService->getCspMetaTagValue(),
81 'locale' => user()->getLocale(),
84 return $this->containHtml($html);
88 * Convert a page to a PDF file.
92 public function pageToPdf(Page $page): string
94 $page->html = (new PageContent($page))->render();
95 $html = view('exports.page', [
98 'engine' => $this->pdfGenerator->getActiveEngine(),
99 'locale' => user()->getLocale(),
102 return $this->htmlToPdf($html);
106 * Convert a chapter to a PDF file.
110 public function chapterToPdf(Chapter $chapter): string
112 $pages = $chapter->getVisiblePages();
113 $pages->each(function ($page) {
114 $page->html = (new PageContent($page))->render();
117 $html = view('exports.chapter', [
118 'chapter' => $chapter,
121 'engine' => $this->pdfGenerator->getActiveEngine(),
122 'locale' => user()->getLocale(),
125 return $this->htmlToPdf($html);
129 * Convert a book to a PDF file.
133 public function bookToPdf(Book $book): string
135 $bookTree = (new BookContents($book))->getTree(false, true);
136 $html = view('exports.book', [
138 'bookChildren' => $bookTree,
140 'engine' => $this->pdfGenerator->getActiveEngine(),
141 'locale' => user()->getLocale(),
144 return $this->htmlToPdf($html);
/**
 * Render the given web-page HTML as a PDF.
 *
 * The markup is first made self-contained, then adjusted within a
 * parsed document (iframes swapped for links, detail blocks opened)
 * before being handed to the PDF generator.
 */
protected function htmlToPdf(string $html): string
{
    $document = new HtmlDocument();
    $document->loadCompleteHtml($this->containHtml($html));

    $this->replaceIframesWithLinks($document);
    $this->openDetailElements($document);

    return $this->pdfGenerator->fromHtml($document->getHtml());
}
/**
 * Set the "open" attribute on every details element found
 * within the given HTML document.
 */
protected function openDetailElements(HtmlDocument $doc): void
{
    /** @var DOMElement $element */
    foreach ($doc->queryXPath('//details') as $element) {
        $element->setAttribute('open', 'open');
    }
}
/**
 * Swap each iframe in the given HTML document for a paragraph
 * holding an anchor that points at the iframe's source.
 */
protected function replaceIframesWithLinks(HtmlDocument $doc): void
{
    /** @var DOMElement $frame */
    foreach ($doc->queryXPath('//iframe') as $frame) {
        $source = $frame->getAttribute('src');
        // Protocol-relative sources are given an explicit https scheme.
        $target = str_starts_with($source, '//') ? 'https:' . $source : $source;

        $linkElement = $doc->createElement('a', $target);
        $linkElement->setAttribute('href', $target);

        $wrapper = $doc->createElement('p');
        $wrapper->appendChild($linkElement);

        $frame->parentNode->replaceChild($wrapper, $frame);
    }
}
/**
 * Bundle the contents of an HTML document so that it is self-contained.
 *
 * Image sources are swapped for base64-encoded data where the image
 * service can provide it, and link targets that do not start with
 * "http" are passed through url() so they point at this instance.
 */
protected function containHtml(string $htmlContent): string
{
    $imageTagsOutput = [];
    preg_match_all("/\<img.*?src\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $imageTagsOutput);

    // Replace image src with base64 encoded image strings
    foreach ($imageTagsOutput[0] ?? [] as $index => $oldImgTagString) {
        $srcString = $imageTagsOutput[2][$index];
        // Keep the original source when the image could not be encoded.
        $imageEncoded = $this->imageService->imageUrlToBase64($srcString) ?? $srcString;
        $newImgTagString = str_replace($srcString, $imageEncoded, $oldImgTagString);
        $htmlContent = str_replace($oldImgTagString, $newImgTagString, $htmlContent);
    }

    // Each match is confined to a single anchor tag. A greedy, unbounded
    // prefix ahead of "href" would stretch to the last href on the line and
    // leave earlier links on that line untouched, and a bare "<a" would also
    // match tags such as <abbr>. The closing quote must equal the opening one
    // so a value containing the other quote character is not cut short.
    $linksOutput = [];
    preg_match_all('/<a\b[^>]*?\shref=(["\'])(.*?)\1[^>]*>/i', $htmlContent, $linksOutput);

    // Update relative links to be absolute, with instance url
    foreach ($linksOutput[0] ?? [] as $index => $oldLinkString) {
        $srcString = $linksOutput[2][$index];
        if (str_starts_with(trim($srcString), 'http')) {
            continue;
        }

        $newLinkString = str_replace($srcString, url($srcString), $oldLinkString);
        $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
    }

    return $htmlContent;
}
/**
 * Produce a simple plain-text rendition of a page's content, tidying up
 * the whitespace left behind once the markup has been removed.
 * The page name is placed ahead of the text as a title.
 */
public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string
{
    if ($pageRendered) {
        $markup = $page->html;
    } else {
        $markup = (new PageContent($page))->render();
    }

    // Put a space ahead of every tag so that text from adjacent
    // elements stays separated once the tags are stripped.
    $stripped = trim(strip_tags(str_replace('<', " <", $markup)));

    // Collapse runs of spaces down to a single space.
    $stripped = preg_replace('/ {2,}/', ' ', $stripped);
    // Squash repeated newline-like characters into one blank line.
    $stripped = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $stripped);
    $body = html_entity_decode($stripped);

    // One newline after the title within a parent export, otherwise two.
    $separator = $fromParent ? "\n" : "\n\n";

    return $page->name . $separator . $body;
}
/**
 * Build a plain-text rendition of a chapter: its name and description
 * followed by the plain text of each of its visible pages.
 */
public function chapterToPlainText(Chapter $chapter): string
{
    $heading = trim($chapter->name . "\n" . $chapter->description) . "\n\n";

    $sections = [];
    foreach ($chapter->getVisiblePages() as $visiblePage) {
        $sections[] = $this->pageToPlainText($visiblePage, false, true);
    }

    return $heading . implode("\n\n", $sections);
}
/**
 * Build a plain-text rendition of a book: its name and description
 * followed by the plain text of every chapter and page in its tree.
 */
public function bookToPlainText(Book $book): string
{
    $children = (new BookContents($book))->getTree(false, true);
    $heading = rtrim($book->name . "\n" . $book->description) . "\n\n";

    $sections = [];
    foreach ($children as $child) {
        // Pages taken straight from the tree are passed on as already rendered.
        $sections[] = $child->isA('chapter')
            ? $this->chapterToPlainText($child)
            : $this->pageToPlainText($child, true, true);
    }

    return $heading . implode("\n\n", $sections);
}
/**
 * Produce the Markdown form of a page, headed by the page name.
 */
public function pageToMarkdown(Page $page): string
{
    // Stored markdown is used when present; otherwise the page HTML is converted.
    $content = $page->markdown ?: (new HtmlToMarkdown($page->html))->convert();

    return '# ' . $page->name . "\n\n" . $content;
}
315 * Convert a chapter to a Markdown file.
317 public function chapterToMarkdown(Chapter $chapter): string
319 $text = '# ' . $chapter->name . "\n\n";
320 $text .= $chapter->description . "\n\n";
321 foreach ($chapter->pages as $page) {
322 $text .= $this->pageToMarkdown($page) . "\n\n";
329 * Convert a book to a Markdown file.
331 public function bookToMarkdown(Book $book): string
333 $bookTree = (new BookContents($book))->getTree(false, true);
334 $text = '# ' . $book->name . "\n\n";
335 foreach ($bookTree as $bookChild) {
336 if ($bookChild instanceof Chapter) {
337 $text .= $this->chapterToMarkdown($bookChild) . "\n\n";
339 $text .= $this->pageToMarkdown($bookChild) . "\n\n";