3 namespace BookStack\Entities\Tools;
5 use BookStack\Entities\Models\Book;
6 use BookStack\Entities\Models\Chapter;
7 use BookStack\Entities\Models\Page;
8 use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
9 use BookStack\Uploads\ImageService;
10 use BookStack\Util\CspService;
// Constructor using promoted protected properties for the three collaborators:
// the image service (used by containHtml() to base64-encode image sources),
// the PDF generator (used by htmlToPdf() and the PDF export methods) and
// the CSP service (used by the contained-HTML export methods for the CSP meta tag value).
19 public function __construct(
20 protected ImageService $imageService,
21 protected PdfGenerator $pdfGenerator,
22 protected CspService $cspService
27 * Convert a page to a self-contained HTML file.
28 * Includes required CSS & image content. Images are base64 encoded into the HTML.
// Export a single page as self-contained HTML.
// The rendered page content is stored on the page model, the exports.page view is
// built with the CSP meta tag value and the current user locale, and the result is
// passed through containHtml() before being returned.
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
32 public function pageToContainedHtml(Page $page): string
// Overwrite the html property with the output of PageContent::render().
34 $page->html = (new PageContent($page))->render();
35 $pageHtml = view('exports.page', [
38 'cspContent' => $this->cspService->getCspMetaTagValue(),
39 'locale' => user()->getLocale(),
42 return $this->containHtml($pageHtml);
46 * Convert a chapter to a self-contained HTML file.
// Export a chapter as self-contained HTML.
// Each visible page of the chapter gets its html property replaced with rendered
// content, the exports.chapter view is built with the chapter, the CSP meta tag
// value and the current user locale, and the result is passed through containHtml().
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
50 public function chapterToContainedHtml(Chapter $chapter): string
// Only the pages returned by getVisiblePages() are rendered.
52 $pages = $chapter->getVisiblePages();
53 $pages->each(function ($page) {
54 $page->html = (new PageContent($page))->render();
56 $html = view('exports.chapter', [
57 'chapter' => $chapter,
60 'cspContent' => $this->cspService->getCspMetaTagValue(),
61 'locale' => user()->getLocale(),
64 return $this->containHtml($html);
68 * Convert a book to a self-contained HTML file.
// Export a book as self-contained HTML.
// The book tree from BookContents is handed to the exports.book view as bookChildren,
// together with the CSP meta tag value and the current user locale, and the result
// is passed through containHtml().
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
72 public function bookToContainedHtml(Book $book): string
// NOTE(review): the meaning of the (false, true) arguments is defined by BookContents::getTree();
// presumably the second one requests rendered page content - confirm against that class.
74 $bookTree = (new BookContents($book))->getTree(false, true);
75 $html = view('exports.book', [
77 'bookChildren' => $bookTree,
79 'cspContent' => $this->cspService->getCspMetaTagValue(),
80 'locale' => user()->getLocale(),
83 return $this->containHtml($html);
87 * Convert a page to a PDF file.
// Export a single page as a PDF.
// The rendered page content is stored on the page model, the exports.page view is
// built with the active PDF engine and the current user locale, and the result is
// converted by htmlToPdf().
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
91 public function pageToPdf(Page $page): string
// Overwrite the html property with the output of PageContent::render().
93 $page->html = (new PageContent($page))->render();
94 $html = view('exports.page', [
97 'engine' => $this->pdfGenerator->getActiveEngine(),
98 'locale' => user()->getLocale(),
101 return $this->htmlToPdf($html);
105 * Convert a chapter to a PDF file.
// Export a chapter as a PDF.
// Each visible page of the chapter gets its html property replaced with rendered
// content, the exports.chapter view is built with the chapter, the active PDF engine
// and the current user locale, and the result is converted by htmlToPdf().
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
109 public function chapterToPdf(Chapter $chapter): string
// Only the pages returned by getVisiblePages() are rendered.
111 $pages = $chapter->getVisiblePages();
112 $pages->each(function ($page) {
113 $page->html = (new PageContent($page))->render();
116 $html = view('exports.chapter', [
117 'chapter' => $chapter,
120 'engine' => $this->pdfGenerator->getActiveEngine(),
121 'locale' => user()->getLocale(),
124 return $this->htmlToPdf($html);
128 * Convert a book to a PDF file.
// Export a book as a PDF.
// The book tree from BookContents is handed to the exports.book view as bookChildren,
// together with the active PDF engine and the current user locale, and the result
// is converted by htmlToPdf().
// NOTE(review): some lines of this method (braces and further view data entries) are not visible in this excerpt.
132 public function bookToPdf(Book $book): string
// NOTE(review): the meaning of the (false, true) arguments is defined by BookContents::getTree();
// presumably the second one requests rendered page content - confirm against that class.
134 $bookTree = (new BookContents($book))->getTree(false, true);
135 $html = view('exports.book', [
137 'bookChildren' => $bookTree,
139 'engine' => $this->pdfGenerator->getActiveEngine(),
140 'locale' => user()->getLocale(),
143 return $this->htmlToPdf($html);
/**
 * Turn regular web-page HTML into PDF output.
 *
 * The markup is first made self-contained, then iframes are swapped for
 * plain links and details elements are opened, before the result is
 * handed to the PDF generator.
 */
protected function htmlToPdf(string $html): string
{
    $contained = $this->containHtml($html);
    $withoutIframes = $this->replaceIframesWithLinks($contained);
    $expanded = $this->openDetailElements($withoutIframes);

    return $this->pdfGenerator->fromHtml($expanded);
}
/**
 * Within the given HTML content, open any details blocks.
 *
 * The HTML is parsed into a DOM document, every details element has its
 * "open" attribute set, and the whole document is serialised back to HTML.
 */
protected function openDetailElements(string $html): string
{
    // Collect libxml parse warnings internally instead of emitting PHP warnings.
    libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    // Encode non-ASCII characters as numeric entities so loadHTML() reads the
    // UTF-8 input correctly. This replaces the 'HTML-ENTITIES' mbstring
    // conversion, which is deprecated as of PHP 8.2.
    $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    // Drop the buffered parse errors so they do not accumulate across calls.
    libxml_clear_errors();

    $xPath = new DOMXPath($doc);
    /** @var DOMElement $detail */
    foreach ($xPath->query('//details') as $detail) {
        $detail->setAttribute('open', 'open');
    }

    return $doc->saveHTML();
}
/**
 * Within the given HTML content, replace any iframe elements
 * with anchor links within paragraph blocks.
 *
 * Each iframe is swapped for a paragraph holding a link whose href and
 * text are both the iframe source. Protocol-relative sources ("//host/...")
 * are given an explicit "https:" scheme first.
 */
protected function replaceIframesWithLinks(string $html): string
{
    // Collect libxml parse warnings internally instead of emitting PHP warnings.
    libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    // Encode non-ASCII characters as numeric entities so loadHTML() reads the
    // UTF-8 input correctly. This replaces the 'HTML-ENTITIES' mbstring
    // conversion, which is deprecated as of PHP 8.2.
    $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    // Drop the buffered parse errors so they do not accumulate across calls.
    libxml_clear_errors();

    $xPath = new DOMXPath($doc);
    /** @var DOMElement $iframe */
    foreach ($xPath->query('//iframe') as $iframe) {
        $link = $iframe->getAttribute('src');
        if (str_starts_with($link, '//')) {
            $link = 'https:' . $link;
        }

        // The link text is added as a text node: the value argument of
        // createElement() is not escaped, so a URL containing "&" would
        // raise an entity warning and lose part of its text.
        $anchor = $doc->createElement('a');
        $anchor->appendChild($doc->createTextNode($link));
        $anchor->setAttribute('href', $link);

        $paragraph = $doc->createElement('p');
        $paragraph->appendChild($anchor);
        $iframe->parentNode->replaceChild($paragraph, $iframe);
    }

    return $doc->saveHTML();
}
/**
 * Bundle up the contents of an HTML file so that it is self-contained.
 *
 * Image sources are swapped for base64-encoded data where the image
 * service can provide it (the original source is kept otherwise), and
 * link targets that do not start with "http" are passed through url()
 * so that they become absolute.
 */
protected function containHtml(string $htmlContent): string
{
    $imageTagsOutput = [];
    preg_match_all("/\<img.*?src\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $imageTagsOutput);

    // Replace image src with base64 encoded image strings
    foreach ($imageTagsOutput[0] ?? [] as $index => $oldImgTagString) {
        $srcString = $imageTagsOutput[2][$index];
        // Keep the original source when the image could not be encoded.
        $imageEncoded = $this->imageService->imageUrlToBase64($srcString) ?? $srcString;
        $newImgTagString = str_replace($srcString, $imageEncoded, $oldImgTagString);
        $htmlContent = str_replace($oldImgTagString, $newImgTagString, $htmlContent);
    }

    // Each anchor tag is matched on its own: the attribute scan is lazy and
    // stops at the closing ">" of the tag, so several links on one line are
    // all found, rather than only the last href on that line.
    $linksOutput = [];
    preg_match_all("/\<a\s[^>]*?href\=(\'|\")(.*?)(\'|\")[^>]*?\>/i", $htmlContent, $linksOutput);

    // Update relative links to be absolute, with instance url
    foreach ($linksOutput[0] ?? [] as $index => $oldLinkString) {
        $srcString = $linksOutput[2][$index];
        if (str_starts_with(trim($srcString), 'http')) {
            continue;
        }

        $newLinkString = str_replace($srcString, url($srcString), $oldLinkString);
        $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
    }

    return $htmlContent;
}
/**
 * Produce a simple plain-text rendition of the given page.
 * Tags are stripped and whitespace is tidied up to give clean output.
 *
 * When $pageRendered is true the existing html property of the page is
 * used as-is, otherwise the page content is rendered first. $fromParent
 * controls whether one or two line breaks follow the page name.
 */
public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string
{
    if ($pageRendered) {
        $markup = $page->html;
    } else {
        $markup = (new PageContent($page))->render();
    }

    // Put a space ahead of every tag so that words in neighbouring
    // elements stay separated once the tags have been stripped.
    $spaced = str_replace('<', " <", $markup);
    $plain = trim(strip_tags($spaced));

    // Squash runs of spaces down to a single space.
    $plain = preg_replace('/ {2,}/', ' ', $plain);
    // Collapse runs of line breaks and non-breaking spaces into one blank line.
    $plain = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $plain);
    $plain = html_entity_decode($plain);

    $separator = $fromParent ? "\n" : "\n\n";

    return $page->name . $separator . $plain;
}
/**
 * Build a plain-text representation of a chapter.
 *
 * The output starts with the chapter name and description, followed by
 * the plain text of each visible page, separated by blank lines.
 */
public function chapterToPlainText(Chapter $chapter): string
{
    $heading = trim($chapter->name . "\n" . $chapter->description) . "\n\n";

    $pageTexts = [];
    foreach ($chapter->getVisiblePages() as $visiblePage) {
        $pageTexts[] = $this->pageToPlainText($visiblePage, false, true);
    }

    return $heading . implode("\n\n", $pageTexts);
}
/**
 * Build a plain-text representation of a book.
 *
 * The output starts with the book name and description, followed by the
 * plain text of each child in the book tree. Chapters go through
 * chapterToPlainText(); everything else is treated as an already-rendered page.
 */
public function bookToPlainText(Book $book): string
{
    $children = (new BookContents($book))->getTree(false, true);
    $heading = rtrim($book->name . "\n" . $book->description) . "\n\n";

    $sections = [];
    foreach ($children as $child) {
        $sections[] = $child->isA('chapter')
            ? $this->chapterToPlainText($child)
            : $this->pageToPlainText($child, true, true);
    }

    return $heading . implode("\n\n", $sections);
}
/**
 * Produce Markdown output for a page.
 *
 * The page name becomes a top-level heading. Stored markdown is used for
 * the body when the page has any; otherwise the page HTML is converted.
 */
public function pageToMarkdown(Page $page): string
{
    $heading = '# ' . $page->name . "\n\n";

    if (!$page->markdown) {
        return $heading . (new HtmlToMarkdown($page->html))->convert();
    }

    return $heading . $page->markdown;
}
325 * Convert a chapter to a Markdown file.
// Export a chapter as Markdown.
// The chapter name becomes a top-level heading, followed by the description and
// then the Markdown of each page, with blank lines in between.
// NOTE(review): the end of this method, including its return statement, is not visible in this excerpt.
327 public function chapterToMarkdown(Chapter $chapter): string
329 $text = '# ' . $chapter->name . "\n\n";
330 $text .= $chapter->description . "\n\n";
// NOTE(review): this iterates the pages relation directly, while chapterToContainedHtml(),
// chapterToPdf() and chapterToPlainText() all use getVisiblePages(). Presumably this export
// should be limited to visible pages as well - confirm and align.
331 foreach ($chapter->pages as $page) {
332 $text .= $this->pageToMarkdown($page) . "\n\n";
339 * Convert a book to a Markdown file.
341 public function bookToMarkdown(Book $book): string
343 $bookTree = (new BookContents($book))->getTree(false, true);
344 $text = '# ' . $book->name . "\n\n";
345 foreach ($bookTree as $bookChild) {
346 if ($bookChild instanceof Chapter) {
347 $text .= $this->chapterToMarkdown($bookChild) . "\n\n";
349 $text .= $this->pageToMarkdown($bookChild) . "\n\n";