BookStack Code Mirror - bookstack/blob - app/Services/ExportService.php
Updated Spanish translation
[bookstack] / app / Services / ExportService.php
1 <?php namespace BookStack\Services;
2
3 use BookStack\Book;
4 use BookStack\Chapter;
5 use BookStack\Page;
6 use BookStack\Repos\EntityRepo;
7
8 class ExportService
9 {
10
11     protected $entityRepo;
12
13     /**
14      * ExportService constructor.
15      * @param $entityRepo
16      */
17     public function __construct(EntityRepo $entityRepo)
18     {
19         $this->entityRepo = $entityRepo;
20     }
21
22     /**
23      * Convert a page to a self-contained HTML file.
24      * Includes required CSS & image content. Images are base64 encoded into the HTML.
25      * @param Page $page
26      * @return mixed|string
27      */
28     public function pageToContainedHtml(Page $page)
29     {
30         $this->entityRepo->renderPage($page);
31         $pageHtml = view('pages/export', [
32             'page' => $page
33         ])->render();
34         return $this->containHtml($pageHtml);
35     }
36
37     /**
38      * Convert a chapter to a self-contained HTML file.
39      * @param Chapter $chapter
40      * @return mixed|string
41      */
42     public function chapterToContainedHtml(Chapter $chapter)
43     {
44         $pages = $this->entityRepo->getChapterChildren($chapter);
45         $pages->each(function ($page) {
46             $page->html = $this->entityRepo->renderPage($page);
47         });
48         $html = view('chapters/export', [
49             'chapter' => $chapter,
50             'pages' => $pages
51         ])->render();
52         return $this->containHtml($html);
53     }
54
55     /**
56      * Convert a book to a self-contained HTML file.
57      * @param Book $book
58      * @return mixed|string
59      */
60     public function bookToContainedHtml(Book $book)
61     {
62         $bookTree = $this->entityRepo->getBookChildren($book, true, true);
63         $html = view('books/export', [
64             'book' => $book,
65             'bookChildren' => $bookTree
66         ])->render();
67         return $this->containHtml($html);
68     }
69
70     /**
71      * Convert a page to a PDF file.
72      * @param Page $page
73      * @return mixed|string
74      */
75     public function pageToPdf(Page $page)
76     {
77         $this->entityRepo->renderPage($page);
78         $html = view('pages/pdf', [
79             'page' => $page
80         ])->render();
81         return $this->htmlToPdf($html);
82     }
83
84     /**
85      * Convert a chapter to a PDF file.
86      * @param Chapter $chapter
87      * @return mixed|string
88      */
89     public function chapterToPdf(Chapter $chapter)
90     {
91         $pages = $this->entityRepo->getChapterChildren($chapter);
92         $pages->each(function ($page) {
93             $page->html = $this->entityRepo->renderPage($page);
94         });
95         $html = view('chapters/export', [
96             'chapter' => $chapter,
97             'pages' => $pages
98         ])->render();
99         return $this->htmlToPdf($html);
100     }
101
102     /**
103      * Convert a book to a PDF file
104      * @param Book $book
105      * @return string
106      */
107     public function bookToPdf(Book $book)
108     {
109         $bookTree = $this->entityRepo->getBookChildren($book, true, true);
110         $html = view('books/export', [
111             'book' => $book,
112             'bookChildren' => $bookTree
113         ])->render();
114         return $this->htmlToPdf($html);
115     }
116
117     /**
118      * Convert normal webpage HTML to a PDF.
119      * @param $html
120      * @return string
121      */
122     protected function htmlToPdf($html)
123     {
124         $containedHtml = $this->containHtml($html);
125         $useWKHTML = config('snappy.pdf.binary') !== false;
126         if ($useWKHTML) {
127             $pdf = \SnappyPDF::loadHTML($containedHtml);
128             $pdf->setOption('print-media-type', true);
129         } else {
130             $pdf = \DomPDF::loadHTML($containedHtml);
131         }
132         return $pdf->output();
133     }
134
135     /**
136      * Bundle of the contents of a html file to be self-contained.
137      * @param $htmlContent
138      * @return mixed|string
139      * @throws \Exception
140      */
141     protected function containHtml($htmlContent)
142     {
143         $imageTagsOutput = [];
144         preg_match_all("/\<img.*src\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $imageTagsOutput);
145
146         // Replace image src with base64 encoded image strings
147         if (isset($imageTagsOutput[0]) && count($imageTagsOutput[0]) > 0) {
148             foreach ($imageTagsOutput[0] as $index => $imgMatch) {
149                 $oldImgString = $imgMatch;
150                 $srcString = $imageTagsOutput[2][$index];
151                 $isLocal = strpos(trim($srcString), 'http') !== 0;
152                 if ($isLocal) {
153                     $pathString = public_path(trim($srcString, '/'));
154                 } else {
155                     $pathString = $srcString;
156                 }
157
158                 // Attempt to find local files even if url not absolute
159                 $base = baseUrl('/');
160                 if (strpos($srcString, $base) === 0) {
161                     $isLocal = true;
162                     $relString = str_replace($base, '', $srcString);
163                     $pathString = public_path(trim($relString, '/'));
164                 }
165
166                 if ($isLocal && !file_exists($pathString)) {
167                     continue;
168                 }
169                 try {
170                     if ($isLocal) {
171                         $imageContent = file_get_contents($pathString);
172                     } else {
173                         $ch = curl_init();
174                         curl_setopt_array($ch, [CURLOPT_URL => $pathString, CURLOPT_RETURNTRANSFER => 1, CURLOPT_CONNECTTIMEOUT => 5]);
175                         $imageContent = curl_exec($ch);
176                         $err = curl_error($ch);
177                         curl_close($ch);
178                         if ($err) {
179                             throw new \Exception("Image fetch failed, Received error: " . $err);
180                         }
181                     }
182                     $imageEncoded = 'data:image/' . pathinfo($pathString, PATHINFO_EXTENSION) . ';base64,' . base64_encode($imageContent);
183                     $newImageString = str_replace($srcString, $imageEncoded, $oldImgString);
184                 } catch (\ErrorException $e) {
185                     $newImageString = '';
186                 }
187                 $htmlContent = str_replace($oldImgString, $newImageString, $htmlContent);
188             }
189         }
190
191         $linksOutput = [];
192         preg_match_all("/\<a.*href\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $linksOutput);
193
194         // Replace image src with base64 encoded image strings
195         if (isset($linksOutput[0]) && count($linksOutput[0]) > 0) {
196             foreach ($linksOutput[0] as $index => $linkMatch) {
197                 $oldLinkString = $linkMatch;
198                 $srcString = $linksOutput[2][$index];
199                 if (strpos(trim($srcString), 'http') !== 0) {
200                     $newSrcString = url($srcString);
201                     $newLinkString = str_replace($srcString, $newSrcString, $oldLinkString);
202                     $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
203                 }
204             }
205         }
206
207         // Replace any relative links with system domain
208         return $htmlContent;
209     }
210
211     /**
212      * Converts the page contents into simple plain text.
213      * This method filters any bad looking content to provide a nice final output.
214      * @param Page $page
215      * @return mixed
216      */
217     public function pageToPlainText(Page $page)
218     {
219         $html = $this->entityRepo->renderPage($page);
220         $text = strip_tags($html);
221         // Replace multiple spaces with single spaces
222         $text = preg_replace('/\ {2,}/', ' ', $text);
223         // Reduce multiple horrid whitespace characters.
224         $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text);
225         $text = html_entity_decode($text);
226         // Add title
227         $text = $page->name . "\n\n" . $text;
228         return $text;
229     }
230
231     /**
232      * Convert a chapter into a plain text string.
233      * @param Chapter $chapter
234      * @return string
235      */
236     public function chapterToPlainText(Chapter $chapter)
237     {
238         $text = $chapter->name . "\n\n";
239         $text .= $chapter->description . "\n\n";
240         foreach ($chapter->pages as $page) {
241             $text .= $this->pageToPlainText($page);
242         }
243         return $text;
244     }
245
246     /**
247      * Convert a book into a plain text string.
248      * @param Book $book
249      * @return string
250      */
251     public function bookToPlainText(Book $book)
252     {
253         $bookTree = $this->entityRepo->getBookChildren($book, true, true);
254         $text = $book->name . "\n\n";
255         foreach ($bookTree as $bookChild) {
256             if ($bookChild->isA('chapter')) {
257                 $text .= $this->chapterToPlainText($bookChild);
258             } else {
259                 $text .= $this->pageToPlainText($bookChild);
260             }
261         }
262         return $text;
263     }
264 }