use BookStack\Entities\Models\Page;
use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
use BookStack\Uploads\ImageService;
-use DOMDocument;
+use BookStack\Util\CspService;
+use BookStack\Util\HtmlDocument;
use DOMElement;
-use DOMXPath;
use Exception;
use Throwable;
class ExportFormatter
{
/**
 * Collaborators are held as promoted, protected properties:
 * the image service is used when embedding images, the PDF generator
 * produces PDF output and reports the active engine, and the CSP
 * service supplies the CSP meta tag value used by the HTML exports.
 */
public function __construct(
    protected ImageService $imageService,
    protected PdfGenerator $pdfGenerator,
    protected CspService $cspService,
) {
}
/**
*
* @throws Throwable
*/
- public function pageToContainedHtml(Page $page)
+ public function pageToContainedHtml(Page $page): string
{
$page->html = (new PageContent($page))->render();
- $pageHtml = view('pages.export', [
- 'page' => $page,
- 'format' => 'html',
+ $pageHtml = view('exports.page', [
+ 'page' => $page,
+ 'format' => 'html',
+ 'cspContent' => $this->cspService->getCspMetaTagValue(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->containHtml($pageHtml);
*
* @throws Throwable
*/
- public function chapterToContainedHtml(Chapter $chapter)
+ public function chapterToContainedHtml(Chapter $chapter): string
{
$pages = $chapter->getVisiblePages();
$pages->each(function ($page) {
$page->html = (new PageContent($page))->render();
});
- $html = view('chapters.export', [
- 'chapter' => $chapter,
- 'pages' => $pages,
- 'format' => 'html',
+ $html = view('exports.chapter', [
+ 'chapter' => $chapter,
+ 'pages' => $pages,
+ 'format' => 'html',
+ 'cspContent' => $this->cspService->getCspMetaTagValue(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->containHtml($html);
*
* @throws Throwable
*/
- public function bookToContainedHtml(Book $book)
+ public function bookToContainedHtml(Book $book): string
{
$bookTree = (new BookContents($book))->getTree(false, true);
- $html = view('books.export', [
+ $html = view('exports.book', [
'book' => $book,
'bookChildren' => $bookTree,
'format' => 'html',
+ 'cspContent' => $this->cspService->getCspMetaTagValue(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->containHtml($html);
*
* @throws Throwable
*/
- public function pageToPdf(Page $page)
+ public function pageToPdf(Page $page): string
{
$page->html = (new PageContent($page))->render();
- $html = view('pages.export', [
+ $html = view('exports.page', [
'page' => $page,
'format' => 'pdf',
'engine' => $this->pdfGenerator->getActiveEngine(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->htmlToPdf($html);
*
* @throws Throwable
*/
- public function chapterToPdf(Chapter $chapter)
+ public function chapterToPdf(Chapter $chapter): string
{
$pages = $chapter->getVisiblePages();
$pages->each(function ($page) {
$page->html = (new PageContent($page))->render();
});
- $html = view('chapters.export', [
+ $html = view('exports.chapter', [
'chapter' => $chapter,
'pages' => $pages,
'format' => 'pdf',
'engine' => $this->pdfGenerator->getActiveEngine(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->htmlToPdf($html);
*
* @throws Throwable
*/
- public function bookToPdf(Book $book)
+ public function bookToPdf(Book $book): string
{
$bookTree = (new BookContents($book))->getTree(false, true);
- $html = view('books.export', [
+ $html = view('exports.book', [
'book' => $book,
'bookChildren' => $bookTree,
'format' => 'pdf',
'engine' => $this->pdfGenerator->getActiveEngine(),
+ 'locale' => user()->getLocale(),
])->render();
return $this->htmlToPdf($html);
protected function htmlToPdf(string $html): string
{
    // Contain the HTML first, then load the result into a document
    // wrapper so the adjustments below work against a parsed document.
    $containedHtml = $this->containHtml($html);
    $document = new HtmlDocument();
    $document->loadCompleteHtml($containedHtml);

    // PDF-specific adjustments: iframes are swapped for links and
    // details elements get their "open" attribute set.
    $this->replaceIframesWithLinks($document);
    $this->openDetailElements($document);

    return $this->pdfGenerator->fromHtml($document->getHtml());
}
/**
 * Set the "open" attribute on every details element
 * found within the given HTML document.
 */
protected function openDetailElements(HtmlDocument $doc): void
{
    /** @var DOMElement $detailsElement */
    foreach ($doc->queryXPath('//details') as $detailsElement) {
        $detailsElement->setAttribute('open', 'open');
    }
}
- $doc = new DOMDocument();
- $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
- $xPath = new DOMXPath($doc);
+ /**
+ * Within the given HTML document, replace any iframe elements
+ * with anchor links within paragraph blocks.
+ */
+ protected function replaceIframesWithLinks(HtmlDocument $doc): void
+ {
+ $iframes = $doc->queryXPath('//iframe');
- $iframes = $xPath->query('//iframe');
/** @var DOMElement $iframe */
foreach ($iframes as $iframe) {
$link = $iframe->getAttribute('src');
- if (strpos($link, '//') === 0) {
+ if (str_starts_with($link, '//')) {
$link = 'https:' . $link;
}
$paragraph->appendChild($anchor);
$iframe->parentNode->replaceChild($paragraph, $iframe);
}
-
- return $doc->saveHTML();
}
/**
foreach ($imageTagsOutput[0] as $index => $imgMatch) {
$oldImgTagString = $imgMatch;
$srcString = $imageTagsOutput[2][$index];
- $imageEncoded = $this->imageService->imageUriToBase64($srcString);
+ $imageEncoded = $this->imageService->imageUrlToBase64($srcString);
if ($imageEncoded === null) {
$imageEncoded = $srcString;
}
$linksOutput = [];
preg_match_all("/\<a.*href\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $linksOutput);
- // Replace image src with base64 encoded image strings
+ // Update relative links to be absolute, with instance url
if (isset($linksOutput[0]) && count($linksOutput[0]) > 0) {
foreach ($linksOutput[0] as $index => $linkMatch) {
$oldLinkString = $linkMatch;
$srcString = $linksOutput[2][$index];
- if (strpos(trim($srcString), 'http') !== 0) {
+ if (!str_starts_with(trim($srcString), 'http')) {
$newSrcString = url($srcString);
$newLinkString = str_replace($srcString, $newSrcString, $oldLinkString);
$htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent);
}
}
- // Replace any relative links with system domain
return $htmlContent;
}
* Converts the page contents into simple plain text.
* This method filters any bad looking content to provide a nice final output.
*/
public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string
{
    // When the caller states the page is already rendered, reuse the HTML
    // stored on the page; otherwise render the content now. The cast guards
    // against a missing (null) html value being fed to the string functions.
    $html = $pageRendered ? (string) $page->html : (new PageContent($page))->render();

    // Add a preceding space before each tag so that text from adjacent
    // elements stays separated once the tags are stripped.
    $html = str_replace('<', ' <', $html);
    $text = trim(strip_tags($html));

    // Replace multiple spaces with single spaces.
    // preg_replace() returns null on failure, so fall back to the
    // previous text instead of silently discarding the page content.
    $text = preg_replace('/ {2,}/', ' ', $text) ?? $text;

    // Reduce runs of line-break / non-breaking-space characters down to a
    // single blank line. With the "u" modifier this call fails (null) on
    // malformed UTF-8 input, hence the same fallback as above.
    $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text) ?? $text;
    $text = html_entity_decode($text);

    // Prefix the page name as a title; a single newline separates title
    // and body when called on behalf of a parent, otherwise a blank line.
    return $page->name . ($fromParent ? "\n" : "\n\n") . $text;
}
*/
public function chapterToPlainText(Chapter $chapter): string
{
    // Heading: chapter name, then its description on the next line,
    // trimmed as a whole and followed by a blank line.
    $heading = trim($chapter->name . "\n" . $chapter->description) . "\n\n";

    // Each visible page is converted with the "from parent" flag set and
    // rendered on demand (the "page rendered" flag is false).
    $pageTexts = [];
    foreach ($chapter->getVisiblePages() as $visiblePage) {
        $pageTexts[] = $this->pageToPlainText($visiblePage, false, true);
    }

    // Pages are separated from one another by a blank line.
    return $heading . implode("\n\n", $pageTexts);
}
/**
*/
public function bookToPlainText(Book $book): string
{
    // NOTE(review): presumably the second getTree() argument requests
    // rendered pages, since direct child pages are passed on below as
    // already rendered - confirm against BookContents::getTree().
    $tree = (new BookContents($book))->getTree(false, true);

    // Heading: book name, then description, right-trimmed and followed
    // by a blank line.
    $heading = rtrim($book->name . "\n" . $book->description) . "\n\n";

    $sections = [];
    foreach ($tree as $child) {
        $sections[] = $child->isA('chapter')
            ? $this->chapterToPlainText($child)
            : $this->pageToPlainText($child, true, true);
    }

    // Top-level sections are separated from one another by a blank line.
    return $heading . implode("\n\n", $sections);
}
/**
public function chapterToMarkdown(Chapter $chapter): string
{
    // Chapter name becomes the top-level heading.
    $text = '# ' . $chapter->name . "\n\n";

    // The description HTML is converted to Markdown and only included
    // when the conversion yields something non-empty.
    $description = (new HtmlToMarkdown($chapter->descriptionHtml()))->convert();
    if ($description) {
        $text .= $description . "\n\n";
    }

    // Use the same page accessor as the HTML, PDF and plain text chapter
    // exports so that all chapter formats draw from the same page set.
    // NOTE(review): presumably getVisiblePages() applies view permissions,
    // unlike the raw "pages" relation - confirm against the Chapter model.
    foreach ($chapter->getVisiblePages() as $page) {
        $text .= $this->pageToMarkdown($page) . "\n\n";
    }

    // Drop the trailing blank line left by the final append.
    return trim($text);
}
/**
{
$bookTree = (new BookContents($book))->getTree(false, true);
$text = '# ' . $book->name . "\n\n";
+
+ $description = (new HtmlToMarkdown($book->descriptionHtml()))->convert();
+ if ($description) {
+ $text .= $description . "\n\n";
+ }
+
foreach ($bookTree as $bookChild) {
if ($bookChild instanceof Chapter) {
- $text .= $this->chapterToMarkdown($bookChild);
+ $text .= $this->chapterToMarkdown($bookChild) . "\n\n";
} else {
- $text .= $this->pageToMarkdown($bookChild);
+ $text .= $this->pageToMarkdown($bookChild) . "\n\n";
}
}
- return $text;
+ return trim($text);
}
}