BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/PageContent.php

index b1323bc68ac0f02b6f3f799a59a965c0f7953fb9..9aa2e8d352b8650e00a292a959a44414301cf81a 100644 (file)

--- a/app/Entities/Tools/PageContent.php

+++ b/app/Entities/Tools/PageContent.php

@@ -3,8 +3,7 @@

namespace BookStack\Entities\Tools;

use BookStack\Entities\Models\Page;

-use BookStack\Entities\Tools\Markdown\CustomListItemRenderer;

-use BookStack\Entities\Tools\Markdown\CustomStrikeThroughExtension;

+use BookStack\Entities\Tools\Markdown\MarkdownToHtml;

use BookStack\Exceptions\ImageUploadException;

use BookStack\Facades\Theme;

use BookStack\Theming\ThemeEvents;

@@ -12,18 +11,15 @@ use BookStack\Uploads\ImageRepo;

use BookStack\Uploads\ImageService;

use BookStack\Util\HtmlContentFilter;

use DOMDocument;

+use DOMElement;

+use DOMNode;

use DOMNodeList;

use DOMXPath;

use Illuminate\Support\Str;

-use League\CommonMark\Block\Element\ListItem;

-use League\CommonMark\CommonMarkConverter;

-use League\CommonMark\Environment;

-use League\CommonMark\Extension\Table\TableExtension;

-use League\CommonMark\Extension\TaskList\TaskListExtension;

class PageContent

{

- protected $page;

+ protected Page $page;

/**

* PageContent constructor.

@@ -51,28 +47,11 @@ class PageContent

{

$markdown = $this->extractBase64ImagesFromMarkdown($markdown);

$this->page->markdown = $markdown;

- $html = $this->markdownToHtml($markdown);

+ $html = (new MarkdownToHtml($markdown))->convert();

$this->page->html = $this->formatHtml($html);

$this->page->text = $this->toPlainText();

}

- /**

- * Convert the given Markdown content to a HTML string.

- */

- protected function markdownToHtml(string $markdown): string

- {

- $environment = Environment::createCommonMarkEnvironment();

- $environment->addExtension(new TableExtension());

- $environment->addExtension(new TaskListExtension());

- $environment->addExtension(new CustomStrikeThroughExtension());

- $environment = Theme::dispatch(ThemeEvents::COMMONMARK_ENVIRONMENT_CONFIGURE, $environment) ?? $environment;

- $converter = new CommonMarkConverter([], $environment);

- $environment->addBlockRenderer(ListItem::class, new CustomListItemRenderer(), 10);

- return $converter->convertToHtml($markdown);

- }

/**

* Convert all base64 image data to saved images.

@@ -107,15 +86,35 @@ class PageContent

/**

* Convert all inline base64 content to uploaded image files.

+ * Regex is used to locate the start of data-uri definitions then

+ * manual looping over content is done to parse the whole data uri.

+ * Attempting to capture the whole data uri using regex can cause PHP

+ * PCRE limits to be hit with larger, multi-MB, files.

protected function extractBase64ImagesFromMarkdown(string $markdown)

{

$matches = [];

- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);

+ $contentLength = strlen($markdown);

+ $replacements = [];

+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);

+ foreach ($matches[1] as $base64MatchPair) {

+ [$dataUri, $index] = $base64MatchPair;

+ for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {

+ $char = $markdown[$i];

+ if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {

+ break;

+ }

+ $dataUri .= $char;

+ }

- foreach ($matches[1] as $base64Match) {

- $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);

- $markdown = str_replace($base64Match, $newUrl, $markdown);

+ $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);

+ $replacements[] = [$dataUri, $newUrl];

+ }

+ foreach ($replacements as [$dataUri, $newUrl]) {

+ $markdown = str_replace($dataUri, $newUrl, $markdown);

}

return $markdown;

@@ -135,6 +134,12 @@ class PageContent

return '';

}

+ // Validate that the content is not over our upload limit

+ $uploadLimitBytes = (config('app.upload_limit') * 1000000);

+ if (strlen($imageInfo['data']) > $uploadLimitBytes) {

+ return '';

+ }

// Save image from data with a random name

$imageName = 'embedded-image-' . Str::random(8) . '.' . $imageInfo['extension'];

@@ -150,7 +155,7 @@ class PageContent

/**

* Parse a base64 image URI into the data and extension.

- * @return array{extension: array, data: string}

+ * @return array{extension: string, data: string}

protected function parseBase64ImageUri(string $uri): array

{

@@ -187,6 +192,15 @@ class PageContent

}

+ // Set ids on nested header nodes

+ $nestedHeaders = $xPath->query('//body//*//h1|//body//*//h2|//body//*//h3|//body//*//h4|//body//*//h5|//body//*//h6');

+ foreach ($nestedHeaders as $nestedHeader) {

+ [$oldId, $newId] = $this->setUniqueId($nestedHeader, $idMap);

+ if ($newId && $newId !== $oldId) {

+ $this->updateLinks($xPath, '#' . $oldId, '#' . $newId);

+ }

+ }

// Ensure no duplicate ids within child items

$idElems = $xPath->query('//body//*//*[@id]');

foreach ($idElems as $domElem) {

@@ -202,6 +216,9 @@ class PageContent

$html .= $doc->saveHTML($childNode);

}

+ // Perform required string-level tweaks

+ $html = str_replace(' ', '&nbsp;', $html);

return $html;

}

@@ -222,9 +239,9 @@ class PageContent

* A map for existing ID's should be passed in to check for current existence.

* Returns a pair of strings in the format [old_id, new_id].

- protected function setUniqueId(\DOMNode $element, array &$idMap): array

+ protected function setUniqueId(DOMNode $element, array &$idMap): array

{

- if (get_class($element) !== 'DOMElement') {

+ if (!$element instanceof DOMElement) {

return ['', ''];

}

@@ -236,7 +253,7 @@ class PageContent

return [$existingId, $existingId];

}

- // Create an unique id for the element

+ // Create a unique id for the element

// Uses the content as a basis to ensure output is the same every time

// the same content is passed through.

$contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);

@@ -306,7 +323,7 @@ class PageContent

protected function headerNodesToLevelList(DOMNodeList $nodeList): array

{

- $tree = collect($nodeList)->map(function ($header) {

+ $tree = collect($nodeList)->map(function (DOMElement $header) {

$text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));

$text = mb_substr($text, 0, 100);

@@ -357,23 +374,30 @@ class PageContent

continue;

}

- // Find page and skip this if page not found

+ // Find page to use, and default replacement to empty string for non-matches.

/** @var ?Page $matchedPage */

$matchedPage = Page::visible()->find($pageId);

- if ($matchedPage === null) {

- $html = str_replace($fullMatch, '', $html);

- continue;

+ $replacement = '';

+ if ($matchedPage && count($splitInclude) === 1) {

+ // If we only have page id, just insert all page html and continue.

+ $replacement = $matchedPage->html;

+ } elseif ($matchedPage && count($splitInclude) > 1) {

+ // Otherwise, if our include tag defines a section, load that specific content

+ $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);

+ $replacement = trim($innerContent);

}

- // If we only have page id, just insert all page html and continue.

- if (count($splitInclude) === 1) {

- $html = str_replace($fullMatch, $matchedPage->html, $html);

- continue;

- }

+ $themeReplacement = Theme::dispatch(

+ ThemeEvents::PAGE_INCLUDE_PARSE,

+ $includeId,

+ $replacement,

+ clone $this->page,

+ $matchedPage ? (clone $matchedPage) : null,

+ );

- // Create and load HTML into a document

- $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);

- $html = str_replace($fullMatch, trim($innerContent), $html);

+ // Perform the content replacement

+ $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);

}

return $html;

@@ -384,7 +408,7 @@ class PageContent

protected function fetchSectionOfPage(Page $page, string $sectionId): string

{

- $topLevelTags = ['table', 'ul', 'ol'];

+ $topLevelTags = ['table', 'ul', 'ol', 'pre'];

$doc = $this->loadDocumentFromHtml($page->html);

// Search included content for the id given and blank out if not exists.