BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/PageContent.php

index a6ba352a834c0f5f9709c03ec79fe42f326d2138..9aa2e8d352b8650e00a292a959a44414301cf81a 100644 (file)

--- a/app/Entities/Tools/PageContent.php

+++ b/app/Entities/Tools/PageContent.php

@@ -3,8 +3,7 @@

namespace BookStack\Entities\Tools;

use BookStack\Entities\Models\Page;

-use BookStack\Entities\Tools\Markdown\CustomListItemRenderer;

-use BookStack\Entities\Tools\Markdown\CustomStrikeThroughExtension;

+use BookStack\Entities\Tools\Markdown\MarkdownToHtml;

use BookStack\Exceptions\ImageUploadException;

use BookStack\Facades\Theme;

use BookStack\Theming\ThemeEvents;

@@ -12,18 +11,15 @@ use BookStack\Uploads\ImageRepo;

use BookStack\Uploads\ImageService;

use BookStack\Util\HtmlContentFilter;

use DOMDocument;

+use DOMElement;

+use DOMNode;

use DOMNodeList;

use DOMXPath;

use Illuminate\Support\Str;

-use League\CommonMark\Block\Element\ListItem;

-use League\CommonMark\CommonMarkConverter;

-use League\CommonMark\Environment;

-use League\CommonMark\Extension\Table\TableExtension;

-use League\CommonMark\Extension\TaskList\TaskListExtension;

class PageContent

{

- protected $page;

+ protected Page $page;

/**

* PageContent constructor.

@@ -51,28 +47,11 @@ class PageContent

{

$markdown = $this->extractBase64ImagesFromMarkdown($markdown);

$this->page->markdown = $markdown;

- $html = $this->markdownToHtml($markdown);

+ $html = (new MarkdownToHtml($markdown))->convert();

$this->page->html = $this->formatHtml($html);

$this->page->text = $this->toPlainText();

}

- /**

- * Convert the given Markdown content to a HTML string.

- */

- protected function markdownToHtml(string $markdown): string

- {

- $environment = Environment::createCommonMarkEnvironment();

- $environment->addExtension(new TableExtension());

- $environment->addExtension(new TaskListExtension());

- $environment->addExtension(new CustomStrikeThroughExtension());

- $environment = Theme::dispatch(ThemeEvents::COMMONMARK_ENVIRONMENT_CONFIGURE, $environment) ?? $environment;

- $converter = new CommonMarkConverter([], $environment);

- $environment->addBlockRenderer(ListItem::class, new CustomListItemRenderer(), 10);

- return $converter->convertToHtml($markdown);

- }

/**

* Convert all base64 image data to saved images.

@@ -107,15 +86,35 @@ class PageContent

/**

* Convert all inline base64 content to uploaded image files.

+ * Regex is used to locate the start of data-uri definitions then

+ * manual looping over content is done to parse the whole data uri.

+ * Attempting to capture the whole data uri using regex can cause PHP

+ * PCRE limits to be hit with larger, multi-MB, files.

*/

protected function extractBase64ImagesFromMarkdown(string $markdown)

{

$matches = [];

- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);

+ $contentLength = strlen($markdown);

+ $replacements = [];

+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);

+ foreach ($matches[1] as $base64MatchPair) {

+ [$dataUri, $index] = $base64MatchPair;

+ for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {

+ $char = $markdown[$i];

+ if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {

+ break;

+ }

+ $dataUri .= $char;

+ }

+ $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);

+ $replacements[] = [$dataUri, $newUrl];

+ }

- foreach ($matches[1] as $base64Match) {

- $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);

- $markdown = str_replace($base64Match, $newUrl, $markdown);

+ foreach ($replacements as [$dataUri, $newUrl]) {

+ $markdown = str_replace($dataUri, $newUrl, $markdown);

}

return $markdown;

@@ -193,6 +192,15 @@ class PageContent

}

+ // Set ids on nested header nodes

+ $nestedHeaders = $xPath->query('//body//*//h1|//body//*//h2|//body//*//h3|//body//*//h4|//body//*//h5|//body//*//h6');

+ foreach ($nestedHeaders as $nestedHeader) {

+ [$oldId, $newId] = $this->setUniqueId($nestedHeader, $idMap);

+ if ($newId && $newId !== $oldId) {

+ $this->updateLinks($xPath, '#' . $oldId, '#' . $newId);

+ }

+ }

// Ensure no duplicate ids within child items

$idElems = $xPath->query('//body//*//*[@id]');

foreach ($idElems as $domElem) {

@@ -208,6 +216,9 @@ class PageContent

$html .= $doc->saveHTML($childNode);

}

+ // Perform required string-level tweaks

+ $html = str_replace("\xc2\xa0", '&nbsp;', $html);

return $html;

}

@@ -228,9 +239,9 @@ class PageContent

* A map for existing IDs should be passed in to check for current existence.

* Returns a pair of strings in the format [old_id, new_id].

*/

- protected function setUniqueId(\DOMNode $element, array &$idMap): array

+ protected function setUniqueId(DOMNode $element, array &$idMap): array

{

- if (!$element instanceof \DOMElement) {

+ if (!$element instanceof DOMElement) {

return ['', ''];

}

@@ -312,7 +323,7 @@ class PageContent

protected function headerNodesToLevelList(DOMNodeList $nodeList): array

{

- $tree = collect($nodeList)->map(function ($header) {

+ $tree = collect($nodeList)->map(function (DOMElement $header) {

$text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));

$text = mb_substr($text, 0, 100);

@@ -363,23 +374,30 @@ class PageContent

continue;

}

- // Find page and skip this if page not found

+ // Find page to use, and default replacement to empty string for non-matches.

/** @var ?Page $matchedPage */

$matchedPage = Page::visible()->find($pageId);

- if ($matchedPage === null) {

- $html = str_replace($fullMatch, '', $html);

- continue;

+ $replacement = '';

+ if ($matchedPage && count($splitInclude) === 1) {

+ // If we only have page id, just insert all page html and continue.

+ $replacement = $matchedPage->html;

+ } elseif ($matchedPage && count($splitInclude) > 1) {

+ // Otherwise, if our include tag defines a section, load that specific content

+ $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);

+ $replacement = trim($innerContent);

}

- // If we only have page id, just insert all page html and continue.

- if (count($splitInclude) === 1) {

- $html = str_replace($fullMatch, $matchedPage->html, $html);

- continue;

- }

+ $themeReplacement = Theme::dispatch(

+ ThemeEvents::PAGE_INCLUDE_PARSE,

+ $includeId,

+ $replacement,

+ clone $this->page,

+ $matchedPage ? (clone $matchedPage) : null,

+ );

- // Create and load HTML into a document

- $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);

- $html = str_replace($fullMatch, trim($innerContent), $html);

+ // Perform the content replacement

+ $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);

}

return $html;