BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/PageContent.php

index c60cf03113a9cceaa980f040325b5df8cacd0e8d..b1c750adbdd6a3a3645c75e91b0f44c9e940bc6c 100644 (file)

--- a/app/Entities/Tools/PageContent.php

+++ b/app/Entities/Tools/PageContent.php

@@ -12,6 +12,8 @@ use BookStack\Uploads\ImageRepo;

use BookStack\Uploads\ImageService;

use BookStack\Util\HtmlContentFilter;

use DOMDocument;

+use DOMElement;

+use DOMNode;

use DOMNodeList;

use DOMXPath;

use Illuminate\Support\Str;

@@ -107,15 +109,35 @@ class PageContent

/**

* Convert all inline base64 content to uploaded image files.

+ * Regex is used to locate the start of data-uri definitions then

+ * manual looping over content is done to parse the whole data uri.

+ * Attempting to capture the whole data uri using regex can cause PHP

+ * PCRE limits to be hit with larger, multi-MB, files.

*/

protected function extractBase64ImagesFromMarkdown(string $markdown)

{

$matches = [];

- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);

+ $contentLength = strlen($markdown);

+ $replacements = [];

+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);

+ foreach ($matches[1] as $base64MatchPair) {

+ [$dataUri, $index] = $base64MatchPair;

+ for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {

+ $char = $markdown[$i];

+ if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {

+ break;

+ }

+ $dataUri .= $char;

+ }

+ $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);

+ $replacements[] = [$dataUri, $newUrl];

+ }

- foreach ($matches[1] as $base64Match) {

- $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);

- $markdown = str_replace($base64Match, $newUrl, $markdown);

+ foreach ($replacements as [$dataUri, $newUrl]) {

+ $markdown = str_replace($dataUri, $newUrl, $markdown);

}

return $markdown;

@@ -217,6 +239,9 @@ class PageContent

$html .= $doc->saveHTML($childNode);

}

+ // Perform required string-level tweaks

+ $html = str_replace("\xc2\xa0", '&#160;', $html);

return $html;

}

@@ -237,9 +262,9 @@ class PageContent

* A map for existing ID's should be passed in to check for current existence.

* Returns a pair of strings in the format [old_id, new_id].

*/

- protected function setUniqueId(\DOMNode $element, array &$idMap): array

+ protected function setUniqueId(DOMNode $element, array &$idMap): array

{

- if (!$element instanceof \DOMElement) {

+ if (!$element instanceof DOMElement) {

return ['', ''];

}

@@ -321,7 +346,7 @@ class PageContent

protected function headerNodesToLevelList(DOMNodeList $nodeList): array

{

- $tree = collect($nodeList)->map(function ($header) {

+ $tree = collect($nodeList)->map(function (DOMElement $header) {

$text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));

$text = mb_substr($text, 0, 100);