use BookStack\Facades\Theme;
use BookStack\Theming\ThemeEvents;
use BookStack\Uploads\ImageRepo;
+use BookStack\Uploads\ImageService;
use BookStack\Util\HtmlContentFilter;
use DOMDocument;
+use DOMElement;
+use DOMNode;
use DOMNodeList;
use DOMXPath;
use Illuminate\Support\Str;
$body = $container->childNodes->item(0);
$childNodes = $body->childNodes;
$xPath = new DOMXPath($doc);
- $imageRepo = app()->make(ImageRepo::class);
// Get all img elements with image data blobs
$imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
foreach ($imageNodes as $imageNode) {
$imageSrc = $imageNode->getAttribute('src');
- [$dataDefinition, $base64ImageData] = explode(',', $imageSrc, 2);
- $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
-
- // Validate extension
- if (!$imageRepo->imageExtensionSupported($extension)) {
- $imageNode->setAttribute('src', '');
- continue;
- }
-
- // Save image from data with a random name
- $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
-
- try {
- $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
- $imageNode->setAttribute('src', $image->url);
- } catch (ImageUploadException $exception) {
- $imageNode->setAttribute('src', '');
- }
+ $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc);
+ $imageNode->setAttribute('src', $newUrl);
}
// Generate inner html as a string
/**
* Convert all inline base64 content to uploaded image files.
+ * Regex is used to locate the start of data-uri definitions then
+ * manual looping over content is done to parse the whole data uri.
+ * Attempting to capture the whole data uri using regex can cause PHP
+ * PCRE limits to be hit with larger, multi-MB, files.
+ *
+ * The manual loop reads forward from the end of the matched data-uri prefix
+ * until a closing bracket, space, newline or double-quote is met, or until
+ * the end of the content is reached.
+ * Each data uri found is handed to base64ImageUriToUploadedImageUrl() and
+ * the replacements are applied only once every match has been scanned, so
+ * the offsets captured by the regex always refer to the unmodified markdown.
+ *
+ * @param string $markdown Markdown content that may contain embedded data-uri images.
+ * @return string The markdown with each located data uri replaced by the URL returned for it.
*/
protected function extractBase64ImagesFromMarkdown(string $markdown)
{
- $imageRepo = app()->make(ImageRepo::class);
$matches = [];
- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
-
- foreach ($matches[1] as $base64Match) {
- [$dataDefinition, $base64ImageData] = explode(',', $base64Match, 2);
- $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? 'png');
-
- // Validate extension
- if (!$imageRepo->imageExtensionSupported($extension)) {
- $markdown = str_replace($base64Match, '', $markdown);
- continue;
+ $contentLength = strlen($markdown);
+ $replacements = [];
+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
+
+ foreach ($matches[1] as $base64MatchPair) {
+ [$dataUri, $index] = $base64MatchPair;
+
+ for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {
+ $char = $markdown[$i];
+ if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {
+ break;
+ }
+ $dataUri .= $char;
}
- // Save image from data with a random name
- $imageName = 'embedded-image-' . Str::random(8) . '.' . $extension;
+ $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);
+ $replacements[] = [$dataUri, $newUrl];
+ }
- try {
- $image = $imageRepo->saveNewFromData($imageName, base64_decode($base64ImageData), 'gallery', $this->page->id);
- $markdown = str_replace($base64Match, $image->url, $markdown);
- } catch (ImageUploadException $exception) {
- $markdown = str_replace($base64Match, '', $markdown);
- }
+ foreach ($replacements as [$dataUri, $newUrl]) {
+ $markdown = str_replace($dataUri, $newUrl, $markdown);
}
return $markdown;
}
+ /**
+  * Upload the image held within the given base64 data URI and provide the URL of the stored image.
+  * An empty string is returned when the URI holds no decodable data, uses an extension
+  * that is not supported, decodes to more bytes than the configured upload limit
+  * (app.upload_limit multiplied by 1,000,000), or fails during the upload itself.
+  */
+ protected function base64ImageUriToUploadedImageUrl(string $uri): string
+ {
+     $imageRepo = app()->make(ImageRepo::class);
+     $parsed = $this->parseBase64ImageUri($uri);
+     $imageData = $parsed['data'];
+
+     // Reject content that could not be decoded or that uses an unsupported extension
+     $isUsable = !empty($imageData) && ImageService::isExtensionSupported($parsed['extension']);
+     if (!$isUsable) {
+         return '';
+     }
+
+     // Reject content that is larger than our upload limit
+     $maxBytes = config('app.upload_limit') * 1000000;
+     if (strlen($imageData) > $maxBytes) {
+         return '';
+     }
+
+     // Store the image under a randomised name within the gallery of the current page
+     $fileName = 'embedded-image-' . Str::random(8) . '.' . $parsed['extension'];
+
+     try {
+         $uploaded = $imageRepo->saveNewFromData($fileName, $imageData, 'gallery', $this->page->id);
+     } catch (ImageUploadException $exception) {
+         return '';
+     }
+
+     return $uploaded->url;
+ }
+
+ /**
+  * Parse a base64 image URI into the data and extension.
+  * The URI is split on its first comma: the part before the comma is read for
+  * the extension (the segment following the first "/" or ";", lower-cased) and
+  * the part after the comma is base64-decoded.
+  * A URI without any comma is treated as carrying no data rather than
+  * triggering an undefined-offset error.
+  *
+  * @return array{extension: string, data: string} Either value is an empty string when it could not be parsed.
+  */
+ protected function parseBase64ImageUri(string $uri): array
+ {
+     // Pad to two parts so that a URI lacking a comma still destructures
+     // cleanly, leaving the data portion as an empty string.
+     [$dataDefinition, $base64ImageData] = array_pad(explode(',', $uri, 2), 2, '');
+     $extension = strtolower(preg_split('/[\/;]/', $dataDefinition)[1] ?? '');
+
+     return [
+         'extension' => $extension,
+         // base64_decode() returns false on failure; normalise that to an empty string.
+         'data' => base64_decode($base64ImageData) ?: '',
+     ];
+ }
+
/**
* Formats a page's html to be tagged correctly within the system.
*/
}
}
+ // Set ids on nested header nodes
+ $nestedHeaders = $xPath->query('//body//*//h1|//body//*//h2|//body//*//h3|//body//*//h4|//body//*//h5|//body//*//h6');
+ foreach ($nestedHeaders as $nestedHeader) {
+ [$oldId, $newId] = $this->setUniqueId($nestedHeader, $idMap);
+ if ($newId && $newId !== $oldId) {
+ $this->updateLinks($xPath, '#' . $oldId, '#' . $newId);
+ }
+ }
+
// Ensure no duplicate ids within child items
$idElems = $xPath->query('//body//*//*[@id]');
foreach ($idElems as $domElem) {
$html .= $doc->saveHTML($childNode);
}
+ // Perform required string-level tweaks
+ $html = str_replace(' ', ' ', $html);
+
return $html;
}
* A map for existing ID's should be passed in to check for current existence.
* Returns a pair of strings in the format [old_id, new_id].
*/
- protected function setUniqueId(\DOMNode $element, array &$idMap): array
+ protected function setUniqueId(DOMNode $element, array &$idMap): array
{
- if (get_class($element) !== 'DOMElement') {
+ if (!$element instanceof DOMElement) {
return ['', ''];
}
return [$existingId, $existingId];
}
- // Create an unique id for the element
+ // Create a unique id for the element
// Uses the content as a basis to ensure output is the same every time
// the same content is passed through.
$contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);
*/
protected function headerNodesToLevelList(DOMNodeList $nodeList): array
{
- $tree = collect($nodeList)->map(function ($header) {
+ $tree = collect($nodeList)->map(function (DOMElement $header) {
$text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));
$text = mb_substr($text, 0, 100);
*/
protected function fetchSectionOfPage(Page $page, string $sectionId): string
{
- $topLevelTags = ['table', 'ul', 'ol'];
+ $topLevelTags = ['table', 'ul', 'ol', 'pre'];
$doc = $this->loadDocumentFromHtml($page->html);
// Search included content for the id given and blank out if not exists.