use BookStack\Uploads\ImageService;
use BookStack\Util\HtmlContentFilter;
use DOMDocument;
+use DOMElement;
+use DOMNode;
use DOMNodeList;
use DOMXPath;
use Illuminate\Support\Str;
/**
* Convert all inline base64 content to uploaded image files.
+ * Regex is used to locate the start of data-uri definitions then
+ * manual looping over content is done to parse the whole data uri.
+ * Attempting to capture the whole data uri using regex can cause PHP
+ * PCRE limits to be hit with larger, multi-MB, files.
*/
protected function extractBase64ImagesFromMarkdown(string $markdown)
{
$matches = [];
- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
+ $contentLength = strlen($markdown);
+ $replacements = [];
+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
+
+ foreach ($matches[1] as $base64MatchPair) {
+ [$dataUri, $index] = $base64MatchPair;
+
+ for ($i = strlen($dataUri) + $index; $i < $contentLength; $i++) {
+ $char = $markdown[$i];
+ if ($char === ')' || $char === ' ' || $char === "\n" || $char === '"') {
+ break;
+ }
+ $dataUri .= $char;
+ }
+
+ $newUrl = $this->base64ImageUriToUploadedImageUrl($dataUri);
+ $replacements[] = [$dataUri, $newUrl];
+ }
- foreach ($matches[1] as $base64Match) {
- $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);
- $markdown = str_replace($base64Match, $newUrl, $markdown);
+ foreach ($replacements as [$dataUri, $newUrl]) {
+ $markdown = str_replace($dataUri, $newUrl, $markdown);
}
return $markdown;
/**
* Parse a base64 image URI into the data and extension.
*
- * @return array{extension: array, data: string}
+ * @return array{extension: string, data: string}
*/
protected function parseBase64ImageUri(string $uri): array
{
}
}
+ // Set ids on nested header nodes
+ $nestedHeaders = $xPath->query('//body//*//h1|//body//*//h2|//body//*//h3|//body//*//h4|//body//*//h5|//body//*//h6');
+ foreach ($nestedHeaders as $nestedHeader) {
+ [$oldId, $newId] = $this->setUniqueId($nestedHeader, $idMap);
+ if ($newId && $newId !== $oldId) {
+ $this->updateLinks($xPath, '#' . $oldId, '#' . $newId);
+ }
+ }
+
// Ensure no duplicate ids within child items
$idElems = $xPath->query('//body//*//*[@id]');
foreach ($idElems as $domElem) {
$html .= $doc->saveHTML($childNode);
}
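+ // Perform required string-level tweaks on the serialized HTML.
+ // NOTE(review): both str_replace() arguments on the next line display as a plain
+ // space, which would make the call a no-op. Presumably the search string is a
+ // literal non-breaking space - confirm the actual bytes, and consider writing it
+ // as "\xc2\xa0" so the intent stays visible and survives editors/copy-paste.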
+ $html = str_replace(' ', ' ', $html);
+
return $html;
}
* A map of existing IDs should be passed in to check for current existence.
* Returns a pair of strings in the format [old_id, new_id].
*/
- protected function setUniqueId(\DOMNode $element, array &$idMap): array
+ protected function setUniqueId(DOMNode $element, array &$idMap): array
{
- if (get_class($element) !== 'DOMElement') {
+ if (!$element instanceof DOMElement) {
return ['', ''];
}
return [$existingId, $existingId];
}
- // Create an unique id for the element
+ // Create a unique id for the element
// Uses the content as a basis to ensure output is the same every time
// the same content is passed through.
$contentId = 'bkmrk-' . mb_substr(strtolower(preg_replace('/\s+/', '-', trim($element->nodeValue))), 0, 20);
*/
protected function headerNodesToLevelList(DOMNodeList $nodeList): array
{
- $tree = collect($nodeList)->map(function ($header) {
+ $tree = collect($nodeList)->map(function (DOMElement $header) {
$text = trim(str_replace("\xc2\xa0", '', $header->nodeValue));
$text = mb_substr($text, 0, 100);