/**
* Convert all inline base64 content to uploaded image files.
+ * Regex is only used to locate the start of each data-uri definition; the
+ * rest of the uri is then measured with a plain byte scan (strcspn).
+ * Attempting to capture the whole data uri using regex can cause PHP
+ * PCRE limits to be hit with larger, multi-MB, files.
+ *
+ * A data uri ends at the first ')', '"' or whitespace byte, or at the end
+ * of the content. Each distinct data uri is passed to
+ * base64ImageUriToUploadedImageUrl() once, and every occurrence of it in
+ * the markdown is replaced with the returned url.
+ *
+ * @param string $markdown Markdown that may contain inline base64 images.
+ * @return string The markdown with the matched data uris replaced.
*/
protected function extractBase64ImagesFromMarkdown(string $markdown)
{
$matches = [];
- preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
+ $replacements = [];
+ preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
+
+ // With PREG_OFFSET_CAPTURE each capture is a [text, byte offset] pair.
+ // Fall back to an empty list in case the match itself failed.
+ foreach ($matches[1] ?? [] as [$uriPrefix, $offset]) {
+ // Count the bytes following the matched prefix up to the first terminator.
+ // The mask mirrors the [)"\s] terminator class of the previous regex, so
+ // tabs and carriage returns end the uri just like spaces and newlines.
+ $bodyLength = strcspn($markdown, ")\" \t\n\r\v\f", $offset + strlen($uriPrefix));
+ $dataUri = substr($markdown, $offset, strlen($uriPrefix) + $bodyLength);
+
+ // str_replace() below swaps every occurrence of a uri at once, so a
+ // repeated uri only needs to be converted a single time.
+ if (!array_key_exists($dataUri, $replacements)) {
+ $replacements[$dataUri] = $this->base64ImageUriToUploadedImageUrl($dataUri);
+ }
+ }
- foreach ($matches[1] as $base64Match) {
- $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);
- $markdown = str_replace($base64Match, $newUrl, $markdown);
+ foreach ($replacements as $dataUri => $newUrl) {
+ $markdown = str_replace($dataUri, $newUrl, $markdown);
}
return $markdown;
$this->deleteImage($imagePath);
}
+ /**
+ * Ensure base64 image extraction from markdown still works when the PCRE
+ * backtrack and recursion limits are far below the size of the embedded data.
+ */
+ public function test_markdown_base64_extract_not_limited_by_pcre_limits()
+ {
+ $pcreBacktrackLimit = ini_get("pcre.backtrack_limit");
+ $pcreRecursionLimit = ini_get("pcre.recursion_limit");
+
+ $this->asEditor();
+ $page = Page::query()->first();
+ $imagePath = null;
+
+ try {
+ ini_set("pcre.backtrack_limit", "500");
+ ini_set("pcre.recursion_limit", "500");
+
+ // 5000 bytes of content, well beyond the 500 limits set above.
+ $content = str_repeat('a', 5000);
+ $base64Content = base64_encode($content);
+
+ // Embed the same image twice so that two <img> tags are expected below.
+ $this->put($page->getUrl(), [
+ 'name' => $page->name, 'summary' => '',
+ 'markdown' => 'test ![test](data:image/jpeg;base64,' . $base64Content . ') ![test](data:image/jpeg;base64,' . $base64Content . ')',
+ ]);
+
+ $page->refresh();
+ $this->assertStringMatchesFormat('<p%A>test <img src="http://localhost/uploads/images/gallery/%A.jpeg" alt="test"> <img src="http://localhost/uploads/images/gallery/%A.jpeg" alt="test">%A</p>%A', $page->html);
+
+ $matches = [];
+ preg_match('/src="https:\/\/p.rizon.top:443\/http\/localhost(.*?)"/', $page->html, $matches);
+ $imagePath = $matches[1];
+ $imageFile = public_path($imagePath);
+ $this->assertEquals($content, file_get_contents($imageFile));
+ } finally {
+ // Always restore the original limits, even when an assertion above
+ // fails, so the lowered values cannot leak into later tests.
+ ini_set("pcre.backtrack_limit", $pcreBacktrackLimit);
+ ini_set("pcre.recursion_limit", $pcreRecursionLimit);
+
+ // Clean up via deleteImage() once the image path has been resolved.
+ if ($imagePath !== null) {
+ $this->deleteImage($imagePath);
+ }
+ }
+ }
+
public function test_base64_images_within_markdown_blanked_if_not_supported_extension_for_extract()
{
$this->asEditor();