diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index b95131fce..dbb62021c 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -109,15 +109,52 @@ class PageContent
 
     /**
      * Convert all inline base64 content to uploaded image files.
+     * Regex is used only to locate the start of each data-uri definition; the
+     * end of the uri is then found with a plain byte scan over the content.
+     * Attempting to capture the whole data uri using regex can cause PHP
+     * PCRE limits to be hit with larger, multi-MB, files.
+     * Each distinct data uri is uploaded once and is swapped for its new URL
+     * by offset, so a uri that is a prefix of another cannot corrupt it.
+     *
+     * @param string $markdown Markdown content that may embed base64 images.
+     *
+     * @return string The markdown with each located data uri replaced by the
+     *                value base64ImageUriToUploadedImageUrl() returned for it.
      */
     protected function extractBase64ImagesFromMarkdown(string $markdown)
     {
         $matches = [];
-        preg_match_all('/!\[.*?]\(.*?(data:image\/.*?)[)"\s]/', $markdown, $matches);
+        preg_match_all('/!\[.*?]\(.*?(data:image\/.{1,6};base64,)/', $markdown, $matches, PREG_OFFSET_CAPTURE);
+
+        // Characters that end a data uri: closing bracket, double quote or whitespace.
+        $terminators = ")\" \t\r\n\v\f";
+        $uploadedUrls = [];
+        $spans = [];
+        $scannedTo = 0;
 
-        foreach ($matches[1] as $base64Match) {
-            $newUrl = $this->base64ImageUriToUploadedImageUrl($base64Match);
-            $markdown = str_replace($base64Match, $newUrl, $markdown);
+        foreach ($matches[1] ?? [] as [$uriStart, $index]) {
+            // A match starting inside an already scanned uri is part of that uri.
+            if ($index < $scannedTo) {
+                continue;
+            }
+
+            // strcspn() measures the run up to the next terminator in one call, so
+            // the multi-MB payload is never built up character by character.
+            $payloadStart = $index + strlen($uriStart);
+            $scannedTo = $payloadStart + strcspn($markdown, $terminators, $payloadStart);
+            $dataUri = substr($markdown, $index, $scannedTo - $index);
+
+            // Upload each distinct image once, even when it is embedded repeatedly.
+            if (!array_key_exists($dataUri, $uploadedUrls)) {
+                $uploadedUrls[$dataUri] = $this->base64ImageUriToUploadedImageUrl($dataUri);
+            }
+
+            $spans[] = [$index, $scannedTo - $index, $uploadedUrls[$dataUri]];
+        }
+
+        // Apply replacements last-to-first so that earlier offsets remain valid.
+        foreach (array_reverse($spans) as [$offset, $length, $newUrl]) {
+            $markdown = substr_replace($markdown, $newUrl, $offset, $length);
         }
 
         return $markdown;
diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php
index 9524186c8..cf1ecd84d 100644
--- a/tests/Entity/PageContentTest.php
+++ b/tests/Entity/PageContentTest.php
@@ -657,6 +657,46 @@ class PageContentTest extends TestCase
         $this->deleteImage($imagePath);
     }
 
+    public function test_markdown_base64_extract_not_limited_by_pcre_limits()
+    {
+        $pcreBacktrackLimit = ini_get("pcre.backtrack_limit");
+        $pcreRecursionLimit = ini_get("pcre.recursion_limit");
+
+        $this->asEditor();
+        $page = Page::query()->first();
+
+        ini_set("pcre.backtrack_limit", "500");
+        ini_set("pcre.recursion_limit", "500");
+
+        // The limits are process-wide, so restore them even when an assertion fails.
+        try {
+            $content = str_repeat('a', 5000);
+            $base64Content = base64_encode($content);
+
+            $this->put($page->getUrl(), [
+                'name' => $page->name, 'summary' => '',
+                'markdown' => 'test ![test](data:image/jpeg;base64,' . $base64Content . ') ![test](data:image/jpeg;base64,' . $base64Content . ')',
+            ]);
+
+            $page->refresh();
+            // NOTE(review): this expected format looks like it lost its HTML markup in this copy - confirm.
+            $this->assertStringMatchesFormat('test test test%A
+
+%A', $page->html);
+
+            $matches = [];
+            preg_match('/src="http:\/\/localhost(.*?)"/', $page->html, $matches);
+            $imagePath = $matches[1];
+            $imageFile = public_path($imagePath);
+            $this->assertEquals($content, file_get_contents($imageFile));
+
+            $this->deleteImage($imagePath);
+        } finally {
+            ini_set("pcre.backtrack_limit", $pcreBacktrackLimit);
+            ini_set("pcre.recursion_limit", $pcreRecursionLimit);
+        }
+    }
+
     public function test_base64_images_within_markdown_blanked_if_not_supported_extension_for_extract()
     {
         $this->asEditor();