| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Entities\Tools; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  | use BookStack\Entities\Models\Book; | 
					
						
							|  |  |  | use BookStack\Entities\Models\Chapter; | 
					
						
							|  |  |  | use BookStack\Entities\Models\Page; | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  | use BookStack\Entities\Tools\Markdown\HtmlToMarkdown; | 
					
						
							| 
									
										
										
										
											2018-09-25 19:30:50 +08:00
										 |  |  | use BookStack\Uploads\ImageService; | 
					
						
							| 
									
										
										
										
											2022-03-07 22:27:41 +08:00
										 |  |  | use BookStack\Util\CspService; | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  | use DOMDocument; | 
					
						
							|  |  |  | use DOMElement; | 
					
						
							|  |  |  | use DOMXPath; | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | use Exception; | 
					
						
							|  |  |  | use Throwable; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  | class ExportFormatter | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function __construct( | 
					
						
							|  |  |  |         protected ImageService $imageService, | 
					
						
							|  |  |  |         protected PdfGenerator $pdfGenerator, | 
					
						
							|  |  |  |         protected CspService $cspService | 
					
						
							|  |  |  |     ) { | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a self-contained HTML file. | 
					
						
							|  |  |  |      * Includes required CSS & image content. Images are base64 encoded into the HTML. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function pageToContainedHtml(Page $page): string | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $pageHtml = view('exports.page', [ | 
					
						
							| 
									
										
										
										
											2022-03-07 22:27:41 +08:00
										 |  |  |             'page'       => $page, | 
					
						
							|  |  |  |             'format'     => 'html', | 
					
						
							|  |  |  |             'cspContent' => $this->cspService->getCspMetaTagValue(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale'     => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $this->containHtml($pageHtml); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a self-contained HTML file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function chapterToContainedHtml(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $pages = $chapter->getVisiblePages(); | 
					
						
							| 
									
										
										
										
											2018-01-29 00:58:52 +08:00
										 |  |  |         $pages->each(function ($page) { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |             $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         }); | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $html = view('exports.chapter', [ | 
					
						
							| 
									
										
										
										
											2022-03-07 22:27:41 +08:00
										 |  |  |             'chapter'    => $chapter, | 
					
						
							|  |  |  |             'pages'      => $pages, | 
					
						
							|  |  |  |             'format'     => 'html', | 
					
						
							|  |  |  |             'cspContent' => $this->cspService->getCspMetaTagValue(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale'     => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         return $this->containHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     /** | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      * Convert a book to a self-contained HTML file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function bookToContainedHtml(Book $book): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $html = view('exports.book', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'book'         => $book, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'bookChildren' => $bookTree, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'format'       => 'html', | 
					
						
							| 
									
										
										
										
											2022-03-07 22:27:41 +08:00
										 |  |  |             'cspContent'   => $this->cspService->getCspMetaTagValue(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale'       => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->containHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function pageToPdf(Page $page): string | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $html = view('exports.page', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'page'   => $page, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'format' => 'pdf', | 
					
						
							| 
									
										
										
										
											2022-01-25 01:24:00 +08:00
										 |  |  |             'engine' => $this->pdfGenerator->getActiveEngine(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale' => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function chapterToPdf(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $pages = $chapter->getVisiblePages(); | 
					
						
							| 
									
										
										
										
											2018-01-29 00:58:52 +08:00
										 |  |  |         $pages->each(function ($page) { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |             $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         }); | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $html = view('exports.chapter', [ | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |             'chapter' => $chapter, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'pages'   => $pages, | 
					
						
							|  |  |  |             'format'  => 'pdf', | 
					
						
							| 
									
										
										
										
											2022-01-25 01:24:00 +08:00
										 |  |  |             'engine'  => $this->pdfGenerator->getActiveEngine(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale'  => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     /** | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * Convert a book to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |     public function bookToPdf(Book $book): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2022-06-09 00:56:59 +08:00
										 |  |  |         $html = view('exports.book', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'book'         => $book, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'bookChildren' => $bookTree, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'format'       => 'pdf', | 
					
						
							| 
									
										
										
										
											2022-01-25 01:24:00 +08:00
										 |  |  |             'engine'       => $this->pdfGenerator->getActiveEngine(), | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             'locale'       => user()->getLocale(), | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * Convert normal web-page HTML to a PDF. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Exception | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     protected function htmlToPdf(string $html): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         $html = $this->containHtml($html); | 
					
						
							|  |  |  |         $html = $this->replaceIframesWithLinks($html); | 
					
						
							| 
									
										
										
										
											2022-02-09 19:33:23 +08:00
										 |  |  |         $html = $this->openDetailElements($html); | 
					
						
							| 
									
										
										
										
											2021-11-29 05:01:35 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         return $this->pdfGenerator->fromHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-09 19:33:23 +08:00
										 |  |  |     /** | 
					
						
							| 
									
										
										
										
											2022-02-13 21:16:43 +08:00
										 |  |  |      * Within the given HTML content, Open any detail blocks. | 
					
						
							| 
									
										
										
										
											2022-02-09 19:33:23 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     protected function openDetailElements(string $html): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         libxml_use_internal_errors(true); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $doc = new DOMDocument(); | 
					
						
							|  |  |  |         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); | 
					
						
							|  |  |  |         $xPath = new DOMXPath($doc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $details = $xPath->query('//details'); | 
					
						
							|  |  |  |         /** @var DOMElement $detail */ | 
					
						
							|  |  |  |         foreach ($details as $detail) { | 
					
						
							|  |  |  |             $detail->setAttribute('open', 'open'); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $doc->saveHTML(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Within the given HTML content, replace any iframe elements | 
					
						
							|  |  |  |      * with anchor links within paragraph blocks. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function replaceIframesWithLinks(string $html): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         libxml_use_internal_errors(true); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $doc = new DOMDocument(); | 
					
						
							|  |  |  |         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); | 
					
						
							|  |  |  |         $xPath = new DOMXPath($doc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $iframes = $xPath->query('//iframe'); | 
					
						
							|  |  |  |         /** @var DOMElement $iframe */ | 
					
						
							|  |  |  |         foreach ($iframes as $iframe) { | 
					
						
							|  |  |  |             $link = $iframe->getAttribute('src'); | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |             if (str_starts_with($link, '//')) { | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |                 $link = 'https:' . $link; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $anchor = $doc->createElement('a', $link); | 
					
						
							|  |  |  |             $anchor->setAttribute('href', $link); | 
					
						
							|  |  |  |             $paragraph = $doc->createElement('p'); | 
					
						
							|  |  |  |             $paragraph->appendChild($anchor); | 
					
						
							| 
									
										
										
										
											2021-11-29 05:01:35 +08:00
										 |  |  |             $iframe->parentNode->replaceChild($paragraph, $iframe); | 
					
						
							| 
									
										
										
										
											2017-01-01 20:20:30 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         return $doc->saveHTML(); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Bundle of the contents of a html file to be self-contained. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Exception | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     protected function containHtml(string $htmlContent): string | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |         $imageTagsOutput = []; | 
					
						
							| 
									
										
										
										
											2020-12-07 06:23:21 +08:00
										 |  |  |         preg_match_all("/\<img.*?src\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $imageTagsOutput); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         // Replace image src with base64 encoded image strings
 | 
					
						
							|  |  |  |         if (isset($imageTagsOutput[0]) && count($imageTagsOutput[0]) > 0) { | 
					
						
							|  |  |  |             foreach ($imageTagsOutput[0] as $index => $imgMatch) { | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 $oldImgTagString = $imgMatch; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |                 $srcString = $imageTagsOutput[2][$index]; | 
					
						
							| 
									
										
										
										
											2023-10-01 01:28:42 +08:00
										 |  |  |                 $imageEncoded = $this->imageService->imageUrlToBase64($srcString); | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 if ($imageEncoded === null) { | 
					
						
							|  |  |  |                     $imageEncoded = $srcString; | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |                 } | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 $newImgTagString = str_replace($srcString, $imageEncoded, $oldImgTagString); | 
					
						
							|  |  |  |                 $htmlContent = str_replace($oldImgTagString, $newImgTagString, $htmlContent); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $linksOutput = []; | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         preg_match_all("/\<a.*href\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $linksOutput); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-02 21:21:43 +08:00
										 |  |  |         // Update relative links to be absolute, with instance url
 | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |         if (isset($linksOutput[0]) && count($linksOutput[0]) > 0) { | 
					
						
							|  |  |  |             foreach ($linksOutput[0] as $index => $linkMatch) { | 
					
						
							|  |  |  |                 $oldLinkString = $linkMatch; | 
					
						
							|  |  |  |                 $srcString = $linksOutput[2][$index]; | 
					
						
							| 
									
										
										
										
											2023-09-18 00:35:00 +08:00
										 |  |  |                 if (!str_starts_with(trim($srcString), 'http')) { | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |                     $newSrcString = url($srcString); | 
					
						
							|  |  |  |                     $newLinkString = str_replace($srcString, $newSrcString, $oldLinkString); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |                     $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $htmlContent; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Converts the page contents into simple plain text. | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |      * This method filters any bad looking content to provide a nice final output. | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |     public function pageToPlainText(Page $page, bool $pageRendered = false, bool $fromParent = false): string | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         $html = $pageRendered ? $page->html : (new PageContent($page))->render(); | 
					
						
							|  |  |  |         // Add proceeding spaces before tags so spaces remain between
 | 
					
						
							|  |  |  |         // text within elements after stripping tags.
 | 
					
						
							|  |  |  |         $html = str_replace('<', " <", $html); | 
					
						
							|  |  |  |         $text = trim(strip_tags($html)); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         // Replace multiple spaces with single spaces
 | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         $text = preg_replace('/ {2,}/', ' ', $text); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         // Reduce multiple horrid whitespace characters.
 | 
					
						
							|  |  |  |         $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text); | 
					
						
							|  |  |  |         $text = html_entity_decode($text); | 
					
						
							|  |  |  |         // Add title
 | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         $text = $page->name . ($fromParent ? "\n" : "\n\n") . $text; | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $text; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     public function chapterToPlainText(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         $text = $chapter->name . "\n" . $chapter->description; | 
					
						
							|  |  |  |         $text = trim($text) . "\n\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $parts = []; | 
					
						
							| 
									
										
										
										
											2020-12-18 21:56:00 +08:00
										 |  |  |         foreach ($chapter->getVisiblePages() as $page) { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |             $parts[] = $this->pageToPlainText($page, false, true); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         return $text . implode("\n\n", $parts); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a book into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     public function bookToPlainText(Book $book): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							|  |  |  |         $text = $book->name . "\n" . $book->description; | 
					
						
							|  |  |  |         $text = rtrim($text) . "\n\n"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $parts = []; | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         foreach ($bookTree as $bookChild) { | 
					
						
							|  |  |  |             if ($bookChild->isA('chapter')) { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |                 $parts[] = $this->chapterToPlainText($bookChild); | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |             } else { | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |                 $parts[] = $this->pageToPlainText($bookChild, true, true); | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-25 01:03:37 +08:00
										 |  |  |         return $text . implode("\n\n", $parts); | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a Markdown file. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |     public function pageToMarkdown(Page $page): string | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |         if ($page->markdown) { | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             return '# ' . $page->name . "\n\n" . $page->markdown; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         return '# ' . $page->name . "\n\n" . (new HtmlToMarkdown($page->html))->convert(); | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a Markdown file. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |     public function chapterToMarkdown(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         $text = '# ' . $chapter->name . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         $text .= $chapter->description . "\n\n"; | 
					
						
							|  |  |  |         foreach ($chapter->pages as $page) { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:32:55 +08:00
										 |  |  |             $text .= $this->pageToMarkdown($page) . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-23 22:31:42 +08:00
										 |  |  |         return trim($text); | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a book into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function bookToMarkdown(Book $book): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         $text = '# ' . $book->name . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         foreach ($bookTree as $bookChild) { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |             if ($bookChild instanceof Chapter) { | 
					
						
							| 
									
										
										
										
											2022-03-23 22:31:42 +08:00
										 |  |  |                 $text .= $this->chapterToMarkdown($bookChild) . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |             } else { | 
					
						
							| 
									
										
										
										
											2022-03-23 22:31:42 +08:00
										 |  |  |                 $text .= $this->pageToMarkdown($bookChild) . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-23 22:31:42 +08:00
										 |  |  |         return trim($text); | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  | } |