| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Entities\Tools; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  | use BookStack\Entities\Models\Book; | 
					
						
							|  |  |  | use BookStack\Entities\Models\Chapter; | 
					
						
							|  |  |  | use BookStack\Entities\Models\Page; | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  | use BookStack\Entities\Tools\Markdown\HtmlToMarkdown; | 
					
						
							| 
									
										
										
										
											2018-09-25 19:30:50 +08:00
										 |  |  | use BookStack\Uploads\ImageService; | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  | use DOMDocument; | 
					
						
							|  |  |  | use DOMElement; | 
					
						
							|  |  |  | use DOMXPath; | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | use Exception; | 
					
						
							|  |  |  | use Throwable; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  | class ExportFormatter | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |     protected $imageService; | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |     protected $pdfGenerator; | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * ExportService constructor. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |     public function __construct(ImageService $imageService, PdfGenerator $pdfGenerator) | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |         $this->imageService = $imageService; | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         $this->pdfGenerator = $pdfGenerator; | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a self-contained HTML file. | 
					
						
							|  |  |  |      * Includes required CSS & image content. Images are base64 encoded into the HTML. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function pageToContainedHtml(Page $page) | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $pageHtml = view('pages.export', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'page'   => $page, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'format' => 'html', | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $this->containHtml($pageHtml); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a self-contained HTML file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function chapterToContainedHtml(Chapter $chapter) | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $pages = $chapter->getVisiblePages(); | 
					
						
							| 
									
										
										
										
											2018-01-29 00:58:52 +08:00
										 |  |  |         $pages->each(function ($page) { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |             $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         }); | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $html = view('chapters.export', [ | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |             'chapter' => $chapter, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'pages'   => $pages, | 
					
						
							|  |  |  |             'format'  => 'html', | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         return $this->containHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     /** | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      * Convert a book to a self-contained HTML file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function bookToContainedHtml(Book $book) | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $html = view('books.export', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'book'         => $book, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'bookChildren' => $bookTree, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'format'       => 'html', | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->containHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function pageToPdf(Page $page) | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $html = view('pages.export', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'page'   => $page, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'format' => 'pdf', | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function chapterToPdf(Chapter $chapter) | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $pages = $chapter->getVisiblePages(); | 
					
						
							| 
									
										
										
										
											2018-01-29 00:58:52 +08:00
										 |  |  |         $pages->each(function ($page) { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |             $page->html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         }); | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $html = view('chapters.export', [ | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |             'chapter' => $chapter, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'pages'   => $pages, | 
					
						
							|  |  |  |             'format'  => 'pdf', | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     /** | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * Convert a book to a PDF file. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Throwable | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							|  |  |  |     public function bookToPdf(Book $book) | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |         $html = view('books.export', [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'book'         => $book, | 
					
						
							| 
									
										
										
										
											2020-02-15 23:34:06 +08:00
										 |  |  |             'bookChildren' => $bookTree, | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             'format'       => 'pdf', | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         ])->render(); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $this->htmlToPdf($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * Convert normal web-page HTML to a PDF. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Exception | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     protected function htmlToPdf(string $html): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         $html = $this->containHtml($html); | 
					
						
							|  |  |  |         $html = $this->replaceIframesWithLinks($html); | 
					
						
							| 
									
										
										
										
											2021-11-29 05:01:35 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         return $this->pdfGenerator->fromHtml($html); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Within the given HTML content, replace any iframe elements | 
					
						
							|  |  |  |      * with anchor links within paragraph blocks. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function replaceIframesWithLinks(string $html): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         libxml_use_internal_errors(true); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $doc = new DOMDocument(); | 
					
						
							|  |  |  |         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); | 
					
						
							|  |  |  |         $xPath = new DOMXPath($doc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $iframes = $xPath->query('//iframe'); | 
					
						
							|  |  |  |         /** @var DOMElement $iframe */ | 
					
						
							|  |  |  |         foreach ($iframes as $iframe) { | 
					
						
							|  |  |  |             $link = $iframe->getAttribute('src'); | 
					
						
							|  |  |  |             if (strpos($link, '//') === 0) { | 
					
						
							|  |  |  |                 $link = 'https:' . $link; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $anchor = $doc->createElement('a', $link); | 
					
						
							|  |  |  |             $anchor->setAttribute('href', $link); | 
					
						
							|  |  |  |             $paragraph = $doc->createElement('p'); | 
					
						
							|  |  |  |             $paragraph->appendChild($anchor); | 
					
						
							| 
									
										
										
										
											2021-11-29 05:01:35 +08:00
										 |  |  |             $iframe->parentNode->replaceChild($paragraph, $iframe); | 
					
						
							| 
									
										
										
										
											2017-01-01 20:20:30 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-25 23:12:32 +08:00
										 |  |  |         return $doc->saveHTML(); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Bundle of the contents of a html file to be self-contained. | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |      * @throws Exception | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     protected function containHtml(string $htmlContent): string | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |         $imageTagsOutput = []; | 
					
						
							| 
									
										
										
										
											2020-12-07 06:23:21 +08:00
										 |  |  |         preg_match_all("/\<img.*?src\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $imageTagsOutput); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         // Replace image src with base64 encoded image strings
 | 
					
						
							|  |  |  |         if (isset($imageTagsOutput[0]) && count($imageTagsOutput[0]) > 0) { | 
					
						
							|  |  |  |             foreach ($imageTagsOutput[0] as $index => $imgMatch) { | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 $oldImgTagString = $imgMatch; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |                 $srcString = $imageTagsOutput[2][$index]; | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 $imageEncoded = $this->imageService->imageUriToBase64($srcString); | 
					
						
							|  |  |  |                 if ($imageEncoded === null) { | 
					
						
							|  |  |  |                     $imageEncoded = $srcString; | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |                 } | 
					
						
							| 
									
										
										
										
											2018-04-22 19:23:43 +08:00
										 |  |  |                 $newImgTagString = str_replace($srcString, $imageEncoded, $oldImgTagString); | 
					
						
							|  |  |  |                 $htmlContent = str_replace($oldImgTagString, $newImgTagString, $htmlContent); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $linksOutput = []; | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         preg_match_all("/\<a.*href\=(\'|\")(.*?)(\'|\").*?\>/i", $htmlContent, $linksOutput); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         // Replace image src with base64 encoded image strings
 | 
					
						
							|  |  |  |         if (isset($linksOutput[0]) && count($linksOutput[0]) > 0) { | 
					
						
							|  |  |  |             foreach ($linksOutput[0] as $index => $linkMatch) { | 
					
						
							|  |  |  |                 $oldLinkString = $linkMatch; | 
					
						
							|  |  |  |                 $srcString = $linksOutput[2][$index]; | 
					
						
							|  |  |  |                 if (strpos(trim($srcString), 'http') !== 0) { | 
					
						
							|  |  |  |                     $newSrcString = url($srcString); | 
					
						
							|  |  |  |                     $newLinkString = str_replace($srcString, $newSrcString, $oldLinkString); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |                     $htmlContent = str_replace($oldLinkString, $newLinkString, $htmlContent); | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Replace any relative links with system domain
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $htmlContent; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Converts the page contents into simple plain text. | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |      * This method filters any bad looking content to provide a nice final output. | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     public function pageToPlainText(Page $page): string | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |         $html = (new PageContent($page))->render(); | 
					
						
							| 
									
										
										
										
											2017-01-21 21:53:00 +08:00
										 |  |  |         $text = strip_tags($html); | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         // Replace multiple spaces with single spaces
 | 
					
						
							|  |  |  |         $text = preg_replace('/\ {2,}/', ' ', $text); | 
					
						
							|  |  |  |         // Reduce multiple horrid whitespace characters.
 | 
					
						
							|  |  |  |         $text = preg_replace('/(\x0A|\xA0|\x0A|\r|\n){2,}/su', "\n\n", $text); | 
					
						
							|  |  |  |         $text = html_entity_decode($text); | 
					
						
							|  |  |  |         // Add title
 | 
					
						
							|  |  |  |         $text = $page->name . "\n\n" . $text; | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  |         return $text; | 
					
						
							| 
									
										
										
										
											2016-01-21 06:13:13 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     public function chapterToPlainText(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |     { | 
					
						
							|  |  |  |         $text = $chapter->name . "\n\n"; | 
					
						
							|  |  |  |         $text .= $chapter->description . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-12-18 21:56:00 +08:00
										 |  |  |         foreach ($chapter->getVisiblePages() as $page) { | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |             $text .= $this->pageToPlainText($page); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |         return $text; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Convert a book into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2019-10-05 19:55:01 +08:00
										 |  |  |     public function bookToPlainText(Book $book): string | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2020-12-18 21:56:00 +08:00
										 |  |  |         $bookTree = (new BookContents($book))->getTree(false, false); | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         $text = $book->name . "\n\n"; | 
					
						
							|  |  |  |         foreach ($bookTree as $bookChild) { | 
					
						
							|  |  |  |             if ($bookChild->isA('chapter')) { | 
					
						
							| 
									
										
										
										
											2017-02-26 22:25:02 +08:00
										 |  |  |                 $text .= $this->chapterToPlainText($bookChild); | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |             } else { | 
					
						
							|  |  |  |                 $text .= $this->pageToPlainText($bookChild); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-26 21:26:51 +08:00
										 |  |  |         return $text; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a page to a Markdown file. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |     public function pageToMarkdown(Page $page): string | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |         if ($page->markdown) { | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             return '# ' . $page->name . "\n\n" . $page->markdown; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         return '# ' . $page->name . "\n\n" . (new HtmlToMarkdown($page->html))->convert(); | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a chapter to a Markdown file. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |     public function chapterToMarkdown(Chapter $chapter): string | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         $text = '# ' . $chapter->name . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         $text .= $chapter->description . "\n\n"; | 
					
						
							|  |  |  |         foreach ($chapter->pages as $page) { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:32:55 +08:00
										 |  |  |             $text .= $this->pageToMarkdown($page) . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         return $text; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Convert a book into a plain text string. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function bookToMarkdown(Book $book): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $bookTree = (new BookContents($book))->getTree(false, true); | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |         $text = '# ' . $book->name . "\n\n"; | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         foreach ($bookTree as $bookChild) { | 
					
						
							| 
									
										
										
										
											2021-06-23 04:02:18 +08:00
										 |  |  |             if ($bookChild instanceof Chapter) { | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |                 $text .= $this->chapterToMarkdown($bookChild); | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 $text .= $this->pageToMarkdown($bookChild); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-13 12:12:26 +08:00
										 |  |  |         return $text; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-02-01 01:53:30 +08:00
										 |  |  | } |