| 
									
										
										
										
											2023-11-14 23:46:32 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Util; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use DOMDocument; | 
					
						
							|  |  |  | use DOMElement; | 
					
						
							|  |  |  | use DOMNode; | 
					
						
							|  |  |  | use DOMNodeList; | 
					
						
							| 
									
										
										
										
											2024-11-29 00:30:59 +08:00
										 |  |  | use DOMText; | 
					
						
							| 
									
										
										
										
											2023-11-14 23:46:32 +08:00
										 |  |  | use DOMXPath; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /** | 
					
						
							|  |  |  |  * HtmlDocument is a thin wrapper around DOMDocument built | 
					
						
							|  |  |  |  * specifically for loading, querying and generating HTML content. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class HtmlDocument | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     protected DOMDocument $document; | 
					
						
							|  |  |  |     protected ?DOMXPath $xpath = null; | 
					
						
							|  |  |  |     protected int $loadOptions; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     public function __construct(string $partialHtml = '', int $loadOptions = 0) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         libxml_use_internal_errors(true); | 
					
						
							|  |  |  |         $this->document = new DOMDocument(); | 
					
						
							|  |  |  |         $this->loadOptions = $loadOptions; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if ($partialHtml) { | 
					
						
							|  |  |  |             $this->loadPartialHtml($partialHtml); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Load some HTML content that's part of a document (e.g. body content) | 
					
						
							|  |  |  |      * into the current document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function loadPartialHtml(string $html): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>'; | 
					
						
							|  |  |  |         $this->document->loadHTML($html, $this->loadOptions); | 
					
						
							|  |  |  |         $this->xpath = null; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Load a complete page of HTML content into the document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function loadCompleteHtml(string $html): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $html = '<?xml encoding="utf-8" ?>' . $html; | 
					
						
							|  |  |  |         $this->document->loadHTML($html, $this->loadOptions); | 
					
						
							|  |  |  |         $this->xpath = null; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Start an XPath query on the current document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function queryXPath(string $expression): DOMNodeList | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         if (is_null($this->xpath)) { | 
					
						
							|  |  |  |             $this->xpath = new DOMXPath($this->document); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $result = $this->xpath->query($expression); | 
					
						
							|  |  |  |         if ($result === false) { | 
					
						
							|  |  |  |             throw new \InvalidArgumentException("XPath query for expression [$expression] failed to execute"); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $result; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Create a new DOMElement instance within the document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function createElement(string $localName, string $value = ''): DOMElement | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $element = $this->document->createElement($localName, $value); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if ($element === false) { | 
					
						
							|  |  |  |             throw new \InvalidArgumentException("Failed to create element of name [$localName] and value [$value]"); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $element; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-29 00:30:59 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Create a new text node within this document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function createTextNode(string $text): DOMText | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return $this->document->createTextNode($text); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-14 23:46:32 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Get an element within the document of the given ID. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getElementById(string $elementId): ?DOMElement | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return $this->document->getElementById($elementId); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the DOMNode that represents the HTML body. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getBody(): DOMNode | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return $this->document->getElementsByTagName('body')[0]; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the nodes that are a direct child of the body. | 
					
						
							|  |  |  |      * This is usually all the content nodes if loaded partially. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getBodyChildren(): DOMNodeList | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return $this->getBody()->childNodes; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the inner HTML content of the body. | 
					
						
							|  |  |  |      * This is usually all the content if loaded partially. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getBodyInnerHtml(): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $html = ''; | 
					
						
							|  |  |  |         foreach ($this->getBodyChildren() as $child) { | 
					
						
							|  |  |  |             $html .= $this->document->saveHTML($child); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $html; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the HTML content of the whole document. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getHtml(): string | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-11-15 01:23:05 +08:00
										 |  |  |         return $this->document->saveHTML($this->document->documentElement); | 
					
						
							| 
									
										
										
										
											2023-11-14 23:46:32 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the inner HTML for the given node. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getNodeInnerHtml(DOMNode $node): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $html = ''; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($node->childNodes as $childNode) { | 
					
						
							|  |  |  |             $html .= $this->document->saveHTML($childNode); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $html; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the outer HTML for the given node. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function getNodeOuterHtml(DOMNode $node): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return $this->document->saveHTML($node); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } |