| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Util; | 
					
						
							| 
									
										
										
										
											2021-05-04 06:59:52 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | use DOMDocument; | 
					
						
							|  |  |  | use DOMNodeList; | 
					
						
							|  |  |  | use DOMXPath; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class HtmlContentFilter | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Remove all of the script elements from the given HTML. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public static function removeScripts(string $html): string | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         if (empty($html)) { | 
					
						
							|  |  |  |             return $html; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-13 19:53:04 +08:00
										 |  |  |         $html = '<body>' . $html . '</body>'; | 
					
						
							| 
									
										
										
										
											2021-05-04 06:59:52 +08:00
										 |  |  |         libxml_use_internal_errors(true); | 
					
						
							|  |  |  |         $doc = new DOMDocument(); | 
					
						
							|  |  |  |         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); | 
					
						
							|  |  |  |         $xPath = new DOMXPath($doc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove standard script tags
 | 
					
						
							|  |  |  |         $scriptElems = $xPath->query('//script'); | 
					
						
							|  |  |  |         static::removeNodes($scriptElems); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove clickable links to JavaScript URI
 | 
					
						
							|  |  |  |         $badLinks = $xPath->query('//*[contains(@href, \'javascript:\')]'); | 
					
						
							|  |  |  |         static::removeNodes($badLinks); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove forms with calls to JavaScript URI
 | 
					
						
							|  |  |  |         $badForms = $xPath->query('//*[contains(@action, \'javascript:\')] | //*[contains(@formaction, \'javascript:\')]'); | 
					
						
							|  |  |  |         static::removeNodes($badForms); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove meta tag to prevent external redirects
 | 
					
						
							|  |  |  |         $metaTags = $xPath->query('//meta[contains(@content, \'url\')]'); | 
					
						
							|  |  |  |         static::removeNodes($metaTags); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove data or JavaScript iFrames
 | 
					
						
							|  |  |  |         $badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]'); | 
					
						
							|  |  |  |         static::removeNodes($badIframes); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Remove 'on*' attributes
 | 
					
						
							|  |  |  |         $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]'); | 
					
						
							|  |  |  |         foreach ($onAttributes as $attr) { | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |             /** @var \DOMAttr $attr */ | 
					
						
							| 
									
										
										
										
											2021-05-04 06:59:52 +08:00
										 |  |  |             $attrName = $attr->nodeName; | 
					
						
							|  |  |  |             $attr->parentNode->removeAttribute($attrName); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $html = ''; | 
					
						
							|  |  |  |         $topElems = $doc->documentElement->childNodes->item(0)->childNodes; | 
					
						
							|  |  |  |         foreach ($topElems as $child) { | 
					
						
							|  |  |  |             $html .= $doc->saveHTML($child); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $html; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Removed all of the given DOMNodes. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2021-06-13 19:53:04 +08:00
										 |  |  |     protected static function removeNodes(DOMNodeList $nodes): void | 
					
						
							| 
									
										
										
										
											2021-05-04 06:59:52 +08:00
										 |  |  |     { | 
					
						
							|  |  |  |         foreach ($nodes as $node) { | 
					
						
							|  |  |  |             $node->parentNode->removeChild($node); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-06-13 19:53:04 +08:00
										 |  |  | } |