71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
		
		
			
		
	
	
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
|  | <?php namespace BookStack\Util; | ||
|  | 
 | ||
|  | use DOMDocument; | ||
|  | use DOMNode; | ||
|  | use DOMNodeList; | ||
|  | use DOMXPath; | ||
|  | 
 | ||
|  | class HtmlContentFilter | ||
|  | { | ||
|  |     /** | ||
|  |      * Remove all of the script elements from the given HTML. | ||
|  |      */ | ||
|  |     public static function removeScripts(string $html): string | ||
|  |     { | ||
|  |         if (empty($html)) { | ||
|  |             return $html; | ||
|  |         } | ||
|  | 
 | ||
|  |         libxml_use_internal_errors(true); | ||
|  |         $doc = new DOMDocument(); | ||
|  |         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); | ||
|  |         $xPath = new DOMXPath($doc); | ||
|  | 
 | ||
|  |         // Remove standard script tags
 | ||
|  |         $scriptElems = $xPath->query('//script'); | ||
|  |         static::removeNodes($scriptElems); | ||
|  | 
 | ||
|  |         // Remove clickable links to JavaScript URI
 | ||
|  |         $badLinks = $xPath->query('//*[contains(@href, \'javascript:\')]'); | ||
|  |         static::removeNodes($badLinks); | ||
|  | 
 | ||
|  |         // Remove forms with calls to JavaScript URI
 | ||
|  |         $badForms = $xPath->query('//*[contains(@action, \'javascript:\')] | //*[contains(@formaction, \'javascript:\')]'); | ||
|  |         static::removeNodes($badForms); | ||
|  | 
 | ||
|  |         // Remove meta tag to prevent external redirects
 | ||
|  |         $metaTags = $xPath->query('//meta[contains(@content, \'url\')]'); | ||
|  |         static::removeNodes($metaTags); | ||
|  | 
 | ||
|  |         // Remove data or JavaScript iFrames
 | ||
|  |         $badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]'); | ||
|  |         static::removeNodes($badIframes); | ||
|  | 
 | ||
|  |         // Remove 'on*' attributes
 | ||
|  |         $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]'); | ||
|  |         foreach ($onAttributes as $attr) { | ||
|  |             /** @var \DOMAttr $attr*/ | ||
|  |             $attrName = $attr->nodeName; | ||
|  |             $attr->parentNode->removeAttribute($attrName); | ||
|  |         } | ||
|  | 
 | ||
|  |         $html = ''; | ||
|  |         $topElems = $doc->documentElement->childNodes->item(0)->childNodes; | ||
|  |         foreach ($topElems as $child) { | ||
|  |             $html .= $doc->saveHTML($child); | ||
|  |         } | ||
|  | 
 | ||
|  |         return $html; | ||
|  |     } | ||
|  | 
 | ||
|  |     /** | ||
|  |      * Removed all of the given DOMNodes. | ||
|  |      */ | ||
|  |     static protected function removeNodes(DOMNodeList $nodes): void | ||
|  |     { | ||
|  |         foreach ($nodes as $node) { | ||
|  |             $node->parentNode->removeChild($node); | ||
|  |         } | ||
|  |     } | ||
|  | 
 | ||
|  | } |