71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
			
		
		
	
	
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
<?php namespace BookStack\Util;
 | 
						|
 | 
						|
use DOMDocument;
 | 
						|
use DOMNode;
 | 
						|
use DOMNodeList;
 | 
						|
use DOMXPath;
 | 
						|
 | 
						|
class HtmlContentFilter
{
    /**
     * Remove script elements and other script-capable content from the given HTML.
     *
     * The HTML is parsed into a DOMDocument, after which the following are removed:
     *  - script elements,
     *  - elements whose href, action, formaction or src attribute contains
     *    a "javascript:" URI, or whose src contains a "data:" URI,
     *  - elements carrying a srcdoc attribute,
     *  - meta elements whose content attribute contains "url",
     *  - every attribute whose name starts with "on".
     *
     * URI and meta checks ignore ASCII letter case and embedded tab, line feed
     * and carriage return characters (see attrContains()).
     *
     * @param string $html HTML fragment, treated as UTF-8.
     *
     * @return string The filtered HTML. An empty value (per empty()) is returned unchanged;
     *                an empty string is returned when no root element remains after filtering.
     */
    public static function removeScripts(string $html): string
    {
        if (empty($html)) {
            return $html;
        }

        // Collect parser complaints internally rather than emitting PHP warnings.
        libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        // Encode non-ASCII characters as numeric entities so that loadHTML() does not
        // misread UTF-8 input. This replaces the 'HTML-ENTITIES' mbstring conversion,
        // which is deprecated as of PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
        // The collected errors are never read; drop them so the buffer does not grow per call.
        libxml_clear_errors();
        $xPath = new DOMXPath($doc);

        // Remove standard script tags
        static::removeNodes($xPath->query('//script'));

        // Remove clickable links to JavaScript URI
        $badLinks = $xPath->query('//*[' . static::attrContains('href', 'javascript:') . ']');
        static::removeNodes($badLinks);

        // Remove forms with calls to JavaScript URI
        $badForms = $xPath->query(
            '//*[' . static::attrContains('action', 'javascript:') . ']'
            . ' | //*[' . static::attrContains('formaction', 'javascript:') . ']'
        );
        static::removeNodes($badForms);

        // Remove meta tag to prevent external redirects
        $metaTags = $xPath->query('//meta[' . static::attrContains('content', 'url') . ']');
        static::removeNodes($metaTags);

        // Remove data or JavaScript iFrames
        $badIframes = $xPath->query(
            '//*[' . static::attrContains('src', 'data:') . ']'
            . ' | //*[' . static::attrContains('src', 'javascript:') . ']'
            . ' | //*[@srcdoc]'
        );
        static::removeNodes($badIframes);

        // Remove 'on*' attributes
        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
        foreach ($onAttributes as $attr) {
            /** @var \DOMAttr $attr */
            $owner = $attr->parentNode;
            if ($owner !== null) {
                $owner->removeAttribute($attr->nodeName);
            }
        }

        // The root element is missing when nothing could be parsed, or when the
        // root itself matched one of the removal rules above.
        $root = $doc->documentElement;
        if ($root === null) {
            return '';
        }

        // Serialise the contents of every section element under the root rather than
        // only the first one, so that content in a later section is not dropped when
        // the parser places leading elements into a separate, earlier section.
        $filtered = '';
        foreach ($root->childNodes as $section) {
            if ($section->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            foreach ($section->childNodes as $child) {
                $filtered .= $doc->saveHTML($child);
            }
        }

        return $filtered;
    }

    /**
     * Build an XPath predicate testing whether an attribute contains the given text,
     * ignoring ASCII letter case and any tab, line feed or carriage return characters
     * within the attribute value.
     *
     * @param string $attribute Attribute name, without the leading "@".
     * @param string $needle    Lower-case text to look for; must not contain a single quote
     *                          since it is embedded in a single-quoted XPath literal.
     *
     * @return string XPath boolean expression, for use inside a "[...]" predicate.
     */
    protected static function attrContains(string $attribute, string $needle): string
    {
        $upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
        $lower = 'abcdefghijklmnopqrstuvwxyz';
        // Characters listed in translate()'s second argument without a counterpart
        // in the third argument are deleted from the value before comparison.
        $stripped = "\t\n\r";

        return "contains(translate(@{$attribute}, '{$upper}{$stripped}', '{$lower}'), '{$needle}')";
    }

    /**
     * Remove all of the given DOMNodes from their parent nodes.
     *
     * Nodes without a parent are skipped.
     */
    protected static function removeNodes(DOMNodeList $nodes): void
    {
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            if ($parent !== null) {
                $parent->removeChild($node);
            }
        }
    }
}