71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
		
		
			
		
	
	
			71 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
| 
								 | 
							
								<?php namespace BookStack\Util;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								use DOMDocument;
							 | 
						||
| 
								 | 
							
								use DOMNode;
							 | 
						||
| 
								 | 
							
								use DOMNodeList;
							 | 
						||
| 
								 | 
							
								use DOMXPath;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Filters script-capable content out of HTML fragments.
								 *
								 * The input is parsed into a DOM, offending nodes and attributes are removed,
								 * and what is left is serialised back to a string.
								 */
								class HtmlContentFilter
								{
								    /**
								     * ASCII upper-case letters, folded to lower case inside XPath expressions
								     * (XPath 1.0 has no lower-case() function, so translate() is used instead).
								     */
								    private const ASCII_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

								    /**
								     * Lower-case counterparts of ASCII_UPPER, position for position.
								     */
								    private const ASCII_LOWER = 'abcdefghijklmnopqrstuvwxyz';

								    /**
								     * Remove all of the script elements from the given HTML.
								     *
								     * Besides <script> elements this also removes: elements whose href, action,
								     * formaction or src carries a "javascript:" URI, elements whose src carries a
								     * "data:" URI, elements with a srcdoc attribute, <meta> elements whose content
								     * mentions "url", and every attribute whose name starts with "on".
								     * URI and meta matching ignores ASCII case and embedded tab/newline characters.
								     *
								     * @param string $html UTF-8 HTML fragment to filter.
								     *
								     * @return string The filtered HTML; an empty() input is returned unchanged.
								     */
								    public static function removeScripts(string $html): string
								    {
								        // empty() also matches the literal string '0', which is passed through as-is.
								        if (empty($html)) {
								            return $html;
								        }

								        // Parser warnings on imperfect markup are expected, so they are collected
								        // internally while loading. The previous setting is restored afterwards so
								        // this call does not change process-wide libxml behaviour; restoring a
								        // disabled state also discards the errors collected here.
								        $previousErrorSetting = libxml_use_internal_errors(true);
								        try {
								            $doc = new DOMDocument();
								            // Non-ASCII characters are turned into numeric entities so the parser
								            // does not misread the UTF-8 input as ISO-8859-1.
								            $doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
								        } finally {
								            libxml_use_internal_errors($previousErrorSetting);
								        }

								        $xPath = new DOMXPath($doc);

								        // Remove standard script tags
								        static::removeNodes($xPath->query('//script'));

								        // Remove clickable links to JavaScript URI
								        static::removeNodes($xPath->query(
								            '//*[' . self::attributeContains('@href', 'javascript:') . ']'
								        ));

								        // Remove forms with calls to JavaScript URI
								        static::removeNodes($xPath->query(
								            '//*[' . self::attributeContains('@action', 'javascript:') . ']'
								            . ' | //*[' . self::attributeContains('@formaction', 'javascript:') . ']'
								        ));

								        // Remove meta tag to prevent external redirects
								        static::removeNodes($xPath->query(
								            '//meta[' . self::attributeContains('@content', 'url') . ']'
								        ));

								        // Remove data or JavaScript iFrames
								        static::removeNodes($xPath->query(
								            '//*[' . self::attributeContains('@src', 'data:') . ']'
								            . ' | //*[' . self::attributeContains('@src', 'javascript:') . ']'
								            . ' | //*[@srcdoc]'
								        ));

								        // Remove 'on*' attributes
								        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
								        foreach ($onAttributes as $attr) {
								            /** @var \DOMAttr $attr */
								            $owner = $attr->ownerElement;
								            if ($owner !== null) {
								                $owner->removeAttribute($attr->nodeName);
								            }
								        }

								        // The root may be missing if nothing could be parsed or if it was removed above.
								        $root = $doc->documentElement;
								        if ($root === null) {
								            return '';
								        }

								        // Serialise the contents of every section under the root, not only the
								        // first one, so body content survives when the parser also created a
								        // head section for leading elements.
								        $filtered = '';
								        foreach ($root->childNodes as $section) {
								            if ($section->nodeType !== XML_ELEMENT_NODE) {
								                $filtered .= $doc->saveHTML($section);
								                continue;
								            }

								            foreach ($section->childNodes as $child) {
								                $filtered .= $doc->saveHTML($child);
								            }
								        }

								        return $filtered;
								    }

								    /**
								     * Build an XPath 1.0 predicate testing whether an attribute contains a value,
								     * ignoring ASCII case and any tab, line feed or carriage return characters
								     * inside the attribute value.
								     *
								     * @param string $attribute XPath attribute reference, e.g. "@href".
								     * @param string $needle    Value to look for; must not contain a single quote.
								     *
								     * @return string Predicate body for use inside "[...]".
								     */
								    private static function attributeContains(string $attribute, string $needle): string
								    {
								        // translate() maps each upper-case letter to its lower-case counterpart and
								        // deletes the trailing characters that have no counterpart (tab, LF, CR).
								        $from = self::ASCII_UPPER . "\t\n\r";

								        return 'contains(translate(' . $attribute . ", '" . $from . "', '" . self::ASCII_LOWER . "'), '"
								            . strtolower($needle) . "')";
								    }

								    /**
								     * Remove all of the given DOMNodes from their parents.
								     *
								     * @param DOMNodeList $nodes Nodes to detach; nodes without a parent are skipped.
								     */
								    protected static function removeNodes(DOMNodeList $nodes): void
								    {
								        foreach ($nodes as $node) {
								            $parent = $node->parentNode;
								            if ($parent !== null) {
								                $parent->removeChild($node);
								            }
								        }
								    }
								}
							 |