| 
									
										
										
										
											2023-12-19 23:10:29 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Util; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use DOMAttr; | 
					
						
							|  |  |  | use DOMElement; | 
					
						
							|  |  |  | use DOMNamedNodeMap; | 
					
						
							|  |  |  | use DOMNode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /** | 
					
						
							|  |  |  |  * Filter to ensure HTML input for description content remains simple and | 
					
						
							|  |  |  |  * to a limited allow-list of elements and attributes. | 
					
						
							|  |  |  |  * More for consistency and to prevent nuisance rather than for security | 
					
						
							|  |  |  |  * (which would be done via a separate content filter and CSP). | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | class HtmlDescriptionFilter | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * @var array<string, string[]> | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected static array $allowedAttrsByElements = [ | 
					
						
							|  |  |  |         'p' => [], | 
					
						
							|  |  |  |         'a' => ['href', 'title'], | 
					
						
							|  |  |  |         'ol' => [], | 
					
						
							|  |  |  |         'ul' => [], | 
					
						
							|  |  |  |         'li' => [], | 
					
						
							|  |  |  |         'strong' => [], | 
					
						
							|  |  |  |         'em' => [], | 
					
						
							|  |  |  |         'br' => [], | 
					
						
							|  |  |  |     ]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     public static function filterFromString(string $html): string | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-12-21 01:21:09 +08:00
										 |  |  |         if (empty(trim($html))) { | 
					
						
							|  |  |  |             return ''; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-19 23:10:29 +08:00
										 |  |  |         $doc = new HtmlDocument($html); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $topLevel = [...$doc->getBodyChildren()]; | 
					
						
							|  |  |  |         foreach ($topLevel as $child) { | 
					
						
							|  |  |  |             /** @var DOMNode $child */ | 
					
						
							|  |  |  |             if ($child instanceof DOMElement) { | 
					
						
							|  |  |  |                 static::filterElement($child); | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 $child->parentNode->removeChild($child); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $doc->getBodyInnerHtml(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     protected static function filterElement(DOMElement $element): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $elType = strtolower($element->tagName); | 
					
						
							|  |  |  |         $allowedAttrs = static::$allowedAttrsByElements[$elType] ?? null; | 
					
						
							|  |  |  |         if (is_null($allowedAttrs)) { | 
					
						
							|  |  |  |             $element->remove(); | 
					
						
							|  |  |  |             return; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /** @var DOMNamedNodeMap $attrs */ | 
					
						
							|  |  |  |         $attrs = $element->attributes; | 
					
						
							|  |  |  |         for ($i = $attrs->length - 1; $i >= 0; $i--) { | 
					
						
							|  |  |  |             /** @var DOMAttr $attr */ | 
					
						
							|  |  |  |             $attr = $attrs->item($i); | 
					
						
							|  |  |  |             $name = strtolower($attr->name); | 
					
						
							|  |  |  |             if (!in_array($name, $allowedAttrs)) { | 
					
						
							|  |  |  |                 $element->removeAttribute($attr->name); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($element->childNodes as $child) { | 
					
						
							|  |  |  |             if ($child instanceof DOMElement) { | 
					
						
							|  |  |  |                 static::filterElement($child); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } |