| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace BookStack\Entities\Tools; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | use BookStack\Util\HtmlDocument; | 
					
						
							|  |  |  | use Closure; | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  | use DOMDocument; | 
					
						
							|  |  |  | use DOMElement; | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  | use DOMNode; | 
					
						
							|  |  |  | use DOMText; | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | class PageIncludeParser | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/"; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Elements to clean up and remove if left empty after a parsing operation. | 
					
						
							|  |  |  |      * @var DOMElement[] | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected array $toCleanup = []; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-28 05:38:43 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  |     public function __construct( | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |         protected HtmlDocument $doc, | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  |         protected Closure $pageContentForId, | 
					
						
							|  |  |  |     ) { | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Parse out the include tags. | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |      * Returns the count of new content DOM nodes added to the document. | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |     public function parse(): int | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |         $nodesAdded = 0; | 
					
						
							|  |  |  |         $tags = $this->locateAndIsolateIncludeTags(); | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |         foreach ($tags as $tag) { | 
					
						
							| 
									
										
										
										
											2023-11-28 05:38:43 +08:00
										 |  |  |             /** @var PageIncludeContent $content */ | 
					
						
							|  |  |  |             $content = $this->pageContentForId->call($this, $tag); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |             if (!$content->isInline()) { | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |                 $parentP = $this->getParentParagraph($tag->domNode); | 
					
						
							|  |  |  |                 $isWithinParentP = $parentP === $tag->domNode->parentNode; | 
					
						
							|  |  |  |                 if ($parentP && $isWithinParentP) { | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |                     $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |                 } else if ($parentP) { | 
					
						
							|  |  |  |                     $this->moveTagNodeToBesideParent($tag, $parentP); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |             $replacementNodes = $content->toDomNodes(); | 
					
						
							|  |  |  |             $nodesAdded += count($replacementNodes); | 
					
						
							|  |  |  |             $this->replaceNodeWithNodes($tag->domNode, $replacementNodes); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         $this->cleanup(); | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |         return $nodesAdded; | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Locate include tags within the given document, isolating them to their | 
					
						
							|  |  |  |      * own nodes in the DOM for future targeted manipulation. | 
					
						
							|  |  |  |      * @return PageIncludeTag[] | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |     protected function locateAndIsolateIncludeTags(): array | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |         $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]"); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |         $includeTags = []; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /** @var DOMNode $node */ | 
					
						
							|  |  |  |         foreach ($includeHosts as $node) { | 
					
						
							| 
									
										
										
										
											2023-12-10 22:58:05 +08:00
										 |  |  |             /** @var DOMNode $childNode */ | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |             foreach ($node->childNodes as $childNode) { | 
					
						
							|  |  |  |                 if ($childNode->nodeName === '#text') { | 
					
						
							|  |  |  |                     array_push($includeTags, ...$this->splitTextNodesAtTags($childNode)); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $includeTags; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Takes a text DOMNode and splits its text content at include tags | 
					
						
							|  |  |  |      * into multiple text nodes within the original parent. | 
					
						
							|  |  |  |      * Returns found PageIncludeTag references. | 
					
						
							|  |  |  |      * @return PageIncludeTag[] | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function splitTextNodesAtTags(DOMNode $textNode): array | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $includeTags = []; | 
					
						
							|  |  |  |         $text = $textNode->textContent; | 
					
						
							|  |  |  |         preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $currentOffset = 0; | 
					
						
							|  |  |  |         foreach ($matches[0] as $index => $fullTagMatch) { | 
					
						
							|  |  |  |             $tagOuterContent = $fullTagMatch[0]; | 
					
						
							|  |  |  |             $tagInnerContent = $matches[1][$index][0]; | 
					
						
							|  |  |  |             $tagStartOffset = $fullTagMatch[1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ($currentOffset < $tagStartOffset) { | 
					
						
							|  |  |  |                 $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset); | 
					
						
							| 
									
										
										
										
											2024-11-29 00:30:59 +08:00
										 |  |  |                 $textNode->parentNode->insertBefore($this->doc->createTextNode($previousText), $textNode); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-29 00:30:59 +08:00
										 |  |  |             $node = $textNode->parentNode->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode); | 
					
						
							| 
									
										
										
										
											2023-11-23 22:29:07 +08:00
										 |  |  |             $includeTags[] = new PageIncludeTag($tagInnerContent, $node); | 
					
						
							|  |  |  |             $currentOffset = $tagStartOffset + strlen($tagOuterContent); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if ($currentOffset > 0) { | 
					
						
							|  |  |  |             $textNode->textContent = substr($text, $currentOffset); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $includeTags; | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |      * Replace the given node with all those in $replacements | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |      * @param DOMNode[] $replacements | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         /** @var DOMDocument $targetDoc */ | 
					
						
							|  |  |  |         $targetDoc = $toReplace->ownerDocument; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($replacements as $replacement) { | 
					
						
							|  |  |  |             if ($replacement->ownerDocument !== $targetDoc) { | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |                 $replacement = $targetDoc->importNode($replacement, true); | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             $toReplace->parentNode->insertBefore($replacement, $toReplace); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $toReplace->parentNode->removeChild($toReplace); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Move a tag node to become a sibling of the given parent. | 
					
						
							|  |  |  |      * Will attempt to guess a position based upon the tag content within the parent. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         $parentText = $parent->textContent; | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         $tagPos = strpos($parentText, $tag->tagContent); | 
					
						
							|  |  |  |         $before = $tagPos < (strlen($parentText) / 2); | 
					
						
							| 
									
										
										
										
											2023-11-28 04:16:27 +08:00
										 |  |  |         $this->toCleanup[] = $tag->domNode->parentNode; | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if ($before) { | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |             $parent->parentNode->insertBefore($tag->domNode, $parent); | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         } else { | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |             $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling); | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |     /** | 
					
						
							|  |  |  |      * Splits the given $parentNode at the location of the $domNode within it. | 
					
						
							|  |  |  |      * Attempts replicate the original $parentNode, moving some of their parent | 
					
						
							|  |  |  |      * children in where needed, before adding the $domNode between. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |     protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $children = [...$parentNode->childNodes]; | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         $splitPos = array_search($domNode, $children, true); | 
					
						
							|  |  |  |         if ($splitPos === false) { | 
					
						
							|  |  |  |             $splitPos = count($children) - 1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         $parentClone = $parentNode->cloneNode(); | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         $parentNode->parentNode->insertBefore($parentClone, $parentNode); | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         $parentClone->removeAttribute('id'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for ($i = 0; $i < $splitPos; $i++) { | 
					
						
							| 
									
										
										
										
											2023-12-10 22:58:05 +08:00
										 |  |  |             /** @var DOMNode $child */ | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |             $child = $children[$i]; | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |             $parentClone->appendChild($child); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $parentNode->parentNode->insertBefore($domNode, $parentNode); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         $this->toCleanup[] = $parentNode; | 
					
						
							|  |  |  |         $this->toCleanup[] = $parentClone; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Get the parent paragraph of the given node, if existing. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function getParentParagraph(DOMNode $parent): ?DOMNode | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         do { | 
					
						
							|  |  |  |             if (strtolower($parent->nodeName) === 'p') { | 
					
						
							|  |  |  |                 return $parent; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-28 03:54:47 +08:00
										 |  |  |             $parent = $parent->parentNode; | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |         } while ($parent !== null); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return null; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Cleanup after a parse operation. | 
					
						
							|  |  |  |      * Removes stranded elements we may have left during the parse. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function cleanup(): void | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         foreach ($this->toCleanup as $element) { | 
					
						
							|  |  |  |             $element->normalize(); | 
					
						
							| 
									
										
										
										
											2023-11-28 04:16:27 +08:00
										 |  |  |             while ($element->parentNode && !$element->hasChildNodes()) { | 
					
						
							|  |  |  |                 $parent = $element->parentNode; | 
					
						
							|  |  |  |                 $parent->removeChild($element); | 
					
						
							|  |  |  |                 $element = $parent; | 
					
						
							| 
									
										
										
										
											2023-11-26 01:32:00 +08:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2023-11-25 07:39:16 +08:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-11-23 06:14:28 +08:00
										 |  |  | } |