From 04d21c8a97da9463e6bedda620ecf1282722ea3e Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Wed, 22 Nov 2023 22:14:28 +0000 Subject: [PATCH 1/7] Includes: Started foundations for new include tag parser --- app/Entities/Tools/PageIncludeParser.php | 57 ++++++++++++++++++++++++ tests/Unit/PageIncludeParserTest.php | 46 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 app/Entities/Tools/PageIncludeParser.php create mode 100644 tests/Unit/PageIncludeParserTest.php diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php new file mode 100644 index 000000000..63d3ea8d6 --- /dev/null +++ b/app/Entities/Tools/PageIncludeParser.php @@ -0,0 +1,57 @@ +pageHtml); + + $includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]"); + $node = $includeHosts->item(0); + + // One of the direct child textnodes of the "$includeHosts" should be + // the one with the include tag within. + $textNode = $node->childNodes->item(0); + + // TODO: + // Hunt down the specific text nodes with matches + // Split out tag text node from rest of content + // Fetch tag content-> + // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty + // If inline: Replace current text node with new text or elem + // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content + // since we could have a range of inline elements + + // [Promote to top-block] + // Tricky operation. + // Can throw in before or after current top-block depending on relative position + // Could [Split] top-block but complex past a single level depth. + // Maybe [Split] if one level depth, otherwise default to before/after block + // Should work for the vast majority of cases, and not for those which would + // technically be invalid in-editor anyway. + + // [Split] + // Copy original top-block node type and attrs (apart from ID) + // Move nodes after promoted tag-node into copy + // Insert copy after original (after promoted top-block eventually) + + // Notes: May want to eventually parse through backwards, which should avoid issues + // in changes affecting the next tag, where tags may be in the same/adjacent nodes. + + + return $html->getBodyInnerHtml(); + } +} diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php new file mode 100644 index 000000000..de31504ff --- /dev/null +++ b/tests/Unit/PageIncludeParserTest.php @@ -0,0 +1,46 @@ +runParserTest( + '

{{@45#content}}

', + ['45' => '

Testing

'], + '

Testing

', + ); + } + + public function test_include_simple_inline_text_with_existing_siblings() + { + $this->runParserTest( + '

{{@45#content}} Hithere!

', + ['45' => '

Testing

'], + '

Testing Hithere!

', + ); + } + + public function test_include_simple_inline_text_within_other_text() + { + $this->runParserTest( + '

Hello {{@45#content}}there!

', + ['45' => '

Testing

'], + '

Hello Testingthere!

', + ); + } + + protected function runParserTest(string $html, array $contentById, string $expected) + { + $parser = new PageIncludeParser($html, function (int $id) use ($contentById) { + return $contentById[strval($id)] ?? null; + }); + + $result = $parser->parse(); + $this->assertEquals($expected, $result); + } +} From 75936454cca139d0b226a95ee7a0070bc8702fdc Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 23 Nov 2023 14:29:07 +0000 Subject: [PATCH 2/7] Includes: Developed to get new system working with inline includes Adds logic for locating and splitting text nodes. Adds specific classes to offload tag/content specific logic. --- app/Entities/Tools/PageIncludeContent.php | 68 ++++++++++++++++++ app/Entities/Tools/PageIncludeParser.php | 85 +++++++++++++++++++++-- app/Entities/Tools/PageIncludeTag.php | 30 ++++++++ app/Util/HtmlDocument.php | 15 ++++ tests/Unit/PageIncludeParserTest.php | 2 +- 5 files changed, 192 insertions(+), 8 deletions(-) create mode 100644 app/Entities/Tools/PageIncludeContent.php create mode 100644 app/Entities/Tools/PageIncludeTag.php diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php new file mode 100644 index 000000000..97c470c68 --- /dev/null +++ b/app/Entities/Tools/PageIncludeContent.php @@ -0,0 +1,68 @@ +parseHtml($html, $tag); + } + + protected function parseHtml(string $html, PageIncludeTag $tag): void + { + if (empty($html)) { + return; + } + + $doc = new HtmlDocument($html); + + $sectionId = $tag->getSectionId(); + if (!$sectionId) { + $this->contents = [...$doc->getBodyChildren()]; + $this->isTopLevel = true; + return; + } + + $section = $doc->getElementById($sectionId); + if (!$section) { + return; + } + + $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags); + $this->isTopLevel = $isTopLevel; + $this->contents = $isTopLevel ? [$section] : [...$section->childNodes]; + } + + public function isInline(): bool + { + return !$this->isTopLevel; + } + + public function isEmpty(): bool + { + return empty($this->contents); + } + + /** + * @return DOMNode[] + */ + public function toDomNodes(): array + { + return $this->contents; + } +} diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 63d3ea8d6..070b0cc11 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools; use BookStack\Util\HtmlDocument; use Closure; +use DOMNode; +use DOMText; class PageIncludeParser { @@ -17,14 +19,25 @@ class PageIncludeParser public function parse(): string { - $html = new HtmlDocument($this->pageHtml); + $doc = new HtmlDocument($this->pageHtml); - $includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]"); - $node = $includeHosts->item(0); + $tags = $this->locateAndIsolateIncludeTags($doc); - // One of the direct child textnodes of the "$includeHosts" should be - // the one with the include tag within. - $textNode = $node->childNodes->item(0); + foreach ($tags as $tag) { + $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); + $content = new PageIncludeContent($htmlContent, $tag); + + if ($content->isInline()) { + $adopted = $doc->adoptNodes($content->toDomNodes()); + foreach ($adopted as $adoptedContentNode) { + $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode); + } + $tag->domNode->parentNode->removeChild($tag->domNode); + continue; + } + + // TODO - Non-inline + } // TODO: // Hunt down the specific text nodes with matches @@ -52,6 +65,64 @@ class PageIncludeParser // in changes affecting the next tag, where tags may be in the same/adjacent nodes. - return $html->getBodyInnerHtml(); + return $doc->getBodyInnerHtml(); + } + + /** + * Locate include tags within the given document, isolating them to their + * own nodes in the DOM for future targeted manipulation. + * @return PageIncludeTag[] + */ + protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array + { + $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]"); + $includeTags = []; + + /** @var DOMNode $node */ + /** @var DOMNode $childNode */ + foreach ($includeHosts as $node) { + foreach ($node->childNodes as $childNode) { + if ($childNode->nodeName === '#text') { + array_push($includeTags, ...$this->splitTextNodesAtTags($childNode)); + } + } + } + + return $includeTags; + } + + /** + * Takes a text DOMNode and splits its text content at include tags + * into multiple text nodes within the original parent. + * Returns found PageIncludeTag references. + * @return PageIncludeTag[] + */ + protected function splitTextNodesAtTags(DOMNode $textNode): array + { + $includeTags = []; + $text = $textNode->textContent; + preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE); + + $currentOffset = 0; + foreach ($matches[0] as $index => $fullTagMatch) { + $tagOuterContent = $fullTagMatch[0]; + $tagInnerContent = $matches[1][$index][0]; + $tagStartOffset = $fullTagMatch[1]; + + if ($currentOffset < $tagStartOffset) { + $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset); + $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode); + } + + $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode); + $includeTags[] = new PageIncludeTag($tagInnerContent, $node); + $currentOffset = $tagStartOffset + strlen($tagOuterContent); + } + + if ($currentOffset > 0) { + $textNode->textContent = substr($text, $currentOffset); + } + + return $includeTags; } } diff --git a/app/Entities/Tools/PageIncludeTag.php b/app/Entities/Tools/PageIncludeTag.php new file mode 100644 index 000000000..05a532fb2 --- /dev/null +++ b/app/Entities/Tools/PageIncludeTag.php @@ -0,0 +1,30 @@ +tagContent, 2)[0])); + } + + /** + * Get the section ID that this tag references (if any) + */ + public function getSectionId(): string + { + return trim(explode('#', $this->tagContent, 2)[1] ?? ''); + } +} diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php index b8c53d439..ad5dacd82 100644 --- a/app/Util/HtmlDocument.php +++ b/app/Util/HtmlDocument.php @@ -149,4 +149,19 @@ class HtmlDocument { return $this->document->saveHTML($node); } + + /** + * Adopt the given nodes into this document. + * @param DOMNode[] $nodes + * @return DOMNode[] + */ + public function adoptNodes(array $nodes): array + { + $adopted = []; + foreach ($nodes as $node) { + $adopted[] = $this->document->importNode($node, true); + } + + return $adopted; + } } diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index de31504ff..d1912270e 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -37,7 +37,7 @@ class PageIncludeParserTest extends TestCase protected function runParserTest(string $html, array $contentById, string $expected) { $parser = new PageIncludeParser($html, function (int $id) use ($contentById) { - return $contentById[strval($id)] ?? null; + return $contentById[strval($id)] ?? ''; }); $result = $parser->parse(); From c88eb729a499197e8b3ab9d5019b4426a65d3d41 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 24 Nov 2023 23:39:16 +0000 Subject: [PATCH 3/7] Includes: Added block-level handling to new include system Implements block promoting to body (including position choosing based upon likely tag position within parent) and block splitting where we're only a single depth down from the body child. --- app/Entities/Tools/PageIncludeParser.php | 111 ++++++++++++++++------- app/Util/HtmlDocument.php | 15 --- tests/Unit/PageIncludeParserTest.php | 96 +++++++++++++++++++- 3 files changed, 172 insertions(+), 50 deletions(-) diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 070b0cc11..5ce847d6c 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools; use BookStack\Util\HtmlDocument; use Closure; +use DOMDocument; +use DOMElement; use DOMNode; use DOMText; @@ -22,48 +24,26 @@ class PageIncludeParser $doc = new HtmlDocument($this->pageHtml); $tags = $this->locateAndIsolateIncludeTags($doc); + $topLevel = [...$doc->getBodyChildren()]; foreach ($tags as $tag) { $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); $content = new PageIncludeContent($htmlContent, $tag); - if ($content->isInline()) { - $adopted = $doc->adoptNodes($content->toDomNodes()); - foreach ($adopted as $adoptedContentNode) { - $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode); + if (!$content->isInline()) { + $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true); + if ($isParentTopLevel) { + $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); + } else { + $this->promoteTagNodeToBody($tag, $doc->getBody()); } - $tag->domNode->parentNode->removeChild($tag->domNode); - continue; } - // TODO - Non-inline + $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes()); } - // TODO: - // Hunt down the specific text nodes with matches - // Split out tag text node from rest of content - // Fetch tag content-> - // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty - // If inline: Replace current text node with new text or elem - // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content - // since we could have a range of inline elements - - // [Promote to top-block] - // Tricky operation. - // Can throw in before or after current top-block depending on relative position - // Could [Split] top-block but complex past a single level depth. - // Maybe [Split] if one level depth, otherwise default to before/after block - // Should work for the vast majority of cases, and not for those which would - // technically be invalid in-editor anyway. - - // [Split] - // Copy original top-block node type and attrs (apart from ID) - // Move nodes after promoted tag-node into copy - // Insert copy after original (after promoted top-block eventually) - - // Notes: May want to eventually parse through backwards, which should avoid issues - // in changes affecting the next tag, where tags may be in the same/adjacent nodes. - + // TODO Notes: May want to eventually parse through backwards, which should avoid issues + // in changes affecting the next tag, where tags may be in the same/adjacent nodes. return $doc->getBodyInnerHtml(); } @@ -125,4 +105,71 @@ class PageIncludeParser return $includeTags; } + + /** + * @param DOMNode[] $replacements + */ + protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void + { + /** @var DOMDocument $targetDoc */ + $targetDoc = $toReplace->ownerDocument; + + foreach ($replacements as $replacement) { + if ($replacement->ownerDocument !== $targetDoc) { + $replacement = $targetDoc->adoptNode($replacement); + } + + $toReplace->parentNode->insertBefore($replacement, $toReplace); + } + + $toReplace->parentNode->removeChild($toReplace); + } + + protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void + { + /** @var DOMNode $topParent */ + $topParent = $tag->domNode->parentNode; + while ($topParent->parentNode !== $body) { + $topParent = $topParent->parentNode; + } + + $parentText = $topParent->textContent; + $tagPos = strpos($parentText, $tag->tagContent); + $before = $tagPos < (strlen($parentText) / 2); + + if ($before) { + $body->insertBefore($tag->domNode, $topParent); + } else { + $body->insertBefore($tag->domNode, $topParent->nextSibling); + } + } + + protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void + { + $children = [...$parentNode->childNodes]; + $splitPos = array_search($domNode, $children, true) ?: count($children); + $parentClone = $parentNode->cloneNode(); + $parentClone->removeAttribute('id'); + + /** @var DOMNode $child */ + for ($i = 0; $i < $splitPos; $i++) { + $child = $children[0]; + $parentClone->appendChild($child); + } + + if ($parentClone->hasChildNodes()) { + $parentNode->parentNode->insertBefore($parentClone, $parentNode); + } + + $parentNode->parentNode->insertBefore($domNode, $parentNode); + + $parentClone->normalize(); + $parentNode->normalize(); + if (!$parentNode->hasChildNodes()) { + $parentNode->remove(); + } + if (!$parentClone->hasChildNodes()) { + $parentClone->remove(); + } + } } diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php index ad5dacd82..b8c53d439 100644 --- a/app/Util/HtmlDocument.php +++ b/app/Util/HtmlDocument.php @@ -149,19 +149,4 @@ class HtmlDocument { return $this->document->saveHTML($node); } - - /** - * Adopt the given nodes into this document. - * @param DOMNode[] $nodes - * @return DOMNode[] - */ - public function adoptNodes(array $nodes): array - { - $adopted = []; - foreach ($nodes as $node) { - $adopted[] = $this->document->importNode($node, true); - } - - return $adopted; - } } diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index d1912270e..e0bd69e93 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -7,7 +7,7 @@ use Tests\TestCase; class PageIncludeParserTest extends TestCase { - public function test_include_simple_inline_text() + public function test_simple_inline_text() { $this->runParserTest( '

{{@45#content}}

', @@ -16,7 +16,7 @@ class PageIncludeParserTest extends TestCase ); } - public function test_include_simple_inline_text_with_existing_siblings() + public function test_simple_inline_text_with_existing_siblings() { $this->runParserTest( '

{{@45#content}} Hithere!

', @@ -25,7 +25,7 @@ class PageIncludeParserTest extends TestCase ); } - public function test_include_simple_inline_text_within_other_text() + public function test_simple_inline_text_within_other_text() { $this->runParserTest( '

Hello {{@45#content}}there!

', @@ -34,6 +34,96 @@ class PageIncludeParserTest extends TestCase ); } + public function test_block_content_types() + { + $inputs = [ + '
Text
', + '
  • Item A
', + '
  1. Item A
', + '
Code
', + ]; + + foreach ($inputs as $input) { + $this->runParserTest( + '

A{{@45#content}}B

', + ['45' => $input], + '

A

' . $input . '

B

', + ); + } + } + + public function test_block_content_nested_origin_gets_placed_before() + { + $this->runParserTest( + '

A {{@45#content}} there!

', + ['45' => '
Testing
'], + '
Testing

A there!

', + ); + } + + public function test_block_content_nested_origin_gets_placed_after() + { + $this->runParserTest( + '

Some really good {{@45#content}} there!

', + ['45' => '
Testing
'], + '

Some really good there!

Testing
', + ); + } + + public function test_block_content_in_shallow_origin_gets_split() + { + $this->runParserTest( + '

Some really good {{@45#content}} there!

', + ['45' => '
doggos
'], + '

Some really good

doggos

there!

', + ); + } + + public function test_block_content_in_shallow_origin_split_does_not_duplicate_id() + { + $this->runParserTest( + '

Some really good {{@45#content}} there!

', + ['45' => '
doggos
'], + '

Some really good

doggos

there!

', + ); + } + + public function test_block_content_in_shallow_origin_does_not_leave_empty_nodes() + { + $this->runParserTest( + '

{{@45#content}}

', + ['45' => '
doggos
'], + '
doggos
', + ); + } + + public function test_simple_whole_document() + { + $this->runParserTest( + '

{{@45}}

', + ['45' => '

Testing

'], + '

Testing

', + ); + } + + public function test_multi_source_elem_whole_document() + { + $this->runParserTest( + '

{{@45}}

', + ['45' => '

Testing

This
'], + '

Testing

This
', + ); + } + + public function test_multi_source_elem_whole_document_with_shared_content_origin() + { + $this->runParserTest( + '

This is {{@45}} some text

', + ['45' => '

Testing

This
'], + '

This is

Testing

This

some text

', + ); + } + protected function runParserTest(string $html, array $contentById, string $expected) { $parser = new PageIncludeParser($html, function (int $id) use ($contentById) { From 4874dc1304ee26737884d787893743ae323e5cf2 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sat, 25 Nov 2023 17:32:00 +0000 Subject: [PATCH 4/7] Includes: Updated logic regarding parent block els, added tests Expanded tests with many more cases, and added fixes for failed scenarios. Updated logic to specifically handling parent

tags, and now assume compatibility with parent block types elswhere to allow use in a variety of scenarios (td, details, blockquote etc...). --- app/Entities/Tools/PageIncludeParser.php | 99 ++++++++++++++++-------- tests/Unit/PageIncludeParserTest.php | 90 +++++++++++++++++++++ 2 files changed, 158 insertions(+), 31 deletions(-) diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 5ce847d6c..e55fc22c7 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -13,37 +13,45 @@ class PageIncludeParser { protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/"; + /** + * Elements to clean up and remove if left empty after a parsing operation. + * @var DOMElement[] + */ + protected array $toCleanup = []; + public function __construct( protected string $pageHtml, protected Closure $pageContentForId, ) { } + /** + * Parse out the include tags. + */ public function parse(): string { $doc = new HtmlDocument($this->pageHtml); $tags = $this->locateAndIsolateIncludeTags($doc); - $topLevel = [...$doc->getBodyChildren()]; foreach ($tags as $tag) { $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); $content = new PageIncludeContent($htmlContent, $tag); if (!$content->isInline()) { - $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true); - if ($isParentTopLevel) { + $parentP = $this->getParentParagraph($tag->domNode); + $isWithinParentP = $parentP === $tag->domNode->parentNode; + if ($parentP && $isWithinParentP) { $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); - } else { - $this->promoteTagNodeToBody($tag, $doc->getBody()); + } else if ($parentP) { + $this->moveTagNodeToBesideParent($tag, $parentP); } } $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes()); } - // TODO Notes: May want to eventually parse through backwards, which should avoid issues - // in changes affecting the next tag, where tags may be in the same/adjacent nodes. + $this->cleanup(); return $doc->getBodyInnerHtml(); } @@ -55,7 +63,7 @@ class PageIncludeParser */ protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array { - $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]"); + $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]"); $includeTags = []; /** @var DOMNode $node */ @@ -107,6 +115,7 @@ class PageIncludeParser } /** + * Replace the given node with all those in $replacements * @param DOMNode[] $replacements */ protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void @@ -125,51 +134,79 @@ class PageIncludeParser $toReplace->parentNode->removeChild($toReplace); } - protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void + /** + * Move a tag node to become a sibling of the given parent. + * Will attempt to guess a position based upon the tag content within the parent. + */ + protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void { - /** @var DOMNode $topParent */ - $topParent = $tag->domNode->parentNode; - while ($topParent->parentNode !== $body) { - $topParent = $topParent->parentNode; - } - - $parentText = $topParent->textContent; + $parentText = $parent->textContent; $tagPos = strpos($parentText, $tag->tagContent); $before = $tagPos < (strlen($parentText) / 2); if ($before) { - $body->insertBefore($tag->domNode, $topParent); + $parent->parentNode->insertBefore($tag->domNode, $parent); } else { - $body->insertBefore($tag->domNode, $topParent->nextSibling); + $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling); } } + /** + * Splits the given $parentNode at the location of the $domNode within it. + * Attempts replicate the original $parentNode, moving some of their parent + * children in where needed, before adding the $domNode between. + */ protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void { $children = [...$parentNode->childNodes]; - $splitPos = array_search($domNode, $children, true) ?: count($children); + $splitPos = array_search($domNode, $children, true); + if ($splitPos === false) { + $splitPos = count($children) - 1; + } + $parentClone = $parentNode->cloneNode(); + $parentNode->parentNode->insertBefore($parentClone, $parentNode); $parentClone->removeAttribute('id'); /** @var DOMNode $child */ for ($i = 0; $i < $splitPos; $i++) { - $child = $children[0]; + $child = $children[$i]; $parentClone->appendChild($child); } - if ($parentClone->hasChildNodes()) { - $parentNode->parentNode->insertBefore($parentClone, $parentNode); - } - $parentNode->parentNode->insertBefore($domNode, $parentNode); - $parentClone->normalize(); - $parentNode->normalize(); - if (!$parentNode->hasChildNodes()) { - $parentNode->remove(); - } - if (!$parentClone->hasChildNodes()) { - $parentClone->remove(); + $this->toCleanup[] = $parentNode; + $this->toCleanup[] = $parentClone; + } + + /** + * Get the parent paragraph of the given node, if existing. + */ + protected function getParentParagraph(DOMNode $parent): ?DOMNode + { + do { + if (strtolower($parent->nodeName) === 'p') { + return $parent; + } + + $parent = $parent->parentElement; + } while ($parent !== null); + + return null; + } + + /** + * Cleanup after a parse operation. + * Removes stranded elements we may have left during the parse. + */ + protected function cleanup(): void + { + foreach ($this->toCleanup as $element) { + $element->normalize(); + if ($element->parentNode && !$element->hasChildNodes()) { + $element->parentNode->removeChild($element); + } } } } diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index e0bd69e93..ee7a64344 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -34,6 +34,15 @@ class PageIncludeParserTest extends TestCase ); } + public function test_complex_inline_text_within_other_text() + { + $this->runParserTest( + '

Hello {{@45#content}}there!

', + ['45' => '

Testing withsomeextratags

'], + '

Hello Testing withsomeextratagsthere!

', + ); + } + public function test_block_content_types() { $inputs = [ @@ -97,6 +106,51 @@ class PageIncludeParserTest extends TestCase ); } + public function test_block_content_in_allowable_parent_element() + { + $this->runParserTest( + '
{{@45#content}}
', + ['45' => '
doggos
'], + '
doggos
', + ); + } + + public function test_block_content_in_paragraph_origin_with_allowable_grandparent() + { + $this->runParserTest( + '

{{@45#content}}

', + ['45' => '
doggos
'], + '
doggos
', + ); + } + + public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content() + { + $this->runParserTest( + '

Cute {{@45#content}} over there!

', + ['45' => '
doggos
'], + '

Cute

doggos

over there!

', + ); + } + + public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content() + { + $this->runParserTest( + '

Cute {{@45#content}} over there!

', + ['45' => '
doggos
'], + '
doggos

Cute over there!

', + ); + } + + public function test_block_content_in_paragraph_origin_within_details() + { + $this->runParserTest( + '

{{@45#content}}

', + ['45' => '
doggos
'], + '
doggos
', + ); + } + public function test_simple_whole_document() { $this->runParserTest( @@ -124,6 +178,42 @@ class PageIncludeParserTest extends TestCase ); } + public function test_multiple_tags_in_same_origin_with_inline_content() + { + $this->runParserTest( + '

This {{@45#content}}{{@45#content}} content is {{@45#content}}

', + ['45' => '

inline

'], + '

This inlineinline content is inline

', + ); + } + + public function test_multiple_tags_in_same_origin_with_block_content() + { + $this->runParserTest( + '

This {{@45#content}}{{@45#content}} content is {{@45#content}}

', + ['45' => '
block
'], + '

This

block
block

content is

block
', + ); + } + + public function test_multiple_tags_in_differing_origin_levels_with_block_content() + { + $this->runParserTest( + '

This {{@45#content}} content is {{@45#content}}

{{@45#content}}
', + ['45' => '
block
'], + '
block

This content is

block
block
', + ); + } + + public function test_multiple_tags_in_shallow_origin_with_multi_block_content() + { + $this->runParserTest( + '

{{@45}}C{{@45}}

{{@45}}{{@45}}
', + ['45' => '

A

B

'], + '

A

B

C

A

B

A

B

A

B

', + ); + } + protected function runParserTest(string $html, array $contentById, string $expected) { $parser = new PageIncludeParser($html, function (int $id) use ($contentById) { From 71c93c88787fe3522d1f0f49d445eb24e3a2789e Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 27 Nov 2023 19:54:47 +0000 Subject: [PATCH 5/7] Includes: Switched page to new system - Added mulit-level depth parsing. - Updating usage of HTML doc in page content to be efficient. - Removed now redundant PageContentTest cases. - Made some include system fixes based upon testing. --- app/Entities/Tools/PageContent.php | 113 ++++-------------- app/Entities/Tools/PageIncludeContent.php | 2 +- app/Entities/Tools/PageIncludeParser.php | 24 ++-- app/Theming/CustomHtmlHeadContentProvider.php | 2 +- app/Util/HtmlContentFilter.php | 23 ++-- tests/Entity/PageContentTest.php | 34 +----- tests/Unit/PageIncludeParserTest.php | 10 +- 7 files changed, 60 insertions(+), 148 deletions(-) diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 3e75bd5bb..7f4d695fe 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -275,21 +275,33 @@ class PageContent */ public function render(bool $blankIncludes = false): string { - $content = $this->page->html ?? ''; + $html = $this->page->html ?? ''; + + if (empty($html)) { + return $html; + } + + $doc = new HtmlDocument($html); + + $contentProvider = function (int $id) use ($blankIncludes) { + if ($blankIncludes) { + return ''; + } + return Page::visible()->find($id)->html ?? ''; + }; + + $parser = new PageIncludeParser($doc, $contentProvider); + $nodesAdded = 1; + + for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) { + $nodesAdded = $parser->parse(); + } if (!config('app.allow_content_scripts')) { - $content = HtmlContentFilter::removeScripts($content); + HtmlContentFilter::removeScriptsFromDocument($doc); } - if ($blankIncludes) { - $content = $this->blankPageIncludes($content); - } else { - for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) { - $content = $this->parsePageIncludes($content); - } - } - - return $content; + return $doc->getBodyInnerHtml(); } /** @@ -337,83 +349,4 @@ class PageContent return $tree->toArray(); } - - /** - * Remove any page include tags within the given HTML. - */ - protected function blankPageIncludes(string $html): string - { - return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html); - } - - /** - * Parse any include tags "{{@#section}}" to be part of the page. - */ - protected function parsePageIncludes(string $html): string - { - $matches = []; - preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches); - - foreach ($matches[1] as $index => $includeId) { - $fullMatch = $matches[0][$index]; - $splitInclude = explode('#', $includeId, 2); - - // Get page id from reference - $pageId = intval($splitInclude[0]); - if (is_nan($pageId)) { - continue; - } - - // Find page to use, and default replacement to empty string for non-matches. - /** @var ?Page $matchedPage */ - $matchedPage = Page::visible()->find($pageId); - $replacement = ''; - - if ($matchedPage && count($splitInclude) === 1) { - // If we only have page id, just insert all page html and continue. - $replacement = $matchedPage->html; - } elseif ($matchedPage && count($splitInclude) > 1) { - // Otherwise, if our include tag defines a section, load that specific content - $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]); - $replacement = trim($innerContent); - } - - $themeReplacement = Theme::dispatch( - ThemeEvents::PAGE_INCLUDE_PARSE, - $includeId, - $replacement, - clone $this->page, - $matchedPage ? (clone $matchedPage) : null, - ); - - // Perform the content replacement - $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html); - } - - return $html; - } - - /** - * Fetch the content from a specific section of the given page. - */ - protected function fetchSectionOfPage(Page $page, string $sectionId): string - { - $topLevelTags = ['table', 'ul', 'ol', 'pre']; - $doc = new HtmlDocument($page->html); - - // Search included content for the id given and blank out if not exists. - $matchingElem = $doc->getElementById($sectionId); - if ($matchingElem === null) { - return ''; - } - - // Otherwise replace the content with the found content - // Checks if the top-level wrapper should be included by matching on tag types - $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags); - if ($isTopLevel) { - return $doc->getNodeOuterHtml($matchingElem); - } - - return $doc->getNodeInnerHtml($matchingElem); - } } diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php index 97c470c68..2e173859d 100644 --- a/app/Entities/Tools/PageIncludeContent.php +++ b/app/Entities/Tools/PageIncludeContent.php @@ -14,7 +14,7 @@ class PageIncludeContent */ protected array $contents = []; - protected bool $isTopLevel; + protected bool $isTopLevel = false; public function __construct( string $html, diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index e55fc22c7..660c60372 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -20,19 +20,19 @@ class PageIncludeParser protected array $toCleanup = []; public function __construct( - protected string $pageHtml, + protected HtmlDocument $doc, protected Closure $pageContentForId, ) { } /** * Parse out the include tags. + * Returns the count of new content DOM nodes added to the document. */ - public function parse(): string + public function parse(): int { - $doc = new HtmlDocument($this->pageHtml); - - $tags = $this->locateAndIsolateIncludeTags($doc); + $nodesAdded = 0; + $tags = $this->locateAndIsolateIncludeTags(); foreach ($tags as $tag) { $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); @@ -48,12 +48,14 @@ class PageIncludeParser } } - $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes()); + $replacementNodes = $content->toDomNodes(); + $nodesAdded += count($replacementNodes); + $this->replaceNodeWithNodes($tag->domNode, $replacementNodes); } $this->cleanup(); - return $doc->getBodyInnerHtml(); + return $nodesAdded; } /** @@ -61,9 +63,9 @@ class PageIncludeParser * own nodes in the DOM for future targeted manipulation. * @return PageIncludeTag[] */ - protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array + protected function locateAndIsolateIncludeTags(): array { - $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]"); + $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]"); $includeTags = []; /** @var DOMNode $node */ @@ -125,7 +127,7 @@ class PageIncludeParser foreach ($replacements as $replacement) { if ($replacement->ownerDocument !== $targetDoc) { - $replacement = $targetDoc->adoptNode($replacement); + $replacement = $targetDoc->importNode($replacement, true); } $toReplace->parentNode->insertBefore($replacement, $toReplace); @@ -190,7 +192,7 @@ class PageIncludeParser return $parent; } - $parent = $parent->parentElement; + $parent = $parent->parentNode; } while ($parent !== null); return null; diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php index 041e5d025..95d9ff5ad 100644 --- a/app/Theming/CustomHtmlHeadContentProvider.php +++ b/app/Theming/CustomHtmlHeadContentProvider.php @@ -50,7 +50,7 @@ class CustomHtmlHeadContentProvider $hash = md5($content); return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) { - return HtmlContentFilter::removeScripts($content); + return HtmlContentFilter::removeScriptsFromHtmlString($content); }); } diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php index 2dbb34086..758591729 100644 --- a/app/Util/HtmlContentFilter.php +++ b/app/Util/HtmlContentFilter.php @@ -9,16 +9,10 @@ use DOMNodeList; class HtmlContentFilter { /** - * Remove all the script elements from the given HTML. + * Remove all the script elements from the given HTML document. */ - public static function removeScripts(string $html): string + public static function removeScriptsFromDocument(HtmlDocument $doc) { - if (empty($html)) { - return $html; - } - - $doc = new HtmlDocument($html); - // Remove standard script tags $scriptElems = $doc->queryXPath('//script'); static::removeNodes($scriptElems); @@ -53,6 +47,19 @@ class HtmlContentFilter // Remove 'on*' attributes $onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]'); static::removeAttributes($onAttributes); + } + + /** + * Remove scripts from the given HTML string. + */ + public static function removeScriptsFromHtmlString(string $html): string + { + if (empty($html)) { + return $html; + } + + $doc = new HtmlDocument($html); + static::removeScriptsFromDocument($doc); return $doc->getBodyInnerHtml(); } diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php index d8845fe12..5b46c08a3 100644 --- a/tests/Entity/PageContentTest.php +++ b/tests/Entity/PageContentTest.php @@ -8,7 +8,7 @@ use Tests\TestCase; class PageContentTest extends TestCase { - protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k='; + protected string $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k='; public function test_page_includes() { @@ -57,38 +57,6 @@ class PageContentTest extends TestCase $this->assertEquals('', $page->text); } - public function test_page_includes_do_not_break_tables() - { - $page = $this->entities->page(); - $secondPage = $this->entities->page(); - - $content = '
test
'; - $secondPage->html = $content; - $secondPage->save(); - - $page->html = "{{@{$secondPage->id}#table}}"; - $page->save(); - - $pageResp = $this->asEditor()->get($page->getUrl()); - $pageResp->assertSee($content, false); - } - - public function test_page_includes_do_not_break_code() - { - $page = $this->entities->page(); - $secondPage = $this->entities->page(); - - $content = '
var cat = null;
'; - $secondPage->html = $content; - $secondPage->save(); - - $page->html = "{{@{$secondPage->id}#bkmrk-code}}"; - $page->save(); - - $pageResp = $this->asEditor()->get($page->getUrl()); - $pageResp->assertSee($content, false); - } - public function test_page_includes_rendered_on_book_export() { $page = $this->entities->page(); diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index ee7a64344..c4c127626 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -3,6 +3,7 @@ namespace Tests\Unit; use BookStack\Entities\Tools\PageIncludeParser; +use BookStack\Util\HtmlDocument; use Tests\TestCase; class PageIncludeParserTest extends TestCase @@ -214,13 +215,14 @@ class PageIncludeParserTest extends TestCase ); } - protected function runParserTest(string $html, array $contentById, string $expected) + protected function runParserTest(string $html, array $contentById, string $expected): void { - $parser = new PageIncludeParser($html, function (int $id) use ($contentById) { + $doc = new HtmlDocument($html); + $parser = new PageIncludeParser($doc, function (int $id) use ($contentById) { return $contentById[strval($id)] ?? ''; }); - $result = $parser->parse(); - $this->assertEquals($expected, $result); + $parser->parse(); + $this->assertEquals($expected, $doc->getBodyInnerHtml()); } } From b569827114693aec0f143a54dce5ddc693174734 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 27 Nov 2023 20:16:27 +0000 Subject: [PATCH 6/7] Includes: Added ID de-duplicating and more thorough clean-up --- app/Entities/Tools/PageContent.php | 10 +++++++--- app/Entities/Tools/PageIncludeParser.php | 7 +++++-- tests/Unit/PageIncludeParserTest.php | 11 ++++++++++- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 7f4d695fe..22190f03f 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -5,8 +5,6 @@ namespace BookStack\Entities\Tools; use BookStack\Entities\Models\Page; use BookStack\Entities\Tools\Markdown\MarkdownToHtml; use BookStack\Exceptions\ImageUploadException; -use BookStack\Facades\Theme; -use BookStack\Theming\ThemeEvents; use BookStack\Uploads\ImageRepo; use BookStack\Uploads\ImageService; use BookStack\Util\HtmlContentFilter; @@ -293,10 +291,16 @@ class PageContent $parser = new PageIncludeParser($doc, $contentProvider); $nodesAdded = 1; - for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) { + for ($includeDepth = 0; $includeDepth < 1 && $nodesAdded !== 0; $includeDepth++) { $nodesAdded = $parser->parse(); } + if ($includeDepth > 1) { + $idMap = []; + $changeMap = []; + $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap); + } + if (!config('app.allow_content_scripts')) { HtmlContentFilter::removeScriptsFromDocument($doc); } diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 660c60372..02af3fce9 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -145,6 +145,7 @@ class PageIncludeParser $parentText = $parent->textContent; $tagPos = strpos($parentText, $tag->tagContent); $before = $tagPos < (strlen($parentText) / 2); + $this->toCleanup[] = $tag->domNode->parentNode; if ($before) { $parent->parentNode->insertBefore($tag->domNode, $parent); @@ -206,8 +207,10 @@ class PageIncludeParser { foreach ($this->toCleanup as $element) { $element->normalize(); - if ($element->parentNode && !$element->hasChildNodes()) { - $element->parentNode->removeChild($element); + while ($element->parentNode && !$element->hasChildNodes()) { + $parent = $element->parentNode; + $parent->removeChild($element); + $element = $parent; } } } diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index c4c127626..fc071cf79 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -179,6 +179,15 @@ class PageIncludeParserTest extends TestCase ); } + public function test_multi_source_elem_whole_document_with_nested_content_origin() + { + $this->runParserTest( + '

{{@45}}

', + ['45' => '

Testing

This
'], + '

Testing

This
', + ); + } + public function test_multiple_tags_in_same_origin_with_inline_content() { $this->runParserTest( @@ -202,7 +211,7 @@ class PageIncludeParserTest extends TestCase $this->runParserTest( '

This {{@45#content}} content is {{@45#content}}

{{@45#content}}
', ['45' => '
block
'], - '
block

This content is

block
block
', + '
block

This content is

block
block
', ); } From 652d5417bf5dbd9700412ad82fdc1a783c83e5a8 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 27 Nov 2023 21:38:43 +0000 Subject: [PATCH 7/7] Includes: Added back support for parse theme event Managed to do this in an API-compatible way although resuling output may differ due to new dom handling in general, although user content is used inline to remain as comptable as possible. --- app/Entities/Tools/PageContent.php | 49 +++++++++++++++++----- app/Entities/Tools/PageIncludeContent.php | 51 +++++++++++++++-------- app/Entities/Tools/PageIncludeParser.php | 7 +++- app/Theming/ThemeEvents.php | 6 +-- app/Theming/ThemeService.php | 8 ++++ tests/Entity/PageContentTest.php | 13 ++++++ tests/Unit/PageIncludeParserTest.php | 7 +++- 7 files changed, 106 insertions(+), 35 deletions(-) diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 22190f03f..99070ae89 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -5,10 +5,13 @@ namespace BookStack\Entities\Tools; use BookStack\Entities\Models\Page; use BookStack\Entities\Tools\Markdown\MarkdownToHtml; use BookStack\Exceptions\ImageUploadException; +use BookStack\Facades\Theme; +use BookStack\Theming\ThemeEvents; use BookStack\Uploads\ImageRepo; use BookStack\Uploads\ImageService; use BookStack\Util\HtmlContentFilter; use BookStack\Util\HtmlDocument; +use Closure; use DOMElement; use DOMNode; use DOMNodeList; @@ -280,18 +283,11 @@ class PageContent } $doc = new HtmlDocument($html); - - $contentProvider = function (int $id) use ($blankIncludes) { - if ($blankIncludes) { - return ''; - } - return Page::visible()->find($id)->html ?? ''; - }; - + $contentProvider = $this->getContentProviderClosure($blankIncludes); $parser = new PageIncludeParser($doc, $contentProvider); - $nodesAdded = 1; - for ($includeDepth = 0; $includeDepth < 1 && $nodesAdded !== 0; $includeDepth++) { + $nodesAdded = 1; + for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) { $nodesAdded = $parser->parse(); } @@ -308,6 +304,39 @@ class PageContent return $doc->getBodyInnerHtml(); } + /** + * Get the closure used to fetch content for page includes. + */ + protected function getContentProviderClosure(bool $blankIncludes): Closure + { + $contextPage = $this->page; + + return function (PageIncludeTag $tag) use ($blankIncludes, $contextPage): PageIncludeContent { + if ($blankIncludes) { + return PageIncludeContent::fromHtmlAndTag('', $tag); + } + + $matchedPage = Page::visible()->find($tag->getPageId()); + $content = PageIncludeContent::fromHtmlAndTag($matchedPage->html ?? '', $tag); + + if (Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) { + $themeReplacement = Theme::dispatch( + ThemeEvents::PAGE_INCLUDE_PARSE, + $tag->tagContent, + $content->toHtml(), + clone $contextPage, + $matchedPage ? (clone $matchedPage) : null, + ); + + if ($themeReplacement !== null) { + $content = PageIncludeContent::fromInlineHtml(strval($themeReplacement)); + } + } + + return $content; + }; + } + /** * Parse the headers on the page to get a navigation menu. */ diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php index 2e173859d..7c4f943c8 100644 --- a/app/Entities/Tools/PageIncludeContent.php +++ b/app/Entities/Tools/PageIncludeContent.php @@ -10,47 +10,53 @@ class PageIncludeContent protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre']; /** - * @var DOMNode[] + * @param DOMNode[] $contents + * @param bool $isInline */ - protected array $contents = []; - - protected bool $isTopLevel = false; - public function __construct( - string $html, - PageIncludeTag $tag, + protected array $contents, + protected bool $isInline, ) { - $this->parseHtml($html, $tag); } - protected function parseHtml(string $html, PageIncludeTag $tag): void + public static function fromHtmlAndTag(string $html, PageIncludeTag $tag): self { if (empty($html)) { - return; + return new self([], true); } $doc = new HtmlDocument($html); $sectionId = $tag->getSectionId(); if (!$sectionId) { - $this->contents = [...$doc->getBodyChildren()]; - $this->isTopLevel = true; - return; + $contents = [...$doc->getBodyChildren()]; + return new self($contents, false); } $section = $doc->getElementById($sectionId); if (!$section) { - return; + return new self([], true); } $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags); - $this->isTopLevel = $isTopLevel; - $this->contents = $isTopLevel ? [$section] : [...$section->childNodes]; + $contents = $isTopLevel ? [$section] : [...$section->childNodes]; + return new self($contents, !$isTopLevel); + } + + public static function fromInlineHtml(string $html): self + { + if (empty($html)) { + return new self([], true); + } + + $doc = new HtmlDocument($html); + + return new self([...$doc->getBodyChildren()], true); } public function isInline(): bool { - return !$this->isTopLevel; + return $this->isInline; } public function isEmpty(): bool @@ -65,4 +71,15 @@ class PageIncludeContent { return $this->contents; } + + public function toHtml(): string + { + $html = ''; + + foreach ($this->contents as $content) { + $html .= $content->ownerDocument->saveHTML($content); + } + + return $html; + } } diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 02af3fce9..f1fbfba03 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -19,6 +19,9 @@ class PageIncludeParser */ protected array $toCleanup = []; + /** + * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId + */ public function __construct( protected HtmlDocument $doc, protected Closure $pageContentForId, @@ -35,8 +38,8 @@ class PageIncludeParser $tags = $this->locateAndIsolateIncludeTags(); foreach ($tags as $tag) { - $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); - $content = new PageIncludeContent($htmlContent, $tag); + /** @var PageIncludeContent $content */ + $content = $this->pageContentForId->call($this, $tag); if (!$content->isInline()) { $parentP = $this->getParentParagraph($tag->domNode); diff --git a/app/Theming/ThemeEvents.php b/app/Theming/ThemeEvents.php index 9e14707de..3d8cd4167 100644 --- a/app/Theming/ThemeEvents.php +++ b/app/Theming/ThemeEvents.php @@ -2,8 +2,6 @@ namespace BookStack\Theming; -use BookStack\Entities\Models\Page; - /** * The ThemeEvents used within BookStack. * @@ -93,8 +91,8 @@ class ThemeEvents * * @param string $tagReference * @param string $replacementHTML - * @param Page $currentPage - * @param ?Page $referencedPage + * @param \BookStack\Entities\Models\Page $currentPage + * @param ?\BookStack\Entities\Models\Page $referencedPage */ const PAGE_INCLUDE_PARSE = 'page_include_parse'; diff --git a/app/Theming/ThemeService.php b/app/Theming/ThemeService.php index 31a7d3c64..0c2526536 100644 --- a/app/Theming/ThemeService.php +++ b/app/Theming/ThemeService.php @@ -48,6 +48,14 @@ class ThemeService return null; } + /** + * Check if there are listeners registered for the given event name. + */ + public function hasListeners(string $event): bool + { + return count($this->listeners[$event] ?? []) > 0; + } + /** * Register a new custom artisan command to be available. */ diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php index 5b46c08a3..958598fda 100644 --- a/tests/Entity/PageContentTest.php +++ b/tests/Entity/PageContentTest.php @@ -88,6 +88,19 @@ class PageContentTest extends TestCase $this->withHtml($pageResp)->assertElementNotContains('#bkmrk-test', 'Hello Barry Hello Barry Hello Barry Hello Barry Hello Barry ' . $tag); } + public function test_page_includes_to_nonexisting_pages_does_not_error() + { + $page = $this->entities->page(); + $missingId = Page::query()->max('id') + 1; + $tag = "{{@{$missingId}}}"; + $page->html = '

Hello Barry ' . $tag . '

'; + $page->save(); + + $pageResp = $this->asEditor()->get($page->getUrl()); + $pageResp->assertOk(); + $pageResp->assertSee('Hello Barry'); + } + public function test_page_content_scripts_removed_by_default() { $this->asEditor(); diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php index fc071cf79..83fded436 100644 --- a/tests/Unit/PageIncludeParserTest.php +++ b/tests/Unit/PageIncludeParserTest.php @@ -2,7 +2,9 @@ namespace Tests\Unit; +use BookStack\Entities\Tools\PageIncludeContent; use BookStack\Entities\Tools\PageIncludeParser; +use BookStack\Entities\Tools\PageIncludeTag; use BookStack\Util\HtmlDocument; use Tests\TestCase; @@ -227,8 +229,9 @@ class PageIncludeParserTest extends TestCase protected function runParserTest(string $html, array $contentById, string $expected): void { $doc = new HtmlDocument($html); - $parser = new PageIncludeParser($doc, function (int $id) use ($contentById) { - return $contentById[strval($id)] ?? ''; + $parser = new PageIncludeParser($doc, function (PageIncludeTag $tag) use ($contentById): PageIncludeContent { + $html = $contentById[strval($tag->getPageId())] ?? ''; + return PageIncludeContent::fromHtmlAndTag($html, $tag); }); $parser->parse();