From 04d21c8a97da9463e6bedda620ecf1282722ea3e Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Wed, 22 Nov 2023 22:14:28 +0000
Subject: [PATCH 1/7] Includes: Started foundations for new include tag parser

---
 app/Entities/Tools/PageIncludeParser.php | 57 ++++++++++++++++++++++++
 tests/Unit/PageIncludeParserTest.php     | 46 +++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 app/Entities/Tools/PageIncludeParser.php
 create mode 100644 tests/Unit/PageIncludeParserTest.php
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
new file mode 100644
index 000000000..63d3ea8d6
--- /dev/null
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -0,0 +1,57 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use BookStack\Util\HtmlDocument;
+use Closure;
+
+class PageIncludeParser
+{
+    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
+
+    public function __construct(
+        protected string $pageHtml,
+        protected Closure $pageContentForId,
+    ) {
+    }
+
+    public function parse(): string
+    {
+        $html = new HtmlDocument($this->pageHtml);
+
+        $includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]");
+        $node = $includeHosts->item(0);
+
+        // One of the direct child textnodes of the "$includeHosts" should be
+        // the one with the include tag within.
+        $textNode = $node->childNodes->item(0);
+
+        // TODO:
+        // Hunt down the specific text nodes with matches
+        // Split out tag text node from rest of content
+        // Fetch tag content->
+          // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
+          // If inline: Replace current text node with new text or elem
+        // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
+        //     since we could have a range of inline elements
+
+        // [Promote to top-block]
+        // Tricky operation.
+        // Can throw in before or after current top-block depending on relative position
+        // Could [Split] top-block but complex past a single level depth.
+        // Maybe [Split] if one level depth, otherwise default to before/after block
+        // Should work for the vast majority of cases, and not for those which would
+        // technically be invalid in-editor anyway.
+
+        // [Split]
+        // Copy original top-block node type and attrs (apart from ID)
+        // Move nodes after promoted tag-node into copy
+        // Insert copy after original (after promoted top-block eventually)
+
+        // Notes: May want to eventually parse through backwards, which should avoid issues
+        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+
+
+        return $html->getBodyInnerHtml();
+    }
+}
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
new file mode 100644
index 000000000..de31504ff
--- /dev/null
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -0,0 +1,46 @@
+<?php
+
+namespace Tests\Unit;
+
+use BookStack\Entities\Tools\PageIncludeParser;
+use Tests\TestCase;
+
+class PageIncludeParserTest extends TestCase
+{
+    public function test_include_simple_inline_text()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}}</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Testing</p>',
+        );
+    }
+
+    public function test_include_simple_inline_text_with_existing_siblings()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}} <strong>Hi</strong>there!</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Testing <strong>Hi</strong>there!</p>',
+        );
+    }
+
+    public function test_include_simple_inline_text_within_other_text()
+    {
+        $this->runParserTest(
+            '<p>Hello {{@45#content}}there!</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Hello Testingthere!</p>',
+        );
+    }
+
+    protected function runParserTest(string $html, array $contentById, string $expected)
+    {
+        $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
+            return $contentById[strval($id)] ?? null;
+        });
+
+        $result = $parser->parse();
+        $this->assertEquals($expected, $result);
+    }
+}

From 75936454cca139d0b226a95ee7a0070bc8702fdc Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Thu, 23 Nov 2023 14:29:07 +0000
Subject: [PATCH 2/7] Includes: Developed to get new system working with inline
 includes

Adds logic for locating and splitting text nodes.
Adds specific classes to offload tag/content specific logic.
---
 app/Entities/Tools/PageIncludeContent.php | 68 ++++++++++++++++++
 app/Entities/Tools/PageIncludeParser.php  | 85 +++++++++++++++++++++--
 app/Entities/Tools/PageIncludeTag.php     | 30 ++++++++
 app/Util/HtmlDocument.php                 | 15 ++++
 tests/Unit/PageIncludeParserTest.php      |  2 +-
 5 files changed, 192 insertions(+), 8 deletions(-)
 create mode 100644 app/Entities/Tools/PageIncludeContent.php
 create mode 100644 app/Entities/Tools/PageIncludeTag.php

diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php
new file mode 100644
index 000000000..97c470c68
--- /dev/null
+++ b/app/Entities/Tools/PageIncludeContent.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use BookStack\Util\HtmlDocument;
+use DOMNode;
+
+class PageIncludeContent
+{
+    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];
+
+    /**
+     * @var DOMNode[]
+     */
+    protected array $contents = [];
+
+    protected bool $isTopLevel;
+
+    public function __construct(
+        string $html,
+        PageIncludeTag $tag,
+    ) {
+        $this->parseHtml($html, $tag);
+    }
+
+    protected function parseHtml(string $html, PageIncludeTag $tag): void
+    {
+        if (empty($html)) {
+            return;
+        }
+
+        $doc = new HtmlDocument($html);
+
+        $sectionId = $tag->getSectionId();
+        if (!$sectionId) {
+            $this->contents = [...$doc->getBodyChildren()];
+            $this->isTopLevel = true;
+            return;
+        }
+
+        $section = $doc->getElementById($sectionId);
+        if (!$section) {
+            return;
+        }
+
+        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags);
+        $this->isTopLevel = $isTopLevel;
+        $this->contents = $isTopLevel ? [$section] : [...$section->childNodes];
+    }
+
+    public function isInline(): bool
+    {
+        return !$this->isTopLevel;
+    }
+
+    public function isEmpty(): bool
+    {
+        return empty($this->contents);
+    }
+
+    /**
+     * @return DOMNode[]
+     */
+    public function toDomNodes(): array
+    {
+        return $this->contents;
+    }
+}
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 63d3ea8d6..070b0cc11 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
 
 use BookStack\Util\HtmlDocument;
 use Closure;
+use DOMNode;
+use DOMText;
 
 class PageIncludeParser
 {
@@ -17,14 +19,25 @@ class PageIncludeParser
 
     public function parse(): string
     {
-        $html = new HtmlDocument($this->pageHtml);
+        $doc = new HtmlDocument($this->pageHtml);
 
-        $includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]");
-        $node = $includeHosts->item(0);
+        $tags = $this->locateAndIsolateIncludeTags($doc);
 
-        // One of the direct child textnodes of the "$includeHosts" should be
-        // the one with the include tag within.
-        $textNode = $node->childNodes->item(0);
+        foreach ($tags as $tag) {
+            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
+            $content = new PageIncludeContent($htmlContent, $tag);
+
+            if ($content->isInline()) {
+                $adopted = $doc->adoptNodes($content->toDomNodes());
+                foreach ($adopted as $adoptedContentNode) {
+                    $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+                }
+                $tag->domNode->parentNode->removeChild($tag->domNode);
+                continue;
+            }
+
+            // TODO - Non-inline
+        }
 
         // TODO:
         // Hunt down the specific text nodes with matches
@@ -52,6 +65,64 @@ class PageIncludeParser
         // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
 
 
-        return $html->getBodyInnerHtml();
+        return $doc->getBodyInnerHtml();
+    }
+
+    /**
+     * Locate include tags within the given document, isolating them to their
+     * own nodes in the DOM for future targeted manipulation.
+     * @return PageIncludeTag[]
+     */
+    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+    {
+        $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+        $includeTags = [];
+
+        /** @var DOMNode $node */
+        /** @var DOMNode $childNode */
+        foreach ($includeHosts as $node) {
+            foreach ($node->childNodes as $childNode) {
+                if ($childNode->nodeName === '#text') {
+                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
+                }
+            }
+        }
+
+        return $includeTags;
+    }
+
+    /**
+     * Takes a text DOMNode and splits its text content at include tags
+     * into multiple text nodes within the original parent.
+     * Returns found PageIncludeTag references.
+     * @return PageIncludeTag[]
+     */
+    protected function splitTextNodesAtTags(DOMNode $textNode): array
+    {
+        $includeTags = [];
+        $text = $textNode->textContent;
+        preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);
+
+        $currentOffset = 0;
+        foreach ($matches[0] as $index => $fullTagMatch) {
+            $tagOuterContent = $fullTagMatch[0];
+            $tagInnerContent = $matches[1][$index][0];
+            $tagStartOffset = $fullTagMatch[1];
+
+            if ($currentOffset < $tagStartOffset) {
+                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
+                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
+            }
+
+            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
+            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
+            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
+        }
+
+        if ($currentOffset > 0) {
+            $textNode->textContent = substr($text, $currentOffset);
+        }
+
+        return $includeTags;
     }
 }
diff --git a/app/Entities/Tools/PageIncludeTag.php b/app/Entities/Tools/PageIncludeTag.php
new file mode 100644
index 000000000..05a532fb2
--- /dev/null
+++ b/app/Entities/Tools/PageIncludeTag.php
@@ -0,0 +1,30 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use DOMNode;
+
+class PageIncludeTag
+{
+    public function __construct(
+        public string $tagContent,
+        public DOMNode $domNode,
+    ) {
+    }
+
+    /**
+     * Get the page ID that this tag references.
+     */
+    public function getPageId(): int
+    {
+        return intval(trim(explode('#', $this->tagContent, 2)[0]));
+    }
+
+    /**
+     * Get the section ID that this tag references (if any)
+     */
+    public function getSectionId(): string
+    {
+        return trim(explode('#', $this->tagContent, 2)[1] ?? '');
+    }
+}
diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php
index b8c53d439..ad5dacd82 100644
--- a/app/Util/HtmlDocument.php
+++ b/app/Util/HtmlDocument.php
@@ -149,4 +149,19 @@ class HtmlDocument
     {
         return $this->document->saveHTML($node);
     }
+
+    /**
+     * Adopt the given nodes into this document.
+     * @param DOMNode[] $nodes
+     * @return DOMNode[]
+     */
+    public function adoptNodes(array $nodes): array
+    {
+        $adopted = [];
+        foreach ($nodes as $node) {
+            $adopted[] = $this->document->importNode($node, true);
+        }
+
+        return $adopted;
+    }
 }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index de31504ff..d1912270e 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -37,7 +37,7 @@ class PageIncludeParserTest extends TestCase
     protected function runParserTest(string $html, array $contentById, string $expected)
     {
         $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
-            return $contentById[strval($id)] ?? null;
+            return $contentById[strval($id)] ?? '';
         });
 
         $result = $parser->parse();

From c88eb729a499197e8b3ab9d5019b4426a65d3d41 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Fri, 24 Nov 2023 23:39:16 +0000
Subject: [PATCH 3/7] Includes: Added block-level handling to new include
 system

Implements block promoting to body (including position choosing based
upon likely tag position within parent) and block splitting where we're
only a single depth down from the body child.
---
 app/Entities/Tools/PageIncludeParser.php | 111 ++++++++++++++++-------
 app/Util/HtmlDocument.php                |  15 ---
 tests/Unit/PageIncludeParserTest.php     |  96 +++++++++++++++++++-
 3 files changed, 172 insertions(+), 50 deletions(-)

diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 070b0cc11..5ce847d6c 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
 
 use BookStack\Util\HtmlDocument;
 use Closure;
+use DOMDocument;
+use DOMElement;
 use DOMNode;
 use DOMText;
 
@@ -22,48 +24,26 @@ class PageIncludeParser
         $doc = new HtmlDocument($this->pageHtml);
 
         $tags = $this->locateAndIsolateIncludeTags($doc);
+        $topLevel = [...$doc->getBodyChildren()];
 
         foreach ($tags as $tag) {
             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
             $content = new PageIncludeContent($htmlContent, $tag);
 
-            if ($content->isInline()) {
-                $adopted = $doc->adoptNodes($content->toDomNodes());
-                foreach ($adopted as $adoptedContentNode) {
-                    $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+            if (!$content->isInline()) {
+                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
+                if ($isParentTopLevel) {
+                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
+                } else {
+                    $this->promoteTagNodeToBody($tag, $doc->getBody());
                 }
-                $tag->domNode->parentNode->removeChild($tag->domNode);
-                continue;
             }
 
-            // TODO - Non-inline
+            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
         }
 
-        // TODO:
-        // Hunt down the specific text nodes with matches
-        // Split out tag text node from rest of content
-        // Fetch tag content->
-          // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
-          // If inline: Replace current text node with new text or elem
-        // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
-        //     since we could have a range of inline elements
-
-        // [Promote to top-block]
-        // Tricky operation.
-        // Can throw in before or after current top-block depending on relative position
-        // Could [Split] top-block but complex past a single level depth.
-        // Maybe [Split] if one level depth, otherwise default to before/after block
-        // Should work for the vast majority of cases, and not for those which would
-        // technically be invalid in-editor anyway.
-
-        // [Split]
-        // Copy original top-block node type and attrs (apart from ID)
-        // Move nodes after promoted tag-node into copy
-        // Insert copy after original (after promoted top-block eventually)
-
-        // Notes: May want to eventually parse through backwards, which should avoid issues
-        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
-
+        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
+        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.
 
         return $doc->getBodyInnerHtml();
     }
@@ -125,4 +105,71 @@ class PageIncludeParser
 
         return $includeTags;
     }
+
+    /**
+     * @param DOMNode[] $replacements
+     */
+    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
+    {
+        /** @var DOMDocument $targetDoc */
+        $targetDoc = $toReplace->ownerDocument;
+
+        foreach ($replacements as $replacement) {
+            if ($replacement->ownerDocument !== $targetDoc) {
+                $replacement = $targetDoc->adoptNode($replacement);
+            }
+
+            $toReplace->parentNode->insertBefore($replacement, $toReplace);
+        }
+
+        $toReplace->parentNode->removeChild($toReplace);
+    }
+
+    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+    {
+        /** @var DOMNode $topParent */
+        $topParent = $tag->domNode->parentNode;
+        while ($topParent->parentNode !== $body) {
+            $topParent = $topParent->parentNode;
+        }
+
+        $parentText = $topParent->textContent;
+        $tagPos = strpos($parentText, $tag->tagContent);
+        $before = $tagPos < (strlen($parentText) / 2);
+
+        if ($before) {
+            $body->insertBefore($tag->domNode, $topParent);
+        } else {
+            $body->insertBefore($tag->domNode, $topParent->nextSibling);
+        }
+    }
+
+    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
+    {
+        $children = [...$parentNode->childNodes];
+        $splitPos = array_search($domNode, $children, true) ?: count($children);
+        $parentClone = $parentNode->cloneNode();
+        $parentClone->removeAttribute('id');
+
+        /** @var DOMNode $child */
+        for ($i = 0; $i < $splitPos; $i++) {
+            $child = $children[0];
+            $parentClone->appendChild($child);
+        }
+
+        if ($parentClone->hasChildNodes()) {
+            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
+        }
+
+        $parentNode->parentNode->insertBefore($domNode, $parentNode);
+
+        $parentClone->normalize();
+        $parentNode->normalize();
+        if (!$parentNode->hasChildNodes()) {
+            $parentNode->remove();
+        }
+        if (!$parentClone->hasChildNodes()) {
+            $parentClone->remove();
+        }
+    }
 }
diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php
index ad5dacd82..b8c53d439 100644
--- a/app/Util/HtmlDocument.php
+++ b/app/Util/HtmlDocument.php
@@ -149,19 +149,4 @@ class HtmlDocument
     {
         return $this->document->saveHTML($node);
     }
-
-    /**
-     * Adopt the given nodes into this document.
-     * @param DOMNode[] $nodes
-     * @return DOMNode[]
-     */
-    public function adoptNodes(array $nodes): array
-    {
-        $adopted = [];
-        foreach ($nodes as $node) {
-            $adopted[] = $this->document->importNode($node, true);
-        }
-
-        return $adopted;
-    }
 }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index d1912270e..e0bd69e93 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -7,7 +7,7 @@ use Tests\TestCase;
 
 class PageIncludeParserTest extends TestCase
 {
-    public function test_include_simple_inline_text()
+    public function test_simple_inline_text()
     {
         $this->runParserTest(
             '<p>{{@45#content}}</p>',
@@ -16,7 +16,7 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
-    public function test_include_simple_inline_text_with_existing_siblings()
+    public function test_simple_inline_text_with_existing_siblings()
     {
         $this->runParserTest(
             '<p>{{@45#content}} <strong>Hi</strong>there!</p>',
@@ -25,7 +25,7 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
-    public function test_include_simple_inline_text_within_other_text()
+    public function test_simple_inline_text_within_other_text()
     {
         $this->runParserTest(
             '<p>Hello {{@45#content}}there!</p>',
@@ -34,6 +34,96 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_block_content_types()
+    {
+        $inputs = [
+            '<table id="content"><td>Text</td></table>',
+            '<ul id="content"><li>Item A</li></ul>',
+            '<ol id="content"><li>Item A</li></ol>',
+            '<pre id="content">Code</pre>',
+        ];
+
+        foreach ($inputs as $input) {
+            $this->runParserTest(
+                '<p>A{{@45#content}}B</p>',
+                ['45' => $input],
+                '<p>A</p>' . $input . '<p>B</p>',
+            );
+        }
+    }
+
+    public function test_block_content_nested_origin_gets_placed_before()
+    {
+        $this->runParserTest(
+            '<p><strong>A {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<pre id="content">Testing</pre><p><strong>A  there!</strong></p>',
+        );
+    }
+
+    public function test_block_content_nested_origin_gets_placed_after()
+    {
+        $this->runParserTest(
+            '<p><strong>Some really good {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<p><strong>Some really good  there!</strong></p><pre id="content">Testing</pre>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_gets_split()
+    {
+        $this->runParserTest(
+            '<p>Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p>Some really good </p><pre id="content">doggos</pre><p> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_split_does_not_duplicate_id()
+    {
+        $this->runParserTest(
+            '<p id="test" title="Hi">Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p title="Hi">Some really good </p><pre id="content">doggos</pre><p id="test" title="Hi"> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_does_not_leave_empty_nodes()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}}</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<pre id="content">doggos</pre>',
+        );
+    }
+
+    public function test_simple_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p id="content">Testing</p>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>Testing</p><blockquote>This</blockquote>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document_with_shared_content_origin()
+    {
+        $this->runParserTest(
+            '<p>This is {{@45}} some text</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>This is </p><p>Testing</p><blockquote>This</blockquote><p> some text</p>',
+        );
+    }
+
     protected function runParserTest(string $html, array $contentById, string $expected)
     {
         $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {

From 4874dc1304ee26737884d787893743ae323e5cf2 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Sat, 25 Nov 2023 17:32:00 +0000
Subject: [PATCH 4/7] Includes: Updated logic regarding parent block els, added
 tests

Expanded tests with many more cases, and added fixes for failed
scenarios.
Updated logic to specifically handling parent <p> tags, and now assume
compatibility with parent block types elswhere to allow use in a
variety of scenarios (td, details, blockquote etc...).
---
 app/Entities/Tools/PageIncludeParser.php | 99 ++++++++++++++++--------
 tests/Unit/PageIncludeParserTest.php     | 90 +++++++++++++++++++++
 2 files changed, 158 insertions(+), 31 deletions(-)

diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 5ce847d6c..e55fc22c7 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -13,37 +13,45 @@ class PageIncludeParser
 {
     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
 
+    /**
+     * Elements to clean up and remove if left empty after a parsing operation.
+     * @var DOMElement[]
+     */
+    protected array $toCleanup = [];
+
     public function __construct(
         protected string $pageHtml,
         protected Closure $pageContentForId,
     ) {
     }
 
+    /**
+     * Parse out the include tags.
+     */
     public function parse(): string
     {
         $doc = new HtmlDocument($this->pageHtml);
 
         $tags = $this->locateAndIsolateIncludeTags($doc);
-        $topLevel = [...$doc->getBodyChildren()];
 
         foreach ($tags as $tag) {
             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
             $content = new PageIncludeContent($htmlContent, $tag);
 
             if (!$content->isInline()) {
-                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
-                if ($isParentTopLevel) {
+                $parentP = $this->getParentParagraph($tag->domNode);
+                $isWithinParentP = $parentP === $tag->domNode->parentNode;
+                if ($parentP && $isWithinParentP) {
                     $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
-                } else {
-                    $this->promoteTagNodeToBody($tag, $doc->getBody());
+                } else if ($parentP) {
+                    $this->moveTagNodeToBesideParent($tag, $parentP);
                 }
             }
 
             $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
         }
 
-        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
-        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+        $this->cleanup();
 
         return $doc->getBodyInnerHtml();
     }
@@ -55,7 +63,7 @@ class PageIncludeParser
      */
     protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
     {
-        $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
         $includeTags = [];
 
         /** @var DOMNode $node */
@@ -107,6 +115,7 @@ class PageIncludeParser
     }
 
     /**
+     * Replace the given node with all those in $replacements
      * @param DOMNode[] $replacements
      */
     protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
@@ -125,51 +134,79 @@ class PageIncludeParser
         $toReplace->parentNode->removeChild($toReplace);
     }
 
-    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * Will attempt to guess a position based upon the tag content within the parent.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
     {
-        /** @var DOMNode $topParent */
-        $topParent = $tag->domNode->parentNode;
-        while ($topParent->parentNode !== $body) {
-            $topParent = $topParent->parentNode;
-        }
-
-        $parentText = $topParent->textContent;
+        $parentText = $parent->textContent;
         $tagPos = strpos($parentText, $tag->tagContent);
         $before = $tagPos < (strlen($parentText) / 2);
 
         if ($before) {
-            $body->insertBefore($tag->domNode, $topParent);
+            $parent->parentNode->insertBefore($tag->domNode, $parent);
         } else {
-            $body->insertBefore($tag->domNode, $topParent->nextSibling);
+            $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
         }
     }
 
+    /**
+     * Splits the given $parentNode at the location of the $domNode within it.
+     * Attempts replicate the original $parentNode, moving some of their parent
+     * children in where needed, before adding the $domNode between.
+     */
     protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
     {
         $children = [...$parentNode->childNodes];
-        $splitPos = array_search($domNode, $children, true) ?: count($children);
+        $splitPos = array_search($domNode, $children, true);
+        if ($splitPos === false) {
+            $splitPos = count($children) - 1;
+        }
+
         $parentClone = $parentNode->cloneNode();
+        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
         $parentClone->removeAttribute('id');
 
         /** @var DOMNode $child */
         for ($i = 0; $i < $splitPos; $i++) {
-            $child = $children[0];
+            $child = $children[$i];
             $parentClone->appendChild($child);
         }
 
-        if ($parentClone->hasChildNodes()) {
-            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
-        }
-
         $parentNode->parentNode->insertBefore($domNode, $parentNode);
 
-        $parentClone->normalize();
-        $parentNode->normalize();
-        if (!$parentNode->hasChildNodes()) {
-            $parentNode->remove();
-        }
-        if (!$parentClone->hasChildNodes()) {
-            $parentClone->remove();
+        $this->toCleanup[] = $parentNode;
+        $this->toCleanup[] = $parentClone;
+    }
+
+    /**
+     * Get the parent paragraph of the given node, if existing.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        do {
+            if (strtolower($parent->nodeName) === 'p') {
+                return $parent;
+            }
+
+            $parent = $parent->parentElement;
+        } while ($parent !== null);
+
+        return null;
+    }
+
+    /**
+     * Cleanup after a parse operation.
+     * Removes stranded elements we may have left during the parse.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $element) {
+            $element->normalize();
+            if ($element->parentNode && !$element->hasChildNodes()) {
+                $element->parentNode->removeChild($element);
+            }
         }
     }
 }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index e0bd69e93..ee7a64344 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -34,6 +34,15 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_complex_inline_text_within_other_text()
+    {
+        $this->runParserTest(
+            '<p>Hello {{@45#content}}there!</p>',
+            ['45' => '<p id="content"><strong>Testing</strong> with<em>some</em><i>extra</i>tags</p>'],
+            '<p>Hello <strong>Testing</strong> with<em>some</em><i>extra</i>tagsthere!</p>',
+        );
+    }
+
     public function test_block_content_types()
     {
         $inputs = [
@@ -97,6 +106,51 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_block_content_in_allowable_parent_element()
+    {
+        $this->runParserTest(
+            '<div>{{@45#content}}</div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent()
+    {
+        $this->runParserTest(
+            '<div><p>{{@45#content}}</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p>Cute {{@45#content}} over there!</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><p>Cute </p><pre id="content">doggos</pre><p> over there!</p></div>',
+        );
+    }
+
+    public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p><strong>Cute {{@45#content}} over there!</strong></p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre><p><strong>Cute  over there!</strong></p></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_within_details()
+    {
+        $this->runParserTest(
+            '<details><p>{{@45#content}}</p></details>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<details><pre id="content">doggos</pre></details>',
+        );
+    }
+
     public function test_simple_whole_document()
     {
         $this->runParserTest(
@@ -124,6 +178,42 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_multiple_tags_in_same_origin_with_inline_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<p id="content">inline</p>'],
+            '<p>This inlineinline content is inline</p>',
+        );
+    }
+
+    public function test_multiple_tags_in_same_origin_with_block_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<p>This </p><pre id="content">block</pre><pre id="content">block</pre><p> content is </p><pre id="content">block</pre>',
+        );
+    }
+
+    public function test_multiple_tags_in_differing_origin_levels_with_block_content()
+    {
+        $this->runParserTest(
+            '<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<div><pre id="content">block</pre><p>This <strong></strong> content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
+        );
+    }
+
+    public function test_multiple_tags_in_shallow_origin_with_multi_block_content()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}C{{@45}}</p><div>{{@45}}{{@45}}</div>',
+            ['45' => '<p>A</p><p>B</p>'],
+            '<p>A</p><p>B</p><p>C</p><p>A</p><p>B</p><div><p>A</p><p>B</p><p>A</p><p>B</p></div>',
+        );
+    }
+
     protected function runParserTest(string $html, array $contentById, string $expected)
     {
         $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {

From 71c93c88787fe3522d1f0f49d445eb24e3a2789e Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Mon, 27 Nov 2023 19:54:47 +0000
Subject: [PATCH 5/7] Includes: Switched page to new system

- Added mulit-level depth parsing.
- Updating usage of HTML doc in page content to be efficient.
- Removed now redundant PageContentTest cases.
- Made some include system fixes based upon testing.
---
 app/Entities/Tools/PageContent.php            | 113 ++++--------------
 app/Entities/Tools/PageIncludeContent.php     |   2 +-
 app/Entities/Tools/PageIncludeParser.php      |  24 ++--
 app/Theming/CustomHtmlHeadContentProvider.php |   2 +-
 app/Util/HtmlContentFilter.php                |  23 ++--
 tests/Entity/PageContentTest.php              |  34 +-----
 tests/Unit/PageIncludeParserTest.php          |  10 +-
 7 files changed, 60 insertions(+), 148 deletions(-)

diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 3e75bd5bb..7f4d695fe 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -275,21 +275,33 @@ class PageContent
      */
     public function render(bool $blankIncludes = false): string
     {
-        $content = $this->page->html ?? '';
+        $html = $this->page->html ?? '';
+
+        if (empty($html)) {
+            return $html;
+        }
+
+        $doc = new HtmlDocument($html);
+
+        $contentProvider = function (int $id) use ($blankIncludes) {
+            if ($blankIncludes) {
+                return '';
+            }
+            return Page::visible()->find($id)->html ?? '';
+        };
+
+        $parser = new PageIncludeParser($doc, $contentProvider);
+        $nodesAdded = 1;
+
+        for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
+            $nodesAdded = $parser->parse();
+        }
 
         if (!config('app.allow_content_scripts')) {
-            $content = HtmlContentFilter::removeScripts($content);
+            HtmlContentFilter::removeScriptsFromDocument($doc);
         }
 
-        if ($blankIncludes) {
-            $content = $this->blankPageIncludes($content);
-        } else {
-            for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
-                $content = $this->parsePageIncludes($content);
-            }
-        }
-
-        return $content;
+        return $doc->getBodyInnerHtml();
     }
 
     /**
@@ -337,83 +349,4 @@ class PageContent
 
         return $tree->toArray();
     }
-
-    /**
-     * Remove any page include tags within the given HTML.
-     */
-    protected function blankPageIncludes(string $html): string
-    {
-        return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
-    }
-
-    /**
-     * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
-     */
-    protected function parsePageIncludes(string $html): string
-    {
-        $matches = [];
-        preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
-
-        foreach ($matches[1] as $index => $includeId) {
-            $fullMatch = $matches[0][$index];
-            $splitInclude = explode('#', $includeId, 2);
-
-            // Get page id from reference
-            $pageId = intval($splitInclude[0]);
-            if (is_nan($pageId)) {
-                continue;
-            }
-
-            // Find page to use, and default replacement to empty string for non-matches.
-            /** @var ?Page $matchedPage */
-            $matchedPage = Page::visible()->find($pageId);
-            $replacement = '';
-
-            if ($matchedPage && count($splitInclude) === 1) {
-                // If we only have page id, just insert all page html and continue.
-                $replacement = $matchedPage->html;
-            } elseif ($matchedPage && count($splitInclude) > 1) {
-                // Otherwise, if our include tag defines a section, load that specific content
-                $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
-                $replacement = trim($innerContent);
-            }
-
-            $themeReplacement = Theme::dispatch(
-                ThemeEvents::PAGE_INCLUDE_PARSE,
-                $includeId,
-                $replacement,
-                clone $this->page,
-                $matchedPage ? (clone $matchedPage) : null,
-            );
-
-            // Perform the content replacement
-            $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
-        }
-
-        return $html;
-    }
-
-    /**
-     * Fetch the content from a specific section of the given page.
-     */
-    protected function fetchSectionOfPage(Page $page, string $sectionId): string
-    {
-        $topLevelTags = ['table', 'ul', 'ol', 'pre'];
-        $doc = new HtmlDocument($page->html);
-
-        // Search included content for the id given and blank out if not exists.
-        $matchingElem = $doc->getElementById($sectionId);
-        if ($matchingElem === null) {
-            return '';
-        }
-
-        // Otherwise replace the content with the found content
-        // Checks if the top-level wrapper should be included by matching on tag types
-        $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags);
-        if ($isTopLevel) {
-            return $doc->getNodeOuterHtml($matchingElem);
-        }
-
-        return $doc->getNodeInnerHtml($matchingElem);
-    }
 }
diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php
index 97c470c68..2e173859d 100644
--- a/app/Entities/Tools/PageIncludeContent.php
+++ b/app/Entities/Tools/PageIncludeContent.php
@@ -14,7 +14,7 @@ class PageIncludeContent
      */
     protected array $contents = [];
 
-    protected bool $isTopLevel;
+    protected bool $isTopLevel = false;
 
     public function __construct(
         string $html,
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index e55fc22c7..660c60372 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -20,19 +20,19 @@ class PageIncludeParser
     protected array $toCleanup = [];
 
     public function __construct(
-        protected string $pageHtml,
+        protected HtmlDocument $doc,
         protected Closure $pageContentForId,
     ) {
     }
 
     /**
      * Parse out the include tags.
+     * Returns the count of new content DOM nodes added to the document.
      */
-    public function parse(): string
+    public function parse(): int
     {
-        $doc = new HtmlDocument($this->pageHtml);
-
-        $tags = $this->locateAndIsolateIncludeTags($doc);
+        $nodesAdded = 0;
+        $tags = $this->locateAndIsolateIncludeTags();
 
         foreach ($tags as $tag) {
             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
@@ -48,12 +48,14 @@ class PageIncludeParser
                 }
             }
 
-            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
+            $replacementNodes = $content->toDomNodes();
+            $nodesAdded += count($replacementNodes);
+            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
         }
 
         $this->cleanup();
 
-        return $doc->getBodyInnerHtml();
+        return $nodesAdded;
     }
 
     /**
@@ -61,9 +63,9 @@ class PageIncludeParser
      * own nodes in the DOM for future targeted manipulation.
      * @return PageIncludeTag[]
      */
-    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+    protected function locateAndIsolateIncludeTags(): array
     {
-        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
+        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
         $includeTags = [];
 
         /** @var DOMNode $node */
@@ -125,7 +127,7 @@ class PageIncludeParser
 
         foreach ($replacements as $replacement) {
             if ($replacement->ownerDocument !== $targetDoc) {
-                $replacement = $targetDoc->adoptNode($replacement);
+                $replacement = $targetDoc->importNode($replacement, true);
             }
 
             $toReplace->parentNode->insertBefore($replacement, $toReplace);
@@ -190,7 +192,7 @@ class PageIncludeParser
                 return $parent;
             }
 
-            $parent = $parent->parentElement;
+            $parent = $parent->parentNode;
         } while ($parent !== null);
 
         return null;
diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php
index 041e5d025..95d9ff5ad 100644
--- a/app/Theming/CustomHtmlHeadContentProvider.php
+++ b/app/Theming/CustomHtmlHeadContentProvider.php
@@ -50,7 +50,7 @@ class CustomHtmlHeadContentProvider
         $hash = md5($content);
 
         return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
-            return HtmlContentFilter::removeScripts($content);
+            return HtmlContentFilter::removeScriptsFromHtmlString($content);
         });
     }
 
diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php
index 2dbb34086..758591729 100644
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@@ -9,16 +9,10 @@ use DOMNodeList;
 class HtmlContentFilter
 {
     /**
-     * Remove all the script elements from the given HTML.
+     * Remove all the script elements from the given HTML document.
      */
-    public static function removeScripts(string $html): string
+    public static function removeScriptsFromDocument(HtmlDocument $doc)
     {
-        if (empty($html)) {
-            return $html;
-        }
-
-        $doc = new HtmlDocument($html);
-
         // Remove standard script tags
         $scriptElems = $doc->queryXPath('//script');
         static::removeNodes($scriptElems);
@@ -53,6 +47,19 @@ class HtmlContentFilter
         // Remove 'on*' attributes
         $onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
         static::removeAttributes($onAttributes);
+    }
+
+    /**
+     * Remove scripts from the given HTML string.
+     */
+    public static function removeScriptsFromHtmlString(string $html): string
+    {
+        if (empty($html)) {
+            return $html;
+        }
+
+        $doc = new HtmlDocument($html);
+        static::removeScriptsFromDocument($doc);
 
         return $doc->getBodyInnerHtml();
     }
diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php
index d8845fe12..5b46c08a3 100644
--- a/tests/Entity/PageContentTest.php
+++ b/tests/Entity/PageContentTest.php
@@ -8,7 +8,7 @@ use Tests\TestCase;
 
 class PageContentTest extends TestCase
 {
-    protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
+    protected string $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
 
     public function test_page_includes()
     {
@@ -57,38 +57,6 @@ class PageContentTest extends TestCase
         $this->assertEquals('', $page->text);
     }
 
-    public function test_page_includes_do_not_break_tables()
-    {
-        $page = $this->entities->page();
-        $secondPage = $this->entities->page();
-
-        $content = '<table id="table"><tbody><tr><td>test</td></tr></tbody></table>';
-        $secondPage->html = $content;
-        $secondPage->save();
-
-        $page->html = "{{@{$secondPage->id}#table}}";
-        $page->save();
-
-        $pageResp = $this->asEditor()->get($page->getUrl());
-        $pageResp->assertSee($content, false);
-    }
-
-    public function test_page_includes_do_not_break_code()
-    {
-        $page = $this->entities->page();
-        $secondPage = $this->entities->page();
-
-        $content = '<pre id="bkmrk-code"><code>var cat = null;</code></pre>';
-        $secondPage->html = $content;
-        $secondPage->save();
-
-        $page->html = "{{@{$secondPage->id}#bkmrk-code}}";
-        $page->save();
-
-        $pageResp = $this->asEditor()->get($page->getUrl());
-        $pageResp->assertSee($content, false);
-    }
-
     public function test_page_includes_rendered_on_book_export()
     {
         $page = $this->entities->page();
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index ee7a64344..c4c127626 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -3,6 +3,7 @@
 namespace Tests\Unit;
 
 use BookStack\Entities\Tools\PageIncludeParser;
+use BookStack\Util\HtmlDocument;
 use Tests\TestCase;
 
 class PageIncludeParserTest extends TestCase
@@ -214,13 +215,14 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
-    protected function runParserTest(string $html, array $contentById, string $expected)
+    protected function runParserTest(string $html, array $contentById, string $expected): void
     {
-        $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
+        $doc = new HtmlDocument($html);
+        $parser = new PageIncludeParser($doc, function (int $id) use ($contentById) {
             return $contentById[strval($id)] ?? '';
         });
 
-        $result = $parser->parse();
-        $this->assertEquals($expected, $result);
+        $parser->parse();
+        $this->assertEquals($expected, $doc->getBodyInnerHtml());
     }
 }

From b569827114693aec0f143a54dce5ddc693174734 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Mon, 27 Nov 2023 20:16:27 +0000
Subject: [PATCH 6/7] Includes: Added ID de-duplicating and more thorough
 clean-up

---
 app/Entities/Tools/PageContent.php       | 10 +++++++---
 app/Entities/Tools/PageIncludeParser.php |  7 +++++--
 tests/Unit/PageIncludeParserTest.php     | 11 ++++++++++-
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 7f4d695fe..22190f03f 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -5,8 +5,6 @@ namespace BookStack\Entities\Tools;
 use BookStack\Entities\Models\Page;
 use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
 use BookStack\Exceptions\ImageUploadException;
-use BookStack\Facades\Theme;
-use BookStack\Theming\ThemeEvents;
 use BookStack\Uploads\ImageRepo;
 use BookStack\Uploads\ImageService;
 use BookStack\Util\HtmlContentFilter;
@@ -293,10 +291,16 @@ class PageContent
         $parser = new PageIncludeParser($doc, $contentProvider);
         $nodesAdded = 1;
 
-        for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
+        for ($includeDepth = 0; $includeDepth < 1 && $nodesAdded !== 0; $includeDepth++) {
             $nodesAdded = $parser->parse();
         }
 
+        if ($includeDepth > 1) {
+            $idMap = [];
+            $changeMap = [];
+            $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
+        }
+
         if (!config('app.allow_content_scripts')) {
             HtmlContentFilter::removeScriptsFromDocument($doc);
         }
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 660c60372..02af3fce9 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -145,6 +145,7 @@ class PageIncludeParser
         $parentText = $parent->textContent;
         $tagPos = strpos($parentText, $tag->tagContent);
         $before = $tagPos < (strlen($parentText) / 2);
+        $this->toCleanup[] = $tag->domNode->parentNode;
 
         if ($before) {
             $parent->parentNode->insertBefore($tag->domNode, $parent);
@@ -206,8 +207,10 @@ class PageIncludeParser
     {
         foreach ($this->toCleanup as $element) {
             $element->normalize();
-            if ($element->parentNode && !$element->hasChildNodes()) {
-                $element->parentNode->removeChild($element);
+            while ($element->parentNode && !$element->hasChildNodes()) {
+                $parent = $element->parentNode;
+                $parent->removeChild($element);
+                $element = $parent;
             }
         }
     }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index c4c127626..fc071cf79 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -179,6 +179,15 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_multi_source_elem_whole_document_with_nested_content_origin()
+    {
+        $this->runParserTest(
+            '<p><strong>{{@45}}</strong></p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>Testing</p><blockquote>This</blockquote>',
+        );
+    }
+
     public function test_multiple_tags_in_same_origin_with_inline_content()
     {
         $this->runParserTest(
@@ -202,7 +211,7 @@ class PageIncludeParserTest extends TestCase
         $this->runParserTest(
             '<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
             ['45' => '<pre id="content">block</pre>'],
-            '<div><pre id="content">block</pre><p>This <strong></strong> content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
+            '<div><pre id="content">block</pre><p>This  content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
         );
     }
 

From 652d5417bf5dbd9700412ad82fdc1a783c83e5a8 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Mon, 27 Nov 2023 21:38:43 +0000
Subject: [PATCH 7/7] Includes: Added back support for parse theme event

Managed to do this in an API-compatible way although resuling output may
differ due to new dom handling in general, although user content is used
inline to remain as comptable as possible.
---
 app/Entities/Tools/PageContent.php        | 49 +++++++++++++++++-----
 app/Entities/Tools/PageIncludeContent.php | 51 +++++++++++++++--------
 app/Entities/Tools/PageIncludeParser.php  |  7 +++-
 app/Theming/ThemeEvents.php               |  6 +--
 app/Theming/ThemeService.php              |  8 ++++
 tests/Entity/PageContentTest.php          | 13 ++++++
 tests/Unit/PageIncludeParserTest.php      |  7 +++-
 7 files changed, 106 insertions(+), 35 deletions(-)

diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 22190f03f..99070ae89 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -5,10 +5,13 @@ namespace BookStack\Entities\Tools;
 use BookStack\Entities\Models\Page;
 use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
 use BookStack\Exceptions\ImageUploadException;
+use BookStack\Facades\Theme;
+use BookStack\Theming\ThemeEvents;
 use BookStack\Uploads\ImageRepo;
 use BookStack\Uploads\ImageService;
 use BookStack\Util\HtmlContentFilter;
 use BookStack\Util\HtmlDocument;
+use Closure;
 use DOMElement;
 use DOMNode;
 use DOMNodeList;
@@ -280,18 +283,11 @@ class PageContent
         }
 
         $doc = new HtmlDocument($html);
-
-        $contentProvider = function (int $id) use ($blankIncludes) {
-            if ($blankIncludes) {
-                return '';
-            }
-            return Page::visible()->find($id)->html ?? '';
-        };
-
+        $contentProvider = $this->getContentProviderClosure($blankIncludes);
         $parser = new PageIncludeParser($doc, $contentProvider);
-        $nodesAdded = 1;
 
-        for ($includeDepth = 0; $includeDepth < 1 && $nodesAdded !== 0; $includeDepth++) {
+        $nodesAdded = 1;
+        for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
             $nodesAdded = $parser->parse();
         }
 
@@ -308,6 +304,39 @@ class PageContent
         return $doc->getBodyInnerHtml();
     }
 
+    /**
+     * Get the closure used to fetch content for page includes.
+     */
+    protected function getContentProviderClosure(bool $blankIncludes): Closure
+    {
+        $contextPage = $this->page;
+
+        return function (PageIncludeTag $tag) use ($blankIncludes, $contextPage): PageIncludeContent {
+            if ($blankIncludes) {
+                return PageIncludeContent::fromHtmlAndTag('', $tag);
+            }
+
+            $matchedPage = Page::visible()->find($tag->getPageId());
+            $content = PageIncludeContent::fromHtmlAndTag($matchedPage->html ?? '', $tag);
+
+            if (Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
+                $themeReplacement = Theme::dispatch(
+                    ThemeEvents::PAGE_INCLUDE_PARSE,
+                    $tag->tagContent,
+                    $content->toHtml(),
+                    clone $contextPage,
+                    $matchedPage ? (clone $matchedPage) : null,
+                );
+
+                if ($themeReplacement !== null) {
+                    $content = PageIncludeContent::fromInlineHtml(strval($themeReplacement));
+                }
+            }
+
+            return $content;
+        };
+    }
+
     /**
      * Parse the headers on the page to get a navigation menu.
      */
diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php
index 2e173859d..7c4f943c8 100644
--- a/app/Entities/Tools/PageIncludeContent.php
+++ b/app/Entities/Tools/PageIncludeContent.php
@@ -10,47 +10,53 @@ class PageIncludeContent
     protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];
 
     /**
-     * @var DOMNode[]
+     * @param DOMNode[] $contents
+     * @param bool $isInline
      */
-    protected array $contents = [];
-
-    protected bool $isTopLevel = false;
-
     public function __construct(
-        string $html,
-        PageIncludeTag $tag,
+        protected array $contents,
+        protected bool $isInline,
     ) {
-        $this->parseHtml($html, $tag);
     }
 
-    protected function parseHtml(string $html, PageIncludeTag $tag): void
+    public static function fromHtmlAndTag(string $html, PageIncludeTag $tag): self
     {
         if (empty($html)) {
-            return;
+            return new self([], true);
         }
 
         $doc = new HtmlDocument($html);
 
         $sectionId = $tag->getSectionId();
         if (!$sectionId) {
-            $this->contents = [...$doc->getBodyChildren()];
-            $this->isTopLevel = true;
-            return;
+            $contents = [...$doc->getBodyChildren()];
+            return new self($contents, false);
         }
 
         $section = $doc->getElementById($sectionId);
         if (!$section) {
-            return;
+            return new self([], true);
         }
 
         $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags);
-        $this->isTopLevel = $isTopLevel;
-        $this->contents = $isTopLevel ? [$section] : [...$section->childNodes];
+        $contents = $isTopLevel ? [$section] : [...$section->childNodes];
+        return new self($contents, !$isTopLevel);
+    }
+
+    public static function fromInlineHtml(string $html): self
+    {
+        if (empty($html)) {
+            return new self([], true);
+        }
+
+        $doc = new HtmlDocument($html);
+
+        return new self([...$doc->getBodyChildren()], true);
     }
 
     public function isInline(): bool
     {
-        return !$this->isTopLevel;
+        return $this->isInline;
     }
 
     public function isEmpty(): bool
@@ -65,4 +71,15 @@ class PageIncludeContent
     {
         return $this->contents;
     }
+
+    public function toHtml(): string
+    {
+        $html = '';
+
+        foreach ($this->contents as $content) {
+            $html .= $content->ownerDocument->saveHTML($content);
+        }
+
+        return $html;
+    }
 }
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 02af3fce9..f1fbfba03 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -19,6 +19,9 @@ class PageIncludeParser
      */
     protected array $toCleanup = [];
 
+    /**
+     * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId
+     */
     public function __construct(
         protected HtmlDocument $doc,
         protected Closure $pageContentForId,
@@ -35,8 +38,8 @@ class PageIncludeParser
         $tags = $this->locateAndIsolateIncludeTags();
 
         foreach ($tags as $tag) {
-            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
-            $content = new PageIncludeContent($htmlContent, $tag);
+            /** @var PageIncludeContent $content */
+            $content = $this->pageContentForId->call($this, $tag);
 
             if (!$content->isInline()) {
                 $parentP = $this->getParentParagraph($tag->domNode);
diff --git a/app/Theming/ThemeEvents.php b/app/Theming/ThemeEvents.php
index 9e14707de..3d8cd4167 100644
--- a/app/Theming/ThemeEvents.php
+++ b/app/Theming/ThemeEvents.php
@@ -2,8 +2,6 @@
 
 namespace BookStack\Theming;
 
-use BookStack\Entities\Models\Page;
-
 /**
  * The ThemeEvents used within BookStack.
  *
@@ -93,8 +91,8 @@ class ThemeEvents
      *
      * @param string $tagReference
      * @param string $replacementHTML
-     * @param Page   $currentPage
-     * @param ?Page  $referencedPage
+     * @param \BookStack\Entities\Models\Page   $currentPage
+     * @param ?\BookStack\Entities\Models\Page  $referencedPage
      */
     const PAGE_INCLUDE_PARSE = 'page_include_parse';
 
diff --git a/app/Theming/ThemeService.php b/app/Theming/ThemeService.php
index 31a7d3c64..0c2526536 100644
--- a/app/Theming/ThemeService.php
+++ b/app/Theming/ThemeService.php
@@ -48,6 +48,14 @@ class ThemeService
         return null;
     }
 
+    /**
+     * Check if there are listeners registered for the given event name.
+     */
+    public function hasListeners(string $event): bool
+    {
+        return count($this->listeners[$event] ?? []) > 0;
+    }
+
     /**
      * Register a new custom artisan command to be available.
      */
diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php
index 5b46c08a3..958598fda 100644
--- a/tests/Entity/PageContentTest.php
+++ b/tests/Entity/PageContentTest.php
@@ -88,6 +88,19 @@ class PageContentTest extends TestCase
         $this->withHtml($pageResp)->assertElementNotContains('#bkmrk-test', 'Hello Barry Hello Barry Hello Barry Hello Barry Hello Barry ' . $tag);
     }
 
+    public function test_page_includes_to_nonexisting_pages_does_not_error()
+    {
+        $page = $this->entities->page();
+        $missingId = Page::query()->max('id') + 1;
+        $tag = "{{@{$missingId}}}";
+        $page->html = '<p id="bkmrk-test">Hello Barry ' . $tag . '</p>';
+        $page->save();
+
+        $pageResp = $this->asEditor()->get($page->getUrl());
+        $pageResp->assertOk();
+        $pageResp->assertSee('Hello Barry');
+    }
+
     public function test_page_content_scripts_removed_by_default()
     {
         $this->asEditor();
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index fc071cf79..83fded436 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -2,7 +2,9 @@
 
 namespace Tests\Unit;
 
+use BookStack\Entities\Tools\PageIncludeContent;
 use BookStack\Entities\Tools\PageIncludeParser;
+use BookStack\Entities\Tools\PageIncludeTag;
 use BookStack\Util\HtmlDocument;
 use Tests\TestCase;
 
@@ -227,8 +229,9 @@ class PageIncludeParserTest extends TestCase
     protected function runParserTest(string $html, array $contentById, string $expected): void
     {
         $doc = new HtmlDocument($html);
-        $parser = new PageIncludeParser($doc, function (int $id) use ($contentById) {
-            return $contentById[strval($id)] ?? '';
+        $parser = new PageIncludeParser($doc, function (PageIncludeTag $tag) use ($contentById): PageIncludeContent {
+            $html = $contentById[strval($tag->getPageId())] ?? '';
+            return PageIncludeContent::fromHtmlAndTag($html, $tag);
         });
 
         $parser->parse();