X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/c88eb729a499197e8b3ab9d5019b4426a65d3d41..refs/pull/5721/head:/app/Entities/Tools/PageIncludeParser.php

diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 5ce847d6c..e0b89f158 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -13,39 +13,52 @@ class PageIncludeParser
 {
     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
 
+    /**
+     * Elements to clean up and remove if left empty after a parsing operation.
+     * @var DOMElement[]
+     */
+    protected array $toCleanup = [];
+
+    /**
+     * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId
+     */
     public function __construct(
-        protected string $pageHtml,
+        protected HtmlDocument $doc,
         protected Closure $pageContentForId,
     ) {
     }
 
-    public function parse(): string
+    /**
+     * Parse out the include tags.
+     * Returns the count of new content DOM nodes added to the document.
+     */
+    public function parse(): int
     {
-        $doc = new HtmlDocument($this->pageHtml);
-
-        $tags = $this->locateAndIsolateIncludeTags($doc);
-        $topLevel = [...$doc->getBodyChildren()];
+        $nodesAdded = 0;
+        $tags = $this->locateAndIsolateIncludeTags();
 
         foreach ($tags as $tag) {
-            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
-            $content = new PageIncludeContent($htmlContent, $tag);
+            /** @var PageIncludeContent $content */
+            $content = $this->pageContentForId->call($this, $tag);
 
             if (!$content->isInline()) {
-                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
-                if ($isParentTopLevel) {
+                $parentP = $this->getParentParagraph($tag->domNode);
+                $isWithinParentP = $parentP === $tag->domNode->parentNode;
+                if ($parentP && $isWithinParentP) {
                     $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
-                } else {
-                    $this->promoteTagNodeToBody($tag, $doc->getBody());
+                } else if ($parentP) {
+                    $this->moveTagNodeToBesideParent($tag, $parentP);
                 }
             }
 
-            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
+            $replacementNodes = $content->toDomNodes();
+            $nodesAdded += count($replacementNodes);
+            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
         }
 
-        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
-        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+        $this->cleanup();
 
-        return $doc->getBodyInnerHtml();
+        return $nodesAdded;
     }
 
     /**
@@ -53,14 +66,14 @@ class PageIncludeParser
      * own nodes in the DOM for future targeted manipulation.
      * @return PageIncludeTag[]
      */
-    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+    protected function locateAndIsolateIncludeTags(): array
     {
-        $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
         $includeTags = [];
 
         /** @var DOMNode $node */
-        /** @var DOMNode $childNode */
         foreach ($includeHosts as $node) {
+            /** @var DOMNode $childNode */
             foreach ($node->childNodes as $childNode) {
                 if ($childNode->nodeName === '#text') {
                     array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
@@ -91,10 +104,10 @@ class PageIncludeParser
 
             if ($currentOffset < $tagStartOffset) {
                 $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
-                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
+                $textNode->parentNode->insertBefore($this->doc->createTextNode($previousText), $textNode);
             }
 
-            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
+            $node = $textNode->parentNode->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode);
             $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
             $currentOffset = $tagStartOffset + strlen($tagOuterContent);
         }
@@ -107,6 +120,7 @@ class PageIncludeParser
     }
 
     /**
+     * Replace the given node with all of the nodes in $replacements.
      * @param DOMNode[] $replacements
      */
     protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
@@ -116,7 +130,7 @@ class PageIncludeParser
 
         foreach ($replacements as $replacement) {
             if ($replacement->ownerDocument !== $targetDoc) {
-                $replacement = $targetDoc->adoptNode($replacement);
+                $replacement = $targetDoc->importNode($replacement, true);
             }
 
             $toReplace->parentNode->insertBefore($replacement, $toReplace);
@@ -125,51 +139,82 @@ class PageIncludeParser
         $toReplace->parentNode->removeChild($toReplace);
     }
 
-    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * Will attempt to guess a position based upon the tag content within the parent.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
     {
-        /** @var DOMNode $topParent */
-        $topParent = $tag->domNode->parentNode;
-        while ($topParent->parentNode !== $body) {
-            $topParent = $topParent->parentNode;
-        }
-
-        $parentText = $topParent->textContent;
+        $parentText = $parent->textContent;
         $tagPos = strpos($parentText, $tag->tagContent);
         $before = $tagPos < (strlen($parentText) / 2);
+        $this->toCleanup[] = $tag->domNode->parentNode;
 
         if ($before) {
-            $body->insertBefore($tag->domNode, $topParent);
+            $parent->parentNode->insertBefore($tag->domNode, $parent);
         } else {
-            $body->insertBefore($tag->domNode, $topParent->nextSibling);
+            $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
         }
     }
 
+    /**
+     * Splits the given $parentNode at the location of the $domNode within it.
+     * Attempts to replicate the original $parentNode, moving some of the parent's
+     * children into the replica where needed, before adding the $domNode between.
+     */
     protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
     {
         $children = [...$parentNode->childNodes];
-        $splitPos = array_search($domNode, $children, true) ?: count($children);
+        $splitPos = array_search($domNode, $children, true);
+        if ($splitPos === false) {
+            $splitPos = count($children) - 1;
+        }
+
         $parentClone = $parentNode->cloneNode();
+        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
         $parentClone->removeAttribute('id');
 
-        /** @var DOMNode $child */
         for ($i = 0; $i < $splitPos; $i++) {
-            $child = $children[0];
+            /** @var DOMNode $child */
+            $child = $children[$i];
             $parentClone->appendChild($child);
         }
 
-        if ($parentClone->hasChildNodes()) {
-            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
-        }
-
         $parentNode->parentNode->insertBefore($domNode, $parentNode);
 
-        $parentClone->normalize();
-        $parentNode->normalize();
-        if (!$parentNode->hasChildNodes()) {
-            $parentNode->remove();
-        }
-        if (!$parentClone->hasChildNodes()) {
-            $parentClone->remove();
+        $this->toCleanup[] = $parentNode;
+        $this->toCleanup[] = $parentClone;
+    }
+
+    /**
+     * Get the closest paragraph (the given node itself, or an ancestor of it), if existing.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        do {
+            if (strtolower($parent->nodeName) === 'p') {
+                return $parent;
+            }
+
+            $parent = $parent->parentNode;
+        } while ($parent !== null);
+
+        return null;
+    }
+
+    /**
+     * Cleanup after a parse operation.
+     * Removes stranded elements we may have left during the parse.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $element) {
+            $element->normalize();
+            while ($element->parentNode && !$element->hasChildNodes()) {
+                $parent = $element->parentNode;
+                $parent->removeChild($element);
+                $element = $parent;
+            }
         }
     }
 }