X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/75936454cca139d0b226a95ee7a0070bc8702fdc..refs/pull/5685/head:/app/Entities/Tools/PageIncludeParser.php

diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 070b0cc11..e0b89f158 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
 
 use BookStack\Util\HtmlDocument;
 use Closure;
+use DOMDocument;
+use DOMElement;
 use DOMNode;
 use DOMText;
 
@@ -11,61 +13,52 @@ class PageIncludeParser
 {
     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
 
+    /**
+     * Elements to clean up and remove if left empty after a parsing operation.
+     * @var DOMElement[]
+     */
+    protected array $toCleanup = [];
+
+    /**
+     * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId
+     */
     public function __construct(
-        protected string $pageHtml,
+        protected HtmlDocument $doc,
         protected Closure $pageContentForId,
     ) {
     }
 
-    public function parse(): string
+    /**
+     * Parse out the include tags.
+     * Returns the count of new content DOM nodes added to the document.
+     */
+    public function parse(): int
     {
-        $doc = new HtmlDocument($this->pageHtml);
-
-        $tags = $this->locateAndIsolateIncludeTags($doc);
+        $nodesAdded = 0;
+        $tags = $this->locateAndIsolateIncludeTags();
 
         foreach ($tags as $tag) {
-            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
-            $content = new PageIncludeContent($htmlContent, $tag);
-
-            if ($content->isInline()) {
-                $adopted = $doc->adoptNodes($content->toDomNodes());
-                foreach ($adopted as $adoptedContentNode) {
-                    $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+            /** @var PageIncludeContent $content */
+            $content = $this->pageContentForId->call($this, $tag);
+
+            if (!$content->isInline()) {
+                $parentP = $this->getParentParagraph($tag->domNode);
+                $isWithinParentP = $parentP === $tag->domNode->parentNode;
+                if ($parentP && $isWithinParentP) {
+                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
+                } else if ($parentP) {
+                    $this->moveTagNodeToBesideParent($tag, $parentP);
                 }
-                $tag->domNode->parentNode->removeChild($tag->domNode);
-                continue;
             }
 
-            // TODO - Non-inline
+            $replacementNodes = $content->toDomNodes();
+            $nodesAdded += count($replacementNodes);
+            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
         }
 
-        // TODO:
-        // Hunt down the specific text nodes with matches
-        // Split out tag text node from rest of content
-        // Fetch tag content->
-          // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
-          // If inline: Replace current text node with new text or elem
-        // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
-        //     since we could have a range of inline elements
-
-        // [Promote to top-block]
-        // Tricky operation.
-        // Can throw in before or after current top-block depending on relative position
-        // Could [Split] top-block but complex past a single level depth.
-        // Maybe [Split] if one level depth, otherwise default to before/after block
-        // Should work for the vast majority of cases, and not for those which would
-        // technically be invalid in-editor anyway.
-
-        // [Split]
-        // Copy original top-block node type and attrs (apart from ID)
-        // Move nodes after promoted tag-node into copy
-        // Insert copy after original (after promoted top-block eventually)
-
-        // Notes: May want to eventually parse through backwards, which should avoid issues
-        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
-
-
-        return $doc->getBodyInnerHtml();
+        $this->cleanup();
+
+        return $nodesAdded;
     }
 
     /**
@@ -73,14 +66,14 @@ class PageIncludeParser
      * own nodes in the DOM for future targeted manipulation.
      * @return PageIncludeTag[]
      */
-    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+    protected function locateAndIsolateIncludeTags(): array
     {
-        $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
         $includeTags = [];
 
         /** @var DOMNode $node */
-        /** @var DOMNode $childNode */
         foreach ($includeHosts as $node) {
+            /** @var DOMNode $childNode */
             foreach ($node->childNodes as $childNode) {
                 if ($childNode->nodeName === '#text') {
                     array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
@@ -111,10 +104,10 @@ class PageIncludeParser
 
             if ($currentOffset < $tagStartOffset) {
                 $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
-                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
+                $textNode->parentNode->insertBefore($this->doc->createTextNode($previousText), $textNode);
             }
 
-            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
+            $node = $textNode->parentNode->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode);
             $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
             $currentOffset = $tagStartOffset + strlen($tagOuterContent);
         }
@@ -125,4 +118,103 @@ class PageIncludeParser
 
         return $includeTags;
     }
+
+    /**
+     * Replace the given node with all those in $replacements
+     * @param DOMNode[] $replacements
+     */
+    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
+    {
+        /** @var DOMDocument $targetDoc */
+        $targetDoc = $toReplace->ownerDocument;
+
+        foreach ($replacements as $replacement) {
+            if ($replacement->ownerDocument !== $targetDoc) {
+                $replacement = $targetDoc->importNode($replacement, true);
+            }
+
+            $toReplace->parentNode->insertBefore($replacement, $toReplace);
+        }
+
+        $toReplace->parentNode->removeChild($toReplace);
+    }
+
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * Will attempt to guess a position based upon the tag content within the parent.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
+    {
+        $parentText = $parent->textContent;
+        $tagPos = strpos($parentText, $tag->tagContent);
+        $before = $tagPos < (strlen($parentText) / 2);
+        $this->toCleanup[] = $tag->domNode->parentNode;
+
+        if ($before) {
+            $parent->parentNode->insertBefore($tag->domNode, $parent);
+        } else {
+            $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
+        }
+    }
+
+    /**
+     * Splits the given $parentNode at the location of the $domNode within it.
+     * Attempts to replicate the original $parentNode, moving the children that
+     * precede the $domNode into the copy, before adding the $domNode between.
+     */
+    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
+    {
+        $children = [...$parentNode->childNodes];
+        $splitPos = array_search($domNode, $children, true);
+        if ($splitPos === false) {
+            $splitPos = count($children) - 1;
+        }
+
+        $parentClone = $parentNode->cloneNode();
+        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
+        $parentClone->removeAttribute('id');
+
+        for ($i = 0; $i < $splitPos; $i++) {
+            /** @var DOMNode $child */
+            $child = $children[$i];
+            $parentClone->appendChild($child);
+        }
+
+        $parentNode->parentNode->insertBefore($domNode, $parentNode);
+
+        $this->toCleanup[] = $parentNode;
+        $this->toCleanup[] = $parentClone;
+    }
+
+    /**
+     * Get the parent paragraph of the given node, if existing.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        do {
+            if (strtolower($parent->nodeName) === 'p') {
+                return $parent;
+            }
+
+            $parent = $parent->parentNode;
+        } while ($parent !== null);
+
+        return null;
+    }
+
+    /**
+     * Cleanup after a parse operation.
+     * Removes stranded elements we may have left during the parse.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $element) {
+            $element->normalize();
+            while ($element->parentNode && !$element->hasChildNodes()) {
+                $parent = $element->parentNode;
+                $parent->removeChild($element);
+                $element = $parent;
+            }
+        }
+    }
 }