]> BookStack Code Mirror - bookstack/commitdiff
Includes: Updated logic regarding parent block els, added tests
authorDan Brown <redacted>
Sat, 25 Nov 2023 17:32:00 +0000 (17:32 +0000)
committerDan Brown <redacted>
Sat, 25 Nov 2023 17:32:00 +0000 (17:32 +0000)
Expanded tests with many more cases, and added fixes for failed
scenarios.
Updated logic to specifically handling parent <p> tags, and now assume
compatibility with parent block types elswhere to allow use in a
variety of scenarios (td, details, blockquote etc...).

app/Entities/Tools/PageIncludeParser.php
tests/Unit/PageIncludeParserTest.php

index 5ce847d6c1fea7a87a1ec18589f651dcda61023e..e55fc22c7fc2734aaacdb431b056b2cfddc0810d 100644 (file)
@@ -13,37 +13,45 @@ class PageIncludeParser
 {
     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
 
+    /**
+     * Elements to clean up and remove if left empty after a parsing operation.
+     * @var DOMElement[]
+     */
+    protected array $toCleanup = [];
+
     public function __construct(
         protected string $pageHtml,
         protected Closure $pageContentForId,
     ) {
     }
 
+    /**
+     * Parse out the include tags.
+     */
     public function parse(): string
     {
         $doc = new HtmlDocument($this->pageHtml);
 
         $tags = $this->locateAndIsolateIncludeTags($doc);
-        $topLevel = [...$doc->getBodyChildren()];
 
         foreach ($tags as $tag) {
             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
             $content = new PageIncludeContent($htmlContent, $tag);
 
             if (!$content->isInline()) {
-                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
-                if ($isParentTopLevel) {
+                $parentP = $this->getParentParagraph($tag->domNode);
+                $isWithinParentP = $parentP === $tag->domNode->parentNode;
+                if ($parentP && $isWithinParentP) {
                     $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
-                } else {
-                    $this->promoteTagNodeToBody($tag, $doc->getBody());
+                } else if ($parentP) {
+                    $this->moveTagNodeToBesideParent($tag, $parentP);
                 }
             }
 
             $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
         }
 
-        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
-        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.
+        $this->cleanup();
 
         return $doc->getBodyInnerHtml();
     }
@@ -55,7 +63,7 @@ class PageIncludeParser
      */
     protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
     {
-        $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
         $includeTags = [];
 
         /** @var DOMNode $node */
@@ -107,6 +115,7 @@ class PageIncludeParser
     }
 
     /**
+     * Replace the given node with all those in $replacements
      * @param DOMNode[] $replacements
      */
     protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
@@ -125,51 +134,79 @@ class PageIncludeParser
         $toReplace->parentNode->removeChild($toReplace);
     }
 
-    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * Will attempt to guess a position based upon the tag content within the parent.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
     {
-        /** @var DOMNode $topParent */
-        $topParent = $tag->domNode->parentNode;
-        while ($topParent->parentNode !== $body) {
-            $topParent = $topParent->parentNode;
-        }
-
-        $parentText = $topParent->textContent;
+        $parentText = $parent->textContent;
         $tagPos = strpos($parentText, $tag->tagContent);
         $before = $tagPos < (strlen($parentText) / 2);
 
         if ($before) {
-            $body->insertBefore($tag->domNode, $topParent);
+            $parent->parentNode->insertBefore($tag->domNode, $parent);
         } else {
-            $body->insertBefore($tag->domNode, $topParent->nextSibling);
+            $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
         }
     }
 
+    /**
+     * Splits the given $parentNode at the location of the $domNode within it.
+     * Attempts replicate the original $parentNode, moving some of their parent
+     * children in where needed, before adding the $domNode between.
+     */
     protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
     {
         $children = [...$parentNode->childNodes];
-        $splitPos = array_search($domNode, $children, true) ?: count($children);
+        $splitPos = array_search($domNode, $children, true);
+        if ($splitPos === false) {
+            $splitPos = count($children) - 1;
+        }
+
         $parentClone = $parentNode->cloneNode();
+        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
         $parentClone->removeAttribute('id');
 
         /** @var DOMNode $child */
         for ($i = 0; $i < $splitPos; $i++) {
-            $child = $children[0];
+            $child = $children[$i];
             $parentClone->appendChild($child);
         }
 
-        if ($parentClone->hasChildNodes()) {
-            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
-        }
-
         $parentNode->parentNode->insertBefore($domNode, $parentNode);
 
-        $parentClone->normalize();
-        $parentNode->normalize();
-        if (!$parentNode->hasChildNodes()) {
-            $parentNode->remove();
-        }
-        if (!$parentClone->hasChildNodes()) {
-            $parentClone->remove();
+        $this->toCleanup[] = $parentNode;
+        $this->toCleanup[] = $parentClone;
+    }
+
+    /**
+     * Get the parent paragraph of the given node, if existing.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        do {
+            if (strtolower($parent->nodeName) === 'p') {
+                return $parent;
+            }
+
+            $parent = $parent->parentElement;
+        } while ($parent !== null);
+
+        return null;
+    }
+
+    /**
+     * Cleanup after a parse operation.
+     * Removes stranded elements we may have left during the parse.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $element) {
+            $element->normalize();
+            if ($element->parentNode && !$element->hasChildNodes()) {
+                $element->parentNode->removeChild($element);
+            }
         }
     }
 }
index e0bd69e934b7823a8c22e091c26850d57f65a23e..ee7a64344de7b468643079772224aec7bf07b4bf 100644 (file)
@@ -34,6 +34,15 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_complex_inline_text_within_other_text()
+    {
+        $this->runParserTest(
+            '<p>Hello {{@45#content}}there!</p>',
+            ['45' => '<p id="content"><strong>Testing</strong> with<em>some</em><i>extra</i>tags</p>'],
+            '<p>Hello <strong>Testing</strong> with<em>some</em><i>extra</i>tagsthere!</p>',
+        );
+    }
+
     public function test_block_content_types()
     {
         $inputs = [
@@ -97,6 +106,51 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_block_content_in_allowable_parent_element()
+    {
+        $this->runParserTest(
+            '<div>{{@45#content}}</div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent()
+    {
+        $this->runParserTest(
+            '<div><p>{{@45#content}}</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p>Cute {{@45#content}} over there!</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><p>Cute </p><pre id="content">doggos</pre><p> over there!</p></div>',
+        );
+    }
+
+    public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p><strong>Cute {{@45#content}} over there!</strong></p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre><p><strong>Cute  over there!</strong></p></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_within_details()
+    {
+        $this->runParserTest(
+            '<details><p>{{@45#content}}</p></details>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<details><pre id="content">doggos</pre></details>',
+        );
+    }
+
     public function test_simple_whole_document()
     {
         $this->runParserTest(
@@ -124,6 +178,42 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_multiple_tags_in_same_origin_with_inline_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<p id="content">inline</p>'],
+            '<p>This inlineinline content is inline</p>',
+        );
+    }
+
+    public function test_multiple_tags_in_same_origin_with_block_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<p>This </p><pre id="content">block</pre><pre id="content">block</pre><p> content is </p><pre id="content">block</pre>',
+        );
+    }
+
+    public function test_multiple_tags_in_differing_origin_levels_with_block_content()
+    {
+        $this->runParserTest(
+            '<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<div><pre id="content">block</pre><p>This <strong></strong> content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
+        );
+    }
+
+    public function test_multiple_tags_in_shallow_origin_with_multi_block_content()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}C{{@45}}</p><div>{{@45}}{{@45}}</div>',
+            ['45' => '<p>A</p><p>B</p>'],
+            '<p>A</p><p>B</p><p>C</p><p>A</p><p>B</p><div><p>A</p><p>B</p><p>A</p><p>B</p></div>',
+        );
+    }
+
     protected function runParserTest(string $html, array $contentById, string $expected)
     {
         $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {