Includes: Developed to get new system working with inline includes

author Dan Brown <redacted>

Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)

committer Dan Brown <redacted>

Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)
author Dan Brown <redacted>
Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)
committer Dan Brown <redacted>
Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)
diff --git a/app/Entities/Tools/PageIncludeContent.php b/app/Entities/Tools/PageIncludeContent.php

new file mode 100644 (file)

index 0000000..97c470c
--- /dev/null
+++ b/app/Entities/Tools/PageIncludeContent.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use BookStack\Util\HtmlDocument;
+use DOMNode;
+
+/**
+ * Holds the DOM nodes resolved for a single include tag, taken either from
+ * the whole of the given HTML or from one identified section within it.
+ */
+class PageIncludeContent
+{
+    /**
+     * Lower-case element names which are included as a whole element,
+     * rather than via their child nodes, when targeted by a section ID.
+     */
+    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];
+
+    /**
+     * @var DOMNode[]
+     */
+    protected array $contents = [];
+
+    /**
+     * Defaults to false so that isInline() remains safe to call when parsing
+     * exits early (empty HTML or unknown section) without resolving content.
+     * Reading a typed property that has no default and was never assigned throws an Error.
+     */
+    protected bool $isTopLevel = false;
+
+    public function __construct(
+        string $html,
+        PageIncludeTag $tag,
+    ) {
+        $this->parseHtml($html, $tag);
+    }
+
+    /**
+     * Resolve the nodes to include from the given HTML, based upon the section
+     * referenced by the given tag (if any).
+     * Leaves the content empty where there's no HTML or the section can't be found.
+     */
+    protected function parseHtml(string $html, PageIncludeTag $tag): void
+    {
+        if (empty($html)) {
+            return;
+        }
+
+        $doc = new HtmlDocument($html);
+
+        // No section specified, so all of the body children are used.
+        $sectionId = $tag->getSectionId();
+        if (!$sectionId) {
+            $this->contents = [...$doc->getBodyChildren()];
+            $this->isTopLevel = true;
+            return;
+        }
+
+        $section = $doc->getElementById($sectionId);
+        if (!$section) {
+            return;
+        }
+
+        // Top-level tags are used as a whole element, otherwise
+        // only the child nodes of the section are used.
+        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags, true);
+        $this->isTopLevel = $isTopLevel;
+        $this->contents = $isTopLevel ? [$section] : [...$section->childNodes];
+    }
+
+    /**
+     * Check if the content is to be placed inline, which is the case for
+     * anything not resolved as top-level content (including empty content).
+     */
+    public function isInline(): bool
+    {
+        return !$this->isTopLevel;
+    }
+
+    /**
+     * Check if no nodes were resolved for this content.
+     */
+    public function isEmpty(): bool
+    {
+        return empty($this->contents);
+    }
+
+    /**
+     * Get the resolved nodes, which belong to the document they were parsed within.
+     * @return DOMNode[]
+     */
+    public function toDomNodes(): array
+    {
+        return $this->contents;
+    }
+}
diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php

index 63d3ea8d6a8df54fef41d5381dde53a25a744ab9..070b0cc11b7e63c10f5239286a1efb354aca0eae 100644 (file)
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
  
  use BookStack\Util\HtmlDocument;
  use Closure;
  
  use BookStack\Util\HtmlDocument;
  use Closure;
+use DOMNode;
+use DOMText;
  
  class PageIncludeParser
  {
  
  class PageIncludeParser
  {
@@ -17,14 +19,25 @@ class PageIncludeParser
  
      public function parse(): string
      {
  
      public function parse(): string
      {
-        $html = new HtmlDocument($this->pageHtml);
+        $doc = new HtmlDocument($this->pageHtml);
  
  
-        $includeHosts = $html->queryXPath("//body//*[contains(text(), '{{@')]");
-        $node = $includeHosts->item(0);
+        $tags = $this->locateAndIsolateIncludeTags($doc);
  
  
-        // One of the direct child textnodes of the "$includeHosts" should be
-        // the one with the include tag within.
-        $textNode = $node->childNodes->item(0);
+        foreach ($tags as $tag) {
+            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
+            $content = new PageIncludeContent($htmlContent, $tag);
+
+            if ($content->isInline()) {
+                $adopted = $doc->adoptNodes($content->toDomNodes());
+                foreach ($adopted as $adoptedContentNode) {
+                    $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+                }
+                $tag->domNode->parentNode->removeChild($tag->domNode);
+                continue;
+            }
+
+            // TODO - Non-inline
+        }
  
          // TODO:
          // Hunt down the specific text nodes with matches
  
          // TODO:
          // Hunt down the specific text nodes with matches
@@ -52,6 +65,64 @@ class PageIncludeParser
          // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
  
  
          // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
  
  
-        return $html->getBodyInnerHtml();
+        return $doc->getBodyInnerHtml();
+    }
+
+    /**
+     * Locate include tags within the given document, isolating them to their
+     * own nodes in the DOM for future targeted manipulation.
+     * @return PageIncludeTag[]
+     */
+    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+    {
+        // Each text child is tested individually. In XPath 1.0 "contains(text(), ...)"
+        // converts the node-set to the string value of its first node only, so tags
+        // positioned after a child element would otherwise be missed.
+        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
+        $includeTags = [];
+
+        /** @var DOMNode $node */
+        foreach ($includeHosts as $node) {
+            // Loop over a snapshot of the children, since splitting inserts
+            // new sibling text nodes into this same parent during the loop.
+            /** @var DOMNode $childNode */
+            foreach ([...$node->childNodes] as $childNode) {
+                if ($childNode->nodeName === '#text') {
+                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
+                }
+            }
+        }
+
+        return $includeTags;
+    }
+
+    /**
+     * Split the given text node around each include tag found in its text, so that
+     * every tag sits in a dedicated text node within the original parent.
+     * Text ahead of a tag is moved to its own node, while any text remaining
+     * after the last tag is kept within the original node.
+     * @return PageIncludeTag[]
+     */
+    protected function splitTextNodesAtTags(DOMNode $textNode): array
+    {
+        $content = $textNode->textContent;
+        preg_match_all(static::$includeTagRegex, $content, $matches, PREG_OFFSET_CAPTURE);
+
+        $found = [];
+        $cursor = 0;
+        foreach ($matches[0] as $matchIndex => [$outer, $start]) {
+            $inner = $matches[1][$matchIndex][0];
+
+            // Carry over any plain text sat between the last tag and this one.
+            if ($start > $cursor) {
+                $leading = new DOMText(substr($content, $cursor, $start - $cursor));
+                $textNode->parentNode->insertBefore($leading, $textNode);
+            }
+
+            $tagNode = $textNode->parentNode->insertBefore(new DOMText($outer), $textNode);
+            $found[] = new PageIncludeTag($inner, $tagNode);
+            $cursor = $start + strlen($outer);
+        }
+
+        // Only where a tag was found: trim the original node down to the trailing text.
+        if ($cursor > 0) {
+            $textNode->textContent = substr($content, $cursor);
+        }
+
+        return $found;
      }
  }
      }
  }
diff --git a/app/Entities/Tools/PageIncludeTag.php b/app/Entities/Tools/PageIncludeTag.php

new file mode 100644 (file)

index 0000000..05a532f
--- /dev/null
+++ b/app/Entities/Tools/PageIncludeTag.php
@@ -0,0 +1,30 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use DOMNode;
+
+/**
+ * Pairs the inner content of an include tag, in a "pageId#sectionId" style
+ * format with the section part being optional, with the DOM node holding the tag.
+ */
+class PageIncludeTag
+{
+    public function __construct(
+        public string $tagContent,
+        public DOMNode $domNode,
+    ) {
+    }
+
+    /**
+     * Get the ID of the page referenced by this tag, being the
+     * integer value of the content found before any '#' character.
+     */
+    public function getPageId(): int
+    {
+        [$pagePart] = explode('#', $this->tagContent, 2);
+
+        return (int) trim($pagePart);
+    }
+
+    /**
+     * Get the ID of the section referenced by this tag, being the content
+     * after the first '#' character. Provides an empty string if there's no section.
+     */
+    public function getSectionId(): string
+    {
+        $parts = explode('#', $this->tagContent, 2);
+
+        return trim($parts[1] ?? '');
+    }
+}
diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php

index b8c53d43916294dccf63ed3821222f2888edf22d..ad5dacd82f3c23744bbf9992396fd859c822473f 100644 (file)
--- a/app/Util/HtmlDocument.php
+++ b/app/Util/HtmlDocument.php
@@ -149,4 +149,19 @@ class HtmlDocument
      {
          return $this->document->saveHTML($node);
      }
      {
          return $this->document->saveHTML($node);
      }
+
+    /**
+     * Import deep copies of the given nodes into this document.
+     * The copies are provided back in the same order as a list.
+     * @param DOMNode[] $nodes
+     * @return DOMNode[]
+     */
+    public function adoptNodes(array $nodes): array
+    {
+        return array_map(
+            fn ($node) => $this->document->importNode($node, true),
+            array_values($nodes),
+        );
+    }
  }
  }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php

index de31504ff5976a1fde32dd080de447798d761882..d1912270eb4fc24865e516d2900d03eb4bebefdb 100644 (file)
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -37,7 +37,7 @@ class PageIncludeParserTest extends TestCase
      protected function runParserTest(string $html, array $contentById, string $expected)
      {
          $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
      protected function runParserTest(string $html, array $contentById, string $expected)
      {
          $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {
-            return $contentById[strval($id)] ?? null;
+            return $contentById[strval($id)] ?? '';
          });
  
          $result = $parser->parse();
          });
  
          $result = $parser->parse();
author	Dan Brown <redacted>
	Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)
committer	Dan Brown <redacted>
	Thu, 23 Nov 2023 14:29:07 +0000 (14:29 +0000)
app/Entities/Tools/PageIncludeContent.php	[new file with mode: 0644]	patch \| blob
app/Entities/Tools/PageIncludeParser.php		patch \| blob \| history
app/Entities/Tools/PageIncludeTag.php	[new file with mode: 0644]	patch \| blob
app/Util/HtmlDocument.php		patch \| blob \| history
tests/Unit/PageIncludeParserTest.php		patch \| blob \| history