BookStack Code Mirror - bookstack/blob - app/Entities/Tools/PageIncludeParser.php

   1 <?php
   2
   3 namespace BookStack\Entities\Tools;
   4
   5 use BookStack\Util\HtmlDocument;
   6 use Closure;
   7 use DOMDocument;
   8 use DOMElement;
   9 use DOMNode;
  10 use DOMText;
  11
  12 class PageIncludeParser
  13 {
  14     protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
  15
  16     /**
  17      * Elements to clean up and remove if left empty after a parsing operation.
  18      * @var DOMElement[]
  19      */
  20     protected array $toCleanup = [];
  21
  22     public function __construct(
  23         protected HtmlDocument $doc,
  24         protected Closure $pageContentForId,
  25     ) {
  26     }
  27
  28     /**
  29      * Parse out the include tags.
  30      * Returns the count of new content DOM nodes added to the document.
  31      */
  32     public function parse(): int
  33     {
  34         $nodesAdded = 0;
  35         $tags = $this->locateAndIsolateIncludeTags();
  36
  37         foreach ($tags as $tag) {
  38             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
  39             $content = new PageIncludeContent($htmlContent, $tag);
  40
  41             if (!$content->isInline()) {
  42                 $parentP = $this->getParentParagraph($tag->domNode);
  43                 $isWithinParentP = $parentP === $tag->domNode->parentNode;
  44                 if ($parentP && $isWithinParentP) {
  45                     $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
  46                 } else if ($parentP) {
  47                     $this->moveTagNodeToBesideParent($tag, $parentP);
  48                 }
  49             }
  50
  51             $replacementNodes = $content->toDomNodes();
  52             $nodesAdded += count($replacementNodes);
  53             $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
  54         }
  55
  56         $this->cleanup();
  57
  58         return $nodesAdded;
  59     }
  60
  61     /**
  62      * Locate include tags within the given document, isolating them to their
  63      * own nodes in the DOM for future targeted manipulation.
  64      * @return PageIncludeTag[]
  65      */
  66     protected function locateAndIsolateIncludeTags(): array
  67     {
  68         $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
  69         $includeTags = [];
  70
  71         /** @var DOMNode $node */
  72         /** @var DOMNode $childNode */
  73         foreach ($includeHosts as $node) {
  74             foreach ($node->childNodes as $childNode) {
  75                 if ($childNode->nodeName === '#text') {
  76                     array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
  77                 }
  78             }
  79         }
  80
  81         return $includeTags;
  82     }
  83
  84     /**
  85      * Takes a text DOMNode and splits its text content at include tags
  86      * into multiple text nodes within the original parent.
  87      * Returns found PageIncludeTag references.
  88      * @return PageIncludeTag[]
  89      */
  90     protected function splitTextNodesAtTags(DOMNode $textNode): array
  91     {
  92         $includeTags = [];
  93         $text = $textNode->textContent;
  94         preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);
  95
  96         $currentOffset = 0;
  97         foreach ($matches[0] as $index => $fullTagMatch) {
  98             $tagOuterContent = $fullTagMatch[0];
  99             $tagInnerContent = $matches[1][$index][0];
 100             $tagStartOffset = $fullTagMatch[1];
 101
 102             if ($currentOffset < $tagStartOffset) {
 103                 $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
 104                 $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
 105             }
 106
 107             $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
 108             $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
 109             $currentOffset = $tagStartOffset + strlen($tagOuterContent);
 110         }
 111
 112         if ($currentOffset > 0) {
 113             $textNode->textContent = substr($text, $currentOffset);
 114         }
 115
 116         return $includeTags;
 117     }
 118
 119     /**
 120      * Replace the given node with all those in $replacements
 121      * @param DOMNode[] $replacements
 122      */
 123     protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
 124     {
 125         /** @var DOMDocument $targetDoc */
 126         $targetDoc = $toReplace->ownerDocument;
 127
 128         foreach ($replacements as $replacement) {
 129             if ($replacement->ownerDocument !== $targetDoc) {
 130                 $replacement = $targetDoc->importNode($replacement, true);
 131             }
 132
 133             $toReplace->parentNode->insertBefore($replacement, $toReplace);
 134         }
 135
 136         $toReplace->parentNode->removeChild($toReplace);
 137     }
 138
 139     /**
 140      * Move a tag node to become a sibling of the given parent.
 141      * Will attempt to guess a position based upon the tag content within the parent.
 142      */
 143     protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
 144     {
 145         $parentText = $parent->textContent;
 146         $tagPos = strpos($parentText, $tag->tagContent);
 147         $before = $tagPos < (strlen($parentText) / 2);
 148         $this->toCleanup[] = $tag->domNode->parentNode;
 149
 150         if ($before) {
 151             $parent->parentNode->insertBefore($tag->domNode, $parent);
 152         } else {
 153             $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
 154         }
 155     }
 156
 157     /**
 158      * Splits the given $parentNode at the location of the $domNode within it.
 159      * Attempts replicate the original $parentNode, moving some of their parent
 160      * children in where needed, before adding the $domNode between.
 161      */
 162     protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
 163     {
 164         $children = [...$parentNode->childNodes];
 165         $splitPos = array_search($domNode, $children, true);
 166         if ($splitPos === false) {
 167             $splitPos = count($children) - 1;
 168         }
 169
 170         $parentClone = $parentNode->cloneNode();
 171         $parentNode->parentNode->insertBefore($parentClone, $parentNode);
 172         $parentClone->removeAttribute('id');
 173
 174         /** @var DOMNode $child */
 175         for ($i = 0; $i < $splitPos; $i++) {
 176             $child = $children[$i];
 177             $parentClone->appendChild($child);
 178         }
 179
 180         $parentNode->parentNode->insertBefore($domNode, $parentNode);
 181
 182         $this->toCleanup[] = $parentNode;
 183         $this->toCleanup[] = $parentClone;
 184     }
 185
 186     /**
 187      * Get the parent paragraph of the given node, if existing.
 188      */
 189     protected function getParentParagraph(DOMNode $parent): ?DOMNode
 190     {
 191         do {
 192             if (strtolower($parent->nodeName) === 'p') {
 193                 return $parent;
 194             }
 195
 196             $parent = $parent->parentNode;
 197         } while ($parent !== null);
 198
 199         return null;
 200     }
 201
 202     /**
 203      * Cleanup after a parse operation.
 204      * Removes stranded elements we may have left during the parse.
 205      */
 206     protected function cleanup(): void
 207     {
 208         foreach ($this->toCleanup as $element) {
 209             $element->normalize();
 210             while ($element->parentNode && !$element->hasChildNodes()) {
 211                 $parent = $element->parentNode;
 212                 $parent->removeChild($element);
 213                 $element = $parent;
 214             }
 215         }
 216     }
 217 }