BookStack Code Mirror - bookstack/blob - app/Entities/Tools/PageIncludeParser.php
02af3fce99aef19a751497620fef76e78522469e
[bookstack] / app / Entities / Tools / PageIncludeParser.php
1 <?php
2
3 namespace BookStack\Entities\Tools;
4
5 use BookStack\Util\HtmlDocument;
6 use Closure;
7 use DOMDocument;
8 use DOMElement;
9 use DOMNode;
10 use DOMText;
11
/**
 * Finds "{{@...}}" include tags within the text of a HTML document and swaps
 * each of them for DOM nodes built from the content of the referenced page.
 */
class PageIncludeParser
{
    /**
     * Pattern used to find include tags within text.
     * Matches a literal "{{@", one optional whitespace character, then content
     * starting with a digit up to the nearest "}}". The first capture group
     * holds that inner content.
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * Elements to clean up and remove if left empty after a parsing operation.
     * @var DOMElement[]
     */
    protected array $toCleanup = [];

    /**
     * @param HtmlDocument $doc              Document to search and update in place.
     * @param Closure      $pageContentForId Called with the value of PageIncludeTag::getPageId(); its return
     *                                       value is handed to PageIncludeContent as the content to include.
     */
    public function __construct(
        protected HtmlDocument $doc,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Parse out the include tags.
     * Returns the count of new content DOM nodes added to the document.
     */
    public function parse(): int
    {
        $nodesAdded = 0;
        $tags = $this->locateAndIsolateIncludeTags();

        foreach ($tags as $tag) {
            // Closure::call() temporarily binds the provider closure to this parser while invoking it.
            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
            $content = new PageIncludeContent($htmlContent, $tag);

            // Content not reported as inline is moved out of any paragraph which hosts the tag,
            // either by splitting that paragraph or by moving the tag to sit beside it.
            if (!$content->isInline()) {
                $parentP = $this->getParentParagraph($tag->domNode);
                $isWithinParentP = $parentP === $tag->domNode->parentNode;
                if ($parentP instanceof DOMElement && $isWithinParentP) {
                    $this->splitNodeAtChildNode($parentP, $tag->domNode);
                } elseif ($parentP) {
                    $this->moveTagNodeToBesideParent($tag, $parentP);
                }
            }

            $replacementNodes = $content->toDomNodes();
            $nodesAdded += count($replacementNodes);
            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
        }

        $this->cleanup();

        return $nodesAdded;
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(): array
    {
        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Work from a snapshot of the children since splitting inserts new
            // siblings into the very list we would otherwise be iterating live.
            /** @var DOMNode $childNode */
            foreach ([...$node->childNodes] as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Text found before each tag, and each tag itself, is inserted as a new
     * text node ahead of the original node, which keeps only the trailing text.
     * Returns found PageIncludeTag references.
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $text = $textNode->textContent;

        // Nothing to split when there are no matches, or if the match attempt failed.
        if (!preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE)) {
            return [];
        }

        $includeTags = [];
        $currentOffset = 0;
        foreach ($matches[0] as $index => $fullTagMatch) {
            $tagOuterContent = $fullTagMatch[0];
            $tagInnerContent = $matches[1][$index][0];
            $tagStartOffset = $fullTagMatch[1];

            // Preserve any text sat between the previous tag (or the start) and this tag.
            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
            }

            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // The original node keeps only the text which followed the last tag.
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }

    /**
     * Replace the given node with all those in $replacements.
     * Replacements owned by another document are deep-imported into the
     * document of the node being replaced before insertion.
     * @param DOMNode[] $replacements
     */
    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
    {
        /** @var DOMDocument $targetDoc */
        $targetDoc = $toReplace->ownerDocument;

        foreach ($replacements as $replacement) {
            if ($replacement->ownerDocument !== $targetDoc) {
                $replacement = $targetDoc->importNode($replacement, true);
            }

            $toReplace->parentNode->insertBefore($replacement, $toReplace);
        }

        $toReplace->parentNode->removeChild($toReplace);
    }

    /**
     * Move a tag node to become a sibling of the given parent.
     * Will attempt to guess a position based upon the tag content within the parent.
     * The element which originally held the tag is registered for cleanup.
     */
    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
    {
        $before = $this->isTagInFirstHalfOfParent($tag->domNode, $tag->tagContent, $parent);
        $this->toCleanup[] = $tag->domNode->parentNode;

        // A null reference node (no next sibling) makes insertBefore() append at the end.
        $referenceNode = $before ? $parent : $parent->nextSibling;
        $parent->parentNode->insertBefore($tag->domNode, $referenceNode);
    }

    /**
     * Check if the given tag node starts within the first half of the text of the given parent.
     * The tag is located via the full text of its own node rather than only its inner content,
     * so that an earlier plain-text occurrence of the inner content (for example a bare page
     * number) is not mistaken for the tag itself.
     */
    protected function isTagInFirstHalfOfParent(DOMNode $tagNode, string $tagContent, DOMNode $parent): bool
    {
        $parentText = $parent->textContent;
        $tagText = $tagNode->textContent;

        $tagPos = $tagText === '' ? false : strpos($parentText, $tagText);
        if ($tagPos === false) {
            // Node text not found within the parent; fall back to searching for the inner content alone.
            $tagPos = strpos($parentText, $tagContent);
        } else {
            // Point at the inner content within the tag so the measured position
            // lines up with that of a direct inner-content lookup.
            $tagPos += (int) strpos($tagText, $tagContent);
        }

        if ($tagPos === false) {
            // Unlocatable tags are treated as "before" unless the parent has no text at all.
            return $parentText !== '';
        }

        return $tagPos < (strlen($parentText) / 2);
    }

    /**
     * Splits the given $parentNode at the location of the $domNode within it.
     * A shallow clone of $parentNode (without its id attribute) is inserted ahead of
     * the original and receives the children found before $domNode, then $domNode is
     * placed between the clone and the original. Both are registered for cleanup.
     */
    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
    {
        $children = [...$parentNode->childNodes];
        $splitPos = array_search($domNode, $children, true);
        if ($splitPos === false) {
            $splitPos = count($children) - 1;
        }

        $parentClone = $parentNode->cloneNode();
        if (!($parentClone instanceof DOMElement)) {
            return;
        }

        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
        // Drop the id from the clone so the same id does not appear twice.
        $parentClone->removeAttribute('id');

        foreach (array_slice($children, 0, max(0, $splitPos)) as $child) {
            $parentClone->appendChild($child);
        }

        $parentNode->parentNode->insertBefore($domNode, $parentNode);

        $this->toCleanup[] = $parentNode;
        $this->toCleanup[] = $parentClone;
    }

    /**
     * Get the parent paragraph of the given node, if existing.
     * The given node itself is checked first, followed by each of its ancestors.
     */
    protected function getParentParagraph(DOMNode $parent): ?DOMNode
    {
        do {
            if (strtolower($parent->nodeName) === 'p') {
                return $parent;
            }

            $parent = $parent->parentNode;
        } while ($parent !== null);

        return null;
    }

    /**
     * Cleanup after a parse operation.
     * Removes stranded elements we may have left during the parse, walking up
     * through ancestors for as long as each one is left without children.
     */
    protected function cleanup(): void
    {
        foreach ($this->toCleanup as $element) {
            // Normalizing merges adjacent text nodes and drops empty ones
            // so that the child check below reflects actual content.
            $element->normalize();
            while ($element->parentNode && !$element->hasChildNodes()) {
                $parent = $element->parentNode;
                $parent->removeChild($element);
                $element = $parent;
            }
        }

        // Release handled elements so they are not revisited by a later parse.
        $this->toCleanup = [];
    }
}