BookStack Code Mirror - bookstack/blob - app/Entities/Tools/PageIncludeParser.php
5ce847d6c1fea7a87a1ec18589f651dcda61023e
[bookstack] / app / Entities / Tools / PageIncludeParser.php
1 <?php
2
3 namespace BookStack\Entities\Tools;
4
5 use BookStack\Util\HtmlDocument;
6 use Closure;
7 use DOMDocument;
8 use DOMElement;
9 use DOMNode;
10 use DOMText;
11
/**
 * Parses page HTML for include tags of the form "{{@...}}" and replaces
 * each tag with the content supplied for it by a caller-provided closure.
 */
class PageIncludeParser
{
    /**
     * Pattern for an include tag: "{{@", one optional whitespace character,
     * a captured inner section which must start with a digit, then "}}".
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * @param string  $pageHtml         The HTML in which include tags are searched for.
     * @param Closure $pageContentForId Called with the page ID of each tag found;
     *                                  its return value is used as the HTML content
     *                                  for that include.
     */
    public function __construct(
        protected string $pageHtml,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Replace every include tag in the page HTML with its referenced content.
     *
     * Content which is not inline is first moved so that it sits as a direct
     * child of the body, either by splitting the top-level parent around the
     * tag or by promoting the tag out of a deeper-nested parent.
     *
     * @return string The inner HTML of the document body after replacement.
     */
    public function parse(): string
    {
        $doc = new HtmlDocument($this->pageHtml);

        $tags = $this->locateAndIsolateIncludeTags($doc);
        // Snapshot of the body children as they are before any tag is handled.
        $topLevel = [...$doc->getBodyChildren()];

        foreach ($tags as $tag) {
            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
            $content = new PageIncludeContent($htmlContent, $tag);

            if (!$content->isInline()) {
                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
                if ($isParentTopLevel) {
                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
                } else {
                    $this->promoteTagNodeToBody($tag, $doc->getBody());
                }
            }

            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
        }

        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.

        return $doc->getBodyInnerHtml();
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
    {
        // The predicate form tests every direct text child. The previous
        // "contains(text(), ...)" form only tested the first text child, since
        // XPath 1.0 converts a node-set to the string-value of its first node,
        // which missed tags placed after an inline element.
        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Iterate a snapshot, since splitting inserts new siblings into the host.
            /** @var DOMNode $childNode */
            foreach ([...$node->childNodes] as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Returns found PageIncludeTag references.
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $includeTags = [];
        $text = $textNode->textContent;
        // Offsets captured here are byte offsets, matching the substr()/strlen() use below.
        preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);

        $currentOffset = 0;
        foreach ($matches[0] as $index => $fullTagMatch) {
            $tagOuterContent = $fullTagMatch[0];
            $tagInnerContent = $matches[1][$index][0];
            $tagStartOffset = $fullTagMatch[1];

            // Any text between the previous tag (or the start) and this tag
            // gets its own text node ahead of the tag.
            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
            }

            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // The original node is left holding only the text after the last tag.
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }

    /**
     * Insert the given replacement nodes, in order, at the position of the
     * given node and then remove that node from its parent.
     * Replacements owned by another document are adopted into the target document.
     * @param DOMNode[] $replacements
     */
    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
    {
        /** @var DOMDocument $targetDoc */
        $targetDoc = $toReplace->ownerDocument;

        foreach ($replacements as $replacement) {
            if ($replacement->ownerDocument !== $targetDoc) {
                $replacement = $targetDoc->adoptNode($replacement);
            }

            $toReplace->parentNode->insertBefore($replacement, $toReplace);
        }

        $toReplace->parentNode->removeChild($toReplace);
    }

    /**
     * Move the tag's node out of its nested position to become a direct child
     * of the body, placed immediately before or after its top-level ancestor.
     * The side is chosen by whether the tag content is first found in the
     * first or second half of that ancestor's text.
     */
    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
    {
        // Walk up until reaching the ancestor which is a direct child of the body.
        /** @var DOMNode $topParent */
        $topParent = $tag->domNode->parentNode;
        while ($topParent->parentNode !== $body) {
            $topParent = $topParent->parentNode;
        }

        $parentText = $topParent->textContent;
        $tagPos = strpos($parentText, $tag->tagContent);
        $before = $tagPos < (strlen($parentText) / 2);

        if ($before) {
            $body->insertBefore($tag->domNode, $topParent);
        } else {
            // A null next sibling makes insertBefore() append to the end of the body.
            $body->insertBefore($tag->domNode, $topParent->nextSibling);
        }
    }

    /**
     * Split the given parent in two around one of its child nodes, leaving that
     * child as a sibling between the two halves. Children before the split
     * point move into a clone of the parent (without its "id" attribute), placed
     * ahead of the child; children after it remain in the original parent.
     * A half which ends up with no children is removed.
     */
    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
    {
        $children = [...$parentNode->childNodes];

        // Compare strictly against false: an index of 0 is a valid split
        // position meaning nothing precedes the child.
        $splitPos = array_search($domNode, $children, true);
        if ($splitPos === false) {
            $splitPos = count($children);
        }

        /** @var DOMElement $parentClone */
        $parentClone = $parentNode->cloneNode();
        // Avoid the same id existing twice in the document.
        $parentClone->removeAttribute('id');

        // Move each child preceding the split point, in order, into the clone.
        for ($i = 0; $i < $splitPos; $i++) {
            $parentClone->appendChild($children[$i]);
        }

        if ($parentClone->hasChildNodes()) {
            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
        }

        $parentNode->parentNode->insertBefore($domNode, $parentNode);

        $parentClone->normalize();
        $parentNode->normalize();
        if (!$parentNode->hasChildNodes()) {
            $parentNode->remove();
        }
        if (!$parentClone->hasChildNodes()) {
            $parentClone->remove();
        }
    }
}