3 namespace BookStack\Entities\Tools;
5 use BookStack\Util\HtmlDocument;
/**
 * Finds include tags (`{{@...}}`) within a page's HTML and swaps each one
 * for the content provided for the page it references.
 */
class PageIncludeParser
{
    /**
     * Pattern for a single include tag. The capture group holds the tag's
     * inner reference, which has to start with a digit.
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * Elements to clean up and remove if left empty after a parsing operation.
     *
     * @var DOMElement[]
     */
    protected array $toCleanup = [];

    /**
     * @param string  $pageHtml         HTML to search for include tags.
     * @param Closure $pageContentForId Called with the page id read from a tag; its
     *                                  return value is used as that tag's include HTML.
     */
    public function __construct(
        protected string $pageHtml,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Parse out the include tags.
     *
     * Every tag found is replaced by the DOM nodes of its include content. When that
     * content is not inline and the tag sits within a paragraph, the tag is first
     * moved out of the paragraph so the content is not nested inside it.
     *
     * @return string The inner HTML of the document body once all tags are replaced.
     */
    public function parse(): string
    {
        $doc = new HtmlDocument($this->pageHtml);

        foreach ($this->locateAndIsolateIncludeTags($doc) as $tag) {
            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
            $content = new PageIncludeContent($htmlContent, $tag);

            if (!$content->isInline()) {
                $paragraph = $this->getParentParagraph($tag->domNode);
                if ($paragraph !== null) {
                    if ($paragraph === $tag->domNode->parentNode) {
                        // Tag is a direct child: split the paragraph around it.
                        $this->splitNodeAtChildNode($paragraph, $tag->domNode);
                    } else {
                        // Tag is nested deeper: move it next to the paragraph instead.
                        $this->moveTagNodeToBesideParent($tag, $paragraph);
                    }
                }
            }

            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
        }

        $this->cleanup();

        return $doc->getBodyInnerHtml();
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     *
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
    {
        // Elements under the body that directly hold a text node containing "{{@".
        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Iterate a snapshot: splitting a text node inserts new siblings
            // into this same (live) child list while we are walking it.
            /** @var DOMNode $childNode */
            foreach ([...$node->childNodes] as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Returns found PageIncludeTag references.
     *
     * Text ahead of each tag and the tag itself become new text nodes inserted
     * before the original node; the original node keeps whatever follows the last tag.
     *
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $includeTags = [];
        $text = $textNode->textContent;
        preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);

        // Offsets from PREG_OFFSET_CAPTURE are byte offsets, hence the
        // byte-based substr()/strlen() calls below.
        $currentOffset = 0;
        foreach ($matches[0] as $index => $fullTagMatch) {
            [$tagOuterContent, $tagStartOffset] = $fullTagMatch;
            $tagInnerContent = $matches[1][$index][0];

            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
            }

            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // Only touch the original node when at least one tag was split off.
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }

    /**
     * Replace the given node with all those in $replacements
     *
     * @param DOMNode[] $replacements
     */
    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
    {
        /** @var DOMDocument $targetDoc */
        $targetDoc = $toReplace->ownerDocument;

        foreach ($replacements as $replacement) {
            if ($replacement->ownerDocument !== $targetDoc) {
                // A deep import works on every PHP 8 release, whereas
                // DOMDocument::adoptNode() is only usable from PHP 8.3.
                $replacement = $targetDoc->importNode($replacement, true);
            }

            $toReplace->parentNode->insertBefore($replacement, $toReplace);
        }

        $toReplace->parentNode->removeChild($toReplace);
    }

    /**
     * Move a tag node to become a sibling of the given parent.
     * Will attempt to guess a position based upon the tag content within the parent.
     */
    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
    {
        $parentText = $parent->textContent;
        $tagPos = strpos($parentText, $tag->tagContent);

        // A tag found in the first half of the parent's text goes before the
        // parent, otherwise it goes after.
        $before = $tagPos < (strlen($parentText) / 2);

        // A null reference node (no next sibling) makes insertBefore() append.
        $reference = $before ? $parent : $parent->nextSibling;
        $parent->parentNode->insertBefore($tag->domNode, $reference);
    }

    /**
     * Splits the given $parentNode at the location of the $domNode within it.
     * Attempts to replicate the original $parentNode, moving some of its
     * children in where needed, before adding the $domNode between.
     *
     * The copy (without its "id" attribute) receives the children ahead of
     * $domNode, while the original element keeps those after it. Both elements
     * are queued for removal in case either ends up empty.
     */
    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
    {
        $children = [...$parentNode->childNodes];
        $splitPos = array_search($domNode, $children, true);
        if ($splitPos === false) {
            $splitPos = count($children) - 1;
        }

        // Shallow clone: attributes are copied, children are not.
        $parentClone = $parentNode->cloneNode();
        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
        $parentClone->removeAttribute('id');

        /** @var DOMNode $child */
        foreach (array_slice($children, 0, $splitPos) as $child) {
            $parentClone->appendChild($child);
        }

        $parentNode->parentNode->insertBefore($domNode, $parentNode);

        $this->toCleanup[] = $parentNode;
        $this->toCleanup[] = $parentClone;
    }

    /**
     * Get the parent paragraph of the given node, if existing.
     * The given node itself is checked first, followed by each of its ancestors.
     */
    protected function getParentParagraph(DOMNode $parent): ?DOMNode
    {
        do {
            if (strtolower($parent->nodeName) === 'p') {
                return $parent;
            }

            // parentNode exists on every PHP version (parentElement needs 8.3+).
            // The walk ends at the document node, whose parentNode is null.
            $parent = $parent->parentNode;
        } while ($parent !== null);

        return null;
    }

    /**
     * Cleanup after a parse operation.
     * Removes stranded elements we may have left during the parse.
     */
    protected function cleanup(): void
    {
        foreach ($this->toCleanup as $element) {
            // Normalizing merges adjacent text nodes and drops empty ones, so
            // an element left holding only empty text counts as childless.
            $element->normalize();
            if ($element->parentNode && !$element->hasChildNodes()) {
                $element->parentNode->removeChild($element);
            }
        }

        // Everything queued has been handled; do not keep hold of the nodes.
        $this->toCleanup = [];
    }
}