3 namespace BookStack\Entities\Tools;
5 use BookStack\Util\HtmlDocument;
/**
 * Parses page HTML for include tags (text of the form "{{@" followed by a
 * reference starting with a digit and closed by "}}") and replaces each tag
 * with the content provided for the referenced page.
 */
class PageIncludeParser
{
    /**
     * Pattern used to find include tags within text.
     * Capture group 1 holds the inner tag content (everything after the
     * optional single whitespace character, up to the closing braces).
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * @param string  $pageHtml         The HTML to search for include tags.
     * @param Closure $pageContentForId Called with the value of PageIncludeTag::getPageId();
     *                                  its result is passed to PageIncludeContent as the HTML
     *                                  to include.
     */
    public function __construct(
        protected string $pageHtml,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Parse the page HTML, replacing every located include tag with its
     * resolved content, and return the resulting body HTML.
     */
    public function parse(): string
    {
        $doc = new HtmlDocument($this->pageHtml);

        $tags = $this->locateAndIsolateIncludeTags($doc);
        // Snapshot of the body's direct children, taken before any of the
        // restructuring performed in the loop below.
        $topLevel = [...$doc->getBodyChildren()];

        foreach ($tags as $tag) {
            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
            $content = new PageIncludeContent($htmlContent, $tag);

            // Non-inline content is lifted to body level: either by splitting the
            // top-level parent around the tag, or by moving the tag out of a
            // deeper ancestor chain to sit next to its top-level ancestor.
            if (!$content->isInline()) {
                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
                if ($isParentTopLevel) {
                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
                } else {
                    $this->promoteTagNodeToBody($tag, $doc->getBody());
                }
            }

            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
        }

        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.

        return $doc->getBodyInnerHtml();
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     *
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
    {
        // The predicate is applied to each text child individually. A plain
        // contains(text(), ...) would only inspect the FIRST text child of an
        // element, missing tags that follow an inline child element.
        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Iterate over a snapshot since splitting inserts new sibling
            // text nodes into this same parent while we loop.
            /** @var DOMNode $childNode */
            foreach ([...$node->childNodes] as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Text before each tag and the tag itself are inserted as new text nodes
     * ahead of the original node, which is left holding the trailing remainder.
     * Returns found PageIncludeTag references.
     *
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $includeTags = [];
        $text = $textNode->textContent;
        preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);

        // Offsets reported by PREG_OFFSET_CAPTURE are byte offsets, so the
        // byte-wise substr()/strlen() functions are the matching tools here.
        $currentOffset = 0;
        foreach ($matches[0] as $index => $fullTagMatch) {
            $tagOuterContent = $fullTagMatch[0];
            $tagInnerContent = $matches[1][$index][0];
            $tagStartOffset = $fullTagMatch[1];

            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
            }

            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // Only rewrite the original node when at least one tag was consumed.
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }

    /**
     * Replace the given node with the provided nodes, in order, adopting any
     * replacement that belongs to a different document.
     *
     * @param DOMNode[] $replacements
     */
    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
    {
        /** @var DOMDocument $targetDoc */
        $targetDoc = $toReplace->ownerDocument;

        foreach ($replacements as $replacement) {
            if ($replacement->ownerDocument !== $targetDoc) {
                $replacement = $targetDoc->adoptNode($replacement);
            }

            $toReplace->parentNode->insertBefore($replacement, $toReplace);
        }

        $toReplace->parentNode->removeChild($toReplace);
    }

    /**
     * Move the tag's node out of its nested position so it becomes a direct
     * child of the body, placed immediately before or after its top-level
     * ancestor. It goes before when the tag text is found within the first
     * half of that ancestor's text content, and after otherwise.
     */
    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
    {
        // Walk upwards until reaching the ancestor that sits directly in the body.
        /** @var DOMNode $topParent */
        $topParent = $tag->domNode->parentNode;
        while ($topParent->parentNode !== $body) {
            $topParent = $topParent->parentNode;
        }

        $parentText = $topParent->textContent;
        $tagPos = strpos($parentText, $tag->tagContent);
        $before = $tagPos < (strlen($parentText) / 2);

        if ($before) {
            $body->insertBefore($tag->domNode, $topParent);
        } else {
            // A null nextSibling makes insertBefore() append to the body.
            $body->insertBefore($tag->domNode, $topParent->nextSibling);
        }
    }

    /**
     * Split the given parent element in two around the given child node.
     * Children before the child node are moved into an id-less shallow clone
     * of the parent placed ahead of it, the child node itself is placed between
     * the clone and the original parent, and the remaining children stay within
     * the original parent. Whichever side ends up empty is removed.
     */
    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
    {
        $children = [...$parentNode->childNodes];

        // Compare strictly against false: position 0 is a valid match and
        // must not be confused with "not found".
        $splitPos = array_search($domNode, $children, true);
        if ($splitPos === false) {
            $splitPos = count($children);
        }

        /** @var DOMElement $parentClone */
        $parentClone = $parentNode->cloneNode();
        // Avoid duplicating the id within the document.
        $parentClone->removeAttribute('id');

        // Move each child preceding the split position, by its own index,
        // since $children is a static snapshot rather than a live list.
        for ($i = 0; $i < $splitPos; $i++) {
            $parentClone->appendChild($children[$i]);
        }

        if ($parentClone->hasChildNodes()) {
            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
        }

        $parentNode->parentNode->insertBefore($domNode, $parentNode);

        // Merge adjacent text nodes and drop empty ones before checking
        // whether either side of the split was left without content.
        $parentClone->normalize();
        $parentNode->normalize();
        if (!$parentNode->hasChildNodes()) {
            $parentNode->remove();
        }
        if (!$parentClone->hasChildNodes()) {
            $parentClone->remove();
        }
    }
}