X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/75936454cca139d0b226a95ee7a0070bc8702fdc..refs/pull/5685/head:/app/Entities/Tools/PageIncludeParser.php diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php index 070b0cc11..e0b89f158 100644 --- a/app/Entities/Tools/PageIncludeParser.php +++ b/app/Entities/Tools/PageIncludeParser.php @@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools; use BookStack\Util\HtmlDocument; use Closure; +use DOMDocument; +use DOMElement; use DOMNode; use DOMText; @@ -11,61 +13,52 @@ class PageIncludeParser { protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/"; + /** + * Elements to clean up and remove if left empty after a parsing operation. + * @var DOMElement[] + */ + protected array $toCleanup = []; + + /** + * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId + */ public function __construct( - protected string $pageHtml, + protected HtmlDocument $doc, protected Closure $pageContentForId, ) { } - public function parse(): string + /** + * Parse out the include tags. + * Returns the count of new content DOM nodes added to the document. + */ + public function parse(): int { - $doc = new HtmlDocument($this->pageHtml); - - $tags = $this->locateAndIsolateIncludeTags($doc); + $nodesAdded = 0; + $tags = $this->locateAndIsolateIncludeTags(); foreach ($tags as $tag) { - $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); - $content = new PageIncludeContent($htmlContent, $tag); - - if ($content->isInline()) { - $adopted = $doc->adoptNodes($content->toDomNodes()); - foreach ($adopted as $adoptedContentNode) { - $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode); + /** @var PageIncludeContent $content */ + $content = $this->pageContentForId->call($this, $tag); + + if (!$content->isInline()) { + $parentP = $this->getParentParagraph($tag->domNode); + $isWithinParentP = $parentP === $tag->domNode->parentNode; + if ($parentP && $isWithinParentP) { + $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); + } else if ($parentP) { + $this->moveTagNodeToBesideParent($tag, $parentP); } - $tag->domNode->parentNode->removeChild($tag->domNode); - continue; } - // TODO - Non-inline + $replacementNodes = $content->toDomNodes(); + $nodesAdded += count($replacementNodes); + $this->replaceNodeWithNodes($tag->domNode, $replacementNodes); } - // TODO: - // Hunt down the specific text nodes with matches - // Split out tag text node from rest of content - // Fetch tag content-> - // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty - // If inline: Replace current text node with new text or elem - // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content - // since we could have a range of inline elements - - // [Promote to top-block] - // Tricky operation. - // Can throw in before or after current top-block depending on relative position - // Could [Split] top-block but complex past a single level depth. - // Maybe [Split] if one level depth, otherwise default to before/after block - // Should work for the vast majority of cases, and not for those which would - // technically be invalid in-editor anyway. - - // [Split] - // Copy original top-block node type and attrs (apart from ID) - // Move nodes after promoted tag-node into copy - // Insert copy after original (after promoted top-block eventually) - - // Notes: May want to eventually parse through backwards, which should avoid issues - // in changes affecting the next tag, where tags may be in the same/adjacent nodes. - - - return $doc->getBodyInnerHtml(); + $this->cleanup(); + + return $nodesAdded; } /** @@ -73,14 +66,14 @@ class PageIncludeParser * own nodes in the DOM for future targeted manipulation. * @return PageIncludeTag[] */ - protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array + protected function locateAndIsolateIncludeTags(): array { - $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]"); + $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]"); $includeTags = []; /** @var DOMNode $node */ - /** @var DOMNode $childNode */ foreach ($includeHosts as $node) { + /** @var DOMNode $childNode */ foreach ($node->childNodes as $childNode) { if ($childNode->nodeName === '#text') { array_push($includeTags, ...$this->splitTextNodesAtTags($childNode)); @@ -111,10 +104,10 @@ class PageIncludeParser if ($currentOffset < $tagStartOffset) { $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset); - $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode); + $textNode->parentNode->insertBefore($this->doc->createTextNode($previousText), $textNode); } - $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode); + $node = $textNode->parentNode->insertBefore($this->doc->createTextNode($tagOuterContent), $textNode); $includeTags[] = new PageIncludeTag($tagInnerContent, $node); $currentOffset = $tagStartOffset + strlen($tagOuterContent); } @@ -125,4 +118,103 @@ class PageIncludeParser return $includeTags; } + + /** + * Replace the given node with all those in $replacements + * @param DOMNode[] $replacements + */ + protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void + { + /** @var DOMDocument $targetDoc */ + $targetDoc = $toReplace->ownerDocument; + + foreach ($replacements as $replacement) { + if ($replacement->ownerDocument !== $targetDoc) { + $replacement = $targetDoc->importNode($replacement, true); + } + + $toReplace->parentNode->insertBefore($replacement, $toReplace); + } + + $toReplace->parentNode->removeChild($toReplace); + } + + /** + * Move a tag node to become a sibling of the given parent. + * Will attempt to guess a position based upon the tag content within the parent. + */ + protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void + { + $parentText = $parent->textContent; + $tagPos = strpos($parentText, $tag->tagContent); + $before = $tagPos < (strlen($parentText) / 2); + $this->toCleanup[] = $tag->domNode->parentNode; + + if ($before) { + $parent->parentNode->insertBefore($tag->domNode, $parent); + } else { + $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling); + } + } + + /** + * Splits the given $parentNode at the location of the $domNode within it. + * Attempts replicate the original $parentNode, moving some of their parent + * children in where needed, before adding the $domNode between. + */ + protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void + { + $children = [...$parentNode->childNodes]; + $splitPos = array_search($domNode, $children, true); + if ($splitPos === false) { + $splitPos = count($children) - 1; + } + + $parentClone = $parentNode->cloneNode(); + $parentNode->parentNode->insertBefore($parentClone, $parentNode); + $parentClone->removeAttribute('id'); + + for ($i = 0; $i < $splitPos; $i++) { + /** @var DOMNode $child */ + $child = $children[$i]; + $parentClone->appendChild($child); + } + + $parentNode->parentNode->insertBefore($domNode, $parentNode); + + $this->toCleanup[] = $parentNode; + $this->toCleanup[] = $parentClone; + } + + /** + * Get the parent paragraph of the given node, if existing. + */ + protected function getParentParagraph(DOMNode $parent): ?DOMNode + { + do { + if (strtolower($parent->nodeName) === 'p') { + return $parent; + } + + $parent = $parent->parentNode; + } while ($parent !== null); + + return null; + } + + /** + * Cleanup after a parse operation. + * Removes stranded elements we may have left during the parse. + */ + protected function cleanup(): void + { + foreach ($this->toCleanup as $element) { + $element->normalize(); + while ($element->parentNode && !$element->hasChildNodes()) { + $parent = $element->parentNode; + $parent->removeChild($element); + $element = $parent; + } + } + } }