BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/PageIncludeParser.php

index 070b0cc11b7e63c10f5239286a1efb354aca0eae..dad7c29e60ee3392de7b6d56726fee5455074eb5 100644 (file)

--- a/app/Entities/Tools/PageIncludeParser.php

+++ b/app/Entities/Tools/PageIncludeParser.php

@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;

use BookStack\Util\HtmlDocument;

use Closure;

+use DOMDocument;

+use DOMElement;

use DOMNode;

use DOMText;

@@ -11,61 +13,52 @@ class PageIncludeParser

{

protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

+ /**

+ * Elements to clean up and remove if left empty after a parsing operation.

+ * @var DOMElement[]

+ */

+ protected array $toCleanup = [];

+ /**

+ * @param HtmlDocument $doc The document in which include tags are located and replaced.

+ * @param Closure(PageIncludeTag $tag): PageIncludeContent $pageContentForId Provides the content to insert

+ * for a given tag; parse() uses the result as a PageIncludeContent (isInline(), toDomNodes()).

+ */

public function __construct(

- protected string $pageHtml,

+ protected HtmlDocument $doc,

protected Closure $pageContentForId,

) {

}

- public function parse(): string

+ /**

+ * Parse out the include tags, replacing each tag node with the content nodes

+ * provided by the $pageContentForId callback for that tag.

+ * Where the content is not inline, the tag node is first moved out of any

+ * paragraph that contains it, either by splitting that paragraph or by

+ * placing the tag node beside it.

+ * Returns the count of new content DOM nodes added to the document.

+ */

+ public function parse(): int

{

- $doc = new HtmlDocument($this->pageHtml);

- $tags = $this->locateAndIsolateIncludeTags($doc);

+ $nodesAdded = 0;

+ $tags = $this->locateAndIsolateIncludeTags();

foreach ($tags as $tag) {

- $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());

- $content = new PageIncludeContent($htmlContent, $tag);

- if ($content->isInline()) {

- $adopted = $doc->adoptNodes($content->toDomNodes());

- foreach ($adopted as $adoptedContentNode) {

- $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);

+ /** @var PageIncludeContent $content */

+ $content = $this->pageContentForId->call($this, $tag);

+ if (!$content->isInline()) { // Non-inline content: relocate the tag node out of any enclosing paragraph first.

+ $parentP = $this->getParentParagraph($tag->domNode);

+ $isWithinParentP = $parentP === $tag->domNode->parentNode;

+ if ($parentP && $isWithinParentP) { // Tag is a direct child of the paragraph: split the paragraph around it.

+ $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);

+ } else if ($parentP) { // Tag is nested deeper within the paragraph: move it beside the paragraph instead.

+ $this->moveTagNodeToBesideParent($tag, $parentP);

}

- $tag->domNode->parentNode->removeChild($tag->domNode);

- continue;

}

- // TODO - Non-inline

+ $replacementNodes = $content->toDomNodes();

+ $nodesAdded += count($replacementNodes); // Counts only the nodes in the replacement list itself, not their descendants.

+ $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);

}

- // TODO:

- // Hunt down the specific text nodes with matches

- // Split out tag text node from rest of content

- // Fetch tag content->

- // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty

- // If inline: Replace current text node with new text or elem

- // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content

- // since we could have a range of inline elements

- // [Promote to top-block]

- // Tricky operation.

- // Can throw in before or after current top-block depending on relative position

- // Could [Split] top-block but complex past a single level depth.

- // Maybe [Split] if one level depth, otherwise default to before/after block

- // Should work for the vast majority of cases, and not for those which would

- // technically be invalid in-editor anyway.

- // [Split]

- // Copy original top-block node type and attrs (apart from ID)

- // Move nodes after promoted tag-node into copy

- // Insert copy after original (after promoted top-block eventually)

- // Notes: May want to eventually parse through backwards, which should avoid issues

- // in changes affecting the next tag, where tags may be in the same/adjacent nodes.

- return $doc->getBodyInnerHtml();

+ $this->cleanup(); // Remove tracked elements that the split/move operations above left empty.

+ return $nodesAdded;

}

/**

@@ -73,14 +66,14 @@ class PageIncludeParser

* own nodes in the DOM for future targeted manipulation.

* @return PageIncludeTag[]

- protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array

+ protected function locateAndIsolateIncludeTags(): array

{

- $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");

+ $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");

$includeTags = [];

/** @var DOMNode $node */

- /** @var DOMNode $childNode */

foreach ($includeHosts as $node) {

+ /** @var DOMNode $childNode */

foreach ($node->childNodes as $childNode) {

if ($childNode->nodeName === '#text') {

array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));

@@ -125,4 +118,103 @@ class PageIncludeParser

return $includeTags;

}

+ /**

+ * Replace the given node with all those in $replacements.

+ * The replacements are inserted, in their given order, at the position of

+ * $toReplace, which is then removed from its parent.

+ * Replacements owned by a different document are deep-imported into the

+ * document of $toReplace before insertion.

+ * @param DOMNode[] $replacements

+ */

+ protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void

+ {

+ /** @var DOMDocument $targetDoc */

+ $targetDoc = $toReplace->ownerDocument;

+ foreach ($replacements as $replacement) {

+ if ($replacement->ownerDocument !== $targetDoc) {

+ $replacement = $targetDoc->importNode($replacement, true); // Deep copy (true) so the node's descendants come across too.

+ }

+ $toReplace->parentNode->insertBefore($replacement, $toReplace); // Always inserting before $toReplace preserves the list order.

+ }

+ $toReplace->parentNode->removeChild($toReplace);

+ }

+ /**

+ * Move a tag node to become a sibling of the given parent.

+ * Will attempt to guess a position based upon the tag content within the parent:

+ * the tag node is placed before the parent when the tag text starts within the

+ * first half of the parent's text content, otherwise it is placed after the parent.

+ * The tag node's original direct parent is tracked for later cleanup.

+ */

+ protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void

+ {

+ $parentText = $parent->textContent;

+ $tagPos = strpos($parentText, $tag->tagContent); // NOTE(review): strpos() returns false if the tag text is not found; the comparison below would then pick "before" for non-empty text - confirm this is acceptable.

+ $before = $tagPos < (strlen($parentText) / 2); // Byte offsets on both sides, so the comparison is consistent.

+ $this->toCleanup[] = $tag->domNode->parentNode; // Recorded before the move, while parentNode is still the original container.

+ if ($before) {

+ $parent->parentNode->insertBefore($tag->domNode, $parent);

+ } else {

+ $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling); // A null nextSibling makes insertBefore() append at the end.

+ }

+ /**

+ * Splits the given $parentNode at the location of the $domNode within it.

+ * Attempts to replicate the original $parentNode via a shallow clone (without

+ * its "id" attribute) placed before it, moving the children that precede

+ * $domNode into that clone, before adding the $domNode between the two.

+ * Children following $domNode remain in the original $parentNode.

+ * Both the original and the clone are tracked for later cleanup.

+ */

+ protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void

+ {

+ $children = [...$parentNode->childNodes]; // Copy to an array first, since children are moved out of $parentNode below.

+ $splitPos = array_search($domNode, $children, true);

+ if ($splitPos === false) {

+ $splitPos = count($children) - 1; // $domNode not found among the children: fall back to splitting at the last child.

+ }

+ $parentClone = $parentNode->cloneNode(); // Shallow clone: copies the element and its attributes, not its children.

+ $parentNode->parentNode->insertBefore($parentClone, $parentNode);

+ $parentClone->removeAttribute('id'); // Avoid the clone duplicating the original element's id.

+ for ($i = 0; $i < $splitPos; $i++) {

+ /** @var DOMNode $child */

+ $child = $children[$i];

+ $parentClone->appendChild($child); // Appending moves the child out of $parentNode and into the clone.

+ }

+ $parentNode->parentNode->insertBefore($domNode, $parentNode); // Lands between the clone and the original.

+ $this->toCleanup[] = $parentNode;

+ $this->toCleanup[] = $parentClone;

+ }

+ /**

+ * Get the parent paragraph of the given node, if existing.

+ * The search starts at the given node itself and walks up through its

+ * ancestors, returning the first node named "p" (compared case-insensitively),

+ * or null if no such node is found.

+ */

+ protected function getParentParagraph(DOMNode $parent): ?DOMNode

+ {

+ do {

+ if (strtolower($parent->nodeName) === 'p') {

+ return $parent;

+ }

+ $parent = $parent->parentNode; // Step up a level; becomes null once past the top of the tree.

+ } while ($parent !== null);

+ return null;

+ }

+ /**

+ * Cleanup after a parse operation.

+ * Removes stranded elements we may have left during the parse.

+ */

+ protected function cleanup(): void

+ {

+ foreach ($this->toCleanup as $element) {

+ $element->normalize();

+ while ($element->parentNode && !$element->hasChildNodes()) {

+ $parent = $element->parentNode;

+ $parent->removeChild($element);

+ $element = $parent;

+ }

}