use BookStack\Util\HtmlDocument;
use Closure;
+use DOMDocument;
+use DOMElement;
use DOMNode;
use DOMText;
{
protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
+ /**
+ * Elements to clean up and remove if left empty after a parsing operation.
+ * @var DOMElement[]
+ */
+ protected array $toCleanup = [];
+
+ /**
+ * @param Closure(PageIncludeTag $tag): PageContent $pageContentForId
+ */
public function __construct(
- protected string $pageHtml,
+ protected HtmlDocument $doc,
protected Closure $pageContentForId,
) {
}
- public function parse(): string
+ /**
+ * Parse out the include tags.
+ * Returns the count of new content DOM nodes added to the document.
+ */
+ public function parse(): int
{
- $doc = new HtmlDocument($this->pageHtml);
-
- $tags = $this->locateAndIsolateIncludeTags($doc);
+ $nodesAdded = 0;
+ $tags = $this->locateAndIsolateIncludeTags();
foreach ($tags as $tag) {
- $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
- $content = new PageIncludeContent($htmlContent, $tag);
-
- if ($content->isInline()) {
- $adopted = $doc->adoptNodes($content->toDomNodes());
- foreach ($adopted as $adoptedContentNode) {
- $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+ /** @var PageIncludeContent $content */
+ $content = $this->pageContentForId->call($this, $tag);
+
+ if (!$content->isInline()) {
+ $parentP = $this->getParentParagraph($tag->domNode);
+ $isWithinParentP = $parentP === $tag->domNode->parentNode;
+ if ($parentP && $isWithinParentP) {
+ $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
+ } else if ($parentP) {
+ $this->moveTagNodeToBesideParent($tag, $parentP);
}
- $tag->domNode->parentNode->removeChild($tag->domNode);
- continue;
}
- // TODO - Non-inline
+ $replacementNodes = $content->toDomNodes();
+ $nodesAdded += count($replacementNodes);
+ $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
}
- // TODO:
- // Hunt down the specific text nodes with matches
- // Split out tag text node from rest of content
- // Fetch tag content->
- // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
- // If inline: Replace current text node with new text or elem
- // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
- // since we could have a range of inline elements
-
- // [Promote to top-block]
- // Tricky operation.
- // Can throw in before or after current top-block depending on relative position
- // Could [Split] top-block but complex past a single level depth.
- // Maybe [Split] if one level depth, otherwise default to before/after block
- // Should work for the vast majority of cases, and not for those which would
- // technically be invalid in-editor anyway.
-
- // [Split]
- // Copy original top-block node type and attrs (apart from ID)
- // Move nodes after promoted tag-node into copy
- // Insert copy after original (after promoted top-block eventually)
-
- // Notes: May want to eventually parse through backwards, which should avoid issues
- // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
-
-
- return $doc->getBodyInnerHtml();
+ $this->cleanup();
+
+ return $nodesAdded;
}
/**
* own nodes in the DOM for future targeted manipulation.
* @return PageIncludeTag[]
*/
- protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+ protected function locateAndIsolateIncludeTags(): array
{
- $includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]");
+ $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
$includeTags = [];
/** @var DOMNode $node */
- /** @var DOMNode $childNode */
foreach ($includeHosts as $node) {
+ /** @var DOMNode $childNode */
foreach ($node->childNodes as $childNode) {
if ($childNode->nodeName === '#text') {
array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
return $includeTags;
}
+
+    /**
+     * Swap the given node out of its parent, putting each of the
+     * $replacements in its place, in the order they are provided.
+     * Replacements owned by a different document are deep-imported
+     * into the target node's document before being inserted.
+     * @param DOMNode[] $replacements
+     */
+    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
+    {
+        /** @var DOMDocument $ownerDoc */
+        $ownerDoc = $toReplace->ownerDocument;
+
+        foreach ($replacements as $node) {
+            $isForeign = $node->ownerDocument !== $ownerDoc;
+            $toInsert = $isForeign ? $ownerDoc->importNode($node, true) : $node;
+            $toReplace->parentNode->insertBefore($toInsert, $toReplace);
+        }
+
+        // With all replacements in position, drop the original node.
+        $toReplace->parentNode->removeChild($toReplace);
+    }
+
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * The position is a guess: the tag is placed before the parent when its text
+     * first occurs within the first half (by bytes) of the parent's text content,
+     * and after the parent otherwise.
+     * The tag's original parent is queued for cleanup, in case it is left empty.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
+    {
+        $parentText = $parent->textContent;
+        $tagPos = strpos($parentText, $tag->tagContent);
+        // strpos() gives false when the tag text is not found; handle that explicitly
+        // (defaulting to placement before the parent) instead of comparing a bool.
+        $before = $tagPos === false || $tagPos < (strlen($parentText) / 2);
+
+        // Record the original parent before the move, since it changes once moved.
+        $this->toCleanup[] = $tag->domNode->parentNode;
+
+        // A null reference node (no next sibling) makes insertBefore() append.
+        $reference = $before ? $parent : $parent->nextSibling;
+        $parent->parentNode->insertBefore($tag->domNode, $reference);
+    }
+
+    /**
+     * Split the given $parentNode at the position of $domNode within its children.
+     * A shallow copy of $parentNode (with any "id" attribute removed) is inserted
+     * before the original and receives the children which preceded $domNode.
+     * The $domNode is then placed between the copy and the original, with the
+     * original retaining the remaining children.
+     * Both the original and the copy are queued for cleanup.
+     */
+    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
+    {
+        $childList = [...$parentNode->childNodes];
+        $tagIndex = array_search($domNode, $childList, true);
+        if ($tagIndex === false) {
+            // Not found among the children: split ahead of the last child instead.
+            $tagIndex = count($childList) - 1;
+        }
+
+        $leadingCopy = $parentNode->cloneNode();
+        $leadingCopy->removeAttribute('id');
+        $parentNode->parentNode->insertBefore($leadingCopy, $parentNode);
+
+        // Shift everything ahead of the split position into the copy.
+        foreach ($childList as $index => $child) {
+            if ($index >= $tagIndex) {
+                break;
+            }
+            $leadingCopy->appendChild($child);
+        }
+
+        $parentNode->parentNode->insertBefore($domNode, $parentNode);
+
+        array_push($this->toCleanup, $parentNode, $leadingCopy);
+    }
+
+    /**
+     * Find the closest paragraph ("p") for the given node, checking the
+     * node itself first and then each of its ancestors in turn.
+     * Returns null when no paragraph is found.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        for ($current = $parent; $current !== null; $current = $current->parentNode) {
+            if (strtolower($current->nodeName) === 'p') {
+                return $current;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Tidy up once a parse operation has completed.
+     * Each element queued for cleanup is normalized and, where it is attached
+     * to a parent but has no child nodes, removed. The same check then repeats
+     * upward for the parent it was removed from.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $queued) {
+            $queued->normalize();
+
+            $current = $queued;
+            while (true) {
+                $container = $current->parentNode;
+                if (!$container || $current->hasChildNodes()) {
+                    break;
+                }
+
+                $container->removeChild($current);
+                $current = $container;
+            }
+        }
+    }
}