HTML: Aligned and standardised DOMDocument usage

author Dan Brown <redacted>

Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)

committer Dan Brown <redacted>

Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)
author Dan Brown <redacted>
Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)
committer Dan Brown <redacted>
Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)
diff --git a/app/Entities/Tools/ExportFormatter.php b/app/Entities/Tools/ExportFormatter.php

index 9a8c687b08de8cf0c87166e6eec8b883bba0979c..beddfe8e6e0f08cb6e0f373a6d46d202a9b6056e 100644 (file)
--- a/app/Entities/Tools/ExportFormatter.php
+++ b/app/Entities/Tools/ExportFormatter.php
@@ -8,9 +8,8 @@ use BookStack\Entities\Models\Page;
  use BookStack\Entities\Tools\Markdown\HtmlToMarkdown;
  use BookStack\Uploads\ImageService;
  use BookStack\Util\CspService;
-use DOMDocument;
+use BookStack\Util\HtmlDocument;
  use DOMElement;
-use DOMXPath;
  use Exception;
  use Throwable;
  
@@ -151,45 +150,36 @@ class ExportFormatter
      protected function htmlToPdf(string $html): string
      {
          $html = $this->containHtml($html);
-        $html = $this->replaceIframesWithLinks($html);
-        $html = $this->openDetailElements($html);
+        $doc = new HtmlDocument();
+        $doc->loadCompleteHtml($html);
  
-        return $this->pdfGenerator->fromHtml($html);
+        $this->replaceIframesWithLinks($doc);
+        $this->openDetailElements($doc);
+        $cleanedHtml = $doc->getHtml();
+
+        return $this->pdfGenerator->fromHtml($cleanedHtml);
      }
  
      /**
       * Within the given HTML content, Open any detail blocks.
       */
-    protected function openDetailElements(string $html): string
+    protected function openDetailElements(HtmlDocument $doc): void
      {
-        libxml_use_internal_errors(true);
-
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
-        $xPath = new DOMXPath($doc);
-
-        $details = $xPath->query('//details');
+        $details = $doc->queryXPath('//details');
          /** @var DOMElement $detail */
          foreach ($details as $detail) {
              $detail->setAttribute('open', 'open');
          }
-
-        return $doc->saveHTML();
      }
  
      /**
-     * Within the given HTML content, replace any iframe elements
+     * Within the given HTML document, replace any iframe elements
       * with anchor links within paragraph blocks.
       */
-    protected function replaceIframesWithLinks(string $html): string
+    protected function replaceIframesWithLinks(HtmlDocument $doc): void
      {
-        libxml_use_internal_errors(true);
-
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
-        $xPath = new DOMXPath($doc);
+        $iframes = $doc->queryXPath('//iframe');
  
-        $iframes = $xPath->query('//iframe');
          /** @var DOMElement $iframe */
          foreach ($iframes as $iframe) {
              $link = $iframe->getAttribute('src');
@@ -203,8 +193,6 @@ class ExportFormatter
              $paragraph->appendChild($anchor);
              $iframe->parentNode->replaceChild($paragraph, $iframe);
          }
-
-        return $doc->saveHTML();
      }
  
      /**
diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php

index 949816eff577cc2fc67e0a36c8507bc4d2efe8d1..3e75bd5bba5f691d67ffa2e1b31c4a73e5a55cf7 100644 (file)
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -10,11 +10,10 @@ use BookStack\Theming\ThemeEvents;
  use BookStack\Uploads\ImageRepo;
  use BookStack\Uploads\ImageService;
  use BookStack\Util\HtmlContentFilter;
-use DOMDocument;
+use BookStack\Util\HtmlDocument;
  use DOMElement;
  use DOMNode;
  use DOMNodeList;
-use DOMXPath;
  use Illuminate\Support\Str;
  
  class PageContent
@@ -56,27 +55,17 @@ class PageContent
              return $htmlText;
          }
  
-        $doc = $this->loadDocumentFromHtml($htmlText);
-        $container = $doc->documentElement;
-        $body = $container->childNodes->item(0);
-        $childNodes = $body->childNodes;
-        $xPath = new DOMXPath($doc);
+        $doc = new HtmlDocument($htmlText);
  
          // Get all img elements with image data blobs
-        $imageNodes = $xPath->query('//img[contains(@src, \'data:image\')]');
+        $imageNodes = $doc->queryXPath('//img[contains(@src, \'data:image\')]');
          foreach ($imageNodes as $imageNode) {
              $imageSrc = $imageNode->getAttribute('src');
              $newUrl = $this->base64ImageUriToUploadedImageUrl($imageSrc);
              $imageNode->setAttribute('src', $newUrl);
          }
  
-        // Generate inner html as a string
-        $html = '';
-        foreach ($childNodes as $childNode) {
-            $html .= $doc->saveHTML($childNode);
-        }
-
-        return $html;
+        return $doc->getBodyInnerHtml();
      }
  
      /**
@@ -172,27 +161,18 @@ class PageContent
              return $htmlText;
          }
  
-        $doc = $this->loadDocumentFromHtml($htmlText);
-        $container = $doc->documentElement;
-        $body = $container->childNodes->item(0);
-        $childNodes = $body->childNodes;
-        $xPath = new DOMXPath($doc);
+        $doc = new HtmlDocument($htmlText);
  
          // Map to hold used ID references
          $idMap = [];
          // Map to hold changing ID references
          $changeMap = [];
  
-        $this->updateIdsRecursively($body, 0, $idMap, $changeMap);
-        $this->updateLinks($xPath, $changeMap);
+        $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
+        $this->updateLinks($doc, $changeMap);
  
-        // Generate inner html as a string
-        $html = '';
-        foreach ($childNodes as $childNode) {
-            $html .= $doc->saveHTML($childNode);
-        }
-
-        // Perform required string-level tweaks
+        // Generate inner html as a string & perform required string-level tweaks
+        $html = $doc->getBodyInnerHtml();
          $html = str_replace(' ', '&nbsp;', $html);
  
          return $html;
@@ -225,13 +205,13 @@ class PageContent
       * Update the all links in the given xpath to apply requires changes within the
       * given $changeMap array.
       */
-    protected function updateLinks(DOMXPath $xpath, array $changeMap): void
+    protected function updateLinks(HtmlDocument $doc, array $changeMap): void
      {
          if (empty($changeMap)) {
              return;
          }
  
-        $links = $xpath->query('//body//*//*[@href]');
+        $links = $doc->queryXPath('//body//*//*[@href]');
          /** @var DOMElement $domElem */
          foreach ($links as $domElem) {
              $href = ltrim($domElem->getAttribute('href'), '#');
@@ -321,11 +301,10 @@ class PageContent
              return [];
          }
  
-        $doc = $this->loadDocumentFromHtml($htmlContent);
-        $xPath = new DOMXPath($doc);
-        $headers = $xPath->query('//h1|//h2|//h3|//h4|//h5|//h6');
+        $doc = new HtmlDocument($htmlContent);
+        $headers = $doc->queryXPath('//h1|//h2|//h3|//h4|//h5|//h6');
  
-        return $headers ? $this->headerNodesToLevelList($headers) : [];
+        return $headers->count() === 0 ? [] : $this->headerNodesToLevelList($headers);
      }
  
      /**
@@ -420,7 +399,7 @@ class PageContent
      protected function fetchSectionOfPage(Page $page, string $sectionId): string
      {
          $topLevelTags = ['table', 'ul', 'ol', 'pre'];
-        $doc = $this->loadDocumentFromHtml($page->html);
+        $doc = new HtmlDocument($page->html);
  
          // Search included content for the id given and blank out if not exists.
          $matchingElem = $doc->getElementById($sectionId);
@@ -430,30 +409,11 @@ class PageContent
  
          // Otherwise replace the content with the found content
          // Checks if the top-level wrapper should be included by matching on tag types
-        $innerContent = '';
          $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags);
          if ($isTopLevel) {
-            $innerContent .= $doc->saveHTML($matchingElem);
-        } else {
-            foreach ($matchingElem->childNodes as $childNode) {
-                $innerContent .= $doc->saveHTML($childNode);
-            }
+            return $doc->getNodeOuterHtml($matchingElem);
          }
-        libxml_clear_errors();
-
-        return $innerContent;
-    }
-
-    /**
-     * Create and load a DOMDocument from the given html content.
-     */
-    protected function loadDocumentFromHtml(string $html): DOMDocument
-    {
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
-        $doc->loadHTML($html);
  
-        return $doc;
+        return $doc->getNodeInnerHtml($matchingElem);
      }
  }
diff --git a/app/References/CrossLinkParser.php b/app/References/CrossLinkParser.php

index 88ca5d6a7a5a23986c9f4e5a8b0d47891eda29b5..b9c3ad205aea12a2f2d38e4199d5506b64b04222 100644 (file)
--- a/app/References/CrossLinkParser.php
+++ b/app/References/CrossLinkParser.php
@@ -9,8 +9,7 @@ use BookStack\References\ModelResolvers\ChapterLinkModelResolver;
  use BookStack\References\ModelResolvers\CrossLinkModelResolver;
  use BookStack\References\ModelResolvers\PageLinkModelResolver;
  use BookStack\References\ModelResolvers\PagePermalinkModelResolver;
-use DOMDocument;
-use DOMXPath;
+use BookStack\Util\HtmlDocument;
  
  class CrossLinkParser
  {
@@ -54,13 +53,8 @@ class CrossLinkParser
      {
          $links = [];
  
-        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML($html);
-
-        $xPath = new DOMXPath($doc);
-        $anchors = $xPath->query('//a[@href]');
+        $doc = new HtmlDocument($html);
+        $anchors = $doc->queryXPath('//a[@href]');
  
          /** @var \DOMElement $anchor */
          foreach ($anchors as $anchor) {
diff --git a/app/References/ReferenceUpdater.php b/app/References/ReferenceUpdater.php

index 2f7b70a87ec0884aa3fd6dc4dd38db45e557af0e..82505e8ab89081b9fb528f050249795d980b9164 100644 (file)
--- a/app/References/ReferenceUpdater.php
+++ b/app/References/ReferenceUpdater.php
@@ -6,18 +6,14 @@ use BookStack\Entities\Models\Book;
  use BookStack\Entities\Models\Entity;
  use BookStack\Entities\Models\Page;
  use BookStack\Entities\Repos\RevisionRepo;
-use DOMDocument;
-use DOMXPath;
+use BookStack\Util\HtmlDocument;
  
  class ReferenceUpdater
  {
-    protected ReferenceFetcher $referenceFetcher;
-    protected RevisionRepo $revisionRepo;
-
-    public function __construct(ReferenceFetcher $referenceFetcher, RevisionRepo $revisionRepo)
-    {
-        $this->referenceFetcher = $referenceFetcher;
-        $this->revisionRepo = $revisionRepo;
+    public function __construct(
+        protected ReferenceFetcher $referenceFetcher,
+        protected RevisionRepo $revisionRepo
+    ) {
      }
  
      public function updateEntityPageReferences(Entity $entity, string $oldLink)
@@ -96,13 +92,8 @@ class ReferenceUpdater
              return $html;
          }
  
-        $html = '<body>' . $html . '</body>';
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
-
-        $xPath = new DOMXPath($doc);
-        $anchors = $xPath->query('//a[@href]');
+        $doc = new HtmlDocument($html);
+        $anchors = $doc->queryXPath('//a[@href]');
  
          /** @var \DOMElement $anchor */
          foreach ($anchors as $anchor) {
@@ -111,12 +102,6 @@ class ReferenceUpdater
              $anchor->setAttribute('href', $updated);
          }
  
-        $html = '';
-        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
-        foreach ($topElems as $child) {
-            $html .= $doc->saveHTML($child);
-        }
-
-        return $html;
+        return $doc->getBodyInnerHtml();
      }
  }
diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php

index e882e987b98866e73e28dfa28cbcd8e5eed11bf4..d9fc4e7aadce9aabc3b8b7aa70e17f6edf8d5af3 100644 (file)
--- a/app/Search/SearchIndex.php
+++ b/app/Search/SearchIndex.php
@@ -6,7 +6,7 @@ use BookStack\Activity\Models\Tag;
  use BookStack\Entities\EntityProvider;
  use BookStack\Entities\Models\Entity;
  use BookStack\Entities\Models\Page;
-use DOMDocument;
+use BookStack\Util\HtmlDocument;
  use DOMNode;
  use Illuminate\Database\Eloquent\Builder;
  use Illuminate\Support\Collection;
@@ -138,16 +138,11 @@ class SearchIndex
              'h6' => 1.5,
          ];
  
-        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
          $html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html);
+        $doc = new HtmlDocument($html);
  
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML($html);
-
-        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
          /** @var DOMNode $child */
-        foreach ($topElems as $child) {
+        foreach ($doc->getBodyChildren() as $child) {
              $nodeName = $child->nodeName;
              $termCounts = $this->textToTermCountMap(trim($child->textContent));
              foreach ($termCounts as $term => $count) {
@@ -168,7 +163,6 @@ class SearchIndex
       */
      protected function generateTermScoreMapFromTags(array $tags): array
      {
-        $scoreMap = [];
          $names = [];
          $values = [];
  
diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php

index e51f512bd1fe762d70928203f30be5efca1bbf5f..2dbb340860f830585b96e507db49f494b9d450bc 100644 (file)
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@@ -3,10 +3,8 @@
  namespace BookStack\Util;
  
  use DOMAttr;
-use DOMDocument;
  use DOMElement;
  use DOMNodeList;
-use DOMXPath;
  
  class HtmlContentFilter
  {
@@ -19,54 +17,44 @@ class HtmlContentFilter
              return $html;
          }
  
-        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML($html);
-        $xPath = new DOMXPath($doc);
+        $doc = new HtmlDocument($html);
  
          // Remove standard script tags
-        $scriptElems = $xPath->query('//script');
+        $scriptElems = $doc->queryXPath('//script');
          static::removeNodes($scriptElems);
  
          // Remove clickable links to JavaScript URI
-        $badLinks = $xPath->query('//*[' . static::xpathContains('@href', 'javascript:') . ']');
+        $badLinks = $doc->queryXPath('//*[' . static::xpathContains('@href', 'javascript:') . ']');
          static::removeNodes($badLinks);
  
          // Remove forms with calls to JavaScript URI
-        $badForms = $xPath->query('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
+        $badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
          static::removeNodes($badForms);
  
          // Remove meta tag to prevent external redirects
-        $metaTags = $xPath->query('//meta[' . static::xpathContains('@content', 'url') . ']');
+        $metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
          static::removeNodes($metaTags);
  
          // Remove data or JavaScript iFrames
-        $badIframes = $xPath->query('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
+        $badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
          static::removeNodes($badIframes);
  
          // Remove attributes, within svg children, hiding JavaScript or data uris.
          // A bunch of svg element and attribute combinations expose xss possibilities.
          // For example, SVG animate tag can exploit javascript in values.
-        $badValuesAttrs = $xPath->query('//svg//@*[' . static::xpathContains('.', 'data:') . '] | //svg//@*[' . static::xpathContains('.', 'javascript:') . ']');
+        $badValuesAttrs = $doc->queryXPath('//svg//@*[' . static::xpathContains('.', 'data:') . '] | //svg//@*[' . static::xpathContains('.', 'javascript:') . ']');
          static::removeAttributes($badValuesAttrs);
  
          // Remove elements with a xlink:href attribute
          // Used in SVG but deprecated anyway, so we'll be a bit more heavy-handed here.
-        $xlinkHrefAttributes = $xPath->query('//@*[contains(name(), \'xlink:href\')]');
+        $xlinkHrefAttributes = $doc->queryXPath('//@*[contains(name(), \'xlink:href\')]');
          static::removeAttributes($xlinkHrefAttributes);
  
          // Remove 'on*' attributes
-        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
+        $onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
          static::removeAttributes($onAttributes);
  
-        $html = '';
-        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
-        foreach ($topElems as $child) {
-            $html .= $doc->saveHTML($child);
-        }
-
-        return $html;
+        return $doc->getBodyInnerHtml();
      }
  
      /**
diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php

new file mode 100644 (file)

index 0000000..f0e12d5
--- /dev/null
+++ b/app/Util/HtmlDocument.php
@@ -0,0 +1,152 @@
+<?php
+
+namespace BookStack\Util;
+
+use DOMDocument;
+use DOMElement;
+use DOMNode;
+use DOMNodeList;
+use DOMXPath;
+
+/**
+ * HtmlDocument is a thin wrapper around DOMDocument built
+ * specifically for loading, querying and generating HTML content.
+ */
+class HtmlDocument
+{
+    protected DOMDocument $document;
+    protected ?DOMXPath $xpath = null;
+    protected int $loadOptions;
+
+    public function __construct(string $partialHtml = '', int $loadOptions = 0)
+    {
+        libxml_use_internal_errors(true);
+        $this->document = new DOMDocument();
+        $this->loadOptions = $loadOptions;
+
+        if ($partialHtml) {
+            $this->loadPartialHtml($partialHtml);
+        }
+    }
+
+    /**
+     * Load some HTML content that's part of a document (e.g. body content)
+     * into the current document.
+     */
+    public function loadPartialHtml(string $html): void
+    {
+        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
+        $this->document->loadHTML($html, $this->loadOptions);
+        $this->xpath = null;
+    }
+
+    /**
+     * Load a complete page of HTML content into the document.
+     */
+    public function loadCompleteHtml(string $html): void
+    {
+        $html = '<?xml encoding="utf-8" ?>' . $html;
+        $this->document->loadHTML($html, $this->loadOptions);
+        $this->xpath = null;
+    }
+
+    /**
+     * Start an XPath query on the current document.
+     */
+    public function queryXPath(string $expression): DOMNodeList
+    {
+        if (is_null($this->xpath)) {
+            $this->xpath = new DOMXPath($this->document);
+        }
+
+        $result = $this->xpath->query($expression);
+        if ($result === false) {
+            throw new \InvalidArgumentException("XPath query for expression [$expression] failed to execute");
+        }
+
+        return $result;
+    }
+
+    /**
+     * Create a new DOMElement instance within the document.
+     */
+    public function createElement(string $localName, string $value = ''): DOMElement
+    {
+        $element = $this->document->createElement($localName, $value);
+
+        if ($element === false) {
+            throw new \InvalidArgumentException("Failed to create element of name [$localName] and value [$value]");
+        }
+
+        return $element;
+    }
+
+    /**
+     * Get an element within the document of the given ID.
+     */
+    public function getElementById(string $elementId): ?DOMElement
+    {
+        return $this->document->getElementById($elementId);
+    }
+
+    /**
+     * Get the DOMNode that represents the HTML body.
+     */
+    public function getBody(): DOMNode
+    {
+        return $this->document->getElementsByTagName('body')[0];
+    }
+
+    /**
+     * Get the nodes that are a direct child of the body.
+     * This is usually all the content nodes if loaded partially.
+     */
+    public function getBodyChildren(): DOMNodeList
+    {
+        return $this->getBody()->childNodes;
+    }
+
+    /**
+     * Get the inner HTML content of the body.
+     * This is usually all the content if loaded partially.
+     */
+    public function getBodyInnerHtml(): string
+    {
+        $html = '';
+        foreach ($this->getBodyChildren() as $child) {
+            $html .= $this->document->saveHTML($child);
+        }
+
+        return $html;
+    }
+
+    /**
+     * Get the HTML content of the whole document.
+     */
+    public function getHtml(): string
+    {
+        return $this->document->saveHTML();
+    }
+
+    /**
+     * Get the inner HTML for the given node.
+     */
+    public function getNodeInnerHtml(DOMNode $node): string
+    {
+        $html = '';
+
+        foreach ($node->childNodes as $childNode) {
+            $html .= $this->document->saveHTML($childNode);
+        }
+
+        return $html;
+    }
+
+    /**
+     * Get the outer HTML for the given node.
+     */
+    public function getNodeOuterHtml(DOMNode $node): string
+    {
+        return $this->document->saveHTML($node);
+    }
+}
diff --git a/app/Util/HtmlNonceApplicator.php b/app/Util/HtmlNonceApplicator.php

index 07298577cf564f4cd4f39b625cddd560770e64aa..3a798e8487c0b5ff4f30186a1eb296c5cbf292cc 100644 (file)
--- a/app/Util/HtmlNonceApplicator.php
+++ b/app/Util/HtmlNonceApplicator.php
@@ -2,14 +2,12 @@
  
  namespace BookStack\Util;
  
-use DOMDocument;
  use DOMElement;
  use DOMNodeList;
-use DOMXPath;
  
  class HtmlNonceApplicator
  {
-    protected static $placeholder = '[CSP_NONCE_VALUE]';
+    protected static string $placeholder = '[CSP_NONCE_VALUE]';
  
      /**
       * Prepare the given HTML content with nonce attributes including a placeholder
@@ -21,28 +19,20 @@ class HtmlNonceApplicator
              return $html;
          }
  
-        $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
-        libxml_use_internal_errors(true);
-        $doc = new DOMDocument();
-        $doc->loadHTML($html, LIBXML_SCHEMA_CREATE);
-        $xPath = new DOMXPath($doc);
+        // LIBXML_SCHEMA_CREATE was found to be required here otherwise
+        // the PHP DOMDocument handling will attempt to format/close
+        // HTML tags within scripts and therefore change JS content.
+        $doc = new HtmlDocument($html, LIBXML_SCHEMA_CREATE);
  
          // Apply to scripts
-        $scriptElems = $xPath->query('//script');
+        $scriptElems = $doc->queryXPath('//script');
          static::addNonceAttributes($scriptElems, static::$placeholder);
  
          // Apply to styles
-        $styleElems = $xPath->query('//style');
+        $styleElems = $doc->queryXPath('//style');
          static::addNonceAttributes($styleElems, static::$placeholder);
  
-        $returnHtml = '';
-        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
-        foreach ($topElems as $child) {
-            $content = $doc->saveHTML($child);
-            $returnHtml .= $content;
-        }
-
-        return $returnHtml;
+        return $doc->getBodyInnerHtml();
      }
  
      /**
author	Dan Brown <redacted>
	Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)
committer	Dan Brown <redacted>
	Tue, 14 Nov 2023 15:46:32 +0000 (15:46 +0000)
app/Entities/Tools/ExportFormatter.php		patch \| blob \| history
app/Entities/Tools/PageContent.php		patch \| blob \| history
app/References/CrossLinkParser.php		patch \| blob \| history
app/References/ReferenceUpdater.php		patch \| blob \| history
app/Search/SearchIndex.php		patch \| blob \| history
app/Util/HtmlContentFilter.php		patch \| blob \| history
app/Util/HtmlDocument.php	[new file with mode: 0644]	patch \| blob
app/Util/HtmlNonceApplicator.php		patch \| blob \| history