+ return $doc->getBodyInnerHtml();
+ }
+
+ /**
+  * Locate include tags within the given document, isolating them to their
+  * own nodes in the DOM for future targeted manipulation.
+  *
+  * Any element within the body that has at least one direct text-node child
+  * containing the "{{@" marker is treated as a host. Each direct text child
+  * of a host is handed to splitTextNodesAtTags(), and the tag references it
+  * returns are collected in document order.
+  *
+  * @return PageIncludeTag[]
+  */
+ protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
+ {
+     // Test every direct text child on its own. The shorter predicate
+     // contains(text(), '{{@') would only inspect the FIRST text child,
+     // because XPath 1.0 converts a node-set to the string-value of its
+     // first node; a tag placed after an inline child element
+     // (e.g. "<p>a <b>b</b> {{@1}}</p>") would then never be found.
+     $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
+     $includeTags = [];
+
+     /** @var DOMNode $node */
+     foreach ($includeHosts as $node) {
+         // Work on a snapshot of the children: splitTextNodesAtTags() inserts
+         // new sibling nodes into this same parent, and childNodes is a live
+         // list, so iterating it directly while it changes is fragile.
+         $children = iterator_to_array($node->childNodes, false);
+
+         /** @var DOMNode $childNode */
+         foreach ($children as $childNode) {
+             // Only plain text nodes are split; elements, comments and other
+             // node kinds are left untouched.
+             if ($childNode->nodeName === '#text') {
+                 array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
+             }
+         }
+     }
+
+     return $includeTags;
+ }
+
+ /**
+ * Takes a text DOMNode and splits its text content at include tags
+ * into multiple text nodes within the original parent.
+ * Returns found PageIncludeTag references.
+ * @return PageIncludeTag[]
+ */
+ protected function splitTextNodesAtTags(DOMNode $textNode): array
+ {
+ $includeTags = [];
+ $text = $textNode->textContent;
+ preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);
+
+ $currentOffset = 0;
+ foreach ($matches[0] as $index => $fullTagMatch) {
+ $tagOuterContent = $fullTagMatch[0];
+ $tagInnerContent = $matches[1][$index][0];
+ $tagStartOffset = $fullTagMatch[1];
+
+ if ($currentOffset < $tagStartOffset) {
+ $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
+ $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
+ }
+
+ $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
+ $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
+ $currentOffset = $tagStartOffset + strlen($tagOuterContent);
+ }
+
+ if ($currentOffset > 0) {
+ $textNode->textContent = substr($text, $currentOffset);
+ }
+
+ return $includeTags;