3 namespace BookStack\Util;
/**
 * HtmlDocument is a thin wrapper around DOMDocument, built
 * specifically for loading, querying and generating HTML content.
 */
/** Underlying DOM document that holds the parsed HTML. */
protected DOMDocument $document;

/** XPath helper for the document; stays null until queryXPath() first needs it. */
protected ?DOMXPath $xpath = null;

/** libxml option flags handed to every loadHTML() call made by this wrapper. */
protected int $loadOptions;
/**
 * Build a document wrapper, seeded with a fragment of HTML.
 *
 * Turns on libxml's internal error handling (a global libxml setting), so
 * markup problems met while parsing are collected by libxml instead of being
 * emitted as PHP warnings.
 *
 * @param string $partialHtml Body-level HTML to load straight away.
 * @param int    $loadOptions libxml option flags applied to every load.
 */
public function __construct(string $partialHtml = '', int $loadOptions = 0)
{
    $this->loadOptions = $loadOptions;
    $this->document = new DOMDocument();

    libxml_use_internal_errors(true);
    $this->loadPartialHtml($partialHtml);
}
/**
 * Load some HTML content that's part of a document (e.g. body content)
 * into the current document, replacing anything loaded before.
 *
 * The fragment is wrapped in a body element and prefixed with an XML
 * encoding declaration naming UTF-8 before being handed to the parser.
 *
 * @param string $html Fragment of HTML without its own html/body wrapper.
 */
public function loadPartialHtml(string $html): void
{
    $html = '<?xml encoding="utf-8" ?><body>' . $html . '</body>';
    $this->document->loadHTML($html, $this->loadOptions);

    // A DOMXPath created earlier remains bound to the tree that was parsed
    // before this call. Drop it so queryXPath() builds a fresh instance
    // against the newly loaded content instead of querying stale nodes.
    $this->xpath = null;
}
/**
 * Load a complete page of HTML content into the document, replacing
 * anything loaded before.
 *
 * The markup is prefixed with an XML encoding declaration naming UTF-8
 * before being handed to the parser.
 *
 * @param string $html Full HTML page markup.
 */
public function loadCompleteHtml(string $html): void
{
    $html = '<?xml encoding="utf-8" ?>' . $html;
    $this->document->loadHTML($html, $this->loadOptions);

    // A DOMXPath created earlier remains bound to the tree that was parsed
    // before this call. Drop it so queryXPath() builds a fresh instance
    // against the newly loaded content instead of querying stale nodes.
    $this->xpath = null;
}
/**
 * Run an XPath expression against the current document.
 *
 * The DOMXPath helper is created on first use and then reused.
 *
 * @param string $expression XPath expression to evaluate.
 *
 * @return DOMNodeList Nodes matched by the expression.
 *
 * @throws \InvalidArgumentException When the query yields false rather than a node list.
 */
public function queryXPath(string $expression): DOMNodeList
{
    $this->xpath ??= new DOMXPath($this->document);

    $matches = $this->xpath->query($expression);
    if ($matches !== false) {
        return $matches;
    }

    throw new \InvalidArgumentException("XPath query for expression [$expression] failed to execute");
}
/**
 * Create a new DOMElement owned by this document.
 *
 * The element is only created, not inserted; no node is appended here.
 *
 * NOTE(review): per the PHP manual, DOMDocument::createElement() does not
 * escape $value, so callers presumably need to pass entity-safe text - confirm.
 *
 * @param string $localName Tag name for the new element.
 * @param string $value     Optional initial content of the element.
 *
 * @throws \InvalidArgumentException When the underlying document reports failure by returning false.
 */
public function createElement(string $localName, string $value = ''): DOMElement
{
    $created = $this->document->createElement($localName, $value);
    if ($created !== false) {
        return $created;
    }

    throw new \InvalidArgumentException("Failed to create element of name [$localName] and value [$value]");
}
/**
 * Create a new text node owned by this document.
 *
 * The node is only created, not inserted anywhere in the tree.
 *
 * @param string $text Content of the text node.
 */
public function createTextNode(string $text): DOMText
{
    $textNode = $this->document->createTextNode($text);

    return $textNode;
}
/**
 * Look up the element in this document that carries the given ID.
 *
 * @param string $elementId Value of the id attribute to search for.
 *
 * @return DOMElement|null The matching element, or null when the lookup finds none.
 */
public function getElementById(string $elementId): ?DOMElement
{
    $match = $this->document->getElementById($elementId);

    return $match;
}
/**
 * Get the DOMNode that represents the HTML body, i.e. the first
 * body element found in the document.
 *
 * NOTE(review): if the document holds no body element the lookup yields
 * null, which does not satisfy the DOMNode return type - confirm callers
 * only use this after content has been loaded.
 */
public function getBody(): DOMNode
{
    $bodies = $this->document->getElementsByTagName('body');

    return $bodies->item(0);
}
/**
 * Get the nodes that sit directly under the body.
 * When content was loaded partially, these are usually all the content nodes.
 *
 * @return DOMNodeList The childNodes list of the body node.
 */
public function getBodyChildren(): DOMNodeList
{
    $body = $this->getBody();

    return $body->childNodes;
}
/**
 * Get the inner HTML content of the body.
 * When content was loaded partially, this is usually all of the content.
 *
 * Each direct child of the body is serialised on its own and the
 * pieces are joined in document order.
 */
public function getBodyInnerHtml(): string
{
    $children = iterator_to_array($this->getBodyChildren());
    $fragments = array_map(
        fn (DOMNode $child) => $this->document->saveHTML($child),
        $children
    );

    return implode('', $fragments);
}
/**
 * Get the HTML content of the whole document, serialised from
 * the document's root element.
 */
public function getHtml(): string
{
    $root = $this->document->documentElement;

    return $this->document->saveHTML($root);
}
/**
 * Get the inner HTML for the given node.
 *
 * Each child of the node is serialised on its own and the pieces are
 * joined in order; the node's own tag is not included.
 *
 * @param DOMNode $node Node whose children should be serialised.
 */
public function getNodeInnerHtml(DOMNode $node): string
{
    $fragments = [];
    foreach ($node->childNodes as $inner) {
        $fragments[] = $this->document->saveHTML($inner);
    }

    return implode('', $fragments);
}
/**
 * Get the outer HTML for the given node, i.e. the node itself
 * serialised through this document.
 *
 * @param DOMNode $node Node to serialise.
 */
public function getNodeOuterHtml(DOMNode $node): string
{
    $markup = $this->document->saveHTML($node);

    return $markup;
}