1 <?php namespace BookStack\Util;
class HtmlContentFilter
{
    /**
     * Remove all of the script elements from the given HTML.
     *
     * The HTML fragment is parsed into a DOM and the following are stripped:
     *  - <script> elements;
     *  - elements whose href, action or formaction contains "javascript:";
     *  - <meta> elements whose content attribute contains "url";
     *  - elements whose src contains "data:" or "javascript:", or which carry
     *    a srcdoc attribute;
     *  - every attribute whose name starts with "on".
     *
     * Attribute values are compared case-insensitively and with ASCII tab,
     * line-feed and carriage-return characters ignored, since browsers accept
     * URI schemes such as "JavaScript:" and drop those characters from URLs.
     *
     * Note: libxml internal error handling is switched on and left on, so
     * parse warnings for imperfect markup are collected rather than emitted.
     *
     * @param string $html UTF-8 encoded HTML fragment.
     *
     * @return string The filtered fragment, serialised from the parsed DOM.
     */
    public static function removeScripts(string $html): string
    {
        if ($html === '') {
            return $html;
        }

        // Wrap in a body so the fragment's top-level nodes can be located after parsing.
        $wrapped = '<body>' . $html . '</body>';
        libxml_use_internal_errors(true);
        $doc = new \DOMDocument();
        // Encode non-ASCII characters as numeric entities so the parser does not
        // need an encoding hint (replaces the deprecated 'HTML-ENTITIES' conversion).
        $doc->loadHTML(mb_encode_numericentity($wrapped, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
        $xPath = new \DOMXPath($doc);

        $hrefJs = self::xpathContains('@href', 'javascript:');
        $actionJs = self::xpathContains('@action', 'javascript:');
        $formActionJs = self::xpathContains('@formaction', 'javascript:');
        $metaUrl = self::xpathContains('@content', 'url');
        $srcData = self::xpathContains('@src', 'data:');
        $srcJs = self::xpathContains('@src', 'javascript:');

        $removalQueries = [
            // Standard script tags
            '//script',
            // Clickable links to JavaScript URI
            "//*[{$hrefJs}]",
            // Forms with calls to JavaScript URI
            "//*[{$actionJs}] | //*[{$formActionJs}]",
            // Meta tags, to prevent external redirects
            "//meta[{$metaUrl}]",
            // Data or JavaScript iFrames
            "//*[{$srcData}] | //*[{$srcJs}] | //*[@srcdoc]",
        ];

        foreach ($removalQueries as $query) {
            $matches = $xPath->query($query);
            if ($matches instanceof \DOMNodeList) {
                static::removeNodes($matches);
            }
        }

        // Remove 'on*' attributes
        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
        if ($onAttributes instanceof \DOMNodeList) {
            foreach ($onAttributes as $attr) {
                /** @var \DOMAttr $attr */
                $owner = $attr->ownerElement;
                if ($owner !== null) {
                    $owner->removeAttribute($attr->nodeName);
                }
            }
        }

        // The first child of the document element is the wrapping body.
        $root = $doc->documentElement;
        $body = $root !== null ? $root->childNodes->item(0) : null;
        if ($body === null) {
            return '';
        }

        // Serialise only the body's children so the wrapper itself is not returned.
        $filtered = '';
        foreach ($body->childNodes as $child) {
            $filtered .= $doc->saveHTML($child);
        }

        return $filtered;
    }

    /**
     * Build an XPath boolean expression testing whether an attribute contains
     * the given value, ignoring ASCII letter case and any tab, line-feed or
     * carriage-return characters within the attribute.
     *
     * @param string $property XPath attribute reference, such as "@href".
     * @param string $value    Needle to look for; must not contain a single quote.
     */
    private static function xpathContains(string $property, string $value): string
    {
        // translate() maps each upper-case letter to its lower-case partner and
        // deletes the trailing control characters, which have no partner.
        $from = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' . "\t\n\r";
        $to = 'abcdefghijklmnopqrstuvwxyz';
        $needle = strtolower($value);

        return "contains(translate({$property}, '{$from}', '{$to}'), '{$needle}')";
    }

    /**
     * Remove all of the given DOMNodes from their parent nodes.
     */
    protected static function removeNodes(\DOMNodeList $nodes): void
    {
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            if ($parent !== null) {
                $parent->removeChild($node);
            }
        }
    }
}