3 namespace BookStack\Util;
class HtmlContentFilter
{
    /**
     * Remove all of the script elements from the given HTML.
     *
     * The fragment is wrapped in a <body> element, parsed into a DOM and then
     * stripped, in order, of:
     *  - <script> elements,
     *  - elements whose "href" contains "javascript:",
     *  - elements whose "action" or "formaction" contains "javascript:",
     *  - <meta> elements whose "content" contains "url",
     *  - elements whose "src" contains "data:" or "javascript:", or which have a "srcdoc" attribute,
     *  - every attribute whose name starts with "on".
     *
     * All attribute value comparisons are case-insensitive (see xpathContains()).
     *
     * @param string $html HTML fragment to filter.
     *
     * @return string The children of the wrapping <body>, re-serialised after filtering.
     */
    public static function removeScripts(string $html): string
    {
        // Nothing to filter, so skip building a DOM altogether.
        if ($html === '') {
            return '';
        }

        $wrapped = '<body>' . $html . '</body>';

        // Collect parser complaints about malformed markup internally instead of raising warnings.
        libxml_use_internal_errors(true);
        $doc = new \DOMDocument();

        // Non-ASCII characters are passed to the parser as numeric entities so that they survive
        // loadHTML() without a charset declaration. This replaces the 'HTML-ENTITIES' pseudo-encoding
        // of mb_convert_encoding(), which is deprecated as of PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($wrapped, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));

        // Drop the buffered parse errors so they do not pile up across calls.
        libxml_clear_errors();

        $xPath = new \DOMXPath($doc);

        // Remove standard script tags
        $scriptElems = $xPath->query('//script');
        static::removeNodes($scriptElems);

        // Remove clickable links to JavaScript URI
        $badLinks = $xPath->query('//*[' . static::xpathContains('@href', 'javascript:') . ']');
        static::removeNodes($badLinks);

        // Remove forms with calls to JavaScript URI
        $badForms = $xPath->query(
            '//*[' . static::xpathContains('@action', 'javascript:') . ']'
            . ' | //*[' . static::xpathContains('@formaction', 'javascript:') . ']'
        );
        static::removeNodes($badForms);

        // Remove meta tag to prevent external redirects
        $metaTags = $xPath->query('//meta[' . static::xpathContains('@content', 'url') . ']');
        static::removeNodes($metaTags);

        // Remove data or JavaScript iFrames
        $badIframes = $xPath->query(
            '//*[' . static::xpathContains('@src', 'data:') . ']'
            . ' | //*[' . static::xpathContains('@src', 'javascript:') . ']'
            . ' | //*[@srcdoc]'
        );
        static::removeNodes($badIframes);

        // Remove 'on*' attributes
        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
        foreach ($onAttributes as $attr) {
            /** @var \DOMAttr $attr */
            $owner = $attr->ownerElement;
            if ($owner !== null) {
                $owner->removeAttribute($attr->nodeName);
            }
        }

        // The first child of the root element is expected to be the <body> wrapper added above.
        // Either may be missing if one of the filters removed it, so guard before dereferencing.
        $root = $doc->documentElement;
        $body = ($root !== null) ? $root->childNodes->item(0) : null;
        if ($body === null) {
            return '';
        }

        // Serialise only the wrapper's children, using a fresh accumulator so that
        // none of the unfiltered input can leak into the result.
        $filtered = '';
        foreach ($body->childNodes as $child) {
            $filtered .= $doc->saveHTML($child);
        }

        return $filtered;
    }

    /**
     * Create an XPath contains() statement with a translation automatically built within
     * to effectively search in a case-insensitive manner.
     *
     * The value is lower-cased, and the generated expression translates the upper-case
     * characters of the value to their lower-case form within the property before matching.
     * The value is placed between single quotes without escaping, so it must not
     * itself contain a single quote.
     *
     * @param string $property XPath expression selecting the text to search, such as "@href".
     * @param string $value    Text to look for; matched regardless of case.
     *
     * @return string XPath boolean expression for use inside a predicate.
     */
    protected static function xpathContains(string $property, string $value): string
    {
        $lowerVal = strtolower($value);
        $upperVal = strtoupper($lowerVal);

        return 'contains(translate(' . $property . ', \'' . $upperVal . '\', \'' . $lowerVal . '\'), \'' . $lowerVal . '\')';
    }

    /**
     * Remove all of the given DOMNodes from their parent nodes.
     *
     * @param \DOMNodeList $nodes Nodes to detach.
     */
    protected static function removeNodes(\DOMNodeList $nodes): void
    {
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            // A node without a parent is already detached; there is nothing to remove it from.
            if ($parent !== null) {
                $parent->removeChild($node);
            }
        }
    }
}