3 namespace BookStack\Util;
class HtmlContentFilter
{
    /**
     * Remove all the script elements from the given HTML document.
     *
     * Runs a fixed series of XPath queries against the document and removes
     * whatever each one matches: whole elements for script tags, JavaScript
     * links/forms, redirecting meta tags and data/JavaScript/srcdoc sources;
     * individual attributes for risky SVG values, xlink:href and on* handlers.
     * The nodes returned by the queries are detached in place, so the passed
     * document is modified rather than copied.
     */
    public static function removeScriptsFromDocument(HtmlDocument $doc)
    {
        // Remove standard script tags
        static::removeNodes($doc->queryXPath('//script'));

        // Remove clickable links to JavaScript URI
        $jsHref = static::xpathContains('@href', 'javascript:');
        static::removeNodes($doc->queryXPath('//*[' . $jsHref . ']'));

        // Remove forms (and submit controls) with calls to JavaScript URI
        $jsAction = static::xpathContains('@action', 'javascript:');
        $jsFormAction = static::xpathContains('@formaction', 'javascript:');
        static::removeNodes($doc->queryXPath('//*[' . $jsAction . '] | //*[' . $jsFormAction . ']'));

        // Remove meta tag to prevent external redirects
        $metaUrl = static::xpathContains('@content', 'url');
        static::removeNodes($doc->queryXPath('//meta[' . $metaUrl . ']'));

        // Remove any element (iframes being the usual case) that loads a data or
        // JavaScript URI via src, or that carries inline content via srcdoc.
        $dataSrc = static::xpathContains('@src', 'data:');
        $jsSrc = static::xpathContains('@src', 'javascript:');
        static::removeNodes($doc->queryXPath('//*[' . $dataSrc . '] | //*[' . $jsSrc . '] | //*[@srcdoc]'));

        // Remove attributes, within svg children, hiding JavaScript or data uris.
        // A bunch of svg element and attribute combinations expose xss possibilities.
        // For example, SVG animate tag can exploit javascript in values.
        $dataValue = static::xpathContains('.', 'data:');
        $jsValue = static::xpathContains('.', 'javascript:');
        static::removeAttributes($doc->queryXPath('//svg//@*[' . $dataValue . '] | //svg//@*[' . $jsValue . ']'));

        // Strip xlink:href attributes (the owning elements are kept).
        // Used in SVG but deprecated anyway, so we'll be a bit more heavy-handed here.
        static::removeAttributes($doc->queryXPath('//@*[contains(name(), \'xlink:href\')]'));

        // Remove 'on*' attributes
        static::removeAttributes($doc->queryXPath('//@*[starts-with(name(), \'on\')]'));
    }

    /**
     * Remove scripts from the given HTML string.
     *
     * The string is wrapped in a HtmlDocument, filtered via
     * removeScriptsFromDocument(), and the document's body inner HTML
     * is returned. An empty string is returned as-is without building
     * a document, since there is nothing to filter.
     */
    public static function removeScriptsFromHtmlString(string $html): string
    {
        if ($html === '') {
            return '';
        }

        $doc = new HtmlDocument($html);
        static::removeScriptsFromDocument($doc);

        return $doc->getBodyInnerHtml();
    }

    /**
     * Create a xpath contains statement with a translation automatically built within
     * to effectively search in a case-insensitive manner.
     *
     * XPath 1.0 has no lower-case function, so translate() is used to map the
     * upper-case form of each character in $value onto its lower-case form
     * before the contains() check. The same translate() call also deletes
     * tab, line-feed and carriage-return characters from the inspected value
     * (characters in translate()'s second argument without a counterpart in
     * the third are removed), so a scheme split up like "java<TAB>script:"
     * is still matched; browsers discard those characters when parsing URLs.
     *
     * $property and $value are placed into the expression unescaped, so
     * $value must not contain a single quote.
     *
     * @param string $property XPath expression for the value to inspect, such as '@href' or '.'.
     * @param string $value    Text to look for; matched regardless of letter case.
     */
    protected static function xpathContains(string $property, string $value): string
    {
        $needle = strtolower($value);
        $upperNeedle = strtoupper($needle);

        // Appended to translate()'s "from" argument only, so these get deleted.
        $ignoredChars = "\t\n\r";

        return 'contains(translate(' . $property . ', \'' . $upperNeedle . $ignoredChars . '\', \'' . $needle . '\'), \'' . $needle . '\')';
    }

    /**
     * Remove all the given DOMNodes.
     *
     * Each node is detached from its parent. Nodes without a parent are skipped.
     */
    protected static function removeNodes(DOMNodeList $nodes): void
    {
        // Copy the list first: removing nodes while walking a live list
        // (such as one from getElementsByTagName) would skip entries.
        foreach (iterator_to_array($nodes) as $node) {
            $parent = $node->parentNode;
            if ($parent === null) {
                continue;
            }

            $parent->removeChild($node);
        }
    }

    /**
     * Remove all the given attribute nodes.
     *
     * Each attribute is removed, by name, from the element that owns it.
     * Attributes that have no owning element are skipped.
     */
    protected static function removeAttributes(DOMNodeList $attrs): void
    {
        /** @var DOMAttr $attr */
        foreach (iterator_to_array($attrs) as $attr) {
            /** @var DOMElement|null $owner */
            $owner = $attr->parentNode;
            if ($owner === null) {
                continue;
            }

            $owner->removeAttribute($attr->nodeName);
        }
    }
}