BookStack Code Mirror - bookstack/blob - app/Util/HtmlContentFilter.php
Added deletion of revisions on page delete
[bookstack] / app / Util / HtmlContentFilter.php
1 <?php namespace BookStack\Util;
2
3 use DOMDocument;
4 use DOMNode;
5 use DOMNodeList;
6 use DOMXPath;
7
/**
 * Strips script-capable markup from HTML fragments using DOM parsing.
 */
class HtmlContentFilter
{
    /**
     * Remove all of the script elements from the given HTML.
     *
     * Besides <script> tags this also removes: elements whose href, action,
     * formaction or src attribute holds a "javascript:" URI, elements whose
     * src holds a "data:" URI, elements carrying a srcdoc attribute, <meta>
     * tags whose content mentions "url" (redirects), and every "on*"
     * event-handler attribute. URI checks are case-insensitive.
     *
     * @param string $html HTML fragment to clean.
     *
     * @return string The cleaned HTML; an empty string if nothing parseable remains.
     */
    public static function removeScripts(string $html): string
    {
        if (empty($html)) {
            return $html;
        }

        // Collect parser warnings internally rather than emitting them, since
        // user-supplied HTML is frequently malformed. The flag is left enabled
        // afterwards (as before), but the buffer is cleared so that collected
        // errors do not pile up across calls.
        libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        // NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated as of
        // PHP 8.2 - consider migrating (e.g. mb_encode_numericentity()).
        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
        libxml_clear_errors();

        // Nothing parseable (e.g. whitespace-only input) leaves no root element.
        if ($doc->documentElement === null) {
            return '';
        }

        $xPath = new DOMXPath($doc);

        $removalQueries = [
            // Remove standard script tags
            '//script',
            // Remove clickable links to JavaScript URI
            '//*[' . static::xpathContains('@href', 'javascript:') . ']',
            // Remove forms with calls to JavaScript URI
            '//*[' . static::xpathContains('@action', 'javascript:') . ']'
                . ' | //*[' . static::xpathContains('@formaction', 'javascript:') . ']',
            // Remove meta tag to prevent external redirects
            '//meta[' . static::xpathContains('@content', 'url') . ']',
            // Remove data or JavaScript iFrames
            '//*[' . static::xpathContains('@src', 'data:') . ']'
                . ' | //*[' . static::xpathContains('@src', 'javascript:') . ']'
                . ' | //*[@srcdoc]',
        ];

        foreach ($removalQueries as $query) {
            static::removeNodes($xPath->query($query));
        }

        // Remove 'on*' attributes
        $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
        foreach ($onAttributes as $attr) {
            /** @var \DOMAttr $attr */
            $owner = $attr->parentNode;
            if ($owner !== null) {
                $owner->removeAttribute($attr->nodeName);
            }
        }

        // A removal above may have taken out the root element itself.
        $root = $doc->documentElement;
        if ($root === null) {
            return '';
        }

        // The parser wraps the fragment in <html> with an implied <head> and/or
        // <body>. Emit the children of every wrapper so that content is not lost
        // when the parser placed leading tags (script, meta, style, ...) in an
        // implied <head> ahead of the <body>.
        $html = '';
        foreach ($root->childNodes as $wrapper) {
            if (!$wrapper->hasChildNodes()) {
                continue;
            }

            foreach ($wrapper->childNodes as $child) {
                $html .= $doc->saveHTML($child);
            }
        }

        return $html;
    }

    /**
     * Build an XPath 1.0 "contains" test that ignores ASCII letter case and
     * any tab, line-feed or carriage-return characters within the property.
     *
     * XPath 1.0 has no lower-case function, so translate() is used to map
     * upper-case letters to lower-case; characters in its second argument
     * without a counterpart in the third (the control characters) are dropped.
     *
     * @param string $property XPath expression for the value to test, e.g. '@href'.
     * @param string $value    Text to look for; must not contain a single quote.
     *
     * @return string Predicate expression, without surrounding brackets.
     */
    protected static function xpathContains(string $property, string $value): string
    {
        $needle = strtolower($value);
        $from = "ABCDEFGHIJKLMNOPQRSTUVWXYZ\t\n\r";
        $to = 'abcdefghijklmnopqrstuvwxyz';

        return "contains(translate({$property}, '{$from}', '{$to}'), '{$needle}')";
    }

    /**
     * Remove all of the given DOMNodes from their parents.
     *
     * Nodes that have no parent are skipped.
     */
    protected static function removeNodes(DOMNodeList $nodes): void
    {
        foreach ($nodes as $node) {
            $parent = $node->parentNode;
            if ($parent !== null) {
                $parent->removeChild($node);
            }
        }
    }

}