- $scriptSearchRegex = '/<script.*?>.*?<\/script>/ms';
- $matches = [];
- preg_match_all($scriptSearchRegex, $html, $matches);
+ if ($html == '') {
+ return $html;
+ }
+
+ libxml_use_internal_errors(true);
+ $doc = new DOMDocument();
+ $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
+ $xPath = new DOMXPath($doc);
+
+ // Remove standard script tags
+ $scriptElems = $xPath->query('//script');
+ foreach ($scriptElems as $scriptElem) {
+ $scriptElem->parentNode->removeChild($scriptElem);
+ }
+
+ // Remove data or JavaScript iFrames
+ $badIframes = $xPath->query('//*[contains(@src, \'data:\')] | //*[contains(@src, \'javascript:\')] | //*[@srcdoc]');
+ foreach ($badIframes as $badIframe) {
+ $badIframe->parentNode->removeChild($badIframe);
+ }
+
+ // Remove 'on*' attributes
+ $onAttributes = $xPath->query('//@*[starts-with(name(), \'on\')]');
+ foreach ($onAttributes as $attr) {
+ /** @var \DOMAttr $attr*/
+ $attrName = $attr->nodeName;
+ $attr->parentNode->removeAttribute($attrName);
+ }