Fix encoding problem.
By default DOMDocument::loadHTML treats a string as being encoded with ISO-8859-1. This causes a problem with saving cyrillic pages' text that becomes completely unreadable (like Ð\9fÑ\80овеÑ\80ка instead of normal symbols).
if($htmlText == '') return $htmlText;
libxml_use_internal_errors(true);
$doc = new \DOMDocument();
- $doc->loadHTML($htmlText);
+ $doc->loadHTML(mb_convert_encoding($htmlText, 'HTML-ENTITIES', 'UTF-8'));
$container = $doc->documentElement;
$body = $container->childNodes->item(0);
}
-}
\ No newline at end of file
+}