3 namespace BookStack\Util;
10 * Filter to ensure HTML input for description content remains simple and
11 * is restricted to a limited allow-list of elements and attributes.
12 * This exists more for consistency and to prevent nuisance than for security
13 * (which would be handled via a separate content filter and CSP).
15 class HtmlDescriptionFilter
18 * @var array<string, string[]>
20 protected static array $allowedAttrsByElements = [
// Keys are lower-case element names: filterElement() lower-cases the tag name
// before looking it up here. Each value lists the lower-case attribute names
// that filterElement() leaves on that element; any other attribute is removed.
22 'a' => ['href', 'title', 'target'],
/**
 * Filter the given HTML string against the element/attribute allow-list.
 *
 * The input is loaded into an HtmlDocument, each top-level body child is
 * processed, and the value of HtmlDocument::getBodyInnerHtml() is returned.
 *
 * Input that is empty after trimming is caught by a guard before any parsing.
 * NOTE(review): empty() also treats the string "0" as empty, so an input of
 * "0" takes the same early path as blank input; confirm that is intended.
 */
32 public static function filterFromString(string $html): string
34 if (empty(trim($html))) {
38 $doc = new HtmlDocument($html);
// Spread the body children into a plain array before looping; presumably so
// that removing nodes below does not disturb the iteration (TODO confirm the
// return type of getBodyChildren()).
40 $topLevel = [...$doc->getBodyChildren()];
41 foreach ($topLevel as $child) {
42 /** @var DOMNode $child */
// Element children are handed to filterElement(), which recurses into them.
43 if ($child instanceof DOMElement) {
44 static::filterElement($child);
// Detaches the child from its parent node.
// NOTE(review): the branch structure around this call is not visible in this
// excerpt; presumably it only applies to non-element children. Confirm.
46 $child->parentNode->removeChild($child);
50 return $doc->getBodyInnerHtml();
/**
 * Filter a single element against the allow-list, then recurse into its
 * element children.
 *
 * The lower-cased tag name is looked up in $allowedAttrsByElements; for an
 * element with an entry, every attribute whose lower-cased name is not in
 * that entry is removed.
 * NOTE(review): the handling of elements without an entry (the is_null branch)
 * is not visible in this excerpt; presumably the element is dropped. Confirm.
 */
53 protected static function filterElement(DOMElement $element): void
55 $elType = strtolower($element->tagName);
// Null here means the allow-list has no entry for this element type.
56 $allowedAttrs = static::$allowedAttrsByElements[$elType] ?? null;
57 if (is_null($allowedAttrs)) {
62 $attrs = $element->attributes;
// Iterate from the last attribute to the first; presumably so that removing
// an attribute does not shift the indexes still to be visited.
63 for ($i = $attrs->length - 1; $i >= 0; $i--) {
64 /** @var DOMAttr $attr */
65 $attr = $attrs->item($i);
// Compare using the lower-cased name, but remove using the original name.
66 $name = strtolower($attr->name);
// NOTE(review): in_array() is used in its non-strict form; both sides are
// strings here, but passing true as the third argument would make that explicit.
67 if (!in_array($name, $allowedAttrs)) {
68 $element->removeAttribute($attr->name);
// Spread the child nodes into a plain array before recursing, so the loop
// works on a fixed list of nodes.
72 $childNodes = [...$element->childNodes];
73 foreach ($childNodes as $child) {
// Only element children are filtered further; other node types are skipped here.
74 if ($child instanceof DOMElement) {
75 static::filterElement($child);