+<?php
+
+namespace BookStack\Util;
+
+use DOMAttr;
+use DOMElement;
+use DOMNamedNodeMap;
+use DOMNode;
+
+/**
+ * Filter to ensure HTML input for description content remains simple and
+ * to a limited allow-list of elements and attributes.
+ * More for consistency and to prevent nuisance rather than for security
+ * (which would be done via a separate content filter and CSP).
+ */
+class HtmlDescriptionFilter
+{
+ /**
+ * @var array<string, string[]>
+ */
+ protected static array $allowedAttrsByElements = [
+ 'p' => [],
+ 'a' => ['href', 'title'],
+ 'ol' => [],
+ 'ul' => [],
+ 'li' => [],
+ 'strong' => [],
+ 'em' => [],
+ 'br' => [],
+ ];
+
+ public static function filterFromString(string $html): string
+ {
+ $doc = new HtmlDocument($html);
+
+ $topLevel = [...$doc->getBodyChildren()];
+ foreach ($topLevel as $child) {
+ /** @var DOMNode $child */
+ if ($child instanceof DOMElement) {
+ static::filterElement($child);
+ } else {
+ $child->parentNode->removeChild($child);
+ }
+ }
+
+ return $doc->getBodyInnerHtml();
+ }
+
+ protected static function filterElement(DOMElement $element): void
+ {
+ $elType = strtolower($element->tagName);
+ $allowedAttrs = static::$allowedAttrsByElements[$elType] ?? null;
+ if (is_null($allowedAttrs)) {
+ $element->remove();
+ return;
+ }
+
+ /** @var DOMNamedNodeMap $attrs */
+ $attrs = $element->attributes;
+ for ($i = $attrs->length - 1; $i >= 0; $i--) {
+ /** @var DOMAttr $attr */
+ $attr = $attrs->item($i);
+ $name = strtolower($attr->name);
+ if (!in_array($name, $allowedAttrs)) {
+ $element->removeAttribute($attr->name);
+ }
+ }
+
+ foreach ($element->childNodes as $child) {
+ if ($child instanceof DOMElement) {
+ static::filterElement($child);
+ }
+ }
+ }
+}