X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/f4449928f83748d015a633cdc1cef50fe822648c..refs/pull/5725/head:/app/Search/SearchIndex.php diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index a8bd2c4b2..844e3584b 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -16,7 +16,7 @@ class SearchIndex /** * A list of delimiter characters used to break-up parsed content into terms for indexing. */ - public static string $delimiters = " \n\t.-,!?:;()[]{}<>`'\""; + public static string $delimiters = " \n\t.-,!?:;()[]{}<>`'\"«»"; /** * A list of delimiter which could be commonly used within a single term and also indicate a break between terms. @@ -160,7 +160,9 @@ class SearchIndex /** @var DOMNode $child */ foreach ($doc->getBodyChildren() as $child) { $nodeName = $child->nodeName; - $termCounts = $this->textToTermCountMap(trim($child->textContent)); + $text = trim($child->textContent); + $text = str_replace("\u{00A0}", ' ', $text); + $termCounts = $this->textToTermCountMap($text); foreach ($termCounts as $term => $count) { $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1); $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;