BookStack Code Mirror - bookstack/commitdiff
Search: Updated indexer to handle non-breaking-spaces
author Dan Brown <redacted>
Tue, 17 Jun 2025 12:59:28 +0000 (13:59 +0100)
committer Dan Brown <redacted>
Tue, 17 Jun 2025 13:00:13 +0000 (14:00 +0100)
Related to #5640

app/Search/SearchIndex.php
tests/Search/SearchIndexingTest.php

index 36f71f6ccc7759cd09d3de29e8464a187c1507a6..844e3584b209cd49442d91066e535f4d66e293e4 100644 (file)
@@ -160,7 +160,9 @@ class SearchIndex
         /** @var DOMNode $child */
         foreach ($doc->getBodyChildren() as $child) {
             $nodeName = $child->nodeName;
-            $termCounts = $this->textToTermCountMap(trim($child->textContent));
+            $text = trim($child->textContent);
+            $text = str_replace("\u{00A0}", ' ', $text);
+            $termCounts = $this->textToTermCountMap($text);
             foreach ($termCounts as $term => $count) {
                 $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
                 $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
index 64779dec6327283c42619e9f59fc68263ccf2fc3..d2bbb2905b457bfbb09ee1730e16d6a804ae6186 100644 (file)
@@ -106,4 +106,14 @@ class SearchIndexingTest extends TestCase
             $this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed");
         }
     }
+
+    public function test_non_breaking_spaces_handled_as_spaces()
+    {
+        $page = $this->entities->newPage(['html' => '<p>a&nbsp;tigerbadger is a dangerous&nbsp;animal</p>']);
+
+        $scoreByTerm = $page->searchTerms()->pluck('score', 'term');
+        $this->assertNotNull($scoreByTerm->get('tigerbadger'));
+        $this->assertNotNull($scoreByTerm->get('dangerous'));
+        $this->assertNotNull($scoreByTerm->get('animal'));
+    }
 }