BookStack Code Mirror - bookstack/commitdiff
Fixed issue where text after line breaks was not indexed
author: Dan Brown <redacted>
Mon, 20 Jun 2022 22:47:42 +0000 (23:47 +0100)
committer: Dan Brown <redacted>
Mon, 20 Jun 2022 22:47:42 +0000 (23:47 +0100)
Line breaks were previously effectively removed during indexing, so the
words on either side were joined together, breaking prefix matching.
Added test to cover.
For #3508

app/Entities/Tools/SearchIndex.php
tests/Entity/EntitySearchTest.php

index d43d982079d88262280dbe9c70b40d11632d0577..db44daadfb931122e2a449c9ab3078bf1974877d 100644 (file)
@@ -147,6 +147,8 @@ class SearchIndex
         ];
 
         $html = '<body>' . $html . '</body>';
+        $html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html);
+
         libxml_use_internal_errors(true);
         $doc = new DOMDocument();
         $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
index ab5777e98981e9ec9c7e77db546b48b47ec07c47..b535f5aaa7e177d08efb4717c5c761410aa1732d 100644 (file)
@@ -423,6 +423,17 @@ class EntitySearchTest extends TestCase
         $search->assertSee('My supercool &lt;great&gt; <strong>TestPageContent</strong> page', false);
     }
 
+    public function test_words_adjacent_to_lines_breaks_can_be_matched_with_normal_terms()
+    {
+        $page = $this->newPage(['name' => 'TermA', 'html' => '
+            <p>TermA<br>TermB<br>TermC</p>
+        ']);
+
+        $search = $this->asEditor()->get('/search?term=' . urlencode('TermB TermC'));
+
+        $search->assertSee($page->getUrl(), false);
+    }
+
     public function test_searches_with_user_filters_adds_them_into_advanced_search_form()
     {
         $resp = $this->asEditor()->get('/search?term=' . urlencode('test {updated_by:me} {created_by:dan}'));