From: Dan Brown Date: Mon, 20 Jun 2022 22:47:42 +0000 (+0100) Subject: Fixed issue where text after line breaks not indexed X-Git-Tag: v22.06~1^2~7 X-Git-Url: http://source.bookstackapp.com/bookstack/commitdiff_plain/0c6f598d9172a5b69968a7442b947e2a3694f053?ds=inline Fixed issue where text after line breaks not indexed Linebreaks would previously essentially be removed during index and hence joined to adjacent words, breaking prefix matching. Added test to cover. For #3508 --- diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index d43d98207..db44daadf 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -147,6 +147,8 @@ class SearchIndex ]; $html = '<body>' . $html . '</body>'; + $html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html); + libxml_use_internal_errors(true); $doc = new DOMDocument(); $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index ab5777e98..b535f5aaa 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -423,6 +423,17 @@ class EntitySearchTest extends TestCase $search->assertSee('My supercool <great> TestPageContent page', false); } + public function test_words_adjacent_to_lines_breaks_can_be_matched_with_normal_terms() + { + $page = $this->newPage(['name' => 'TermA', 'html' => ' +

<p>TermA<br>TermB<br>TermC</p>
+ ']); + + $search = $this->asEditor()->get('/search?term=' . urlencode('TermB TermC')); + + $search->assertSee($page->getUrl(), false); + } + public function test_searches_with_user_filters_adds_them_into_advanced_search_form() { $resp = $this->asEditor()->get('/search?term=' . urlencode('test {updated_by:me} {created_by:dan}'));