From: Dan Brown Date: Mon, 20 Jun 2022 22:47:42 +0000 (+0100) Subject: Fixed issue where text after line breaks not indexed X-Git-Tag: v22.06~1^2~7 X-Git-Url: http://source.bookstackapp.com/bookstack/commitdiff_plain/0c6f598d9172a5b69968a7442b947e2a3694f053 Fixed issue where text after line breaks not indexed Linebreaks would previously essentially be removed during index and hence joined to adjacent words, breaking prefix matching. Added test to cover. For #3508 --- diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index d43d98207..db44daadf 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -147,6 +147,8 @@ class SearchIndex ]; $html = '<body>' . $html . '</body>'; + $html = str_ireplace(['<br>', '<br />', '<br/>'], "\n", $html); + libxml_use_internal_errors(true); $doc = new DOMDocument(); $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index ab5777e98..b535f5aaa 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -423,6 +423,17 @@ class EntitySearchTest extends TestCase $search->assertSee('My supercool <great> TestPageContent page', false); } + public function test_words_adjacent_to_lines_breaks_can_be_matched_with_normal_terms() + { + $page = $this->newPage(['name' => 'TermA', 'html' => ' +

<p>TermA<br>TermB<br>TermC</p>

+ ']); + + $search = $this->asEditor()->get('/search?term=' . urlencode('TermB TermC')); + + $search->assertSee($page->getUrl(), false); + } + public function test_searches_with_user_filters_adds_them_into_advanced_search_form() { $resp = $this->asEditor()->get('/search?term=' . urlencode('test {updated_by:me} {created_by:dan}'));