]> BookStack Code Mirror - bookstack/commitdiff
Search Index: Fixed SQL error when indexing large pages
authorDan Brown <redacted>
Wed, 11 Dec 2024 15:53:57 +0000 (15:53 +0000)
committerDan Brown <redacted>
Wed, 11 Dec 2024 15:55:19 +0000 (15:55 +0000)
Due to hitting statement placeholder limits (typically 65k)
when inserting index terms for single page.

Added test to cover.
Also added skipped tests for tests we don't always want to run.
For #5322

app/Search/SearchIndex.php
tests/Entity/EntitySearchTest.php
tests/LanguageTest.php

index d9fc4e7aadce9aabc3b8b7aa70e17f6edf8d5af3..c7d9d6502e272ed4edae6b2cf6d402b7ea43464b 100644 (file)
@@ -30,7 +30,7 @@ class SearchIndex
     {
         $this->deleteEntityTerms($entity);
         $terms = $this->entityToTermDataArray($entity);
-        SearchTerm::query()->insert($terms);
+        $this->insertTerms($terms);
     }
 
     /**
@@ -46,10 +46,7 @@ class SearchIndex
             array_push($terms, ...$entityTerms);
         }
 
-        $chunkedTerms = array_chunk($terms, 500);
-        foreach ($chunkedTerms as $termChunk) {
-            SearchTerm::query()->insert($termChunk);
-        }
+        $this->insertTerms($terms);
     }
 
     /**
@@ -99,6 +96,19 @@ class SearchIndex
         $entity->searchTerms()->delete();
     }
 
+    /**
+     * Insert the given terms into the database.
+     * Chunks through the given terms to remain within database limits.
+     * @param array[] $terms
+     */
+    protected function insertTerms(array $terms): void
+    {
+        $chunkedTerms = array_chunk($terms, 500);
+        foreach ($chunkedTerms as $termChunk) {
+            SearchTerm::query()->insert($termChunk);
+        }
+    }
+
     /**
      * Create a scored term array from the given text, where the keys are the terms
      * and the values are their scores.
index cabf23bd3d33fa7a4cd531dd61adbe396334f2c8..5ace70e3ab2835c7bdebe6a05d2239eec6c762c3 100644 (file)
@@ -6,6 +6,7 @@ use BookStack\Activity\Models\Tag;
 use BookStack\Entities\Models\Book;
 use BookStack\Entities\Models\Bookshelf;
 use BookStack\Entities\Models\Chapter;
+use Illuminate\Support\Str;
 use Tests\TestCase;
 
 class EntitySearchTest extends TestCase
@@ -477,6 +478,25 @@ class EntitySearchTest extends TestCase
         $this->assertEquals(2, $scoreByTerm->get('TermG'));
     }
 
+    public function test_indexing_works_as_expected_for_page_with_lots_of_terms()
+    {
+        $this->markTestSkipped('Time consuming test');
+
+        $count = 100000;
+        $text = '';
+        $chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_#';
+        for ($i = 0; $i < $count; $i++) {
+            $text .= substr(str_shuffle($chars), 0, 5) . ' ';
+        }
+
+        $page = $this->entities->newPage(['name' => 'Test page A', 'html' => '<p>' . $text . '</p>']);
+
+        $termCount = $page->searchTerms()->count();
+
+        // Expect at least 90% unique rate
+        $this->assertGreaterThan($count * 0.9, $termCount);
+    }
+
     public function test_name_and_content_terms_are_merged_to_single_score()
     {
         $page = $this->entities->newPage(['name' => 'TermA', 'html' => '
index abe06407e1ef7c636234fc14df50f4aab41cde39..28491c3af9d3b112e359dddc4ce4d0f809e3bdd2 100644 (file)
@@ -27,8 +27,10 @@ class LanguageTest extends TestCase
     }
 
     // Not part of standard phpunit test runs since we sometimes expect non-added langs.
-    public function do_test_locales_all_have_language_dropdown_entry()
+    public function test_locales_all_have_language_dropdown_entry()
     {
+        $this->markTestSkipped('Only used when checking language inclusion');
+
         $dropdownLocales = array_keys(trans('settings.language_select', [], 'en'));
         sort($dropdownLocales);
         sort($this->langs);