]> BookStack Code Mirror - bookstack/commitdiff
Updated search indexer to split words better
author Dan Brown <redacted>
Sat, 30 Sep 2017 13:14:23 +0000 (14:14 +0100)
committer Dan Brown <redacted>
Sat, 30 Sep 2017 13:14:23 +0000 (14:14 +0100)
Will now split up words based on more chars than just spaces.
Now takes into account newlines, tabs, periods & commas.

Fixed #531

app/Services/SearchService.php

index bb92a1d7c83cfcaba5f7eedd435865f44ac2a1a3..aebeace1eeb06f6a067be51bc827baece48a7c88 100644 (file)
@@ -382,11 +382,13 @@ class SearchService
     protected function generateTermArrayFromText($text, $scoreAdjustment = 1)
     {
         $tokenMap = []; // {TextToken => OccurrenceCount}
-        $splitText = explode(' ', $text);
-        foreach ($splitText as $token) {
-            if ($token === '') continue;
+        $splitChars = " \n\t.,";
+        $token = strtok($text, $splitChars);
+
+        while ($token !== false) {
             if (!isset($tokenMap[$token])) $tokenMap[$token] = 0;
             $tokenMap[$token]++;
+            $token = strtok($splitChars);
         }
 
         $terms = [];