BookStack Code Mirror - bookstack/blobdiff - app/Entities/Tools/SearchIndex.php
Fixed occurrences of altered titles in search results
[bookstack] / app / Entities / Tools / SearchIndex.php
index bde5ef8606c79bc6ec40a80c4f2cd71c21802150..d748c1695d46369c314eb6f6f121b0bc9dce3649 100644 (file)
@@ -2,6 +2,7 @@
 
 namespace BookStack\Entities\Tools;
 
+use BookStack\Actions\Tag;
 use BookStack\Entities\EntityProvider;
 use BookStack\Entities\Models\Entity;
 use BookStack\Entities\Models\Page;
@@ -12,6 +13,12 @@ use Illuminate\Support\Collection;
 
 class SearchIndex
 {
+    /**
+     * A list of delimiter characters used to break-up parsed content into terms for indexing.
+     *
+     * @var string
+     */
+    public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
 
     /**
      * @var EntityProvider
@@ -84,6 +91,7 @@ class SearchIndex
 
             $entityModel->newQuery()
                 ->select($selectFields)
+                ->with(['tags:id,name,value,entity_id,entity_type'])
                 ->chunk($chunkSize, $chunkCallback);
         }
     }
@@ -154,6 +162,30 @@ class SearchIndex
         return $scoresByTerm;
     }
 
+    /**
+     * Create a scored term map from entity tags: names use a score argument of 3, values use 5.
+     *
+     * @param Tag[] $tags
+     *
+     * @return array<string, int>
+     */
+    protected function generateTermScoreMapFromTags(array $tags): array
+    {
+        $scoreMap = []; // NOTE(review): never read in this method; looks removable.
+        $names = [];
+        $values = [];
+
+        foreach ($tags as $tag) {
+            $names[] = $tag->name;
+            $values[] = $tag->value;
+        }
+
+        $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3);
+        $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5);
+
+        return $this->mergeTermScoreMaps($nameMap, $valueMap);
+    }
+
     /**
      * For the given text, return an array where the keys are the unique term words
      * and the values are the frequency of that term.
@@ -163,7 +195,7 @@ class SearchIndex
     protected function textToTermCountMap(string $text): array
     {
         $tokenMap = []; // {TextToken => OccurrenceCount}
-        $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
+        $splitChars = static::$delimiters;
         $token = strtok($text, $splitChars);
 
         while ($token !== false) {
@@ -186,31 +218,31 @@ class SearchIndex
     protected function entityToTermDataArray(Entity $entity): array
     {
         $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor);
+        $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all());
 
         if ($entity instanceof Page) {
             $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html);
         } else {
-            $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor);
+            $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? '', $entity->searchFactor);
         }
 
-        $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap);
+        $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap);
 
         $dataArray = [];
         $entityId = $entity->id;
         $entityType = $entity->getMorphClass();
         foreach ($mergedScoreMap as $term => $score) {
             $dataArray[] = [
-                'term' => $term,
-                'score' => $score,
+                'term'        => $term,
+                'score'       => $score,
                 'entity_type' => $entityType,
-                'entity_id' => $entityId,
+                'entity_id'   => $entityId,
             ];
         }
 
         return $dataArray;
     }
 
-
     /**
      * For the given term data arrays, Merge their contents by term
      * while combining any scores.