]> BookStack Code Mirror - bookstack/commitdiff
Added search term score popularity adjustment
authorDan Brown <redacted>
Mon, 8 Nov 2021 14:12:40 +0000 (14:12 +0000)
committerDan Brown <redacted>
Mon, 8 Nov 2021 14:23:48 +0000 (14:23 +0000)
Adds adjustment of search term 'score' (Using in result ranking) so that
a relative 0.3 to 1.3 mulitplier is applied based upon relative
popularity within the whole database. At this point the term popularity
is still done via a prefix match against the search term.

Uses a SUM(IF(cond, a, IF(cond, a, ...))) chain to produce the scoring
result in the select query.

app/Entities/Tools/SearchRunner.php

index aa129eb3c8ec0134244bab3daf158ff85c803c3f..3dd0e6b6e94fe341feb2c50f58c8e9faeb8ac8f9 100644 (file)
@@ -7,13 +7,14 @@ use BookStack\Auth\User;
 use BookStack\Entities\EntityProvider;
 use BookStack\Entities\Models\Entity;
 use BookStack\Entities\Models\Page;
+use BookStack\Entities\Models\SearchTerm;
 use Illuminate\Database\Eloquent\Builder as EloquentBuilder;
 use Illuminate\Database\Eloquent\Collection as EloquentCollection;
 use Illuminate\Database\Query\Builder;
-use Illuminate\Database\Query\JoinClause;
 use Illuminate\Support\Collection;
 use Illuminate\Support\Facades\DB;
 use Illuminate\Support\Str;
+use SplObjectStorage;
 
 class SearchRunner
 {
@@ -34,10 +35,19 @@ class SearchRunner
      */
     protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!='];
 
+    /**
+     * Retain a cache of score adjusted terms for specific search options.
+     * From PHP>=8 this can be made into a WeakMap instead.
+     *
+     * @var SplObjectStorage
+     */
+    protected $termAdjustmentCache;
+
     public function __construct(EntityProvider $entityProvider, PermissionService $permissionService)
     {
         $this->entityProvider = $entityProvider;
         $this->permissionService = $permissionService;
+        $this->termAdjustmentCache = new SplObjectStorage();
     }
 
     /**
@@ -138,10 +148,12 @@ class SearchRunner
 
         if ($entity instanceof Page) {
             $entityQuery->select($entity::$listAttributes);
+        } else {
+            $entityQuery->select(['*']);
         }
 
         // Handle normal search terms
-        $this->applyTermSearch($entityQuery, $searchOpts->searches, $entity);
+        $this->applyTermSearch($entityQuery, $searchOpts, $entity);
 
         // Handle exact term matching
         foreach ($searchOpts->exacts as $inputTerm) {
@@ -170,33 +182,107 @@ class SearchRunner
     /**
      * For the given search query, apply the queries for handling the regular search terms.
      */
-    protected function applyTermSearch(EloquentBuilder $entityQuery, array $terms, Entity $entity): void
+    protected function applyTermSearch(EloquentBuilder $entityQuery, SearchOptions $options, Entity $entity): void
     {
+        $terms = $options->searches;
         if (count($terms) === 0) {
             return;
         }
 
+        $scoredTerms = $this->getTermAdjustments($options);
+        $scoreSelect = $this->selectForScoredTerms($scoredTerms);
+
         $subQuery = DB::table('search_terms')->select([
             'entity_id',
             'entity_type',
-            DB::raw('SUM(score) as score'),
+            DB::raw($scoreSelect['statement']),
         ]);
 
-        $subQuery->where('entity_type', '=', $entity->getMorphClass());
+        $subQuery->addBinding($scoreSelect['bindings'], 'select');
 
+        $subQuery->where('entity_type', '=', $entity->getMorphClass());
         $subQuery->where(function (Builder $query) use ($terms) {
             foreach ($terms as $inputTerm) {
                 $query->orWhere('term', 'like', $inputTerm . '%');
             }
-        })->groupBy('entity_type', 'entity_id');
+        });
+        $subQuery->groupBy('entity_type', 'entity_id');
+
+        $entityQuery->joinSub($subQuery, 's', 'id', '=', 'entity_id');
+        $entityQuery->addSelect('s.score');
+        $entityQuery->orderBy('score', 'desc');
+    }
 
-        $entityQuery->join(DB::raw('(' . $subQuery->toSql() . ') as s'), function (JoinClause $join) {
-                $join->on('id', '=', 'entity_id');
-            })
-            ->addSelect(DB::raw('s.score'))
-            ->orderBy('score', 'desc');
+    /**
+     * Create a select statement, with prepared bindings, for the given
+     * set of scored search terms.
+     * @return array{statement: string, bindings: string[]}
+     */
+    protected function selectForScoredTerms(array $scoredTerms): array
+    {
+        // Within this we walk backwards to create the chain of 'if' statements
+        // so that each previous statement is used in the 'else' condition of
+        // the next (earlier) to be built. We start at '0' to have no score
+        // on no match (Should never actually get to this case).
+        $ifChain = '0';
+        $bindings = [];
+        foreach ($scoredTerms as $term => $score) {
+            $ifChain = 'IF(term like ?, score * ' . (float)$score . ', ' . $ifChain . ')';
+            $bindings[] = $term . '%';
+        }
+
+        return [
+            'statement' => 'SUM(' . $ifChain . ') as score',
+            'bindings' => array_reverse($bindings),
+        ];
+    }
+
+    protected function getTermAdjustments(SearchOptions $options): array
+    {
+        if (isset($this->termAdjustmentCache[$options])) {
+            return $this->termAdjustmentCache[$options];
+        }
+
+        $termQuery = SearchTerm::query()->toBase();
+        $whenStatements = [];
+        $whenBindings = [];
+
+        foreach ($options->searches as $term) {
+            $whenStatements[] = 'WHEN term LIKE ? THEN ?';
+            $whenBindings[] = $term . '%';
+            $whenBindings[] = $term;
+
+            $termQuery->orWhere('term', 'like', $term . '%');
+        }
+
+        $case = 'CASE ' . implode(' ', $whenStatements) . ' END';
+        $termQuery->selectRaw( $case . ' as term', $whenBindings);
+        $termQuery->selectRaw('COUNT(*) as count');
+        $termQuery->groupByRaw($case, $whenBindings);
+
+        $termCounts = $termQuery->get()->pluck('count', 'term')->toArray();
+        $adjusted = $this->rawTermCountsToAdjustments($termCounts);
+
+        $this->termAdjustmentCache[$options] = $adjusted;
+        return $this->termAdjustmentCache[$options];
+    }
+
+    /**
+     * Convert counts of terms into a relative-count normalised multiplier.
+     * @param array<string, int> $termCounts
+     * @return array<string, int>
+     */
+    protected function rawTermCountsToAdjustments(array $termCounts): array
+    {
+        $multipliers = [];
+        $max = max(array_values($termCounts));
+
+        foreach ($termCounts as $term => $count) {
+            $percent = round($count / $max, 5);
+            $multipliers[$term] = 1.3 - $percent;
+        }
 
-        $entityQuery->mergeBindings($subQuery);
+        return $multipliers;
     }
 
     /**