]> BookStack Code Mirror - bookstack/blob - app/Entities/Tools/SearchIndex.php
50e471bc9885193a9703b9c497d11b06120bba86
[bookstack] / app / Entities / Tools / SearchIndex.php
1 <?php
2
3 namespace BookStack\Entities\Tools;
4
5 use BookStack\Entities\EntityProvider;
6 use BookStack\Entities\Models\Entity;
7 use BookStack\Entities\Models\SearchTerm;
8 use Illuminate\Support\Collection;
9
/**
 * Builds and maintains the search term index for entities.
 *
 * The name and body text of an entity are split into tokens; every distinct
 * token is stored as a SearchTerm row carrying a relevance score.
 */
class SearchIndex
{
    /**
     * Provides one model instance per entity type to be indexed.
     *
     * @var EntityProvider
     */
    protected $entityProvider;

    public function __construct(EntityProvider $entityProvider)
    {
        $this->entityProvider = $entityProvider;
    }

    /**
     * Index the given entity.
     * Any terms already stored for the entity are removed first, so this
     * is safe to call repeatedly for the same entity.
     */
    public function indexEntity(Entity $entity)
    {
        $this->deleteEntityTerms($entity);
        $terms = $this->entityToTermDataArray($entity);
        SearchTerm::query()->insert($terms);
    }

    /**
     * Index multiple Entities at once.
     * Unlike indexEntity(), existing terms are NOT deleted beforehand;
     * callers are expected to have cleared them where required.
     *
     * @param Entity[] $entities
     */
    public function indexEntities(array $entities)
    {
        $terms = [];
        foreach ($entities as $entity) {
            $entityTerms = $this->entityToTermDataArray($entity);
            array_push($terms, ...$entityTerms);
        }

        // Insert in batches to keep each individual insert statement bounded in size.
        $chunkedTerms = array_chunk($terms, 500);
        foreach ($chunkedTerms as $termChunk) {
            SearchTerm::query()->insert($termChunk);
        }
    }

    /**
     * Delete and re-index the terms for all entities in the system.
     * Can take a callback which is used for reporting progress.
     * Callback receives three arguments:
     * - An instance of the model being processed
     * - The number that have been processed so far.
     * - The total number of that model to be processed.
     *
     * @param callable(Entity, int, int)|null $progressCallback
     */
    public function indexAllEntities(?callable $progressCallback = null)
    {
        SearchTerm::query()->truncate();

        foreach ($this->entityProvider->all() as $entityModel) {
            $selectFields = ['id', 'name', $entityModel->textField];
            $chunkSize = 250;
            $processed = 0;

            // The total and the chunked query below must cover the same set of
            // rows (both include trashed entities), otherwise the reported
            // progress could never line up with the total.
            // NOTE(review): trashed entities are assumed to be wanted in the
            // index, as the total has always counted them - confirm.
            $total = $entityModel->newQuery()->withTrashed()->count();

            $chunkCallback = function (Collection $entities) use ($progressCallback, &$processed, $total, $entityModel) {
                $this->indexEntities($entities->all());

                // Advance by the number of entities actually handled, since the
                // last chunk is usually smaller than the chunk size. Clamped in
                // case rows were added after the total was counted.
                $processed = min($processed + $entities->count(), $total);

                if (is_callable($progressCallback)) {
                    $progressCallback($entityModel, $processed, $total);
                }
            };

            $entityModel->newQuery()
                ->withTrashed()
                ->select($selectFields)
                ->chunk($chunkSize, $chunkCallback);
        }
    }

    /**
     * Delete related Entity search terms.
     */
    public function deleteEntityTerms(Entity $entity)
    {
        $entity->searchTerms()->delete();
    }

    /**
     * Create a scored term array from the given text.
     * The text is split on whitespace and common punctuation. Each distinct
     * token (case-sensitive) yields one entry whose score is its number of
     * occurrences multiplied by the given adjustment.
     *
     * The adjustment is accepted as a float so that fractional factors are
     * applied in full instead of being truncated to an integer.
     *
     * @return array{term: string, score: float}[]
     */
    protected function generateTermArrayFromText(string $text, float $scoreAdjustment = 1): array
    {
        $tokenMap = []; // {TextToken => OccurrenceCount}
        $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
        $token = strtok($text, $splitChars);

        while ($token !== false) {
            $tokenMap[$token] = ($tokenMap[$token] ?? 0) + 1;
            $token = strtok($splitChars);
        }

        $terms = [];
        foreach ($tokenMap as $token => $count) {
            $terms[] = [
                // Numeric tokens become integer array keys; cast back so the
                // term is always a string.
                'term'  => (string) $token,
                'score' => $count * $scoreAdjustment,
            ];
        }

        return $terms;
    }

    /**
     * For the given entity, Generate an array of term data details.
     * Is the raw term data, not instances of SearchTerm models.
     * Terms found in the name are weighted 40 times higher than terms found
     * in the body text; both are scaled by the entity's search factor.
     * A token present in both name and body produces two separate entries.
     *
     * @return array{term: string, score: float, entity_type: string, entity_id: mixed}[]
     */
    protected function entityToTermDataArray(Entity $entity): array
    {
        $nameTerms = $this->generateTermArrayFromText($entity->name, 40 * $entity->searchFactor);
        $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor);
        $termData = array_merge($nameTerms, $bodyTerms);

        $entityType = $entity->getMorphClass();
        $entityId = $entity->id;

        foreach ($termData as $index => $term) {
            $termData[$index]['entity_type'] = $entityType;
            $termData[$index]['entity_id'] = $entityId;
        }

        return $termData;
    }
}