X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/f28daa01d9d43d36c12b075bddca92be9e8f85e4..refs/pull/3333/head:/app/Entities/Tools/SearchIndex.php diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index bde5ef860..d43d98207 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -2,16 +2,24 @@ namespace BookStack\Entities\Tools; +use BookStack\Actions\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; use BookStack\Entities\Models\SearchTerm; use DOMDocument; use DOMNode; +use Illuminate\Database\Eloquent\Builder; use Illuminate\Support\Collection; class SearchIndex { + /** + * A list of delimiter characters used to break-up parsed content into terms for indexing. + * + * @var string + */ + public static $delimiters = " \n\t.,!?:;()[]{}<>`'\""; /** * @var EntityProvider @@ -60,7 +68,7 @@ class SearchIndex * - The number that have been processed so far. * - The total number of that model to be processed. * - * @param callable(Entity, int, int)|null $progressCallback + * @param callable(Entity, int, int):void|null $progressCallback */ public function indexAllEntities(?callable $progressCallback = null) { @@ -69,7 +77,9 @@ class SearchIndex foreach ($this->entityProvider->all() as $entityModel) { $indexContentField = $entityModel instanceof Page ? 'html' : 'description'; $selectFields = ['id', 'name', $indexContentField]; - $total = $entityModel->newQuery()->withTrashed()->count(); + /** @var Builder $query */ + $query = $entityModel->newQuery(); + $total = $query->withTrashed()->count(); $chunkSize = 250; $processed = 0; @@ -84,6 +94,7 @@ class SearchIndex $entityModel->newQuery() ->select($selectFields) + ->with(['tags:id,name,value,entity_id,entity_type']) ->chunk($chunkSize, $chunkCallback); } } @@ -154,6 +165,30 @@ class SearchIndex return $scoresByTerm; } + /** + * Create a scored term map from the given set of entity tags. + * + * @param Tag[] $tags + * + * @returns array + */ + protected function generateTermScoreMapFromTags(array $tags): array + { + $scoreMap = []; + $names = []; + $values = []; + + foreach ($tags as $tag) { + $names[] = $tag->name; + $values[] = $tag->value; + } + + $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3); + $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5); + + return $this->mergeTermScoreMaps($nameMap, $valueMap); + } + /** * For the given text, return an array where the keys are the unique term words * and the values are the frequency of that term. @@ -163,7 +198,7 @@ class SearchIndex protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = " \n\t.,!?:;()[]{}<>`'\""; + $splitChars = static::$delimiters; $token = strtok($text, $splitChars); while ($token !== false) { @@ -186,31 +221,31 @@ class SearchIndex protected function entityToTermDataArray(Entity $entity): array { $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor); + $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); if ($entity instanceof Page) { $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); } else { - $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor); + $bodyTermsMap = $this->generateTermScoreMapFromText($entity->getAttribute('description') ?? '', $entity->searchFactor); } - $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap); + $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); $dataArray = []; $entityId = $entity->id; $entityType = $entity->getMorphClass(); foreach ($mergedScoreMap as $term => $score) { $dataArray[] = [ - 'term' => $term, - 'score' => $score, + 'term' => $term, + 'score' => $score, 'entity_type' => $entityType, - 'entity_id' => $entityId, + 'entity_id' => $entityId, ]; } return $dataArray; } - /** * For the given term data arrays, Merge their contents by term * while combining any scores.