X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/f28daa01d9d43d36c12b075bddca92be9e8f85e4..refs/pull/3043/head:/app/Entities/Tools/SearchIndex.php diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index bde5ef860..d748c1695 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -2,6 +2,7 @@ namespace BookStack\Entities\Tools; +use BookStack\Actions\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; @@ -12,6 +13,12 @@ use Illuminate\Support\Collection; class SearchIndex { + /** + * A list of delimiter characters used to break-up parsed content into terms for indexing. + * + * @var string + */ + public static $delimiters = " \n\t.,!?:;()[]{}<>`'\""; /** * @var EntityProvider @@ -84,6 +91,7 @@ class SearchIndex $entityModel->newQuery() ->select($selectFields) + ->with(['tags:id,name,value,entity_id,entity_type']) ->chunk($chunkSize, $chunkCallback); } } @@ -154,6 +162,30 @@ class SearchIndex return $scoresByTerm; } + /** + * Create a scored term map from the given set of entity tags. + * + * @param Tag[] $tags + * + * @returns array + */ + protected function generateTermScoreMapFromTags(array $tags): array + { + $scoreMap = []; + $names = []; + $values = []; + + foreach ($tags as $tag) { + $names[] = $tag->name; + $values[] = $tag->value; + } + + $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3); + $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5); + + return $this->mergeTermScoreMaps($nameMap, $valueMap); + } + /** * For the given text, return an array where the keys are the unique term words * and the values are the frequency of that term. @@ -163,7 +195,7 @@ class SearchIndex protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = " \n\t.,!?:;()[]{}<>`'\""; + $splitChars = static::$delimiters; $token = strtok($text, $splitChars); while ($token !== false) { @@ -186,31 +218,31 @@ class SearchIndex protected function entityToTermDataArray(Entity $entity): array { $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor); + $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); if ($entity instanceof Page) { $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html); } else { - $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor); + $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? '', $entity->searchFactor); } - $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap); + $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); $dataArray = []; $entityId = $entity->id; $entityType = $entity->getMorphClass(); foreach ($mergedScoreMap as $term => $score) { $dataArray[] = [ - 'term' => $term, - 'score' => $score, + 'term' => $term, + 'score' => $score, 'entity_type' => $entityType, - 'entity_id' => $entityId, + 'entity_id' => $entityId, ]; } return $dataArray; } - /** * For the given term data arrays, Merge their contents by term * while combining any scores.