namespace BookStack\Entities\Tools;
+use BookStack\Actions\Tag;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Models\SearchTerm;
use DOMDocument;
use DOMNode;
+use Illuminate\Database\Eloquent\Builder;
use Illuminate\Support\Collection;
class SearchIndex
{
+ /**
+ * A list of delimiter characters used to break up parsed content into terms for indexing.
+ * textToTermCountMap() hands this string to strtok() as its delimiter set, so every
+ * individual character listed here acts as a split point between terms.
+ *
+ * @var string
+ */
+ public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
/**
* @var EntityProvider
* - The number that have been processed so far.
* - The total number of that model to be processed.
*
- * @param callable(Entity, int, int)|null $progressCallback
+ * @param callable(Entity, int, int):void|null $progressCallback
*/
public function indexAllEntities(?callable $progressCallback = null)
{
foreach ($this->entityProvider->all() as $entityModel) {
$indexContentField = $entityModel instanceof Page ? 'html' : 'description';
$selectFields = ['id', 'name', $indexContentField];
- $total = $entityModel->newQuery()->withTrashed()->count();
+ /** @var Builder<Entity> $query */
+ $query = $entityModel->newQuery();
+ $total = $query->withTrashed()->count();
$chunkSize = 250;
$processed = 0;
$entityModel->newQuery()
->select($selectFields)
+ ->with(['tags:id,name,value,entity_id,entity_type'])
->chunk($chunkSize, $chunkCallback);
}
}
return $scoresByTerm;
}
+ /**
+ * Create a scored term map from the given set of entity tags.
+ *
+ * All tag names are joined into one space-separated string and all tag values into
+ * another. Each string is passed to generateTermScoreMapFromText() (names with a
+ * second argument of 3, values with 5) and the two resulting maps are combined
+ * via mergeTermScoreMaps().
+ * NOTE(review): the second argument is presumably a score weighting, which would make
+ * terms found in tag values count for more than those in tag names - confirm.
+ *
+ * @param Tag[] $tags
+ *
+ * @return array<string, int>
+ */
+ protected function generateTermScoreMapFromTags(array $tags): array
+ {
+ $scoreMap = []; // NOTE(review): assigned but never read within this method.
+ $names = [];
+ $values = [];
+
+ foreach ($tags as $tag) {
+ $names[] = $tag->name;
+ $values[] = $tag->value;
+ }
+
+ $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3);
+ $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5);
+
+ return $this->mergeTermScoreMaps($nameMap, $valueMap);
+ }
+
/**
* For the given text, return an array where the keys are the unique term words
* and the values are the frequency of that term.
protected function textToTermCountMap(string $text): array
{
$tokenMap = []; // {TextToken => OccurrenceCount}
- $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
+ $splitChars = static::$delimiters;
$token = strtok($text, $splitChars);
while ($token !== false) {
/**
* Build the list of search term rows for the given entity.
*
* Term score maps are generated from the entity name, its tags and its body content
* (the 'html' property for pages, the 'description' attribute for everything else),
* then merged via mergeTermScoreMaps(). Each term in the merged map becomes one row
* holding the keys 'term', 'score', 'entity_type' and 'entity_id'.
* NOTE(review): the second argument given to generateTermScoreMapFromText() looks like
* a score weighting (40 * searchFactor for names, searchFactor for descriptions) - confirm.
*
* @param Entity $entity The entity to produce term rows for.
*
* @return array[] One associative array per unique term found for the entity.
*/
protected function entityToTermDataArray(Entity $entity): array
{
$nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor);
+ $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); // all() supplies the array the tag scorer requires.
if ($entity instanceof Page) {
$bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html);
} else {
- $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor);
+ $bodyTermsMap = $this->generateTermScoreMapFromText($entity->getAttribute('description') ?? '', $entity->searchFactor); // Falls back to '' when the description is null.
}
- $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap);
+ $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap);
$dataArray = [];
$entityId = $entity->id;
$entityType = $entity->getMorphClass(); // Stored as the 'entity_type' value of every row.
foreach ($mergedScoreMap as $term => $score) {
$dataArray[] = [
- 'term' => $term,
- 'score' => $score,
+ 'term' => $term,
+ 'score' => $score,
'entity_type' => $entityType,
- 'entity_id' => $entityId,
+ 'entity_id' => $entityId,
];
}
return $dataArray;
}
-
/**
* For the given term data arrays, Merge their contents by term
* while combining any scores.