namespace BookStack\Entities\Tools;
+use BookStack\Actions\Tag;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\Page;
class SearchIndex
{
+ /**
+ * Characters treated as delimiters when breaking up parsed content into terms for indexing.
+ * The whole string is handed to strtok() as its token set when text is split into terms,
+ * so each individual character listed here (whitespace included) separates terms.
+ *
+ * @var string
+ */
+ public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
/**
* @var EntityProvider
$entityModel->newQuery()
->select($selectFields)
+ ->with(['tags:id,name,value,entity_id,entity_type'])
->chunk($chunkSize, $chunkCallback);
}
}
return $scoresByTerm;
}
+    /**
+     * Create a scored term map from the given set of entity tags.
+     *
+     * Tag names and tag values are each joined with single spaces and run through
+     * generateTermScoreMapFromText() separately, passing 3 for names and 5 for values
+     * as the second argument, before the two resulting maps are merged into one.
+     *
+     * @param Tag[] $tags
+     *
+     * @return array<string, int>
+     */
+    protected function generateTermScoreMapFromTags(array $tags): array
+    {
+        $names = [];
+        $values = [];
+
+        foreach ($tags as $tag) {
+            $names[] = $tag->name;
+            $values[] = $tag->value;
+        }
+
+        // An empty tag list yields two empty strings here, which are still passed through for scoring.
+        $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3);
+        $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5);
+
+        return $this->mergeTermScoreMaps($nameMap, $valueMap);
+    }
+
/**
* For the given text, return an array where the keys are the unique term words
* and the values are the frequency of that term.
protected function textToTermCountMap(string $text): array
{
$tokenMap = []; // {TextToken => OccurrenceCount}
- $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
+ $splitChars = static::$delimiters;
$token = strtok($text, $splitChars);
while ($token !== false) {
protected function entityToTermDataArray(Entity $entity): array
{
$nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor);
+ $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all()); // Tag names and values contribute scored terms too.
if ($entity instanceof Page) {
$bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html);
} else {
- $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor);
+ $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description ?? '', $entity->searchFactor); // A null description falls back to an empty string.
}
- $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap);
+ $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap); // Merge the name, body and tag term maps into one.
$dataArray = [];
$entityId = $entity->id;
$entityType = $entity->getMorphClass();
foreach ($mergedScoreMap as $term => $score) {
$dataArray[] = [
- 'term' => $term,
- 'score' => $score,
+ 'term' => $term,
+ 'score' => $score,
'entity_type' => $entityType,
- 'entity_id' => $entityId,
+ 'entity_id' => $entityId,
];
}
return $dataArray;
}
-
/**
* For the given term data arrays, Merge their contents by term
* while combining any scores.