3 namespace BookStack\Search;
5 use BookStack\Activity\Models\Tag;
6 use BookStack\Entities\Models\Entity;
7 use Illuminate\Support\HtmlString;
9 class SearchResultsFormatter
/**
 * Prepare every given entity for display within search result output.
 * Each model is updated in place with the additional preview attributes
 * that are set by setSearchPreview().
 *
 * @param Entity[] $results
 */
public function format(array $results, SearchOptions $options): void
{
    foreach ($results as $entity) {
        $this->setSearchPreview($entity, $options);
    }
}
/**
 * Update the given entity model to set the attributes used for previews of the item,
 * primarily within search result lists.
 *
 * Sets 'preview_name' and 'preview_content' as HtmlString values built from the
 * entity name and the entity's text field, with the search terms highlighted.
 * The name is formatted without a length limit (target length 0) while the text
 * content is formatted with a target length of 260 characters. Any tags already
 * loaded on the entity are then flagged where they contain one of the terms.
 */
protected function setSearchPreview(Entity $entity, SearchOptions $options): void
{
    $textProperty = $entity->textField;
    $relevantSearchOptions = $options->exacts->merge($options->searches);
    $terms = $relevantSearchOptions->toValueArray();

    // The formatting helpers below only accept strings, so a missing (null)
    // attribute is treated as empty text instead of causing a TypeError.
    $originalContentByNewAttribute = [
        'preview_name'    => $entity->name ?? '',
        'preview_content' => $entity->$textProperty ?? '',
    ];

    foreach ($originalContentByNewAttribute as $attributeName => $content) {
        $targetLength = ($attributeName === 'preview_name') ? 0 : 260;
        $matchRefs = $this->getMatchPositions($content, $terms);
        $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs);
        $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength);
        $entity->setAttribute($attributeName, new HtmlString($formatted));
    }

    // Only tags that were already loaded are highlighted; no relation load is triggered here.
    $tags = $entity->relationLoaded('tags') ? $entity->tags->all() : [];
    $this->highlightTagsContainingTerms($tags, $terms);
}
/**
 * Flag the tags whose name or value contains any of the given terms.
 * Comparison is case-insensitive. A matching name sets the 'highlight_name'
 * attribute on the tag and a matching value sets 'highlight_value'.
 *
 * @param Tag[]    $tags
 * @param string[] $terms
 */
protected function highlightTagsContainingTerms(array $tags, array $terms): void
{
    foreach ($tags as $tag) {
        // Attribute to set when matched => lowercased text to search within.
        $searchableByFlag = [
            'highlight_name'  => mb_strtolower($tag->name),
            'highlight_value' => mb_strtolower($tag->value),
        ];

        foreach ($terms as $term) {
            $needle = mb_strtolower($term);

            foreach ($searchableByFlag as $flag => $haystack) {
                if (mb_strpos($haystack, $needle) !== false) {
                    $tag->setAttribute($flag, true);
                }
            }
        }
    }
}
/**
 * Get the positions of the given terms within the given text.
 *
 * Matching is case-insensitive: both the text and each term are lowercased
 * before searching. The result is in the array format of
 * [int $startIndex => int $endIndex], where the indexes are character
 * positions within the provided text and the end index is exclusive.
 * Where more than one term matches from the same start position the longest
 * match is kept. Empty terms are ignored.
 *
 * @param string[] $terms
 *
 * @return array<int, int>
 */
protected function getMatchPositions(string $text, array $terms): array
{
    $matchRefs = [];
    $text = mb_strtolower($text);

    foreach ($terms as $term) {
        $term = mb_strtolower($term);
        $termLength = mb_strlen($term);

        // An empty needle is found at the search offset itself (PHP 8+), so the
        // offset would never advance and the loop below would never finish.
        if ($termLength === 0) {
            continue;
        }

        $pos = mb_strpos($text, $term);
        while ($pos !== false) {
            $end = $pos + $termLength;
            // A previous term may already have matched from this position;
            // keep the longer range instead of letting the later term overwrite it.
            $matchRefs[$pos] = max($matchRefs[$pos] ?? 0, $end);
            // Continue searching after this match so matches of one term never overlap.
            $pos = mb_strpos($text, $term, $end);
        }
    }

    return $matchRefs;
}
/**
 * Order the given match positions by start index, then combine those which
 * overlap or sit directly next to each other into single ranges.
 * A match lying entirely within the range before it is dropped.
 *
 * @param array<int, int> $matchPositions
 *
 * @return array<int, int>
 */
protected function sortAndMergeMatchPositions(array $matchPositions): array
{
    ksort($matchPositions);

    $merged = [];
    $rangeStart = 0;
    $rangeEnd = 0;

    foreach ($matchPositions as $start => $end) {
        $beginsNewRange = $start > $rangeEnd;

        // Nothing to do when the match neither starts a new range nor extends the current one.
        if (!$beginsNewRange && $end <= $rangeEnd) {
            continue;
        }

        if ($beginsNewRange) {
            $rangeStart = $start;
        }

        $merged[$rangeStart] = $end;
        $rangeEnd = $end;
    }

    return $merged;
}
135 * Format the given original text, returning a version where terms are highlighted within.
136 * Returned content is in HTML text format.
137 * A given $targetLength of 0 asserts no target length limit.
139 * This is a complex function but written to be relatively efficient, going through the term matches in order
140 * so that we're only doing a one-time loop through of the matches. There is no further searching
143 protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
145 $maxEnd = mb_strlen($originalText);
146 $fetchAll = ($targetLength === 0);
147 $contextLength = ($fetchAll ? 0 : 32);
152 $contentTextLength = 0;
155 $targetLength = $maxEnd * 2;
158 foreach ($matchPositions as $start => $end) {
159 // Get our outer text ranges for the added context we want to show upon the result.
160 $contextStart = max($start - $contextLength, 0, $lastEnd);
161 $contextEnd = min($end + $contextLength, $maxEnd);
163 // Adjust the start if we're going to be touching the previous match.
164 $startDiff = $start - $lastEnd;
165 if ($startDiff < 0) {
166 $contextStart = $start;
167 // Trims off '$startDiff' number of characters to bring it back to the start
168 // if this current match zone.
169 $content = mb_substr($content, 0, mb_strlen($content) + $startDiff);
170 $contentTextLength += $startDiff;
173 // Add ellipsis between results
174 if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
176 $contentTextLength += 4;
177 } elseif ($fetchAll) {
178 // Or fill in gap since the previous match
179 $fillLength = $contextStart - $lastEnd;
180 $content .= e(mb_substr($originalText, $lastEnd, $fillLength));
181 $contentTextLength += $fillLength;
184 // Add our content including the bolded matching text
185 $content .= e(mb_substr($originalText, $contextStart, $start - $contextStart));
186 $contentTextLength += $start - $contextStart;
187 $content .= '<strong>' . e(mb_substr($originalText, $start, $end - $start)) . '</strong>';
188 $contentTextLength += $end - $start;
189 $content .= e(mb_substr($originalText, $end, $contextEnd - $end));
190 $contentTextLength += $contextEnd - $end;
192 // Update our last end position
193 $lastEnd = $contextEnd;
195 // Update the first start position if it's not already been set
196 if (is_null($firstStart)) {
197 $firstStart = $contextStart;
200 // Stop if we're near our target
201 if ($contentTextLength >= $targetLength - 10) {
206 // Just copy out the content if we haven't moved along anywhere.
207 if ($lastEnd === 0) {
208 $content = e(mb_substr($originalText, 0, $targetLength));
209 $contentTextLength = $targetLength;
210 $lastEnd = $targetLength;
213 // Pad out the end if we're low
214 $remainder = $targetLength - $contentTextLength;
215 if ($remainder > 10) {
216 $padEndLength = min($maxEnd - $lastEnd, $remainder);
217 $content .= e(mb_substr($originalText, $lastEnd, $padEndLength));
218 $lastEnd += $padEndLength;
219 $contentTextLength += $padEndLength;
222 // Pad out the start if we're still low
223 $remainder = $targetLength - $contentTextLength;
224 $firstStart = $firstStart ?: 0;
225 if (!$fetchAll && $remainder > 10 && $firstStart !== 0) {
226 $padStart = max(0, $firstStart - $remainder);
227 $content = ($padStart === 0 ? '' : '...') . e(mb_substr($originalText, $padStart, $firstStart - $padStart)) . mb_substr($content, 4);
230 // Add ellipsis if we're not at the end
231 if ($lastEnd < $maxEnd) {