];
foreach ($originalContentByNewAttribute as $attributeName => $content) {
+ $targetLength = ($attributeName === 'preview_name') ? 0 : 260;
$matchRefs = $this->getMatchPositions($content, $terms);
$mergedRefs = $this->sortAndMergeMatchPositions($matchRefs);
- $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content);
+ $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength);
$entity->setAttribute($attributeName, new HtmlString($formatted));
}
protected function highlightTagsContainingTerms(array $tags, array $terms): void
{
foreach ($tags as $tag) {
- $tagName = strtolower($tag->name);
- $tagValue = strtolower($tag->value);
+ $tagName = mb_strtolower($tag->name);
+ $tagValue = mb_strtolower($tag->value);
foreach ($terms as $term) {
- $termLower = strtolower($term);
+ $termLower = mb_strtolower($term);
- if (strpos($tagName, $termLower) !== false) {
+ if (mb_strpos($tagName, $termLower) !== false) {
$tag->setAttribute('highlight_name', true);
}
- if (strpos($tagValue, $termLower) !== false) {
+ if (mb_strpos($tagValue, $termLower) !== false) {
$tag->setAttribute('highlight_value', true);
}
}
protected function getMatchPositions(string $text, array $terms): array
{
$matchRefs = [];
- $text = strtolower($text);
+ $text = mb_strtolower($text);
foreach ($terms as $term) {
$offset = 0;
- $term = strtolower($term);
- $pos = strpos($text, $term, $offset);
+ $term = mb_strtolower($term);
+ $pos = mb_strpos($text, $term, $offset);
while ($pos !== false) {
- $end = $pos + strlen($term);
+ $end = $pos + mb_strlen($term);
$matchRefs[$pos] = $end;
$offset = $end;
- $pos = strpos($text, $term, $offset);
+ $pos = mb_strpos($text, $term, $offset);
}
}
/**
* Format the given original text, returning a version where terms are highlighted within.
* Returned content is in HTML text format.
+ * A given $targetLength of 0 asserts no target length limit.
+ *
+ * This is a complex function but written to be relatively efficient: it walks the term matches in order
+ * so that the matches are only looped through once. No further searching is done
+ * within here.
*/
- protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText): string
+ protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
{
- $contextRange = 32;
- $targetLength = 260;
- $maxEnd = strlen($originalText);
- $lastEnd = 0;
+ $maxEnd = mb_strlen($originalText);
+ $fetchAll = ($targetLength === 0);
+ $contextLength = ($fetchAll ? 0 : 32);
+
$firstStart = null;
+ $lastEnd = 0;
$content = '';
+ $contentTextLength = 0;
+
+ if ($fetchAll) {
+ $targetLength = $maxEnd * 2;
+ }
foreach ($matchPositions as $start => $end) {
// Get our outer text ranges for the added context we want to show upon the result.
- $contextStart = max($start - $contextRange, 0, $lastEnd);
- $contextEnd = min($end + $contextRange, $maxEnd);
+ $contextStart = max($start - $contextLength, 0, $lastEnd);
+ $contextEnd = min($end + $contextLength, $maxEnd);
// Adjust the start if we're going to be touching the previous match.
$startDiff = $start - $lastEnd;
if ($startDiff < 0) {
$contextStart = $start;
- $content = substr($content, 0, strlen($content) + $startDiff);
+ // $startDiff is negative here: trim that many overlapping characters off the end of
+ // the content to bring it back to the start of this current match zone.
+ $content = mb_substr($content, 0, mb_strlen($content) + $startDiff);
+ $contentTextLength += $startDiff;
}
// Add ellipsis between results
- if ($contextStart !== 0 && $contextStart !== $start) {
+ if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
$content .= ' ...';
+ $contentTextLength += 4;
+ } elseif ($fetchAll) {
+ // Or fill in gap since the previous match
+ $fillLength = $contextStart - $lastEnd;
+ $content .= e(mb_substr($originalText, $lastEnd, $fillLength));
+ $contentTextLength += $fillLength;
}
// Add our content including the bolded matching text
- $content .= e(substr($originalText, $contextStart, $start - $contextStart));
- $content .= '<strong>' . e(substr($originalText, $start, $end - $start)) . '</strong>';
- $content .= e(substr($originalText, $end, $contextEnd - $end));
+ $content .= e(mb_substr($originalText, $contextStart, $start - $contextStart));
+ $contentTextLength += $start - $contextStart;
+ $content .= '<strong>' . e(mb_substr($originalText, $start, $end - $start)) . '</strong>';
+ $contentTextLength += $end - $start;
+ $content .= e(mb_substr($originalText, $end, $contextEnd - $end));
+ $contentTextLength += $contextEnd - $end;
// Update our last end position
$lastEnd = $contextEnd;
}
// Stop if we're near our target
- if (strlen($content) >= $targetLength - 10) {
+ if ($contentTextLength >= $targetLength - 10) {
break;
}
}
// Just copy out the content if we haven't moved along anywhere.
if ($lastEnd === 0) {
- $content = e(substr($originalText, 0, $targetLength));
+ $content = e(mb_substr($originalText, 0, $targetLength));
+ $contentTextLength = $targetLength;
$lastEnd = $targetLength;
}
// Pad out the end if we're low
- $remainder = $targetLength - strlen($content);
+ $remainder = $targetLength - $contentTextLength;
if ($remainder > 10) {
- $content .= e(substr($originalText, $lastEnd, $remainder));
- $lastEnd += $remainder;
+ $padEndLength = min($maxEnd - $lastEnd, $remainder);
+ $content .= e(mb_substr($originalText, $lastEnd, $padEndLength));
+ $lastEnd += $padEndLength;
+ $contentTextLength += $padEndLength;
}
// Pad out the start if we're still low
- $remainder = $targetLength - strlen($content);
+ $remainder = $targetLength - $contentTextLength;
$firstStart = $firstStart ?: 0;
- if ($remainder > 10 && $firstStart !== 0) {
+ if (!$fetchAll && $remainder > 10 && $firstStart !== 0) {
$padStart = max(0, $firstStart - $remainder);
- $content = ($padStart === 0 ? '' : '...') . e(substr($originalText, $padStart, $firstStart - $padStart)) . substr($content, 4);
+ $content = ($padStart === 0 ? '' : '...') . e(mb_substr($originalText, $padStart, $firstStart - $padStart)) . mb_substr($content, 4);
}
// Add ellipsis if we're not at the end