3 namespace BookStack\Search;
6 * A custom text tokenizer which records & provides insight needed for our search indexing.
7 * It replaces our earlier use of basic strtok(), adding the following which strtok() lacked:
8 * - Tracks and provides the current/previous delimiter that we've stopped at.
9 * - Returns empty tokens upon parsing a delimiter.
11 class SearchTextTokenizer
// Byte offset into $text at which the scan loop in next() starts.
13 protected int $currentIndex = 0;
// Byte length of $text, computed once in the constructor with strlen().
14 protected int $length;
// Delimiter character that next() most recently stopped at; '' when none is set.
15 protected string $currentDelimiter = '';
// The value $currentDelimiter held just before next() last overwrote it.
16 protected string $previousDelimiter = '';
// Stores the text to tokenize and the delimiter set as promoted properties,
// then caches the text length for use as the scan loop bound in next().
18 public function __construct(
19 protected string $text,
// Any single character of the text that occurs in this string is treated as a delimiter.
20 protected string $delimiters = ' '
// strlen() counts bytes, matching the byte-offset access ($this->text[$i]) used when scanning.
22 $this->length = strlen($this->text);
26 * Get the delimiter most recently stopped at, or an empty string if none is currently set.
28 public function currentDelimiter(): string
// Plain accessor: '' by default, otherwise whatever next() last assigned.
30 return $this->currentDelimiter;
34 * Get the delimiter that was current before next() last updated it.
36 public function previousDelimiter(): string
// Plain accessor: holds the value copied from $currentDelimiter by next() before it overwrites it.
38 return $this->previousDelimiter;
42 * Get the next token between delimiters.
43 * Returns false if there are no further tokens.
45 public function next(): string|false
49 for ($i = $this->currentIndex; $i < $this->length; $i++) {
50 $char = $this->text[$i];
51 if (str_contains($this->delimiters, $char)) {
52 $this->previousDelimiter = $this->currentDelimiter;
53 $this->currentDelimiter = $char;
54 $this->currentIndex = $i + 1;
62 $this->currentIndex = $this->length;
63 $this->previousDelimiter = $this->currentDelimiter;
64 $this->currentDelimiter = '';