]> BookStack Code Mirror - bookstack/blob - app/Search/SearchTextTokenizer.php
Updated version and assets for release v25.05.1
[bookstack] / app / Search / SearchTextTokenizer.php
1 <?php
2
3 namespace BookStack\Search;
4
/**
 * A custom text tokenizer which records & provides insight needed for our search indexing.
 * We used to use basic strtok() but this class does the following which that lacked:
 * - Tracks and provides the current/previous delimiter that we've stopped at.
 * - Returns empty tokens upon parsing a delimiter.
 *
 * The text is scanned one byte at a time (strlen() & string offsets), and each
 * byte is checked against the delimiter list, so every delimiter is treated
 * as a single byte.
 */
class SearchTextTokenizer
{
    /** Byte offset in the text at which the next scan will start. */
    protected int $currentIndex = 0;

    /** Byte length of the text, calculated once on construction. */
    protected int $length;

    /** The delimiter which ended the most recently returned token ('' if it was ended by the end of the text). */
    protected string $currentDelimiter = '';

    /** The value held by the current delimiter before the most recent token was returned. */
    protected string $previousDelimiter = '';

    /**
     * @param string $text       The text to split into tokens.
     * @param string $delimiters A list of characters, each of which acts as a delimiter.
     */
    public function __construct(
        protected string $text,
        protected string $delimiters = ' '
    ) {
        $this->length = strlen($this->text);
    }

    /**
     * Get the delimiter that ended the most recently returned token.
     * This is an empty string before any token has been read, and after
     * the final token when that token was ended by the end of the text.
     */
    public function currentDelimiter(): string
    {
        return $this->currentDelimiter;
    }

    /**
     * Get the delimiter that was current before the most recent token was returned.
     */
    public function previousDelimiter(): string
    {
        return $this->previousDelimiter;
    }

    /**
     * Get the next token between delimiters.
     * Returns false if there's no further tokens.
     *
     * An empty string is returned where a delimiter is found with no content
     * before it (a leading delimiter, or delimiters that directly follow each other).
     * A delimiter at the very end of the text does not produce a trailing empty token.
     *
     * Valid tokens can be falsy ('' or '0'), so callers must compare the
     * result against false strictly (=== / !==) to detect the end of the text.
     * The delimiter state is left untouched when false is returned.
     */
    public function next(): string|false
    {
        $token = '';

        for ($i = $this->currentIndex; $i < $this->length; $i++) {
            $char = $this->text[$i];
            if (str_contains($this->delimiters, $char)) {
                $this->previousDelimiter = $this->currentDelimiter;
                $this->currentDelimiter = $char;
                // Resume after the delimiter on the next call.
                $this->currentIndex = $i + 1;
                return $token;
            }

            $token .= $char;
        }

        // End of the text reached without finding a further delimiter.
        // An explicit empty-string comparison is needed here since a plain
        // truthiness check would drop a final token of '0'.
        if ($token !== '') {
            $this->currentIndex = $this->length;
            $this->previousDelimiter = $this->currentDelimiter;
            $this->currentDelimiter = '';
            return $token;
        }

        return false;
    }
}