71 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
		
		
			
		
	
	
			71 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
|  | <?php | ||
|  | 
 | ||
|  | namespace BookStack\Search; | ||
|  | 
 | ||
|  | /** | ||
|  |  * A custom text tokenizer which records & provides insight needed for our search indexing. | ||
|  |  * We used to use basic strtok() but this class does the following which that lacked: | ||
|  |  * - Tracks and provides the current/previous delimiter that we've stopped at. | ||
|  |  * - Returns empty tokens upon parsing a delimiter. | ||
|  |  */ | ||
class SearchTextTokenizer
{
    protected int $currentIndex = 0;
    protected int $length;
    protected string $currentDelimiter = '';
    protected string $previousDelimiter = '';

    /**
     * @param string $text       The text to split into tokens.
     * @param string $delimiters Set of characters which each end a token.
     *                           Matching is performed byte-by-byte.
     */
    public function __construct(
        protected string $text,
        protected string $delimiters = ' '
    ) {
        $this->length = strlen($this->text);
    }

    /**
     * Get the delimiter which ended the token most recently returned by next().
     * This is an empty string before any token has been read, or when that
     * token was ended by the end of the text instead of a delimiter.
     */
    public function currentDelimiter(): string
    {
        return $this->currentDelimiter;
    }

    /**
     * Get the delimiter which was current before the most recent
     * successful call to next(), or an empty string if there was none.
     */
    public function previousDelimiter(): string
    {
        return $this->previousDelimiter;
    }

    /**
     * Get the next token between delimiters.
     * An empty string is returned where a delimiter is found without any
     * preceding content (Such as for consecutive delimiters).
     * Returns false if there's no further tokens, in which case the
     * tracked delimiters are left unchanged.
     */
    public function next(): string|false
    {
        // Exhaustion is decided by position, not by the truthiness of the token,
        // so that a trailing "0" token (falsy as a PHP string) is still returned.
        if ($this->currentIndex >= $this->length) {
            return false;
        }

        // Count the bytes from the current position up to the next delimiter (or end of text).
        $tokenLength = strcspn($this->text, $this->delimiters, $this->currentIndex);
        $token = substr($this->text, $this->currentIndex, $tokenLength);
        $stopIndex = $this->currentIndex + $tokenLength;

        $this->previousDelimiter = $this->currentDelimiter;

        if ($stopIndex < $this->length) {
            // Stopped on a delimiter; record it and continue after it on the next call.
            $this->currentDelimiter = $this->text[$stopIndex];
            $this->currentIndex = $stopIndex + 1;
        } else {
            // Ran to the end of the text; there's no delimiter to record.
            $this->currentDelimiter = '';
            $this->currentIndex = $this->length;
        }

        return $token;
    }
}