71 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
		
		
			
		
	
	
			71 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			PHP
		
	
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								namespace BookStack\Search;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * A custom text tokenizer which records & provides insight needed for our search indexing.
								 * We used to use basic strtok() but this class does the following which that lacked:
								 * - Tracks and provides the current/previous delimiter that we've stopped at.
								 * - Returns empty tokens upon parsing a delimiter, so leading or consecutive
								 *   delimiters each yield an empty-string token instead of being skipped.
								 *
								 * Scanning is byte-wise: every single byte within the delimiter string is
								 * treated as an individual delimiter.
								 */
								class SearchTextTokenizer
								{
								    protected int $currentIndex = 0;
								    protected int $length;
								    protected string $currentDelimiter = '';
								    protected string $previousDelimiter = '';

								    /**
								     * @param string $text       The text to split into tokens.
								     * @param string $delimiters Set of single-byte delimiter characters to split upon.
								     */
								    public function __construct(
								        protected string $text,
								        protected string $delimiters = ' '
								    ) {
								        $this->length = strlen($this->text);
								    }

								    /**
								     * Get the delimiter which ended the most recently returned token.
								     * This is an empty string before any token has been read, or when the
								     * last returned token was ended by the end of the text instead of a delimiter.
								     */
								    public function currentDelimiter(): string
								    {
								        return $this->currentDelimiter;
								    }

								    /**
								     * Get the delimiter which was current before the most recently returned token.
								     * This is an empty string until at least two tokens have been read.
								     */
								    public function previousDelimiter(): string
								    {
								        return $this->previousDelimiter;
								    }

								    /**
								     * Get the next token between delimiters.
								     * The token may be an empty string where a delimiter is found right at
								     * the current position.
								     * Returns false if there's no further tokens, in which case the tracked
								     * delimiters are left untouched.
								     */
								    public function next(): string|false
								    {
								        // Nothing left to read. Checked explicitly via position, instead of via
								        // the truthiness of the token, so that a final token of "0" is not lost.
								        if ($this->currentIndex >= $this->length) {
								            return false;
								        }

								        // Count the bytes from our current position up to the next delimiter (or end of text).
								        $tokenLength = strcspn($this->text, $this->delimiters, $this->currentIndex);
								        $token = substr($this->text, $this->currentIndex, $tokenLength);
								        $stopIndex = $this->currentIndex + $tokenLength;

								        $this->previousDelimiter = $this->currentDelimiter;

								        if ($stopIndex < $this->length) {
								            // Stopped upon a delimiter; Record it and move on past it.
								            $this->currentDelimiter = $this->text[$stopIndex];
								            $this->currentIndex = $stopIndex + 1;
								        } else {
								            // Stopped at the end of the text without finding a delimiter.
								            $this->currentDelimiter = '';
								            $this->currentIndex = $this->length;
								        }

								        return $token;
								    }
								}
							 |