<?php

namespace BookStack\Entities\Tools;

use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\SearchTerm;
use Illuminate\Support\Collection;

							|  |  |  | class SearchIndex | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * @var SearchTerm | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected $searchTerm; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * @var EntityProvider | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected $entityProvider; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     public function __construct(SearchTerm $searchTerm, EntityProvider $entityProvider) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $this->searchTerm = $searchTerm; | 
					
						
							|  |  |  |         $this->entityProvider = $entityProvider; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Index the given entity. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function indexEntity(Entity $entity) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $this->deleteEntityTerms($entity); | 
					
						
							|  |  |  |         $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); | 
					
						
							| 
									
										
										
										
											2020-11-22 09:20:38 +08:00
										 |  |  |         $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  |         $terms = array_merge($nameTerms, $bodyTerms); | 
					
						
							|  |  |  |         foreach ($terms as $index => $term) { | 
					
						
							|  |  |  |             $terms[$index]['entity_type'] = $entity->getMorphClass(); | 
					
						
							|  |  |  |             $terms[$index]['entity_id'] = $entity->id; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         $this->searchTerm->newQuery()->insert($terms); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |      * Index multiple Entities at once. | 
					
						
							|  |  |  |      * | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  |      * @param Entity[] $entities | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function indexEntities(array $entities) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $terms = []; | 
					
						
							|  |  |  |         foreach ($entities as $entity) { | 
					
						
							|  |  |  |             $nameTerms = $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor); | 
					
						
							|  |  |  |             $bodyTerms = $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor); | 
					
						
							|  |  |  |             foreach (array_merge($nameTerms, $bodyTerms) as $term) { | 
					
						
							|  |  |  |                 $term['entity_id'] = $entity->id; | 
					
						
							|  |  |  |                 $term['entity_type'] = $entity->getMorphClass(); | 
					
						
							|  |  |  |                 $terms[] = $term; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $chunkedTerms = array_chunk($terms, 500); | 
					
						
							|  |  |  |         foreach ($chunkedTerms as $termChunk) { | 
					
						
							|  |  |  |             $this->searchTerm->newQuery()->insert($termChunk); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Delete and re-index the terms for all entities in the system. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function indexAllEntities() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $this->searchTerm->newQuery()->truncate(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         foreach ($this->entityProvider->all() as $entityModel) { | 
					
						
							|  |  |  |             $selectFields = ['id', 'name', $entityModel->textField]; | 
					
						
							|  |  |  |             $entityModel->newQuery() | 
					
						
							|  |  |  |                 ->withTrashed() | 
					
						
							|  |  |  |                 ->select($selectFields) | 
					
						
							| 
									
										
										
										
											2020-11-29 00:42:12 +08:00
										 |  |  |                 ->chunk(1000, function (Collection $entities) { | 
					
						
							|  |  |  |                     $this->indexEntities($entities->all()); | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  |                 }); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Delete related Entity search terms. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     public function deleteEntityTerms(Entity $entity) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $entity->searchTerms()->delete(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /** | 
					
						
							|  |  |  |      * Create a scored term array from the given text. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     protected function generateTermArrayFromText(string $text, int $scoreAdjustment = 1): array | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         $tokenMap = []; // {TextToken => OccurrenceCount}
 | 
					
						
							|  |  |  |         $splitChars = " \n\t.,!?:;()[]{}<>`'\""; | 
					
						
							|  |  |  |         $token = strtok($text, $splitChars); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         while ($token !== false) { | 
					
						
							|  |  |  |             if (!isset($tokenMap[$token])) { | 
					
						
							|  |  |  |                 $tokenMap[$token] = 0; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             $tokenMap[$token]++; | 
					
						
							|  |  |  |             $token = strtok($splitChars); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         $terms = []; | 
					
						
							|  |  |  |         foreach ($tokenMap as $token => $count) { | 
					
						
							|  |  |  |             $terms[] = [ | 
					
						
							| 
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 |  |  |                 'term'  => $token, | 
					
						
							|  |  |  |                 'score' => $count * $scoreAdjustment, | 
					
						
							| 
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 |  |  |             ]; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return $terms; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } |