2021-06-26 23:23:15 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<?php
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								namespace BookStack\Entities\Tools;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								use BookStack\Actions\Tag;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								use BookStack\Entities\EntityProvider;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								use BookStack\Entities\Models\Entity;
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								use BookStack\Entities\Models\Page;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								use BookStack\Entities\Models\SearchTerm;
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								use DOMDocument;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								use DOMNode;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-29 00:42:12 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								use Illuminate\Support\Collection;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								class SearchIndex
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 02:03:44 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * A list of delimiter characters used to break-up parsed content into terms for indexing.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @var string
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @var EntityProvider
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected $entityProvider;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    public function __construct(EntityProvider $entityProvider)
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $this->entityProvider = $entityProvider;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Index the given entity.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    public function indexEntity(Entity $entity)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $this->deleteEntityTerms($entity);
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $terms = $this->entityToTermDataArray($entity);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        SearchTerm::query()->insert($terms);
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							
								
									
										
										
										
											2021-06-26 23:23:15 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Index multiple Entities at once.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @param Entity[] $entities
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-08 19:29:25 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    public function indexEntities(array $entities)
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $terms = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($entities as $entity) {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $entityTerms = $this->entityToTermDataArray($entity);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            array_push($terms, ...$entityTerms);
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $chunkedTerms = array_chunk($terms, 500);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($chunkedTerms as $termChunk) {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            SearchTerm::query()->insert($termChunk);
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Delete and re-index the terms for all entities in the system.
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 22:10:11 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     * Can take a callback which is used for reporting progress.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Callback receives three arguments:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * - An instance of the model being processed
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * - The number that have been processed so far.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * - The total number of that model to be processed.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @param callable(Entity, int, int)|null $progressCallback
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 22:10:11 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    public function indexAllEntities(?callable $progressCallback = null)
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        SearchTerm::query()->truncate();
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($this->entityProvider->all() as $entityModel) {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $indexContentField = $entityModel instanceof Page ? 'html' : 'description';
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $selectFields = ['id', 'name', $indexContentField];
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 22:10:11 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $total = $entityModel->newQuery()->withTrashed()->count();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $chunkSize = 250;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $processed = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $chunkCallback = function (Collection $entities) use ($progressCallback, &$processed, $total, $chunkSize, $entityModel) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $this->indexEntities($entities->all());
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $processed = min($processed + $chunkSize, $total);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                if (is_callable($progressCallback)) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    $progressCallback($entityModel, $processed, $total);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            };
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $entityModel->newQuery()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                ->select($selectFields)
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                ->with(['tags:id,name,value,entity_id,entity_type'])
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 22:10:11 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                ->chunk($chunkSize, $chunkCallback);
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Delete related Entity search terms.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    public function deleteEntityTerms(Entity $entity)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $entity->searchTerms()->delete();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Create a scored term array from the given text, where the keys are the terms
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * and the values are their scores.
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array<string, int>
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function generateTermScoreMapFromText(string $text, int $scoreAdjustment = 1): array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $termMap = $this->textToTermCountMap($text);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($termMap as $term => $count) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $termMap[$term] = $count * $scoreAdjustment;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $termMap;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Create a scored term array from the given HTML, where the keys are the terms
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * and the values are their scores.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array<string, int>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function generateTermScoreMapFromHtml(string $html): array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if (empty($html)) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $scoresByTerm = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $elementScoreAdjustmentMap = [
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h1' => 10,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h2' => 5,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h3' => 4,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h4' => 3,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h5' => 2,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            'h6' => 1.5,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $html = '<body>' . $html . '</body>';
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        libxml_use_internal_errors(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $doc = new DOMDocument();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $doc->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $topElems = $doc->documentElement->childNodes->item(0)->childNodes;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        /** @var DOMNode $child */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($topElems as $child) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $nodeName = $child->nodeName;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $termCounts = $this->textToTermCountMap(trim($child->textContent));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            foreach ($termCounts as $term => $count) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $scoresByTerm;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Create a scored term map from the given set of entity tags.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @param Tag[] $tags
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array<string, int>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function generateTermScoreMapFromTags(array $tags): array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $scoreMap = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $names = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $values = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 21:28:17 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        foreach ($tags as $tag) {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $names[] = $tag->name;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $values[] = $tag->value;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $nameMap = $this->generateTermScoreMapFromText(implode(' ', $names), 3);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $valueMap = $this->generateTermScoreMapFromText(implode(' ', $values), 5);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $this->mergeTermScoreMaps($nameMap, $valueMap);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * For the given text, return an array where the keys are the unique term words
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * and the values are the frequency of that term.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array<string, int>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function textToTermCountMap(string $text): array
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $tokenMap = []; // {TextToken => OccurrenceCount}
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 02:03:44 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $splitChars = static::$delimiters;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $token = strtok($text, $splitChars);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        while ($token !== false) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if (!isset($tokenMap[$token])) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $tokenMap[$token] = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $tokenMap[$token]++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $token = strtok($splitChars);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $tokenMap;
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * For the given entity, Generate an array of term data details.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * Is the raw term data, not instances of SearchTerm models.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array{term: string, score: float, entity_id: int, entity_type: string}[]
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function entityToTermDataArray(Entity $entity): array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $nameTermsMap = $this->generateTermScoreMapFromText($entity->name, 40 * $entity->searchFactor);
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $tagTermsMap = $this->generateTermScoreMapFromTags($entity->tags->all());
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if ($entity instanceof Page) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $bodyTermsMap = $this->generateTermScoreMapFromHtml($entity->html);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        } else {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $bodyTermsMap = $this->generateTermScoreMapFromText($entity->description, $entity->searchFactor);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 01:06:01 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $mergedScoreMap = $this->mergeTermScoreMaps($nameTermsMap, $bodyTermsMap, $tagTermsMap);
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $dataArray = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $entityId = $entity->id;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $entityType = $entity->getMorphClass();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($mergedScoreMap as $term => $score) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            $dataArray[] = [
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 21:28:17 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'term'        => $term,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'score'       => $score,
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'entity_type' => $entityType,
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-13 21:28:17 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                'entity_id'   => $entityId,
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            ];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $dataArray;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * For the given term data arrays, Merge their contents by term
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * while combining any scores.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @param array<string, int>[] ...$scoreMaps
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     * @returns array<string, int>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    protected function mergeTermScoreMaps(...$scoreMaps): array
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        $mergedMap = [];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        foreach ($scoreMaps as $scoreMap) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            foreach ($scoreMap as $term => $score) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                $mergedMap[$term] = ($mergedMap[$term] ?? 0) + $score;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            }
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-12 21:47:23 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return $mergedMap;
							 | 
						
					
						
							
								
									
										
										
										
											2021-11-11 21:36:49 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-22 08:17:45 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |