| 
									
										
										
										
											2021-11-13 06:57:50 +08:00
										 |  |  | <?php | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-16 18:27:22 +08:00
										 |  |  | namespace BookStack\Search; | 
					
						
							| 
									
										
										
										
											2021-11-13 06:57:50 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-05-18 00:56:55 +08:00
										 |  |  | use BookStack\Activity\Models\Tag; | 
					
						
							| 
									
										
										
										
											2021-11-13 06:57:50 +08:00
										 |  |  | use BookStack\Entities\Models\Entity; | 
					
						
							|  |  |  | use Illuminate\Support\HtmlString; | 
					
						
							|  |  |  | 
 | 
					
						
							class SearchResultsFormatter
{
    /**
     * For the given array of entities, prepare the models to be shown in search result
     * output. This sets a series of additional attributes on each model.
     *
     * @param Entity[] $results
     */
    public function format(array $results, SearchOptions $options): void
    {
        foreach ($results as $result) {
            $this->setSearchPreview($result, $options);
        }
    }

    /**
     * Update the given entity model to set attributes used for previews of the item
     * primarily within search result lists.
     *
     * Sets two HtmlString attributes on the entity:
     *  - 'preview_name': the whole name with matching terms wrapped in <strong>.
     *  - 'preview_content': an excerpt of the entity text (target of 260 characters)
     *    built around the matching terms.
     * Tags are then flagged for highlighting, but only if the 'tags' relation
     * has already been loaded on the entity.
     */
    protected function setSearchPreview(Entity $entity, SearchOptions $options): void
    {
        $textProperty = $entity->textField;
        $textContent = $entity->$textProperty;
        $relevantSearchOptions = $options->exacts->merge($options->searches);
        $terms = $relevantSearchOptions->toValueArray();

        $originalContentByNewAttribute = [
            'preview_name'    => $entity->name,
            'preview_content' => $textContent,
        ];

        foreach ($originalContentByNewAttribute as $attributeName => $content) {
            // Cast so that a null attribute value produces an empty preview
            // rather than a TypeError from the string-typed helpers below.
            $content = (string) $content;

            // A target length of 0 means "no limit": names are always shown in full.
            $targetLength = ($attributeName === 'preview_name') ? 0 : 260;
            $matchRefs = $this->getMatchPositions($content, $terms);
            $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs);
            $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength);
            $entity->setAttribute($attributeName, new HtmlString($formatted));
        }

        $tags = $entity->relationLoaded('tags') ? $entity->tags->all() : [];
        $this->highlightTagsContainingTerms($tags, $terms);
    }

    /**
     * Highlight tags which match the given terms.
     * Matching is a case-insensitive "contains" check. A matching tag gets a
     * 'highlight_name' and/or 'highlight_value' attribute set to true.
     *
     * @param Tag[]    $tags
     * @param string[] $terms
     */
    protected function highlightTagsContainingTerms(array $tags, array $terms): void
    {
        // Normalise the terms once up-front instead of once per tag.
        // Empty terms are dropped since an empty needle would be "found"
        // in every string, flagging every tag as a match.
        $lowerTerms = [];
        foreach ($terms as $term) {
            $termLower = mb_strtolower($term);
            if ($termLower !== '') {
                $lowerTerms[] = $termLower;
            }
        }

        foreach ($tags as $tag) {
            $tagName = mb_strtolower($tag->name);
            $tagValue = mb_strtolower($tag->value);

            foreach ($lowerTerms as $termLower) {
                if (mb_strpos($tagName, $termLower) !== false) {
                    $tag->setAttribute('highlight_name', true);
                }

                if (mb_strpos($tagValue, $termLower) !== false) {
                    $tag->setAttribute('highlight_value', true);
                }
            }
        }
    }

    /**
     * Get positions of the given terms within the given text.
     * Is in the array format of [int $startIndex => int $endIndex] where the indexes
     * are character (not byte) positions within the provided text, and the end
     * index is exclusive. Matching is case-insensitive.
     *
     * Where more than one term matches at the same start index, the longest
     * match is the one which is kept.
     *
     * @param string[] $terms
     *
     * @return array<int, int>
     */
    protected function getMatchPositions(string $text, array $terms): array
    {
        $matchRefs = [];
        $text = mb_strtolower($text);

        foreach ($terms as $term) {
            $term = mb_strtolower($term);
            $termLength = mb_strlen($term);

            // An empty needle never advances the search offset, which
            // would leave the loop below spinning forever.
            if ($termLength === 0) {
                continue;
            }

            $pos = mb_strpos($text, $term);
            while ($pos !== false) {
                $end = $pos + $termLength;

                // Don't let a shorter term replace a longer match that
                // begins at the same position.
                if (!isset($matchRefs[$pos]) || $end > $matchRefs[$pos]) {
                    $matchRefs[$pos] = $end;
                }

                // Continue searching from the end of this match.
                $pos = mb_strpos($text, $term, $end);
            }
        }

        return $matchRefs;
    }

    /**
     * Sort the given match positions before merging them where they're
     * adjacent or where they overlap.
     *
     * @param array<int, int> $matchPositions [start => end] pairs in any order.
     *
     * @return array<int, int> [start => end] pairs, ascending and non-touching.
     */
    protected function sortAndMergeMatchPositions(array $matchPositions): array
    {
        ksort($matchPositions);
        $mergedRefs = [];
        $lastStart = 0;
        $lastEnd = 0;

        foreach ($matchPositions as $start => $end) {
            if ($start > $lastEnd) {
                // Clear gap since the previous range, so start a new range.
                $mergedRefs[$start] = $end;
                $lastStart = $start;
                $lastEnd = $end;
            } elseif ($end > $lastEnd) {
                // Touches or overlaps the previous range, so extend that range.
                $mergedRefs[$lastStart] = $end;
                $lastEnd = $end;
            }
            // Otherwise this match sits wholly inside the previous range.
        }

        return $mergedRefs;
    }

    /**
     * Format the given original text, returning a version where terms are highlighted within.
     * Returned content is in HTML text format: all text from the original is escaped
     * and matches are wrapped in <strong> tags.
     * A given $targetLength of 0 asserts no target length limit, in which case
     * the complete text is always returned.
     *
     * This is a complex function but written to be relatively efficient, going through the term matches in order
     * so that we're only doing a one-time loop through of the matches. There is no further searching
     * done within here.
     *
     * @param array<int, int> $matchPositions Sorted & merged [start => end] character positions.
     */
    protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
    {
        $maxEnd = mb_strlen($originalText);
        $fetchAll = ($targetLength === 0);
        $contextLength = ($fetchAll ? 0 : 32);

        $firstStart = null;
        $lastEnd = 0;
        $content = '';
        // Un-escaped text following the most recent match. This is kept raw until
        // we know how much of it is needed, since the next match may start within
        // it and trimming already-escaped HTML could cut an entity in half.
        $pendingTail = '';
        $contentTextLength = 0;

        if ($fetchAll) {
            $targetLength = $maxEnd * 2;
        }

        foreach ($matchPositions as $start => $end) {
            // Get our outer text ranges for the added context we want to show upon the result.
            $contextStart = max($start - $contextLength, 0, $lastEnd);
            $contextEnd = min($end + $contextLength, $maxEnd);

            // Adjust the start if we're going to be touching the previous match.
            $startDiff = $start - $lastEnd;
            if ($startDiff < 0) {
                $contextStart = $start;
                // Trims off '$startDiff' number of characters from the previous match's
                // trailing context to bring it back to the start of this current match zone.
                $keepLength = max(mb_strlen($pendingTail) + $startDiff, 0);
                $pendingTail = mb_substr($pendingTail, 0, $keepLength);
                $contentTextLength += $startDiff;
            }

            // The previous trailing context is now final so can be escaped and added.
            $content .= e($pendingTail);
            $pendingTail = '';

            // Add ellipsis between results
            if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
                $content .= ' ...';
                $contentTextLength += 4;
            } elseif ($fetchAll) {
                // Or fill in gap since the previous match
                $fillLength = $contextStart - $lastEnd;
                $content .= e(mb_substr($originalText, $lastEnd, $fillLength));
                $contentTextLength += $fillLength;
            }

            // Add our content including the bolded matching text
            $content .= e(mb_substr($originalText, $contextStart, $start - $contextStart));
            $contentTextLength += $start - $contextStart;
            $content .= '<strong>' . e(mb_substr($originalText, $start, $end - $start)) . '</strong>';
            $contentTextLength += $end - $start;
            $pendingTail = mb_substr($originalText, $end, $contextEnd - $end);
            $contentTextLength += $contextEnd - $end;

            // Update our last end position
            $lastEnd = $contextEnd;

            // Update the first start position if it's not already been set
            if (is_null($firstStart)) {
                $firstStart = $contextStart;
            }

            // Stop if we're near our target. This never applies when fetching
            // everything, as every match needs to be shown in that case.
            if (!$fetchAll && $contentTextLength >= $targetLength - 10) {
                break;
            }
        }

        // Add the trailing context of the final match handled.
        $content .= e($pendingTail);

        // Just copy out the content if we haven't moved along anywhere.
        if ($lastEnd === 0) {
            $content = e(mb_substr($originalText, 0, $targetLength));
            $contentTextLength = $targetLength;
            $lastEnd = $targetLength;
        }

        // Pad out the end if we're low.
        // When fetching everything the rest of the text is always added, however
        // little remains, otherwise short text would lose its ending.
        $remainder = $targetLength - $contentTextLength;
        if ($fetchAll || $remainder > 10) {
            $padEndLength = $fetchAll
                ? max($maxEnd - $lastEnd, 0)
                : min($maxEnd - $lastEnd, $remainder);
            $content .= e(mb_substr($originalText, $lastEnd, $padEndLength));
            $lastEnd += $padEndLength;
            $contentTextLength += $padEndLength;
        }

        // Pad out the start if we're still low
        $remainder = $targetLength - $contentTextLength;
        $firstStart = $firstStart ?? 0;
        if (!$fetchAll && $remainder > 10 && $firstStart !== 0) {
            $padStart = max(0, $firstStart - $remainder);
            // A non-zero first start, when length-limited, means the content begins
            // with the 4-character ' ...' marker, which is swapped out here for
            // the padding text.
            $content = ($padStart === 0 ? '' : '...') . e(mb_substr($originalText, $padStart, $firstStart - $padStart)) . mb_substr($content, 4);
        }

        // Add ellipsis if we're not at the end
        if ($lastEnd < $maxEnd) {
            $content .= '...';
        }

        return $content;
    }
}