X-Git-Url: http://source.bookstackapp.com/bookstack/blobdiff_plain/c81cb6f2afd79c33104671ea31f4b0a0e9f85eba..refs/pull/5591/head:/app/Search/SearchIndex.php diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index 25389226a..36f71f6cc 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -2,11 +2,11 @@ namespace BookStack\Search; -use BookStack\Actions\Tag; +use BookStack\Activity\Models\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; -use DOMDocument; +use BookStack\Util\HtmlDocument; use DOMNode; use Illuminate\Database\Eloquent\Builder; use Illuminate\Support\Collection; @@ -16,7 +16,13 @@ class SearchIndex /** * A list of delimiter characters used to break-up parsed content into terms for indexing. */ - public static string $delimiters = " \n\t.,!?:;()[]{}<>`'\""; + public static string $delimiters = " \n\t.-,!?:;()[]{}<>`'\"«»"; + + /** + * A list of delimiter which could be commonly used within a single term and also indicate a break between terms. + * The indexer will index the full term with these delimiters, plus the terms split via these delimiters. + */ + public static string $softDelimiters = ".-"; public function __construct( protected EntityProvider $entityProvider @@ -30,7 +36,7 @@ class SearchIndex { $this->deleteEntityTerms($entity); $terms = $this->entityToTermDataArray($entity); - SearchTerm::query()->insert($terms); + $this->insertTerms($terms); } /** @@ -46,10 +52,7 @@ class SearchIndex array_push($terms, ...$entityTerms); } - $chunkedTerms = array_chunk($terms, 500); - foreach ($chunkedTerms as $termChunk) { - SearchTerm::query()->insert($termChunk); - } + $this->insertTerms($terms); } /** @@ -99,6 +102,19 @@ class SearchIndex $entity->searchTerms()->delete(); } + /** + * Insert the given terms into the database. + * Chunks through the given terms to remain within database limits. + * @param array[] $terms + */ + protected function insertTerms(array $terms): void + { + $chunkedTerms = array_chunk($terms, 500); + foreach ($chunkedTerms as $termChunk) { + SearchTerm::query()->insert($termChunk); + } + } + /** * Create a scored term array from the given text, where the keys are the terms * and the values are their scores. @@ -138,16 +154,11 @@ class SearchIndex 'h6' => 1.5, ]; - $html = '
' . $html . ''; $html = str_ireplace(['