From: Dan Brown Date: Thu, 21 Aug 2025 11:14:52 +0000 (+0100) Subject: Vectors: Split out vector search and llm query runs X-Git-Url: http://source.bookstackapp.com/bookstack/commitdiff_plain/refs/pull/5552/head Vectors: Split out vector search and llm query runs Added a formal object type to carry across vector search results. Added permission application and entity combining with vector search results. Also updated namespace from vectors to queries. --- diff --git a/app/Console/Commands/RegenerateVectorsCommand.php b/app/Console/Commands/RegenerateVectorsCommand.php index 700d05300..26259e943 100644 --- a/app/Console/Commands/RegenerateVectorsCommand.php +++ b/app/Console/Commands/RegenerateVectorsCommand.php @@ -4,8 +4,8 @@ namespace BookStack\Console\Commands; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; -use BookStack\Search\Vectors\SearchVector; -use BookStack\Search\Vectors\StoreEntityVectorsJob; +use BookStack\Search\Queries\SearchVector; +use BookStack\Search\Queries\StoreEntityVectorsJob; use Illuminate\Console\Command; class RegenerateVectorsCommand extends Command diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Queries/EntityVectorGenerator.php similarity index 95% rename from app/Search/Vectors/EntityVectorGenerator.php rename to app/Search/Queries/EntityVectorGenerator.php index 5f2a7c178..34e37eb03 100644 --- a/app/Search/Vectors/EntityVectorGenerator.php +++ b/app/Search/Queries/EntityVectorGenerator.php @@ -1,10 +1,12 @@ vectorQueryServiceProvider->get(); + + $matchesText = array_values(array_map(fn (VectorSearchResult $result) => $result->matchText, $vectorResults)); + return $queryService->query($query, $matchesText); + } +} diff --git a/app/Search/QueryController.php b/app/Search/Queries/QueryController.php similarity index 71% rename from app/Search/QueryController.php rename to app/Search/Queries/QueryController.php index 895d63f13..95888a88f 100644 --- a/app/Search/QueryController.php +++ b/app/Search/Queries/QueryController.php @@ -1,9 +1,10 @@ get('query', ''); - if ($query) { - $results = $runner->run($query); - } else { - $results = null; - } - - return view('search.query', [ - 'results' => $results, - ]); + $results = $query ? $searchRunner->run($query) : []; + $llmResult = $llmRunner->run($query, $results); + dd($results, $llmResult); } } diff --git a/app/Search/Queries/SearchVector.php b/app/Search/Queries/SearchVector.php new file mode 100644 index 000000000..fcad45da6 --- /dev/null +++ b/app/Search/Queries/SearchVector.php @@ -0,0 +1,26 @@ +hasMany(JointPermission::class, 'entity_id', 'entity_id') + ->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type'); + } +} diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Queries/Services/OpenAiVectorQueryService.php similarity index 98% rename from app/Search/Vectors/Services/OpenAiVectorQueryService.php rename to app/Search/Queries/Services/OpenAiVectorQueryService.php index fea4d5c14..9bd9080ba 100644 --- a/app/Search/Vectors/Services/OpenAiVectorQueryService.php +++ b/app/Search/Queries/Services/OpenAiVectorQueryService.php @@ -1,6 +1,6 @@ vectorQueryServiceProvider->get(); + $queryVector = $queryService->generateEmbeddings($query); + + // TODO - Test permissions applied + $topMatchesQuery = SearchVector::query()->select('text', 'entity_type', 'entity_id') + ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance') + ->orderBy('distance', 'asc') + ->having('distance', '<', 0.6) + ->limit(10); + + $query = $this->permissions->restrictEntityRelationQuery($topMatchesQuery, 'search_vectors', 'entity_id', 'entity_type'); + $topMatches = $query->get(); + + $this->entityLoader->loadIntoRelations($topMatches->all(), 'entity', true); + + $results = []; + + foreach ($topMatches as $match) { + if ($match->relationLoaded('entity')) { + $results[] = new VectorSearchResult( + $match->getRelation('entity'), + $match->getAttribute('distance'), + $match->getAttribute('text'), + ); + } + } + + return $results; + } +} diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php index 6ae54b393..9050f65f5 100644 --- a/app/Search/SearchController.php +++ b/app/Search/SearchController.php @@ -6,7 +6,7 @@ use BookStack\Entities\Queries\PageQueries; use BookStack\Entities\Queries\QueryPopular; use BookStack\Entities\Tools\SiblingFetcher; use BookStack\Http\Controller; -use BookStack\Search\Vectors\VectorSearchRunner; +use BookStack\Search\Queries\VectorSearchRunner; use Illuminate\Http\Request; class SearchController extends Controller diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index 569fddc73..aaee97fe7 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -6,8 +6,8 @@ use BookStack\Activity\Models\Tag; use BookStack\Entities\EntityProvider; use BookStack\Entities\Models\Entity; use BookStack\Entities\Models\Page; -use BookStack\Search\Vectors\StoreEntityVectorsJob; -use BookStack\Search\Vectors\VectorQueryServiceProvider; +use BookStack\Search\Queries\StoreEntityVectorsJob; +use BookStack\Search\Queries\VectorQueryServiceProvider; use BookStack\Util\HtmlDocument; use DOMNode; use Illuminate\Database\Eloquent\Builder; diff --git a/app/Search/Vectors/SearchVector.php b/app/Search/Vectors/SearchVector.php deleted file mode 100644 index 4a5555f87..000000000 --- a/app/Search/Vectors/SearchVector.php +++ /dev/null @@ -1,16 +0,0 @@ -vectorQueryServiceProvider->get(); - $queryVector = $queryService->generateEmbeddings($query); - - // TODO - Apply permissions - // TODO - Join models - $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id') - ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance') - ->orderBy('distance', 'asc') - ->having('distance', '<', 0.6) - ->limit(10) - ->get(); - - $matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all())); - $llmResult = $queryService->query($query, $matchesText); - - return [ - 'llm_result' => $llmResult, - 'entity_matches' => $topMatches->toArray() - ]; - } -} diff --git a/routes/web.php b/routes/web.php index 9982c044a..d27855100 100644 --- a/routes/web.php +++ b/routes/web.php @@ -11,7 +11,7 @@ use BookStack\Exports\Controllers as ExportControllers; use BookStack\Http\Middleware\VerifyCsrfToken; use BookStack\Permissions\PermissionsController; use BookStack\References\ReferenceController; -use BookStack\Search\QueryController; +use BookStack\Search\Queries\QueryController; use BookStack\Search\SearchController; use BookStack\Settings as SettingControllers; use BookStack\Sorting as SortingControllers; @@ -199,6 +199,7 @@ Route::middleware('auth')->group(function () { // Queries Route::get('/query', [QueryController::class, 'show']); + Route::get('/query/run', [QueryController::class, 'run']); // TODO - Development only, remove Route::post('/query', [QueryController::class, 'run']); // User Search diff --git a/tests/Search/TextChunkerTest.php b/tests/Search/TextChunkerTest.php index f78bf11a4..c742c4a64 100644 --- a/tests/Search/TextChunkerTest.php +++ b/tests/Search/TextChunkerTest.php @@ -2,7 +2,7 @@ namespace Search; -use BookStack\Search\Vectors\TextChunker; +use BookStack\Search\Queries\TextChunker; use Tests\TestCase; class TextChunkerTest extends TestCase