BookStack Code Mirror - bookstack/commitdiff
Vectors: Got basic LLM querying working using vector search context
author Dan Brown <redacted>
Mon, 24 Mar 2025 19:51:48 +0000 (19:51 +0000)
committer Dan Brown <redacted>
Mon, 24 Mar 2025 19:51:48 +0000 (19:51 +0000)
app/Search/SearchController.php
app/Search/Vectors/EntityVectorGenerator.php
app/Search/Vectors/Services/OpenAiVectorQueryService.php
app/Search/Vectors/Services/VectorQueryService.php
app/Search/Vectors/VectorSearchRunner.php [new file with mode: 0644]
database/migrations/2025_03_24_155748_create_search_vectors_table.php
resources/views/search/query.blade.php [new file with mode: 0644]
routes/web.php

index 2fce6a3d53fb86e14e1b773f4126f0b1ba456fb2..a688385e7c37d43f90e1084e8415d37bffea80e6 100644 (file)
@@ -6,6 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
 use BookStack\Entities\Queries\QueryPopular;
 use BookStack\Entities\Tools\SiblingFetcher;
 use BookStack\Http\Controller;
+use BookStack\Search\Vectors\VectorSearchRunner;
 use Illuminate\Http\Request;
 
 class SearchController extends Controller
@@ -139,4 +140,19 @@ class SearchController extends Controller
 
         return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']);
     }
+
+    /**
+     * Show the vector-backed query page and, when a query has been
+     * submitted, the runner output for that query.
+     *
+     * The "query" input is only used when it is a non-empty string. Array
+     * style input (?query[]=x) or a null value is ignored instead of being
+     * passed on to the string-typed runner, and a literal "0" is treated as
+     * a real query rather than being discarded by a truthiness check.
+     */
+    public function searchQuery(Request $request, VectorSearchRunner $runner)
+    {
+        $query = $request->get('query', '');
+        $query = is_string($query) ? trim($query) : '';
+
+        // Compare against the empty string explicitly, since "0" is falsy in PHP.
+        $results = ($query !== '') ? $runner->run($query) : null;
+
+        return view('search.query', [
+            'results' => $results,
+        ]);
+    }
 }
index 8a49187736b43d64fea1723ebc3f056f463ba067..9563694a321838539a0a9bf7c7daff5c97a172bc 100644 (file)
@@ -42,7 +42,7 @@ class EntityVectorGenerator
             $toInsert[] = [
                 'entity_id' => $entity->id,
                 'entity_type' => $entity->getMorphClass(),
-                'embedding' => DB::raw('STRING_TO_VECTOR("[' . implode(',', $embedding) . ']")'),
+                'embedding' => DB::raw('VEC_FROMTEXT("[' . implode(',', $embedding) . ']")'),
                 'text' => $text,
             ];
         }
index 8d291099846be3e3a3c9523903bd78961e0f882c..e0e145f3ad7cd03908ba1de2133ebbb593640edd 100644 (file)
@@ -33,4 +33,25 @@ class OpenAiVectorQueryService implements VectorQueryService
 
         return $response['data'][0]['embedding'];
     }
+
+    /**
+     * Ask the chat completions endpoint to answer the given input, supplying
+     * the provided context strings (joined by newlines) within the prompt.
+     * Gives back the first choice's message content, or an empty string
+     * when the response holds no such content.
+     *
+     * @param string[] $context
+     */
+    public function query(string $input, array $context): string
+    {
+        $formattedContext = implode("\n", $context);
+
+        // Instruction message setting the expected tone of the answer.
+        $instructionMessage = [
+            'role' => 'developer',
+            'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.'
+        ];
+
+        // User message carrying both the query and the context text.
+        $promptMessage = [
+            'role' => 'user',
+            'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
+        ];
+
+        $payload = [
+            'model' => 'gpt-4o',
+            'messages' => [$instructionMessage, $promptMessage],
+        ];
+
+        $completion = $this->jsonRequest('POST', 'v1/chat/completions', $payload);
+
+        return $completion['choices'][0]['message']['content'] ?? '';
+    }
 }
index 2cc4ed0178f4e8590b0eeb7f219ebf05b3480a66..746f95f5b227d8d690feccf5433727df97052fca 100644 (file)
@@ -9,4 +9,13 @@ interface VectorQueryService
      * @return float[]
      */
     public function generateEmbeddings(string $text): array;
+
+    /**
+     * Query the LLM service using the given user input, and
+     * relevant context text retrieved locally via a vector search.
+     * Returns the response output text from the LLM.
+     *
+     * @param string   $input   The user input to be answered.
+     * @param string[] $context Context text, retrieved locally via a vector search, to send along with the input.
+     *
+     * @return string The response output text from the LLM.
+     */
+    public function query(string $input, array $context): string;
 }
diff --git a/app/Search/Vectors/VectorSearchRunner.php b/app/Search/Vectors/VectorSearchRunner.php
new file mode 100644 (file)
index 0000000..db28779
--- /dev/null
@@ -0,0 +1,33 @@
+<?php
+
+namespace BookStack\Search\Vectors;
+
+/**
+ * Runs a text query against the stored search vectors: generates an
+ * embedding for the query, selects the closest stored text by cosine
+ * distance, then asks the query service to answer using that text.
+ */
+class VectorSearchRunner
+{
+    public function __construct(
+        protected VectorQueryServiceProvider $vectorQueryServiceProvider
+    ) {
+    }
+
+    /**
+     * Run the given query, returning the LLM output alongside the
+     * (up to 10) closest matches which were provided to it as context.
+     *
+     * @return array{llm_result: string, entity_matches: array}
+     */
+    public function run(string $query): array
+    {
+        $queryService = $this->vectorQueryServiceProvider->get();
+        $queryVector = $queryService->generateEmbeddings($query);
+
+        // The embedding originates from an external service response, so it is
+        // handed to the database as a bound value instead of being concatenated
+        // into the SQL text. This also avoids relying on a double-quoted literal.
+        $vectorText = '[' . implode(',', $queryVector) . ']';
+
+        // TODO - Apply permissions
+        // TODO - Join models
+        $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
+            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT(?), embedding) as distance', [$vectorText])
+            ->orderBy('distance', 'asc')
+            ->limit(10)
+            ->get();
+
+        // List of matched text, in closest-first order, used as the LLM context.
+        $matchesText = $topMatches->pluck('text')->all();
+        $llmResult = $queryService->query($query, $matchesText);
+
+        return [
+            'llm_result' => $llmResult,
+            'entity_matches' => $topMatches->toArray(),
+        ];
+    }
+}
index d7fb0118a2fc6b2476fdd2670b73fe7a4d13d2d3..1b552b22c9a69d24be61e1dd8c530ac6d52e7791 100644 (file)
@@ -16,10 +16,13 @@ return new class extends Migration
             $table->string('entity_type', 100);
             $table->integer('entity_id');
             $table->text('text');
-            $table->vector('embedding');
 
             $table->index(['entity_type', 'entity_id']);
         });
+
+        $table = DB::getTablePrefix() . 'search_vectors';
+        DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
+        DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
     }
 
     /**
diff --git a/resources/views/search/query.blade.php b/resources/views/search/query.blade.php
new file mode 100644 (file)
index 0000000..e8b4c84
--- /dev/null
@@ -0,0 +1,29 @@
+@extends('layouts.simple')
+
+{{--
+    Query page: a form which submits the "query" input via GET to /search/query.
+    When $results is provided (truthy) it shows the 'llm_result' text followed by
+    each item of 'entity_matches' with its entity type, id, distance and match text.
+--}}
+@section('body')
+    <div class="container mt-xl" id="search-system">
+
+        <form action="{{ url('/search/query') }}" method="get">
+            <input name="query" type="text">
+            <button class="button">Query</button>
+        </form>
+
+        @if($results)
+            <h2>Results</h2>
+
+            <h3>LLM Output</h3>
+            <p>{{ $results['llm_result'] }}</p>
+
+            <h3>Entity Matches</h3>
+            @foreach($results['entity_matches'] as $match)
+                <div>
+                    <div><strong>{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}</strong></div>
+                    {{-- Matched text is collapsed by default within a details element --}}
+                    <details>
+                        <summary>match text</summary>
+                         <div>{{ $match['text'] }}</div>
+                    </details>
+                </div>
+            @endforeach
+        @endif
+    </div>
+@stop
index 8184725834caae44f3dae98942d2bc981ec84406..15fe6d69b2d6e4ae810a748b897e3145246759b6 100644 (file)
@@ -187,6 +187,7 @@ Route::middleware('auth')->group(function () {
 
     // Search
     Route::get('/search', [SearchController::class, 'search']);
+    Route::get('/search/query', [SearchController::class, 'searchQuery']);
     Route::get('/search/book/{bookId}', [SearchController::class, 'searchBook']);
     Route::get('/search/chapter/{bookId}', [SearchController::class, 'searchChapter']);
     Route::get('/search/entity/siblings', [SearchController::class, 'searchSiblings']);