// Callback URL for social authentication methods
'callback_url' => env('APP_URL', false),
+ // LLM service used to generate embedding vectors for search.
+ // Leave empty to disable. Options: openai
+ 'llm' => env('LLM_SERVICE', ''),
+
+ // OpenAI API-compatible service details.
+ // The endpoint receives the API key as a bearer token, so the default
+ // must point directly at the official API host.
+ 'openai' => [
+     'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
+     'key' => env('OPENAI_KEY', ''),
+ ],
+
'github' => [
'client_id' => env('GITHUB_APP_ID', false),
'client_secret' => env('GITHUB_APP_SECRET', false),
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\Page;
+use BookStack\Search\Vectors\StoreEntityVectorsJob;
+use BookStack\Search\Vectors\VectorQueryServiceProvider;
use BookStack\Util\HtmlDocument;
use DOMNode;
use Illuminate\Database\Eloquent\Builder;
public static string $softDelimiters = ".-";
public function __construct(
- protected EntityProvider $entityProvider
+ protected EntityProvider $entityProvider,
) {
}
$this->deleteEntityTerms($entity);
$terms = $this->entityToTermDataArray($entity);
$this->insertTerms($terms);
+
+ if (VectorQueryServiceProvider::isEnabled()) {
+ dispatch(new StoreEntityVectorsJob($entity));
+ }
}
/**
public function indexEntities(array $entities): void
{
$terms = [];
+ $vectorQueryEnabled = VectorQueryServiceProvider::isEnabled();
+
foreach ($entities as $entity) {
$entityTerms = $this->entityToTermDataArray($entity);
array_push($terms, ...$entityTerms);
+
+ if ($vectorQueryEnabled) {
+ dispatch(new StoreEntityVectorsJob($entity));
+ }
}
$this->insertTerms($terms);
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Entities\Models\Entity;
+use BookStack\Search\Vectors\Services\VectorQueryService;
+use Illuminate\Support\Facades\DB;
+
+/**
+ * Generates embedding vectors for an entity's text and stores them as
+ * SearchVector rows, replacing any rows previously stored for that entity.
+ */
+class EntityVectorGenerator
+{
+    public function __construct(
+        protected VectorQueryServiceProvider $vectorQueryServiceProvider
+    ) {
+    }
+
+    /**
+     * Generate embeddings for the given entity and replace its stored vectors.
+     *
+     * @throws \Throwable If the vector service or the database operations fail.
+     */
+    public function generateAndStore(Entity $entity): void
+    {
+        $vectorService = $this->vectorQueryServiceProvider->get();
+
+        $text = $this->entityToPlainText($entity);
+        $chunks = $this->chunkText($text);
+        // Embeddings are fetched before touching the database so a failed
+        // service request leaves the existing vectors untouched.
+        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);
+
+        // Delete + insert as one unit so a failed insert cannot leave the
+        // entity with no vectors at all.
+        DB::transaction(function () use ($entity, $embeddings, $chunks): void {
+            $this->deleteExistingEmbeddingsForEntity($entity);
+            $this->storeEmbeddings($embeddings, $chunks, $entity);
+        });
+    }
+
+    /**
+     * Remove every stored vector row belonging to the given entity.
+     */
+    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
+    {
+        SearchVector::query()
+            ->where('entity_type', '=', $entity->getMorphClass())
+            ->where('entity_id', '=', $entity->id)
+            ->delete();
+    }
+
+    /**
+     * Insert one row per embedding, paired with the text chunk at the same index.
+     *
+     * @param array<int, float[]> $embeddings
+     * @param array<int, string>  $textChunks
+     */
+    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
+    {
+        $toInsert = [];
+
+        foreach ($embeddings as $index => $embedding) {
+            // The values originate from an external service response and are
+            // concatenated into raw SQL below, so force each one to a float
+            // to guarantee only numeric literals reach the query.
+            $values = array_map(static fn ($value): float => (float) $value, $embedding);
+
+            $toInsert[] = [
+                'entity_id' => $entity->id,
+                'entity_type' => $entity->getMorphClass(),
+                // Single-quoted SQL string literal: double quotes are treated
+                // as identifier quotes when ANSI_QUOTES mode is enabled.
+                'embedding' => DB::raw("STRING_TO_VECTOR('[" . implode(',', $values) . "]')"),
+                'text' => $textChunks[$index],
+            ];
+        }
+
+        if ($toInsert === []) {
+            return;
+        }
+
+        // TODO - Chunk inserts
+        SearchVector::query()->insert($toInsert);
+    }
+
+    /**
+     * Request an embedding for each chunk, keeping the chunk's array key.
+     *
+     * @param string[] $chunks
+     * @return array<int, float[]>
+     */
+    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
+    {
+        $embeddings = [];
+        foreach ($chunks as $index => $chunk) {
+            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
+        }
+        return $embeddings;
+    }
+
+    /**
+     * Split text into trimmed, non-empty lines.
+     *
+     * @return string[]
+     */
+    protected function chunkText(string $text): array
+    {
+        // TODO - Join adjacent smaller chunks up
+        $sections = array_map(
+            static fn (string $section): string => trim($section),
+            explode("\n", $text)
+        );
+
+        // Compare against '' explicitly: a bare array_filter() would also
+        // discard a line whose entire content is "0".
+        return array_values(array_filter(
+            $sections,
+            static fn (string $section): bool => $section !== ''
+        ));
+    }
+
+    /**
+     * Combine the entity name and its text field content into one string.
+     */
+    protected function entityToPlainText(Entity $entity): string
+    {
+        // TODO - Add tags
+        return $entity->name . "\n\n" . $entity->{$entity->textField};
+    }
+}
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use Illuminate\Database\Eloquent\Model;
+
+/**
+ * Eloquent model for a stored embedding vector covering one chunk of an entity's text.
+ *
+ * No table name is declared here.
+ * NOTE(review): presumably this resolves by convention to the `search_vectors`
+ * table created by the accompanying migration - confirm.
+ *
+ * @property string $entity_type Morph class of the entity the vector belongs to.
+ * @property int $entity_id ID of the entity the vector belongs to.
+ * @property string $text Text chunk the embedding was generated from.
+ * @property string $embedding Stored vector value for the text chunk.
+ */
+class SearchVector extends Model
+{
+ // The search_vectors table is created without timestamp columns.
+ public $timestamps = false;
+}
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors\Services;
+
+use BookStack\Http\HttpRequestService;
+
+/**
+ * VectorQueryService implementation backed by an OpenAI API-compatible HTTP endpoint.
+ */
+class OpenAiVectorQueryService implements VectorQueryService
+{
+    public function __construct(
+        protected string $endpoint,
+        protected string $key,
+        protected HttpRequestService $http,
+    ) {
+    }
+
+    /**
+     * Send a JSON request to the configured endpoint and return the decoded JSON body.
+     *
+     * @param string $method HTTP method to use.
+     * @param string $uri    Path relative to the configured endpoint.
+     * @param array  $data   Payload sent as the JSON request body.
+     *
+     * @throws \RuntimeException If the service responds with a non-2xx status
+     *                           or a body that is not a JSON object/array.
+     */
+    protected function jsonRequest(string $method, string $uri, array $data): array
+    {
+        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
+        // NOTE(review): 10 is presumably a timeout in seconds - confirm against HttpRequestService.
+        $client = $this->http->buildClient(10);
+        $request = $this->http->jsonRequest($method, $fullUrl, $data)
+            ->withHeader('Authorization', 'Bearer ' . $this->key);
+
+        $response = $client->sendRequest($request);
+        $status = $response->getStatusCode();
+        $decoded = json_decode($response->getBody()->getContents(), true);
+
+        // Error statuses come back as ordinary responses, so an auth or
+        // rate-limit failure must be detected here, not treated as data.
+        if ($status < 200 || $status >= 300) {
+            $detail = is_array($decoded) ? ($decoded['error']['message'] ?? null) : null;
+            $suffix = is_string($detail) && $detail !== '' ? ": {$detail}" : '';
+            throw new \RuntimeException("OpenAI request to {$uri} failed with status {$status}{$suffix}");
+        }
+
+        // json_decode() yields null (or a scalar) for an invalid/unexpected
+        // body, which would otherwise violate the array return type.
+        if (!is_array($decoded)) {
+            throw new \RuntimeException("OpenAI request to {$uri} returned a non-JSON response");
+        }
+
+        return $decoded;
+    }
+
+    /**
+     * Generate an embedding vector for the given text via the v1/embeddings endpoint.
+     *
+     * @return float[]
+     *
+     * @throws \RuntimeException If the request fails or the response holds no embedding.
+     */
+    public function generateEmbeddings(string $text): array
+    {
+        $response = $this->jsonRequest('POST', 'v1/embeddings', [
+            'input' => $text,
+            'model' => 'text-embedding-3-small',
+        ]);
+
+        $embedding = $response['data'][0]['embedding'] ?? null;
+        if (!is_array($embedding)) {
+            throw new \RuntimeException('OpenAI embeddings response did not contain an embedding vector');
+        }
+
+        return $embedding;
+    }
+}
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors\Services;
+
+/**
+ * Contract for services that turn a chunk of text into an embedding vector.
+ */
+interface VectorQueryService
+{
+ /**
+ * Generate embedding vectors from the given chunk of text.
+ * @param string $text The text chunk to generate an embedding for.
+ * @return float[] The numeric components of the embedding for the given text.
+ */
+ public function generateEmbeddings(string $text): array;
+}
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Entities\Models\Entity;
+use Illuminate\Contracts\Queue\ShouldQueue;
+use Illuminate\Foundation\Queue\Queueable;
+
+/**
+ * Queued job that generates and stores embedding vectors for a single entity.
+ */
+class StoreEntityVectorsJob implements ShouldQueue
+{
+    use Queueable;
+
+    /**
+     * Discard the job quietly when the entity no longer exists by the time
+     * the job runs, instead of failing while restoring the serialized model.
+     */
+    public bool $deleteWhenMissingModels = true;
+
+    /**
+     * Create a new job instance.
+     */
+    public function __construct(
+        protected Entity $entity
+    ) {
+    }
+
+    /**
+     * Execute the job by generating and storing vectors for the held entity.
+     */
+    public function handle(EntityVectorGenerator $generator): void
+    {
+        $generator->generateAndStore($this->entity);
+    }
+}
--- /dev/null
+<?php
+
+namespace BookStack\Search\Vectors;
+
+use BookStack\Http\HttpRequestService;
+use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
+use BookStack\Search\Vectors\Services\VectorQueryService;
+
+/**
+ * Resolves the configured LLM service into a VectorQueryService instance
+ * and reports whether any such service has been configured.
+ */
+class VectorQueryServiceProvider
+{
+    public function __construct(
+        protected HttpRequestService $http,
+    ) {
+    }
+
+    /**
+     * Build the vector query service selected by the `services.llm` config value.
+     *
+     * @throws \Exception If the configured service name is not supported.
+     */
+    public function get(): VectorQueryService
+    {
+        $service = static::getServiceName();
+
+        if ($service === 'openai') {
+            // Cast so a missing (null) config value cannot trigger a
+            // TypeError against the service's string constructor parameters.
+            $key = (string) config('services.openai.key');
+            $endpoint = (string) config('services.openai.endpoint');
+            return new OpenAiVectorQueryService($endpoint, $key, $this->http);
+        }
+
+        throw new \Exception("No '{$service}' LLM service found");
+    }
+
+    /**
+     * Get the configured service name, lower-cased and trimmed.
+     * Returns an empty string when no service is configured.
+     */
+    protected static function getServiceName(): string
+    {
+        // config() yields null when the key is absent; cast before the
+        // string functions so that case resolves to '' (disabled).
+        return strtolower(trim((string) config('services.llm')));
+    }
+
+    /**
+     * Check whether an LLM service name has been configured.
+     * This does not validate that the named service is supported.
+     */
+    public static function isEnabled(): bool
+    {
+        return !empty(static::getServiceName());
+    }
+}
--- /dev/null
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    /**
+     * Create the search_vectors table used to hold per-entity text chunks
+     * alongside their embedding vectors.
+     */
+    public function up(): void
+    {
+        // TODO - Handle compatibility with older databases that don't support vectors
+        Schema::create('search_vectors', static function (Blueprint $blueprint): void {
+            // Reference to the owning entity.
+            $blueprint->string('entity_type', 100);
+            $blueprint->integer('entity_id');
+
+            // The text chunk and the vector generated from it.
+            $blueprint->text('text');
+            $blueprint->vector('embedding');
+
+            // Rows are looked up and removed per entity.
+            $blueprint->index(['entity_type', 'entity_id']);
+        });
+    }
+
+    /**
+     * Drop the search_vectors table.
+     */
+    public function down(): void
+    {
+        Schema::dropIfExists('search_vectors');
+    }
+};