BookStack Code Mirror - bookstack/blob - app/Search/Vectors/EntityVectorGenerator.php
Vectors: Got basic LLM querying working using vector search context
[bookstack] / app / Search / Vectors / EntityVectorGenerator.php
1 <?php
2
3 namespace BookStack\Search\Vectors;
4
5 use BookStack\Entities\Models\Entity;
6 use BookStack\Search\Vectors\Services\VectorQueryService;
7 use Illuminate\Support\Facades\DB;
8
/**
 * Generates vector embeddings for the text content of an entity and stores
 * them as SearchVector rows, replacing any rows previously stored for that
 * same entity.
 */
class EntityVectorGenerator
{
    /**
     * Maximum number of embedding rows sent in a single insert statement.
     * Each row carries a full vector as text, so one unbounded statement
     * could grow very large for long entities.
     */
    protected const INSERT_BATCH_SIZE = 100;

    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Build embeddings for the given entity's text and persist them,
     * replacing the embeddings already stored for the entity.
     */
    public function generateAndStore(Entity $entity): void
    {
        $vectorService = $this->vectorQueryServiceProvider->get();

        $text = $this->entityToPlainText($entity);
        $chunks = $this->chunkText($text);
        // Embeddings are generated before any database change, so a failure
        // while generating them leaves the currently stored rows untouched.
        $embeddings = $this->chunksToEmbeddings($chunks, $vectorService);

        // Run the delete and the insert(s) as one unit so that a failed insert
        // does not leave the entity without any stored embeddings.
        DB::transaction(function () use ($entity, $embeddings, $chunks): void {
            $this->deleteExistingEmbeddingsForEntity($entity);
            $this->storeEmbeddings($embeddings, $chunks, $entity);
        });
    }

    /**
     * Remove every stored SearchVector row matching the entity's
     * morph class and id.
     */
    protected function deleteExistingEmbeddingsForEntity(Entity $entity): void
    {
        SearchVector::query()
            ->where('entity_type', '=', $entity->getMorphClass())
            ->where('entity_id', '=', $entity->id)
            ->delete();
    }

    /**
     * Insert one SearchVector row per embedding, pairing each embedding with
     * the text chunk found at the same array key.
     *
     * @param array<int, array> $embeddings Embedding vectors keyed like $textChunks.
     * @param string[]          $textChunks Text chunks the embeddings were generated from.
     */
    protected function storeEmbeddings(array $embeddings, array $textChunks, Entity $entity): void
    {
        $entityId = $entity->id;
        $entityType = $entity->getMorphClass();
        $toInsert = [];

        foreach ($embeddings as $index => $embedding) {
            // The vector is embedded in a raw SQL expression, so every component
            // is forced through a float cast to ensure only numeric text is used.
            $components = array_map(
                static fn ($value): string => (string) (float) $value,
                $embedding
            );

            $toInsert[] = [
                'entity_id' => $entityId,
                'entity_type' => $entityType,
                // Single-quoted SQL string literal: double quotes are treated as
                // identifier quotes when the ANSI_QUOTES SQL mode is active.
                'embedding' => DB::raw("VEC_FROMTEXT('[" . implode(',', $components) . "]')"),
                'text' => $textChunks[$index],
            ];
        }

        // array_chunk() yields no batches for an empty list, so nothing is
        // inserted when there are no embeddings to store.
        foreach (array_chunk($toInsert, self::INSERT_BATCH_SIZE) as $batch) {
            SearchVector::query()->insert($batch);
        }
    }

    /**
     * Generate an embedding for each chunk via the given query service.
     *
     * @param string[] $chunks
     * @return array<int, array> One embedding per chunk (expected to be a list of
     *                           floats), stored under the same key as its chunk.
     */
    protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
    {
        $embeddings = [];
        foreach ($chunks as $index => $chunk) {
            $embeddings[$index] = $vectorQueryService->generateEmbeddings($chunk);
        }
        return $embeddings;
    }

    /**
     * Split text into trimmed, non-empty lines.
     * Keys of the returned array are the original line positions, so they
     * are not necessarily sequential.
     *
     * @return string[]
     */
    protected function chunkText(string $text): array
    {
        // TODO - Join adjacent smaller chunks up
        $sections = array_map(
            static fn (string $section): string => trim($section),
            explode("\n", $text)
        );

        // Compare against the empty string explicitly: a callback-less
        // array_filter() would also discard a line consisting only of "0".
        return array_filter($sections, static fn (string $section): bool => $section !== '');
    }

    /**
     * Build the plain text for an entity: its name, a blank line, then the
     * value of the property named by the entity's textField.
     */
    protected function entityToPlainText(Entity $entity): string
    {
        $text = $entity->name . "\n\n" . $entity->{$entity->textField};
        // TODO - Add tags
        return $text;
    }
}