@@ -606,6 +606,9 @@ struct clip_ctx {
606
606
607
607
std::vector<uint8_t > buf_compute_meta;
608
608
609
+ std::vector<ggml_backend_t > backend_ptrs;
610
+ std::vector<ggml_backend_buffer_type_t > backend_buft;
611
+
609
612
ggml_backend_t backend = nullptr ;
610
613
ggml_backend_t backend_cpu = nullptr ;
611
614
ggml_backend_buffer_t buf = nullptr ;
@@ -614,6 +617,29 @@ struct clip_ctx {
614
617
615
618
struct clip_image_size * load_image_size;
616
619
620
+ clip_ctx (clip_context_params & ctx_params) {
621
+ backend_cpu = ggml_backend_init_by_type (GGML_BACKEND_DEVICE_TYPE_CPU, nullptr );
622
+ backend = ctx_params.use_gpu
623
+ ? ggml_backend_init_by_type (GGML_BACKEND_DEVICE_TYPE_GPU, nullptr )
624
+ : nullptr ;
625
+
626
+ if (backend) {
627
+ LOG_INF (" %s: CLIP using %s backend\n " , __func__, ggml_backend_name (backend));
628
+ backend_ptrs.push_back (backend);
629
+ backend_buft.push_back (ggml_backend_get_default_buffer_type (backend));
630
+ } else {
631
+ backend = backend_cpu;
632
+ LOG_INF (" %s: CLIP using CPU backend\n " , __func__);
633
+ }
634
+
635
+ backend_ptrs.push_back (backend_cpu);
636
+ backend_buft.push_back (ggml_backend_get_default_buffer_type (backend_cpu));
637
+
638
+ sched.reset (
639
+ ggml_backend_sched_new (backend_ptrs.data (), backend_buft.data (), backend_ptrs.size (), 8192 , false )
640
+ );
641
+ }
642
+
617
643
~clip_ctx () {
618
644
if (ctx_data) {
619
645
ggml_free (ctx_data);
@@ -1304,7 +1330,7 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
1304
1330
}
1305
1331
}
1306
1332
1307
- clip_ctx * new_clip = new clip_ctx{} ;
1333
+ clip_ctx * new_clip = new clip_ctx (ctx_params) ;
1308
1334
1309
1335
// update projector type
1310
1336
{
@@ -1323,53 +1349,6 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
1323
1349
}
1324
1350
}
1325
1351
1326
- std::vector<ggml_backend_buffer_type_t > backend_buft;
1327
- std::vector<ggml_backend_t > backend_ptrs;
1328
-
1329
- new_clip->backend_cpu = ggml_backend_cpu_init ();
1330
-
1331
- if (ctx_params.use_gpu ) {
1332
- #ifdef GGML_USE_CUDA
1333
- new_clip->backend = ggml_backend_cuda_init (0 );
1334
- LOG_INF (" %s: CLIP using CUDA backend\n " , __func__);
1335
- #endif
1336
-
1337
- #ifdef GGML_USE_METAL
1338
- new_clip->backend = ggml_backend_metal_init ();
1339
- LOG_INF (" %s: CLIP using Metal backend\n " , __func__);
1340
- #endif
1341
-
1342
- #ifdef GGML_USE_CANN
1343
- new_clip->backend = ggml_backend_cann_init (0 );
1344
- LOG_INF (" %s: CLIP using CANN backend\n " , __func__);
1345
- #endif
1346
-
1347
- #ifdef GGML_USE_VULKAN
1348
- new_clip->backend = ggml_backend_vk_init (0 );
1349
- LOG_INF (" %s: CLIP using Vulkan backend\n " , __func__);
1350
- #endif
1351
-
1352
- #ifdef GGML_USE_SYCL
1353
- new_clip->backend = ggml_backend_sycl_init (0 );
1354
- LOG_INF (" %s: CLIP using SYCL backend\n " , __func__);
1355
- #endif
1356
- }
1357
-
1358
- if (new_clip->backend ) {
1359
- backend_ptrs.push_back (new_clip->backend );
1360
- backend_buft.push_back (ggml_backend_get_default_buffer_type (new_clip->backend ));
1361
- } else {
1362
- new_clip->backend = new_clip->backend_cpu ;
1363
- LOG_INF (" %s: CLIP using CPU backend\n " , __func__);
1364
- }
1365
-
1366
- backend_ptrs.push_back (new_clip->backend_cpu );
1367
- backend_buft.push_back (ggml_backend_get_default_buffer_type (new_clip->backend_cpu ));
1368
-
1369
- new_clip->sched .reset (
1370
- ggml_backend_sched_new (backend_ptrs.data (), backend_buft.data (), backend_ptrs.size (), 8192 , false )
1371
- );
1372
-
1373
1352
// model size and capabilities
1374
1353
{
1375
1354
int idx = get_key_idx (ctx, KEY_HAS_TEXT_ENC);
@@ -1771,9 +1750,9 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
1771
1750
batch.data = nullptr ;
1772
1751
ggml_cgraph * gf = clip_image_build_graph (new_clip, &batch, nullptr , false );
1773
1752
ggml_backend_sched_reserve (new_clip->sched .get (), gf);
1774
- for (size_t i = 0 ; i < backend_ptrs.size (); ++i) {
1775
- ggml_backend_t backend = backend_ptrs[i];
1776
- ggml_backend_buffer_type_t buft = backend_buft[i];
1753
+ for (size_t i = 0 ; i < new_clip-> backend_ptrs .size (); ++i) {
1754
+ ggml_backend_t backend = new_clip-> backend_ptrs [i];
1755
+ ggml_backend_buffer_type_t buft = new_clip-> backend_buft [i];
1777
1756
size_t size = ggml_backend_sched_get_buffer_size (new_clip->sched .get (), backend);
1778
1757
if (size > 1 ) {
1779
1758
LOG_INF (" %s: %10s compute buffer size = %8.2f MiB\n " , __func__,
0 commit comments