Skip to content

Commit dc8adeb

Browse files
committed
new API for cpu-accel
1 parent 9899f8e commit dc8adeb

File tree

8 files changed

+1916
-1784
lines changed

8 files changed

+1916
-1784
lines changed

Package.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,5 +87,5 @@ let package = Package(
8787
linkerSettings: linkerSettings
8888
)
8989
],
90-
cxxLanguageStandard: .cxx11
90+
cxxLanguageStandard: .cxx17
9191
)

ggml/include/ggml-cpu.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ extern "C" {
111111
ggml_vec_dot_t vec_dot;
112112
enum ggml_type vec_dot_type;
113113
int64_t nrows; // number of rows to process simultaneously
114-
int64_t ncols; // number of columns to process simultaneously
115114
};
116115

117116
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 1691 additions & 1542 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cpu/ggml-cpu-aarch64.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99
extern "C" {
1010
#endif
1111

12-
#ifdef GGML_USE_CPU_AARCH64
1312
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
14-
#endif
1513

1614
#ifdef __cplusplus
1715
}

ggml/src/ggml-cpu/ggml-cpu-traits.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,45 @@
33
#include "ggml-backend-impl.h"
44

55
namespace ggml::cpu {
6-
tensor_traits::~tensor_traits(){};
6+
tensor_traits::~tensor_traits() {}
77
extra_buffer_type::~extra_buffer_type() {}
88
}
99

10-
const struct ggml_cpu_tensor_traits* ggml_cpu_get_tensor_traits(
11-
const struct ggml_tensor * tensor)
12-
{
13-
// host buffer can't be repacked!
14-
if (tensor->buffer && !ggml_backend_buft_is_host(tensor->buffer->buft)) {
15-
return (struct ggml_cpu_tensor_traits*)tensor->extra;
10+
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
11+
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
12+
if (extra && extra->context) {
13+
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
14+
auto tensor_traits = buf_extra->get_tensor_traits(op);
15+
if (tensor_traits && tensor_traits->compute_forward(params, op)) {
16+
return true;
17+
}
18+
}
1619
}
17-
return nullptr;
20+
/*
21+
auto tensor_traits = ggml::cpu::get_tensor_traits(op);
22+
if (tensor_traits) {
23+
return tensor_traits->compute_forward(params, op);
24+
}
25+
*/
26+
return false;
27+
}
28+
29+
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
30+
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
31+
if (extra && extra->context) {
32+
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
33+
auto tensor_traits = buf_extra->get_tensor_traits(op);
34+
if (tensor_traits && tensor_traits->work_size(n_threads, op, *size)) {
35+
return true;
36+
}
37+
}
38+
}
39+
/*
40+
auto tensor_traits = ggml::cpu::get_tensor_traits(op);
41+
if (tensor_traits) {
42+
*size = tensor_traits->work_size(n_threads, op);
43+
return true;
44+
}
45+
*/
46+
return false;
1847
}

ggml/src/ggml-cpu/ggml-cpu-traits.h

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,50 +3,48 @@
33
#include "ggml-backend-impl.h"
44

55
#ifdef __cplusplus
6+
#include <vector>
7+
extern "C" {
8+
#endif
9+
struct ggml_compute_params {
10+
// ith = thread index, nth = number of threads
11+
int ith, nth;
12+
13+
// work buffer for all threads
14+
size_t wsize;
15+
void * wdata;
16+
17+
struct ggml_threadpool * threadpool;
18+
};
19+
20+
// implementé dans ggml-cpu.c pas forcement a sa place ici.
21+
void ggml_barrier(struct ggml_threadpool * tp);
22+
23+
// return true if op part of extra "accelerator"
24+
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
25+
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
26+
27+
#ifdef __cplusplus
28+
}
629
namespace ggml::cpu {
730
// enregistré dans tensor->extra
831
class tensor_traits {
932
public:
1033
~tensor_traits();
11-
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) = 0;
34+
virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
35+
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
1236
};
1337

14-
// ou mettre ca?
1538
class extra_buffer_type {
1639
public:
1740
~extra_buffer_type();
1841
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
42+
virtual tensor_traits* get_tensor_traits(const struct ggml_tensor * op) = 0;
1943
};
44+
2045
}
21-
extern "C" {
22-
// #else
23-
#endif
24-
// a t'on besoin d'un mapping C?
25-
//bool ggml_cpu_extra_compute_forward(void* extra, struct ggml_compute_params * params, struct ggml_tensor * tensor);
26-
27-
// @ transferer en methode privée pour cpu_aarch64
28-
typedef int (*ggml_repack_t) (struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data,
29-
size_t data_size);
30-
typedef void (*ggml_from_float_to_mat_t)
31-
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
32-
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
33-
const void * GGML_RESTRICT y, int nr, int nc);
34-
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
35-
const void * GGML_RESTRICT y, int nr, int nc);
36-
37-
struct ggml_cpu_tensor_traits {
38-
ggml_repack_t repack;
39-
int64_t blck_size_interleave; // + interleave elements in blocks
40-
ggml_from_float_to_mat_t from_float_to_mat; // + mis sur le vec_dot_type ... quantize_mat_q8_0
41-
enum ggml_type vec_dot_type; // +
42-
int64_t nrows; // ? number of rows to process simultaneously
43-
int64_t ncols; // ? number of columns to process simultaneously
44-
ggml_gemv_t gemv; // +
45-
ggml_gemm_t gemm; // +
46-
};
4746

48-
const struct ggml_cpu_tensor_traits* ggml_cpu_get_tensor_traits(const struct ggml_tensor * tensor);
47+
// implementé dans ggml-cpu.cpp.
48+
std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type();
4949

50-
#ifdef __cplusplus
51-
}
5250
#endif

0 commit comments

Comments (0)