22
22
23
23
#include " ggml-rpc.h"
24
24
#ifdef _WIN32
25
+ # define NOMINMAX
25
26
# define DIRECTORY_SEPARATOR ' \\ '
26
27
# include < locale>
27
28
# include < windows.h>
37
38
#include < stdio.h>
38
39
#include < vector>
39
40
#include < filesystem>
41
+ #include < algorithm>
42
+ #include < thread>
40
43
41
44
namespace fs = std::filesystem;
42
45
@@ -150,12 +153,14 @@ struct rpc_server_params {
150
153
int port = 50052 ;
151
154
size_t backend_mem = 0 ;
152
155
bool use_cache = false ;
156
+ int n_threads = std::max(1U , std::thread::hardware_concurrency()/2 );
153
157
};
154
158
155
159
static void print_usage (int /* argc*/ , char ** argv, rpc_server_params params) {
156
160
fprintf (stderr, " Usage: %s [options]\n\n " , argv[0 ]);
157
161
fprintf (stderr, " options:\n " );
158
162
fprintf (stderr, " -h, --help show this help message and exit\n " );
163
+ fprintf (stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n " , params.n_threads );
159
164
fprintf (stderr, " -H HOST, --host HOST host to bind to (default: %s)\n " , params.host .c_str ());
160
165
fprintf (stderr, " -p PORT, --port PORT port to bind to (default: %d)\n " , params.port );
161
166
fprintf (stderr, " -m MEM, --mem MEM backend memory size (in MB)\n " );
@@ -172,6 +177,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
172
177
return false ;
173
178
}
174
179
params.host = argv[i];
180
+ } else if (arg == " -t" || arg == " --threads" ) {
181
+ if (++i >= argc) {
182
+ return false ;
183
+ }
184
+ params.n_threads = std::stoi (argv[i]);
185
+ if (params.n_threads <= 0 ) {
186
+ fprintf (stderr, " error: invalid number of threads: %d\n " , params.n_threads );
187
+ return false ;
188
+ }
175
189
} else if (arg == " -p" || arg == " --port" ) {
176
190
if (++i >= argc) {
177
191
return false ;
@@ -199,7 +213,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
199
213
return true ;
200
214
}
201
215
202
- static ggml_backend_t create_backend () {
216
+ static ggml_backend_t create_backend (const rpc_server_params & params ) {
203
217
ggml_backend_t backend = NULL ;
204
218
#ifdef GGML_USE_CUDA
205
219
fprintf (stderr, " %s: using CUDA backend\n " , __func__);
@@ -231,6 +245,7 @@ static ggml_backend_t create_backend() {
231
245
if (!backend) {
232
246
fprintf (stderr, " %s: using CPU backend\n " , __func__);
233
247
backend = ggml_backend_cpu_init ();
248
+ ggml_backend_cpu_set_n_threads (backend, params.n_threads );
234
249
}
235
250
return backend;
236
251
}
@@ -275,7 +290,7 @@ int main(int argc, char * argv[]) {
275
290
fprintf (stderr, " \n " );
276
291
}
277
292
278
- ggml_backend_t backend = create_backend ();
293
+ ggml_backend_t backend = create_backend (params );
279
294
if (!backend) {
280
295
fprintf (stderr, " Failed to create backend\n " );
281
296
return 1 ;
0 commit comments