Commit c53b34d

server: fix format_chat
1 parent b19f46a commit c53b34d

2 files changed: +7 -8 lines changed

examples/server/server.cpp

Lines changed: 0 additions & 1 deletion
@@ -2390,7 +2390,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 invalid_param = true;
                 break;
             }
-            std::string value(argv[i]);
             if (!verify_custom_template(argv[i])) {
                 fprintf(stderr, "error: the supplied chat template is not supported: %s\n", argv[i]);
                 fprintf(stderr, "note: llama.cpp does not use jinja parser, we only support commonly used templates\n");

examples/server/utils.hpp

Lines changed: 7 additions & 7 deletions
@@ -168,15 +168,15 @@ static T json_value(const json &body, const std::string &key, const T &default_v
 }
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
-inline bool verify_custom_template(std::string tmpl) {
+inline bool verify_custom_template(const std::string & tmpl) {
     llama_chat_message chat[] = {{"user", "test"}};
     std::vector<char> buf(1);
     int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, buf.data(), buf.size());
     return res >= 0;
 }
 
 // Format given chat. If tmpl is empty, we take the template from model metadata
-inline std::string format_chat(const struct llama_model * model, const std::string tmpl, std::vector<json> messages)
+inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages)
 {
     size_t alloc_size = 0;
     // vector holding all allocated string to be passed to llama_chat_apply_template
@@ -185,11 +185,11 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
 
     for (size_t i = 0; i < messages.size(); ++i) {
         auto &curr_msg = messages[i];
-        str[i] = json_value(curr_msg, "role", std::string(""));
-        str[i + 1] = json_value(curr_msg, "content", std::string(""));
-        alloc_size += str[i + 1].length();
-        chat[i].role = str[i].c_str();
-        chat[i].content = str[i + 1].c_str();
+        str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
+        str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
+        alloc_size += str[i*2 + 1].length();
+        chat[i].role = str[i*2 + 0].c_str();
+        chat[i].content = str[i*2 + 1].c_str();
     }
 
     const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
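
Context for the fix (not part of the commit): format_chat keeps the role and content strings alive in the str vector while llama_chat_apply_template reads them through raw const char * pointers. With the old str[i] / str[i + 1] indexing, message i+1's role landed in the same slot as message i's content, so consecutive messages overwrote each other. Each message needs two owning slots, hence the str[i*2 + 0] / str[i*2 + 1] scheme. A minimal standalone sketch of that indexing, using a hypothetical msg_view struct in place of llama_chat_message:

// Illustrative sketch only; msg_view stands in for llama_chat_message.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct msg_view {
    const char * role;
    const char * content;
};

int main() {
    // stand-in for the parsed JSON "messages" array
    std::vector<std::pair<std::string, std::string>> messages = {
        {"user", "hello"},
        {"assistant", "hi there"},
    };

    // one owning string per role and per content -> 2 * N slots
    std::vector<std::string> str(messages.size() * 2);
    std::vector<msg_view>    chat(messages.size());

    for (size_t i = 0; i < messages.size(); ++i) {
        str[i*2 + 0] = messages[i].first;   // role
        str[i*2 + 1] = messages[i].second;  // content
        chat[i].role    = str[i*2 + 0].c_str();
        chat[i].content = str[i*2 + 1].c_str();
    }

    // every pointer still refers to a distinct, live string
    for (const auto & m : chat) {
        printf("%s: %s\n", m.role, m.content);
    }
    return 0;
}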
