Skip to content

Commit a38b884

Browse files
authored
cli: add EOT when user hit Ctrl+C (#8296)
* main: add need_insert_eot so an EOT token is inserted when the user interrupts generation with Ctrl+C
* do not format the system prompt if it is empty
1 parent d7fd29f commit a38b884

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

common/common.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,7 +1394,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
13941394
options.push_back({ "*", " --keep N", "number of tokens to keep from the initial prompt (default: %d, -1 = all)", params.n_keep });
13951395
options.push_back({ "*", " --chunks N", "max number of chunks to process (default: %d, -1 = all)", params.n_chunks });
13961396
options.push_back({ "*", "-fa, --flash-attn", "enable Flash Attention (default: %s)", params.flash_attn ? "enabled" : "disabled" });
1397-
options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with (default: '%s')", params.prompt.c_str() });
1397+
options.push_back({ "*", "-p, --prompt PROMPT", "prompt to start generation with\n"
1398+
"in conversation mode, this will be used as system prompt\n"
1399+
"(default: '%s')", params.prompt.c_str() });
13981400
options.push_back({ "*", "-f, --file FNAME", "a file containing the prompt (default: none)" });
13991401
options.push_back({ "*", " --in-file FNAME", "an input file (repeat to specify multiple files)" });
14001402
options.push_back({ "*", "-bf, --binary-file FNAME", "binary file containing the prompt (default: none)" });
@@ -1409,7 +1411,9 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
14091411
"halt generation at PROMPT, return control in interactive mode\n"
14101412
"can be specified more than once for multiple prompts" });
14111413
options.push_back({ "main", "-sp, --special", "special tokens output enabled (default: %s)", params.special ? "true" : "false" });
1412-
options.push_back({ "main", "-cnv, --conversation", "run in conversation mode (does not print special tokens and suffix/prefix, use default chat template) (default: %s)", params.conversation ? "true" : "false" });
1414+
options.push_back({ "main", "-cnv, --conversation", "run in conversation mode, does not print special tokens and suffix/prefix\n"
1415+
"if suffix/prefix are not specified, default chat template will be used\n"
1416+
"(default: %s)", params.conversation ? "true" : "false" });
14131417
options.push_back({ "main infill", "-i, --interactive", "run in interactive mode (default: %s)", params.interactive ? "true" : "false" });
14141418
options.push_back({ "main infill", "-if, --interactive-first", "run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false" });
14151419
options.push_back({ "main infill", "-mli, --multiline-input", "allows you to write or paste multiple lines without ending each in '\\'" });
@@ -1453,6 +1457,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
14531457
options.push_back({ "main", " --cfg-scale N", "strength of guidance (default: %.1f, 1.0 = disable)", (double)sparams.cfg_scale });
14541458
options.push_back({ "main", " --chat-template JINJA_TEMPLATE",
14551459
"set custom jinja chat template (default: template taken from model's metadata)\n"
1460+
"if suffix/prefix are specified, template will be disabled\n"
14561461
"only commonly used templates are accepted:\n"
14571462
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
14581463
options.push_back({ "grammar" });

examples/main/main.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ static gpt_params * g_params;
3737
static std::vector<llama_token> * g_input_tokens;
3838
static std::ostringstream * g_output_ss;
3939
static std::vector<llama_token> * g_output_tokens;
40-
static bool is_interacting = false;
40+
static bool is_interacting = false;
41+
static bool need_insert_eot = false;
4142

4243
static bool file_exists(const std::string & path) {
4344
std::ifstream f(path.c_str());
@@ -99,7 +100,8 @@ static void write_logfile(
99100
static void sigint_handler(int signo) {
100101
if (signo == SIGINT) {
101102
if (!is_interacting && g_params->interactive) {
102-
is_interacting = true;
103+
is_interacting = true;
104+
need_insert_eot = true;
103105
} else {
104106
console::cleanup();
105107
printf("\n");
@@ -224,7 +226,14 @@ int main(int argc, char ** argv) {
224226
__func__, n_ctx_train, n_ctx);
225227
}
226228

227-
LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());
229+
// print chat template example in conversation mode
230+
if (params.conversation) {
231+
if (params.enable_chat_template) {
232+
LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());
233+
} else {
234+
LOG_TEE("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
235+
}
236+
}
228237

229238
// print system information
230239
{
@@ -263,7 +272,7 @@ int main(int argc, char ** argv) {
263272
std::vector<llama_token> embd_inp;
264273

265274
{
266-
auto prompt = (params.conversation && params.enable_chat_template)
275+
auto prompt = (params.conversation && params.enable_chat_template && !params.prompt.empty())
267276
? chat_add_and_format(model, chat_msgs, "system", params.prompt) // format the system prompt in conversation mode
268277
: params.prompt;
269278
if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
@@ -905,6 +914,13 @@ int main(int argc, char ** argv) {
905914

906915
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
907916

917+
// if the user stopped generation mid-way, we must add an EOT token to finish the model's last response
918+
if (need_insert_eot && format_chat) {
919+
llama_token eot = llama_token_eot(model);
920+
embd_inp.push_back(eot == -1 ? llama_token_eos(model) : eot);
921+
need_insert_eot = false;
922+
}
923+
908924
embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
909925
embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
910926
embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());

0 commit comments

Comments (0)