
Commit 8852de3

server: ensure client request cannot override n_predict if set

1 parent cf7137e


examples/server/server.cpp

Lines changed: 12 additions & 0 deletions
@@ -158,6 +158,7 @@ struct llama_client_slot
     int32_t n_decoded   = 0;
     int32_t n_remaining = -1;
     int32_t i_batch     = -1;
+    int32_t n_predict   = -1;

     int32_t num_prompt_tokens           = 0;
     int32_t num_prompt_tokens_processed = 0;
@@ -409,6 +410,7 @@ struct llama_server_context

             slot.id = i;
             slot.n_ctx = n_ctx_slot;
+            slot.n_predict = params.n_predict;

             LOG_TEE(" -> Slot %i - max context: %i\n", slot.id, n_ctx_slot);

@@ -545,6 +547,15 @@ struct llama_server_context
         slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
         slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);

+        if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
+            // Might be better to reject the request with a 400 ?
+            LOG_WARNING("Max tokens to predict exceeds server configuration", {
+                {"params.n_predict", slot->params.n_predict},
+                {"slot.n_predict",   slot->n_predict},
+            });
+            slot->params.n_predict = slot->n_predict;
+        }
+
         // infill
         if (data.count("input_prefix") != 0)
         {
@@ -1052,6 +1063,7 @@ struct llama_server_context

         return json {
             {"n_ctx",       slot.n_ctx},
+            {"n_predict",   slot.n_predict},
             {"model",       params.model_alias},
             {"seed",        slot.params.seed},
             {"temperature", slot.sparams.temp},
