Commit 4f0ea9b

context : encode() clears embd_seq
ggml-ci
Parent commit: 2dba70d

2 files changed: 8 additions & 6 deletions

examples/server/server.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
     const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
             server_task_type type,
             json & data,
-            std::function<bool()> is_connection_closed,
+            const std::function<bool()> & is_connection_closed,
             httplib::Response & res,
             oaicompat_type oaicompat) {
         GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
```
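For context, this parameter change is a small perf cleanup: a `std::function` taken by value copies the callable, including any heap-allocated captured state, on every call, while a `const` reference only borrows it. A minimal sketch of the difference (the `check_by_*` helpers are hypothetical, not part of the codebase):

```cpp
#include <functional>

// By value: the std::function, and any heap-allocated capture state it
// owns, is copied each time this is called.
static bool check_by_value(std::function<bool()> f) { return f(); }

// By const reference: the caller's std::function is borrowed; no copy.
static bool check_by_ref(const std::function<bool()> & f) { return f(); }

int main() {
    bool closed = false;
    std::function<bool()> is_connection_closed = [&closed] { return closed; };

    check_by_value(is_connection_closed); // copies the callable
    check_by_ref(is_connection_closed);   // no copy
    return 0;
}
```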

src/llama-context.cpp

Lines changed: 7 additions & 5 deletions
```diff
@@ -699,6 +699,8 @@ int llama_context::encode(llama_batch & inp_batch) {
         t_compute_start_us = ggml_time_us();
     }
 
+    embd_seq.clear();
+
     n_queued_tokens += n_tokens;
 
     const int64_t n_embd = hparams.n_embd;
```
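The clear() is the substance of the commit: `embd_seq` holds pooled per-sequence embeddings keyed by sequence id, and without resetting it at the start of `encode()`, entries produced for an earlier batch would linger and could be read back as if they belonged to the current one. A simplified stand-in for the pattern (`toy_context` is hypothetical; the real cache lives in `llama_context`):

```cpp
#include <cstdint>
#include <map>
#include <vector>

using llama_seq_id = int32_t;

// Simplified stand-in for the per-sequence embedding cache.
struct toy_context {
    std::map<llama_seq_id, std::vector<float>> embd_seq;

    void encode(const std::vector<llama_seq_id> & seqs_in_batch) {
        // Without this clear(), embeddings cached for sequences from a
        // previous batch would survive into this call.
        embd_seq.clear();

        for (llama_seq_id s : seqs_in_batch) {
            embd_seq[s] = std::vector<float>(8, 0.0f); // placeholder embedding
        }
    }
};

int main() {
    toy_context ctx;
    ctx.encode({0, 1});                 // caches embeddings for seqs 0 and 1
    ctx.encode({2});                    // only seq 2 remains cached
    return (int) ctx.embd_seq.count(0); // 0: the stale entry is gone
}
```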
```diff
@@ -839,13 +841,13 @@ int llama_context::encode(llama_batch & inp_batch) {
 }
 
 int llama_context::decode(llama_batch & inp_batch) {
-    if (inp_batch.n_tokens == 0) {
-        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
-        return -1;
-    }
-
     if (!memory) {
         LLAMA_LOG_WARN("%s: cannot decode batches with this context\n", __func__);
+        return encode(inp_batch);
+    }
+
+    if (inp_batch.n_tokens == 0) {
+        LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
         return -1;
     }
 
```
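The `decode()` change reorders the early-outs: the `memory` check now runs first, and a context created without memory (no KV cache, e.g. for encoder-only embedding models) forwards the batch to `encode()` instead of returning an error, so `encode()` can apply its own validation, including the empty-batch check. A hedged sketch of the new control flow, with `toy_*` names standing in for the real functions:

```cpp
#include <cstdio>

struct toy_batch { int n_tokens; };

static int toy_encode(const toy_batch & batch) {
    if (batch.n_tokens == 0) {
        std::fprintf(stderr, "encode: n_tokens == 0\n");
        return -1;
    }
    return 0; // encoder pass would run here
}

static int toy_decode(bool has_memory, const toy_batch & batch) {
    if (!has_memory) {
        std::fprintf(stderr, "decode: cannot decode batches with this context\n");
        return toy_encode(batch); // new behavior: fall back to the encoder path
    }
    if (batch.n_tokens == 0) {
        std::fprintf(stderr, "decode: n_tokens == 0\n");
        return -1;
    }
    return 0; // decoder pass would run here
}

int main() {
    toy_batch batch = { 32 };
    // A memory-less context now routes through toy_encode() and succeeds.
    return toy_decode(/*has_memory=*/false, batch);
}
```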