File tree Expand file tree Collapse file tree 2 files changed +8
-6
lines changed Expand file tree Collapse file tree 2 files changed +8
-6
lines changed Original file line number Diff line number Diff line change @@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
3941
3941
const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
3942
3942
server_task_type type,
3943
3943
json & data,
3944
- std::function<bool ()> is_connection_closed,
3944
+ const std::function<bool ()> & is_connection_closed,
3945
3945
httplib::Response & res,
3946
3946
oaicompat_type oaicompat) {
3947
3947
GGML_ASSERT (type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
Original file line number Diff line number Diff line change @@ -699,6 +699,8 @@ int llama_context::encode(llama_batch & inp_batch) {
699
699
t_compute_start_us = ggml_time_us ();
700
700
}
701
701
702
+ embd_seq.clear ();
703
+
702
704
n_queued_tokens += n_tokens;
703
705
704
706
const int64_t n_embd = hparams.n_embd ;
@@ -839,13 +841,13 @@ int llama_context::encode(llama_batch & inp_batch) {
839
841
}
840
842
841
843
int llama_context::decode (llama_batch & inp_batch) {
842
- if (inp_batch.n_tokens == 0 ) {
843
- LLAMA_LOG_ERROR (" %s: n_tokens == 0\n " , __func__);
844
- return -1 ;
845
- }
846
-
847
844
if (!memory) {
848
845
LLAMA_LOG_WARN (" %s: cannot decode batches with this context\n " , __func__);
846
+ return encode (inp_batch);
847
+ }
848
+
849
+ if (inp_batch.n_tokens == 0 ) {
850
+ LLAMA_LOG_ERROR (" %s: n_tokens == 0\n " , __func__);
849
851
return -1 ;
850
852
}
851
853
You can’t perform that action at this time.
0 commit comments