
Commit 3d74607

chore: function calling cleanup (#2195)
* chore: function calling cleanup
* chore: cleanup

Co-authored-by: sangjanai <[email protected]>
1 parent: 7fda186

File tree: 10 files changed, +214 −409 lines


docs/docs/guides/function-calling.md

Lines changed: 24 additions & 6 deletions
@@ -63,8 +63,14 @@ tools = [
 
 completion_payload = {
     "messages": [
-        {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
-        {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
+        {
+            "role": "system",
+            "content": 'You have access to the following CUSTOM functions:\n\n<CUSTOM_FUNCTIONS>\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+        },
+        {
+            "role": "user",
+            "content": "Hi, can you tell me the delivery date for my order?"
+        },
     ]
 }
 

@@ -126,10 +132,22 @@ Once the user provides their order ID:
 ```python
 completion_payload = {
     "messages": [
-        {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user."},
-        {"role": "user", "content": "Hi, can you tell me the delivery date for my order?"},
-        {"role": "assistant", "content": "Of course! Please provide your order ID so I can look it up."},
-        {"role": "user", "content": "i think it is order_70705"},
+        {
+            "role": "system",
+            "content": 'You have access to the following CUSTOM functions:\n\n<CUSTOM_FUNCTIONS>\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => `<function`\nparameters => a JSON dict with the function argument name as key and function argument value as value.\nend_tag => `</function>`\n\nHere is an example,\n<function=example_function_name>{"example_name": "example_value"}</function>\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- You can call one or more functions at a time, but remember only chose correct function\n- Put the entire function call reply on one line\n- Always add your sources when using search results to answer the user query\n- If you can not find correct parameters or arguments corresponding to function in the user\'s message, ask user again to provide, do not make assumptions.\n- No explanation are needed when calling a function.\n\nYou are a helpful assistant.',
+        },
+        {
+            "role": "user",
+            "content": "Hi, can you tell me the delivery date for my order?"
+        },
+        {
+            "role": "assistant",
+            "content": "Of course! Please provide your order ID so I can look it up."
+        },
+        {
+            "role": "user",
+            "content": "i think it is order_70705"
+        },
     ]
 }
 
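The rewritten doc example asks the model to reply in the `<function=name>{json args}</function>` format spelled out in the new system message. A minimal sketch (not part of this commit; `parse_function_calls` is a hypothetical helper) of extracting such calls from a reply:

```python
import json
import re

# Matches the reply format the new system prompt requests, e.g.
# <function=example_function_name>{"example_name": "example_value"}</function>
FUNCTION_CALL_RE = re.compile(r"<function=(\w+)>(.*?)</function>", re.DOTALL)

def parse_function_calls(reply: str) -> list[tuple[str, dict]]:
    """Extract (function_name, arguments) pairs from a model reply."""
    return [(name, json.loads(args))
            for name, args in FUNCTION_CALL_RE.findall(reply)]

print(parse_function_calls(
    '<function=get_delivery_date>{"order_id": "order_70705"}</function>'
))
# -> [('get_delivery_date', {'order_id': 'order_70705'})]
```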

engine/controllers/server.cc

Lines changed: 0 additions & 1 deletion
@@ -179,7 +179,6 @@ void server::ProcessStreamRes(std::function<void(const HttpResponsePtr&)> cb,
 void server::ProcessNonStreamRes(std::function<void(const HttpResponsePtr&)> cb,
                                  SyncQueue& q) {
   auto [status, res] = q.wait_and_pop();
-  function_calling_utils::PostProcessResponse(res);
   LOG_DEBUG << "response: " << res.toStyledString();
   auto resp = cortex_utils::CreateCortexHttpJsonResponse(res);
   resp->setStatusCode(

engine/extensions/local-engine/local_engine.cc

Lines changed: 1 addition & 0 deletions
@@ -544,6 +544,7 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
 
   params.push_back("--pooling");
   params.push_back("mean");
+  params.push_back("--jinja");
 
   std::vector<std::string> v;
   v.reserve(params.size() + 1);
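llama-server's `--jinja` flag enables Jinja-based chat-template rendering in the server itself, which in recent llama.cpp builds also covers tool/function-call formatting; that is what lets this commit delete the engine-side preprocessing and template code in the files below. A hedged sketch of exercising this through the OpenAI-compatible endpoint once a model is loaded (the URL, model name, and tool definition are illustrative assumptions, not values from this commit):

```python
import requests

# With --jinja enabled, tool definitions can be passed straight through
# the chat endpoint; the model's chat template formats them into the prompt.
payload = {
    "model": "llama3.1:8b-instruct",  # assumed model id
    "messages": [
        {"role": "user",
         "content": "Hi, can you tell me the delivery date for my order?"},
    ],
    "tools": [{
        "type": "function",
        "function": {
            "name": "get_delivery_date",
            "description": "Get the delivery date for a customer's order.",
            "parameters": {
                "type": "object",
                "properties": {"order_id": {"type": "string"}},
                "required": ["order_id"],
            },
        },
    }],
}

# Assumed local server address; adjust host/port to your setup.
resp = requests.post("http://127.0.0.1:39281/v1/chat/completions", json=payload)
print(resp.json()["choices"][0]["message"])
```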

engine/services/inference_service.cc

Lines changed: 0 additions & 47 deletions
@@ -13,8 +13,6 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
     engine_type = (*(json_body)).get("engine", kLlamaRepo).asString();
   }
   CTL_DBG("engine_type: " << engine_type);
-  function_calling_utils::PreprocessRequest(json_body);
-  CTL_DBG("engine_type: " << engine_type);
   auto tool_choice = json_body->get("tool_choice", Json::Value::null);
   auto model_id = json_body->get("model", "").asString();
   if (saved_models_.find(model_id) != saved_models_.end()) {
@@ -46,51 +44,6 @@ cpp::result<void, InferResult> InferenceService::HandleChatCompletion(
     return cpp::fail(std::make_pair(stt, res));
   }
 
-  if (!model_id.empty()) {
-    if (auto model_service = model_service_.lock()) {
-      auto metadata_ptr = model_service->GetCachedModelMetadata(model_id);
-      if (metadata_ptr != nullptr &&
-          !metadata_ptr->tokenizer->chat_template.empty()) {
-        auto tokenizer = metadata_ptr->tokenizer;
-        auto messages = (*json_body)["messages"];
-        Json::Value messages_jsoncpp(Json::arrayValue);
-        for (auto message : messages) {
-          messages_jsoncpp.append(message);
-        }
-
-        Json::Value tools(Json::arrayValue);
-        Json::Value template_data_json;
-        template_data_json["messages"] = messages_jsoncpp;
-        // template_data_json["tools"] = tools;
-
-        auto prompt_result = jinja::RenderTemplate(
-            tokenizer->chat_template, template_data_json, tokenizer->bos_token,
-            tokenizer->eos_token, tokenizer->add_bos_token,
-            tokenizer->add_eos_token, tokenizer->add_generation_prompt);
-        if (prompt_result.has_value()) {
-          (*json_body)["prompt"] = prompt_result.value();
-          if (json_body->isMember("stop")) {
-            bool need_append = true;
-            for (auto& s : (*json_body)["stop"]) {
-              if (s.asString() == tokenizer->eos_token) {
-                need_append = false;
-              }
-            }
-            if (need_append) {
-              (*json_body)["stop"].append(tokenizer->eos_token);
-            }
-          } else {
-            Json::Value stops(Json::arrayValue);
-            stops.append(tokenizer->eos_token);
-            (*json_body)["stop"] = stops;
-          }
-        } else {
-          CTL_ERR("Failed to render prompt: " + prompt_result.error());
-        }
-      }
-    }
-  }
-
   CTL_DBG("Json body inference: " + json_body->toStyledString());
 
   auto cb = [q, tool_choice](Json::Value status, Json::Value res) {
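The deleted block rendered the model's cached chat template over `messages` to build a raw `prompt`, then ensured the tokenizer's EOS token was present in `stop`; with `--jinja`, llama-server performs that rendering itself. A rough Python/jinja2 illustration of the rendering step (the ChatML-style template string is an assumed example, not one taken from this repository):

```python
from jinja2 import Template

# Hedged illustration of the removed engine-side step: render the chat
# template over the messages to produce the raw prompt string.
chat_template = (
    "{% for m in messages %}"
    "<|im_start|>{{ m.role }}\n{{ m.content }}<|im_end|>\n"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
)

prompt = Template(chat_template).render(
    messages=[{"role": "user",
               "content": "Hi, can you tell me the delivery date for my order?"}],
    add_generation_prompt=True,
)
print(prompt)
# The deleted code additionally appended the tokenizer's EOS token
# (e.g. "<|im_end|>") to the request's "stop" list if it was missing.
```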

engine/services/model_service.cc

Lines changed: 1 addition & 25 deletions
@@ -691,21 +691,7 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
   auto status = std::get<0>(ir)["status_code"].asInt();
   auto data = std::get<1>(ir);
 
-  if (status == drogon::k200OK) {
-    // start model successfully, in case not vision model, we store the metadata so we can use
-    // for each inference
-    if (!json_data.isMember("mmproj") || json_data["mmproj"].isNull()) {
-      auto metadata_res = GetModelMetadata(model_handle);
-      if (metadata_res.has_value()) {
-        loaded_model_metadata_map_.emplace(model_handle,
-                                           std::move(metadata_res.value()));
-        CTL_INF("Successfully stored metadata for model " << model_handle);
-      } else {
-        CTL_WRN("Failed to get metadata for model " << model_handle << ": "
-                                                    << metadata_res.error());
-      }
-    }
-
+  if (status == drogon::k200OK) {
     return StartModelResult{/* .success = */ true,
                             /* .warning = */ may_fallback_res.value()};
   } else if (status == drogon::k409Conflict) {
@@ -760,8 +746,6 @@ cpp::result<bool, std::string> ModelService::StopModel(
     if (bypass_check) {
       bypass_stop_check_set_.erase(model_handle);
     }
-    loaded_model_metadata_map_.erase(model_handle);
-    CTL_INF("Removed metadata for model " << model_handle);
     return true;
   } else {
     CTL_ERR("Model failed to stop with status code: " << status);
@@ -1090,14 +1074,6 @@ ModelService::GetModelMetadata(const std::string& model_id) const {
   return std::move(*model_metadata_res);
 }
 
-std::shared_ptr<ModelMetadata> ModelService::GetCachedModelMetadata(
-    const std::string& model_id) const {
-  if (loaded_model_metadata_map_.find(model_id) ==
-      loaded_model_metadata_map_.end())
-    return nullptr;
-  return loaded_model_metadata_map_.at(model_id);
-}
-
 std::string ModelService::GetEngineByModelId(
     const std::string& model_id) const {
   namespace fs = std::filesystem;

engine/services/model_service.h

Lines changed: 0 additions & 9 deletions
@@ -83,9 +83,6 @@ class ModelService {
   cpp::result<std::shared_ptr<ModelMetadata>, std::string> GetModelMetadata(
       const std::string& model_id) const;
 
-  std::shared_ptr<ModelMetadata> GetCachedModelMetadata(
-      const std::string& model_id) const;
-
   std::string GetEngineByModelId(const std::string& model_id) const;
 
  private:
@@ -104,12 +101,6 @@ class ModelService {
   std::unordered_set<std::string> bypass_stop_check_set_;
   std::shared_ptr<EngineServiceI> engine_svc_ = nullptr;
 
-  /**
-   * Store the chat template of loaded model.
-   */
-  std::unordered_map<std::string, std::shared_ptr<ModelMetadata>>
-      loaded_model_metadata_map_;
-
   std::mutex es_mtx_;
   std::unordered_map<std::string, std::optional<hardware::Estimation>> es_;
   cortex::TaskQueue& task_queue_;

engine/test/components/test_function_calling.cc

Lines changed: 0 additions & 157 deletions
This file was deleted.
