-
Notifications
You must be signed in to change notification settings - Fork 12.1k
common: llama_load_model_from_url split support #6192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ddb13ed
7c63644
fbcf2ab
c7d4db3
dc3469e
08a0c13
4fa1c63
8187983
3ba5f2d
b4a2ed8
52d7f44
bdef0ec
4da00c1
34a7665
72d4eb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,9 @@ | |
#endif | ||
#if defined(LLAMA_USE_CURL) | ||
#include <curl/curl.h> | ||
#include <curl/easy.h> | ||
#include <thread> | ||
#include <future> | ||
#endif | ||
|
||
#if defined(_MSC_VER) | ||
|
@@ -61,7 +64,7 @@ | |
#else | ||
#include <sys/syslimits.h> | ||
#endif | ||
#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX | ||
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 | ||
#define LLAMA_CURL_MAX_HEADER_LENGTH 256 | ||
#endif // LLAMA_USE_CURL | ||
|
||
|
@@ -1702,27 +1705,13 @@ void llama_batch_add( | |
|
||
#ifdef LLAMA_USE_CURL | ||
|
||
struct llama_model * llama_load_model_from_url( | ||
const char * model_url, | ||
const char * path_model, | ||
const struct llama_model_params & params) { | ||
// Basic validation of the model_url | ||
if (!model_url || strlen(model_url) == 0) { | ||
fprintf(stderr, "%s: invalid model_url\n", __func__); | ||
return NULL; | ||
} | ||
|
||
// Initialize libcurl globally | ||
auto curl = curl_easy_init(); | ||
|
||
if (!curl) { | ||
fprintf(stderr, "%s: error initializing libcurl\n", __func__); | ||
return NULL; | ||
} | ||
static bool llama_download_file(CURL * curl, const char * url, const char * path) { | ||
bool force_download = false; | ||
|
||
// Set the URL, allow to follow http redirection | ||
curl_easy_setopt(curl, CURLOPT_URL, model_url); | ||
curl_easy_setopt(curl, CURLOPT_URL, url); | ||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); | ||
|
||
#if defined(_WIN32) | ||
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of | ||
// operating system. Currently implemented under MS-Windows. | ||
|
@@ -1731,24 +1720,24 @@ struct llama_model * llama_load_model_from_url( | |
|
||
// Check if the file already exists locally | ||
struct stat model_file_info; | ||
auto file_exists = (stat(path_model, &model_file_info) == 0); | ||
auto file_exists = (stat(path, &model_file_info) == 0); | ||
|
||
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files | ||
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; | ||
char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; | ||
snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model); | ||
char etag_path[PATH_MAX] = {0}; | ||
snprintf(etag_path, sizeof(etag_path), "%s.etag", path); | ||
|
||
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0}; | ||
char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; | ||
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model); | ||
char last_modified_path[PATH_MAX] = {0}; | ||
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path); | ||
|
||
if (file_exists) { | ||
auto * f_etag = fopen(etag_path, "r"); | ||
if (f_etag) { | ||
if (!fgets(etag, sizeof(etag), f_etag)) { | ||
fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path); | ||
} else { | ||
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag); | ||
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag); | ||
} | ||
fclose(f_etag); | ||
} | ||
|
@@ -1758,7 +1747,7 @@ struct llama_model * llama_load_model_from_url( | |
if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) { | ||
fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path); | ||
} else { | ||
fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path, | ||
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path, | ||
last_modified); | ||
} | ||
fclose(f_last_modified); | ||
|
@@ -1776,6 +1765,11 @@ struct llama_model * llama_load_model_from_url( | |
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { | ||
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata; | ||
|
||
// Convert header field name to lowercase | ||
for (size_t i = 0; i < n_items && buffer[i] != ':'; ++i) { | ||
buffer[i] = tolower(buffer[i]); | ||
} | ||
|
||
const char * etag_prefix = "etag: "; | ||
if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) { | ||
strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF | ||
|
@@ -1798,38 +1792,42 @@ struct llama_model * llama_load_model_from_url( | |
if (res != CURLE_OK) { | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); | ||
return NULL; | ||
return false; | ||
} | ||
|
||
long http_code = 0; | ||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); | ||
if (http_code != 200) { | ||
// HEAD not supported, we don't know if the file has changed | ||
// force trigger downloading | ||
file_exists = false; | ||
force_download = true; | ||
fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code); | ||
} | ||
} | ||
|
||
// If the ETag or the Last-Modified headers are different: trigger a new download | ||
if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) { | ||
char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0}; | ||
snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model); | ||
bool should_download = !file_exists | ||
|| force_download | ||
|| (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0) | ||
|| (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0); | ||
if (should_download) { | ||
char path_temporary[PATH_MAX] = {0}; | ||
snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path); | ||
if (file_exists) { | ||
fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model); | ||
if (remove(path_model) != 0) { | ||
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path); | ||
if (remove(path) != 0) { | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model); | ||
return NULL; | ||
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path); | ||
return false; | ||
} | ||
} | ||
|
||
// Set the output file | ||
auto * outfile = fopen(path_model_temporary, "wb"); | ||
auto * outfile = fopen(path_temporary, "wb"); | ||
if (!outfile) { | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model); | ||
return NULL; | ||
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path); | ||
return false; | ||
} | ||
|
||
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); | ||
|
@@ -1843,15 +1841,30 @@ struct llama_model * llama_load_model_from_url( | |
// display download progress | ||
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here the progress of each split will be flushed to stderr concurrently; it can be improved later on — the actual final result is good enough for a first version. |
||
|
||
// helper function to hide password in URL | ||
auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { | ||
std::size_t protocol_pos = url.find("://"); | ||
if (protocol_pos == std::string::npos) { | ||
return url; // Malformed URL | ||
} | ||
|
||
std::size_t at_pos = url.find('@', protocol_pos + 3); | ||
if (at_pos == std::string::npos) { | ||
return url; // No password in URL | ||
} | ||
|
||
return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); | ||
}; | ||
|
||
// start the download | ||
fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, | ||
model_url, path_model, headers.etag, headers.last_modified); | ||
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, | ||
llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified); | ||
auto res = curl_easy_perform(curl); | ||
if (res != CURLE_OK) { | ||
fclose(outfile); | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res)); | ||
return NULL; | ||
return false; | ||
} | ||
|
||
long http_code = 0; | ||
|
@@ -1860,7 +1873,7 @@ struct llama_model * llama_load_model_from_url( | |
fclose(outfile); | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code); | ||
return NULL; | ||
return false; | ||
} | ||
|
||
// Clean up | ||
|
@@ -1872,7 +1885,7 @@ struct llama_model * llama_load_model_from_url( | |
if (etag_file) { | ||
fputs(headers.etag, etag_file); | ||
fclose(etag_file); | ||
fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag); | ||
fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag); | ||
} | ||
} | ||
|
||
|
@@ -1882,20 +1895,118 @@ struct llama_model * llama_load_model_from_url( | |
if (last_modified_file) { | ||
fputs(headers.last_modified, last_modified_file); | ||
fclose(last_modified_file); | ||
fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path, | ||
fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path, | ||
headers.last_modified); | ||
} | ||
} | ||
|
||
if (rename(path_model_temporary, path_model) != 0) { | ||
if (rename(path_temporary, path) != 0) { | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path); | ||
return false; | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
struct llama_model * llama_load_model_from_url( | ||
const char * model_url, | ||
const char * path_model, | ||
const struct llama_model_params & params) { | ||
// Basic validation of the model_url | ||
if (!model_url || strlen(model_url) == 0) { | ||
fprintf(stderr, "%s: invalid model_url\n", __func__); | ||
return NULL; | ||
} | ||
|
||
// Initialize libcurl | ||
auto * curl = curl_easy_init(); | ||
|
||
if (!curl) { | ||
fprintf(stderr, "%s: error initializing libcurl\n", __func__); | ||
return NULL; | ||
} | ||
|
||
if (!curl) { | ||
fprintf(stderr, "%s: error initializing libcurl\n", __func__); | ||
return NULL; | ||
} | ||
|
||
if (!llama_download_file(curl, model_url, path_model)) { | ||
return NULL; | ||
} | ||
|
||
// check for additional GGUFs split to download | ||
int n_split = 0; | ||
{ | ||
struct gguf_init_params gguf_params = { | ||
/*.no_alloc = */ true, | ||
/*.ctx = */ NULL, | ||
}; | ||
auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params); | ||
if (!ctx_gguf) { | ||
fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model); | ||
curl_easy_cleanup(curl); | ||
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model); | ||
return NULL; | ||
} | ||
|
||
auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); | ||
if (key_n_split >= 0) { | ||
n_split = gguf_get_val_u16(ctx_gguf, key_n_split); | ||
} | ||
|
||
gguf_free(ctx_gguf); | ||
} | ||
|
||
curl_easy_cleanup(curl); | ||
|
||
if (n_split > 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With the current |
||
char split_prefix[PATH_MAX] = {0}; | ||
char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; | ||
|
||
// Verify the first split file format | ||
// and extract split URL and PATH prefixes | ||
{ | ||
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) { | ||
fprintf(stderr, "\n%s: unexpected model file name: %s" | ||
" n_split=%d\n", __func__, path_model, n_split); | ||
return NULL; | ||
} | ||
|
||
if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) { | ||
fprintf(stderr, "\n%s: unexpected model url: %s" | ||
" n_split=%d\n", __func__, model_url, n_split); | ||
return NULL; | ||
} | ||
} | ||
|
||
// Prepare download in parallel | ||
std::vector<std::future<bool>> futures_download; | ||
for (int idx = 1; idx < n_split; idx++) { | ||
futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool { | ||
char split_path[PATH_MAX] = {0}; | ||
llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split); | ||
|
||
char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; | ||
llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split); | ||
|
||
auto * curl = curl_easy_init(); | ||
bool res = llama_download_file(curl, split_url, split_path); | ||
curl_easy_cleanup(curl); | ||
|
||
return res; | ||
}, idx)); | ||
} | ||
|
||
// Wait for all downloads to complete | ||
for (auto & f : futures_download) { | ||
if (!f.get()) { | ||
return NULL; | ||
} | ||
} | ||
} | ||
|
||
return llama_load_model_from_file(path_model, params); | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.