diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index b74dc5e2114d8..f07d2553669af 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -35,7 +35,6 @@ jobs:
         include:
           - build_type: Release
             sanitizer: ""
-            disabled_on_pr: true
       fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
 
     container:
diff --git a/common/common.cpp b/common/common.cpp
index 69c2d5bf79f91..fb80d4bf72283 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -39,6 +39,9 @@
 #endif
 #if defined(LLAMA_USE_CURL)
 #include <curl/curl.h>
+#include <curl/easy.h>
+#include <thread>
+#include <future>
 #endif
 
 #if defined(_MSC_VER)
@@ -61,7 +64,7 @@
 #else
 #include <sys/syslimits.h>
 #endif
-#define LLAMA_CURL_MAX_PATH_LENGTH PATH_MAX
+#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 #define LLAMA_CURL_MAX_HEADER_LENGTH 256
 #endif // LLAMA_USE_CURL
 
@@ -1702,27 +1705,13 @@ void llama_batch_add(
 
 #ifdef LLAMA_USE_CURL
 
-struct llama_model * llama_load_model_from_url(
-        const char * model_url,
-        const char * path_model,
-        const struct llama_model_params & params) {
-    // Basic validation of the model_url
-    if (!model_url || strlen(model_url) == 0) {
-        fprintf(stderr, "%s: invalid model_url\n", __func__);
-        return NULL;
-    }
-
-    // Initialize libcurl globally
-    auto curl = curl_easy_init();
-
-    if (!curl) {
-        fprintf(stderr, "%s: error initializing libcurl\n", __func__);
-        return NULL;
-    }
+static bool llama_download_file(CURL * curl, const char * url, const char * path) {
+    bool force_download = false;
 
     // Set the URL, allow to follow http redirection
-    curl_easy_setopt(curl, CURLOPT_URL, model_url);
+    curl_easy_setopt(curl, CURLOPT_URL, url);
     curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+
 #if defined(_WIN32)
     // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
     //   operating system. Currently implemented under MS-Windows.
@@ -1731,16 +1720,16 @@ struct llama_model * llama_load_model_from_url(
 
     // Check if the file already exists locally
     struct stat model_file_info;
-    auto file_exists = (stat(path_model, &model_file_info) == 0);
+    auto file_exists = (stat(path, &model_file_info) == 0);
 
     // If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
     char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
-    char etag_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
-    snprintf(etag_path, sizeof(etag_path), "%s.etag", path_model);
+    char etag_path[PATH_MAX] = {0};
+    snprintf(etag_path, sizeof(etag_path), "%s.etag", path);
 
     char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
-    char last_modified_path[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
-    snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path_model);
+    char last_modified_path[PATH_MAX] = {0};
+    snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path);
 
     if (file_exists) {
         auto * f_etag = fopen(etag_path, "r");
@@ -1748,7 +1737,7 @@ struct llama_model * llama_load_model_from_url(
             if (!fgets(etag, sizeof(etag), f_etag)) {
                 fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
             } else {
-                fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, etag_path, etag);
+                fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag);
             }
             fclose(f_etag);
         }
@@ -1758,7 +1747,7 @@ struct llama_model * llama_load_model_from_url(
             if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
                 fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
             } else {
-                fprintf(stderr, "%s: previous model file found %s: %s\n", __func__, last_modified_path,
+                fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path,
                         last_modified);
             }
             fclose(f_last_modified);
@@ -1776,6 +1765,11 @@ struct llama_model * llama_load_model_from_url(
         auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
             llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
 
+            // Lowercase the header field name (unsigned char cast: tolower() on a negative char is UB)
+            for (size_t i = 0; i < n_items && buffer[i] != ':'; ++i) {
+                buffer[i] = (char) tolower((unsigned char) buffer[i]);
+            }
+
             const char * etag_prefix = "etag: ";
             if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) {
                 strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF
@@ -1798,7 +1792,7 @@ struct llama_model * llama_load_model_from_url(
         if (res != CURLE_OK) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
-            return NULL;
+            return false;
         }
 
         long http_code = 0;
@@ -1806,30 +1800,34 @@ struct llama_model * llama_load_model_from_url(
         if (http_code != 200) {
             // HEAD not supported, we don't know if the file has changed
             // force trigger downloading
-            file_exists = false;
+            force_download = true;
             fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
         }
     }
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
-    if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
-        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
-        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+    bool should_download = !file_exists // nothing cached locally yet
+        || force_download // HEAD returned a non-200 status, so freshness is unknown
+        || (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0) // server ETag differs from the cached one
+        || (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0); // same for Last-Modified
+    if (should_download) {
+        char path_temporary[PATH_MAX] = {0};
+        snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path);
         if (file_exists) {
-            fprintf(stderr, "%s: deleting previous downloaded model file: %s\n", __func__, path_model);
-            if (remove(path_model) != 0) {
+            fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path);
+            if (remove(path) != 0) {
                 curl_easy_cleanup(curl);
-                fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path_model);
-                return NULL;
+                fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path);
+                return false;
             }
         }
 
         // Set the output file
-        auto * outfile = fopen(path_model_temporary, "wb");
+        auto * outfile = fopen(path_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
-            fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
-            return NULL;
+            fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_temporary);
+            return false;
         }
 
         typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
@@ -1843,15 +1841,30 @@ struct llama_model * llama_load_model_from_url(
         //  display download progress
         curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
 
+        // helper to mask the credentials ("user:password@") of a URL before it is logged
+        auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
+            std::size_t protocol_pos = url.find("://");
+            if (protocol_pos == std::string::npos) {
+                return url;  // Malformed URL
+            }
+
+            std::size_t at_pos = url.find('@', protocol_pos + 3);
+            if (at_pos == std::string::npos || at_pos > url.find_first_of("/?#", protocol_pos + 3)) {
+                return url;  // No credentials: an '@' in the path, query or fragment is not userinfo
+            }
+
+            return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
+        };
+
         // start the download
-        fprintf(stderr, "%s: downloading model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
-                model_url, path_model, headers.etag, headers.last_modified);
+        fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
+                llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified);
         auto res = curl_easy_perform(curl);
         if (res != CURLE_OK) {
             fclose(outfile);
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
-            return NULL;
+            return false;
         }
 
         long http_code = 0;
@@ -1860,7 +1873,7 @@ struct llama_model * llama_load_model_from_url(
             fclose(outfile);
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
-            return NULL;
+            return false;
         }
 
         // Clean up
@@ -1872,7 +1885,7 @@ struct llama_model * llama_load_model_from_url(
             if (etag_file) {
                 fputs(headers.etag, etag_file);
                 fclose(etag_file);
-                fprintf(stderr, "%s: model etag saved %s: %s\n", __func__, etag_path, headers.etag);
+                fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag);
             }
         }
 
@@ -1882,20 +1895,126 @@ struct llama_model * llama_load_model_from_url(
             if (last_modified_file) {
                 fputs(headers.last_modified, last_modified_file);
                 fclose(last_modified_file);
-                fprintf(stderr, "%s: model last modified saved %s: %s\n", __func__, last_modified_path,
+                fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path,
                         headers.last_modified);
             }
         }
 
-        if (rename(path_model_temporary, path_model) != 0) {
+        if (rename(path_temporary, path) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Download the model at model_url to path_model, then download in parallel any additional
+// GGUF splits announced by its split.count metadata, and finally load it from path_model.
+// Returns NULL on any failure (empty URL, libcurl error, failed download, unreadable GGUF).
+struct llama_model * llama_load_model_from_url(
+        const char * model_url,
+        const char * path_model,
+        const struct llama_model_params & params) {
+    // Basic validation of the model_url
+    if (!model_url || strlen(model_url) == 0) {
+        fprintf(stderr, "%s: invalid model_url\n", __func__);
+        return NULL;
+    }
+
+    // Initialize libcurl
+    auto * curl = curl_easy_init();
+
+    if (!curl) {
+        fprintf(stderr, "%s: error initializing libcurl\n", __func__);
+        return NULL;
+    }
+
+    // no cleanup needed on failure: llama_download_file() releases the handle before returning false
+    if (!llama_download_file(curl, model_url, path_model)) {
+        return NULL;
+    }
+
+    // check for additional GGUFs split to download
+    int n_split = 0;
+    {
+        struct gguf_init_params gguf_params = {
+            /*.no_alloc = */ true,
+            /*.ctx      = */ NULL,
+        };
+        auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+        if (!ctx_gguf) {
+            fprintf(stderr, "\n%s:  failed to load input GGUF from %s\n", __func__, path_model);
             curl_easy_cleanup(curl);
-            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
             return NULL;
         }
+
+        auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
+        if (key_n_split >= 0) {
+            n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
+        }
+
+        gguf_free(ctx_gguf);
     }
 
     curl_easy_cleanup(curl);
 
+    if (n_split > 1) {
+        char split_prefix[PATH_MAX] = {0};
+        char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+
+        // Verify the first split file format
+        // and extract split URL and PATH prefixes
+        {
+            if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
+                fprintf(stderr, "\n%s: unexpected model file name: %s"
+                                " n_split=%d\n", __func__, path_model, n_split);
+                return NULL;
+            }
+
+            if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
+                fprintf(stderr, "\n%s: unexpected model url: %s"
+                                " n_split=%d\n", __func__, model_url, n_split);
+                return NULL;
+            }
+        }
+
+        // Prepare download in parallel
+        std::vector<std::future<bool>> futures_download;
+        for (int idx = 1; idx < n_split; idx++) {
+            futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool {
+                char split_path[PATH_MAX] = {0};
+                llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
+
+                char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+                llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
+
+                auto * curl = curl_easy_init();
+                if (!curl) {
+                    fprintf(stderr, "llama_load_model_from_url: error initializing libcurl\n");
+                    return false;
+                }
+
+                // llama_download_file() already calls curl_easy_cleanup() before returning false,
+                // so release the handle here only on success to avoid a double free
+                bool res = llama_download_file(curl, split_url, split_path);
+                if (res) {
+                    curl_easy_cleanup(curl);
+                }
+
+                return res;
+            }, idx));
+        }
+
+        // Wait for all downloads to complete
+        for (auto & f : futures_download) {
+            if (!f.get()) {
+                return NULL;
+            }
+        }
+    }
+
     return llama_load_model_from_file(path_model, params);
 }
 
diff --git a/common/common.h b/common/common.h
index afa4cf6d7d528..a223eceaad87e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -306,3 +306,10 @@ struct llama_control_vector_load_info {
 // Load control vectors, scale each by strength, and add them together.
 // On error, returns {-1, empty}
 llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
+
+//
+// Split utils
+//
+static const char * const LLM_KV_SPLIT_NO            = "split.no";
+static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
+static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp
index f703588e164f6..b1af599923809 100644
--- a/examples/gguf-split/gguf-split.cpp
+++ b/examples/gguf-split/gguf-split.cpp
@@ -26,10 +26,6 @@ enum split_operation : uint8_t {
     SPLIT_OP_MERGE,
 };
 
-static const char * const LLM_KV_SPLIT_NO            = "split.no";
-static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
-static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
-
 struct split_params {
     split_operation operation = SPLIT_OP_SPLIT;
     int n_split_tensors = 128;
diff --git a/examples/server/README.md b/examples/server/README.md
index 355601ff4dec2..256a53b7a673d 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -20,7 +20,9 @@ The project is under active development, and we are [looking for feedback and co
 - `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation.
 - `--threads-http N`: number of threads in the http server pool to process requests (default: `max(std::thread::hardware_concurrency() - 1, --parallel N + 2)`)
 - `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
-- `-mu MODEL_URL --model-url MODEL_URL`: Specify a remote http url to download the file (e.g https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf).
+- `-mu MODEL_URL, --model-url MODEL_URL`: Specify a remote HTTP URL to download the model file from (default: unused).
+- `-hfr REPO, --hf-repo REPO`: Hugging Face model repository (default: unused).
+- `-hff FILE, --hf-file FILE`: Hugging Face model file (default: unused).
 - `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
 - `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were build with a context of 4096.
 - `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 27bd2dd70c5f7..b02c2546eb4c6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2208,7 +2208,11 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
     printf("  -m FNAME, --model FNAME\n");
     printf("                            model path (default: %s)\n", params.model.c_str());
     printf("  -mu MODEL_URL, --model-url MODEL_URL\n");
-    printf("                            model download url (default: %s)\n", params.model_url.c_str());
+    printf("                            model download url (default: unused)\n");
+    printf("  -hfr REPO, --hf-repo REPO\n");
+    printf("                            Hugging Face model repository (default: unused)\n");
+    printf("  -hff FILE, --hf-file FILE\n");
+    printf("                            Hugging Face model file (default: unused)\n");
     printf("  -a ALIAS, --alias ALIAS\n");
     printf("                            set an alias for the model, will be added as `model` field in completion response\n");
     printf("  --lora FNAME              apply LoRA adapter (implies --no-mmap)\n");
@@ -2337,6 +2341,18 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
                 break;
             }
             params.model_url = argv[i];
+        } else if (arg == "-hfr" || arg == "--hf-repo") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.hf_repo = argv[i];
+        } else if (arg == "-hff" || arg == "--hf-file") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.hf_file = argv[i];
         } else if (arg == "-a" || arg == "--alias") {
             if (++i >= argc) {
                 invalid_param = true;
diff --git a/examples/server/tests/features/parallel.feature b/examples/server/tests/features/parallel.feature
index a66fed626619d..6cd306a2bcf7c 100644
--- a/examples/server/tests/features/parallel.feature
+++ b/examples/server/tests/features/parallel.feature
@@ -4,7 +4,8 @@ Feature: Parallel
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And   a model file tinyllamas/split/stories15M-00001-of-00003.gguf from HF repo ggml-org/models
+    And   a model file test-model-00001-of-00003.gguf
     And   42 as server seed
     And   128 as batch size
     And   256 KV cache size
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index a2e0e5b3532b5..646a4e49d0d56 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -4,8 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And   a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
-    And   a model file stories260K.gguf
+    And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And   a model file test-model.gguf
     And   a model alias tinyllama-2
     And   42 as server seed
       # KV Cache corresponds to the total amount of tokens
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 03f55f65910de..86c3339dc7183 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -16,7 +16,6 @@
 import openai
 from behave import step
 from behave.api.async_step import async_run_until_complete
-from huggingface_hub import hf_hub_download
 from prometheus_client import parser
 
 
@@ -39,6 +38,8 @@ def step_server_config(context, server_fqdn, server_port):
 
     context.model_alias = None
     context.model_file = None
+    context.model_hf_repo = None
+    context.model_hf_file = None
     context.model_url = None
     context.n_batch = None
     context.n_ubatch = None
@@ -68,9 +69,9 @@ def step_server_config(context, server_fqdn, server_port):
 
 @step('a model file {hf_file} from HF repo {hf_repo}')
 def step_download_hf_model(context, hf_file, hf_repo):
-    context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
-    if context.debug:
-        print(f"model file: {context.model_file}")
+    context.model_hf_repo = hf_repo
+    context.model_hf_file = hf_file
+    context.model_file = os.path.basename(hf_file)
 
 
 @step('a model file {model_file}')
@@ -1079,6 +1080,10 @@ def start_server_background(context):
         server_args.extend(['--model', context.model_file])
     if context.model_url:
         server_args.extend(['--model-url', context.model_url])
+    if context.model_hf_repo:
+        server_args.extend(['--hf-repo', context.model_hf_repo])
+    if context.model_hf_file:
+        server_args.extend(['--hf-file', context.model_hf_file])
     if context.n_batch:
         server_args.extend(['--batch-size', context.n_batch])
     if context.n_ubatch:
diff --git a/llama.cpp b/llama.cpp
index eedca802b86a7..a1850b44bcf3a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2934,7 +2934,7 @@ struct llama_model_loader {
                 }
             }
 
-            LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n",  __func__, n_split);
+            LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n",  __func__, n_split - 1);
         }
 
         n_kv      = gguf_get_n_kv(meta);
@@ -14841,7 +14841,7 @@ int llama_split_prefix(char * dest, size_t maxlen, const char * split_path, int
     // check if dest ends with postfix
     int size_prefix = str_split_path.size() - str_postfix.size();
     if (size_prefix > 0 && str_split_path.find(str_postfix, size_prefix) != std::string::npos) {
-        snprintf(dest, std::min((size_t) size_prefix, maxlen), "%s", split_path);
+        snprintf(dest, std::min((size_t) size_prefix + 1, maxlen), "%s", split_path);
         return size_prefix;
     }