diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index bced1f5617a0f..13e8c95f08c7d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -40,13 +40,13 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
         self.ftype = ftype
         self.fname_out = fname_out
         self.is_big_endian = is_big_endian
-        self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+        self.endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.is_safetensors = self._is_model_safetensors()
         self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
         self.part_names = self._get_part_names()
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
-        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess)
+        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianness=self.endianness)
 
     def set_vocab(self):
         self._set_vocab_gpt2()
diff --git a/convert.py b/convert.py
index 6e95d6cb37e79..a858925670fce 100755
--- a/convert.py
+++ b/convert.py
@@ -812,8 +812,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 
 class OutputFile:
-    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+    def __init__(self, fname_out: Path, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianness=endianness)
 
     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -892,10 +892,10 @@ def close(self) -> None:
         self.gguf.close()
 
     @staticmethod
-    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)
 
-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)
 
         # meta data
         of.add_meta_arch(params)
@@ -920,10 +920,10 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
             return dt.quantize(arr)
 
     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianness: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)
 
-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)
 
         # meta data
         of.add_meta_arch(params)
@@ -1165,9 +1165,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
-    endianess = gguf.GGUFEndian.LITTLE
+    endianness = gguf.GGUFEndian.LITTLE
     if args.bigendian:
-        endianess = gguf.GGUFEndian.BIG
+        endianness = gguf.GGUFEndian.BIG
 
     params = Params.load(model_plus)
     if params.n_ctx == -1:
@@ -1220,7 +1220,7 @@ def main(args_in: list[str] | None = None) -> None:
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")
 
-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
+    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianness=endianness)
     print(f"Wrote {outfile}")
 
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 369f81a8428b2..e58d32a03e20d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -409,7 +409,7 @@ struct llama_client_slot
     size_t sent_token_probs_index = 0;
 
     int64_t t_start_process_prompt;
-    int64_t t_start_genereration;
+    int64_t t_start_generation;
     double t_prompt_processing; // ms
     double t_token_generation; // ms
 
@@ -477,12 +477,12 @@ struct llama_client_slot
     void release() {
         if (state == IDLE || state == PROCESSING)
         {
-            t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
+            t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
             command = RELEASE;
         }
     }
 
-    json get_formated_timings() {
+    json get_formatted_timings() {
         return json
         {
             {"prompt_n", num_prompt_tokens_processed},
@@ -1160,10 +1160,10 @@ struct llama_server_context
 
     json get_model_props()
     {
-        return get_formated_generation(slots[0]);
+        return get_formatted_generation(slots[0]);
     }
 
-    json get_formated_generation(llama_client_slot &slot)
+    json get_formatted_generation(llama_client_slot &slot)
     {
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
@@ -1254,7 +1254,7 @@ struct llama_server_context
             {"model", params.model_alias},
             {"tokens_predicted", slot.n_decoded},
             {"tokens_evaluated", slot.num_prompt_tokens},
-            {"generation_settings", get_formated_generation(slot)},
+            {"generation_settings", get_formatted_generation(slot)},
             {"prompt", slot.prompt},
             {"truncated", slot.truncated},
             {"stopped_eos", slot.stopped_eos},
@@ -1262,7 +1262,7 @@ struct llama_server_context
             {"stopped_limit", slot.stopped_limit},
             {"stopping_word", slot.stopping_word},
             {"tokens_cached", slot.n_past},
-            {"timings", slot.get_formated_timings()}
+            {"timings", slot.get_formatted_timings()}
         };
 
         if (slot.sparams.n_probs > 0)
@@ -1681,7 +1681,7 @@ struct llama_server_context
                     slot.command = NONE;
                     std::vector<llama_token> prompt_tokens;
                     slot.t_start_process_prompt = ggml_time_us();
-                    slot.t_start_genereration = 0;
+                    slot.t_start_generation = 0;
 
                     if (slot.infill)
                     {
@@ -1871,8 +1871,8 @@ struct llama_server_context
 
             if (slot.n_decoded == 1)
             {
-                slot.t_start_genereration = ggml_time_us();
-                slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
+                slot.t_start_generation = ggml_time_us();
+                slot.t_prompt_processing = (slot.t_start_generation - slot.t_start_process_prompt) / 1e3;
             }
 
             llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
@@ -2299,13 +2299,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                 invalid_param = true;
                 break;
             }
-            std::string systm_content;
+            std::string system_content;
             std::copy(
                 std::istreambuf_iterator<char>(file),
                 std::istreambuf_iterator<char>(),
-                std::back_inserter(systm_content)
+                std::back_inserter(system_content)
             );
-            llama.process_system_prompt_data(json::parse(systm_content));
+            llama.process_system_prompt_data(json::parse(system_content));
         }
         else if(arg == "--mmproj")
         {
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index b8ec977c8f3fa..c8fd5fe6f64e4 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -50,11 +50,11 @@ class GGUFWriter:
 
     def __init__(
         self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
-        endianess: GGUFEndian = GGUFEndian.LITTLE,
+        endianness: GGUFEndian = GGUFEndian.LITTLE,
     ):
         self.fout = open(path, "wb")
         self.arch = arch
-        self.endianess = endianess
+        self.endianness = endianness
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = bytearray()
@@ -65,7 +65,7 @@ def __init__(
         self.temp_file = None
         self.tensors = []
         print("gguf: This GGUF file is for {0} Endian only".format(
-            "Big" if self.endianess == GGUFEndian.BIG else "Little",
+            "Big" if self.endianness == GGUFEndian.BIG else "Little",
         ))
         self.state = WriterState.EMPTY
 
@@ -218,7 +218,7 @@ def add_tensor(
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
-        if self.endianess == GGUFEndian.BIG:
+        if self.endianness == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
@@ -244,7 +244,7 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
         if self.state is not WriterState.TI_DATA:
             raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
 
-        if self.endianess == GGUFEndian.BIG:
+        if self.endianness == GGUFEndian.BIG:
             tensor.byteswap(inplace=True)
         self.write_padding(self.fout, self.fout.tell())
         tensor.tofile(self.fout)
@@ -405,7 +405,7 @@ def add_chat_template(self, value: str) -> None:
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
-            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+            pack_prefix = '<' if self.endianness == GGUFEndian.LITTLE else '>'
         return struct.pack(f'{pack_prefix}{fmt}', value)
 
     def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None: