Skip to content

MPT support in llama.cpp #3417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Oct 10, 2023
Merged
Changes from 1 commit
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
b49792b
CUDA: added support for ggml_clamp (see also: https://github.com/gger…
jploski Sep 30, 2023
15236e8
mpt : added an implementation based (mostly) on falcon integration, m…
jploski Sep 30, 2023
84e30e8
mpt : protect against "clip_qkv": null in mpt-7b
jploski Sep 30, 2023
00e8c5c
mpt : quick fix to avoid "Strange model" warning when quantizing MPT …
jploski Sep 30, 2023
1be89c4
mpt : addendum to changeset:84e30e8 - leave parameter clamp_kqv out f…
jploski Sep 30, 2023
26c253e
mpt : standardized all tensor names to follow GGUF spec
jploski Sep 30, 2023
df072d2
mpt : addendum to changeset:1be89c40 - use "req" parameter of GGUF_GE…
jploski Sep 30, 2023
90e7d6d
mpt : fixed comment s/gptneox/mpt/
jploski Oct 2, 2023
4708012
mpt : remove tabs, trailing whitespace
jploski Oct 2, 2023
1364bcd
mpt : removed ne01 + n_past == ne00 assertion from alibi (cuda/f32) a…
jploski Oct 3, 2023
7d6a24a
mpt : updated convert-mpt-hf-to-gguf.py to reflect changes made to co…
jploski Oct 6, 2023
292363e
Merge branch 'master' of https://github.com/ggerganov/llama.cpp into …
cebtenzzre Oct 9, 2023
ad3c2f3
comment out n_past instead of marking it unused
cebtenzzre Oct 9, 2023
1a454eb
mpt : removed hardcoded +178 from convert script in favor of utilizin…
jploski Oct 9, 2023
32172f1
mpt : remove unused tokenizer_json in convert script
cebtenzzre Oct 9, 2023
96cf3f5
ggml : remove obsolete n_past assert in ggml_alibi
ggerganov Oct 10, 2023
9b66378
llama : print clamp_kqv and max_alibi_bias hparams
ggerganov Oct 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
mpt : updated convert-mpt-hf-to-gguf.py to reflect changes made to convert-gptneox-hf-to-gguf.py in pr:3252
  • Loading branch information
jploski authored and cebtenzzre committed Oct 9, 2023
commit 7d6a24aad4d2eae524bd3472290fbfb3efab5510
55 changes: 7 additions & 48 deletions convert-mpt-hf-to-gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,6 @@
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
import gguf

# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py


def bytes_to_unicode() -> dict[int, str]:
    """
    Return a reversible mapping from each byte value (0-255) to a unicode character.

    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    The table also avoids mapping to whitespace/control characters the bpe code barfs on.

    Returns:
        A dict with exactly 256 entries. Bytes in the ranges "!".."~", "¡".."¬"
        and "®".."ÿ" map to the character with the same code point; every other
        byte maps to chr(256 + k), where k counts the remapped bytes in
        ascending byte order.
    """
    # Bytes in these ranges are kept as-is (identity mapping).
    bs = (
        list(range(ord("!"), ord("~") + 1))
        + list(range(ord("¡"), ord("¬") + 1))
        + list(range(ord("®"), ord("ÿ") + 1))
    )
    cs = bs[:]

    # Snapshot for O(1) membership tests; range(256) never repeats a value, so
    # bytes appended below never need to be looked up again.
    kept = set(bs)

    n = 0
    for b in range(2**8):
        if b not in kept:
            # Remaining bytes are shifted above the single-byte range so that
            # each one gets a distinct stand-in character.
            bs.append(b)
            cs.append(2**8 + n)
            n += 1
    return dict(zip(bs, (chr(c) for c in cs)))


def count_model_parts(dir_model: Path) -> int:
num_parts = 0
Expand Down Expand Up @@ -131,6 +108,8 @@ def parse_args() -> argparse.Namespace:
print("gguf: get tokenizer metadata")

tokens: list[bytearray] = []
scores: list[float] = []
toktypes: list[int] = []

tokenizer_json_file = dir_model / 'tokenizer.json'
if not tokenizer_json_file.is_file():
Expand All @@ -155,31 +134,15 @@ def parse_args() -> argparse.Namespace:
tokenizer = AutoTokenizer.from_pretrained(dir_model)

reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
byte_encoder = bytes_to_unicode()
byte_decoder = {v: k for k, v in byte_encoder.items()}

for i in range(vocab_size):
if i in reverse_vocab:
try:
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
except KeyError:
text = bytearray()
for c in reverse_vocab[i]:
if ord(c) < 256: # single byte character
try:
text.append(byte_decoder[c])
except KeyError:
text.extend(c.encode('utf-8'))
else: # multibyte special token character
text.extend(c.encode('utf-8'))
else:
print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token. (It's normal for MPT.)")
pad_token = f"[PAD{i}]".encode("utf8")
text = bytearray(pad_token)

tokens.append(text)
tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
scores.append(0.0) # dummy
toktypes.append(gguf.TokenType.NORMAL)

gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)

special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
special_vocab.add_to_gguf(gguf_writer)
Expand Down Expand Up @@ -239,10 +202,6 @@ def parse_args() -> argparse.Namespace:

print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))


# if new_name == "wte.weight" and data.shape[0] == 50432 and vocab_size == 50254:
# data = data[0:vocab_size,:]

gguf_writer.add_tensor(new_name, data)

# note: MPT output is tied to (same as) wte in original model;
Expand Down