Skip to content

Commit 4f2f228

Browse files
authored
only add mask token when using sentencepiece
1 parent 2946277 commit 4f2f228

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

convert_hf_to_gguf.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3889,11 +3889,11 @@ def _xlmroberta_set_vocab(self) -> None:
3889 3889
SentencePieceTokenTypes.UNKNOWN,
3890 3890
] + toktypes[3:-1]
3891 3891

3892-
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
3893-
# Add mask token missing from sentencepiece.bpe.model
3894-
tokens[250001] = b'<mask>'
3895-
scores[250001] = 0.0
3896-
toktypes[250001] = SentencePieceTokenTypes.CONTROL
3892+
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
3893+
# Add mask token missing from sentencepiece.bpe.model
3894+
tokens[250001] = b'<mask>'
3895+
scores[250001] = 0.0
3896+
toktypes[250001] = SentencePieceTokenTypes.CONTROL
3897 3897

3898 3898
self.gguf_writer.add_tokenizer_model("t5")
3899 3899
self.gguf_writer.add_tokenizer_pre("default")

0 commit comments

Comments (0)