@@ -28,14 +28,14 @@ impl Serialize for BPE {
28
28
. map ( |( pair, ( rank, _) ) | ( pair, rank) )
29
29
. collect ( ) ;
30
30
merges. sort_unstable_by_key ( |k| * k. 1 ) ;
31
- let merges_str = merges
31
+ let merges = merges
32
32
. into_iter ( )
33
- . map ( |( pair, _) | format ! ( "{} {}" , self . vocab_r[ & pair. 0 ] , self . vocab_r[ & pair. 1 ] ) )
33
+ . map ( |( pair, _) | ( self . vocab_r [ & pair. 0 ] . clone ( ) , self . vocab_r [ & pair. 1 ] . clone ( ) ) )
34
34
. collect :: < Vec < _ > > ( ) ;
35
35
let ordered_vocab = OrderedVocabIter :: new ( & self . vocab_r ) ;
36
36
37
37
model. serialize_field ( "vocab" , & ordered_vocab) ?;
38
- model. serialize_field ( "merges" , & merges_str ) ?;
38
+ model. serialize_field ( "merges" , & merges ) ?;
39
39
40
40
model. end ( )
41
41
}
@@ -77,7 +77,14 @@ impl<'de> Visitor<'de> for BPEVisitor {
77
77
{
78
78
let mut builder = BpeBuilder :: new ( ) ;
79
79
let mut vocab: Option < HashMap < String , u32 > > = None ;
80
- let mut merges: Option < Vec < String > > = None ;
80
+
81
// Local helper type: the shapes the deserialized `merges` value may take.
// With `untagged`, serde tries the variants in declaration order and keeps
// the first one that deserializes successfully, so `Tuple` is attempted
// before `Legacy`.
+ #[ derive( Debug , Deserialize ) ]
82
+ #[ serde( untagged) ]
83
+ enum MergeType {
84
// A list of two-string pairs; the visitor below uses these as-is.
+ Tuple ( Vec < ( String , String ) > ) ,
85
// A list of single strings; the visitor below runs these through
// `convert_merges_to_hashmap` together with the vocab before use.
// NOTE(review): presumably each string is the older space-joined
// "left right" form — confirm against `convert_merges_to_hashmap`.
+ Legacy ( Vec < String > ) ,
86
+ }
87
+ let mut merges: Option < MergeType > = None ;
81
88
while let Some ( key) = map. next_key :: < String > ( ) ? {
82
89
match key. as_ref ( ) {
83
90
"dropout" => {
@@ -120,8 +127,12 @@ impl<'de> Visitor<'de> for BPEVisitor {
120
127
}
121
128
}
122
129
if let ( Some ( vocab) , Some ( merges) ) = ( vocab, merges) {
123
- let merges =
124
- convert_merges_to_hashmap ( merges. into_iter ( ) , & vocab) . map_err ( Error :: custom) ?;
130
+ let merges = match merges {
131
+ MergeType :: Tuple ( merges) => merges,
132
+ MergeType :: Legacy ( merges) => {
133
+ convert_merges_to_hashmap ( merges. into_iter ( ) , & vocab) . map_err ( Error :: custom) ?
134
+ }
135
+ } ;
125
136
builder = builder. vocab_and_merges ( vocab, merges) ;
126
137
Ok ( builder. build ( ) . map_err ( Error :: custom) ?)
127
138
} else {
0 commit comments