Commit 557fde7

style: simplify string formatting for readability (#1632)
1 parent 3d51a16 commit 557fde7
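
Every one of the 28 changed lines applies the same mechanical refactor: a `format!` or `println!` call that passed a variable as a trailing positional argument now captures it by name inside the braces, using Rust's inline format arguments (stabilized in Rust 1.58). The sketch below illustrates the before/after pattern and shows that format specifiers such as `:?` and `:02X` carry over unchanged; the variable names are illustrative, not taken verbatim from the diff.

// Minimal sketch of the pattern applied in this commit (illustrative names).
fn main() {
    let word = "hello";
    let byte: u8 = 0x2A;
    let holes = vec![3usize, 7];

    // Before: positional arguments after the format string.
    let old = format!("▁{}", word);
    let old_code = format!("<0x{:02X}>", byte);
    println!("{:?}", holes);

    // After: captured identifiers; specs like `:02X` and `:?` still apply.
    let new = format!("▁{word}");
    let new_code = format!("<0x{byte:02X}>");
    println!("{holes:?}");

    // The rendered strings are identical, so the refactor is purely stylistic.
    assert_eq!(old, new);
    assert_eq!(old_code, new_code);
}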

File tree

15 files changed: +28 −28 lines


tokenizers/benches/unigram_benchmark.rs

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@ pub fn bench_train(c: &mut Criterion) {
     let mut word_counts = HashMap::new();
     content.split_whitespace().for_each(|word| {
         // This is important for the test of char vs u8
-        let word = format!("▁{}", word);
+        let word = format!("▁{word}");
         *word_counts.entry(word).or_insert(0) += 1;
     });

@@ -49,7 +49,7 @@ pub fn bench_train(c: &mut Criterion) {
     let mut word_counts = HashMap::new();
     content.split_whitespace().for_each(|word| {
         // This is important for the test of char vs u8
-        let word = format!("▁{}", word);
+        let word = format!("▁{word}");
         *word_counts.entry(word).or_insert(0) += 1;
     });

tokenizers/examples/serialization.rs

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@ fn main() {
     // Mix special and not special
     // You can make sure ids are in order, and special status is correct.
     let tokens: Vec<_> = (0..120_000)
-        .map(|i| AddedToken::from(format!("[SPECIAL_{}]", i), i % 2 == 0))
+        .map(|i| AddedToken::from(format!("[SPECIAL_{i}]"), i % 2 == 0))
         .collect();
     tokenizer.add_tokens(&tokens);
     tokenizer.save("_tok.json", true).unwrap();

tokenizers/src/decoders/wordpiece.rs

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ impl Decoder for WordPiece {
                     if token.starts_with(&self.prefix) {
                         *token = token.replacen(&self.prefix, "", 1);
                     } else {
-                        *token = format!(" {}", token);
+                        *token = format!(" {token}");
                     }
                 }
                 if self.cleanup {

tokenizers/src/models/bpe/model.rs

Lines changed: 5 additions & 5 deletions

@@ -385,13 +385,13 @@ impl BPE {
             // Add the `continuing_subword_prefix` if relevant
             if !is_first {
                 if let Some(ref prefix) = self.continuing_subword_prefix {
-                    s = format!("{}{}", prefix, s).into()
+                    s = format!("{prefix}{s}").into()
                 }
             }
             // Add the `end_of_word_suffix` if relevant
             if is_last {
                 if let Some(ref suffix) = self.end_of_word_suffix {
-                    s = format!("{}{}", s, suffix).into()
+                    s = format!("{s}{suffix}").into()
                 }
             }

@@ -406,7 +406,7 @@ impl BPE {
             let tokens: Option<Vec<_>> = s
                 .bytes()
                 .map(|b| -> Option<&u32> {
-                    let code = format!("<{:#04X}>", b);
+                    let code = format!("<{b:#04X}>");

                     self.vocab.get(&code)
                 })

@@ -515,7 +515,7 @@ impl Model for BPE {

     fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>> {
         let vocab_file_name = match name {
-            Some(name) => format!("{}-vocab.json", name),
+            Some(name) => format!("{name}-vocab.json"),
             None => "vocab.json".to_string(),
         };

@@ -530,7 +530,7 @@ impl Model for BPE {

         // Write merges.txt
         let merges_file_name = match name {
-            Some(name) => format!("{}-merges.txt", name),
+            Some(name) => format!("{name}-merges.txt"),
             None => "merges.txt".to_string(),
         };

tokenizers/src/models/bpe/trainer.rs

Lines changed: 3 additions & 3 deletions

@@ -342,13 +342,13 @@ impl BpeTrainer {
             // Add the `continuing_subword_prefix` if relevant
             if !is_first {
                 if let Some(prefix) = &self.continuing_subword_prefix {
-                    s = format!("{}{}", prefix, s);
+                    s = format!("{prefix}{s}");
                 }
             }
             // Add the `end_of_word_suffix` if relevant
             if is_last {
                 if let Some(suffix) = &self.end_of_word_suffix {
-                    s = format!("{}{}", s, suffix);
+                    s = format!("{s}{suffix}");
                 }
             }

@@ -513,7 +513,7 @@ impl BpeTrainer {
                         part_b = part_b[prefix_byte_len..].to_string();
                     }
                 }
-                let new_token = format!("{}{}", part_a, part_b);
+                let new_token = format!("{part_a}{part_b}");
                 // implement sentencepiece-like merge.
                 // if this code were to be merged, integrate a way in the python bindings to communicate this variable
                 // default should be 0/None to maintain previous behavior. 16 is the spm default.

tokenizers/src/models/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -51,7 +51,7 @@ impl<'a> Serialize for OrderedVocabIter<'a> {

         if !holes.is_empty() {
             warn!("The OrderedVocab you are attempting to save contains holes for indices {:?}, your vocabulary could be corrupted !", holes);
-            println!("The OrderedVocab you are attempting to save contains holes for indices {:?}, your vocabulary could be corrupted !", holes);
+            println!("The OrderedVocab you are attempting to save contains holes for indices {holes:?}, your vocabulary could be corrupted !");
         }
         result
     }

tokenizers/src/models/unigram/model.rs

Lines changed: 3 additions & 3 deletions

@@ -425,7 +425,7 @@ impl Model for Unigram {
                 let byte_tokens: Option<Vec<_>> = string
                     .bytes()
                     .map(|byte| -> Option<Token> {
-                        let byte_string = format!("<0x{:02X}>", byte);
+                        let byte_string = format!("<0x{byte:02X}>");
                         let id = self.token_to_ids.get(&byte_string);
                         id.map(|id| Token::new(*id, byte_string, (offset, offset + len)))
                     })

@@ -457,7 +457,7 @@ impl Model for Unigram {

     fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>> {
         let name = match name {
-            Some(name) => format!("{}-unigram.json", name),
+            Some(name) => format!("{name}-unigram.json"),
             None => "unigram.json".to_string(),
         };
         let mut fullpath = PathBuf::new();

@@ -568,7 +568,7 @@ mod tests {

         for is_optimized in &[true, false] {
             model.set_optimized(*is_optimized);
-            println!("IsOptimized {:?}", is_optimized);
+            println!("IsOptimized {is_optimized:?}");
             assert_eq!(model.encode("abc").unwrap(), vec!["abc"]);
             assert_eq!(model.encode("AB").unwrap(), vec!["AB"]);

tokenizers/src/models/unigram/serialization.rs

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ impl<'de> Visitor<'de> for UnigramVisitor {
         }
         match (vocab, unk_id, byte_fallback) {
             (Some(vocab), unk_id, byte_fallback) => Ok(Unigram::from(vocab, unk_id, byte_fallback)
-                .map_err(|err| Error::custom(format!("Unable to load vocab {:?}", err)))?),
+                .map_err(|err| Error::custom(format!("Unable to load vocab {err:?}")))?),
             (None, _, _) => Err(Error::custom("Missing vocab")),
         }
     }

tokenizers/src/models/wordlevel/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -194,7 +194,7 @@ impl Model for WordLevel {

     fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>> {
         let vocab_file_name = match name {
-            Some(name) => format!("{}-vocab.json", name),
+            Some(name) => format!("{name}-vocab.json"),
             None => "vocab.json".to_string(),
         };

tokenizers/src/models/wordpiece/mod.rs

Lines changed: 2 additions & 2 deletions

@@ -271,7 +271,7 @@ impl Model for WordPiece {

     fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>> {
         let vocab_file_name = match name {
-            Some(name) => format!("{}-vocab.txt", name),
+            Some(name) => format!("{name}-vocab.txt"),
             None => "vocab.txt".to_string(),
         };

@@ -285,7 +285,7 @@ impl Model for WordPiece {
         vocab_file.write_all(
             &vocab
                 .into_iter()
-                .flat_map(|(token, _)| format!("{}\n", token).as_bytes().to_owned())
+                .flat_map(|(token, _)| format!("{token}\n").as_bytes().to_owned())
                 .collect::<Vec<_>>()[..],
         )?;

tokenizers/src/processors/template.rs

Lines changed: 1 addition & 1 deletion

@@ -150,7 +150,7 @@ impl TryFrom<String> for Piece {
     fn try_from(s: String) -> StdResult<Self, Self::Error> {
         let parts = s.split(':').collect::<Vec<_>>();

-        let err = || format!("Cannot build Piece from string \"{}\"", s);
+        let err = || format!("Cannot build Piece from string \"{s}\"");
         match parts.as_slice() {
             [id, type_id] => {
                 let type_id: u32 = type_id.parse().map_err(|_| err())?;

tokenizers/src/tokenizer/normalizer.rs

Lines changed: 1 addition & 1 deletion

@@ -351,7 +351,7 @@ impl NormalizedString {
                 match changes {
                     0 => "Replacing".into(),
                     ch if ch > 0 => "Adding".into(),
-                    ch if ch < 0 => format!("Replacing + removing {} following chars", ch),
+                    ch if ch < 0 => format!("Replacing + removing {ch} following chars"),
                     _ => "Undefined".into(),
                 },
                 offset

tokenizers/src/tokenizer/serialization.rs

Lines changed: 1 addition & 1 deletion

@@ -116,7 +116,7 @@ where
                 "version" => {
                     let v: String = map.next_value()?;
                     if &v != "1.0" {
-                        return Err(Error::custom(format!("Unknown tokenizer version '{}'", v)));
+                        return Err(Error::custom(format!("Unknown tokenizer version '{v}'")));
                     }
                 }
                 "truncation" => {

tokenizers/tests/documentation.rs

Lines changed: 4 additions & 4 deletions

@@ -199,7 +199,7 @@ fn quicktour() -> tokenizers::Result<()> {
     // START quicktour_encode_batch
     let output = tokenizer.encode_batch(vec!["Hello, y'all!", "How are you 😁 ?"], true)?;
     // END quicktour_encode_batch
-    println!("{:?}", output);
+    println!("{output:?}");
     // START quicktour_encode_batch_pair
     let output = tokenizer.encode_batch(
         vec![

@@ -209,7 +209,7 @@ fn quicktour() -> tokenizers::Result<()> {
         true,
     )?;
     // END quicktour_encode_batch_pair
-    println!("{:?}", output);
+    println!("{output:?}");
     // START quicktour_enable_padding
     use tokenizers::PaddingParams;

@@ -350,7 +350,7 @@ fn pipeline() -> tokenizers::Result<()> {
         &[1, 27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35, 2],
         true,
     )?;
-    println!("{}", decoded);
+    println!("{decoded}");
     // "Hello , y ' all ! How are you ?"
     // END pipeline_test_decoding

@@ -436,7 +436,7 @@ fn pipeline_bert() -> tokenizers::Result<()> {
     // ["[CLS]", "welcome", "to", "the", "[UNK]", "tok", "##eni", "##zer", "##s", "library", ".", "[SEP]"]

     let decoded = bert_tokenizer.decode(output.get_ids(), true)?;
-    println!("{}", decoded);
+    println!("{decoded}");
     // "welcome to the tok ##eni ##zer ##s library ."
     // END bert_test_decoding
     assert_eq!(

tokenizers/tests/unigram.rs

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ fn test_train_unigram_from_file() {
     let mut word_counts = HashMap::new();
     content.split_whitespace().for_each(|word| {
         // This is important for the test of char vs u8
-        let word = format!("▁{}", word);
+        let word = format!("▁{word}");
         *word_counts.entry(word).or_insert(0) += 1;
     });
