Skip to content

Commit 048b74b

Browse files
committed
Fix off-by-one error in tokenizer::normalizer::Range::len
1 parent 3d51a16 commit 048b74b

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

tokenizers/src/tokenizer/normalizer.rs

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,8 @@ where
4545

4646
match range.start_bound() {
4747
Bound::Unbounded => Some(end),
48-
Bound::Included(i) => Some(end - (*i + 1)),
49-
Bound::Excluded(i) => Some(end - *i),
48+
Bound::Included(i) => Some(end - *i),
49+
Bound::Excluded(i) => Some(end - (*i + 1)),
5050
}
5151
}
5252

@@ -1013,6 +1013,20 @@ mod tests {
10131013
use regex::Regex;
10141014
use unicode_categories::UnicodeCategories;
10151015

1016+
// Checks `len()` on an inclusive range wrapped in `Range::Original`:
// `3..=7` covers the indices 3, 4, 5, 6, 7, so the expected result is `Some(5)`.
// The start bound of `3..=7` is `Bound::Included(3)`, so this test goes through
// the `Included` start-bound arm of `len`.
#[test]
1017+
fn test_len_range_inclusive() {
1018+
let range = Range::Original(3..=7);
1019+
let len = range.len();
1020+
assert_eq!(len, Some(5)); // inclusive end is counted: 7 - 3 + 1 = 5
1021+
}
1022+
1023+
// Checks `len()` on a half-open range wrapped in `Range::Original`:
// `3..7` covers the indices 3, 4, 5, 6 (the end bound 7 is not counted),
// so the expected result is `Some(4)`.
#[test]
1024+
fn test_len_range_exclusive() {
1025+
let range = Range::Original(3..7);
1026+
let len = range.len();
1027+
assert_eq!(len, Some(4)); // exclusive end is not counted: 7 - 3 = 4
1028+
}
1029+
10161030
#[test]
10171031
fn nfd_adds_new_chars() {
10181032
let mut n = NormalizedString::from("élégant");

0 commit comments

Comments
 (0)