Skip to content

Commit ce77cc5

Browse files
committed
Make slice_str similar to truncate_str
1 parent cd1a6b4 commit ce77cc5

File tree

3 files changed

+120
-149
lines changed

3 files changed

+120
-149
lines changed

src/ansi.rs

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ use std::{
44
str::CharIndices,
55
};
66

7-
use crate::utils::char_width;
8-
97
#[derive(Debug, Clone, Copy)]
108
enum State {
119
Start,
@@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {
269267

270268
impl<'a> FusedIterator for AnsiCodeIterator<'a> {}
271269

272-
/// Slice a `&str` in terms of text width. This means that only the text
273-
/// columns strictly between `start` and `stop` will be kept.
274-
///
275-
/// If a multi-columns character overlaps with the end of the interval it will
276-
/// not be included. In such a case, the result will be less than `end - start`
277-
/// columns wide.
278-
pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
279-
if end <= start {
280-
return "";
281-
}
282-
283-
let mut pos = 0;
284-
let mut res_start = 0;
285-
let mut res_end = 0;
286-
287-
'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) {
288-
// As ansi symbols have a width of 0 we can safely early-interupt
289-
// the outer for loop only if current pos strictly greater than
290-
// `end`.
291-
if pos > end {
292-
break;
293-
}
294-
295-
if is_ansi {
296-
if pos < start {
297-
res_start += sub.len();
298-
res_end = res_start;
299-
} else if pos <= end {
300-
res_end += sub.len();
301-
} else {
302-
break 'outer;
303-
}
304-
} else {
305-
for c in sub.chars() {
306-
let c_width = char_width(c);
307-
308-
if pos < start {
309-
res_start += c.len_utf8();
310-
res_end = res_start;
311-
} else if pos + c_width <= end {
312-
res_end += c.len_utf8();
313-
} else {
314-
break 'outer;
315-
}
316-
317-
pos += char_width(c);
318-
}
319-
}
320-
}
321-
322-
&s[res_start..res_end]
323-
}
324-
325270
#[cfg(test)]
326271
mod tests {
327-
use crate::measure_text_width;
328-
329272
use super::*;
330273

331274
use lazy_static::lazy_static;
@@ -492,37 +435,4 @@ mod tests {
492435
assert_eq!(iter.rest_slice(), "");
493436
assert_eq!(iter.next(), None);
494437
}
495-
496-
#[test]
497-
fn test_slice_ansi_str() {
498-
// Note that 🐶 is two columns wide
499-
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
500-
assert_eq!(slice_ansi_str(test_str, 5, 5), "");
501-
assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str);
502-
503-
if cfg!(feature = "unicode-width") {
504-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
505-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
506-
assert_eq!(measure_text_width(test_str), 16);
507-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
508-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
509-
assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m");
510-
assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!");
511-
assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!");
512-
assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!");
513-
514-
assert_eq!(
515-
slice_ansi_str(test_str, 4, 9),
516-
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
517-
);
518-
} else {
519-
assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
520-
assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m");
521-
522-
assert_eq!(
523-
slice_ansi_str(test_str, 4, 9),
524-
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m w"
525-
);
526-
}
527-
}
528438
}

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,12 @@ pub use crate::term::{
8282
};
8383
pub use crate::utils::{
8484
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
85-
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
86-
Color, Emoji, Style, StyledObject,
85+
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
86+
Attribute, Color, Emoji, Style, StyledObject,
8787
};
8888

8989
#[cfg(feature = "ansi-parsing")]
90-
pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
90+
pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};
9191

9292
mod common_term;
9393
mod kb;

src/utils.rs

Lines changed: 117 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::collections::BTreeSet;
33
use std::env;
44
use std::fmt;
5+
use std::ops::Range;
56
use std::sync::atomic::{AtomicBool, Ordering};
67

78
use lazy_static::lazy_static;
@@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
724725
}
725726

726727
#[cfg(feature = "ansi-parsing")]
727-
pub(crate) fn char_width(c: char) -> usize {
728+
fn char_width(c: char) -> usize {
728729
#[cfg(feature = "unicode-width")]
729730
{
730731
use unicode_width::UnicodeWidthChar;
@@ -737,80 +738,98 @@ pub(crate) fn char_width(c: char) -> usize {
737738
}
738739
}
739740

740-
/// Truncates a string to a certain number of characters.
741+
/// Slice a `&str` in terms of text width. This means that only the text
742+
/// columns strictly between `start` and `stop` will be kept.
741743
///
742-
/// This ensures that escape codes are not screwed up in the process.
743-
/// If the maximum length is hit the string will be truncated but
744-
/// escapes code will still be honored. If truncation takes place
745-
/// the tail string will be appended.
746-
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
744+
/// If a multi-columns character overlaps with the end of the interval it will
745+
/// not be included. In such a case, the result will be less than `end - start`
746+
/// columns wide.
747+
///
748+
/// This ensures that escape codes are not screwed up in the process. And if
749+
/// non-empty head and tail are specified, they are inserted between the ANSI
750+
/// symbols from truncated bounds and the slice.
751+
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
747752
#[cfg(feature = "ansi-parsing")]
748753
{
749-
use std::cmp::Ordering;
750-
let mut iter = AnsiCodeIterator::new(s);
751-
let mut length = 0;
752-
let mut rv = None;
753-
754-
while let Some(item) = iter.next() {
755-
match item {
756-
(s, false) => {
757-
if rv.is_none() {
758-
if str_width(s) + length > width - str_width(tail) {
759-
let ts = iter.current_slice();
760-
761-
let mut s_byte = 0;
762-
let mut s_width = 0;
763-
let rest_width = width - str_width(tail) - length;
764-
for c in s.chars() {
765-
s_byte += c.len_utf8();
766-
s_width += char_width(c);
767-
match s_width.cmp(&rest_width) {
768-
Ordering::Equal => break,
769-
Ordering::Greater => {
770-
s_byte -= c.len_utf8();
771-
break;
772-
}
773-
Ordering::Less => continue,
774-
}
775-
}
776-
777-
let idx = ts.len() - s.len() + s_byte;
778-
let mut buf = ts[..idx].to_string();
779-
buf.push_str(tail);
780-
rv = Some(buf);
781-
}
782-
length += str_width(s);
783-
}
754+
let mut pos = 0;
755+
let mut slice = 0..0;
756+
757+
// ANSI symbols outside of the slice
758+
let mut front_ansi = String::new();
759+
let mut back_ansi = String::new();
760+
761+
// Iterate through each ANSI symbol or unicode character while keeping
762+
// track of:
763+
// - pos: cumulated width of characters iterated so far
764+
// - slice: byte indices of the part of the string for which `pos`
765+
// was inside bounds
766+
for (sub, is_ansi) in AnsiCodeIterator::new(s) {
767+
if is_ansi {
768+
if pos < bounds.start {
769+
// An ANSI symbol before the interval: keep for later
770+
front_ansi.push_str(sub);
771+
slice.start += sub.len();
772+
slice.end = slice.start;
773+
} else if pos <= bounds.end {
774+
// An ANSI symbol inside of the interval: extend the slice
775+
slice.end += sub.len();
776+
} else {
777+
// An ANSI symbol after the interval: keep for later
778+
back_ansi.push_str(sub);
784779
}
785-
(s, true) => {
786-
if let Some(ref mut rv) = rv {
787-
rv.push_str(s);
780+
} else {
781+
for c in sub.chars() {
782+
let c_width = char_width(c);
783+
784+
if pos < bounds.start {
785+
// The char is before the interval: move the slice back
786+
slice.start += c.len_utf8();
787+
slice.end = slice.start;
788+
} else if pos + c_width <= bounds.end {
789+
// The char fits into the interval: extend the slice
790+
slice.end += c.len_utf8();
788791
}
792+
793+
pos += c_width;
789794
}
790795
}
791796
}
792797

793-
if let Some(buf) = rv {
794-
Cow::Owned(buf)
798+
let slice = &s[slice];
799+
800+
if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
801+
Cow::Borrowed(slice)
795802
} else {
796-
Cow::Borrowed(s)
803+
Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
797804
}
798805
}
799-
800806
#[cfg(not(feature = "ansi-parsing"))]
801807
{
802-
if s.len() <= width - tail.len() {
803-
Cow::Borrowed(s)
808+
let slice = s.get(bounds).unwrap_or("");
809+
810+
if head.is_empty() && tail.is_empty() {
811+
Cow::Borrowed(slice)
804812
} else {
805-
Cow::Owned(format!(
806-
"{}{}",
807-
s.get(..width - tail.len()).unwrap_or_default(),
808-
tail
809-
))
813+
Cow::Owned(format!("{head}{slice}{tail}"))
810814
}
811815
}
812816
}
813817

818+
/// Truncates a string to a certain number of characters.
819+
///
820+
/// This ensures that escape codes are not screwed up in the process.
821+
/// If the maximum length is hit the string will be truncated but
822+
/// escapes code will still be honored. If truncation takes place
823+
/// the tail string will be appended.
824+
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
825+
if measure_text_width(s) > width {
826+
let tail_width = measure_text_width(tail);
827+
slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
828+
} else {
829+
Cow::Borrowed(s)
830+
}
831+
}
832+
814833
/// Pads a string to fill a certain number of characters.
815834
///
816835
/// This will honor ansi codes correctly and allows you to align a string
@@ -919,8 +938,50 @@ fn test_truncate_str() {
919938
);
920939
}
921940

941+
#[test]
942+
fn test_slice_ansi_str() {
943+
// Note that 🐶 is two columns wide
944+
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
945+
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
946+
947+
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
948+
assert_eq!(measure_text_width(test_str), 16);
949+
950+
assert_eq!(
951+
slice_str(test_str, "", 5..5, ""),
952+
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
953+
);
954+
955+
assert_eq!(
956+
slice_str(test_str, "", 0..5, ""),
957+
"Hello\x1b[31m\x1b[1m\x1b[0m"
958+
);
959+
960+
assert_eq!(
961+
slice_str(test_str, "", 0..6, ""),
962+
"Hello\x1b[31m\x1b[1m\x1b[0m"
963+
);
964+
965+
assert_eq!(
966+
slice_str(test_str, "", 0..7, ""),
967+
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
968+
);
969+
970+
assert_eq!(
971+
slice_str(test_str, "", 4..9, ""),
972+
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
973+
);
974+
975+
assert_eq!(
976+
slice_str(test_str, "", 7..21, ""),
977+
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
978+
);
979+
}
980+
}
981+
922982
#[test]
923983
fn test_truncate_str_no_ansi() {
984+
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
924985
assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
925986
assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
926987
assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");

0 commit comments

Comments
 (0)