Skip to content

Commit 9e2092b

Browse files
committed
Add new function ansi::slice_ansi_str
1 parent 72aa50e commit 9e2092b

File tree

2 files changed

+159
-60
lines changed

2 files changed

+159
-60
lines changed

src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ pub use crate::term::{
8484
};
8585
pub use crate::utils::{
8686
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
87-
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
88-
Color, Emoji, Style, StyledObject,
87+
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
88+
Attribute, Color, Emoji, Style, StyledObject,
8989
};
9090

9191
#[cfg(feature = "ansi-parsing")]

src/utils.rs

Lines changed: 157 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::env;
33
use std::fmt;
44
use std::fmt::{Debug, Formatter};
5+
use std::ops::Range;
56
use std::sync::atomic::{AtomicBool, Ordering};
67

78
use once_cell::sync::Lazy;
@@ -807,77 +808,124 @@ pub(crate) fn char_width(_c: char) -> usize {
807808
1
808809
}
809810

810-
/// Truncates a string to a certain number of characters.
811+
/// Slice a `&str` in terms of text width. This means that only the text
812+
/// columns strictly between `start` and `stop` will be kept.
811813
///
812-
/// This ensures that escape codes are not screwed up in the process.
813-
/// If the maximum length is hit the string will be truncated but
814-
/// escapes code will still be honored. If truncation takes place
815-
/// the tail string will be appended.
816-
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
817-
if measure_text_width(s) <= width {
818-
return Cow::Borrowed(s);
819-
}
820-
814+
/// If a multi-column character overlaps with the end of the interval it will
815+
/// not be included. In such a case, the result will be less than `end - start`
816+
/// columns wide.
817+
///
818+
/// This ensures that escape codes are not screwed up in the process. And if
819+
/// non-empty head and tail are specified, they are inserted between the ANSI
820+
/// codes from truncated bounds and the slice.
821+
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
821822
#[cfg(feature = "ansi-parsing")]
822823
{
823-
use std::cmp::Ordering;
824-
let mut iter = AnsiCodeIterator::new(s);
825-
let mut length = 0;
826-
let mut rv = None;
827-
828-
while let Some(item) = iter.next() {
829-
match item {
830-
(s, false) => {
831-
if rv.is_none() {
832-
if str_width(s) + length > width.saturating_sub(str_width(tail)) {
833-
let ts = iter.current_slice();
834-
835-
let mut s_byte = 0;
836-
let mut s_width = 0;
837-
let rest_width = width.saturating_sub(str_width(tail)).saturating_sub(length);
838-
for c in s.chars() {
839-
s_byte += c.len_utf8();
840-
s_width += char_width(c);
841-
match s_width.cmp(&rest_width) {
842-
Ordering::Equal => break,
843-
Ordering::Greater => {
844-
s_byte -= c.len_utf8();
845-
break;
846-
}
847-
Ordering::Less => continue,
848-
}
849-
}
850-
851-
let idx = ts.len() - s.len() + s_byte;
852-
let mut buf = ts[..idx].to_string();
853-
buf.push_str(tail);
854-
rv = Some(buf);
855-
}
856-
length += str_width(s);
824+
let mut pos = 0; // Current search index by width
825+
let mut code_iter = AnsiCodeIterator::new(s).peekable();
826+
827+
// Search for the beginning of the slice while collecting leading ANSI
828+
// codes
829+
let mut front_ansi = String::new(); // ANSI codes found before bound start
830+
let mut slice_start = 0; // Current search index by bytes
831+
832+
// Skip the text before the start bound. The peeked chunk *may be mutated* to drop the characters that precede the bound.
833+
'search_slice_start: while pos < bounds.start {
834+
let Some((sub, is_ansi)) = code_iter.peek_mut() else {
835+
break;
836+
};
837+
838+
if *is_ansi {
839+
// Keep track of leading ANSI for later output.
840+
front_ansi.push_str(sub);
841+
slice_start += sub.len();
842+
} else {
843+
for (c_idx, c) in sub.char_indices() {
844+
if pos >= bounds.start {
845+
// Ensure we don't drop the remainder of the slice before searching for the
846+
// end bound.
847+
*sub = &sub[c_idx..];
848+
break 'search_slice_start;
857849
}
850+
851+
pos += char_width(c);
852+
slice_start += c.len_utf8();
858853
}
859-
(s, true) => {
860-
if let Some(ref mut rv) = rv {
861-
rv.push_str(s);
862-
}
854+
}
855+
856+
code_iter.next();
857+
}
858+
859+
// Search for the end of the slice. This loop is a bit simpler because we don't need to
860+
// keep track of remaining characters if we cut in the middle of a non-ANSI slice.
861+
let mut slice_end = slice_start;
862+
863+
'search_slice_end: for (sub, is_ansi) in &mut code_iter {
864+
if is_ansi {
865+
// Keep ANSI in the output slice but don't account for them in the total width.
866+
slice_end += sub.len();
867+
continue;
868+
}
869+
870+
for c in sub.chars() {
871+
let c_width = char_width(c);
872+
873+
if pos + c_width > bounds.end {
874+
// We will only search for ANSI codes after breaking this
875+
// loop, so we can safely drop the remainder of `sub`
876+
break 'search_slice_end;
863877
}
878+
879+
pos += c_width;
880+
slice_end += c.len_utf8();
864881
}
865882
}
866883

867-
if let Some(buf) = rv {
868-
Cow::Owned(buf)
869-
} else {
870-
Cow::Borrowed(s)
884+
// Initialise the result (before appending remaining ANSI slices)
885+
let slice = &s[slice_start..slice_end];
886+
887+
let mut result = {
888+
if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
889+
// No allocation is needed here when there are no leading ANSI codes, head, or tail.
890+
Cow::Borrowed(slice)
891+
} else {
892+
Cow::Owned(front_ansi + head + slice + tail)
893+
}
894+
};
895+
896+
// Push back remaining ANSI codes to result
897+
for (sub, is_ansi) in code_iter {
898+
if is_ansi {
899+
result.to_mut().push_str(sub);
900+
}
871901
}
872-
}
873902

903+
result
904+
}
874905
#[cfg(not(feature = "ansi-parsing"))]
875906
{
876-
Cow::Owned(format!(
877-
"{}{}",
878-
&s[..width.saturating_sub(tail.len())],
879-
tail
880-
))
907+
let slice = s.get(bounds).unwrap_or("");
908+
909+
if head.is_empty() && tail.is_empty() {
910+
Cow::Borrowed(slice)
911+
} else {
912+
Cow::Owned(format!("{head}{slice}{tail}"))
913+
}
914+
}
915+
}
916+
917+
/// Truncates a string to a certain number of characters.
918+
///
919+
/// This ensures that escape codes are not screwed up in the process.
920+
/// If the maximum length is hit the string will be truncated but
921+
/// escape codes will still be honored. If truncation takes place
922+
/// the tail string will be appended.
923+
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
924+
if measure_text_width(s) <= width {
925+
Cow::Borrowed(s)
926+
} else {
927+
let tail_width = measure_text_width(tail);
928+
slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
881929
}
882930
}
883931

@@ -1005,6 +1053,57 @@ fn test_truncate_str() {
10051053
);
10061054
}
10071055

1056+
#[test]
1057+
fn test_slice_ansi_str() {
1058+
// Note that 🐶 is two columns wide
1059+
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
1060+
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
1061+
1062+
assert_eq!(
1063+
slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
1064+
format!(">>>{test_str}<<<"),
1065+
);
1066+
1067+
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
1068+
assert_eq!(measure_text_width(test_str), 16);
1069+
1070+
assert_eq!(
1071+
slice_str(test_str, "", 5..5, ""),
1072+
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
1073+
);
1074+
1075+
assert_eq!(
1076+
slice_str(test_str, "", 0..5, ""),
1077+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1078+
);
1079+
1080+
assert_eq!(
1081+
slice_str(test_str, "", 0..6, ""),
1082+
"Hello\x1b[31m\x1b[1m\x1b[0m"
1083+
);
1084+
1085+
assert_eq!(
1086+
slice_str(test_str, "", 0..7, ""),
1087+
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
1088+
);
1089+
1090+
assert_eq!(
1091+
slice_str(test_str, "", 4..9, ""),
1092+
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
1093+
);
1094+
1095+
assert_eq!(
1096+
slice_str(test_str, "", 7..21, ""),
1097+
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
1098+
);
1099+
1100+
assert_eq!(
1101+
slice_str(test_str, ">>>", 7..21, "<<<"),
1102+
"\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
1103+
);
1104+
}
1105+
}
1106+
10081107
#[test]
10091108
fn test_truncate_str_no_ansi() {
10101109
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");

0 commit comments

Comments
 (0)