Skip to content

Add utility function ansi::slice_ansi_str #206

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ pub use crate::term::{
};
pub use crate::utils::{
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
Color, Emoji, Style, StyledObject,
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
Attribute, Color, Emoji, Style, StyledObject,
};

#[cfg(feature = "ansi-parsing")]
Expand Down
215 changes: 157 additions & 58 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::env;
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::ops::Range;
use std::sync::atomic::{AtomicBool, Ordering};

use once_cell::sync::Lazy;
Expand Down Expand Up @@ -807,77 +808,124 @@ pub(crate) fn char_width(_c: char) -> usize {
1
}

/// Truncates a string to a certain number of characters.
/// Slice a `&str` in terms of text width. This means that only the text
/// columns in the half-open range `bounds.start..bounds.end` will be kept.
///
/// This ensures that escape codes are not screwed up in the process.
/// If the maximum length is hit the string will be truncated but
/// escape codes will still be honored. If truncation takes place
/// the tail string will be appended.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
if measure_text_width(s) <= width {
return Cow::Borrowed(s);
}

/// If a multi-column character overlaps with the end of the interval it will
/// not be included. In such a case, the result will be less than
/// `bounds.end - bounds.start` columns wide.
///
/// This ensures that escape codes are not screwed up in the process. If
/// non-empty `head` and `tail` are specified, `head` is inserted between the
/// ANSI codes found before the start bound and the slice, and `tail` between
/// the slice and the remaining ANSI codes that follow it.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
#[cfg(feature = "ansi-parsing")]
{
use std::cmp::Ordering;
let mut iter = AnsiCodeIterator::new(s);
let mut length = 0;
let mut rv = None;

while let Some(item) = iter.next() {
match item {
(s, false) => {
if rv.is_none() {
if str_width(s) + length > width.saturating_sub(str_width(tail)) {
let ts = iter.current_slice();

let mut s_byte = 0;
let mut s_width = 0;
let rest_width = width.saturating_sub(str_width(tail)).saturating_sub(length);
for c in s.chars() {
s_byte += c.len_utf8();
s_width += char_width(c);
match s_width.cmp(&rest_width) {
Ordering::Equal => break,
Ordering::Greater => {
s_byte -= c.len_utf8();
break;
}
Ordering::Less => continue,
}
}

let idx = ts.len() - s.len() + s_byte;
let mut buf = ts[..idx].to_string();
buf.push_str(tail);
rv = Some(buf);
}
length += str_width(s);
let mut pos = 0; // Current search index by width
let mut code_iter = AnsiCodeIterator::new(s).peekable();

// Search for the beginning of the slice while collecting leading ANSI
// codes
let mut front_ansi = String::new(); // ANSI codes found before bound start
let mut slice_start = 0; // Current search index by bytes

// Extract the leading slice, which *may be mutated* to remove just its first character.
'search_slice_start: while pos < bounds.start {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm counting 5 loops here, two of which are nested. Fundamentally, why is that necessary? It seems like there is more complexity here than is warranted.

let Some((sub, is_ansi)) = code_iter.peek_mut() else {
break;
};

if *is_ansi {
// Keep track of leading ANSI for later output.
front_ansi.push_str(sub);
slice_start += sub.len();
} else {
for (c_idx, c) in sub.char_indices() {
if pos >= bounds.start {
// Ensure we don't drop the remainder of the slice before searching for the
// end bound.
*sub = &sub[c_idx..];
break 'search_slice_start;
}

pos += char_width(c);
slice_start += c.len_utf8();
}
(s, true) => {
if let Some(ref mut rv) = rv {
rv.push_str(s);
}
}

code_iter.next();
}

// Search for the end of the slice. This loop is a bit simpler because we don't need to
// keep track of remaining characters if we cut in the middle of a non-ANSI slice.
let mut slice_end = slice_start;

'search_slice_end: for (sub, is_ansi) in &mut code_iter {
if is_ansi {
// Keep ANSI in the output slice but don't account for them in the total width.
slice_end += sub.len();
continue;
}

for c in sub.chars() {
let c_width = char_width(c);

if pos + c_width > bounds.end {
// We will only search for ANSI codes after breaking this
// loop, so we can safely drop the remainder of `sub`
break 'search_slice_end;
}

pos += c_width;
slice_end += c.len_utf8();
}
}

if let Some(buf) = rv {
Cow::Owned(buf)
} else {
Cow::Borrowed(s)
// Initialise the result (before appending remaining ANSI slices)
let slice = &s[slice_start..slice_end];

let mut result = {
if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
// No allocation is needed yet when there is nothing to prepend or append.
Cow::Borrowed(slice)
} else {
Cow::Owned(front_ansi + head + slice + tail)
}
};

// Push back remaining ANSI codes to result
for (sub, is_ansi) in code_iter {
if is_ansi {
result.to_mut().push_str(sub);
}
}
}

result
}
#[cfg(not(feature = "ansi-parsing"))]
{
Cow::Owned(format!(
"{}{}",
&s[..width.saturating_sub(tail.len())],
tail
))
let slice = s.get(bounds).unwrap_or("");

if head.is_empty() && tail.is_empty() {
Cow::Borrowed(slice)
} else {
Cow::Owned(format!("{head}{slice}{tail}"))
}
}
}

/// Shortens `s` so that it occupies at most `width` text columns.
///
/// A string whose measured width already fits is handed back borrowed and
/// unchanged. Otherwise the text is cut so that the kept part followed by
/// `tail` fits within `width` columns, and `tail` is appended.
///
/// The cut itself is delegated to `slice_str`, so escape codes are not
/// broken apart when ANSI parsing is enabled.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
    if measure_text_width(s) <= width {
        return Cow::Borrowed(s);
    }

    // Reserve room for the tail; saturating keeps a tail wider than `width`
    // from underflowing and simply leaves zero columns for the text.
    let text_columns = width.saturating_sub(measure_text_width(tail));
    slice_str(s, "", 0..text_columns, tail)
}

Expand Down Expand Up @@ -1005,6 +1053,57 @@ fn test_truncate_str() {
);
}

// Exercises `slice_str` on a string that mixes plain text, ANSI codes and
// double-width characters.
#[test]
fn test_slice_ansi_str() {
// Note that 🐶 is two columns wide
// Column layout of the visible text: "Hello" = 0..5, first 🐶 = 5..7,
// second 🐶 = 7..9, " world!" = 9..16.
let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
// The byte length is used as an end bound large enough to cover the whole
// string, so the input must come back unchanged.
assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

// With the whole string selected, head and tail simply wrap it.
assert_eq!(
slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
format!(">>>{test_str}<<<"),
);

// The column-based expectations below are only checked when both features
// are enabled.
if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
assert_eq!(measure_text_width(test_str), 16);

// An empty range keeps no text, but every ANSI code is still emitted.
assert_eq!(
slice_str(test_str, "", 5..5, ""),
"\u{1b}[31m\u{1b}[1m\u{1b}[0m"
);

// Exactly the five columns of "Hello", followed by all ANSI codes.
assert_eq!(
slice_str(test_str, "", 0..5, ""),
"Hello\x1b[31m\x1b[1m\x1b[0m"
);

// The first 🐶 would straddle column 6, so it is left out and the result
// is one column narrower than requested.
assert_eq!(
slice_str(test_str, "", 0..6, ""),
"Hello\x1b[31m\x1b[1m\x1b[0m"
);

// The first 🐶 ends exactly at column 7, so it is kept.
assert_eq!(
slice_str(test_str, "", 0..7, ""),
"Hello\x1b[31m🐶\x1b[1m\x1b[0m"
);

// Starting mid-word: only the last letter of "Hello" plus both 🐶.
assert_eq!(
slice_str(test_str, "", 4..9, ""),
"o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
);

// An end bound past the text width (16) keeps everything from column 7 on;
// the ANSI code that precedes the start bound is still emitted first.
assert_eq!(
slice_str(test_str, "", 7..21, ""),
"\x1b[31m\x1b[1m🐶\x1b[0m world!"
);

// `head` lands after the ANSI code found before the start bound, and
// `tail` is appended after the slice.
assert_eq!(
slice_str(test_str, ">>>", 7..21, "<<<"),
"\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
);
}
}

#[test]
fn test_truncate_str_no_ansi() {
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
Expand Down
Loading