Add new function ansi::slice_ansi_str

remi-dupre · remi-dupre · commit 1599bf901ece · 2025-03-11T23:09:30.000+01:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -19,7 +19,7 @@ ansi-parsing = []
 
 [dependencies]
 libc = "0.2.99"
-once_cell = "1.8"
+once_cell = "1.8,<1.21" # version 1.21 requires Rust >= 1.70
 unicode-width = { version = "0.2", optional = true }
 
 [target.'cfg(windows)'.dependencies]
diff --git a/src/lib.rs b/src/lib.rs
@@ -84,8 +84,8 @@ pub use crate::term::{
 };
 pub use crate::utils::{
     colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
-    set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
-    Color, Emoji, Style, StyledObject,
+    set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
+    Attribute, Color, Emoji, Style, StyledObject,
 };
 
 #[cfg(feature = "ansi-parsing")]
diff --git a/src/utils.rs b/src/utils.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::env;
 use std::fmt;
 use std::fmt::{Debug, Formatter};
+use std::ops::Range;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use once_cell::sync::Lazy;
@@ -800,80 +801,119 @@ pub(crate) fn char_width(_c: char) -> usize {
     1
 }
 
-/// Truncates a string to a certain number of characters.
+/// Slice a `&str` in terms of text width. This means that only the text
+/// columns strictly between `start` and `stop` will be kept.
 ///
-/// This ensures that escape codes are not screwed up in the process.
-/// If the maximum length is hit the string will be truncated but
-/// escapes code will still be honored.  If truncation takes place
-/// the tail string will be appended.
-pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
+/// If a multi-columns character overlaps with the end of the interval it will
+/// not be included. In such a case, the result will be less than `end - start`
+/// columns wide.
+///
+/// This ensures that escape codes are not screwed up in the process. And if
+/// non-empty head and tail are specified, they are inserted between the ANSI
+/// codes from truncated bounds and the slice.
+pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
     #[cfg(feature = "ansi-parsing")]
     {
-        use std::cmp::Ordering;
-        let mut iter = AnsiCodeIterator::new(s);
-        let mut length = 0;
-        let mut rv = None;
-
-        while let Some(item) = iter.next() {
-            match item {
-                (s, false) => {
-                    if rv.is_none() {
-                        if str_width(s) + length > width - str_width(tail) {
-                            let ts = iter.current_slice();
-
-                            let mut s_byte = 0;
-                            let mut s_width = 0;
-                            let rest_width = width - str_width(tail) - length;
-                            for c in s.chars() {
-                                s_byte += c.len_utf8();
-                                s_width += char_width(c);
-                                match s_width.cmp(&rest_width) {
-                                    Ordering::Equal => break,
-                                    Ordering::Greater => {
-                                        s_byte -= c.len_utf8();
-                                        break;
-                                    }
-                                    Ordering::Less => continue,
-                                }
-                            }
-
-                            let idx = ts.len() - s.len() + s_byte;
-                            let mut buf = ts[..idx].to_string();
-                            buf.push_str(tail);
-                            rv = Some(buf);
-                        }
-                        length += str_width(s);
-                    }
-                }
-                (s, true) => {
-                    if let Some(ref mut rv) = rv {
-                        rv.push_str(s);
-                    }
+        let mut pos = 0;
+        let mut code_iter = AnsiCodeIterator::new(s).peekable();
+
+        // Search for the begining of the slice while collecting heading ANSI
+        // codes
+        let mut slice_start = 0;
+        let mut front_ansi = String::new();
+
+        while pos < bounds.start {
+            let (sub, is_ansi) = match code_iter.peek_mut() {
+                None => break,
+                Some(x) => x,
+            };
+
+            if *is_ansi {
+                front_ansi.push_str(sub);
+                slice_start += sub.len();
+            } else if let Some(c) = sub.chars().next() {
+                // Pop the head char of `sub` while keeping `sub` on top of
+                // the iterator
+                pos += char_width(c);
+                slice_start += c.len_utf8();
+                *sub = &sub[c.len_utf8()..];
+                continue;
+            }
+
+            code_iter.next();
+        }
+
+        // Search for the end of the slice
+        let mut slice_end = slice_start;
+
+        'search_slice_end: for (sub, is_ansi) in &mut code_iter {
+            if is_ansi {
+                slice_end += sub.len();
+                continue;
+            }
+
+            for c in sub.chars() {
+                let c_width = char_width(c);
+
+                if pos + c_width > bounds.end {
+                    // We will only search for ANSI codes after breaking this
+                    // loop, so we can safely drop the remaining of `sub`
+                    break 'search_slice_end;
                 }
+
+                pos += c_width;
+                slice_end += c.len_utf8();
             }
         }
 
-        if let Some(buf) = rv {
-            Cow::Owned(buf)
-        } else {
-            Cow::Borrowed(s)
+        // Initialise the result, no allocation may have to be performed if
+        // both head and front are empty
+        let slice = &s[slice_start..slice_end];
+
+        let mut result = {
+            if front_ansi.is_empty() && head.is_empty() && tail.is_empty() {
+                Cow::Borrowed(slice)
+            } else {
+                Cow::Owned(front_ansi + head + slice + tail)
+            }
+        };
+
+        // Push back remaining ANSI codes to result
+        for (sub, is_ansi) in code_iter {
+            if is_ansi {
+                *result.to_mut() += sub;
+            }
         }
-    }
 
+        result
+    }
     #[cfg(not(feature = "ansi-parsing"))]
     {
-        if s.len() <= width - tail.len() {
-            Cow::Borrowed(s)
+        let slice = s.get(bounds).unwrap_or("");
+
+        if head.is_empty() && tail.is_empty() {
+            Cow::Borrowed(slice)
         } else {
-            Cow::Owned(format!(
-                "{}{}",
-                s.get(..width - tail.len()).unwrap_or_default(),
-                tail
-            ))
+            Cow::Owned(format!("{head}{slice}{tail}"))
         }
     }
 }
 
+/// Truncates a string to a certain number of characters.
+///
+/// This ensures that escape codes are not screwed up in the process.
+/// If the maximum length is hit the string will be truncated but
+/// escapes code will still be honored.  If truncation takes place
+/// the tail string will be appended.
+pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
+    if measure_text_width(s) > width {
+        let tail_width = measure_text_width(tail);
+        slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
+    } else {
+        Cow::Borrowed(s)
+    }
+}
+
 /// Pads a string to fill a certain number of characters.
 ///
 /// This will honor ansi codes correctly and allows you to align a string
@@ -979,8 +1019,60 @@ fn test_truncate_str() {
     );
 }
 
+#[test]
+fn test_slice_ansi_str() {
+    // Note that 🐶 is two columns wide
+    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
+    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
+
+    assert_eq!(
+        slice_str(test_str, ">>>", 0..test_str.len(), "<<<"),
+        format!(">>>{test_str}<<<"),
+    );
+
+    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
+        assert_eq!(measure_text_width(test_str), 16);
+
+        assert_eq!(
+            slice_str(test_str, "", 5..5, ""),
+            "\u{1b}[31m\u{1b}[1m\u{1b}[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..5, ""),
+            "Hello\x1b[31m\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..6, ""),
+            "Hello\x1b[31m\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..7, ""),
+            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 4..9, ""),
+            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 7..21, ""),
+            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
+        );
+
+        assert_eq!(
+            slice_str(test_str, ">>>", 7..21, "<<<"),
+            "\x1b[31m>>>\x1b[1m🐶\x1b[0m world!<<<"
+        );
+    }
+}
+
 #[test]
 fn test_truncate_str_no_ansi() {
+    assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
     assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
     assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
     assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");