Make slice_str similar to truncate_str

remi-dupre · remi-dupre · commit ce77cc52968c · 2024-02-08T00:25:02.000+01:00
diff --git a/src/ansi.rs b/src/ansi.rs
@@ -4,8 +4,6 @@ use std::{
     str::CharIndices,
 };
 
-use crate::utils::char_width;
-
 #[derive(Debug, Clone, Copy)]
 enum State {
     Start,
@@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {
 
 impl<'a> FusedIterator for AnsiCodeIterator<'a> {}
 
-/// Slice a `&str` in terms of text width. This means that only the text
-/// columns strictly between `start` and `stop` will be kept.
-///
-/// If a multi-columns character overlaps with the end of the interval it will
-/// not be included. In such a case, the result will be less than `end - start`
-/// columns wide.
-pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
-    if end <= start {
-        return "";
-    }
-
-    let mut pos = 0;
-    let mut res_start = 0;
-    let mut res_end = 0;
-
-    'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) {
-        // As ansi symbols have a width of 0 we can safely early-interupt
-        // the outer for loop only if current pos strictly greater than
-        // `end`.
-        if pos > end {
-            break;
-        }
-
-        if is_ansi {
-            if pos < start {
-                res_start += sub.len();
-                res_end = res_start;
-            } else if pos <= end {
-                res_end += sub.len();
-            } else {
-                break 'outer;
-            }
-        } else {
-            for c in sub.chars() {
-                let c_width = char_width(c);
-
-                if pos < start {
-                    res_start += c.len_utf8();
-                    res_end = res_start;
-                } else if pos + c_width <= end {
-                    res_end += c.len_utf8();
-                } else {
-                    break 'outer;
-                }
-
-                pos += char_width(c);
-            }
-        }
-    }
-
-    &s[res_start..res_end]
-}
-
 #[cfg(test)]
 mod tests {
-    use crate::measure_text_width;
-
     use super::*;
 
     use lazy_static::lazy_static;
@@ -492,37 +435,4 @@ mod tests {
         assert_eq!(iter.rest_slice(), "");
         assert_eq!(iter.next(), None);
     }
-
-    #[test]
-    fn test_slice_ansi_str() {
-        // Note that 🐶 is two columns wide
-        let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
-        assert_eq!(slice_ansi_str(test_str, 5, 5), "");
-        assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str);
-
-        if cfg!(feature = "unicode-width") {
-            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
-            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
-            assert_eq!(measure_text_width(test_str), 16);
-            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
-            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
-            assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m");
-            assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!");
-            assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!");
-            assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!");
-
-            assert_eq!(
-                slice_ansi_str(test_str, 4, 9),
-                "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
-            );
-        } else {
-            assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
-            assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m");
-
-            assert_eq!(
-                slice_ansi_str(test_str, 4, 9),
-                "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w"
-            );
-        }
-    }
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -82,12 +82,12 @@ pub use crate::term::{
 };
 pub use crate::utils::{
     colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
-    set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
-    Color, Emoji, Style, StyledObject,
+    set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
+    Attribute, Color, Emoji, Style, StyledObject,
 };
 
 #[cfg(feature = "ansi-parsing")]
-pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
+pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};
 
 mod common_term;
 mod kb;
diff --git a/src/utils.rs b/src/utils.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::collections::BTreeSet;
 use std::env;
 use std::fmt;
+use std::ops::Range;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use lazy_static::lazy_static;
@@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
 }
 
 #[cfg(feature = "ansi-parsing")]
-pub(crate) fn char_width(c: char) -> usize {
+fn char_width(c: char) -> usize {
     #[cfg(feature = "unicode-width")]
     {
         use unicode_width::UnicodeWidthChar;
@@ -737,80 +738,98 @@ pub(crate) fn char_width(c: char) -> usize {
     }
 }
 
-/// Truncates a string to a certain number of characters.
+/// Slice a `&str` in terms of text width. This means that only the text
+/// columns from `bounds.start` (included) to `bounds.end` (excluded) will be kept.
 ///
-/// This ensures that escape codes are not screwed up in the process.
-/// If the maximum length is hit the string will be truncated but
-/// escapes code will still be honored.  If truncation takes place
-/// the tail string will be appended.
-pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
+/// If a multi-column character overlaps with either end of the interval it
+/// will not be included. In such a case, the result will be less than
+/// `bounds.end - bounds.start` columns wide.
+///
+/// With the `ansi-parsing` feature, escape codes outside of the bounds are
+/// kept: the result is made of the codes found before the slice, `head`,
+/// the slice itself, `tail` and finally the codes found after the slice.
+pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
     #[cfg(feature = "ansi-parsing")]
     {
-        use std::cmp::Ordering;
-        let mut iter = AnsiCodeIterator::new(s);
-        let mut length = 0;
-        let mut rv = None;
-
-        while let Some(item) = iter.next() {
-            match item {
-                (s, false) => {
-                    if rv.is_none() {
-                        if str_width(s) + length > width - str_width(tail) {
-                            let ts = iter.current_slice();
-
-                            let mut s_byte = 0;
-                            let mut s_width = 0;
-                            let rest_width = width - str_width(tail) - length;
-                            for c in s.chars() {
-                                s_byte += c.len_utf8();
-                                s_width += char_width(c);
-                                match s_width.cmp(&rest_width) {
-                                    Ordering::Equal => break,
-                                    Ordering::Greater => {
-                                        s_byte -= c.len_utf8();
-                                        break;
-                                    }
-                                    Ordering::Less => continue,
-                                }
-                            }
-
-                            let idx = ts.len() - s.len() + s_byte;
-                            let mut buf = ts[..idx].to_string();
-                            buf.push_str(tail);
-                            rv = Some(buf);
-                        }
-                        length += str_width(s);
-                    }
+        let mut pos = 0;
+        let mut slice = 0..0;
+
+        // ANSI symbols outside of the slice
+        let mut front_ansi = String::new();
+        let mut back_ansi = String::new();
+
+        // Iterate through each ANSI symbol or unicode character while keeping
+        // track of:
+        //   - pos: cumulative width of the characters iterated so far
+        //   - slice: byte indices of the part of the string for which `pos`
+        //     was inside bounds
+        for (sub, is_ansi) in AnsiCodeIterator::new(s) {
+            if is_ansi {
+                if pos < bounds.start {
+                    // An ANSI symbol before the interval: keep for later
+                    front_ansi.push_str(sub);
+                    slice.start += sub.len();
+                    slice.end = slice.start;
+                } else if pos <= bounds.end {
+                    // An ANSI symbol inside of the interval: extend the slice
+                    slice.end += sub.len();
+                } else {
+                    // An ANSI symbol after the interval: keep for later
+                    back_ansi.push_str(sub);
                 }
-                (s, true) => {
-                    if let Some(ref mut rv) = rv {
-                        rv.push_str(s);
+            } else {
+                for c in sub.chars() {
+                    let c_width = char_width(c);
+
+                    if pos < bounds.start {
+                        // The char is before the interval: move the slice start past it
+                        slice.start += c.len_utf8();
+                        slice.end = slice.start;
+                    } else if pos + c_width <= bounds.end {
+                        // The char fits into the interval: extend the slice
+                        slice.end += c.len_utf8();
                     }
+
+                    pos += c_width;
                 }
             }
         }
 
-        if let Some(buf) = rv {
-            Cow::Owned(buf)
+        let slice = &s[slice];
+
+        if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
+            Cow::Borrowed(slice)
         } else {
-            Cow::Borrowed(s)
+            Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
         }
     }
-
     #[cfg(not(feature = "ansi-parsing"))]
     {
-        if s.len() <= width - tail.len() {
-            Cow::Borrowed(s)
+        let slice = s.get(bounds).unwrap_or("");
+
+        if head.is_empty() && tail.is_empty() {
+            Cow::Borrowed(slice)
         } else {
-            Cow::Owned(format!(
-                "{}{}",
-                s.get(..width - tail.len()).unwrap_or_default(),
-                tail
-            ))
+            Cow::Owned(format!("{head}{slice}{tail}"))
         }
     }
 }
 
+/// Truncates a string to a certain width, in terms of text columns.
+///
+/// This ensures that escape codes are not screwed up in the process.
+/// If the maximum width is exceeded the string will be truncated but
+/// escape codes will still be honored.  If truncation takes place
+/// the tail string will be appended.
+pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
+    if measure_text_width(s) > width {
+        let tail_width = measure_text_width(tail);
+        slice_str(s, "", 0..width.saturating_sub(tail_width), tail)
+    } else {
+        Cow::Borrowed(s)
+    }
+}
+
 /// Pads a string to fill a certain number of characters.
 ///
 /// This will honor ansi codes correctly and allows you to align a string
@@ -919,8 +938,50 @@ fn test_truncate_str() {
     );
 }
 
+#[test]
+fn test_slice_ansi_str() {
+    // Note that 🐶 is two columns wide
+    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
+    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);
+
+    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
+        assert_eq!(measure_text_width(test_str), 16);
+
+        assert_eq!(
+            slice_str(test_str, "", 5..5, ""),
+            "\u{1b}[31m\u{1b}[1m\u{1b}[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..5, ""),
+            "Hello\x1b[31m\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..6, ""),
+            "Hello\x1b[31m\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 0..7, ""),
+            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 4..9, ""),
+            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
+        );
+
+        assert_eq!(
+            slice_str(test_str, "", 7..21, ""),
+            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
+        );
+    }
+}
+
 #[test]
 fn test_truncate_str_no_ansi() {
+    assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
     assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
     assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
     assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");