@@ -2,6 +2,7 @@ use std::borrow::Cow;
2
2
use std:: collections:: BTreeSet ;
3
3
use std:: env;
4
4
use std:: fmt;
5
+ use std:: ops:: Range ;
5
6
use std:: sync:: atomic:: { AtomicBool , Ordering } ;
6
7
7
8
use lazy_static:: lazy_static;
@@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
724
725
}
725
726
726
727
#[ cfg( feature = "ansi-parsing" ) ]
727
- pub ( crate ) fn char_width ( c : char ) -> usize {
728
+ fn char_width ( c : char ) -> usize {
728
729
#[ cfg( feature = "unicode-width" ) ]
729
730
{
730
731
use unicode_width:: UnicodeWidthChar ;
@@ -737,80 +738,98 @@ pub(crate) fn char_width(c: char) -> usize {
737
738
}
738
739
}
739
740
740
- /// Truncates a string to a certain number of characters.
741
/// Slices a `&str` in terms of text width: only the text columns inside
/// `bounds` (from `bounds.start` inclusive to `bounds.end` exclusive) are kept.
///
/// If a multi-column character overlaps with either edge of the interval it
/// is not included. In such a case the result is less than
/// `bounds.end - bounds.start` columns wide.
///
/// Escape codes are not broken up in the process: ANSI codes located before
/// the interval are emitted first, followed by `head`, the slice itself,
/// `tail`, and finally the ANSI codes located after the interval.
///
/// The input is borrowed when nothing has to be added around the slice,
/// otherwise a new string is allocated.
///
/// Without the `ansi-parsing` feature no width computation takes place and
/// `bounds` is interpreted as byte offsets into `s`. The offsets are clamped
/// to the string and moved inwards to the nearest char boundaries, so an
/// out-of-range or mid-character bound shortens the slice rather than
/// emptying it.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
    #[cfg(feature = "ansi-parsing")]
    {
        let mut pos = 0;
        let mut slice = 0..0;

        // ANSI symbols outside of the slice
        let mut front_ansi = String::new();
        let mut back_ansi = String::new();

        // Iterate through each ANSI symbol or unicode character while keeping
        // track of:
        //  - pos: cumulated width of characters iterated so far
        //  - slice: byte offsets of the part of the string for which `pos`
        //    was inside bounds
        for (sub, is_ansi) in AnsiCodeIterator::new(s) {
            if is_ansi {
                if pos < bounds.start {
                    // An ANSI symbol before the interval: keep for later
                    front_ansi.push_str(sub);
                    slice.start += sub.len();
                    slice.end = slice.start;
                } else if pos <= bounds.end {
                    // An ANSI symbol inside of the interval: extend the slice
                    slice.end += sub.len();
                } else {
                    // An ANSI symbol after the interval: keep for later
                    back_ansi.push_str(sub);
                }
            } else {
                for c in sub.chars() {
                    let c_width = char_width(c);

                    if pos < bounds.start {
                        // The char is before the interval: move the slice back
                        slice.start += c.len_utf8();
                        slice.end = slice.start;
                    } else if pos + c_width <= bounds.end {
                        // The char fits into the interval: extend the slice
                        slice.end += c.len_utf8();
                    }

                    // The width is accounted for even when the char is
                    // dropped, so nothing after it can re-enter the slice.
                    pos += c_width;
                }
            }
        }

        let slice = &s[slice];

        if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
        }
    }

    #[cfg(not(feature = "ansi-parsing"))]
    {
        // Clamp the end to the string and step back to a char boundary so
        // that a partially covered character is left out.
        let mut end = bounds.end.min(s.len());
        while !s.is_char_boundary(end) {
            end -= 1;
        }

        // Same for the start, stepping forward instead. `end` is a char
        // boundary, so this never moves past it.
        let mut start = bounds.start.min(end);
        while !s.is_char_boundary(start) {
            start += 1;
        }

        let slice = &s[start..end];

        if head.is_empty() && tail.is_empty() {
            Cow::Borrowed(slice)
        } else {
            Cow::Owned(format!("{head}{slice}{tail}"))
        }
    }
}
813
817
818
+ /// Truncates a string to a certain number of characters.
819
+ ///
820
+ /// This ensures that escape codes are not screwed up in the process.
821
+ /// If the maximum length is hit the string will be truncated but
822
+ /// escapes code will still be honored. If truncation takes place
823
+ /// the tail string will be appended.
824
+ pub fn truncate_str < ' a > ( s : & ' a str , width : usize , tail : & str ) -> Cow < ' a , str > {
825
+ if measure_text_width ( s) > width {
826
+ let tail_width = measure_text_width ( tail) ;
827
+ slice_str ( s, "" , 0 ..width. saturating_sub ( tail_width) , tail)
828
+ } else {
829
+ Cow :: Borrowed ( s)
830
+ }
831
+ }
832
+
814
833
/// Pads a string to fill a certain number of characters.
815
834
///
816
835
/// This will honor ansi codes correctly and allows you to align a string
@@ -919,8 +938,50 @@ fn test_truncate_str() {
919
938
) ;
920
939
}
921
940
941
#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // Bounds covering the whole string give the input back in every
    // feature configuration.
    assert_eq!(slice_str(test_str, "", 0..test_str.len(), ""), test_str);

    if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") {
        // "Hello" (5) + two dogs (2 + 2) + " world!" (7); the escape codes
        // take no columns.
        assert_eq!(measure_text_width(test_str), 16);

        // Empty interval: only the escape codes survive
        assert_eq!(
            slice_str(test_str, "", 5..5, ""),
            "\u{1b}[31m\u{1b}[1m\u{1b}[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..5, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        // The first dog overlaps the end of the interval and is dropped
        assert_eq!(
            slice_str(test_str, "", 0..6, ""),
            "Hello\x1b[31m\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 0..7, ""),
            "Hello\x1b[31m🐶\x1b[1m\x1b[0m"
        );

        assert_eq!(
            slice_str(test_str, "", 4..9, ""),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
        );

        // An end beyond the text width keeps everything up to the end
        assert_eq!(
            slice_str(test_str, "", 7..21, ""),
            "\x1b[31m\x1b[1m🐶\x1b[0m world!"
        );
    }
}
981
+
922
982
#[ test]
923
983
fn test_truncate_str_no_ansi ( ) {
984
+ assert_eq ! ( & truncate_str( "foo bar" , 7 , "!" ) , "foo bar" ) ;
924
985
assert_eq ! ( & truncate_str( "foo bar" , 5 , "" ) , "foo b" ) ;
925
986
assert_eq ! ( & truncate_str( "foo bar" , 5 , "!" ) , "foo !" ) ;
926
987
assert_eq ! ( & truncate_str( "foo bar baz" , 10 , "..." ) , "foo bar..." ) ;
0 commit comments