diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index 82ed0ab0f6..4cc8a9d081 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -119,7 +119,7 @@ fn extract_value(p: Result>, input: &s } ExtendedParserError::PartialMatch(v, rest) => { let bytes = input.as_encoded_bytes(); - if !bytes.is_empty() && bytes[0] == b'\'' { + if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') { show_warning!( "{}: character(s) following character constant have been ignored", &rest, diff --git a/src/uucore/src/lib/features/format/spec.rs b/src/uucore/src/lib/features/format/spec.rs index e6ac2e8842..72de13747a 100644 --- a/src/uucore/src/lib/features/format/spec.rs +++ b/src/uucore/src/lib/features/format/spec.rs @@ -316,7 +316,7 @@ impl Spec { match self { Self::Char { width, align_left } => { let (width, neg_width) = - resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default(); + resolve_asterisk_width(*width, &mut args).unwrap_or_default(); write_padded(writer, &[args.get_char()], width, *align_left || neg_width) } Self::String { @@ -325,7 +325,7 @@ impl Spec { precision, } => { let (width, neg_width) = - resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default(); + resolve_asterisk_width(*width, &mut args).unwrap_or_default(); // GNU does do this truncation on a byte level, see for instance: // printf "%.1s" 🙃 @@ -333,7 +333,7 @@ impl Spec { // For now, we let printf panic when we truncate within a code point. // TODO: We need to not use Rust's formatting for aligning the output, // so that we can just write bytes to stdout without panicking. - let precision = resolve_asterisk(*precision, &mut args); + let precision = resolve_asterisk_precision(*precision, &mut args); let s = args.get_str(); let truncated = match precision { Some(p) if p < s.len() => &s[..p], @@ -349,7 +349,7 @@ impl Spec { Self::EscapedString => { let s = args.get_str(); let mut parsed = Vec::new(); - for c in parse_escape_only(s.as_bytes(), OctalParsing::default()) { + for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) { match c.write(&mut parsed)? { ControlFlow::Continue(()) => {} ControlFlow::Break(()) => { @@ -382,8 +382,10 @@ impl Spec { positive_sign, alignment, } => { - let width = resolve_asterisk(*width, &mut args).unwrap_or(0); - let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0); + let (width, neg_width) = + resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); + let precision = + resolve_asterisk_precision(*precision, &mut args).unwrap_or_default(); let i = args.get_i64(); if precision as u64 > i32::MAX as u64 { @@ -394,7 +396,11 @@ impl Spec { width, precision, positive_sign: *positive_sign, - alignment: *alignment, + alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, } .fmt(writer, i) .map_err(FormatError::IoError) @@ -405,8 +411,10 @@ impl Spec { precision, alignment, } => { - let width = resolve_asterisk(*width, &mut args).unwrap_or(0); - let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0); + let (width, neg_width) = + resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); + let precision = + resolve_asterisk_precision(*precision, &mut args).unwrap_or_default(); let i = args.get_u64(); if precision as u64 > i32::MAX as u64 { @@ -417,7 +425,11 @@ impl Spec { variant: *variant, precision, width, - alignment: *alignment, + alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, } .fmt(writer, i) .map_err(FormatError::IoError) @@ -431,8 +443,9 @@ impl Spec { alignment, precision, } => { - let width = resolve_asterisk(*width, &mut args).unwrap_or(0); - let precision = resolve_asterisk(*precision, &mut args); + let (width, neg_width) = + resolve_asterisk_width(*width, &mut args).unwrap_or((0, false)); + let precision = resolve_asterisk_precision(*precision, &mut args); let f: ExtendedBigDecimal = args.get_extended_big_decimal(); if precision.is_some_and(|p| p as u64 > i32::MAX as u64) { @@ -448,7 +461,11 @@ impl Spec { case: *case, force_decimal: *force_decimal, positive_sign: *positive_sign, - alignment: *alignment, + alignment: if neg_width { + NumberAlignment::Left + } else { + *alignment + }, } .fmt(writer, &f) .map_err(FormatError::IoError) @@ -457,18 +474,9 @@ impl Spec { } } -fn resolve_asterisk<'a>( - option: Option>, - mut args: impl ArgumentIter<'a>, -) -> Option { - match option { - None => None, - Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)), - Some(CanAsterisk::Fixed(w)) => Some(w), - } -} - -fn resolve_asterisk_maybe_negative<'a>( +/// Determine the width, potentially getting a value from args +/// Returns the non-negative width and whether the value should be left-aligned. +fn resolve_asterisk_width<'a>( option: Option>, mut args: impl ArgumentIter<'a>, ) -> Option<(usize, bool)> { @@ -486,6 +494,23 @@ fn resolve_asterisk_maybe_negative<'a>( } } +/// Determines the precision, which should (if defined) +/// be a non-negative number. +fn resolve_asterisk_precision<'a>( + option: Option>, + mut args: impl ArgumentIter<'a>, +) -> Option { + match option { + None => None, + Some(CanAsterisk::Asterisk) => match args.get_i64() { + v if v >= 0 => usize::try_from(v).ok(), + v if v < 0 => Some(0usize), + _ => None, + }, + Some(CanAsterisk::Fixed(w)) => Some(w), + } +} + fn write_padded( mut writer: impl Write, text: &[u8], @@ -527,3 +552,110 @@ fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option { } } } + +#[cfg(test)] +mod tests { + use super::*; + + mod resolve_asterisk_width { + use super::*; + use crate::format::FormatArgument; + + #[test] + fn no_width() { + assert_eq!(None, resolve_asterisk_width(None, Vec::new().iter())); + } + + #[test] + fn fixed_width() { + assert_eq!( + Some((42, false)), + resolve_asterisk_width(Some(CanAsterisk::Fixed(42)), Vec::new().iter()) + ); + } + + #[test] + fn asterisks_with_numbers() { + assert_eq!( + Some((42, false)), + resolve_asterisk_width( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::SignedInt(42)].iter() + ) + ); + assert_eq!( + Some((42, false)), + resolve_asterisk_width( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::Unparsed("42".to_string())].iter() + ) + ); + + assert_eq!( + Some((42, true)), + resolve_asterisk_width( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::SignedInt(-42)].iter() + ) + ); + assert_eq!( + Some((42, true)), + resolve_asterisk_width( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::Unparsed("-42".to_string())].iter() + ) + ); + } + } + + mod resolve_asterisk_precision { + use super::*; + use crate::format::FormatArgument; + + #[test] + fn no_width() { + assert_eq!(None, resolve_asterisk_precision(None, Vec::new().iter())); + } + + #[test] + fn fixed_width() { + assert_eq!( + Some(42), + resolve_asterisk_precision(Some(CanAsterisk::Fixed(42)), Vec::new().iter()) + ); + } + + #[test] + fn asterisks_with_numbers() { + assert_eq!( + Some(42), + resolve_asterisk_precision( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::SignedInt(42)].iter() + ) + ); + assert_eq!( + Some(42), + resolve_asterisk_precision( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::Unparsed("42".to_string())].iter() + ) + ); + + assert_eq!( + Some(0), + resolve_asterisk_precision( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::SignedInt(-42)].iter() + ) + ); + assert_eq!( + Some(0), + resolve_asterisk_precision( + Some(CanAsterisk::Asterisk), + vec![FormatArgument::Unparsed("-42".to_string())].iter() + ) + ); + } + } +} diff --git a/src/uucore/src/lib/features/parser/num_parser.rs b/src/uucore/src/lib/features/parser/num_parser.rs index c072b87029..726e89f67c 100644 --- a/src/uucore/src/lib/features/parser/num_parser.rs +++ b/src/uucore/src/lib/features/parser/num_parser.rs @@ -360,8 +360,8 @@ fn parse( input: &str, integral_only: bool, ) -> Result> { - // Parse the "'" prefix separately - if let Some(rest) = input.strip_prefix('\'') { + // Parse the " and ' prefixes separately + if let Some(rest) = input.strip_prefix(['\'', '"']) { let mut chars = rest.char_indices().fuse(); let v = chars .next() @@ -465,11 +465,11 @@ fn parse( // If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful if let Some((0, _)) = chars.peek() { - if integral_only { - return Err(ExtendedParserError::NotNumeric); + return if integral_only { + Err(ExtendedParserError::NotNumeric) } else { - return parse_special_value(unsigned, negative); - } + parse_special_value(unsigned, negative) + }; } let ebd_result = construct_extended_big_decimal(digits, negative, base, scale, exponent); diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index 61e14608a4..3f9bd88033 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -69,6 +69,21 @@ fn escaped_octal_and_newline() { .stdout_only("\x1F7\n"); } +#[test] +fn variable_sized_octal() { + for x in ["|\\5|", "|\\05|", "|\\005|"] { + new_ucmd!() + .arg(x) + .succeeds() + .stdout_only_bytes([b'|', 5u8, b'|']); + } + + new_ucmd!() + .arg("|\\0005|") + .succeeds() + .stdout_only_bytes([b'|', 0, b'5', b'|']); +} + #[test] fn escaped_unicode_four_digit() { new_ucmd!().args(&["\\u0125"]).succeeds().stdout_only("ĥ"); @@ -82,6 +97,19 @@ fn escaped_unicode_eight_digit() { .stdout_only("ĥ"); } +#[test] +fn escaped_unicode_null_byte() { + new_ucmd!() + .args(&["\\0001_"]) + .succeeds() + .stdout_is_bytes([0u8, b'1', b'_']); + + new_ucmd!() + .args(&["%b", "\\0001_"]) + .succeeds() + .stdout_is_bytes([1u8, b'_']); +} + #[test] fn escaped_percent_sign() { new_ucmd!() @@ -135,6 +163,21 @@ fn sub_b_string_handle_escapes() { .stdout_only("hello \tworld"); } +#[test] +fn sub_b_string_variable_size_unicode() { + for x in ["\\5|", "\\05|", "\\005|", "\\0005|"] { + new_ucmd!() + .args(&["|%b", x]) + .succeeds() + .stdout_only_bytes([b'|', 5u8, b'|']); + } + + new_ucmd!() + .args(&["|%b", "\\00005|"]) + .succeeds() + .stdout_only_bytes([b'|', 0, b'5', b'|']); +} + #[test] fn sub_b_string_validate_field_params() { new_ucmd!() @@ -260,6 +303,16 @@ fn sub_num_int_char_const_in() { .args(&["emoji is %i", "'🙃"]) .succeeds() .stdout_only("emoji is 128579"); + + new_ucmd!() + .args(&["ninety seven is %i", "\"a"]) + .succeeds() + .stdout_only("ninety seven is 97"); + + new_ucmd!() + .args(&["emoji is %i", "\"🙃"]) + .succeeds() + .stdout_only("emoji is 128579"); } #[test] @@ -544,6 +597,76 @@ fn sub_any_asterisk_negative_first_param() { .stdout_only("a(x )b"); // Would be 'a( x)b' if -5 was 5 } +#[test] +fn sub_any_asterisk_first_param_with_integer() { + new_ucmd!() + .args(&["|%*d|", "3", "0"]) + .succeeds() + .stdout_only("| 0|"); + + new_ucmd!() + .args(&["|%*d|", "1", "0"]) + .succeeds() + .stdout_only("|0|"); + + new_ucmd!() + .args(&["|%*d|", "0", "0"]) + .succeeds() + .stdout_only("|0|"); + + new_ucmd!() + .args(&["|%*d|", "-1", "0"]) + .succeeds() + .stdout_only("|0|"); + + // Negative widths are left-aligned + new_ucmd!() + .args(&["|%*d|", "-3", "0"]) + .succeeds() + .stdout_only("|0 |"); +} + +#[test] +fn sub_any_asterisk_second_param_with_integer() { + new_ucmd!() + .args(&["|%.*d|", "3", "10"]) + .succeeds() + .stdout_only("|010|"); + + new_ucmd!() + .args(&["|%*.d|", "1", "10"]) + .succeeds() + .stdout_only("|10|"); + + new_ucmd!() + .args(&["|%.*d|", "0", "10"]) + .succeeds() + .stdout_only("|10|"); + + new_ucmd!() + .args(&["|%.*d|", "-1", "10"]) + .succeeds() + .stdout_only("|10|"); + + new_ucmd!() + .args(&["|%.*d|", "-2", "10"]) + .succeeds() + .stdout_only("|10|"); + + new_ucmd!() + .args(&["|%.*d|", &i64::MIN.to_string(), "10"]) + .succeeds() + .stdout_only("|10|"); + + new_ucmd!() + .args(&["|%.*d|", &format!("-{}", u128::MAX), "10"]) + .fails_with_code(1) + .stdout_is("|10|") + .stderr_is( + "printf: '-340282366920938463463374607431768211455': Numerical result out of range\n", + ); +} + #[test] fn sub_any_specifiers_no_params() { new_ucmd!() @@ -899,6 +1022,14 @@ fn negative_zero_padding_with_space_test() { .stdout_only("-01"); } +#[test] +fn spaces_before_numbers_are_ignored() { + new_ucmd!() + .args(&["%*.*d", " 5", " 3", " 6"]) + .succeeds() + .stdout_only(" 006"); +} + #[test] fn float_with_zero_precision_should_pad() { new_ucmd!()