Skip to content

printf: Consistently handle negative widths/precision #7620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uucore/src/lib/features/format/argument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
}
ExtendedParserError::PartialMatch(v, rest) => {
let bytes = input.as_encoded_bytes();
if !bytes.is_empty() && bytes[0] == b'\'' {
if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') {
show_warning!(
"{}: character(s) following character constant have been ignored",
&rest,
Expand Down
182 changes: 157 additions & 25 deletions src/uucore/src/lib/features/format/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ impl Spec {
match self {
Self::Char { width, align_left } => {
let (width, neg_width) =
resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default();
resolve_asterisk_width(*width, &mut args).unwrap_or_default();
write_padded(writer, &[args.get_char()], width, *align_left || neg_width)
}
Self::String {
Expand All @@ -325,15 +325,15 @@ impl Spec {
precision,
} => {
let (width, neg_width) =
resolve_asterisk_maybe_negative(*width, &mut args).unwrap_or_default();
resolve_asterisk_width(*width, &mut args).unwrap_or_default();

// GNU does do this truncation on a byte level, see for instance:
// printf "%.1s" 🙃
// > �
// For now, we let printf panic when we truncate within a code point.
// TODO: We need to not use Rust's formatting for aligning the output,
// so that we can just write bytes to stdout without panicking.
let precision = resolve_asterisk(*precision, &mut args);
let precision = resolve_asterisk_precision(*precision, &mut args);
let s = args.get_str();
let truncated = match precision {
Some(p) if p < s.len() => &s[..p],
Expand All @@ -349,7 +349,7 @@ impl Spec {
Self::EscapedString => {
let s = args.get_str();
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes(), OctalParsing::default()) {
for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make sure, this doesn't break stuff like this right?

env printf "%b" "\\05llo" | hexdump -C
00000000  05 6c 6c 6f                                       |.llo|
env printf "%b" "\\5llo" | hexdump -C
00000000  05 6c 6c 6f                                       |.llo|

Copy link
Contributor Author

@sargas sargas Apr 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't break it (this is with GNU coreutils 9.6 in /bin):

$ ./target/debug/printf '%b' "\\05llo"|od -c
0000000 005   l   l   o
0000004
$ ./target/debug/printf '%b' "\\5llo"|od -c
0000000 005   l   l   o
0000004
$ /bin/printf '%b' "\\05llo"|od -c
0000000 005   l   l   o
0000004
$ /bin/printf '%b' "\\5llo"|od -c
0000000 005   l   l   o
0000004

I added some tests to show that octal characters are still accepted at different lengths (1-3 numbers normally and 1-4 with leading zero for %b)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please avoid screenshot.
They are terrible for accessibility and search

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the reminder, I replaced the screenshot

match c.write(&mut parsed)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => {
Expand Down Expand Up @@ -382,8 +382,10 @@ impl Spec {
positive_sign,
alignment,
} => {
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0);
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision =
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
let i = args.get_i64();

if precision as u64 > i32::MAX as u64 {
Expand All @@ -394,7 +396,11 @@ impl Spec {
width,
precision,
positive_sign: *positive_sign,
alignment: *alignment,
alignment: if neg_width {
NumberAlignment::Left
} else {
*alignment
},
}
.fmt(writer, i)
.map_err(FormatError::IoError)
Expand All @@ -405,8 +411,10 @@ impl Spec {
precision,
alignment,
} => {
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args).unwrap_or(0);
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision =
resolve_asterisk_precision(*precision, &mut args).unwrap_or_default();
let i = args.get_u64();

if precision as u64 > i32::MAX as u64 {
Expand All @@ -417,7 +425,11 @@ impl Spec {
variant: *variant,
precision,
width,
alignment: *alignment,
alignment: if neg_width {
NumberAlignment::Left
} else {
*alignment
},
}
.fmt(writer, i)
.map_err(FormatError::IoError)
Expand All @@ -431,8 +443,9 @@ impl Spec {
alignment,
precision,
} => {
let width = resolve_asterisk(*width, &mut args).unwrap_or(0);
let precision = resolve_asterisk(*precision, &mut args);
let (width, neg_width) =
resolve_asterisk_width(*width, &mut args).unwrap_or((0, false));
let precision = resolve_asterisk_precision(*precision, &mut args);
let f: ExtendedBigDecimal = args.get_extended_big_decimal();

if precision.is_some_and(|p| p as u64 > i32::MAX as u64) {
Expand All @@ -448,7 +461,11 @@ impl Spec {
case: *case,
force_decimal: *force_decimal,
positive_sign: *positive_sign,
alignment: *alignment,
alignment: if neg_width {
NumberAlignment::Left
} else {
*alignment
},
}
.fmt(writer, &f)
.map_err(FormatError::IoError)
Expand All @@ -457,18 +474,9 @@ impl Spec {
}
}

fn resolve_asterisk<'a>(
option: Option<CanAsterisk<usize>>,
mut args: impl ArgumentIter<'a>,
) -> Option<usize> {
match option {
None => None,
Some(CanAsterisk::Asterisk) => Some(usize::try_from(args.get_u64()).ok().unwrap_or(0)),
Some(CanAsterisk::Fixed(w)) => Some(w),
}
}

fn resolve_asterisk_maybe_negative<'a>(
/// Determine the width, potentially getting a value from args
/// Returns the non-negative width and whether the value should be left-aligned.
fn resolve_asterisk_width<'a>(
option: Option<CanAsterisk<usize>>,
mut args: impl ArgumentIter<'a>,
) -> Option<(usize, bool)> {
Expand All @@ -486,6 +494,23 @@ fn resolve_asterisk_maybe_negative<'a>(
}
}

/// Determines the precision, which should (if defined)
/// be a non-negative number.
fn resolve_asterisk_precision<'a>(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice to add a comment explaining what this function is doing
and write unit tests (if not too complex)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing, added doc and unit tests. Thanks for the review!

option: Option<CanAsterisk<usize>>,
mut args: impl ArgumentIter<'a>,
) -> Option<usize> {
match option {
None => None,
Some(CanAsterisk::Asterisk) => match args.get_i64() {
v if v >= 0 => usize::try_from(v).ok(),
v if v < 0 => Some(0usize),
_ => None,
},
Some(CanAsterisk::Fixed(w)) => Some(w),
}
}

fn write_padded(
mut writer: impl Write,
text: &[u8],
Expand Down Expand Up @@ -527,3 +552,110 @@ fn eat_number(rest: &mut &[u8], index: &mut usize) -> Option<usize> {
}
}
}

#[cfg(test)]
mod tests {
use super::*;

mod resolve_asterisk_width {
use super::*;
use crate::format::FormatArgument;

#[test]
fn no_width() {
assert_eq!(None, resolve_asterisk_width(None, Vec::new().iter()));
}

#[test]
fn fixed_width() {
assert_eq!(
Some((42, false)),
resolve_asterisk_width(Some(CanAsterisk::Fixed(42)), Vec::new().iter())
);
}

#[test]
fn asterisks_with_numbers() {
assert_eq!(
Some((42, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::SignedInt(42)].iter()
)
);
assert_eq!(
Some((42, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::Unparsed("42".to_string())].iter()
)
);

assert_eq!(
Some((42, true)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::SignedInt(-42)].iter()
)
);
assert_eq!(
Some((42, true)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::Unparsed("-42".to_string())].iter()
)
);
}
}

mod resolve_asterisk_precision {
use super::*;
use crate::format::FormatArgument;

#[test]
fn no_width() {
assert_eq!(None, resolve_asterisk_precision(None, Vec::new().iter()));
}

#[test]
fn fixed_width() {
assert_eq!(
Some(42),
resolve_asterisk_precision(Some(CanAsterisk::Fixed(42)), Vec::new().iter())
);
}

#[test]
fn asterisks_with_numbers() {
assert_eq!(
Some(42),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::SignedInt(42)].iter()
)
);
assert_eq!(
Some(42),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::Unparsed("42".to_string())].iter()
)
);

assert_eq!(
Some(0),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::SignedInt(-42)].iter()
)
);
assert_eq!(
Some(0),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk),
vec![FormatArgument::Unparsed("-42".to_string())].iter()
)
);
}
}
}
12 changes: 6 additions & 6 deletions src/uucore/src/lib/features/parser/num_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ fn parse(
input: &str,
integral_only: bool,
) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
// Parse the "'" prefix separately
if let Some(rest) = input.strip_prefix('\'') {
// Parse the " and ' prefixes separately
if let Some(rest) = input.strip_prefix(['\'', '"']) {
let mut chars = rest.char_indices().fuse();
let v = chars
.next()
Expand Down Expand Up @@ -465,11 +465,11 @@ fn parse(

// If nothing has been parsed, check if this is a special value, or declare the parsing unsuccessful
if let Some((0, _)) = chars.peek() {
if integral_only {
return Err(ExtendedParserError::NotNumeric);
return if integral_only {
Err(ExtendedParserError::NotNumeric)
} else {
return parse_special_value(unsigned, negative);
}
parse_special_value(unsigned, negative)
};
}

let ebd_result = construct_extended_big_decimal(digits, negative, base, scale, exponent);
Expand Down
Loading
Loading