Skip to content

Commit f1aa179

Browse files
committed
decode: use exact decoded length rather than estimation
Fixes: #210 Fixes: #212
1 parent f766bc6 commit f1aa179

File tree

10 files changed

+190
-323
lines changed

10 files changed

+190
-323
lines changed

src/decode.rs

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine};
1+
use crate::engine::Engine;
22
#[cfg(any(feature = "alloc", feature = "std", test))]
33
use alloc::vec::Vec;
44
use core::fmt;
@@ -89,7 +89,7 @@ impl From<DecodeError> for DecodeSliceError {
8989
#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
9090
#[cfg(any(feature = "alloc", feature = "std", test))]
9191
pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
92-
STANDARD.decode(input)
92+
crate::engine::general_purpose::STANDARD.decode(input)
9393
}
9494

9595
/// Decode from string reference as octets using the specified [Engine].
@@ -130,6 +130,73 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
130130
engine.decode_slice(input, output)
131131
}
132132

133+
/// Returns the exact decoded size of `encoded`, assuming it is a valid base64
/// string.
///
/// Up to two trailing `=` padding bytes are excluded before the length is
/// computed, so both padded and unpadded inputs are supported. If the input
/// is not valid base64 the result is unspecified, but the function does not
/// panic and never returns more than `encoded.len()`.
///
/// If you don't need a precise length of the decoded string, you can use the
/// [`decoded_len_estimate`] function instead. It's faster and, for valid
/// input, provides an estimate which is at most two bytes larger than the real
/// length.
///
/// # Examples
///
/// ```
/// use base64::decoded_len;
///
/// assert_eq!(0, decoded_len(b""));
/// assert_eq!(1, decoded_len(b"AA"));
/// assert_eq!(2, decoded_len(b"AAA"));
/// assert_eq!(3, decoded_len(b"AAAA"));
/// assert_eq!(1, decoded_len(b"AA=="));
/// assert_eq!(2, decoded_len(b"AAA="));
/// ```
pub fn decoded_len(encoded: impl AsRef<[u8]>) -> usize {
    let encoded = encoded.as_ref();
    // Fewer than two symbols cannot encode even a single byte. Returning early
    // also guarantees that the two look-behind reads below stay in bounds.
    if encoded.len() < 2 {
        return 0;
    }
    // 1 if the byte `idx` positions from the end is a padding byte, else 0.
    let is_pad = |idx: usize| usize::from(encoded[encoded.len() - idx] == b'=');
    // Number of symbols left once the (at most two) padding bytes are ignored.
    // Cannot underflow: `encoded.len() >= 2` and at most 2 is subtracted.
    let len = encoded.len() - is_pad(1) - is_pad(2);
    match len % 4 {
        // Only complete quads: three bytes each.
        0 => len / 4 * 3,
        // A trailing partial quad of `remainder` symbols carries
        // `remainder - 1` bytes (2 symbols -> 1 byte, 3 symbols -> 2 bytes).
        // A remainder of 1 is not valid base64 and contributes nothing.
        remainder => len / 4 * 3 + remainder - 1,
    }
}
167+
168+
/// Checks `decoded_len` against hand-computed lengths for padded and unpadded
/// inputs, and verifies that malformed input neither panics nor produces a
/// length larger than the input itself.
#[test]
fn test_decoded_len() {
    for chunks in 0..25 {
        // `chunks` complete quads followed by one more quad whose tail is
        // progressively replaced with padding.
        let mut input = vec![b'A'; chunks * 4 + 4];
        assert_eq!(chunks * 3, decoded_len(&input[..chunks * 4]));
        assert_eq!(chunks * 3 + 1, decoded_len(&input[..chunks * 4 + 2]));
        assert_eq!(chunks * 3 + 2, decoded_len(&input[..chunks * 4 + 3]));
        assert_eq!(chunks * 3 + 3, decoded_len(&input[..chunks * 4 + 4]));

        // Last quad is now "AAA=".
        input[chunks * 4 + 3] = b'=';
        assert_eq!(chunks * 3 + 1, decoded_len(&input[..chunks * 4 + 2]));
        assert_eq!(chunks * 3 + 2, decoded_len(&input[..chunks * 4 + 3]));
        assert_eq!(chunks * 3 + 2, decoded_len(&input[..chunks * 4 + 4]));

        // Last quad is now "AA==".
        input[chunks * 4 + 2] = b'=';
        assert_eq!(chunks * 3 + 1, decoded_len(&input[..chunks * 4 + 2]));
        assert_eq!(chunks * 3 + 1, decoded_len(&input[..chunks * 4 + 3]));
        assert_eq!(chunks * 3 + 1, decoded_len(&input[..chunks * 4 + 4]));
    }

    // Mustn't panic or overflow if given bogus input.
    //
    // `assert!` rather than `debug_assert!`: when tests are built without
    // debug assertions, integer overflow wraps silently and `debug_assert!`
    // compiles to nothing, so this bound is the only thing that would catch a
    // wrapped-around result.
    for len in 1..100 {
        let mut input = vec![b'A'; len];
        let got = decoded_len(&input);
        assert!(got <= len);
        for padding in 1..=len.min(10) {
            input[len - padding] = b'=';
            let got = decoded_len(&input);
            assert!(got <= len);
        }
    }
}
199+
133200
/// Returns a conservative estimate of the decoded size of `encoded_len` base64 symbols (rounded up
134201
/// to the next group of 3 decoded bytes).
135202
///
@@ -141,6 +208,7 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
141208
/// ```
142209
/// use base64::decoded_len_estimate;
143210
///
211+
/// assert_eq!(0, decoded_len_estimate(0));
144212
/// assert_eq!(3, decoded_len_estimate(1));
145213
/// assert_eq!(3, decoded_len_estimate(2));
146214
/// assert_eq!(3, decoded_len_estimate(3));
@@ -149,17 +217,27 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
149217
/// assert_eq!(6, decoded_len_estimate(5));
150218
/// ```
151219
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
152-
STANDARD
153-
.internal_decoded_len_estimate(encoded_len)
154-
.decoded_len_estimate()
220+
(encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3
221+
}
222+
223+
/// Verifies that `decoded_len_estimate` rounds every partial quad up to a
/// full three-byte group and stays well-defined at the top of `usize`.
#[test]
fn test_decode_len_estimate() {
    for chunks in 0..250 {
        let encoded = chunks * 4;
        let decoded = chunks * 3;
        // Whole quads map to exactly three bytes each.
        assert_eq!(decoded, decoded_len_estimate(encoded));
        // Any partial trailing quad is counted as a whole one.
        for partial in 1..4 {
            assert_eq!(decoded + 3, decoded_len_estimate(encoded + partial));
        }
    }
    // Mustn't panic or overflow.
    assert_eq!(usize::MAX / 4 * 3 + 3, decoded_len_estimate(usize::MAX));
}
156234

157235
#[cfg(test)]
158236
mod tests {
159237
use super::*;
160238
use crate::{
161-
alphabet,
162-
engine::{general_purpose, Config, GeneralPurpose},
239+
engine::{Config, GeneralPurpose},
240+
engine::general_purpose::{STANDARD, NO_PAD},
163241
tests::{assert_encode_sanity, random_engine},
164242
};
165243
use rand::{
@@ -245,7 +323,7 @@ mod tests {
245323

246324
#[test]
247325
fn decode_engine_estimation_works_for_various_lengths() {
248-
let engine = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD);
326+
let engine = GeneralPurpose::new(&crate::alphabet::STANDARD, NO_PAD);
249327
for num_prefix_quads in 0..100 {
250328
for suffix in &["AA", "AAA", "AAAA"] {
251329
let mut prefix = "AAAA".repeat(num_prefix_quads);

src/encode.rs

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,24 +96,15 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
9696
/// input lengths in approximately the top quarter of the range of `usize`.
9797
pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
9898
let rem = bytes_len % 3;
99-
100-
let complete_input_chunks = bytes_len / 3;
101-
let complete_chunk_output = complete_input_chunks.checked_mul(4);
102-
103-
if rem > 0 {
104-
if padding {
105-
complete_chunk_output.and_then(|c| c.checked_add(4))
106-
} else {
107-
let encoded_rem = match rem {
108-
1 => 2,
109-
2 => 3,
110-
_ => unreachable!("Impossible remainder"),
111-
};
112-
complete_chunk_output.and_then(|c| c.checked_add(encoded_rem))
113-
}
99+
let chunks = bytes_len / 3 + (rem > 0 && padding) as usize;
100+
let encoded_len = chunks.checked_mul(4)?;
101+
Some(if !padding && rem > 0 {
102+
// This doesn’t overflow. encoded_len is divisible by four thus it’s at
103+
// most usize::MAX - 3. rem ≤ 2 so we’re adding at most three.
104+
encoded_len + rem + 1
114105
} else {
115-
complete_chunk_output
116-
}
106+
encoded_len
107+
})
117108
}
118109

119110
/// Write padding characters.

src/engine/general_purpose/decode.rs

Lines changed: 4 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::{
2-
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
2+
engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
33
DecodeError, PAD_BYTE,
44
};
55

@@ -21,30 +21,6 @@ const INPUT_BLOCK_LEN: usize = CHUNKS_PER_FAST_LOOP_BLOCK * INPUT_CHUNK_LEN;
2121
const DECODED_BLOCK_LEN: usize =
2222
CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX;
2323

24-
#[doc(hidden)]
25-
pub struct GeneralPurposeEstimate {
26-
/// Total number of decode chunks, including a possibly partial last chunk
27-
num_chunks: usize,
28-
decoded_len_estimate: usize,
29-
}
30-
31-
impl GeneralPurposeEstimate {
32-
pub(crate) fn new(encoded_len: usize) -> Self {
33-
// Formulas that won't overflow
34-
Self {
35-
num_chunks: encoded_len / INPUT_CHUNK_LEN
36-
+ (encoded_len % INPUT_CHUNK_LEN > 0) as usize,
37-
decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,
38-
}
39-
}
40-
}
41-
42-
impl DecodeEstimate for GeneralPurposeEstimate {
43-
fn decoded_len_estimate(&self) -> usize {
44-
self.decoded_len_estimate
45-
}
46-
}
47-
4824
/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
4925
/// Returns the number of bytes written, or an error.
5026
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
@@ -53,12 +29,11 @@ impl DecodeEstimate for GeneralPurposeEstimate {
5329
#[inline]
5430
pub(crate) fn decode_helper(
5531
input: &[u8],
56-
estimate: GeneralPurposeEstimate,
5732
output: &mut [u8],
5833
decode_table: &[u8; 256],
5934
decode_allow_trailing_bits: bool,
6035
padding_mode: DecodePaddingMode,
61-
) -> Result<usize, DecodeError> {
36+
) -> Result<(), DecodeError> {
6237
let remainder_len = input.len() % INPUT_CHUNK_LEN;
6338

6439
// Because the fast decode loop writes in groups of 8 bytes (unrolled to
@@ -99,7 +74,8 @@ pub(crate) fn decode_helper(
9974
};
10075

10176
// rounded up to include partial chunks
102-
let mut remaining_chunks = estimate.num_chunks;
77+
let mut remaining_chunks =
78+
input.len() / INPUT_CHUNK_LEN + (input.len() % INPUT_CHUNK_LEN > 0) as usize;
10379

10480
let mut input_index = 0;
10581
let mut output_index = 0;
@@ -340,44 +316,4 @@ mod tests {
340316
decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
341317
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
342318
}
343-
344-
#[test]
345-
fn estimate_short_lengths() {
346-
for (range, (num_chunks, decoded_len_estimate)) in [
347-
(0..=0, (0, 0)),
348-
(1..=4, (1, 3)),
349-
(5..=8, (1, 6)),
350-
(9..=12, (2, 9)),
351-
(13..=16, (2, 12)),
352-
(17..=20, (3, 15)),
353-
] {
354-
for encoded_len in range {
355-
let estimate = GeneralPurposeEstimate::new(encoded_len);
356-
assert_eq!(num_chunks, estimate.num_chunks);
357-
assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);
358-
}
359-
}
360-
}
361-
362-
#[test]
363-
fn estimate_via_u128_inflation() {
364-
// cover both ends of usize
365-
(0..1000)
366-
.chain(usize::MAX - 1000..=usize::MAX)
367-
.for_each(|encoded_len| {
368-
// inflate to 128 bit type to be able to safely use the easy formulas
369-
let len_128 = encoded_len as u128;
370-
371-
let estimate = GeneralPurposeEstimate::new(encoded_len);
372-
assert_eq!(
373-
((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))
374-
as usize,
375-
estimate.num_chunks
376-
);
377-
assert_eq!(
378-
((len_128 + 3) / 4 * 3) as usize,
379-
estimate.decoded_len_estimate
380-
);
381-
})
382-
}
383319
}

src/engine/general_purpose/decode_suffix.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ use crate::{
66
/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided
77
/// parameters.
88
///
9-
/// Returns the total number of bytes decoded, including the ones indicated as already written by
10-
/// `output_index`.
9+
/// Expects output to be large enough to fit decoded data exactly without any
10+
/// unused space. In debug builds panics if final output length (`output_index`
11+
/// plus any bytes written by this function) doesn’t equal length of the output.
1112
pub(crate) fn decode_suffix(
1213
input: &[u8],
1314
input_index: usize,
@@ -16,7 +17,7 @@ pub(crate) fn decode_suffix(
1617
decode_table: &[u8; 256],
1718
decode_allow_trailing_bits: bool,
1819
padding_mode: DecodePaddingMode,
19-
) -> Result<usize, DecodeError> {
20+
) -> Result<(), DecodeError> {
2021
// Decode any leftovers that aren't a complete input block of 8 bytes.
2122
// Use a u64 as a stack-resident 8 byte buffer.
2223
let mut leftover_bits: u64 = 0;
@@ -157,5 +158,6 @@ pub(crate) fn decode_suffix(
157158
leftover_bits_appended_to_buf += 8;
158159
}
159160

160-
Ok(output_index)
161+
debug_assert_eq!(output.len(), output_index);
162+
Ok(())
161163
}

src/engine/general_purpose/mod.rs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use core::convert::TryInto;
99

1010
mod decode;
1111
pub(crate) mod decode_suffix;
12-
pub use decode::GeneralPurposeEstimate;
1312

1413
pub(crate) const INVALID_VALUE: u8 = 255;
1514

@@ -40,7 +39,6 @@ impl GeneralPurpose {
4039

4140
impl super::Engine for GeneralPurpose {
4241
type Config = GeneralPurposeConfig;
43-
type DecodeEstimate = GeneralPurposeEstimate;
4442

4543
fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
4644
let mut input_index: usize = 0;
@@ -161,19 +159,9 @@ impl super::Engine for GeneralPurpose {
161159
output_index
162160
}
163161

164-
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
165-
GeneralPurposeEstimate::new(input_len)
166-
}
167-
168-
fn internal_decode(
169-
&self,
170-
input: &[u8],
171-
output: &mut [u8],
172-
estimate: Self::DecodeEstimate,
173-
) -> Result<usize, DecodeError> {
162+
fn internal_decode(&self, input: &[u8], output: &mut [u8]) -> Result<(), DecodeError> {
174163
decode::decode_helper(
175164
input,
176-
estimate,
177165
output,
178166
&self.decode_table,
179167
self.config.decode_allow_trailing_bits,

0 commit comments

Comments
 (0)