Switch to non-overflowing decoded length formula
No change to benchmarks.

Motivated by the alternate approach in https://github.com/marshallpierce/rust-base64/pull/216/files.
marshallpierce committed Jan 17, 2023
1 parent 92e94d2 commit fa47981
Showing 5 changed files with 72 additions and 45 deletions.
src/decode.rs (5 changes: 0 additions & 5 deletions)
@@ -148,11 +148,6 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
/// // start of the next quad of encoded symbols
/// assert_eq!(6, decoded_len_estimate(5));
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
STANDARD
.internal_decoded_len_estimate(encoded_len)
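For context (not part of the commit): with the overflow panic gone, the estimate is simply total over all of usize. A minimal doctest-style sketch of typical use, assuming decoded_len_estimate is re-exported at the crate root:

    use base64::decoded_len_estimate;

    // "aGVsbG8gd29ybGQ=" is 16 encoded bytes; ceil(16 / 4) * 3 = 12.
    let encoded = "aGVsbG8gd29ybGQ=";
    let buf: Vec<u8> = Vec::with_capacity(decoded_len_estimate(encoded.len()));
    assert!(buf.capacity() >= 12);

    // With the revised formula, even a pathological length near usize::MAX
    // yields an estimate instead of panicking.
    let _ = decoded_len_estimate(usize::MAX);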
src/engine/general_purpose/decode.rs (53 changes: 44 additions & 9 deletions)
@@ -30,16 +30,11 @@ pub struct GeneralPurposeEstimate {

impl GeneralPurposeEstimate {
pub(crate) fn new(encoded_len: usize) -> Self {
+ // Formulas that won't overflow
Self {
- num_chunks: encoded_len
- .checked_add(INPUT_CHUNK_LEN - 1)
- .expect("Overflow when calculating number of chunks in input")
- / INPUT_CHUNK_LEN,
- decoded_len_estimate: encoded_len
- .checked_add(3)
- .expect("Overflow when calculating decoded len estimate")
- / 4
- * 3,
+ num_chunks: encoded_len / INPUT_CHUNK_LEN
+ + (encoded_len % INPUT_CHUNK_LEN > 0) as usize,
+ decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,
}
}
}
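The heart of the change is the ceiling-division style. A standalone sketch for illustration (not part of the commit; INPUT_CHUNK_LEN is assumed to be 8, as in this decoder):

    const INPUT_CHUNK_LEN: usize = 8;

    let encoded_len = usize::MAX;

    // Old form: add (divisor - 1) before dividing. The addition itself can
    // overflow for lengths near usize::MAX, hence the checked_add + expect.
    assert_eq!(None, encoded_len.checked_add(INPUT_CHUNK_LEN - 1));

    // New form: divide first, then add 1 only when there is a remainder.
    // No intermediate value exceeds encoded_len, so it cannot overflow.
    let num_chunks = encoded_len / INPUT_CHUNK_LEN + (encoded_len % INPUT_CHUNK_LEN > 0) as usize;
    assert_eq!(usize::MAX / INPUT_CHUNK_LEN + 1, num_chunks);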
@@ -345,4 +340,44 @@ mod tests {
decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
}

#[test]
fn estimate_short_lengths() {
for (range, (num_chunks, decoded_len_estimate)) in [
(0..=0, (0, 0)),
(1..=4, (1, 3)),
(5..=8, (1, 6)),
(9..=12, (2, 9)),
(13..=16, (2, 12)),
(17..=20, (3, 15)),
] {
for encoded_len in range {
let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(num_chunks, estimate.num_chunks);
assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);
}
}
}

#[test]
fn estimate_via_u128_inflation() {
// cover both ends of usize
(0..1000)
.chain(usize::MAX - 1000..=usize::MAX)
.for_each(|encoded_len| {
// inflate to 128 bit type to be able to safely use the easy formulas
let len_128 = encoded_len as u128;

let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(
((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))
as usize,
estimate.num_chunks
);
assert_eq!(
((len_128 + 3) / 4 * 3) as usize,
estimate.decoded_len_estimate
);
})
}
}
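The u128 test above relies on the remainder-based form and the add-then-divide form computing the same ceiling; they differ only in whether an intermediate sum can exceed usize::MAX. A small illustrative check of that identity, done in u128 where neither form can overflow:

    let via_remainder = |n: u128| n / 4 + (n % 4 > 0) as u128;
    let via_add = |n: u128| (n + 3) / 4;

    // Spot-check small values plus a range near u64::MAX, where the usize
    // version of the add-then-divide form would overflow on 64-bit targets.
    for n in (0..10_000u128).chain(u64::MAX as u128 - 100..=u64::MAX as u128) {
        assert_eq!(via_add(n), via_remainder(n));
    }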
src/engine/mod.rs (27 changes: 0 additions & 27 deletions)
@@ -62,10 +62,6 @@ pub trait Engine: Send + Sync {
/// As an optimization to prevent the decoded length from being calculated twice, it is
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
#[doc(hidden)]
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;

@@ -225,11 +221,6 @@ pub trait Engine: Send + Sync {
/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
/// println!("{:?}", bytes_url);
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
let input_bytes = input.as_ref();
@@ -272,11 +263,6 @@ pub trait Engine: Send + Sync {
/// println!("{:?}", buffer);
/// }
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode_vec<T: AsRef<[u8]>>(
&self,
@@ -312,11 +298,6 @@ pub trait Engine: Send + Sync {
///
/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
/// if the output buffer is too small.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decode_slice<T: AsRef<[u8]>>(
&self,
input: T,
@@ -344,9 +325,6 @@ pub trait Engine: Send + Sync {
///
/// # Panics
///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
- ///
/// Panics if the provided output buffer is too small for the decoded data.
fn decode_slice_unchecked<T: AsRef<[u8]>>(
&self,
@@ -387,11 +365,6 @@ pub trait DecodeEstimate {
///
/// The estimate must be no larger than the next largest complete triple of decoded bytes.
/// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decoded_len_estimate(&self) -> usize;
}

src/engine/tests.rs (26 changes: 26 additions & 0 deletions)
@@ -1200,6 +1200,32 @@ fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
}
}

#[apply(all_engines)]
fn estimate_via_u128_inflation<E: EngineWrapper>(engine_wrapper: E) {
// cover both ends of usize
(0..1000)
.chain(usize::MAX - 1000..=usize::MAX)
.for_each(|encoded_len| {
// inflate to 128 bit type to be able to safely use the easy formulas
let len_128 = encoded_len as u128;

let estimate = E::standard()
.internal_decoded_len_estimate(encoded_len)
.decoded_len_estimate();

// This check is a little too strict: it requires using the (len + 3) / 4 * 3 formula
// or equivalent, but until other engines come along that use a different formula
// requiring that we think more carefully about what the allowable criteria are, this
// will do.
assert_eq!(
((len_128 + 3) / 4 * 3) as usize,
estimate,
"enc len {}",
encoded_len
);
})
}

/// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding.
///
/// Vecs provided should be empty.
tests/encode.rs (6 changes: 2 additions & 4 deletions)
@@ -26,10 +26,9 @@ fn encode_all_ascii() {
fn encode_all_bytes() {
let mut bytes = Vec::<u8>::with_capacity(256);

- for i in 0..255 {
+ for i in 0..=255 {
bytes.push(i);
}
- bytes.push(255); //bug with "overflowing" ranges?

compare_encode(
"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\
@@ -44,10 +43,9 @@ fn encode_all_bytes() {
fn encode_all_bytes_url() {
let mut bytes = Vec::<u8>::with_capacity(256);

- for i in 0..255 {
+ for i in 0..=255 {
bytes.push(i);
}
- bytes.push(255); //bug with "overflowing" ranges?

assert_eq!(
"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0\
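An aside on the range change in the two tests above (not part of the commit): the loop variable is inferred as u8 from bytes.push(i), so an exclusive upper bound of 256 does not fit the type, which is why the old code iterated 0..255 and pushed the final 255 by hand. An inclusive range covers the whole byte space directly; a compact equivalent:

    // Every u8 value, in order, via an inclusive range.
    let bytes: Vec<u8> = (0..=u8::MAX).collect();
    assert_eq!(256, bytes.len());
    assert_eq!(Some(&u8::MAX), bytes.last());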
