Skip to content

Commit

Permalink
Merge pull request #217 from marshallpierce/mp/decode-estimate
Browse files Browse the repository at this point in the history
Switch to non-overflowing decoded length formula
  • Loading branch information
marshallpierce committed Feb 5, 2023
2 parents 2dbcc5d + 453d15d commit 29ed4d0
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 41 deletions.
4 changes: 4 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 0.21.1

- Remove the possibility of panicking during decoded length calculations

# 0.21.0

## Migration
Expand Down
5 changes: 0 additions & 5 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,6 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
/// // start of the next quad of encoded symbols
/// assert_eq!(6, decoded_len_estimate(5));
/// ```
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
STANDARD
.internal_decoded_len_estimate(encoded_len)
Expand Down
53 changes: 44 additions & 9 deletions src/engine/general_purpose/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,11 @@ pub struct GeneralPurposeEstimate {

impl GeneralPurposeEstimate {
pub(crate) fn new(encoded_len: usize) -> Self {
// Formulas that won't overflow
Self {
num_chunks: encoded_len
.checked_add(INPUT_CHUNK_LEN - 1)
.expect("Overflow when calculating number of chunks in input")
/ INPUT_CHUNK_LEN,
decoded_len_estimate: encoded_len
.checked_add(3)
.expect("Overflow when calculating decoded len estimate")
/ 4
* 3,
num_chunks: encoded_len / INPUT_CHUNK_LEN
+ (encoded_len % INPUT_CHUNK_LEN > 0) as usize,
decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,
}
}
}
Expand Down Expand Up @@ -345,4 +340,44 @@ mod tests {
decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
}

#[test]
fn estimate_short_lengths() {
for (range, (num_chunks, decoded_len_estimate)) in [
(0..=0, (0, 0)),
(1..=4, (1, 3)),
(5..=8, (1, 6)),
(9..=12, (2, 9)),
(13..=16, (2, 12)),
(17..=20, (3, 15)),
] {
for encoded_len in range {
let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(num_chunks, estimate.num_chunks);
assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);
}
}
}

#[test]
fn estimate_via_u128_inflation() {
// cover both ends of usize
(0..1000)
.chain(usize::MAX - 1000..=usize::MAX)
.for_each(|encoded_len| {
// inflate to 128 bit type to be able to safely use the easy formulas
let len_128 = encoded_len as u128;

let estimate = GeneralPurposeEstimate::new(encoded_len);
assert_eq!(
((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))
as usize,
estimate.num_chunks
);
assert_eq!(
((len_128 + 3) / 4 * 3) as usize,
estimate.decoded_len_estimate
);
})
}
}
27 changes: 0 additions & 27 deletions src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ pub trait Engine: Send + Sync {
/// As an optimization to prevent the decoded length from being calculated twice, it is
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
#[doc(hidden)]
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;

Expand Down Expand Up @@ -225,11 +221,6 @@ pub trait Engine: Send + Sync {
/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
/// println!("{:?}", bytes_url);
/// ```
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
let input_bytes = input.as_ref();
Expand Down Expand Up @@ -272,11 +263,6 @@ pub trait Engine: Send + Sync {
/// println!("{:?}", buffer);
/// }
/// ```
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
#[cfg(any(feature = "alloc", feature = "std", test))]
fn decode_vec<T: AsRef<[u8]>>(
&self,
Expand Down Expand Up @@ -312,11 +298,6 @@ pub trait Engine: Send + Sync {
///
/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
/// if the output buffer is too small.
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decode_slice<T: AsRef<[u8]>>(
&self,
input: T,
Expand Down Expand Up @@ -344,9 +325,6 @@ pub trait Engine: Send + Sync {
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
///
/// Panics if the provided output buffer is too small for the decoded data.
fn decode_slice_unchecked<T: AsRef<[u8]>>(
&self,
Expand Down Expand Up @@ -387,11 +365,6 @@ pub trait DecodeEstimate {
///
/// The estimate must be no larger than the next largest complete triple of decoded bytes.
/// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
///
/// # Panics
///
/// Panics if decoded length estimation overflows.
/// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decoded_len_estimate(&self) -> usize;
}

Expand Down
26 changes: 26 additions & 0 deletions src/engine/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,32 @@ fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
}
}

#[apply(all_engines)]
fn estimate_via_u128_inflation<E: EngineWrapper>(engine_wrapper: E) {
// cover both ends of usize
(0..1000)
.chain(usize::MAX - 1000..=usize::MAX)
.for_each(|encoded_len| {
// inflate to 128 bit type to be able to safely use the easy formulas
let len_128 = encoded_len as u128;

let estimate = E::standard()
.internal_decoded_len_estimate(encoded_len)
.decoded_len_estimate();

// This check is a little too strict: it requires using the (len + 3) / 4 * 3 formula
// or equivalent, but until other engines come along that use a different formula
// requiring that we think more carefully about what the allowable criteria are, this
// will do.
assert_eq!(
((len_128 + 3) / 4 * 3) as usize,
estimate,
"enc len {}",
encoded_len
);
})
}

/// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding.
///
/// Vecs provided should be empty.
Expand Down

0 comments on commit 29ed4d0

Please sign in to comment.