Skip to content

Commit

Permalink
Fix DecoderReader handling of padding
Browse files Browse the repository at this point in the history
Fixes #226.

DecoderReader now keeps more state so that erroneous internal padding that happens to fall right on a read buffer boundary isn't silently ignored.
  • Loading branch information
marshallpierce committed May 21, 2023
1 parent f766bc6 commit f26d8f2
Show file tree
Hide file tree
Showing 9 changed files with 551 additions and 91 deletions.
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
@@ -1,6 +1,7 @@
# 0.21.1

- Remove the possibility of panicking during decoded length calculations
- `DecoderReader` no longer sometimes erroneously ignores padding [#226](https://github.com/marshallpierce/rust-base64/issues/226)

# 0.21.0

Expand Down
6 changes: 3 additions & 3 deletions src/engine/general_purpose/decode.rs
@@ -1,5 +1,5 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};

Expand Down Expand Up @@ -46,7 +46,7 @@ impl DecodeEstimate for GeneralPurposeEstimate {
}

/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
/// Returns the number of bytes written, or an error.
/// Returns the decode metadata, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
Expand All @@ -58,7 +58,7 @@ pub(crate) fn decode_helper(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
let remainder_len = input.len() % INPUT_CHUNK_LEN;

// Because the fast decode loop writes in groups of 8 bytes (unrolled to
Expand Down
17 changes: 12 additions & 5 deletions src/engine/general_purpose/decode_suffix.rs
@@ -1,13 +1,13 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};

/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided
/// parameters.
///
/// Returns the total number of bytes decoded, including the ones indicated as already written by
/// `output_index`.
/// Returns the decode metadata representing the total number of bytes decoded, including the ones
/// indicated as already written by `output_index`.
pub(crate) fn decode_suffix(
input: &[u8],
input_index: usize,
Expand All @@ -16,7 +16,7 @@ pub(crate) fn decode_suffix(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
// Decode any leftovers that aren't a complete input block of 8 bytes.
// Use a u64 as a stack-resident 8 byte buffer.
let mut leftover_bits: u64 = 0;
Expand Down Expand Up @@ -157,5 +157,12 @@ pub(crate) fn decode_suffix(
leftover_bits_appended_to_buf += 8;
}

Ok(output_index)
Ok(DecodeMetadata::new(
output_index,
if padding_bytes > 0 {
Some(input_index + first_padding_index)
} else {
None
},
))
}
5 changes: 3 additions & 2 deletions src/engine/general_purpose/mod.rs
Expand Up @@ -2,13 +2,14 @@
use crate::{
alphabet,
alphabet::Alphabet,
engine::{Config, DecodePaddingMode},
engine::{Config, DecodeMetadata, DecodePaddingMode},
DecodeError,
};
use core::convert::TryInto;

mod decode;
pub(crate) mod decode_suffix;

pub use decode::GeneralPurposeEstimate;

pub(crate) const INVALID_VALUE: u8 = 255;
Expand Down Expand Up @@ -170,7 +171,7 @@ impl super::Engine for GeneralPurpose {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
decode::decode_helper(
input,
estimate,
Expand Down
43 changes: 34 additions & 9 deletions src/engine/mod.rs
Expand Up @@ -73,8 +73,6 @@ pub trait Engine: Send + Sync {
/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
/// calculating it again (expensive on short inputs).`
///
/// Returns the number of bytes written to `output`.
///
/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
/// function must also handle the final possibly partial chunk.
/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
Expand All @@ -95,7 +93,7 @@ pub trait Engine: Send + Sync {
input: &[u8],
output: &mut [u8],
decode_estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError>;
) -> Result<DecodeMetadata, DecodeError>;

/// Returns the config for this engine.
fn config(&self) -> &Self::Config;
Expand Down Expand Up @@ -202,8 +200,7 @@ pub trait Engine: Send + Sync {
Ok(encoded_size)
}

/// Decode from string reference as octets using the specified [Engine].
/// Returns a `Result` containing a `Vec<u8>`.
/// Decode the input into a new `Vec`.
///
/// # Example
///
Expand All @@ -228,13 +225,16 @@ pub trait Engine: Send + Sync {
let estimate = self.internal_decoded_len_estimate(input_bytes.len());
let mut buffer = vec![0; estimate.decoded_len_estimate()];

let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?;
let bytes_written = self
.internal_decode(input_bytes, &mut buffer, estimate)?
.decoded_len;
buffer.truncate(bytes_written);

Ok(buffer)
}

/// Decode from string reference as octets.
/// Decode the `input` into the supplied `buffer`.
///
/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
/// Returns a `Result` containing an empty tuple, aka `()`.
///
Expand Down Expand Up @@ -281,7 +281,9 @@ pub trait Engine: Send + Sync {
buffer.resize(total_len_estimate, 0);

let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?;
let bytes_written = self
.internal_decode(input_bytes, buffer_slice, estimate)?
.decoded_len;

buffer.truncate(starting_output_len + bytes_written);

Expand All @@ -290,7 +292,8 @@ pub trait Engine: Send + Sync {

/// Decode the input into the provided output slice.
///
/// Returns an error if `output` is smaller than the estimated decoded length.
/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
/// the estimated decoded length.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
Expand All @@ -312,10 +315,13 @@ pub trait Engine: Send + Sync {

self.internal_decode(input_bytes, output, estimate)
.map_err(|e| e.into())
.map(|dm| dm.decoded_len)
}

/// Decode the input into the provided output slice.
///
/// Returns the number of bytes written to the slice.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
Expand All @@ -338,6 +344,7 @@ pub trait Engine: Send + Sync {
output,
self.internal_decoded_len_estimate(input_bytes.len()),
)
.map(|dm| dm.decoded_len)
}
}

Expand Down Expand Up @@ -381,3 +388,21 @@ pub enum DecodePaddingMode {
/// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
RequireNone,
}

/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
/// Number of decoded bytes output
pub(crate) decoded_len: usize,
/// Offset of the first padding byte in the input, if any
pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
Self {
decoded_len: decoded_bytes,
padding_offset: padding_index,
}
}
}
4 changes: 2 additions & 2 deletions src/engine/naive.rs
Expand Up @@ -2,7 +2,7 @@ use crate::{
alphabet::Alphabet,
engine::{
general_purpose::{self, decode_table, encode_table},
Config, DecodeEstimate, DecodePaddingMode, Engine,
Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
},
DecodeError, PAD_BYTE,
};
Expand Down Expand Up @@ -112,7 +112,7 @@ impl Engine for Naive {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
if estimate.rem == 1 {
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
Expand Down

0 comments on commit f26d8f2

Please sign in to comment.