Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix DecoderReader handling of padding #238

Merged
merged 1 commit into from
May 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ workflows:
# be easier on the CI hosts since presumably those fat lower layers will already be cached, and
# therefore faster than a minimal, customized alpine.
# MSRV
'rust:1.57.0'
'rust:1.60.0'
]
# a hacky scheme to work around CircleCI's inability to deal with mutable docker tags, forcing us to
# get a nightly or stable toolchain via rustup instead of a mutable docker tag
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "base64"
version = "0.21.0"
version = "0.21.1"
authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"]
description = "encodes and decodes base64 as bytes or utf8"
repository = "https://github.com/marshallpierce/rust-base64"
Expand All @@ -10,7 +10,7 @@ keywords = ["base64", "utf8", "encode", "decode", "no_std"]
categories = ["encoding"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.57.0"
rust-version = "1.60.0"

[[bench]]
name = "benchmarks"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ optionally may allow other behaviors.

## Rust version compatibility

The minimum supported Rust version is 1.57.0.
The minimum supported Rust version is 1.60.0.

# Contributing

Expand Down
7 changes: 7 additions & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# 0.21.1

- Remove the possibility of panicking during decoded length calculations
- `DecoderReader` no longer sometimes erroneously ignores padding [#226](https://github.com/marshallpierce/rust-base64/issues/226)

## Breaking changes

- `Engine.internal_decode` return type changed
- Update MSRV to 1.60.0


# 0.21.0

Expand Down
2 changes: 1 addition & 1 deletion clippy.toml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
msrv = "1.57.0"
msrv = "1.60.0"
6 changes: 3 additions & 3 deletions src/engine/general_purpose/decode.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};

Expand Down Expand Up @@ -46,7 +46,7 @@ impl DecodeEstimate for GeneralPurposeEstimate {
}

/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
/// Returns the number of bytes written, or an error.
/// Returns the decode metadata, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
Expand All @@ -58,7 +58,7 @@ pub(crate) fn decode_helper(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
let remainder_len = input.len() % INPUT_CHUNK_LEN;

// Because the fast decode loop writes in groups of 8 bytes (unrolled to
Expand Down
17 changes: 12 additions & 5 deletions src/engine/general_purpose/decode_suffix.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use crate::{
engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};

/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided
/// parameters.
///
/// Returns the total number of bytes decoded, including the ones indicated as already written by
/// `output_index`.
/// Returns the decode metadata representing the total number of bytes decoded, including the ones
/// indicated as already written by `output_index`.
pub(crate) fn decode_suffix(
input: &[u8],
input_index: usize,
Expand All @@ -16,7 +16,7 @@ pub(crate) fn decode_suffix(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
// Decode any leftovers that aren't a complete input block of 8 bytes.
// Use a u64 as a stack-resident 8 byte buffer.
let mut leftover_bits: u64 = 0;
Expand Down Expand Up @@ -157,5 +157,12 @@ pub(crate) fn decode_suffix(
leftover_bits_appended_to_buf += 8;
}

Ok(output_index)
Ok(DecodeMetadata::new(
output_index,
if padding_bytes > 0 {
Some(input_index + first_padding_index)
} else {
None
},
))
}
5 changes: 3 additions & 2 deletions src/engine/general_purpose/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
use crate::{
alphabet,
alphabet::Alphabet,
engine::{Config, DecodePaddingMode},
engine::{Config, DecodeMetadata, DecodePaddingMode},
DecodeError,
};
use core::convert::TryInto;

mod decode;
pub(crate) mod decode_suffix;

pub use decode::GeneralPurposeEstimate;

pub(crate) const INVALID_VALUE: u8 = 255;
Expand Down Expand Up @@ -170,7 +171,7 @@ impl super::Engine for GeneralPurpose {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
decode::decode_helper(
input,
estimate,
Expand Down
43 changes: 34 additions & 9 deletions src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ pub trait Engine: Send + Sync {
/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
/// calculating it again (expensive on short inputs).`
///
/// Returns the number of bytes written to `output`.
///
/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
/// function must also handle the final possibly partial chunk.
/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
Expand All @@ -95,7 +93,7 @@ pub trait Engine: Send + Sync {
input: &[u8],
output: &mut [u8],
decode_estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError>;
) -> Result<DecodeMetadata, DecodeError>;

/// Returns the config for this engine.
fn config(&self) -> &Self::Config;
Expand Down Expand Up @@ -202,8 +200,7 @@ pub trait Engine: Send + Sync {
Ok(encoded_size)
}

/// Decode from string reference as octets using the specified [Engine].
/// Returns a `Result` containing a `Vec<u8>`.
/// Decode the input into a new `Vec`.
///
/// # Example
///
Expand All @@ -228,13 +225,16 @@ pub trait Engine: Send + Sync {
let estimate = self.internal_decoded_len_estimate(input_bytes.len());
let mut buffer = vec![0; estimate.decoded_len_estimate()];

let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?;
let bytes_written = self
.internal_decode(input_bytes, &mut buffer, estimate)?
.decoded_len;
buffer.truncate(bytes_written);

Ok(buffer)
}

/// Decode from string reference as octets.
/// Decode the `input` into the supplied `buffer`.
///
/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
/// Returns a `Result` containing an empty tuple, aka `()`.
///
Expand Down Expand Up @@ -281,7 +281,9 @@ pub trait Engine: Send + Sync {
buffer.resize(total_len_estimate, 0);

let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?;
let bytes_written = self
.internal_decode(input_bytes, buffer_slice, estimate)?
.decoded_len;

buffer.truncate(starting_output_len + bytes_written);

Expand All @@ -290,7 +292,8 @@ pub trait Engine: Send + Sync {

/// Decode the input into the provided output slice.
///
/// Returns an error if `output` is smaller than the estimated decoded length.
/// Returns the number of bytes written to the slice, or an error if `output` is smaller than
/// the estimated decoded length.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
Expand All @@ -312,10 +315,13 @@ pub trait Engine: Send + Sync {

self.internal_decode(input_bytes, output, estimate)
.map_err(|e| e.into())
.map(|dm| dm.decoded_len)
}

/// Decode the input into the provided output slice.
///
/// Returns the number of bytes written to the slice.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
Expand All @@ -338,6 +344,7 @@ pub trait Engine: Send + Sync {
output,
self.internal_decoded_len_estimate(input_bytes.len()),
)
.map(|dm| dm.decoded_len)
}
}

Expand Down Expand Up @@ -381,3 +388,21 @@ pub enum DecodePaddingMode {
/// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
RequireNone,
}

/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
/// Number of decoded bytes output
pub(crate) decoded_len: usize,
/// Offset of the first padding byte in the input, if any
pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
Self {
decoded_len: decoded_bytes,
padding_offset: padding_index,
}
}
}
4 changes: 2 additions & 2 deletions src/engine/naive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
alphabet::Alphabet,
engine::{
general_purpose::{self, decode_table, encode_table},
Config, DecodeEstimate, DecodePaddingMode, Engine,
Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
},
DecodeError, PAD_BYTE,
};
Expand Down Expand Up @@ -112,7 +112,7 @@ impl Engine for Naive {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
) -> Result<usize, DecodeError> {
) -> Result<DecodeMetadata, DecodeError> {
if estimate.rem == 1 {
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
Expand Down