marshallpierce · marshallpierce · Jan 8, 2024 · Dec 3, 2023 · Dec 4, 2023 · Jan 8, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -40,6 +40,7 @@ rand = { version = "0.8.5", features = ["small_rng"] }
 # Latest is 4.4.13 but specifies MSRV in Cargo.toml which means we can't depend
 # on it (even though we won't compile it in MSRV CI).
 clap = { version = "3.2.25", features = ["derive"] }
+strum = { version = "0.25", features = ["derive"] }
 # test fixtures for engine tests
 rstest = "0.13.0"
 rstest_reuse = "0.6.0"

diff --git a/examples/base64.rs b/examples/base64.rs
@@ -2,45 +2,35 @@ use std::fs::File;
 use std::io::{self, Read};
 use std::path::PathBuf;
 use std::process;
-use std::str::FromStr;
 
 use base64::{alphabet, engine, read, write};
 use clap::Parser;
 
-#[derive(Clone, Debug, Parser)]
+#[derive(Clone, Debug, Parser, strum::EnumString, Default)]
+#[strum(serialize_all = "kebab-case")]
 enum Alphabet {
+    #[default]
     Standard,
     UrlSafe,
 }
 
-impl Default for Alphabet {
-    fn default() -> Self {
-        Self::Standard
-    }
-}
-
-impl FromStr for Alphabet {
-    type Err = String;
-    fn from_str(s: &str) -> Result<Self, String> {
-        match s {
-            "standard" => Ok(Self::Standard),
-            "urlsafe" => Ok(Self::UrlSafe),
-            _ => Err(format!("alphabet '{}' unrecognized", s)),
-        }
-    }
-}
-
 /// Base64 encode or decode FILE (or standard input), to standard output.
 #[derive(Debug, Parser)]
 struct Opt {
-    /// decode data
+    /// Decode the base64-encoded input (default: encode the input as base64).
     #[structopt(short = 'd', long = "decode")]
     decode: bool,
-    /// The alphabet to choose. Defaults to the standard base64 alphabet.
-    /// Supported alphabets include "standard" and "urlsafe".
+
+    /// The encoding alphabet: "standard" (default) or "url-safe".
     #[structopt(long = "alphabet")]
     alphabet: Option<Alphabet>,
-    /// The file to encode/decode.
+
+    /// Omit padding characters while encoding, and reject them while decoding.
+    #[structopt(short = 'p', long = "no-padding")]
+    no_padding: bool,
+
+    /// The file to encode or decode.
+    #[structopt(name = "FILE", parse(from_os_str))]
     file: Option<PathBuf>,
 }
 
@@ -65,7 +55,10 @@ fn main() {
             Alphabet::Standard => alphabet::STANDARD,
             Alphabet::UrlSafe => alphabet::URL_SAFE,
         },
-        engine::general_purpose::PAD,
+        match opt.no_padding {
+            true => engine::general_purpose::NO_PAD,
+            false => engine::general_purpose::PAD,
+        },
     );
 
     let stdout = io::stdout();

diff --git a/src/alphabet.rs b/src/alphabet.rs
@@ -160,21 +160,21 @@ impl fmt::Display for ParseAlphabetError {
 #[cfg(any(feature = "std", test))]
 impl error::Error for ParseAlphabetError {}
 
-/// The standard alphabet (uses `+` and `/`).
+/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
 ///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
 );
 
-/// The URL safe alphabet (uses `-` and `_`).
+/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
 ///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
 );
 
-/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
+/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
 ///
 /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
 pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
@@ -186,7 +186,7 @@ pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
     "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
 );
 
-/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
+/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
 ///
 /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
 pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(

diff --git a/src/lib.rs b/src/lib.rs
@@ -1,100 +1,124 @@
-//! # Getting started
+//! Correct, fast, and configurable [base64][] decoding and encoding. Base64
+//! transports binary data efficiently in contexts where only plain text is
+//! allowed.
 //!
-//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g.
-//! [engine::general_purpose::STANDARD_NO_PAD].
-//!     - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to
-//!       `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD]
-//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE].
-//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose].
-//! 1. Configure the engine appropriately using the engine's `Config` type.
-//!     - This is where you'll select whether to add padding (when encoding) or expect it (when
-//!     decoding). If given the choice, prefer no padding.
-//! 1. Build the engine using the selected alphabet and config.
+//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64
 //!
-//! For more detail, see below.
+//! # Usage
 //!
-//! ## Alphabets
+//! Use an [`Engine`] to decode or encode base64, configured with the base64
+//! alphabet and padding behavior best suited to your application.
 //!
-//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from.
+//! ## Engine setup
 //!
-//! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used
-//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed.
+//! There is more than one way to encode a stream of bytes as “base64”.
+//! Different applications use different encoding
+//! [alphabets][alphabet::Alphabet] and
+//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig].
 //!
-//! ## Engines
+//! ### Encoding alphabet
 //!
-//! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public
-//! API provide a default, but otherwise the user must provide an `Engine` to use.
+//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and
+//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ
+//! in their choice of the final 2.
 //!
-//! See [Engine] for more.
+//! Most applications use the [standard][alphabet::STANDARD] alphabet specified
+//! in [RFC 4648][rfc-alphabet]. If that’s all you need, you can get started
+//! quickly by using the pre-configured
+//! [`STANDARD`][engine::general_purpose::STANDARD] engine, which is also available
+//! in the [`prelude`] module as shown here, if you prefer a minimal `use`
+//! footprint.
 //!
-//! ## Config
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::prelude::*;
 //!
-//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each
-//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different
-//! levels of configurability.
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(BASE64_STANDARD.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(BASE64_STANDARD.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
 //!
-//! # Encoding
+//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
 //!
-//! Several different encoding methods on [Engine] are available to you depending on your desire for
-//! convenience vs performance.
+//! Other common alphabets are available in the [`alphabet`] module.
 //!
-//! | Method                   | Output                       | Allocates                      |
-//! | ------------------------ | ---------------------------- | ------------------------------ |
-//! | [Engine::encode]         | Returns a new `String`       | Always                         |
-//! | [Engine::encode_string]  | Appends to provided `String` | Only if `String` needs to grow |
-//! | [Engine::encode_slice]   | Writes to provided `&[u8]`   | Never - fastest                |
+//! #### URL-safe alphabet
 //!
-//! All of the encoding methods will pad as per the engine's config.
+//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens,
+//! which cannot be safely used in URLs without encoding them as `%2B` and
+//! `%2F`.
 //!
-//! # Decoding
+//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE],
+//! which uses `-` and `_` instead. To use that alternative alphabet, use the
+//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine. This example skips the
+//! [`prelude`] to show what a more explicit `use` would look like.
 //!
-//! Just as for encoding, there are different decoding methods available.
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::URL_SAFE, Engine as _};
 //!
-//! | Method                   | Output                        | Allocates                      |
-//! | ------------------------ | ----------------------------- | ------------------------------ |
-//! | [Engine::decode]         | Returns a new `Vec<u8>`       | Always                         |
-//! | [Engine::decode_vec]     | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow    |
-//! | [Engine::decode_slice]   | Writes to provided `&[u8]`    | Never - fastest                |
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(URL_SAFE.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(URL_SAFE.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
 //!
-//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]).
+//! ### Padding characters
 //!
-//! Input can be invalid because it has invalid characters or invalid padding. The nature of how
-//! padding is checked depends on the engine's config.
-//! Whitespace in the input is invalid, just like any other non-base64 byte.
+//! Each base64 character represents 6 bits (2⁶ = 64) of the original binary
+//! data, and every 3 bytes of input binary data will encode to 4 base64
+//! characters (8 bits × 3 = 6 bits × 4 = 24 bits).
 //!
-//! # `Read` and `Write`
+//! When the input is not an exact multiple of 3 bytes in length, [canonical][]
+//! base64 encoders insert padding characters at the end, so that the output
+//! length is always a multiple of 4:
 //!
-//! To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with
-//! [read::DecoderReader].
+//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
 //!
-//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with
-//! [write::EncoderWriter].
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD, Engine as _};
 //!
-//! There is some performance overhead (15% or so) because of the necessary buffer shuffling --
-//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate.
+//! assert_eq!(STANDARD.encode(b""),    "");
+//! assert_eq!(STANDARD.encode(b"f"),   "Zg==");
+//! assert_eq!(STANDARD.encode(b"fo"),  "Zm8=");
+//! assert_eq!(STANDARD.encode(b"foo"), "Zm9v");
+//! ```
 //!
-//! # `Display`
+//! Canonical encoding ensures that the same input always yields exactly the
+//! same base64 output, byte-for-byte, regardless of input length. But the `=`
+//! padding characters aren’t necessary for decoding, and they may be omitted by using a
+//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration:
 //!
-//! See [display] for how to transparently base64-encode data via a `Display` implementation.
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
 //!
-//! # Examples
+//! assert_eq!(STANDARD_NO_PAD.encode(b""),    "");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"f"),   "Zg");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"fo"),  "Zm8");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"foo"), "Zm9v");
+//! ```
 //!
-//! ## Using predefined engines
+//! The pre-configured `NO_PAD` engines will reject inputs containing padding
+//! `=` characters. To encode without padding and still accept padding while
+//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with
+//! a [padding mode][engine::DecodePaddingMode] that permits it.
 //!
 #![cfg_attr(feature = "alloc", doc = "```")]
 #![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
-//! use base64::{Engine as _, engine::general_purpose};
-//!
-//! let orig = b"data";
-//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig);
-//! assert_eq!("ZGF0YQ", encoded);
-//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap());
-//!
-//! // or, URL-safe
-//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig);
+//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
+//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding));
 //! ```
 //!
-//! ## Custom alphabet, config, and engine
+//! ### Further customization
+//!
+//! Decoding and encoding behavior can be customized by creating an
+//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and
+//! [padding configuration][engine::GeneralPurposeConfig]:
 //!
 #![cfg_attr(feature = "alloc", doc = "```")]
 #![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
@@ -117,6 +141,81 @@
 //!
 //! ```
 //!
+//! ## Memory allocation
+//!
+//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods
+//! allocate memory for their results – `decode` returns a `Vec<u8>` and
+//! `encode` returns a `String`. To instead decode or encode into a buffer that
+//! you allocated, use one of the alternative methods:
+//!
+//! #### Decoding
+//!
+//! | Method                     | Output                        | Allocates memory              |
+//! | -------------------------- | ----------------------------- | ----------------------------- |
+//! | [`Engine::decode`]         | returns a new `Vec<u8>`       | always                        |
+//! | [`Engine::decode_vec`]     | appends to provided `Vec<u8>` | if `Vec` lacks capacity       |
+//! | [`Engine::decode_slice`]   | writes to provided `&mut [u8]` | never                        |
+//!
+//! #### Encoding
+//!
+//! | Method                     | Output                       | Allocates memory               |
+//! | -------------------------- | ---------------------------- | ------------------------------ |
+//! | [`Engine::encode`]         | returns a new `String`       | always                         |
+//! | [`Engine::encode_string`]  | appends to provided `String` | if `String` lacks capacity     |
+//! | [`Engine::encode_slice`]   | writes to provided `&mut [u8]` | never                       |
+//!
+//! ## Input and output
+//!
+//! The `base64` crate can [decode][Engine::decode()] and
+//! [encode][Engine::encode()] values in memory, while
+//! [`DecoderReader`][read::DecoderReader] and
+//! [`EncoderWriter`][write::EncoderWriter] provide streaming decoding and
+//! encoding for any [readable][std::io::Read] or [writable][std::io::Write]
+//! byte stream.
+//!
+//! #### Decoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut input = io::stdin();
+//! let mut decoder = DecoderReader::new(&mut input, &STANDARD);
+//! io::copy(&mut decoder, &mut io::stdout())?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Encoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut output = io::stdout();
+//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD);
+//! io::copy(&mut io::stdin(), &mut encoder)?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Display
+//!
+//! If you only need a base64 representation for implementing the
+//! [`Display`][std::fmt::Display] trait, use
+//! [`Base64Display`][display::Base64Display]:
+//!
+//! ```
+//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
+//!
+//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD);
+//! assert_eq!("base64: AAECAw==", format!("base64: {}", value));
+//! ```
+//!
 //! # Panics
 //!
 //! If length calculations result in overflowing `usize`, a panic will result.