Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

automata: remove some unsafe code #1189

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions regex-automata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ aho-corasick = { version = "1.0.0", optional = true, default-features = false }
log = { version = "0.4.14", optional = true }
memchr = { version = "2.6.0", optional = true, default-features = false }
regex-syntax = { path = "../regex-syntax", version = "0.8.2", optional = true, default-features = false }
zerocopy = { version = "0.7.32", default-features = false, features = ["derive"] }

[dev-dependencies]
anyhow = "1.0.69"
Expand Down
24 changes: 6 additions & 18 deletions regex-automata/src/dfa/accel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#[cfg(feature = "dfa-build")]
use alloc::{vec, vec::Vec};

use zerocopy::{AsBytes, FromBytes};

use crate::util::{
int::Pointer,
memchr,
Expand Down Expand Up @@ -208,15 +210,9 @@ impl<'a> Accels<&'a [AccelTy]> {
wire::check_alignment::<AccelTy>(slice)?;
let accel_tys = &slice[..accel_tys_bytes_len];
slice = &slice[accel_tys_bytes_len..];
// SAFETY: We've checked the length and alignment above, and since
// slice is just bytes and AccelTy is just a u32, we can safely cast to
// a slice of &[AccelTy].
let accels = unsafe {
core::slice::from_raw_parts(
accel_tys.as_ptr().cast::<AccelTy>(),
accel_tys_len,
)
};
// PANICS: We've just checked the length and alignment, so this is
// guaranteed to succeed.
let accels = <AccelTy>::slice_from(accel_tys).unwrap();
Ok((Accels { accels }, slice.as_ptr().as_usize() - slice_start))
}
}
Expand All @@ -235,15 +231,7 @@ impl<A: AsRef<[AccelTy]>> Accels<A> {

/// Return the bytes representing the serialization of the accelerators.
pub fn as_bytes(&self) -> &[u8] {
let accels = self.accels.as_ref();
// SAFETY: This is safe because accels is a just a slice of AccelTy,
// and u8 always has a smaller alignment.
unsafe {
core::slice::from_raw_parts(
accels.as_ptr().cast::<u8>(),
accels.len() * ACCEL_TY_SIZE,
)
}
self.accels.as_ref().as_bytes()
}

/// Returns the memory usage, in bytes, of these accelerators.
Expand Down
29 changes: 27 additions & 2 deletions regex-automata/src/util/primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ use core::num::NonZeroUsize;
#[cfg(feature = "alloc")]
use alloc::vec::Vec;

use zerocopy::{AsBytes, FromBytes, FromZeroes};

use crate::util::int::{Usize, U16, U32, U64};

/// A `usize` that can never be `usize::MAX`.
Expand Down Expand Up @@ -138,7 +140,18 @@ impl core::fmt::Debug for NonMaxUsize {
/// an invalid value can be done in entirely safe code. This may in turn result
/// in panics or silent logical errors.
#[derive(
Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
PartialOrd,
Ord,
FromZeroes,
FromBytes,
AsBytes,
)]
#[repr(transparent)]
pub struct SmallIndex(u32);
Expand Down Expand Up @@ -746,7 +759,19 @@ pub struct PatternID(SmallIndex);
///
/// See the [`SmallIndex`] type for more information about what it means for
/// a state ID to be a "small index."
#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[derive(
Clone,
Copy,
Default,
Eq,
Hash,
PartialEq,
PartialOrd,
Ord,
FromZeroes,
FromBytes,
AsBytes,
)]
#[repr(transparent)]
pub struct StateID(SmallIndex);

Expand Down
29 changes: 7 additions & 22 deletions regex-automata/src/util/wire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use core::{cmp, mem::size_of};

#[cfg(feature = "alloc")]
use alloc::{vec, vec::Vec};
use zerocopy::{AsBytes, FromBytes};

use crate::util::{
int::Pointer,
Expand Down Expand Up @@ -268,32 +269,16 @@ impl core::fmt::Display for DeserializeError {
/// Safely converts a `&[u32]` to `&[StateID]` with zero cost.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] {
// SAFETY: This is safe because StateID is defined to have the same memory
// representation as a u32 (it is repr(transparent)). While not every u32
// is a "valid" StateID, callers are not permitted to rely on the validity
// of StateIDs for memory safety. It can only lead to logical errors. (This
// is why StateID::new_unchecked is safe.)
unsafe {
core::slice::from_raw_parts(
slice.as_ptr().cast::<StateID>(),
slice.len(),
)
}
// PANICS: This is guaranteed to succeed since `u32` and `StateID` have the
// same size and alignment.
StateID::slice_from(slice.as_bytes()).unwrap()
}

/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost.
pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] {
// SAFETY: This is safe because StateID is defined to have the same memory
// representation as a u32 (it is repr(transparent)). While not every u32
// is a "valid" StateID, callers are not permitted to rely on the validity
// of StateIDs for memory safety. It can only lead to logical errors. (This
// is why StateID::new_unchecked is safe.)
unsafe {
core::slice::from_raw_parts_mut(
slice.as_mut_ptr().cast::<StateID>(),
slice.len(),
)
}
// PANICS: This is guaranteed to succeed since `u32` and `StateID` have the
// same size and alignment.
StateID::mut_slice_from(slice.as_bytes_mut()).unwrap()
}

/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost.
Expand Down