Skip to content

Commit

Permalink
automata: remove some unsafe code
Browse files Browse the repository at this point in the history
  • Loading branch information
joshlf committed May 4, 2024
1 parent b12a276 commit 2a9b404
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 42 deletions.
1 change: 1 addition & 0 deletions regex-automata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ aho-corasick = { version = "1.0.0", optional = true, default-features = false }
log = { version = "0.4.14", optional = true }
memchr = { version = "2.6.0", optional = true, default-features = false }
regex-syntax = { path = "../regex-syntax", version = "0.8.2", optional = true, default-features = false }
zerocopy = { version = "0.7.32", default-features = false, features = ["derive"] }

[dev-dependencies]
anyhow = "1.0.69"
Expand Down
24 changes: 6 additions & 18 deletions regex-automata/src/dfa/accel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#[cfg(feature = "dfa-build")]
use alloc::{vec, vec::Vec};

use zerocopy::{AsBytes, FromBytes};

use crate::util::{
int::Pointer,
memchr,
Expand Down Expand Up @@ -208,15 +210,9 @@ impl<'a> Accels<&'a [AccelTy]> {
wire::check_alignment::<AccelTy>(slice)?;
let accel_tys = &slice[..accel_tys_bytes_len];
slice = &slice[accel_tys_bytes_len..];
// SAFETY: We've checked the length and alignment above, and since
// slice is just bytes and AccelTy is just a u32, we can safely cast to
// a slice of &[AccelTy].
let accels = unsafe {
core::slice::from_raw_parts(
accel_tys.as_ptr().cast::<AccelTy>(),
accel_tys_len,
)
};
// PANICS: We've just checked the length and alignment, so this is
// guaranteed to succeed.
let accels = <AccelTy>::slice_from(accel_tys).unwrap();
Ok((Accels { accels }, slice.as_ptr().as_usize() - slice_start))
}
}
Expand All @@ -235,15 +231,7 @@ impl<A: AsRef<[AccelTy]>> Accels<A> {

/// Return the bytes representing the serialization of the accelerators.
pub fn as_bytes(&self) -> &[u8] {
let accels = self.accels.as_ref();
// SAFETY: This is safe because accels is a just a slice of AccelTy,
// and u8 always has a smaller alignment.
unsafe {
core::slice::from_raw_parts(
accels.as_ptr().cast::<u8>(),
accels.len() * ACCEL_TY_SIZE,
)
}
self.accels.as_ref().as_bytes()
}

/// Returns the memory usage, in bytes, of these accelerators.
Expand Down
29 changes: 27 additions & 2 deletions regex-automata/src/util/primitives.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ use core::num::NonZeroUsize;
#[cfg(feature = "alloc")]
use alloc::vec::Vec;

use zerocopy::{FromBytes,AsBytes, FromZeroes};

use crate::util::int::{Usize, U16, U32, U64};

/// A `usize` that can never be `usize::MAX`.
Expand Down Expand Up @@ -138,7 +140,18 @@ impl core::fmt::Debug for NonMaxUsize {
/// an invalid value can be done in entirely safe code. This may in turn result
/// in panics or silent logical errors.
#[derive(
Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord,
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
PartialOrd,
Ord,
FromZeroes,
FromBytes,
AsBytes,
)]
#[repr(transparent)]
pub struct SmallIndex(u32);
Expand Down Expand Up @@ -746,7 +759,19 @@ pub struct PatternID(SmallIndex);
///
/// See the [`SmallIndex`] type for more information about what it means for
/// a state ID to be a "small index."
#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)]
#[derive(
Clone,
Copy,
Default,
Eq,
Hash,
PartialEq,
PartialOrd,
Ord,
FromZeroes,
FromBytes,
AsBytes,
)]
#[repr(transparent)]
pub struct StateID(SmallIndex);

Expand Down
29 changes: 7 additions & 22 deletions regex-automata/src/util/wire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use core::{cmp, mem::size_of};

#[cfg(feature = "alloc")]
use alloc::{vec, vec::Vec};
use zerocopy::{FromBytes, AsBytes};

use crate::util::{
int::Pointer,
Expand Down Expand Up @@ -268,32 +269,16 @@ impl core::fmt::Display for DeserializeError {
/// Safely converts a `&[u32]` to `&[StateID]` with zero cost.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] {
// SAFETY: This is safe because StateID is defined to have the same memory
// representation as a u32 (it is repr(transparent)). While not every u32
// is a "valid" StateID, callers are not permitted to rely on the validity
// of StateIDs for memory safety. It can only lead to logical errors. (This
// is why StateID::new_unchecked is safe.)
unsafe {
core::slice::from_raw_parts(
slice.as_ptr().cast::<StateID>(),
slice.len(),
)
}
// PANICS: This is guaranteed to succeed since `u32` and `StateID` have the
// same size and alignment.
StateID::slice_from(slice.as_bytes()).unwrap()
}

/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost.
pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] {
// SAFETY: This is safe because StateID is defined to have the same memory
// representation as a u32 (it is repr(transparent)). While not every u32
// is a "valid" StateID, callers are not permitted to rely on the validity
// of StateIDs for memory safety. It can only lead to logical errors. (This
// is why StateID::new_unchecked is safe.)
unsafe {
core::slice::from_raw_parts_mut(
slice.as_mut_ptr().cast::<StateID>(),
slice.len(),
)
}
// PANICS: This is guaranteed to succeed since `u32` and `StateID` have the
// same size and alignment.
StateID::mut_slice_from(slice.as_bytes_mut()).unwrap()
}

/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost.
Expand Down

0 comments on commit 2a9b404

Please sign in to comment.