Skip to content

Commit

Permalink
Merge pull request #4803 from epage/osstr
Browse files Browse the repository at this point in the history
fix(lex): Clarify unsafe safety
  • Loading branch information
epage committed Mar 28, 2023
2 parents 5b101eb + 73e4025 commit 2c19acc
Showing 1 changed file with 30 additions and 31 deletions.
61 changes: 30 additions & 31 deletions clap_lex/src/ext.rs
Expand Up @@ -213,8 +213,7 @@ pub trait OsStrExt: private::Sealed {

impl OsStrExt for OsStr {
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
std::str::from_utf8(bytes)
}

Expand All @@ -223,23 +222,22 @@ impl OsStrExt for OsStr {
}

fn find(&self, needle: &str) -> Option<usize> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
(0..=self.len().checked_sub(needle.len())?)
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
}

fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
// SAFETY: Only interacting with `OsStr` as `&str
unsafe { to_os_str(s) }
// SAFETY:
// - This came from `to_bytes`
// - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
unsafe { to_os_str_unchecked(s) }
})
}
fn starts_with(&self, prefix: &str) -> bool {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
bytes.starts_with(prefix.as_bytes())
}

Expand All @@ -252,24 +250,24 @@ impl OsStrExt for OsStr {
}

fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
// BUG: This is unsafe and has been deprecated
let bytes = to_bytes(self);
unsafe {
let bytes = to_bytes(self);
// BUG: This is unsafe and has been deprecated
let (first, second) = bytes.split_at(index);
(to_os_str(first), to_os_str(second))
(to_os_str_unchecked(first), to_os_str_unchecked(second))
}
}

fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
let start = self.find(needle)?;
let end = start + needle.len();
// SAFETY: Only interacting with `OsStr` as `&str
unsafe {
let haystack = to_bytes(self);
let first = &haystack[0..start];
let second = &haystack[end..];
Some((to_os_str(first), to_os_str(second)))
}
let haystack = to_bytes(self);
let first = &haystack[0..start];
let second = &haystack[end..];
// SAFETY:
// - This came from `to_bytes`
// - Since `needle` is `&str`, any split will be along UTF-8 boundarie
unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
}
}

Expand All @@ -281,12 +279,14 @@ mod private {

/// Allow access to raw bytes
///
/// # Safety
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
unsafe fn to_bytes(s: &OsStr) -> &[u8] {
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since its dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
Expand All @@ -295,17 +295,16 @@ unsafe fn to_bytes(s: &OsStr) -> &[u8] {
//
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
// but its in limbo
std::mem::transmute(s)
unsafe { std::mem::transmute(s) }
}

/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
unsafe fn to_os_str(s: &[u8]) -> &OsStr {
/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
Expand Down Expand Up @@ -351,5 +350,5 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
let bytes = to_bytes(os);
let (first, second) = bytes.split_at(index);
(to_os_str(first), to_os_str(second))
(to_os_str_unchecked(first), to_os_str_unchecked(second))
}

0 comments on commit 2c19acc

Please sign in to comment.