Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lex): Clarify unsafe safety #4803

Merged
merged 1 commit into from Mar 28, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 30 additions & 31 deletions clap_lex/src/ext.rs
Expand Up @@ -213,8 +213,7 @@ pub trait OsStrExt: private::Sealed {

impl OsStrExt for OsStr {
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
std::str::from_utf8(bytes)
}

Expand All @@ -223,23 +222,22 @@ impl OsStrExt for OsStr {
}

fn find(&self, needle: &str) -> Option<usize> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
(0..=self.len().checked_sub(needle.len())?)
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
}

fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
// SAFETY: Only interacting with `OsStr` as `&str
unsafe { to_os_str(s) }
// SAFETY:
// - This came from `to_bytes`
// - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
unsafe { to_os_str_unchecked(s) }
})
}
fn starts_with(&self, prefix: &str) -> bool {
// SAFETY: Only interacting with `OsStr` as `&str
let bytes = unsafe { to_bytes(self) };
let bytes = to_bytes(self);
bytes.starts_with(prefix.as_bytes())
}

Expand All @@ -252,24 +250,24 @@ impl OsStrExt for OsStr {
}

fn split_at(&self, index: usize) -> (&OsStr, &OsStr) {
// BUG: This is unsafe and has been deprecated
let bytes = to_bytes(self);
unsafe {
let bytes = to_bytes(self);
// BUG: This is unsafe and has been deprecated
let (first, second) = bytes.split_at(index);
(to_os_str(first), to_os_str(second))
(to_os_str_unchecked(first), to_os_str_unchecked(second))
}
}

fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
let start = self.find(needle)?;
let end = start + needle.len();
// SAFETY: Only interacting with `OsStr` as `&str
unsafe {
let haystack = to_bytes(self);
let first = &haystack[0..start];
let second = &haystack[end..];
Some((to_os_str(first), to_os_str(second)))
}
let haystack = to_bytes(self);
let first = &haystack[0..start];
let second = &haystack[end..];
// SAFETY:
// - This came from `to_bytes`
// - Since `needle` is `&str`, any split will be along UTF-8 boundarie
unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
}
}

Expand All @@ -281,12 +279,14 @@ mod private {

/// Allow access to raw bytes
///
/// # Safety
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`
///
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
unsafe fn to_bytes(s: &OsStr) -> &[u8] {
/// # Compatibility
///
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
/// (since its dependent on rustc)
fn to_bytes(s: &OsStr) -> &[u8] {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
Expand All @@ -295,17 +295,16 @@ unsafe fn to_bytes(s: &OsStr) -> &[u8] {
//
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
// but its in limbo
std::mem::transmute(s)
unsafe { std::mem::transmute(s) }
}

/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - The bytes only make sense when compared with ASCII or `&str`
/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be
/// encoded, even within the same version of this crate (since its dependent on rustc version)
unsafe fn to_os_str(s: &[u8]) -> &OsStr {
/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
// SAFETY:
// - Lifetimes are the same
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
Expand Down Expand Up @@ -351,5 +350,5 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
let bytes = to_bytes(os);
let (first, second) = bytes.split_at(index);
(to_os_str(first), to_os_str(second))
(to_os_str_unchecked(first), to_os_str_unchecked(second))
}