diff --git a/clap_lex/src/ext.rs b/clap_lex/src/ext.rs index 3186580424b..0e6b33ebb40 100644 --- a/clap_lex/src/ext.rs +++ b/clap_lex/src/ext.rs @@ -213,8 +213,7 @@ pub trait OsStrExt: private::Sealed { impl OsStrExt for OsStr { fn try_str(&self) -> Result<&str, std::str::Utf8Error> { - // SAFETY: Only interacting with `OsStr` as `&str - let bytes = unsafe { to_bytes(self) }; + let bytes = to_bytes(self); std::str::from_utf8(bytes) } @@ -223,23 +222,22 @@ impl OsStrExt for OsStr { } fn find(&self, needle: &str) -> Option { - // SAFETY: Only interacting with `OsStr` as `&str - let bytes = unsafe { to_bytes(self) }; + let bytes = to_bytes(self); (0..=self.len().checked_sub(needle.len())?) .find(|&x| bytes[x..].starts_with(needle.as_bytes())) } fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> { - // SAFETY: Only interacting with `OsStr` as `&str - let bytes = unsafe { to_bytes(self) }; + let bytes = to_bytes(self); bytes.strip_prefix(prefix.as_bytes()).map(|s| { - // SAFETY: Only interacting with `OsStr` as `&str - unsafe { to_os_str(s) } + // SAFETY: + // - This came from `to_bytes` + // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie + unsafe { to_os_str_unchecked(s) } }) } fn starts_with(&self, prefix: &str) -> bool { - // SAFETY: Only interacting with `OsStr` as `&str - let bytes = unsafe { to_bytes(self) }; + let bytes = to_bytes(self); bytes.starts_with(prefix.as_bytes()) } @@ -252,24 +250,24 @@ impl OsStrExt for OsStr { } fn split_at(&self, index: usize) -> (&OsStr, &OsStr) { - // BUG: This is unsafe and has been deprecated + let bytes = to_bytes(self); unsafe { - let bytes = to_bytes(self); + // BUG: This is unsafe and has been deprecated let (first, second) = bytes.split_at(index); - (to_os_str(first), to_os_str(second)) + (to_os_str_unchecked(first), to_os_str_unchecked(second)) } } fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> { let start = self.find(needle)?; let end = start + needle.len(); - // SAFETY: Only interacting with `OsStr` as `&str - unsafe { - let haystack = to_bytes(self); - let first = &haystack[0..start]; - let second = &haystack[end..]; - Some((to_os_str(first), to_os_str(second))) - } + let haystack = to_bytes(self); + let first = &haystack[0..start]; + let second = &haystack[end..]; + // SAFETY: + // - This came from `to_bytes` + // - Since `needle` is `&str`, any split will be along UTF-8 boundarie + unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) } } } @@ -281,12 +279,14 @@ mod private { /// Allow access to raw bytes /// -/// # Safety +/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with +/// 7-bit ASCII or `&str` /// -/// - The bytes only make sense when compared with ASCII or `&str` -/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be -/// encoded, even within the same version of this crate (since its dependent on rustc version) -unsafe fn to_bytes(s: &OsStr) -> &[u8] { +/// # Compatibility +/// +/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate +/// (since its dependent on rustc) +fn to_bytes(s: &OsStr) -> &[u8] { // SAFETY: // - Lifetimes are the same // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) @@ -295,17 +295,16 @@ unsafe fn to_bytes(s: &OsStr) -> &[u8] { // // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290) // but its in limbo - std::mem::transmute(s) + unsafe { std::mem::transmute(s) } } /// Restore raw bytes as `OsStr` /// /// # Safety /// -/// - The bytes only make sense when compared with ASCII or `&str` -/// - This must never be serialized as there is no guarantee at how invalid UTF-8 will be -/// encoded, even within the same version of this crate (since its dependent on rustc version) -unsafe fn to_os_str(s: &[u8]) -> &OsStr { +/// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary +/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries +unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr { // SAFETY: // - Lifetimes are the same // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`) @@ -351,5 +350,5 @@ impl<'s, 'n> Iterator for Split<'s, 'n> { pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) { let bytes = to_bytes(os); let (first, second) = bytes.split_at(index); - (to_os_str(first), to_os_str(second)) + (to_os_str_unchecked(first), to_os_str_unchecked(second)) }