Skip to content

Commit

Permalink
Rollup merge of rust-lang#108801 - fee1-dead-contrib:c-str, r=compile…
Browse files Browse the repository at this point in the history
…r-errors

Implement RFC 3348, `c"foo"` literals

RFC: rust-lang/rfcs#3348
Tracking issue: rust-lang#105723
  • Loading branch information
Dylan-DPC committed May 5, 2023
2 parents 3502e48 + d30c668 commit 4891f02
Show file tree
Hide file tree
Showing 33 changed files with 500 additions and 153 deletions.
3 changes: 3 additions & 0 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1821,6 +1821,8 @@ pub enum LitKind {
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
/// non-utf8, and symbols only allow utf8 strings.
ByteStr(Lrc<[u8]>, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
CStr(Lrc<[u8]>, StrStyle),
/// A byte char (`b'f'`).
Byte(u8),
/// A character literal (`'a'`).
Expand Down Expand Up @@ -1875,6 +1877,7 @@ impl LitKind {
// unsuffixed variants
LitKind::Str(..)
| LitKind::ByteStr(..)
| LitKind::CStr(..)
| LitKind::Byte(..)
| LitKind::Char(..)
| LitKind::Int(_, LitIntType::Unsuffixed)
Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ pub enum LitKind {
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
CStr,
CStrRaw(u8),
Err,
}

Expand Down Expand Up @@ -141,6 +143,10 @@ impl fmt::Display for Lit {
delim = "#".repeat(n as usize),
string = symbol
)?,
CStr => write!(f, "c\"{symbol}\"")?,
CStrRaw(n) => {
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
}
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
}

Expand Down Expand Up @@ -170,6 +176,7 @@ impl LitKind {
Float => "float",
Str | StrRaw(..) => "string",
ByteStr | ByteStrRaw(..) => "byte string",
CStr | CStrRaw(..) => "C string",
Err => "error",
}
}
Expand Down
63 changes: 62 additions & 1 deletion compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
use crate::token::{self, Token};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_lexer::unescape::{
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
Mode,
};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ops::Range;
use std::{ascii, fmt, str};

// Escapes a string, represented as a symbol. Reuses the original symbol,
Expand Down Expand Up @@ -35,6 +39,7 @@ pub enum LitError {
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge(u32),
NulInCStr(Range<usize>),
}

impl LitKind {
Expand Down Expand Up @@ -158,6 +163,52 @@ impl LitKind {

LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
}
token::CStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked)
}
token::CStrRaw(n) => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n))
}
token::Err => LitKind::Err,
})
}
Expand Down Expand Up @@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
string = symbol
)?;
}
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
}
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
}
LitKind::Int(n, ty) => {
write!(f, "{n}")?;
match ty {
Expand Down Expand Up @@ -237,6 +296,8 @@ impl MetaItemLit {
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
LitKind::Byte(_) => token::Byte,
LitKind::Char(_) => token::Char,
LitKind::Int(..) => token::Integer,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_ast_passes/src/feature_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) {
}
};
}
gate_all!(c_str_literals, "`c\"..\"` literals are experimental");
gate_all!(
if_let_guard,
"`if let` guards are experimental",
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String {
token::ByteStrRaw(n) => {
format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
}
token::CStr => format!("c\"{symbol}\""),
token::CStrRaw(n) => {
format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))
}
token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
};

Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_builtin_macros/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ pub fn expand_concat(
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.span_err(e.span, "cannot concatenate a C string literal");
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ fn invalid_type_err(
};
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::CStr(_, _)) => {
// FIXME(c_str_literals): should concatenation of C string literals
// include the null bytes in the end?
cx.span_err(span, "cannot concatenate C string literals");
}
Ok(ast::LitKind::Char(_)) => {
let sugg =
snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_expand/src/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
token::StrRaw(n) => LitKind::StrRaw(n),
token::ByteStr => LitKind::ByteStr,
token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
token::CStr => LitKind::CStr,
token::CStrRaw(n) => LitKind::CStrRaw(n),
token::Err => LitKind::Err,
token::Bool => unreachable!(),
}
Expand All @@ -78,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind {
LitKind::StrRaw(n) => token::StrRaw(n),
LitKind::ByteStr => token::ByteStr,
LitKind::ByteStrRaw(n) => token::ByteStrRaw(n),
LitKind::CStr => token::CStr,
LitKind::CStrRaw(n) => token::CStrRaw(n),
LitKind::Err => token::Err,
}
}
Expand Down Expand Up @@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
| token::LitKind::StrRaw(_)
| token::LitKind::ByteStr
| token::LitKind::ByteStrRaw(_)
| token::LitKind::CStr
| token::LitKind::CStrRaw(_)
| token::LitKind::Err => return Err(()),
token::LitKind::Integer | token::LitKind::Float => {}
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_feature/src/active.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ declare_features! (
(active, async_closure, "1.37.0", Some(62290), None),
/// Allows async functions to be declared, implemented, and used in traits.
(active, async_fn_in_trait, "1.66.0", Some(91611), None),
/// Allows `c"foo"` literals.
(active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
/// Treat `extern "C"` function as nounwind.
(active, c_unwind, "1.52.0", Some(74990), None),
/// Allows using C-variadics.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_hir/src/lang_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ language_item_table! {
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;

String, sym::String, string, Target::Struct, GenericRequirement::None;
CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None;
}

pub enum GenericRequirement {
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
opt_ty.unwrap_or_else(|| self.next_float_var())
}
ast::LitKind::Bool(_) => tcx.types.bool,
ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
tcx.lifetimes.re_static,
tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
.skip_binder(),
),
ast::LitKind::Err => tcx.ty_error_misc(),
}
}
Expand Down
90 changes: 57 additions & 33 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,16 @@ pub enum LiteralKind {
Str { terminated: bool },
/// "b"abc"", "b"abc"
ByteStr { terminated: bool },
/// `c"abc"`, `c"abc`
CStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option<u8> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option<u8> },
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
RawCStr { n_hashes: Option<u8> },
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
Expand Down Expand Up @@ -357,39 +361,18 @@ impl Cursor<'_> {
},

// Byte literal, byte string literal, raw byte string literal or identifier.
'b' => match (self.first(), self.second()) {
('\'', _) => {
self.bump();
let terminated = self.single_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = Byte { terminated };
Literal { kind, suffix_start }
}
('"', _) => {
self.bump();
let terminated = self.double_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = ByteStr { terminated };
Literal { kind, suffix_start }
}
('r', '"') | ('r', '#') => {
self.bump();
let res = self.raw_double_quoted_string(2);
let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
let kind = RawByteStr { n_hashes: res.ok() };
Literal { kind, suffix_start }
}
_ => self.ident_or_unknown_prefix(),
},
'b' => self.c_or_byte_string(
|terminated| ByteStr { terminated },
|n_hashes| RawByteStr { n_hashes },
Some(|terminated| Byte { terminated }),
),

// c-string literal, raw c-string literal or identifier.
'c' => self.c_or_byte_string(
|terminated| CStr { terminated },
|n_hashes| RawCStr { n_hashes },
None,
),

// Identifier (this should be checked after other variant that can
// start as identifier).
Expand Down Expand Up @@ -553,6 +536,47 @@ impl Cursor<'_> {
}
}

fn c_or_byte_string(
&mut self,
mk_kind: impl FnOnce(bool) -> LiteralKind,
mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
single_quoted: Option<fn(bool) -> LiteralKind>,
) -> TokenKind {
match (self.first(), self.second(), single_quoted) {
('\'', _, Some(mk_kind)) => {
self.bump();
let terminated = self.single_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = mk_kind(terminated);
Literal { kind, suffix_start }
}
('"', _, _) => {
self.bump();
let terminated = self.double_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = mk_kind(terminated);
Literal { kind, suffix_start }
}
('r', '"', _) | ('r', '#', _) => {
self.bump();
let res = self.raw_double_quoted_string(2);
let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
let kind = mk_kind_raw(res.ok());
Literal { kind, suffix_start }
}
_ => self.ident_or_unknown_prefix(),
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
Expand Down

0 comments on commit 4891f02

Please sign in to comment.