Skip to content

Commit f230ff2

Browse files
authored Jan 17, 2025
perf(es/codegen): Remove needless allocations (#9890)
**Description:** `get_quoted_utf16` allocates too much
1 parent ef29ef6 commit f230ff2

File tree

2 files changed

+70
-77
lines changed

2 files changed

+70
-77
lines changed
 

‎.changeset/mean-boxes-design.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: minor
3+
swc_ecma_codegen: minor
4+
---
5+
6+
perf(es/codegen): Remove needless allocations

‎crates/swc_ecma_codegen/src/lib.rs

+64-77
Original file line numberDiff line numberDiff line change
@@ -4126,12 +4126,35 @@ fn get_ascii_only_ident(sym: &str, may_need_quote: bool, target: EsVersion) -> C
41264126
}
41274127

41284128
fn get_quoted_utf16(v: &str, ascii_only: bool, target: EsVersion) -> String {
4129-
let mut buf = String::with_capacity(v.len() + 2);
4130-
let mut iter = v.chars().peekable();
4129+
// Count quotes first to determine which quote character to use
4130+
let (mut single_quote_count, mut double_quote_count) = (0, 0);
4131+
for c in v.chars() {
4132+
match c {
4133+
'\'' => single_quote_count += 1,
4134+
'"' => double_quote_count += 1,
4135+
_ => {}
4136+
}
4137+
}
4138+
4139+
// Pick the quote character that occurs less often in the input, so fewer quotes need escaping
4140+
let quote_char = if double_quote_count > single_quote_count {
4141+
'\''
4142+
} else {
4143+
'"'
4144+
};
4145+
let escape_char = if quote_char == '\'' { '\'' } else { '"' };
4146+
let escape_count = if quote_char == '\'' {
4147+
single_quote_count
4148+
} else {
4149+
double_quote_count
4150+
};
41314151

4132-
let mut single_quote_count = 0;
4133-
let mut double_quote_count = 0;
4152+
// Add 2 for quotes, and 1 for each escaped quote
4153+
let capacity = v.len() + 2 + escape_count;
4154+
let mut buf = String::with_capacity(capacity);
4155+
buf.push(quote_char);
41344156

4157+
let mut iter = v.chars().peekable();
41354158
while let Some(c) = iter.next() {
41364159
match c {
41374160
'\x00' => {
@@ -4149,29 +4172,24 @@ fn get_quoted_utf16(v: &str, ascii_only: bool, target: EsVersion) -> String {
41494172
'\t' => buf.push('\t'),
41504173
'\\' => {
41514174
let next = iter.peek();
4152-
41534175
match next {
4154-
// TODO fix me - workaround for surrogate pairs
41554176
Some('u') => {
41564177
let mut inner_iter = iter.clone();
4157-
41584178
inner_iter.next();
41594179

41604180
let mut is_curly = false;
41614181
let mut next = inner_iter.peek();
41624182

41634183
if next == Some(&'{') {
41644184
is_curly = true;
4165-
41664185
inner_iter.next();
41674186
next = inner_iter.peek();
41684187
} else if next != Some(&'D') && next != Some(&'d') {
41694188
buf.push('\\');
41704189
}
41714190

41724191
if let Some(c @ 'D' | c @ 'd') = next {
4173-
let mut inner_buf = String::new();
4174-
4192+
let mut inner_buf = String::with_capacity(8);
41754193
inner_buf.push('\\');
41764194
inner_buf.push('u');
41774195

@@ -4180,21 +4198,17 @@ fn get_quoted_utf16(v: &str, ascii_only: bool, target: EsVersion) -> String {
41804198
}
41814199

41824200
inner_buf.push(*c);
4183-
41844201
inner_iter.next();
41854202

41864203
let mut is_valid = true;
4187-
41884204
for _ in 0..3 {
4189-
let c = inner_iter.next();
4190-
4191-
match c {
4192-
Some('0'..='9') | Some('a'..='f') | Some('A'..='F') => {
4193-
inner_buf.push(c.unwrap());
4205+
match inner_iter.next() {
4206+
Some(c @ '0'..='9') | Some(c @ 'a'..='f')
4207+
| Some(c @ 'A'..='F') => {
4208+
inner_buf.push(c);
41944209
}
41954210
_ => {
41964211
is_valid = false;
4197-
41984212
break;
41994213
}
42004214
}
@@ -4212,108 +4226,81 @@ fn get_quoted_utf16(v: &str, ascii_only: bool, target: EsVersion) -> String {
42124226

42134227
if is_valid {
42144228
let val_str = &inner_buf[range];
4215-
4216-
let v = u32::from_str_radix(val_str, 16).unwrap_or_else(|err| {
4217-
unreachable!(
4218-
"failed to parse {} as a hex value: {:?}",
4219-
val_str, err
4220-
)
4221-
});
4222-
4223-
if v > 0xffff {
4224-
buf.push_str(&inner_buf);
4225-
4226-
let end = if is_curly { 7 } else { 5 };
4227-
4228-
for _ in 0..end {
4229-
iter.next();
4229+
if let Ok(v) = u32::from_str_radix(val_str, 16) {
4230+
if v > 0xffff {
4231+
buf.push_str(&inner_buf);
4232+
let end = if is_curly { 7 } else { 5 };
4233+
for _ in 0..end {
4234+
iter.next();
4235+
}
4236+
} else if (0xd800..=0xdfff).contains(&v) {
4237+
buf.push('\\');
4238+
} else {
4239+
buf.push_str("\\\\");
42304240
}
4231-
} else if (0xd800..=0xdfff).contains(&v) {
4232-
buf.push('\\');
42334241
} else {
42344242
buf.push_str("\\\\");
42354243
}
42364244
} else {
4237-
buf.push_str("\\\\")
4245+
buf.push_str("\\\\");
42384246
}
42394247
} else if is_curly {
42404248
buf.push_str("\\\\");
42414249
} else {
42424250
buf.push('\\');
42434251
}
42444252
}
4245-
_ => {
4246-
buf.push_str("\\\\");
4247-
}
4253+
_ => buf.push_str("\\\\"),
42484254
}
42494255
}
4250-
'\'' => {
4251-
single_quote_count += 1;
4252-
buf.push('\'');
4253-
}
4254-
'"' => {
4255-
double_quote_count += 1;
4256-
buf.push('"');
4256+
c if c == escape_char => {
4257+
buf.push('\\');
4258+
buf.push(c);
42574259
}
42584260
'\x01'..='\x0f' => {
4259-
let _ = write!(buf, "\\x0{:x}", c as u8);
4261+
buf.push_str("\\x0");
4262+
write!(&mut buf, "{:x}", c as u8).unwrap();
42604263
}
42614264
'\x10'..='\x1f' => {
4262-
let _ = write!(buf, "\\x{:x}", c as u8);
4263-
}
4264-
'\x20'..='\x7e' => {
4265-
buf.push(c);
4265+
buf.push_str("\\x");
4266+
write!(&mut buf, "{:x}", c as u8).unwrap();
42664267
}
4268+
'\x20'..='\x7e' => buf.push(c),
42674269
'\u{7f}'..='\u{ff}' => {
42684270
if ascii_only || target <= EsVersion::Es5 {
4269-
let _ = write!(buf, "\\x{:x}", c as u8);
4271+
buf.push_str("\\x");
4272+
write!(&mut buf, "{:x}", c as u8).unwrap();
42704273
} else {
42714274
buf.push(c);
42724275
}
42734276
}
4274-
'\u{2028}' => {
4275-
buf.push_str("\\u2028");
4276-
}
4277-
'\u{2029}' => {
4278-
buf.push_str("\\u2029");
4279-
}
4280-
'\u{FEFF}' => {
4281-
buf.push_str("\\uFEFF");
4282-
}
4283-
_ => {
4277+
'\u{2028}' => buf.push_str("\\u2028"),
4278+
'\u{2029}' => buf.push_str("\\u2029"),
4279+
'\u{FEFF}' => buf.push_str("\\uFEFF"),
4280+
c => {
42844281
if c.is_ascii() {
42854282
buf.push(c);
42864283
} else if c > '\u{FFFF}' {
4287-
// if we've got this far the char isn't reserved and if the callee has specified
4288-
// we should output unicode for non-ascii chars then we have
4289-
// to make sure we output unicode that is safe for the target
4290-
// Es5 does not support code point escapes and so surrograte formula must be
4291-
// used
42924284
if target <= EsVersion::Es5 {
4293-
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
42944285
let h = ((c as u32 - 0x10000) / 0x400) + 0xd800;
42954286
let l = (c as u32 - 0x10000) % 0x400 + 0xdc00;
4296-
4297-
let _ = write!(buf, "\\u{:04X}\\u{:04X}", h, l);
4287+
write!(&mut buf, "\\u{:04X}\\u{:04X}", h, l).unwrap();
42984288
} else if ascii_only {
4299-
let _ = write!(buf, "\\u{{{:04X}}}", c as u32);
4289+
write!(&mut buf, "\\u{{{:04X}}}", c as u32).unwrap();
43004290
} else {
43014291
buf.push(c);
43024292
}
43034293
} else if ascii_only {
4304-
let _ = write!(buf, "\\u{:04X}", c as u16);
4294+
write!(&mut buf, "\\u{:04X}", c as u16).unwrap();
43054295
} else {
43064296
buf.push(c);
43074297
}
43084298
}
43094299
}
43104300
}
43114301

4312-
if double_quote_count > single_quote_count {
4313-
format!("'{}'", buf.replace('\'', "\\'"))
4314-
} else {
4315-
format!("\"{}\"", buf.replace('"', "\\\""))
4316-
}
4302+
buf.push(quote_char);
4303+
buf
43174304
}
43184305

43194306
fn handle_invalid_unicodes(s: &str) -> Cow<str> {

0 commit comments

Comments
 (0)
Please sign in to comment.