Skip to content

Commit 2bea793

Browse files
authored Feb 14, 2025
perf(hstr): Use thin arc for hash and length (#10033)
**Description:** This improves performance and reduces allocations by storing the hash and length of each atom in a thin arc. **Related issue:** Closes #10030
1 parent ffb7734 commit 2bea793

File tree

3 files changed

+65
-49
lines changed

3 files changed

+65
-49
lines changed
 

‎.changeset/eight-steaks-yawn.md

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
swc_core: minor
3+
hstr: minor
4+
---
5+
6+
perf(hstr): Use thin arc for hash and length

‎crates/hstr/src/dynamic.rs

+38-38
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,62 @@
11
use std::{
22
borrow::Cow,
3-
fmt::Debug,
3+
ffi::c_void,
44
hash::{BuildHasherDefault, Hash, Hasher},
5+
mem::ManuallyDrop,
6+
ops::Deref,
57
ptr::NonNull,
68
};
79

810
use rustc_hash::FxHasher;
9-
use triomphe::Arc;
11+
use triomphe::{HeaderWithLength, ThinArc};
1012

1113
use crate::{
1214
tagged_value::{TaggedValue, MAX_INLINE_LEN},
1315
Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
1416
};
1517

16-
#[derive(Debug)]
17-
pub(crate) struct Entry {
18-
pub string: Box<str>,
18+
pub(crate) struct Metadata {
1919
pub hash: u64,
2020
}
2121

22-
impl Entry {
23-
pub unsafe fn cast(ptr: TaggedValue) -> *const Entry {
24-
ptr.get_ptr().cast()
25-
}
22+
#[derive(Clone)]
23+
pub(crate) struct Item(ThinArc<HeaderWithLength<Metadata>, u8>);
2624

27-
pub unsafe fn deref_from<'i>(ptr: TaggedValue) -> &'i Entry {
28-
&*Self::cast(ptr)
29-
}
25+
impl Deref for Item {
26+
type Target = <ThinArc<HeaderWithLength<Metadata>, u8> as Deref>::Target;
3027

31-
pub unsafe fn restore_arc(v: TaggedValue) -> Arc<Entry> {
32-
let ptr = v.get_ptr() as *const Entry;
33-
Arc::from_raw(ptr)
28+
fn deref(&self) -> &Self::Target {
29+
&self.0
3430
}
3531
}
3632

37-
impl PartialEq for Entry {
38-
fn eq(&self, other: &Self) -> bool {
39-
// Assumption: `store_id` and `alias` don't matter for equality within a single
40-
// store (what we care about here). Compare hash first because that's cheaper.
41-
self.hash == other.hash && self.string == other.string
33+
/// TODO: Use real weak pointer
34+
type WeakItem = Item;
35+
36+
impl Hash for Item {
37+
fn hash<H: Hasher>(&self, state: &mut H) {
38+
state.write_u64(self.0.header.header.header.hash);
4239
}
4340
}
4441

45-
impl Eq for Entry {}
42+
pub(crate) unsafe fn deref_from(ptr: TaggedValue) -> ManuallyDrop<Item> {
43+
let item = restore_arc(ptr);
4644

47-
impl Hash for Entry {
48-
fn hash<H: Hasher>(&self, state: &mut H) {
49-
// Assumption: type H is an EntryHasher
50-
state.write_u64(self.hash);
51-
}
45+
ManuallyDrop::new(item)
46+
}
47+
48+
pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
49+
let ptr = v.get_ptr();
50+
Item(ThinArc::from_raw(ptr))
5251
}
5352

5453
/// A store that stores [Atom]s. Can be merged with other [AtomStore]s for
5554
/// better performance.
5655
///
5756
///
5857
/// # Merging [AtomStore]
59-
#[derive(Debug)]
6058
pub struct AtomStore {
61-
pub(crate) data: hashbrown::HashMap<Arc<Entry>, (), BuildEntryHasher>,
59+
pub(crate) data: hashbrown::HashMap<WeakItem, (), BuildEntryHasher>,
6260
}
6361

6462
impl Default for AtomStore {
@@ -96,11 +94,11 @@ where
9694

9795
let hash = calc_hash(&text);
9896
let entry = storage.insert_entry(text, hash);
99-
let entry = Arc::into_raw(entry);
97+
let entry = ThinArc::into_raw(entry.0) as *mut c_void;
10098

101-
let ptr: NonNull<Entry> = unsafe {
99+
let ptr: NonNull<c_void> = unsafe {
102100
// Safety: Arc::into_raw returns a non-null pointer
103-
NonNull::new_unchecked(entry as *mut Entry)
101+
NonNull::new_unchecked(entry)
104102
};
105103
debug_assert!(0 == ptr.as_ptr() as u8 & TAG_MASK);
106104
Atom {
@@ -109,22 +107,24 @@ where
109107
}
110108

111109
pub(crate) trait Storage {
112-
fn insert_entry(self, text: Cow<str>, hash: u64) -> Arc<Entry>;
110+
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item;
113111
}
114112

115113
impl Storage for &'_ mut AtomStore {
116114
#[inline(never)]
117-
fn insert_entry(self, text: Cow<str>, hash: u64) -> Arc<Entry> {
115+
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item {
118116
let (entry, _) = self
119117
.data
120118
.raw_entry_mut()
121-
.from_hash(hash, |key| key.hash == hash && *key.string == *text)
119+
.from_hash(hash, |key| {
120+
key.header.header.header.hash == hash && key.slice == *text.as_bytes()
121+
})
122122
.or_insert_with(move || {
123123
(
124-
Arc::new(Entry {
125-
string: text.into_owned().into_boxed_str(),
126-
hash,
127-
}),
124+
Item(ThinArc::from_header_and_slice(
125+
HeaderWithLength::new(Metadata { hash }, text.len()),
126+
text.as_bytes(),
127+
)),
128128
(),
129129
)
130130
});

‎crates/hstr/src/lib.rs

+21-11
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
#![cfg_attr(feature = "atom_size_128", feature(integer_atomics))]
22
//! See [Atom] for more information.
33
4+
use core::str;
45
use std::{
56
fmt::{Debug, Display},
67
hash::Hash,
7-
mem::{self, forget},
8+
mem::{self, forget, transmute},
89
num::NonZeroU8,
910
ops::Deref,
11+
str::from_utf8_unchecked,
1012
};
1113

1214
use debug_unreachable::debug_unreachable;
1315
use once_cell::sync::Lazy;
14-
use tagged_value::TaggedValue;
1516

1617
pub use crate::dynamic::AtomStore;
17-
use crate::dynamic::Entry;
18+
use crate::tagged_value::TaggedValue;
1819

1920
mod dynamic;
2021
mod global_store;
@@ -233,7 +234,13 @@ impl Atom {
233234
#[inline(never)]
234235
fn get_hash(&self) -> u64 {
235236
match self.tag() {
236-
DYNAMIC_TAG => unsafe { Entry::deref_from(self.unsafe_data) }.hash,
237+
DYNAMIC_TAG => {
238+
unsafe { crate::dynamic::deref_from(self.unsafe_data) }
239+
.header
240+
.header
241+
.header
242+
.hash
243+
}
237244
STATIC_TAG => {
238245
todo!("static hash")
239246
}
@@ -249,7 +256,10 @@ impl Atom {
249256
#[inline(never)]
250257
fn as_str(&self) -> &str {
251258
match self.tag() {
252-
DYNAMIC_TAG => &unsafe { Entry::deref_from(self.unsafe_data) }.string,
259+
DYNAMIC_TAG => unsafe {
260+
let item = crate::dynamic::deref_from(self.unsafe_data);
261+
from_utf8_unchecked(transmute::<&[u8], &'static [u8]>(&item.slice))
262+
},
253263
STATIC_TAG => {
254264
todo!("static as_str")
255265
}
@@ -277,14 +287,14 @@ impl PartialEq for Atom {
277287
}
278288

279289
if self.is_dynamic() && other.is_dynamic() {
280-
let te = unsafe { Entry::deref_from(self.unsafe_data) };
281-
let oe = unsafe { Entry::deref_from(other.unsafe_data) };
290+
let te = unsafe { crate::dynamic::deref_from(self.unsafe_data) };
291+
let oe = unsafe { crate::dynamic::deref_from(other.unsafe_data) };
282292

283-
if te.hash != oe.hash {
293+
if te.header.header.header.hash != oe.header.header.header.hash {
284294
return false;
285295
}
286296

287-
return te.string == oe.string;
297+
return te.slice == oe.slice;
288298
}
289299

290300
if self.get_hash() != other.get_hash() {
@@ -310,7 +320,7 @@ impl Drop for Atom {
310320
#[inline(always)]
311321
fn drop(&mut self) {
312322
if self.is_dynamic() {
313-
unsafe { drop(Entry::restore_arc(self.unsafe_data)) }
323+
unsafe { drop(crate::dynamic::restore_arc(self.unsafe_data)) }
314324
}
315325
}
316326
}
@@ -327,7 +337,7 @@ impl Atom {
327337
pub(crate) fn from_alias(alias: TaggedValue) -> Self {
328338
if alias.tag() & TAG_MASK == DYNAMIC_TAG {
329339
unsafe {
330-
let arc = Entry::restore_arc(alias);
340+
let arc = crate::dynamic::restore_arc(alias);
331341
forget(arc.clone());
332342
forget(arc);
333343
}

0 commit comments

Comments
 (0)
Please sign in to comment.