Skip to content

Commit

Permalink
switch to use only 3 kinds of hashtable
Browse files Browse the repository at this point in the history
use only hashtables with fixed sizes and bit shifts, which allows the
bounds checks to be removed.
  • Loading branch information
PSeitz committed Feb 6, 2023
1 parent 8032df4 commit 20bb1ab
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 149 deletions.
16 changes: 6 additions & 10 deletions src/block/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
//! high performance. It has fixed memory usage, which, contrary to other approaches, makes it less
//! memory hungry.

use crate::block::hashtable::get_table_size;
use crate::block::hashtable::HashTable;
use crate::block::hashtable::{HashTableU16, HashTableU32, HashTableUsize};
use crate::block::END_OFFSET;
use crate::block::LZ4_MIN_LENGTH;
use crate::block::MAX_DISTANCE;
Expand All @@ -18,6 +16,8 @@ use alloc::vec::Vec;
#[cfg(feature = "safe-encode")]
use core::convert::TryInto;

use super::hashtable::HashTable4KU16;
use super::hashtable::HashTable8K;
use super::{CompressError, WINDOW_SIZE};

pub(crate) fn get_vec_with_size(size: usize) -> Vec<u8> {
Expand Down Expand Up @@ -346,7 +346,7 @@ fn backtrack_match(
/// show significant improvement though.
// NOTE(review): this note described the previous `#[inline(never)]` attribute; it is
// outdated now that the attribute below was changed to `#[inline]` — update or remove it.
#[inline(never)]
#[inline]
pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool>(
input: &[u8],
input_pos: usize,
Expand Down Expand Up @@ -596,17 +596,13 @@ pub(crate) fn compress_into_sink_with_dict<const USE_DICT: bool>(
output: &mut SliceSink,
mut dict_data: &[u8],
) -> Result<usize, CompressError> {
let (dict_size, dict_bitshift) = get_table_size(input.len());
if dict_data.len() + input.len() < u16::MAX as usize {
let mut dict = HashTableU16::new(dict_size, dict_bitshift);
init_dict(&mut dict, &mut dict_data);
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
} else if dict_data.len() + input.len() < u32::MAX as usize {
let mut dict = HashTableU32::new(dict_size, dict_bitshift);
let mut dict = HashTable4KU16::new();
init_dict(&mut dict, &mut dict_data);
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
} else {
let mut dict = HashTableUsize::new(dict_size, dict_bitshift);
// For some reason using a 4K hashtable causes a performance regression (memory layout?)
let mut dict = HashTable8K::new();
init_dict(&mut dict, &mut dict_data);
compress_internal::<_, USE_DICT>(input, 0, output, &mut dict, dict_data, dict_data.len())
}
Expand Down
178 changes: 41 additions & 137 deletions src/block/hashtable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use alloc::boxed::Box;
use alloc::vec::Vec;
#[cfg(feature = "frame")]
use core::convert::TryInto;

/// The Hashtable trait used by the compression to store hashed bytes to their position.
Expand Down Expand Up @@ -51,64 +49,55 @@ pub trait HashTable {
}
}

const HASHTABLE_SIZE_4K: usize = 4 * 1024;
const HASHTABLE_BIT_SHIFT_4K: usize = 4;

#[derive(Debug)]
pub struct HashTableUsize {
dict: Vec<usize>,
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
dict_bitshift: usize,
#[repr(align(64))]
pub struct HashTable4KU16 {
dict: Box<[u16; HASHTABLE_SIZE_4K]>,
}

impl HashTableUsize {
impl HashTable4KU16 {
#[inline]
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
let dict = alloc::vec![0; dict_size];
Self {
dict,
dict_bitshift,
}
pub fn new() -> Self {
// This generates more efficient assembly in contrast to Box::new(slice), because of an
// optimized call alloc_zeroed, vs. alloc + memset
// try_into is optimized away
let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
.into_boxed_slice()
.try_into()
.unwrap();
Self { dict }
}
}

impl HashTable for HashTableUsize {
#[inline]
#[cfg(feature = "safe-encode")]
fn get_at(&self, hash: usize) -> usize {
self.dict[hash >> self.dict_bitshift] as usize
}
impl HashTable for HashTable4KU16 {
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn get_at(&self, hash: usize) -> usize {
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
}

#[inline]
#[cfg(feature = "safe-encode")]
fn put_at(&mut self, hash: usize, val: usize) {
self.dict[hash >> self.dict_bitshift] = val;
self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize
}
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn put_at(&mut self, hash: usize, val: usize) {
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val;
self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16;
}

#[inline]
fn clear(&mut self) {
self.dict.fill(0);
}
#[inline]
fn get_hash_at(input: &[u8], pos: usize) -> usize {
hash(super::get_batch(input, pos)) as usize
}
}

const HASHTABLE_SIZE_4K: usize = 4 * 1024;
const HASHTABLE_BIT_SHIFT_4K: usize = 4;

#[derive(Debug)]
#[repr(align(64))]
#[cfg(feature = "frame")]
pub struct HashTable4K {
dict: Box<[u32; HASHTABLE_SIZE_4K]>,
}
#[cfg(feature = "frame")]
impl HashTable4K {
#[inline]
#[cfg(feature = "frame")]
pub fn new() -> Self {
let dict = alloc::vec![0; HASHTABLE_SIZE_4K]
.into_boxed_slice()
Expand All @@ -125,6 +114,7 @@ impl HashTable4K {
}
}
}
#[cfg(feature = "frame")]
impl HashTable for HashTable4K {
#[inline]
fn get_at(&self, hash: usize) -> usize {
Expand All @@ -140,122 +130,36 @@ impl HashTable for HashTable4K {
}
}

#[derive(Debug)]
#[repr(align(64))]
pub struct HashTableU32 {
dict: Vec<u32>,
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
dict_bitshift: usize,
}
impl HashTableU32 {
#[inline]
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
let dict = alloc::vec![0; dict_size];
Self {
dict,
dict_bitshift,
}
}
}
impl HashTable for HashTableU32 {
#[inline]
#[cfg(feature = "safe-encode")]
fn get_at(&self, hash: usize) -> usize {
self.dict[hash >> self.dict_bitshift] as usize
}
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn get_at(&self, hash: usize) -> usize {
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
}
#[inline]
#[cfg(feature = "safe-encode")]
fn put_at(&mut self, hash: usize, val: usize) {
self.dict[hash >> self.dict_bitshift] = val as u32;
}
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn put_at(&mut self, hash: usize, val: usize) {
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val as u32;
}
#[inline]
fn clear(&mut self) {
self.dict.fill(0);
}
}
const HASHTABLE_SIZE_8K: usize = 8 * 1024;
const HASH_TABLE_BIT_SHIFT_8K: usize = 3;

#[derive(Debug)]
#[repr(align(64))]
pub struct HashTableU16 {
dict: Vec<u16>,
/// Shift the hash value for the dictionary to the right, to match the dictionary size.
dict_bitshift: usize,
pub struct HashTable8K {
dict: Box<[u32; HASHTABLE_SIZE_8K]>,
}
impl HashTableU16 {
impl HashTable8K {
#[inline]
pub fn new(dict_size: usize, dict_bitshift: usize) -> Self {
let dict = alloc::vec![0; dict_size];
Self {
dict,
dict_bitshift,
}
pub fn new() -> Self {
let dict = alloc::vec![0; HASHTABLE_SIZE_8K]
.into_boxed_slice()
.try_into()
.unwrap();

Self { dict }
}
}
impl HashTable for HashTableU16 {
#[inline]
#[cfg(feature = "safe-encode")]
fn get_at(&self, hash: usize) -> usize {
self.dict[hash >> self.dict_bitshift] as usize
}
impl HashTable for HashTable8K {
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn get_at(&self, hash: usize) -> usize {
unsafe { *self.dict.get_unchecked(hash >> self.dict_bitshift) as usize }
self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize
}
#[inline]
#[cfg(feature = "safe-encode")]
fn put_at(&mut self, hash: usize, val: usize) {
self.dict[hash >> self.dict_bitshift] = val as u16;
}
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn put_at(&mut self, hash: usize, val: usize) {
(*unsafe { self.dict.get_unchecked_mut(hash >> self.dict_bitshift) }) = val as u16;
self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32;
}
#[inline]
fn clear(&mut self) {
self.dict.fill(0);
}
#[inline]
fn get_hash_at(input: &[u8], pos: usize) -> usize {
hash(super::get_batch(input, pos)) as usize
}
}

#[inline]
pub fn get_table_size(input_len: usize) -> (usize, usize) {
let (dict_size, dict_bitshift) = match input_len {
// U16 Positions
0..=65535 => {
// Considering we want a table with up to 16K bytes and each slot takes 2 bytes.
// Calculate size the matching table size according to the input size,
// so the overhead of "zeroing" the table is not too large for small inputs.
let size = input_len.next_power_of_two().clamp(256, 16 * 1024) / 2;
(size, 16 - size.trailing_zeros() as usize)
}
// U32 positions => 16KB table
// Usize (U64) positions => 32KB table
_ => (4096, 4),
};
(dict_size, dict_bitshift)
}

#[test]
fn test_get_table_size() {
const MAX_HASH: usize = u16::MAX as usize;
for i in 0..32 {
let input_len = 2usize.pow(i);
let (size, shift) = get_table_size(input_len);
assert_eq!(size, (MAX_HASH >> shift) + 1);
}
}
19 changes: 17 additions & 2 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#[macro_use]
extern crate more_asserts;

use std::iter;

use lz4_compress::compress as lz4_rust_compress;
#[cfg(feature = "frame")]
use lz4_flex::frame::BlockMode;
Expand Down Expand Up @@ -158,6 +160,13 @@ fn test_minimum_compression_ratio() {
let ratio = compressed.len() as f64 / COMPRESSION34K.len() as f64;
assert_lt!(ratio, 0.585); // TODO check why compression is not deterministic (fails in ci for
// 0.58)
let compressed = compress(COMPRESSION65);
let ratio = compressed.len() as f64 / COMPRESSION65.len() as f64;
assert_lt!(ratio, 0.574);

let compressed = compress(COMPRESSION66JSON);
let ratio = compressed.len() as f64 / COMPRESSION66JSON.len() as f64;
assert_lt!(ratio, 0.229);
}

use lz_fear::raw::compress2;
Expand Down Expand Up @@ -407,6 +416,12 @@ fn buf_fuzz_5() {
test_roundtrip(data);
}

#[test]
fn test_so_many_zeros() {
let data: Vec<u8> = iter::repeat(0).take(30_000).collect();
test_roundtrip(data);
}

#[test]
fn compression_works() {
let s = r#"An iterator that knows its exact length.
Expand All @@ -432,9 +447,9 @@ fn compression_works() {
#[ignore]
#[test]
fn big_compression() {
let mut s = Vec::with_capacity(80_000000);
let mut s = Vec::with_capacity(80_000_000);

for n in 0..80_000000 {
for n in 0..80_000_000 {
s.push((n as u8).wrapping_mul(0xA).wrapping_add(33) ^ 0xA2);
}

Expand Down

0 comments on commit 20bb1ab

Please sign in to comment.