diff --git a/benches/bigint.rs b/benches/bigint.rs index 960b7cee..80ec191c 100644 --- a/benches/bigint.rs +++ b/benches/bigint.rs @@ -174,35 +174,40 @@ fn fib_to_string(b: &mut Bencher) { b.iter(|| fib.to_string()); } -fn to_str_radix_bench(b: &mut Bencher, radix: u32) { +fn to_str_radix_bench(b: &mut Bencher, radix: u32, bits: u64) { let mut rng = get_rng(); - let x = rng.gen_bigint(1009); + let x = rng.gen_bigint(bits); b.iter(|| x.to_str_radix(radix)); } #[bench] fn to_str_radix_02(b: &mut Bencher) { - to_str_radix_bench(b, 2); + to_str_radix_bench(b, 2, 1009); } #[bench] fn to_str_radix_08(b: &mut Bencher) { - to_str_radix_bench(b, 8); + to_str_radix_bench(b, 8, 1009); } #[bench] fn to_str_radix_10(b: &mut Bencher) { - to_str_radix_bench(b, 10); + to_str_radix_bench(b, 10, 1009); +} + +#[bench] +fn to_str_radix_10_2(b: &mut Bencher) { + to_str_radix_bench(b, 10, 10009); } #[bench] fn to_str_radix_16(b: &mut Bencher) { - to_str_radix_bench(b, 16); + to_str_radix_bench(b, 16, 1009); } #[bench] fn to_str_radix_36(b: &mut Bencher) { - to_str_radix_bench(b, 36); + to_str_radix_bench(b, 36, 1009); } fn from_str_radix_bench(b: &mut Bencher, radix: u32) { diff --git a/src/biguint/convert.rs b/src/biguint/convert.rs index 278ec782..edeed84c 100644 --- a/src/biguint/convert.rs +++ b/src/biguint/convert.rs @@ -15,7 +15,7 @@ use core::cmp::Ordering::{Equal, Greater, Less}; use core::convert::TryFrom; use core::mem; use core::str::FromStr; -use num_integer::Integer; +use num_integer::{Integer, Roots}; use num_traits::float::FloatCore; use num_traits::{FromPrimitive, Num, PrimInt, ToPrimitive, Zero}; @@ -665,6 +665,39 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec { let (base, power) = get_radix_base(radix, big_digit::HALF_BITS); let radix = radix as BigDigit; + // For very large numbers, the O(n²) loop of repeated `div_rem_digit` dominates the + // performance. We can mitigate this by dividing into chunks of a larger base first. + // The threshold for this was chosen by anecdotal performance measurements to + // approximate where this starts to make a noticeable difference. + if digits.data.len() >= 64 { + let mut big_base = BigUint::from(base * base); + let mut big_power = 2usize; + + // Choose a target base length near √n. + let target_len = digits.data.len().sqrt(); + while big_base.data.len() < target_len { + big_base = &big_base * &big_base; + big_power *= 2; + } + + // This outer loop will run approximately √n times. + while digits > big_base { + // This is still the dominating factor, with n digits divided by √n digits. + let (q, mut big_r) = digits.div_rem(&big_base); + digits = q; + + // This inner loop now has O(√n²)=O(n) behavior altogether. + for _ in 0..big_power { + let (q, mut r) = div_rem_digit(big_r, base); + big_r = q; + for _ in 0..power { + res.push((r % radix) as u8); + r /= radix; + } + } + } + } + while digits.data.len() > 1 { let (q, mut r) = div_rem_digit(digits, base); for _ in 0..power { diff --git a/src/biguint/division.rs b/src/biguint/division.rs index 343705e1..b5d4259c 100644 --- a/src/biguint/division.rs +++ b/src/biguint/division.rs @@ -1,7 +1,7 @@ use super::addition::__add2; #[cfg(not(u64_digit))] use super::u32_to_u128; -use super::BigUint; +use super::{cmp_slice, BigUint}; use crate::big_digit::{self, BigDigit, DoubleBigDigit}; use crate::UsizePromotion; @@ -153,14 +153,14 @@ fn div_rem(mut u: BigUint, mut d: BigUint) -> (BigUint, BigUint) { // let shift = d.data.last().unwrap().leading_zeros() as usize; - let (q, r) = if shift == 0 { + if shift == 0 { // no need to clone d - div_rem_core(u, &d) + div_rem_core(u, &d.data) } else { - div_rem_core(u << shift, &(d << shift)) - }; - // renormalize the remainder - (q, r >> shift) + let (q, r) = div_rem_core(u << shift, &(d << shift).data); + // renormalize the remainder + (q, r >> shift) + } } pub(super) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) { @@ -195,24 +195,21 @@ pub(super) fn div_rem_ref(u: &BigUint, d: &BigUint) -> (BigUint, BigUint) { // let shift = d.data.last().unwrap().leading_zeros() as usize; - let (q, r) = if shift == 0 { + if shift == 0 { // no need to clone d - div_rem_core(u.clone(), d) + div_rem_core(u.clone(), &d.data) } else { - div_rem_core(u << shift, &(d << shift)) - }; - // renormalize the remainder - (q, r >> shift) + let (q, r) = div_rem_core(u << shift, &(d << shift).data); + // renormalize the remainder + (q, r >> shift) + } } /// An implementation of the base division algorithm. /// Knuth, TAOCP vol 2 section 4.3.1, algorithm D, with an improvement from exercises 19-21. -fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { - debug_assert!( - a.data.len() >= b.data.len() - && b.data.len() > 1 - && b.data.last().unwrap().leading_zeros() == 0 - ); +fn div_rem_core(mut a: BigUint, b: &[BigDigit]) -> (BigUint, BigUint) { + debug_assert!(a.data.len() >= b.len() && b.len() > 1); + debug_assert!(b.last().unwrap().leading_zeros() == 0); // The algorithm works by incrementally calculating "guesses", q0, for the next digit of the // quotient. Once we have any number q0 such that (q0 << j) * b <= a, we can set @@ -235,16 +232,16 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { let mut a0 = 0; // [b1, b0] are the two most significant digits of the divisor. They never change. - let b0 = *b.data.last().unwrap(); - let b1 = b.data[b.data.len() - 2]; + let b0 = *b.last().unwrap(); + let b1 = b[b.len() - 2]; - let q_len = a.data.len() - b.data.len() + 1; + let q_len = a.data.len() - b.len() + 1; let mut q = BigUint { data: vec![0; q_len], }; for j in (0..q_len).rev() { - debug_assert!(a.data.len() == b.data.len() + j); + debug_assert!(a.data.len() == b.len() + j); let a1 = *a.data.last().unwrap(); let a2 = a.data[a.data.len() - 2]; @@ -280,11 +277,11 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { // q0 is now either the correct quotient digit, or in rare cases 1 too large. // Subtract (q0 << j) from a. This may overflow, in which case we will have to correct. - let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], &b.data, q0); + let mut borrow = sub_mul_digit_same_len(&mut a.data[j..], b, q0); if borrow > a0 { // q0 is too large. We need to add back one multiple of b. q0 -= 1; - borrow -= __add2(&mut a.data[j..], &b.data); + borrow -= __add2(&mut a.data[j..], b); } // The top digit of a, stored in a0, has now been zeroed. debug_assert!(borrow == a0); @@ -298,7 +295,7 @@ fn div_rem_core(mut a: BigUint, b: &BigUint) -> (BigUint, BigUint) { a.data.push(a0); a.normalize(); - debug_assert!(a < *b); + debug_assert_eq!(cmp_slice(&a.data, b), Less); (q.normalized(), a) } diff --git a/tests/biguint.rs b/tests/biguint.rs index 716bcc81..5a5979c4 100644 --- a/tests/biguint.rs +++ b/tests/biguint.rs @@ -1600,6 +1600,16 @@ fn test_all_str_radix() { } } +#[test] +fn test_big_str() { + for n in 2..=20_u32 { + let x: BigUint = BigUint::from(n).pow(10_000_u32); + let s = x.to_string(); + let y: BigUint = s.parse().unwrap(); + assert_eq!(x, y); + } +} + #[test] fn test_lower_hex() { let a = BigUint::parse_bytes(b"A", 16).unwrap();