From c580dae2cdc254d1e9e2e52e37d1c9f187261f20 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 18:23:14 -0800 Subject: [PATCH 01/12] Fix a bug in `BoxedUnsatInt::to_uint()` --- src/modular/safegcd/boxed.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/modular/safegcd/boxed.rs b/src/modular/safegcd/boxed.rs index c2b3e4d6..78f8cf15 100644 --- a/src/modular/safegcd/boxed.rs +++ b/src/modular/safegcd/boxed.rs @@ -310,6 +310,9 @@ impl BoxedUnsatInt { /// Convert to a `BoxedUint` of the given precision. #[allow(trivial_numeric_casts)] fn to_uint(&self, mut bits_precision: u32) -> BoxedUint { + // Shorten to the required value after conversion. + let shorten = bits_precision == 32; + // The current Bernstein-Yang implementation is natively 64-bit on all targets if bits_precision == 32 { bits_precision = 64; @@ -334,7 +337,12 @@ impl BoxedUnsatInt { ret.as_words_mut() ); - ret + if shorten { + debug_assert!(ret.bits_vartime() <= 32); + ret.shorten(32) + } else { + ret + } } /// Conditionally add the given value to this one depending on the given [`Choice`]. 
From 46013b902369f3ee081252ec0f28076e50992e9b Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Wed, 5 Mar 2025 12:54:52 -0800 Subject: [PATCH 02/12] Add `Monty::div_by_2_assign()` --- benches/monty.rs | 11 +++++++++ src/modular/boxed_monty_form.rs | 10 ++++++++ src/modular/div_by_2.rs | 43 +++++++++++++++------------------ src/traits.rs | 6 +++++ src/uint/boxed/shr.rs | 7 ------ tests/boxed_monty_form.rs | 22 +++++++++++++++++ 6 files changed, 68 insertions(+), 31 deletions(-) diff --git a/benches/monty.rs b/benches/monty.rs index d91375c8..37752da2 100644 --- a/benches/monty.rs +++ b/benches/monty.rs @@ -180,6 +180,17 @@ fn bench_montgomery_ops(group: &mut BenchmarkGroup<'_, M>) { ) }); + group.bench_function("div_by_2, U256", |b| { + b.iter_batched( + || { + let x = U256::random_mod(&mut rng, params.modulus().as_nz_ref()); + MontyForm::new(&x, params) + }, + |x| black_box(x.div_by_2()), + BatchSize::SmallInput, + ) + }); + #[cfg(feature = "alloc")] for i in [1, 2, 3, 4, 10, 100] { group.bench_function( diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index fa68e517..8d3947b5 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -256,6 +256,12 @@ impl BoxedMontyForm { params: self.params.clone(), } } + + /// Performs division by 2 inplace, that is finds `x` such that `x + x = self` + /// and writes it into `self`. 
+ pub fn div_by_2_assign(&mut self) { + div_by_2::div_by_2_boxed_assign(&mut self.montgomery_form, &self.params.modulus) + } } impl Retrieve for BoxedMontyForm { @@ -301,6 +307,10 @@ impl Monty for BoxedMontyForm { BoxedMontyForm::div_by_2(self) } + fn div_by_2_assign(&mut self) { + BoxedMontyForm::div_by_2_assign(self) + } + fn lincomb_vartime(products: &[(&Self, &Self)]) -> Self { BoxedMontyForm::lincomb_vartime(products) } diff --git a/src/modular/div_by_2.rs b/src/modular/div_by_2.rs index c426c620..d8b2a758 100644 --- a/src/modular/div_by_2.rs +++ b/src/modular/div_by_2.rs @@ -1,6 +1,6 @@ #[cfg(feature = "alloc")] -use crate::{BoxedUint, ConstantTimeSelect}; -use crate::{Odd, Uint}; +use crate::{BoxedUint, Integer}; +use crate::{Limb, Odd, Uint}; pub(crate) const fn div_by_2( a: &Uint, @@ -10,38 +10,33 @@ pub(crate) const fn div_by_2( // Two possibilities: // - if `a` is even, we can just divide by 2; // - if `a` is odd, we divide `(a + modulus)` by 2. - // To stay within the modulus we open the parentheses turning it into `a / 2 + modulus / 2 + 1` - // ("+1" because both `a` and `modulus` are odd, we lose 0.5 in each integer division). - // This will not overflow, so we can just use wrapping operations. // Note that this also works if `a` is a Montgomery representation modulo `modulus` // of some integer `x`. // If `b + b = a mod modulus` it means that `y + y = x mod modulus` where `y` is the integer // whose Montgomery representation is `b`. 
- let (half, is_odd) = a.shr1_with_carry(); - let half_modulus = modulus.0.shr1(); - - let if_even = half; - let if_odd = half - .wrapping_add(&half_modulus) - .wrapping_add(&Uint::::ONE); - - Uint::::select(&if_even, &if_odd, is_odd) + let is_odd = a.is_odd(); + let (if_odd, carry) = a.adc(&modulus.0, Limb::ZERO); + let carry = Limb::select(Limb::ZERO, carry, is_odd); + Uint::::select(a, &if_odd, is_odd) + .shr1() + .set_bit(Uint::::BITS - 1, carry.is_nonzero()) } #[cfg(feature = "alloc")] pub(crate) fn div_by_2_boxed(a: &BoxedUint, modulus: &Odd) -> BoxedUint { - debug_assert_eq!(a.bits_precision(), modulus.bits_precision()); - - let (mut half, is_odd) = a.shr1_with_carry(); - let half_modulus = modulus.shr1(); - - let if_odd = half - .wrapping_add(&half_modulus) - .wrapping_add(&BoxedUint::one_with_precision(a.bits_precision())); + let mut result = a.clone(); + div_by_2_boxed_assign(&mut result, modulus); + result +} - half.ct_assign(&if_odd, is_odd); +#[cfg(feature = "alloc")] +pub(crate) fn div_by_2_boxed_assign(a: &mut BoxedUint, modulus: &Odd) { + debug_assert_eq!(a.bits_precision(), modulus.bits_precision()); - half + let is_odd = a.is_odd(); + let carry = a.conditional_adc_assign(modulus, is_odd); + a.shr1_assign(); + a.set_bit(a.bits_precision() - 1, carry); } diff --git a/src/traits.rs b/src/traits.rs index 962b6f52..2c60c544 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -898,6 +898,12 @@ pub trait Monty: /// Performs division by 2, that is returns `x` such that `x + x = self`. fn div_by_2(&self) -> Self; + /// Performs division by 2 inplace, that is finds `x` such that `x + x = self` + /// and writes it into `self`. + fn div_by_2_assign(&mut self) { + *self = self.div_by_2() + } + /// Calculate the sum of products of pairs `(a, b)` in `products`. /// /// This method is variable time only with the value of the modulus. 
diff --git a/src/uint/boxed/shr.rs b/src/uint/boxed/shr.rs index 84edd114..cf33a7f1 100644 --- a/src/uint/boxed/shr.rs +++ b/src/uint/boxed/shr.rs @@ -128,13 +128,6 @@ impl BoxedUint { success.map(|_| result) } - /// Computes `self >> 1` in constant-time, returning a true [`Choice`] - /// if the least significant bit was set, and a false [`Choice::FALSE`] otherwise. - pub(crate) fn shr1_with_carry(&self) -> (Self, Choice) { - let carry = self.limbs[0].0 & 1; - (self.shr1(), Choice::from(carry as u8)) - } - /// Computes `self >> 1` in constant-time. pub(crate) fn shr1(&self) -> Self { let mut ret = self.clone(); diff --git a/tests/boxed_monty_form.rs b/tests/boxed_monty_form.rs index 2f4343ce..93eb0853 100644 --- a/tests/boxed_monty_form.rs +++ b/tests/boxed_monty_form.rs @@ -10,6 +10,7 @@ use crypto_bigint::{ modular::{BoxedMontyForm, BoxedMontyParams}, }; use num_bigint::BigUint; +use num_integer::Integer as _; use num_modular::ModularUnaryOps; use proptest::prelude::*; use std::cmp::Ordering; @@ -153,4 +154,25 @@ proptest! 
{ prop_assert_eq!(retrieve_biguint(&actual), expected); } + + #[test] + fn div_by_2(a in monty_form()) { + let actual = a.div_by_2(); + let mut actual_inplace = a.clone(); + actual_inplace.div_by_2_assign(); + + let p = a.params().modulus(); + let a_bi = retrieve_biguint(&a); + let p_bi = to_biguint(&p); + + let expected = if a_bi.is_odd() { + (a_bi + p_bi) >> 1 + } + else { + a_bi >> 1 + }; + + prop_assert_eq!(&retrieve_biguint(&actual), &expected); + prop_assert_eq!(&retrieve_biguint(&actual_inplace), &expected); + } } From dca8c0b06f4a0a44120e3aad2a97355c5c634da6 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 11:28:03 -0800 Subject: [PATCH 03/12] More `inv_mod2k` methods and using them in `MontyParams` constructors --- src/modular/boxed_monty_form.rs | 33 +++++++++------ src/modular/monty_form.rs | 4 +- src/uint/boxed/inv_mod.rs | 71 ++++++++++++++++++++++++++++++++- src/uint/boxed/sub.rs | 5 +++ src/uint/inv_mod.rs | 40 +++++++++++++++++++ tests/boxed_uint.rs | 25 +++++++++++- 6 files changed, 161 insertions(+), 17 deletions(-) diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index 8d3947b5..3693b20a 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -59,7 +59,24 @@ impl BoxedMontyParams { .rem(&modulus.as_nz_ref().widen(bits_precision * 2)) .shorten(bits_precision); - Self::new_inner(modulus, one, r2) + // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero + let (inv_mod_limb, inv_mod_limb_exists) = modulus.inv_mod2k_vartime(Word::BITS); + debug_assert!(bool::from(inv_mod_limb_exists)); + + let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod_limb.limbs[0].0)); + + let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); + + let r3 = montgomery_reduction_boxed(&mut r2.square(), &modulus, mod_neg_inv); + + Self { + modulus, + one, + r2, + r3, + mod_neg_inv, + mod_leading_zeros, + } } /// 
Instantiates a new set of [`BoxedMontyParams`] representing the given `modulus`, which @@ -82,17 +99,9 @@ impl BoxedMontyParams { .rem_vartime(&modulus.as_nz_ref().widen(bits_precision * 2)) .shorten(bits_precision); - Self::new_inner(modulus, one, r2) - } - - /// Common functionality of `new` and `new_vartime`. - fn new_inner(modulus: Odd, one: BoxedUint, r2: BoxedUint) -> Self { - debug_assert_eq!(one.bits_precision(), modulus.bits_precision()); - debug_assert_eq!(r2.bits_precision(), modulus.bits_precision()); - - // If the inverse exists, it means the modulus is odd. - let (inv_mod_limb, modulus_is_odd) = modulus.inv_mod2k(Word::BITS); - debug_assert!(bool::from(modulus_is_odd)); + // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero + let (inv_mod_limb, inv_mod_limb_exists) = modulus.inv_mod2k_full_vartime(Word::BITS); + debug_assert!(bool::from(inv_mod_limb_exists)); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod_limb.limbs[0].0)); diff --git a/src/modular/monty_form.rs b/src/modular/monty_form.rs index 50b0eb12..964ea28e 100644 --- a/src/modular/monty_form.rs +++ b/src/modular/monty_form.rs @@ -55,7 +55,7 @@ where // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero let inv_mod = modulus - .inv_mod2k(Word::BITS) + .inv_mod2k_vartime(Word::BITS) .expect("modular inverse should exist"); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); @@ -90,7 +90,7 @@ impl MontyParams { // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero let inv_mod = modulus - .inv_mod2k_vartime(Word::BITS) + .inv_mod2k_full_vartime(Word::BITS) .expect("modular inverse should exist"); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); diff --git a/src/uint/boxed/inv_mod.rs b/src/uint/boxed/inv_mod.rs index 06391a57..3f34d7d0 100644 --- a/src/uint/boxed/inv_mod.rs +++ 
b/src/uint/boxed/inv_mod.rs @@ -13,14 +13,78 @@ impl BoxedUint { } /// Computes 1/`self` mod `2^k`. + /// This method is variable-time w.r.t. `self` and `k`. /// /// If the inverse does not exist (`k > 0` and `self` is even), /// returns `Choice::FALSE` as the second element of the tuple, /// otherwise returns `Choice::TRUE`. - pub(crate) fn inv_mod2k(&self, k: u32) -> (Self, Choice) { + pub(crate) fn inv_mod2k_full_vartime(&self, k: u32) -> (Self, Choice) { let mut x = Self::zero_with_precision(self.bits_precision()); // keeps `x` during iterations let mut b = Self::one_with_precision(self.bits_precision()); // keeps `b_i` during iterations + // The inverse exists either if `k` is 0 or if `self` is odd. + if k != 0 && !bool::from(self.is_odd()) { + return (x, Choice::from(0)); + } + + for i in 0..k { + // X_i = b_i mod 2 + let x_i = b.limbs[0].0 & 1; + // b_{i+1} = (b_i - a * X_i) / 2 + if x_i != 0 { + b.wrapping_sub_assign(self); + } + b.shr1_assign(); + // Store the X_i bit in the result (x = x | (X_i << i)) + x.set_bit_vartime(i, x_i != 0); + } + + (x, Choice::from(1)) + } + + /// Computes 1/`self` mod `2^k`. + /// This method is constant-time w.r.t. `self` but not `k`. + /// + /// If the inverse does not exist (`k > 0` and `self` is even), + /// returns `Choice::FALSE` as the second element of the tuple, + /// otherwise returns `Choice::TRUE`. + pub fn inv_mod2k_vartime(&self, k: u32) -> (Self, Choice) { + let mut x = Self::zero_with_precision(self.bits_precision()); // keeps `x` during iterations + let mut b = Self::one_with_precision(self.bits_precision()); // keeps `b_i` during iterations + // Additional temporary storage we will need. + let mut b_opt = Self::zero_with_precision(self.bits_precision()); + + // The inverse exists either if `k` is 0 or if `self` is odd.
+ let is_some = k.ct_eq(&0) | self.is_odd(); + + for i in 0..k { + // X_i = b_i mod 2 + let x_i = b.limbs[0].0 & 1; + let x_i_choice = Choice::from(x_i as u8); + // b_{i+1} = (b_i - a * X_i) / 2 + b_opt.as_words_mut().copy_from_slice(b.as_words()); + b_opt.wrapping_sub_assign(self); + b.ct_assign(&b_opt, x_i_choice); + b.shr1_assign(); + + // Store the X_i bit in the result (x = x | (X_i << i)) + x.set_bit(i, x_i_choice); + } + + (x, is_some) + } + + /// Computes 1/`self` mod `2^k`. + /// + /// If the inverse does not exist (`k > 0` and `self` is even), + /// returns `Choice::FALSE` as the second element of the tuple, + /// otherwise returns `Choice::TRUE`. + pub fn inv_mod2k(&self, k: u32) -> (Self, Choice) { + let mut x = Self::zero_with_precision(self.bits_precision()); // keeps `x` during iterations + let mut b = Self::one_with_precision(self.bits_precision()); // keeps `b_i` during iterations + // Additional temporary storage we will need. + let mut b_opt = Self::zero_with_precision(self.bits_precision()); + // The inverse exists either if `k` is 0 or if `self` is odd. let is_some = k.ct_eq(&0) | self.is_odd(); @@ -33,7 +97,10 @@ impl BoxedUint { let x_i = b.limbs[0].0 & 1; let x_i_choice = Choice::from(x_i as u8); // b_{i+1} = (b_i - a * X_i) / 2 - b = Self::ct_select(&b, &b.wrapping_sub(self), x_i_choice).shr1(); + b_opt.as_words_mut().copy_from_slice(b.as_words()); + b_opt.wrapping_sub_assign(self); + b.ct_assign(&b_opt, x_i_choice); + b.shr1_assign(); // Store the X_i bit in the result (x = x | (1 << X_i)) // Don't change the result in dummy iterations. diff --git a/src/uint/boxed/sub.rs b/src/uint/boxed/sub.rs index 411123c0..bd0c1d08 100644 --- a/src/uint/boxed/sub.rs +++ b/src/uint/boxed/sub.rs @@ -27,6 +27,11 @@ impl BoxedUint { borrow } + /// Perform wrapping subtraction in place, discarding overflow.
+ pub(crate) fn wrapping_sub_assign(&mut self, rhs: &Self) { + self.sbb_assign(rhs, Limb::ZERO); + } + /// Perform wrapping subtraction, discarding overflow. pub fn wrapping_sub(&self, rhs: &Self) -> Self { self.sbb(rhs, Limb::ZERO).0 diff --git a/src/uint/inv_mod.rs b/src/uint/inv_mod.rs index b818f411..235aa923 100644 --- a/src/uint/inv_mod.rs +++ b/src/uint/inv_mod.rs @@ -5,6 +5,46 @@ use crate::{ use subtle::CtOption; impl Uint { + /// Computes 1/`self` mod `2^k`. + /// This method is variable-time w.r.t. `self` and `k`. + /// + /// If the inverse does not exist (`k > 0` and `self` is even), + /// returns `None`, + /// otherwise returns `Some` containing the inverse. + pub(crate) const fn inv_mod2k_full_vartime(&self, k: u32) -> Option { + // Using the Algorithm 3 from "A Secure Algorithm for Inversion Modulo 2^k" + // by Sadiel de la Fe and Carles Ferrer. + // See <https://www.mdpi.com/2410-387X/2/3/23>. + + // Note that we are not using Algorithm 4, since we have a different approach + // of enforcing constant-timeness w.r.t. `self`. + + let mut x = Self::ZERO; // keeps `x` during iterations + let mut b = Self::ONE; // keeps `b_i` during iterations + let mut i = 0; + + // The inverse exists either if `k` is 0 or if `self` is odd. + if k != 0 && !self.is_odd().to_bool_vartime() { + return None; + } + + while i < k { + // X_i = b_i mod 2 + let x_i = b.limbs[0].0 & 1; + // b_{i+1} = (b_i - a * X_i) / 2 + if x_i != 0 { + b = b.wrapping_sub(self); + } + b = b.shr1(); + // Store the X_i bit in the result (x = x | (X_i << i)) + x = x.set_bit_vartime(i, x_i != 0); + + i += 1; + } + + Some(x) + } + /// Computes 1/`self` mod `2^k`. /// This method is constant-time w.r.t. `self` but not `k`.
/// diff --git a/tests/boxed_uint.rs b/tests/boxed_uint.rs index 652a81c6..ccacea40 100644 --- a/tests/boxed_uint.rs +++ b/tests/boxed_uint.rs @@ -6,12 +6,13 @@ mod common; use common::to_biguint; use core::cmp::Ordering; -use crypto_bigint::{BoxedUint, CheckedAdd, Gcd, Integer, Limb, NonZero}; +use crypto_bigint::{BitOps, BoxedUint, CheckedAdd, Gcd, Integer, Limb, NonZero}; use num_bigint::BigUint; use num_integer::Integer as _; use num_modular::ModularUnaryOps; use num_traits::identities::One; use proptest::prelude::*; +use subtle::Choice; fn to_uint(big_uint: BigUint) -> BoxedUint { let bytes = big_uint.to_bytes_be(); @@ -155,6 +156,28 @@ proptest! { prop_assert_eq!(expected, actual); } + #[test] + fn inv_mod2k(mut a in uint(), k in any::()) { + a.set_bit(0, Choice::from(1)); // make odd + let k = k % (a.bits() + 1); + let a_bi = to_biguint(&a); + let m_bi = BigUint::one() << k as usize; + + let actual = a.inv_mod2k(k).0; + let (actual_vartime, exists) = a.inv_mod2k_vartime(k); + prop_assert!(bool::from(exists)); + prop_assert_eq!(&actual, &actual_vartime); + + if k == 0 { + prop_assert_eq!(&actual, &BoxedUint::zero_with_precision(a.bits_precision())); + } + else { + let inv_bi = to_biguint(&actual); + let res = (inv_bi * a_bi) % m_bi; + prop_assert_eq!(res, BigUint::one()); + } + } + #[test] fn mod_inv((a, mut b) in uint_pair()) { if b.is_even().into() { From c820291656fcf122c5b12266fa9d9fdb0e555b76 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Fri, 28 Feb 2025 14:50:26 -0800 Subject: [PATCH 04/12] Make MontyForm constructors const --- src/modular/monty_form.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/modular/monty_form.rs b/src/modular/monty_form.rs index 964ea28e..84910592 100644 --- a/src/modular/monty_form.rs +++ b/src/modular/monty_form.rs @@ -14,7 +14,7 @@ use super::{ div_by_2::div_by_2, reduction::montgomery_reduction, }; -use crate::{Concat, Limb, Monty, NonZero, Odd, Split, Uint, Word}; +use 
crate::{Concat, ConstChoice, Limb, Monty, NonZero, Odd, Split, Uint, Word}; use subtle::{Choice, ConditionallySelectable, ConstantTimeEq}; /// Parameters to efficiently go to/from the Montgomery form for an odd modulus provided at runtime. @@ -41,7 +41,7 @@ where Uint: Split>, { /// Instantiates a new set of `MontyParams` representing the given odd `modulus`. - pub fn new(modulus: Odd>) -> Self { + pub const fn new(modulus: Odd>) -> Self { // `R mod modulus` where `R = 2^BITS`. // Represents 1 in Montgomery form. let one = Uint::MAX.rem(modulus.as_nz_ref()).wrapping_add(&Uint::ONE); @@ -55,12 +55,15 @@ where // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero let inv_mod = modulus + .as_ref() .inv_mod2k_vartime(Word::BITS) .expect("modular inverse should exist"); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); - let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); + let mod_leading_zeros = modulus.as_ref().leading_zeros(); + let mod_leading_zeros = ConstChoice::from_u32_lt(mod_leading_zeros, Word::BITS - 1) + .select_u32(Word::BITS - 1, mod_leading_zeros); // `R^3 mod modulus`, used for inversion in Montgomery form. let r3 = montgomery_reduction(&r2.square_wide(), &modulus, mod_neg_inv); @@ -78,7 +81,7 @@ where impl MontyParams { /// Instantiates a new set of `MontyParams` representing the given odd `modulus`. - pub fn new_vartime(modulus: Odd>) -> Self { + pub const fn new_vartime(modulus: Odd>) -> Self { // `R mod modulus` where `R = 2^BITS`. // Represents 1 in Montgomery form. 
let one = Uint::MAX @@ -90,12 +93,18 @@ impl MontyParams { // The modular inverse should always exist, because it was ensured odd above, which also ensures it's non-zero let inv_mod = modulus + .as_ref() .inv_mod2k_full_vartime(Word::BITS) .expect("modular inverse should exist"); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); - let mod_leading_zeros = modulus.as_ref().leading_zeros_vartime().min(Word::BITS - 1); + let mod_leading_zeros = modulus.as_ref().leading_zeros_vartime(); + let mod_leading_zeros = if mod_leading_zeros < Word::BITS - 1 { + mod_leading_zeros + } else { + Word::BITS - 1 + }; // `R^3 mod modulus`, used for inversion in Montgomery form. let r3 = montgomery_reduction(&r2.square_wide(), &modulus, mod_neg_inv); From 0abfc4b0e04acf6de9955660f8a6b02934b300a1 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 12:09:04 -0800 Subject: [PATCH 05/12] Use `sub_assign_mod_with_carry()` in `montgomery_reduction_boxed_mut()` --- src/modular/reduction.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/modular/reduction.rs b/src/modular/reduction.rs index d6d15b55..fc62a317 100644 --- a/src/modular/reduction.rs +++ b/src/modular/reduction.rs @@ -3,7 +3,7 @@ use crate::{Limb, Odd, Uint}; #[cfg(feature = "alloc")] -use {crate::BoxedUint, subtle::Choice}; +use crate::BoxedUint; /// Algorithm 14.32 in Handbook of Applied Cryptography #[inline(always)] @@ -84,15 +84,11 @@ pub(crate) fn montgomery_reduction_boxed_mut( let (lower, upper) = x.limbs.split_at_mut(modulus.nlimbs()); let meta_carry = montgomery_reduction_inner(upper, lower, &modulus.limbs, mod_neg_inv); + // Division is simply taking the upper half of the limbs + // Final reduction (at this point, the value is at most 2 * modulus, + // so `meta_carry` is either 0 or 1) out.limbs.copy_from_slice(upper); - let borrow = out.sbb_assign(modulus, Limb::ZERO); - - // The new `borrow = Word::MAX` iff `carry == 0` and `borrow == Word::MAX`. 
- let borrow = Limb((!meta_carry.0.wrapping_neg()) & borrow.0); - - // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise - // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus. - out.conditional_adc_assign(modulus, Choice::from((borrow.0 & 1) as u8)); + out.sub_assign_mod_with_carry(meta_carry, modulus, modulus); } /// Algorithm 14.32 in Handbook of Applied Cryptography From 0cc98266e497b44d52e3bb8c5784b3fd3838bc09 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 12:09:19 -0800 Subject: [PATCH 06/12] Get rid of allocations in Add/SubAssign for BoxedMontyForm --- src/modular/boxed_monty_form/add.rs | 5 ++--- src/modular/boxed_monty_form/sub.rs | 9 ++++++--- src/uint/boxed/add_mod.rs | 18 +++++++++++++----- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/modular/boxed_monty_form/add.rs b/src/modular/boxed_monty_form/add.rs index 85737195..42190609 100644 --- a/src/modular/boxed_monty_form/add.rs +++ b/src/modular/boxed_monty_form/add.rs @@ -58,9 +58,8 @@ impl Add for BoxedMontyForm { impl AddAssign<&BoxedMontyForm> for BoxedMontyForm { fn add_assign(&mut self, rhs: &BoxedMontyForm) { debug_assert_eq!(self.params, rhs.params); - self.montgomery_form = self - .montgomery_form - .add_mod(&rhs.montgomery_form, &self.params.modulus) + self.montgomery_form + .add_mod_assign(&rhs.montgomery_form, &self.params.modulus); } } diff --git a/src/modular/boxed_monty_form/sub.rs b/src/modular/boxed_monty_form/sub.rs index dc777d0c..3524da91 100644 --- a/src/modular/boxed_monty_form/sub.rs +++ b/src/modular/boxed_monty_form/sub.rs @@ -1,6 +1,7 @@ //! Subtractions between boxed integers in Montgomery form. 
use super::BoxedMontyForm; +use crate::Limb; use core::ops::{Sub, SubAssign}; impl BoxedMontyForm { @@ -51,9 +52,11 @@ impl Sub for BoxedMontyForm { impl SubAssign<&BoxedMontyForm> for BoxedMontyForm { fn sub_assign(&mut self, rhs: &BoxedMontyForm) { debug_assert_eq!(self.params, rhs.params); - self.montgomery_form = self - .montgomery_form - .sub_mod(&rhs.montgomery_form, &self.params.modulus) + self.montgomery_form.sub_assign_mod_with_carry( + Limb::ZERO, + &rhs.montgomery_form, + &self.params.modulus, + ); } } diff --git a/src/uint/boxed/add_mod.rs b/src/uint/boxed/add_mod.rs index 12bade5b..5b0054ce 100644 --- a/src/uint/boxed/add_mod.rs +++ b/src/uint/boxed/add_mod.rs @@ -7,22 +7,30 @@ impl BoxedUint { /// /// Assumes `self + rhs` as unbounded integer is `< 2p`. pub fn add_mod(&self, rhs: &Self, p: &Self) -> Self { + let mut result = self.clone(); + result.add_mod_assign(rhs, p); + result + } + + /// Computes `self + rhs mod p` and writes the result in `self`. + /// + /// Assumes `self + rhs` as unbounded integer is `< 2p`. + pub fn add_mod_assign(&mut self, rhs: &Self, p: &Self) { debug_assert_eq!(self.bits_precision(), p.bits_precision()); debug_assert_eq!(rhs.bits_precision(), p.bits_precision()); - debug_assert!(self < p); + debug_assert!(&*self < p); debug_assert!(rhs < p); - let (mut w, carry) = self.adc(rhs, Limb::ZERO); + let carry = self.adc_assign(rhs, Limb::ZERO); // Attempt to subtract the modulus, to ensure the result is in the field. - let borrow = w.sbb_assign(p, Limb::ZERO); + let borrow = self.sbb_assign(p, Limb::ZERO); let (_, borrow) = carry.sbb(Limb::ZERO, borrow); // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the // modulus. - w.conditional_adc_assign(p, !borrow.is_zero()); - w + self.conditional_adc_assign(p, !borrow.is_zero()); } /// Computes `self + self mod p`. 
From 354efe6727a06d2d17c3fbe1f87f9ff145870026 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 12:59:54 -0800 Subject: [PATCH 07/12] Use AMM for BoxedMontyForm multiplication --- src/modular/boxed_monty_form/mul.rs | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/modular/boxed_monty_form/mul.rs b/src/modular/boxed_monty_form/mul.rs index d8cc1676..961f5f8c 100644 --- a/src/modular/boxed_monty_form/mul.rs +++ b/src/modular/boxed_monty_form/mul.rs @@ -6,10 +6,7 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. use super::{BoxedMontyForm, BoxedMontyParams}; -use crate::{ - BoxedUint, Limb, Square, SquareAssign, Word, Zero, - modular::reduction::montgomery_reduction_boxed_mut, uint::mul::mul_limbs, -}; +use crate::{BoxedUint, Limb, Square, SquareAssign, Word, Zero}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, @@ -132,11 +129,8 @@ impl<'a> MontyMultiplier<'a> { /// Perform a Montgomery multiplication, assigning a fully reduced result to `a`. pub(super) fn mul_assign(&mut self, a: &mut BoxedUint, b: &BoxedUint) { - debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision()); - debug_assert_eq!(b.bits_precision(), self.modulus.bits_precision()); - - mul_limbs(&a.limbs, &b.limbs, &mut self.product.limbs); - montgomery_reduction_boxed_mut(&mut self.product, self.modulus, self.mod_neg_inv, a); + self.mul_amm_assign(a, b); + a.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); debug_assert!(&*a < self.modulus); } @@ -150,11 +144,8 @@ impl<'a> MontyMultiplier<'a> { /// Perform a squaring using Montgomery multiplication, assigning a fully reduced result to `a`. 
pub(super) fn square_assign(&mut self, a: &mut BoxedUint) { - debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision()); - - // TODO(tarcieri): optimized implementation - mul_limbs(&a.limbs, &a.limbs, &mut self.product.limbs); - montgomery_reduction_boxed_mut(&mut self.product, self.modulus, self.mod_neg_inv, a); + self.square_amm_assign(a); + a.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); debug_assert!(&*a < self.modulus); } @@ -211,6 +202,7 @@ impl<'a> MontyMultiplier<'a> { pub(super) fn square_amm_assign(&mut self, a: &mut BoxedUint) { debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision()); + // TODO(tarcieri): optimized implementation self.clear_product(); almost_montgomery_mul( self.product.as_limbs_mut(), From c1dd82363dfa1716bbd04be486e1071afd49ef56 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Tue, 25 Feb 2025 12:27:05 -0800 Subject: [PATCH 08/12] Use AMM for conversion to Montgomery and in `BoxedMontyParams` constructor --- src/modular/boxed_monty_form.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index 3693b20a..7429ca5d 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -8,10 +8,9 @@ mod neg; mod pow; mod sub; -use super::{ - ConstMontyParams, Retrieve, div_by_2, - reduction::{montgomery_reduction_boxed, montgomery_reduction_boxed_mut}, -}; +use super::{ConstMontyParams, Retrieve, div_by_2, reduction::montgomery_reduction_boxed}; +use mul::MontyMultiplier; + use crate::{BoxedUint, Limb, Monty, Odd, Word}; use alloc::sync::Arc; use subtle::Choice; @@ -67,7 +66,10 @@ impl BoxedMontyParams { let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); - let r3 = montgomery_reduction_boxed(&mut r2.square(), &modulus, mod_neg_inv); + let r3 = { + let mut mm = MontyMultiplier::new(&modulus, mod_neg_inv); + mm.square(&r2) + }; Self { modulus, @@ 
-107,7 +109,10 @@ impl BoxedMontyParams { let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); - let r3 = montgomery_reduction_boxed(&mut r2.square(), &modulus, mod_neg_inv); + let r3 = { + let mut mm = MontyMultiplier::new(&modulus, mod_neg_inv); + mm.square(&r2) + }; Self { modulus, @@ -336,11 +341,8 @@ impl Zeroize for BoxedMontyForm { /// Convert the given integer into the Montgomery domain. #[inline] fn convert_to_montgomery(integer: &mut BoxedUint, params: &BoxedMontyParams) { - let mut product = integer.mul(&params.r2); - montgomery_reduction_boxed_mut(&mut product, &params.modulus, params.mod_neg_inv, integer); - - #[cfg(feature = "zeroize")] - product.zeroize(); + let mut mm = MontyMultiplier::from(params); + mm.mul_assign(integer, &params.r2); } #[cfg(test)] From d62e139e1d263df9ce312e846642509020ab2763 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Wed, 5 Mar 2025 10:33:24 -0800 Subject: [PATCH 09/12] Use AMM in BoxedMontyForm::retrieve --- src/modular/boxed_monty_form.rs | 17 ++----- src/modular/boxed_monty_form/mul.rs | 69 ++++++++++++++++++++++++++++- src/modular/reduction.rs | 41 ----------------- 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index 7429ca5d..5a414c6c 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -8,7 +8,7 @@ mod neg; mod pow; mod sub; -use super::{ConstMontyParams, Retrieve, div_by_2, reduction::montgomery_reduction_boxed}; +use super::{ConstMontyParams, Retrieve, div_by_2}; use mul::MontyMultiplier; use crate::{BoxedUint, Limb, Monty, Odd, Word}; @@ -187,19 +187,8 @@ impl BoxedMontyForm { /// Retrieves the integer currently encoded in this [`BoxedMontyForm`], guaranteed to be reduced.
pub fn retrieve(&self) -> BoxedUint { - let mut montgomery_form = self.montgomery_form.widen(self.bits_precision() * 2); - - let ret = montgomery_reduction_boxed( - &mut montgomery_form, - &self.params.modulus, - self.params.mod_neg_inv, - ); - - #[cfg(feature = "zeroize")] - montgomery_form.zeroize(); - - debug_assert!(ret < self.params.modulus); - ret + let mut mm = MontyMultiplier::from(self.params.as_ref()); + mm.mul_by_one(&self.montgomery_form) } /// Instantiates a new `ConstMontyForm` that represents zero. diff --git a/src/modular/boxed_monty_form/mul.rs b/src/modular/boxed_monty_form/mul.rs index 961f5f8c..cd111498 100644 --- a/src/modular/boxed_monty_form/mul.rs +++ b/src/modular/boxed_monty_form/mul.rs @@ -6,7 +6,7 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. use super::{BoxedMontyForm, BoxedMontyParams}; -use crate::{BoxedUint, Limb, Square, SquareAssign, Word, Zero}; +use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign, Word, Zero}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, @@ -135,6 +135,26 @@ impl<'a> MontyMultiplier<'a> { debug_assert!(&*a < self.modulus); } + /// Perform a Montgomery multiplication of `a` by one, returning a fully reduced result. + pub(super) fn mul_by_one(&mut self, a: &BoxedUint) -> BoxedUint { + debug_assert_eq!(a.bits_precision(), self.modulus.bits_precision()); + + let mut ret = a.clone(); + + self.clear_product(); + almost_montgomery_mul_by_one( + self.product.as_limbs_mut(), + a.as_limbs(), + self.modulus.as_limbs(), + self.mod_neg_inv, + ); + ret.limbs + .copy_from_slice(&self.product.limbs[..a.limbs.len()]); + ret.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); + + ret + } + /// Perform a squaring using Montgomery multiplication, returning a fully reduced result.
pub(super) fn square(&mut self, a: &BoxedUint) -> BoxedUint { let mut ret = a.clone(); @@ -279,6 +299,53 @@ fn almost_montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: } } +/// Same as `almost_montgomery_mul` with `y == 1`. +/// +/// Used for retrieving from Montgomery form. +fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) { + // This code assumes x, m are all the same length (required by addMulVVW and the for loop). + // It also assumes that x is already reduced mod m, or else the result will not be properly + // reduced. + let n = m.len(); + + // This preconditions check allows compiler to remove bound checks later in the code. + // `z.len() > n && z[n..].len() == n` is used intentionally instead of `z.len() == 2* n` + // since the latter prevents compiler from removing some bound checks. + let pre_cond = z.len() > n && z[n..].len() == n && x.len() == n && m.len() == n; + if !pre_cond { + panic!("Failed preconditions in montgomery_mul"); + } + + let mut c = ConstChoice::FALSE; + + // The unrolled first iteration. 
+ let c2 = add_mul_vvw(&mut z[0..n], x, Limb::ONE); + let t = z[0].wrapping_mul(k); + let c3 = add_mul_vvw(&mut z[0..n], m, t); + let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); + let cy = cx.wrapping_add(c3); + z[n] = cy; + c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); + + for i in 1..n { + let c2 = add_mul_vvw(&mut z[i..n + i], x, Limb::ZERO); + let t = z[i].wrapping_mul(k); + let c3 = add_mul_vvw(&mut z[i..n + i], m, t); + let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); + let cy = cx.wrapping_add(c3); + z[n + i] = cy; + c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); + } + + let (lower, upper) = z.split_at_mut(n); + sub_vv(lower, upper, m); + + let is_zero = c.not(); + for (a, b) in lower.iter_mut().zip(upper.iter()) { + *a = Limb::select(*a, *b, is_zero); + } +} + #[inline] fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { let mut c = Limb::ZERO; diff --git a/src/modular/reduction.rs b/src/modular/reduction.rs index fc62a317..18f9b52b 100644 --- a/src/modular/reduction.rs +++ b/src/modular/reduction.rs @@ -2,9 +2,6 @@ use crate::{Limb, Odd, Uint}; -#[cfg(feature = "alloc")] -use crate::BoxedUint; - /// Algorithm 14.32 in Handbook of Applied Cryptography #[inline(always)] const fn montgomery_reduction_inner( @@ -67,41 +64,3 @@ pub const fn montgomery_reduction( // so `meta_carry` is either 0 or 1) upper.sub_mod_with_carry(meta_carry, &modulus.0, &modulus.0) } - -/// Algorithm 14.32 in Handbook of Applied Cryptography -/// -/// This version writes the result into the provided [`BoxedUint`]. 
-#[cfg(feature = "alloc")] -pub(crate) fn montgomery_reduction_boxed_mut( - x: &mut BoxedUint, - modulus: &BoxedUint, - mod_neg_inv: Limb, - out: &mut BoxedUint, -) { - debug_assert_eq!(x.nlimbs(), modulus.nlimbs() * 2); - debug_assert_eq!(out.nlimbs(), modulus.nlimbs()); - - let (lower, upper) = x.limbs.split_at_mut(modulus.nlimbs()); - let meta_carry = montgomery_reduction_inner(upper, lower, &modulus.limbs, mod_neg_inv); - - // Division is simply taking the upper half of the limbs - // Final reduction (at this point, the value is at most 2 * modulus, - // so `meta_carry` is either 0 or 1) - out.limbs.copy_from_slice(upper); - out.sub_assign_mod_with_carry(meta_carry, modulus, modulus); -} - -/// Algorithm 14.32 in Handbook of Applied Cryptography -/// -/// This version allocates and returns a [`BoxedUint`]. -#[cfg(feature = "alloc")] -#[inline] -pub(crate) fn montgomery_reduction_boxed( - x: &mut BoxedUint, - modulus: &BoxedUint, - mod_neg_inv: Limb, -) -> BoxedUint { - let mut ret = BoxedUint::zero_with_precision(modulus.bits_precision()); - montgomery_reduction_boxed_mut(x, modulus, mod_neg_inv, &mut ret); - ret -} From 6529b75ec5f11e7ba7d790239d7c7b298bb714b2 Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Wed, 5 Mar 2025 10:28:44 -0800 Subject: [PATCH 10/12] Make AMM functions const --- src/modular/boxed_monty_form/mul.rs | 91 ++++++++++++++++++----------- 1 file changed, 56 insertions(+), 35 deletions(-) diff --git a/src/modular/boxed_monty_form/mul.rs b/src/modular/boxed_monty_form/mul.rs index cd111498..2680d83b 100644 --- a/src/modular/boxed_monty_form/mul.rs +++ b/src/modular/boxed_monty_form/mul.rs @@ -6,12 +6,11 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. 
use super::{BoxedMontyForm, BoxedMontyParams}; -use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign, Word, Zero}; +use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign, Word}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, }; -use subtle::{ConditionallySelectable, ConstantTimeLess}; #[cfg(feature = "zeroize")] use zeroize::Zeroize; @@ -264,54 +263,55 @@ impl Drop for MontyMultiplier<'_> { /// /// Note: this was adapted from an implementation in `num-bigint`'s `monty.rs`. // TODO(tarcieri): refactor into `reduction.rs`, share impl with `MontyForm`? -fn almost_montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: Limb) { +const fn almost_montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: Limb) { // This code assumes x, y, m are all the same length (required by addMulVVW and the for loop). // It also assumes that x, y are already reduced mod m, or else the result will not be properly // reduced. let n = m.len(); // This preconditions check allows compiler to remove bound checks later in the code. - // `z.len() > n && z[n..].len() == n` is used intentionally instead of `z.len() == 2* n` - // since the latter prevents compiler from removing some bound checks. 
- let pre_cond = z.len() > n && z[n..].len() == n && x.len() == n && y.len() == n && m.len() == n; + let pre_cond = z.len() > n && x.len() == n && y.len() == n && m.len() == n; if !pre_cond { panic!("Failed preconditions in montgomery_mul"); } - let mut c = Limb::ZERO; + let mut c = ConstChoice::FALSE; - for i in 0..n { - let c2 = add_mul_vvw(&mut z[i..n + i], x, y[i]); - let t = z[i].wrapping_mul(k); - let c3 = add_mul_vvw(&mut z[i..n + i], m, t); - let cx = c.wrapping_add(c2); + let mut i = 0; + while i < n { + let (_, z_slice) = z.split_at_mut(i); + let c2 = add_mul_vvw(z_slice, x, y[i]); + let t = z_slice[0].wrapping_mul(k); + let c3 = add_mul_vvw(z_slice, m, t); + let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); let cy = cx.wrapping_add(c3); z[n + i] = cy; - c = Limb((cx.ct_lt(&c2) | cy.ct_lt(&c3)).unwrap_u8() as Word); + c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); + i += 1; } let (lower, upper) = z.split_at_mut(n); sub_vv(lower, upper, m); - let is_zero = c.is_zero(); - for (a, b) in lower.iter_mut().zip(upper.iter()) { - a.conditional_assign(b, is_zero); + let is_zero = c.not(); + let mut i = 0; + while i < n { + lower[i] = Limb::select(lower[i], upper[i], is_zero); + i += 1; } } /// Same as `almost_montgomery_mul` with `y == 1`. /// /// Used for retrieving from Montgomery form. -fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) { +const fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) { // This code assumes x, m are all the same length (required by addMulVVW and the for loop). // It also assumes that x is already reduced mod m, or else the result will not be properly // reduced. let n = m.len(); // This preconditions check allows compiler to remove bound checks later in the code. - // `z.len() > n && z[n..].len() == n` is used intentionally instead of `z.len() == 2* n` - // since the latter prevents compiler from removing some bound checks. 
- let pre_cond = z.len() > n && z[n..].len() == n && x.len() == n && m.len() == n; + let pre_cond = z.len() > n && x.len() == n && m.len() == n; if !pre_cond { panic!("Failed preconditions in montgomery_mul"); } @@ -319,53 +319,74 @@ fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) let mut c = ConstChoice::FALSE; // The unrolled first iteration. - let c2 = add_mul_vvw(&mut z[0..n], x, Limb::ONE); + let c2 = add_mul_vvw(z, x, Limb::ONE); let t = z[0].wrapping_mul(k); - let c3 = add_mul_vvw(&mut z[0..n], m, t); + let c3 = add_mul_vvw(z, m, t); let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); let cy = cx.wrapping_add(c3); z[n] = cy; c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); - for i in 1..n { - let c2 = add_mul_vvw(&mut z[i..n + i], x, Limb::ZERO); - let t = z[i].wrapping_mul(k); - let c3 = add_mul_vvw(&mut z[i..n + i], m, t); + let mut i = 1; + while i < n { + let (_, z_slice) = z.split_at_mut(i); + let c2 = add_mul_vvw(z_slice, x, Limb::ZERO); + let t = z_slice[0].wrapping_mul(k); + let c3 = add_mul_vvw(z_slice, m, t); let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); let cy = cx.wrapping_add(c3); z[n + i] = cy; c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); + i += 1; } let (lower, upper) = z.split_at_mut(n); sub_vv(lower, upper, m); let is_zero = c.not(); - for (a, b) in lower.iter_mut().zip(upper.iter()) { - *a = Limb::select(*a, *b, is_zero); + let mut i = 0; + while i < n { + lower[i] = Limb::select(lower[i], upper[i], is_zero); + i += 1; } } #[inline] -fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { +const fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { + let n = x.len(); + if n > z.len() { + panic!("Failed preconditions in montgomery_mul"); + } + let mut c = Limb::ZERO; - for (zi, xi) in z.iter_mut().zip(x.iter()) { - let (z0, z1) = zi.mac(*xi, y, Limb::ZERO); + + let mut i = 0; + while i < n { + let (z0, z1) = 
z[i].mac(x[i], y, Limb::ZERO); let (zi_, c_) = z0.overflowing_add(c); - *zi = zi_; + z[i] = zi_; c = c_.wrapping_add(z1); + i += 1; } c } #[inline(always)] -fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) { +const fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) { + let n = z.len(); + if !(n == x.len() && n == y.len()) { + panic!("Failed preconditions in montgomery_mul"); + } + let mut borrow = Limb::ZERO; - for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { - let (zi, new_borrow) = xi.sbb(yi, borrow); + + let mut i = 0; + while i < n { + let (zi, new_borrow) = x[i].sbb(y[i], borrow); z[i] = zi; borrow = new_borrow; + i += 1; } } From a3d464c8d15e3e5f1e6cb7454cf4b8404224722e Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Wed, 5 Mar 2025 12:43:56 -0800 Subject: [PATCH 11/12] Use canonical CIOS with N-sized buffer, expand comments. --- src/modular/boxed_monty_form/mul.rs | 211 +++++++++++++++------------- src/modular/boxed_monty_form/pow.rs | 19 ++- src/primitives.rs | 12 +- 3 files changed, 138 insertions(+), 104 deletions(-) diff --git a/src/modular/boxed_monty_form/mul.rs b/src/modular/boxed_monty_form/mul.rs index 2680d83b..e6bd183b 100644 --- a/src/modular/boxed_monty_form/mul.rs +++ b/src/modular/boxed_monty_form/mul.rs @@ -6,7 +6,7 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. use super::{BoxedMontyForm, BoxedMontyParams}; -use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign, Word}; +use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, @@ -113,7 +113,7 @@ impl<'a> MontyMultiplier<'a> { /// Create a new Montgomery multiplier. 
pub(super) fn new(modulus: &'a BoxedUint, mod_neg_inv: Limb) -> Self { Self { - product: BoxedUint::zero_with_precision(modulus.bits_precision() * 2), + product: BoxedUint::zero_with_precision(modulus.bits_precision()), modulus, mod_neg_inv, } @@ -130,7 +130,6 @@ impl<'a> MontyMultiplier<'a> { pub(super) fn mul_assign(&mut self, a: &mut BoxedUint, b: &BoxedUint) { self.mul_amm_assign(a, b); a.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); - debug_assert!(&*a < self.modulus); } @@ -147,9 +146,9 @@ impl<'a> MontyMultiplier<'a> { self.modulus.as_limbs(), self.mod_neg_inv, ); - ret.limbs - .copy_from_slice(&self.product.limbs[..a.limbs.len()]); - ret.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); + ret.limbs.copy_from_slice(&self.product.limbs); + + // Note: no reduction is required, see the doc comment of `almost_montgomery_mul()`. ret } @@ -165,7 +164,6 @@ impl<'a> MontyMultiplier<'a> { pub(super) fn square_assign(&mut self, a: &mut BoxedUint) { self.square_amm_assign(a); a.sub_assign_mod_with_carry(Limb::ZERO, self.modulus, self.modulus); - debug_assert!(&*a < self.modulus); } @@ -197,8 +195,7 @@ impl<'a> MontyMultiplier<'a> { self.modulus.as_limbs(), self.mod_neg_inv, ); - a.limbs - .copy_from_slice(&self.product.limbs[..a.limbs.len()]); + a.limbs.copy_from_slice(&self.product.limbs); } /// Perform a squaring using "Almost Montgomery Multiplication". @@ -230,8 +227,7 @@ impl<'a> MontyMultiplier<'a> { self.modulus.as_limbs(), self.mod_neg_inv, ); - a.limbs - .copy_from_slice(&self.product.limbs[..a.limbs.len()]); + a.limbs.copy_from_slice(&self.product.limbs); } /// Clear the internal product buffer. @@ -250,140 +246,163 @@ impl Drop for MontyMultiplier<'_> { } } -/// Compute an "Almost Montgomery Multiplication (AMM)" as described in the paper -/// "Efficient Software Implementations of Modular Exponentiation" -/// -/// -/// Computes z mod m = x * y * 2 ** (-n*_W) mod m assuming k = -1/m mod 2**_W. 
-/// -/// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result z is guaranteed to -/// satisfy 0 <= z < 2**(n*_W), but it may not be < m. -/// -/// Output is written into the lower (i.e. first) half of `z`. -/// -/// Note: this was adapted from an implementation in `num-bigint`'s `monty.rs`. +/** +Computes Montgomery multiplication of `x` and `y` into `z`, that is +`z mod m = x * y * 2^(-n*W) mod m` assuming `k = -1/m mod 2^W`, +where `W` is the bit size of the limb, and `n * W` is the full bit size of the integer. + +NOTE: `z` is assumed to be pre-zeroized. + +This function implements the Coarsely Integrated Operand Scanning (CIOS) variation +of Montgomery multiplication, using the classification from +"Analyzing and Comparing Montgomery Multiplication Algorithms" by Koc et al +(). + +Additionally, unlike in Koc et al, we are reducing the final result only if it overflows +`2^(n*W)`, not when it overflows `m`. +This means that this function does not assume `x` and `y` are reduced `mod m`, +and the result will be correct `mod m`, but potentially greater than `m`, +and smaller than `2^(n * W) + m`. +See "Efficient Software Implementations of Modular Exponentiation" by S. Gueron for details +(). + +This function exhibits certain properties which were discovered via randomized tests, +but (to my knowledge at this moment) have not been proven formally. +Hereinafter I denote `f(x) = floor(x / m)`, that is `f` is the number of subtractions +of the modulus required to fully reduce `x`. + +1. In general, if `f(x) = k` and `f(y) = n`, then `f(AMM(x, y)) <= min(k, n) + 1`. + That is the "reduction error" grows with every operation, + but is determined by the argument with the lower error. +2. To retrieve the number from Montgomery form we MM it by 1. In this case `f(AMM(x, 1)) = 0`, + that is the result is always fully reduced regardless of `f(x)`. +3. `f(AMM(x, x)) <= 1` regardless of `f(x)`. That is, squaring resets the error to at most 1. 
+*/ // TODO(tarcieri): refactor into `reduction.rs`, share impl with `MontyForm`? -const fn almost_montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: Limb) { - // This code assumes x, y, m are all the same length (required by addMulVVW and the for loop). - // It also assumes that x, y are already reduced mod m, or else the result will not be properly - // reduced. - let n = m.len(); +pub(crate) const fn almost_montgomery_mul( + z: &mut [Limb], + x: &[Limb], + y: &[Limb], + m: &[Limb], + k: Limb, +) { + let n = z.len(); // This preconditions check allows compiler to remove bound checks later in the code. - let pre_cond = z.len() > n && x.len() == n && y.len() == n && m.len() == n; - if !pre_cond { - panic!("Failed preconditions in montgomery_mul"); + if !(x.len() == n && y.len() == n && m.len() == n) { + panic!("Failed preconditions in `almost_montgomery_mul`"); } - let mut c = ConstChoice::FALSE; + let mut ts = Limb::ZERO; let mut i = 0; while i < n { - let (_, z_slice) = z.split_at_mut(i); - let c2 = add_mul_vvw(z_slice, x, y[i]); - let t = z_slice[0].wrapping_mul(k); - let c3 = add_mul_vvw(z_slice, m, t); - let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); - let cy = cx.wrapping_add(c3); - z[n + i] = cy; - c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); - i += 1; - } + let mut c = add_mul_carry(z, x, y[i]); + (ts, c) = ts.overflowing_add(c); + let ts1 = c; - let (lower, upper) = z.split_at_mut(n); - sub_vv(lower, upper, m); + let t = z[0].wrapping_mul(k); + + c = add_mul_carry_and_shift(z, m, t); + (z[n - 1], c) = ts.overflowing_add(c); + ts = ts1.wrapping_add(c); - let is_zero = c.not(); - let mut i = 0; - while i < n { - lower[i] = Limb::select(lower[i], upper[i], is_zero); i += 1; } + + // If the result overflows the integer size, subtract the modulus. + let overflow = ConstChoice::from_word_lsb(ts.0); + conditional_sub(z, m, overflow); } -/// Same as `almost_montgomery_mul` with `y == 1`. 
+/// Same as `almost_montgomery_mul()` with `y == 1`. /// /// Used for retrieving from Montgomery form. -const fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) { - // This code assumes x, m are all the same length (required by addMulVVW and the for loop). - // It also assumes that x is already reduced mod m, or else the result will not be properly - // reduced. - let n = m.len(); +pub(crate) const fn almost_montgomery_mul_by_one(z: &mut [Limb], x: &[Limb], m: &[Limb], k: Limb) { + let n = z.len(); // This preconditions check allows compiler to remove bound checks later in the code. - let pre_cond = z.len() > n && x.len() == n && m.len() == n; - if !pre_cond { - panic!("Failed preconditions in montgomery_mul"); + if !(x.len() == n && m.len() == n) { + panic!("Failed preconditions in `almost_montgomery_mul_by_one`"); } - let mut c = ConstChoice::FALSE; + let mut ts = Limb::ZERO; - // The unrolled first iteration. - let c2 = add_mul_vvw(z, x, Limb::ONE); - let t = z[0].wrapping_mul(k); - let c3 = add_mul_vvw(z, m, t); - let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); - let cy = cx.wrapping_add(c3); - z[n] = cy; - c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); - - let mut i = 1; + let mut i = 0; while i < n { - let (_, z_slice) = z.split_at_mut(i); - let c2 = add_mul_vvw(z_slice, x, Limb::ZERO); - let t = z_slice[0].wrapping_mul(k); - let c3 = add_mul_vvw(z_slice, m, t); - let cx = c2.wrapping_add(Limb(c.to_u8() as Word)); - let cy = cx.wrapping_add(c3); - z[n + i] = cy; - c = ConstChoice::from_word_lt(cx.0, c2.0).or(ConstChoice::from_word_lt(cy.0, c3.0)); + let mut c = if i == 0 { + add_mul_carry(z, x, Limb::ONE) + } else { + Limb::ZERO + }; + (ts, c) = ts.overflowing_add(c); + let ts1 = c; + + let t = z[0].wrapping_mul(k); + + c = add_mul_carry_and_shift(z, m, t); + (z[n - 1], c) = ts.overflowing_add(c); + ts = ts1.wrapping_add(c); + i += 1; } - let (lower, upper) = z.split_at_mut(n); - 
sub_vv(lower, upper, m); + // If the result overflows the integer size, subtract the modulus. + let overflow = ConstChoice::from_word_lsb(ts.0); + conditional_sub(z, m, overflow); +} + +/// Calculates `z += x * y` and returns the carry. +#[inline] +const fn add_mul_carry(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { + let n = z.len(); + if n != x.len() { + panic!("Failed preconditions in `add_mul_carry`"); + } - let is_zero = c.not(); + let mut c = Limb::ZERO; let mut i = 0; while i < n { - lower[i] = Limb::select(lower[i], upper[i], is_zero); + (z[i], c) = z[i].mac(x[i], y, c); i += 1; } + c } +/// Calculates `z = (z + x * y) / 2^W` and returns the carry (of the `z + x * y`). #[inline] -const fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { - let n = x.len(); - if n > z.len() { - panic!("Failed preconditions in montgomery_mul"); +const fn add_mul_carry_and_shift(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { + let n = z.len(); + if n != x.len() { + panic!("Failed preconditions in `add_mul_carry_and_shift`"); } - let mut c = Limb::ZERO; + let (_, mut c) = z[0].mac(x[0], y, Limb::ZERO); - let mut i = 0; - while i < n { - let (z0, z1) = z[i].mac(x[i], y, Limb::ZERO); - let (zi_, c_) = z0.overflowing_add(c); - z[i] = zi_; - c = c_.wrapping_add(z1); + let mut i = 1; + let mut i1 = 0; + // Help the compiler elide bound checking + while i < n && i1 < n { + (z[i1], c) = z[i].mac(x[i], y, c); i += 1; + i1 += 1; } c } +/// Calculates `z -= x` if `c` is truthy, otherwise `z` is unchanged.
#[inline(always)] -const fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) { +const fn conditional_sub(z: &mut [Limb], x: &[Limb], c: ConstChoice) { let n = z.len(); - if !(n == x.len() && n == y.len()) { - panic!("Failed preconditions in montgomery_mul"); + if n != x.len() { + panic!("Failed preconditions in `conditional_sub`"); } let mut borrow = Limb::ZERO; - let mut i = 0; while i < n { - let (zi, new_borrow) = x[i].sbb(y[i], borrow); + let (zi, new_borrow) = z[i].sbb(Limb(c.if_true_word(x[i].0)), borrow); z[i] = zi; borrow = new_borrow; i += 1; diff --git a/src/modular/boxed_monty_form/pow.rs b/src/modular/boxed_monty_form/pow.rs index 72f95e68..9a2c70a3 100644 --- a/src/modular/boxed_monty_form/pow.rs +++ b/src/modular/boxed_monty_form/pow.rs @@ -111,12 +111,21 @@ fn pow_montgomery_form( } } - // Ensure output is properly reduced: AMM only reduces to the bit length of `modulus` - // See RustCrypto/crypto-bigint#441 - z.conditional_sbb_assign(modulus, !z.ct_lt(modulus)); + // Ensure the output is properly reduced. + // + // Using the properties of `almost_montgomery_mul()` (see its documentation): + // - We have an incoming `x` which is fully reduced (`floor(x / modulus) = 0`). + // - We build an array of `powers` which are produced by multiplying the previous power by `x`, + // so for each power `floor(power / modulus) <= 1`. + // - Then we take turns squaring the accumulator `z` (bringing `floor(z / modulus)` to 1 + // regardless of the previous reduction level) and multiplying by a power of `x` + // (bringing `floor(z / modulus)` to at most 2). + // - Then we either exit the loop, or square again, which brings `floor(z / modulus)` back to 1. + // + // Now that we exited the loop, we need to reduce `z` at most twice + // to bring it within `[0, modulus)`.
- // Subtract again to ensure output is fully reduced - // See RustCrypto/crypto-bigint#455 and golang.org/issue/13907 + z.conditional_sbb_assign(modulus, !z.ct_lt(modulus)); z.conditional_sbb_assign(modulus, !z.ct_lt(modulus)); debug_assert!(&z < modulus); diff --git a/src/primitives.rs b/src/primitives.rs index 3a0ae58e..731cf55f 100644 --- a/src/primitives.rs +++ b/src/primitives.rs @@ -61,7 +61,13 @@ pub(crate) const fn mac(a: Word, b: Word, c: Word, carry: Word) -> (Word, Word) let a = a as WideWord; let b = b as WideWord; let c = c as WideWord; - let carry = carry as WideWord; - let ret = a + (b * c) + carry; - (ret as Word, (ret >> Word::BITS) as Word) + let ret = a + (b * c); + let (lo, hi) = (ret as Word, (ret >> Word::BITS) as Word); + + let (lo, c) = lo.overflowing_add(carry); + + // Even if all the arguments are `Word::MAX` we can't overflow `hi`. + let hi = hi.wrapping_add(c as Word); + + (lo, hi) } From 8dc0d4a88781ec0aed85c41418c27b64cef0d5cd Mon Sep 17 00:00:00 2001 From: Bogdan Opanchuk Date: Wed, 5 Mar 2025 12:39:54 -0800 Subject: [PATCH 12/12] Expose MontyMultiplier --- src/modular/boxed_monty_form.rs | 22 +++++++++++---- src/modular/boxed_monty_form/mul.rs | 43 +++++++++++++++++++---------- src/modular/boxed_monty_form/pow.rs | 4 +-- src/modular/monty_form.rs | 7 +++++ src/modular/monty_form/mul.rs | 38 +++++++++++++++++++++++-- src/traits.rs | 25 +++++++++++++++++ 6 files changed, 116 insertions(+), 23 deletions(-) diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index 5a414c6c..1ee2abc0 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -9,7 +9,7 @@ mod pow; mod sub; use super::{ConstMontyParams, Retrieve, div_by_2}; -use mul::MontyMultiplier; +use mul::BoxedMontyMultiplier; use crate::{BoxedUint, Limb, Monty, Odd, Word}; use alloc::sync::Arc; @@ -67,7 +67,7 @@ impl BoxedMontyParams { let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); let r3 = { - let 
mut mm = MontyMultiplier::new(&modulus, mod_neg_inv); + let mut mm = BoxedMontyMultiplier::new(&modulus, mod_neg_inv); mm.square(&r2) }; @@ -110,7 +110,7 @@ impl BoxedMontyParams { let mod_leading_zeros = modulus.as_ref().leading_zeros().min(Word::BITS - 1); let r3 = { - let mut mm = MontyMultiplier::new(&modulus, mod_neg_inv); + let mut mm = BoxedMontyMultiplier::new(&modulus, mod_neg_inv); mm.square(&r2) }; @@ -187,7 +187,7 @@ impl BoxedMontyForm { /// Retrieves the integer currently encoded in this [`BoxedMontyForm`], guaranteed to be reduced. pub fn retrieve(&self) -> BoxedUint { - let mut mm = MontyMultiplier::from(self.params.as_ref()); + let mut mm = BoxedMontyMultiplier::from(self.params.as_ref()); mm.mul_by_one(&self.montgomery_form) } @@ -277,6 +277,7 @@ impl Retrieve for BoxedMontyForm { impl Monty for BoxedMontyForm { type Integer = BoxedUint; type Params = BoxedMontyParams; + type Multiplier<'a> = BoxedMontyMultiplier<'a>; fn new_params_vartime(modulus: Odd) -> Self::Params { BoxedMontyParams::new_vartime(modulus) @@ -302,6 +303,17 @@ impl Monty for BoxedMontyForm { &self.montgomery_form } + fn copy_montgomery_from(&mut self, other: &Self) { + debug_assert_eq!( + self.montgomery_form.bits_precision(), + other.montgomery_form.bits_precision() + ); + debug_assert_eq!(self.params, other.params); + self.montgomery_form + .limbs + .copy_from_slice(&other.montgomery_form.limbs); + } + fn double(&self) -> Self { BoxedMontyForm::double(self) } @@ -330,7 +342,7 @@ impl Zeroize for BoxedMontyForm { /// Convert the given integer into the Montgomery domain. 
#[inline] fn convert_to_montgomery(integer: &mut BoxedUint, params: &BoxedMontyParams) { - let mut mm = MontyMultiplier::from(params); + let mut mm = BoxedMontyMultiplier::from(params); mm.mul_assign(integer, &params.r2); } diff --git a/src/modular/boxed_monty_form/mul.rs b/src/modular/boxed_monty_form/mul.rs index e6bd183b..d87d1ffd 100644 --- a/src/modular/boxed_monty_form/mul.rs +++ b/src/modular/boxed_monty_form/mul.rs @@ -6,7 +6,7 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. use super::{BoxedMontyForm, BoxedMontyParams}; -use crate::{BoxedUint, ConstChoice, Limb, Square, SquareAssign}; +use crate::{BoxedUint, ConstChoice, Limb, MontyMultiplier, Square, SquareAssign}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, @@ -19,7 +19,7 @@ impl BoxedMontyForm { /// Multiplies by `rhs`. pub fn mul(&self, rhs: &Self) -> Self { debug_assert_eq!(&self.params, &rhs.params); - let montgomery_form = MontyMultiplier::from(self.params.borrow()) + let montgomery_form = BoxedMontyMultiplier::from(self.params.borrow()) .mul(&self.montgomery_form, &rhs.montgomery_form); Self { @@ -31,7 +31,7 @@ impl BoxedMontyForm { /// Computes the (reduced) square.
pub fn square(&self) -> Self { let montgomery_form = - MontyMultiplier::from(self.params.borrow()).square(&self.montgomery_form); + BoxedMontyMultiplier::from(self.params.borrow()).square(&self.montgomery_form); Self { montgomery_form, @@ -79,7 +79,7 @@ impl MulAssign for BoxedMontyForm { impl MulAssign<&BoxedMontyForm> for BoxedMontyForm { fn mul_assign(&mut self, rhs: &BoxedMontyForm) { debug_assert_eq!(&self.params, &rhs.params); - MontyMultiplier::from(self.params.borrow()) + BoxedMontyMultiplier::from(self.params.borrow()) .mul_assign(&mut self.montgomery_form, &rhs.montgomery_form); } } @@ -92,24 +92,39 @@ impl Square for BoxedMontyForm { impl SquareAssign for BoxedMontyForm { fn square_assign(&mut self) { - MontyMultiplier::from(self.params.borrow()).square_assign(&mut self.montgomery_form); - } -} - -impl<'a> From<&'a BoxedMontyParams> for MontyMultiplier<'a> { - fn from(params: &'a BoxedMontyParams) -> MontyMultiplier<'a> { - MontyMultiplier::new(&params.modulus, params.mod_neg_inv) + BoxedMontyMultiplier::from(self.params.borrow()).square_assign(&mut self.montgomery_form); } } /// Montgomery multiplier with a pre-allocated internal buffer to avoid additional allocations. -pub(super) struct MontyMultiplier<'a> { +#[derive(Debug, Clone)] +pub struct BoxedMontyMultiplier<'a> { product: BoxedUint, modulus: &'a BoxedUint, mod_neg_inv: Limb, } -impl<'a> MontyMultiplier<'a> { +impl<'a> From<&'a BoxedMontyParams> for BoxedMontyMultiplier<'a> { + fn from(params: &'a BoxedMontyParams) -> BoxedMontyMultiplier<'a> { + BoxedMontyMultiplier::new(&params.modulus, params.mod_neg_inv) + } +} + +impl<'a> MontyMultiplier<'a> for BoxedMontyMultiplier<'a> { + type Monty = BoxedMontyForm; + + /// Performs a Montgomery multiplication, assigning a fully reduced result to `lhs`.
+ fn mul_assign(&mut self, lhs: &mut Self::Monty, rhs: &Self::Monty) { + self.mul_assign(&mut lhs.montgomery_form, &rhs.montgomery_form); + } + + /// Performs a Montgomery squaring, assigning a fully reduced result to `lhs`. + fn square_assign(&mut self, lhs: &mut Self::Monty) { + self.square_assign(&mut lhs.montgomery_form); + } +} + +impl<'a> BoxedMontyMultiplier<'a> { /// Create a new Montgomery multiplier. pub(super) fn new(modulus: &'a BoxedUint, mod_neg_inv: Limb) -> Self { Self { @@ -240,7 +255,7 @@ impl<'a> MontyMultiplier<'a> { } #[cfg(feature = "zeroize")] -impl Drop for MontyMultiplier<'_> { +impl Drop for BoxedMontyMultiplier<'_> { fn drop(&mut self) { self.product.zeroize(); } diff --git a/src/modular/boxed_monty_form/pow.rs b/src/modular/boxed_monty_form/pow.rs index 9a2c70a3..93fd5d67 100644 --- a/src/modular/boxed_monty_form/pow.rs +++ b/src/modular/boxed_monty_form/pow.rs @@ -1,6 +1,6 @@ //! Modular exponentiation support for [`BoxedMontyForm`]. -use super::{BoxedMontyForm, mul::MontyMultiplier}; +use super::{BoxedMontyForm, mul::BoxedMontyMultiplier}; use crate::{BoxedUint, ConstantTimeSelect, Limb, PowBoundedExp, Word}; use alloc::vec::Vec; use subtle::{ConstantTimeEq, ConstantTimeLess}; @@ -60,7 +60,7 @@ fn pow_montgomery_form( const WINDOW: u32 = 4; const WINDOW_MASK: Word = (1 << WINDOW) - 1; - let mut multiplier = MontyMultiplier::new(modulus, mod_neg_inv); + let mut multiplier = BoxedMontyMultiplier::new(modulus, mod_neg_inv); // powers[i] contains x^i let mut powers = Vec::with_capacity(1 << WINDOW); diff --git a/src/modular/monty_form.rs b/src/modular/monty_form.rs index 84910592..7e3f8905 100644 --- a/src/modular/monty_form.rs +++ b/src/modular/monty_form.rs @@ -15,6 +15,7 @@ use super::{ reduction::montgomery_reduction, }; use crate::{Concat, ConstChoice, Limb, Monty, NonZero, Odd, Split, Uint, Word}; +use mul::DynMontyMultiplier; use subtle::{Choice, ConditionallySelectable, ConstantTimeEq}; /// Parameters to efficiently go to/from the 
Montgomery form for an odd modulus provided at runtime. @@ -271,6 +272,7 @@ impl Retrieve for MontyForm { impl Monty for MontyForm { type Integer = Uint; type Params = MontyParams; + type Multiplier<'a> = DynMontyMultiplier<'a, LIMBS>; fn new_params_vartime(modulus: Odd) -> Self::Params { MontyParams::new_vartime(modulus) @@ -296,6 +298,11 @@ impl Monty for MontyForm { &self.montgomery_form } + fn copy_montgomery_from(&mut self, other: &Self) { + debug_assert_eq!(self.params, other.params); + self.montgomery_form = other.montgomery_form; + } + fn double(&self) -> Self { MontyForm::double(self) } diff --git a/src/modular/monty_form/mul.rs b/src/modular/monty_form/mul.rs index 11344ed2..710c719e 100644 --- a/src/modular/monty_form/mul.rs +++ b/src/modular/monty_form/mul.rs @@ -2,8 +2,11 @@ use super::MontyForm; use crate::{ - Square, SquareAssign, - modular::mul::{mul_montgomery_form, square_montgomery_form}, + MontyMultiplier, Square, SquareAssign, + modular::{ + MontyParams, + mul::{mul_montgomery_form, square_montgomery_form}, + }, }; use core::ops::{Mul, MulAssign}; @@ -88,3 +91,34 @@ impl SquareAssign for MontyForm { *self = self.square() } } + +#[derive(Debug, Clone, Copy)] +pub struct DynMontyMultiplier<'a, const LIMBS: usize>(&'a MontyParams); + +impl<'a, const LIMBS: usize> From<&'a MontyParams> for DynMontyMultiplier<'a, LIMBS> { + fn from(source: &'a MontyParams) -> Self { + Self(source) + } +} + +impl<'a, const LIMBS: usize> MontyMultiplier<'a> for DynMontyMultiplier<'a, LIMBS> { + type Monty = MontyForm; + + /// Performs a Montgomery multiplication, assigning a fully reduced result to `lhs`. + fn mul_assign(&mut self, lhs: &mut Self::Monty, rhs: &Self::Monty) { + let product = mul_montgomery_form( + &lhs.montgomery_form, + &rhs.montgomery_form, + &self.0.modulus, + self.0.mod_neg_inv, + ); + lhs.montgomery_form = product; + } + + /// Performs a Montgomery squaring, assigning a fully reduced result to `lhs`. 
+ fn square_assign(&mut self, lhs: &mut Self::Monty) { + let product = + square_montgomery_form(&lhs.montgomery_form, &self.0.modulus, self.0.mod_neg_inv); + lhs.montgomery_form = product; + } +} diff --git a/src/traits.rs b/src/traits.rs index 2c60c544..949069fa 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -870,6 +870,9 @@ pub trait Monty: /// The original integer type. type Integer: Integer; + /// Prepared Montgomery multiplier for tight loops. + type Multiplier<'a>: Debug + Clone + MontyMultiplier<'a, Monty = Self>; + /// The precomputed data needed for this representation. type Params: 'static + Clone + Debug + Eq + Sized + Send + Sync; @@ -892,6 +895,10 @@ pub trait Monty: /// Access the value in Montgomery form. fn as_montgomery(&self) -> &Self::Integer; + /// Copy the Montgomery representation from `other` into `self`. + /// NOTE: the parameters remain unchanged. + fn copy_montgomery_from(&mut self, other: &Self); + /// Performs doubling, returning `self + self`. fn double(&self) -> Self; @@ -913,3 +920,21 @@ pub trait Monty: /// Montgomery parameters. fn lincomb_vartime(products: &[(&Self, &Self)]) -> Self; } + +/// Prepared Montgomery multiplier for tight loops. +/// +/// Allows one to perform inplace multiplication without allocations +/// (important for the `BoxedUint` case). +/// +/// NOTE: You will be operating with Montgomery representations directly, +/// make sure they all correspond to the same set of parameters. +pub trait MontyMultiplier<'a>: From<&'a <Self::Monty as Monty>::Params> { + /// The associated Montgomery-representation integer. + type Monty: Monty; + + /// Performs a Montgomery multiplication, assigning a fully reduced result to `lhs`. + fn mul_assign(&mut self, lhs: &mut Self::Monty, rhs: &Self::Monty); + + /// Performs a Montgomery squaring, assigning a fully reduced result to `lhs`. + fn square_assign(&mut self, lhs: &mut Self::Monty); +}