From 1820a5584b9bc606d0b5d12f6a60dfc62e98fe96 Mon Sep 17 00:00:00 2001 From: Mike <45243121+tankf33der@users.noreply.github.com> Date: Fri, 5 Sep 2025 16:47:25 +0300 Subject: [PATCH] math.big: replace division with Knuth, improve performance (#25242) --- vlib/math/big/array_ops.v | 35 ++-- vlib/math/big/array_ops_test.v | 20 +-- vlib/math/big/division_array_ops.v | 217 +++++++++++------------- vlib/math/big/division_array_ops_test.v | 97 +---------- vlib/math/big/integer.v | 4 +- 5 files changed, 135 insertions(+), 238 deletions(-) diff --git a/vlib/math/big/array_ops.v b/vlib/math/big/array_ops.v index dc5f535d1a..105faa0bf6 100644 --- a/vlib/math/big/array_ops.v +++ b/vlib/math/big/array_ops.v @@ -181,10 +181,7 @@ fn divide_digit_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut cmp_result := compare_digit_array(operand_a, operand_b) // a == b => q, r = 1, 0 if cmp_result == 0 { - quotient << 1 - for quotient.len > 1 { - quotient.delete_last() - } + quotient[0] = 1 remainder.clear() return } @@ -192,7 +189,9 @@ fn divide_digit_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut // a < b => q, r = 0, a if cmp_result < 0 { quotient.clear() - remainder << operand_a + for i in 0 .. operand_a.len { + remainder[i] = operand_a[i] + } return } if operand_b.len == 1 { @@ -210,38 +209,36 @@ fn divide_array_by_digit(operand_a []u64, divisor u64, mut quotient []u64, mut r // 1 digit for both dividend and divisor dividend := operand_a[0] q := dividend / divisor - if q != 0 { - quotient << q - } + quotient[0] = q + rem := dividend % divisor - if rem != 0 { - remainder << rem - } + remainder[0] = rem + + shrink_tail_zeros(mut quotient) + shrink_tail_zeros(mut remainder) return } // Dividend has more digits mut rem := u64(0) mut quo := u64(0) - mut qtemp := []u64{len: quotient.cap} - divisor64 := u64(divisor) // Perform division step by step for index := operand_a.len - 1; index >= 0; index-- { hi := rem >> (64 - digit_bits) lo := rem << digit_bits | operand_a[index] - quo, rem = bits.div_64(hi, lo, divisor64) - qtemp[index] = quo & max_digit + quo, rem = bits.div_64(hi, lo, divisor) + quotient[index] = quo & max_digit } + // Remove leading zeros from quotient - shrink_tail_zeros(mut qtemp) - quotient << qtemp - remainder << rem + shrink_tail_zeros(mut quotient) + remainder[0] = rem shrink_tail_zeros(mut remainder) } @[inline] fn divide_array_by_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut remainder []u64) { - binary_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder) + knuth_divide_array_by_array(operand_a, operand_b, mut quotient, mut remainder) } // Shifts the contents of the original array by the given amount of bits to the left. diff --git a/vlib/math/big/array_ops_test.v b/vlib/math/big/array_ops_test.v index 88441a6103..e6a0fbbc1a 100644 --- a/vlib/math/big/array_ops_test.v +++ b/vlib/math/big/array_ops_test.v @@ -136,8 +136,8 @@ fn test_compare_digit_array_02() { fn test_divide_digit_array_01() { a := [u64(14)] b := [u64(2)] - mut q := []u64{cap: 1} - mut r := []u64{cap: 1} + mut q := []u64{len: 1} + mut r := []u64{len: 1} divide_digit_array(a, b, mut q, mut r) assert q == [u64(7)] @@ -147,8 +147,8 @@ fn test_divide_digit_array_01() { fn test_divide_digit_array_02() { a := [u64(14)] b := [u64(15)] - mut q := []u64{cap: 1} - mut r := []u64{cap: 1} + mut q := []u64{len: 1} + mut r := []u64{len: 1} divide_digit_array(a, b, mut q, mut r) assert q == []u64{len: 0} @@ -158,8 +158,8 @@ fn test_divide_digit_array_02() { fn test_divide_digit_array_03() { a := [u64(0), 4] b := [u64(0), 1] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(4)] @@ -169,8 +169,8 @@ fn test_divide_digit_array_03() { fn test_divide_digit_array_04() { a := [u64(2), 4] b := [u64(0), 1] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(4)] @@ -180,8 +180,8 @@ fn test_divide_digit_array_04() { fn test_divide_digit_array_05() { a := [u64(3)] b := [u64(2)] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(1)] diff --git a/vlib/math/big/division_array_ops.v b/vlib/math/big/division_array_ops.v index 81802f9ed5..66912ca35b 100644 --- a/vlib/math/big/division_array_ops.v +++ b/vlib/math/big/division_array_ops.v @@ -2,57 +2,6 @@ module big import math.bits -// suppose operand_a bigger than operand_b and both not null. -// Both quotient and remaider are allocated but of length 0 -@[direct_array_access] -fn binary_divide_array_by_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut remainder []u64) { - remainder << operand_a - - len_diff := operand_a.len - operand_b.len - $if debug { - assert len_diff >= 0 - } - - // we must do in place shift and operations. - mut divisor := []u64{cap: operand_b.len} - for _ in 0 .. len_diff { - divisor << u64(0) - } - divisor << operand_b - for _ in 0 .. len_diff + 1 { - quotient << u64(0) - } - - lead_zer_remainder := u32(bits.leading_zeros_64(remainder.last()) - (64 - digit_bits)) - lead_zer_divisor := u32(bits.leading_zeros_64(divisor.last()) - (64 - digit_bits)) - bit_offset := (u32(digit_bits) * u32(len_diff)) + (lead_zer_divisor - lead_zer_remainder) - - // align - if lead_zer_remainder < lead_zer_divisor { - left_shift_in_place(mut divisor, lead_zer_divisor - lead_zer_remainder) - } else if lead_zer_remainder > lead_zer_divisor { - left_shift_in_place(mut remainder, lead_zer_remainder - lead_zer_divisor) - } - - $if debug { - assert left_align_p(divisor[divisor.len - 1], remainder[remainder.len - 1]) - } - for bit_idx := int(bit_offset); bit_idx >= 0; bit_idx-- { - if greater_equal_from_end(remainder, divisor) { - bit_set(mut quotient, bit_idx) - subtract_align_last_byte_in_place(mut remainder, divisor) - } - right_shift_in_place(mut divisor, 1) - } - - // adjust - if lead_zer_remainder > lead_zer_divisor { - right_shift_in_place(mut remainder, lead_zer_remainder - lead_zer_divisor) - } - shrink_tail_zeros(mut remainder) - shrink_tail_zeros(mut quotient) -} - // help routines for cleaner code but inline for performance // quicker than BitField.set_bit @[direct_array_access; inline] @@ -65,80 +14,112 @@ fn bit_set(mut a []u64, n int) { a[byte_offset] |= mask } -// a.len is greater or equal to b.len -// returns true if a >= b (completed with zeroes) -@[direct_array_access; inline] -fn greater_equal_from_end(a []u64, b []u64) bool { - $if debug { - assert a.len >= b.len - } - offset := a.len - b.len - for index := a.len - 1; index >= offset; index-- { - if a[index] > b[index - offset] { - return true - } else if a[index] < b[index - offset] { - return false +@[direct_array_access] +fn knuth_divide_array_by_array(operand_a []u64, operand_b []u64, mut quotient []u64, mut remainder []u64) { + m := operand_a.len - operand_b.len + n := operand_b.len + mut u := []u64{len: operand_a.len + 1} + mut v := []u64{len: n} + leading_zeros := bits.leading_zeros_64(operand_b.last()) - (64 - digit_bits) + + if leading_zeros > 0 { + mut carry := u64(0) + amount := digit_bits - leading_zeros + for i in 0 .. operand_a.len { + temp := (operand_a[i] << leading_zeros) | carry + u[i] = temp & max_digit + carry = operand_a[i] >> amount + } + u[operand_a.len] = carry + carry = 0 + for i in 0 .. operand_b.len { + temp := (operand_b[i] << leading_zeros) | carry + v[i] = temp & max_digit + carry = operand_b[i] >> amount + } + } else { + for i in 0 .. operand_a.len { + u[i] = operand_a[i] + } + for i in 0 .. operand_b.len { + v[i] = operand_b[i] } } - return true -} -// a := a - b supposed a >= b -// attention the b operand is align with the a operand before the subtraction -@[direct_array_access; inline] -fn subtract_align_last_byte_in_place(mut a []u64, b []u64) { - mut carry := u64(0) - mut new_carry := u64(0) - offset := a.len - b.len - for index := a.len - b.len; index < a.len; index++ { - if a[index] < (b[index - offset] + carry) || (b[index - offset] == max_digit && carry > 0) { - new_carry = 1 - } else { - new_carry = 0 + if remainder.len >= (n + 1) { + remainder.trim(n + 1) + } else { + remainder = []u64{len: n + 1} + } + + v_n_1 := v[n - 1] + v_n_2 := v[n - 2] + for j := m; j >= 0; j-- { + u_j_n := u[j + n] + u_j_n_1 := u[j + n - 1] + u_j_n_2 := u[j + n - 2] + + mut qhat, mut rhat := bits.div_64(u_j_n >> (64 - digit_bits), (u_j_n << digit_bits) | u_j_n_1, + v_n_1) + mut x1, mut x2 := bits.mul_64(qhat, v_n_2) + x2 = x2 & max_digit + x1 = (x1 << (64 - digit_bits)) | (x2 >> digit_bits) + for greater_than(x1, x2, rhat, u_j_n_2) { + qhat-- + prev := rhat + rhat += v_n_1 + if rhat < prev { + break + } + x1, x2 = bits.mul_64(qhat, v_n_2) + x2 = x2 & max_digit + x1 = (x1 << (64 - digit_bits)) | (x2 >> digit_bits) } - a[index] -= (b[index - offset] + carry) - a[index] = a[index] & max_digit - carry = new_carry + mut carry := u64(0) + for i in 0 .. n { + hi, lo := bits.mul_add_64(v[i], qhat, carry) + remainder[i] = lo & max_digit + carry = (hi << (64 - digit_bits)) | (lo >> digit_bits) + } + remainder[n] = carry + + mut borrow := u64(0) + for i in 0 .. n + 1 { + result := u[j + i] - remainder[i] - borrow + u[j + i] = result & max_digit + borrow = (result >> digit_bits) & 1 + } + if borrow == 1 { + qhat-- + carry = u64(0) + for i in 0 .. n { + sum := u[j + i] + v[i] + carry + u[j + i] = sum & max_digit + carry = sum >> digit_bits + } + } + quotient[j] = qhat } - $if debug { - assert carry == 0 + + remainder.delete_last() + if leading_zeros > 0 { + mut carry := u64(0) + max_leading_digit := (u64(1) << leading_zeros) - 1 + for i := n - 1; i >= 0; i-- { + current_limb := u[i] + remainder[i] = (current_limb >> leading_zeros) | carry + carry = (current_limb & max_leading_digit) << (digit_bits - leading_zeros) + } + } else { + for i in 0 .. n { + remainder[i] = u[i] + } } + shrink_tail_zeros(mut quotient) + shrink_tail_zeros(mut remainder) } -// logical left shift -// there is no overflow. We know that the last bits are zero -// and that n <= `digit_bits` -@[direct_array_access; inline] -fn left_shift_in_place(mut a []u64, n u32) { - mut carry := u64(0) - mut prec_carry := u64(0) - mask := ((u64(1) << n) - 1) << (digit_bits - n) - for index in 0 .. a.len { - prec_carry = carry >> (digit_bits - n) - carry = a[index] & mask - a[index] <<= n - a[index] = a[index] & max_digit - a[index] |= prec_carry - } -} - -// logical right shift without control because these digits have already been -// shift left before -@[direct_array_access; inline] -fn right_shift_in_place(mut a []u64, n u32) { - mut carry := u64(0) - mut prec_carry := u64(0) - mask := (u64(1) << n) - 1 - for index := a.len - 1; index >= 0; index-- { - carry = a[index] & mask - a[index] >>= n - a[index] |= prec_carry << (digit_bits - n) - prec_carry = carry - } -} - -// for assert @[inline] -fn left_align_p(a u64, b u64) bool { - return bits.leading_zeros_64(a) == bits.leading_zeros_64(b) +fn greater_than(x1 u64, x2 u64, y1 u64, y2 u64) bool { + return x1 > y1 || (x1 == y1 && x2 > y2) } diff --git a/vlib/math/big/division_array_ops_test.v b/vlib/math/big/division_array_ops_test.v index 6f81c9427c..7eced22c85 100644 --- a/vlib/math/big/division_array_ops_test.v +++ b/vlib/math/big/division_array_ops_test.v @@ -2,92 +2,11 @@ module big import rand -fn test_left_shift_in_place() { - mut a := [u64(1), 1, 1, 1, 1] - left_shift_in_place(mut a, 1) - assert a == [u64(2), 2, 2, 2, 2] - left_shift_in_place(mut a, 7) - assert a == [u64(256), 256, 256, 256, 256] - mut b := [u64(0x08000000_00000001), 0x0c000000_00000000, 0x08000000_00000000, 0x07ffffff_ffffffff] - left_shift_in_place(mut b, 1) - assert b == [u64(2), 0x08000000_00000001, 1, 0x0fffffff_ffffffff] - mut c := [u64(0x00ffffff_ffffffff), 0x00f0f0f0_f0f0f0f0, 1, 0x03ffffff_ffffffff, 1] - left_shift_in_place(mut c, 2) - assert c == [u64(0x03ffffff_fffffffc), 0x03c3c3c3_c3c3c3c0, 4, 0x0fffffff_fffffffc, 4] -} - -fn test_right_shift_in_place() { - mut a := [u64(2), 2, 2, 2, 2] - right_shift_in_place(mut a, 1) - assert a == [u64(1), 1, 1, 1, 1] - a = [u64(256), 256, 256, 256, 256] - right_shift_in_place(mut a, 7) - assert a == [u64(2), 2, 2, 2, 2] - a = [u64(0), 0, 1] - right_shift_in_place(mut a, 1) - assert a == [u64(0), 0x08000000_00000000, 0] - mut b := [u64(3), 0x08000000_00000001, 1, 0x0fffffff_ffffffff] - right_shift_in_place(mut b, 1) - assert b == [u64(0x08000000_00000001), 0x0c000000_00000000, 0x08000000_00000000, - 0x07ffffff_ffffffff] - mut c := [u64(0x03ffffff), 0x03c3c3c3_c3c3c3c0, 7, 0xfffffffc, 4] - right_shift_in_place(mut c, 2) - assert c == [u64(0x00ffffff), 0x0cf0f0f0_f0f0f0f0, 1, 0x3fffffff, 1] -} - -fn test_subtract_align_last_byte_in_place() { - mut a := [u64(2), 2, 2, 2, 2] - mut b := [u64(1), 1, 2, 1, 1] - subtract_align_last_byte_in_place(mut a, b) - assert a == [u64(1), 1, 0, 1, 1] - - a = [u64(0), 0, 0, 0, 1] - b = [u64(0), 0, 1] - subtract_align_last_byte_in_place(mut a, b) - assert a == [u64(0), 0, 0, 0, 0] - - a = [u64(0), 0, 0, 0, 1, 13] - b = [u64(1), 0, 1] - mut c := []u64{len: a.len} - mut d := [u64(0), 0, 0] - d << b // to have same length - subtract_digit_array(a, d, mut c) - subtract_align_last_byte_in_place(mut a, b) - assert a == [u64(0), 0, 0, u64(-1) & max_digit, 0, 12] - assert c == a -} - -fn test_greater_equal_from_end() { - mut a := [u64(1), 2, 3, 4, 5, 6] - mut b := [u64(3), 4, 5, 6] - assert greater_equal_from_end(a, b) == true - - a = [u64(1), 2, 3, 4, 5, 6] - b = [u64(1), 2, 3, 4, 5, 6] - assert greater_equal_from_end(a, b) == true - - a = [u64(1), 2, 3, 4, 5, 6] - b = [u64(2), 2, 3, 4, 5, 6] - assert greater_equal_from_end(a, b) == false - - a = [u64(0), 0, 0, 4, 5, 6] - b = [u64(4), 5, 6] - assert greater_equal_from_end(a, b) == true - - a = [u64(0), 0, 0, 4, 5, 6] - b = [u64(4), 6, 6] - assert greater_equal_from_end(a, b) == false - - a = [u64(0), 0, 0, 4, 5, 5] - b = [u64(4), 5, 6] - assert greater_equal_from_end(a, b) == false -} - fn test_divide_digit_array_03() { a := [u64(0), 4] b := [u64(0), 1] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(4)] @@ -97,8 +16,8 @@ fn test_divide_digit_array_03() { fn test_divide_digit_array_04() { a := [u64(2), 4] b := [u64(0), 1] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(4)] @@ -108,8 +27,8 @@ fn test_divide_digit_array_04() { fn test_divide_digit_array_05() { a := [u64(2), 4, 5] b := [u64(0), 1] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(4), 5] @@ -119,8 +38,8 @@ fn test_divide_digit_array_05() { fn test_divide_digit_array_06() { a := [u64(2), 4, 5, 3] b := [u64(0), 0x8000] - mut q := []u64{cap: a.len - b.len + 1} - mut r := []u64{cap: a.len} + mut q := []u64{len: a.len - b.len + 1} + mut r := []u64{len: a.len} divide_digit_array(a, b, mut q, mut r) assert q == [u64(0xa000_00000000), 0x6000_00000000] diff --git a/vlib/math/big/integer.v b/vlib/math/big/integer.v index 565398c40d..85f96a058c 100644 --- a/vlib/math/big/integer.v +++ b/vlib/math/big/integer.v @@ -393,8 +393,8 @@ pub fn (multiplicand Integer) * (multiplier Integer) Integer { // // DO NOT use this method if the divisor has any chance of being 0. fn (dividend Integer) div_mod_internal(divisor Integer) (Integer, Integer) { - mut q := []u64{cap: int_max(1, dividend.digits.len - divisor.digits.len + 1)} - mut r := []u64{cap: dividend.digits.len} + mut q := []u64{len: int_max(1, dividend.digits.len - divisor.digits.len + 1)} + mut r := []u64{len: dividend.digits.len} mut q_signum := 0 mut r_signum := 0