math.bits: add asm implementations for some 64 bit ops (#25020)

This commit is contained in:
kbkpbot 2025-08-11 14:01:49 +08:00 committed by GitHub
parent 60108cdd33
commit 350747793b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 517 additions and 7 deletions

100
vlib/math/bits/bits.amd64.v Normal file
View file

@ -0,0 +1,100 @@
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module bits
fn C._umul128(x u64, y u64, result_hi &u64) u64
fn C._addcarry_u64(carry_in u8, a u64, b u64, out &u64) u8
fn C._udiv128(hi u64, lo u64, y u64, rem &u64) u64
// mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y
// with the product bits' upper half returned in hi and the lower
// half returned in lo.
//
// This function's execution time does not depend on the inputs.
@[inline]
pub fn mul_64(x u64, y u64) (u64, u64) {
mut hi := u64(0)
mut lo := u64(0)
$if msvc {
lo = C._umul128(x, y, &hi)
return hi, lo
} $else $if amd64 {
asm amd64 {
mulq rdx
; =a (lo)
=d (hi)
; a (x)
d (y)
; cc
}
return hi, lo
}
// cross compile
return mul_64_default(x, y)
}
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
// with the result bits' upper half returned in hi and the lower
// half returned in lo.
@[inline]
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
mut hi := u64(0)
mut lo := u64(0)
$if msvc {
lo = C._umul128(x, y, &hi)
carry := C._addcarry_u64(0, lo, z, &lo)
hi += carry
return hi, lo
} $else $if amd64 {
asm amd64 {
mulq rdx
addq rax, z
adcq rdx, 0
; =a (lo)
=d (hi)
; a (x)
d (y)
r (z)
; cc
}
return hi, lo
}
// cross compile
return mul_add_64_default(x, y, z)
}
// div_64 returns the quotient and remainder of (hi, lo) divided by y:
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
// half in parameter hi and the lower half in parameter lo.
// div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
@[inline]
pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) {
mut y := y1
if y == 0 {
panic(overflow_error)
}
if y <= hi {
panic(overflow_error)
}
mut quo := u64(0)
mut rem := u64(0)
$if msvc {
quo = C._udiv128(hi, lo, y, &rem)
return quo, rem
} $else $if amd64 {
asm amd64 {
div y
; =a (quo)
=d (rem)
; d (hi)
a (lo)
r (y)
; cc
}
return quo, rem
}
// cross compile
return div_64_default(hi, lo, y1)
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module bits
// mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y
// with the product bits' upper half returned in hi and the lower
// half returned in lo.
//
// This function's execution time does not depend on the inputs.
@[inline]
pub fn mul_64(x u64, y u64) (u64, u64) {
mut hi := u64(0)
mut lo := u64(0)
$if arm64 && !tinyc {
asm arm64 {
mul lo, x, y
umulh hi, x, y
; =&r (hi)
=&r (lo)
; r (x)
r (y)
; cc
}
return hi, lo
}
// cross compile
return mul_64_default(x, y)
}
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
// with the result bits' upper half returned in hi and the lower
// half returned in lo.
@[inline]
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
mut hi := u64(0)
mut lo := u64(0)
$if arm64 && !tinyc {
asm arm64 {
mul lo, x, y
umulh hi, x, y
adds lo, lo, z
adc hi, hi, xzr
; =&r (hi)
=&r (lo)
; r (x)
r (y)
r (z)
; cc
}
return hi, lo
}
// cross compile
return mul_add_64_default(x, y, z)
}

186
vlib/math/bits/bits.c.v Normal file
View file

@ -0,0 +1,186 @@
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module bits
fn C.__builtin_clz(x u32) int
fn C.__builtin_clzll(x u64) int
fn C.__lzcnt(x u32) int
fn C.__lzcnt64(x u64) int
// --- LeadingZeros ---
// leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0.
@[inline]
pub fn leading_zeros_8(x u8) int {
if x == 0 {
return 8
}
$if msvc {
return C.__lzcnt(x) - 24
} $else $if !tinyc {
return C.__builtin_clz(x) - 24
}
return leading_zeros_8_default(x)
}
// leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0.
@[inline]
pub fn leading_zeros_16(x u16) int {
if x == 0 {
return 16
}
$if msvc {
return C.__lzcnt(x) - 16
} $else $if !tinyc {
return C.__builtin_clz(x) - 16
}
return leading_zeros_16_default(x)
}
// leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0.
@[inline]
pub fn leading_zeros_32(x u32) int {
if x == 0 {
return 32
}
$if msvc {
return C.__lzcnt(x)
} $else $if !tinyc {
return C.__builtin_clz(x)
}
return leading_zeros_32_default(x)
}
// leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0.
@[inline]
pub fn leading_zeros_64(x u64) int {
if x == 0 {
return 64
}
$if msvc {
return C.__lzcnt64(x)
} $else $if !tinyc {
return C.__builtin_clzll(x)
}
return leading_zeros_64_default(x)
}
fn C.__builtin_ctz(x u32) int
fn C.__builtin_ctzll(x u64) int
fn C._BitScanForward(pos &int, x u32) u8
fn C._BitScanForward64(pos &int, x u64) u8
// --- TrailingZeros ---
// trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
@[inline]
pub fn trailing_zeros_8(x u8) int {
if x == 0 {
return 8
}
$if msvc {
mut pos := 0
_ := C._BitScanForward(&pos, x)
return pos
} $else $if !tinyc {
return C.__builtin_ctz(x)
}
return trailing_zeros_8_default(x)
}
// trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
@[inline]
pub fn trailing_zeros_16(x u16) int {
if x == 0 {
return 16
}
$if msvc {
mut pos := 0
_ := C._BitScanForward(&pos, x)
return pos
} $else $if !tinyc {
return C.__builtin_ctz(x)
}
return trailing_zeros_16_default(x)
}
// trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
@[inline]
pub fn trailing_zeros_32(x u32) int {
if x == 0 {
return 32
}
$if msvc {
mut pos := 0
_ := C._BitScanForward(&pos, x)
return pos
} $else $if !tinyc {
return C.__builtin_ctz(x)
}
return trailing_zeros_32_default(x)
}
// trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
@[inline]
pub fn trailing_zeros_64(x u64) int {
if x == 0 {
return 64
}
$if msvc {
mut pos := 0
_ := C._BitScanForward64(&pos, x)
return pos
} $else $if !tinyc {
return C.__builtin_ctzll(x)
}
return trailing_zeros_64_default(x)
}
fn C.__builtin_popcount(x u32) int
fn C.__builtin_popcountll(x u64) int
fn C.__popcnt(x u32) int
fn C.__popcnt64(x u64) int
// --- OnesCount ---
// ones_count_8 returns the number of one bits ("population count") in x.
@[inline]
pub fn ones_count_8(x u8) int {
$if msvc {
return C.__popcnt(x)
} $else $if !tinyc {
return C.__builtin_popcount(x)
}
return ones_count_8_default(x)
}
// ones_count_16 returns the number of one bits ("population count") in x.
@[inline]
pub fn ones_count_16(x u16) int {
$if msvc {
return C.__popcnt(x)
} $else $if !tinyc {
return C.__builtin_popcount(x)
}
return ones_count_16_default(x)
}
// ones_count_32 returns the number of one bits ("population count") in x.
@[inline]
pub fn ones_count_32(x u32) int {
$if msvc {
return C.__popcnt(x)
} $else $if !tinyc {
return C.__builtin_popcount(x)
}
return ones_count_32_default(x)
}
// ones_count_64 returns the number of one bits ("population count") in x.
@[inline]
pub fn ones_count_64(x u64) int {
$if msvc {
return C.__popcnt64(x)
} $else $if !tinyc {
return C.__builtin_popcountll(x)
}
return ones_count_64_default(x)
}

View file

@ -24,35 +24,69 @@ const m4 = u64(0x0000ffff0000ffff)
// --- LeadingZeros --- // --- LeadingZeros ---
// leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0. // leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0.
@[inline]
pub fn leading_zeros_8(x u8) int { pub fn leading_zeros_8(x u8) int {
return leading_zeros_8_default(x)
}
@[inline]
fn leading_zeros_8_default(x u8) int {
return 8 - len_8(x) return 8 - len_8(x)
} }
// leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0. // leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0.
@[inline]
pub fn leading_zeros_16(x u16) int { pub fn leading_zeros_16(x u16) int {
return leading_zeros_16_default(x)
}
@[inline]
fn leading_zeros_16_default(x u16) int {
return 16 - len_16(x) return 16 - len_16(x)
} }
// leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0. // leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0.
@[inline]
pub fn leading_zeros_32(x u32) int { pub fn leading_zeros_32(x u32) int {
return leading_zeros_32_default(x)
}
@[inline]
fn leading_zeros_32_default(x u32) int {
return 32 - len_32(x) return 32 - len_32(x)
} }
// leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0. // leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0.
@[inline]
pub fn leading_zeros_64(x u64) int { pub fn leading_zeros_64(x u64) int {
return leading_zeros_64_default(x)
}
@[inline]
fn leading_zeros_64_default(x u64) int {
return 64 - len_64(x) return 64 - len_64(x)
} }
// --- TrailingZeros --- // --- TrailingZeros ---
// trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0. // trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
@[direct_array_access] @[inline]
pub fn trailing_zeros_8(x u8) int { pub fn trailing_zeros_8(x u8) int {
return trailing_zeros_8_default(x)
}
@[direct_array_access; inline]
fn trailing_zeros_8_default(x u8) int {
return int(ntz_8_tab[x]) return int(ntz_8_tab[x])
} }
// trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0. // trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
@[direct_array_access] @[inline]
pub fn trailing_zeros_16(x u16) int { pub fn trailing_zeros_16(x u16) int {
return trailing_zeros_16_default(x)
}
@[direct_array_access; inline]
fn trailing_zeros_16_default(x u16) int {
if x == 0 { if x == 0 {
return 16 return 16
} }
@ -61,8 +95,13 @@ pub fn trailing_zeros_16(x u16) int {
} }
// trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0. // trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
@[direct_array_access] @[inline]
pub fn trailing_zeros_32(x u32) int { pub fn trailing_zeros_32(x u32) int {
return trailing_zeros_32_default(x)
}
@[direct_array_access; inline]
fn trailing_zeros_32_default(x u32) int {
if x == 0 { if x == 0 {
return 32 return 32
} }
@ -71,8 +110,13 @@ pub fn trailing_zeros_32(x u32) int {
} }
// trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0. // trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
@[direct_array_access] @[inline]
pub fn trailing_zeros_64(x u64) int { pub fn trailing_zeros_64(x u64) int {
return trailing_zeros_64_default(x)
}
@[direct_array_access; inline]
fn trailing_zeros_64_default(x u64) int {
if x == 0 { if x == 0 {
return 64 return 64
} }
@ -92,26 +136,46 @@ pub fn trailing_zeros_64(x u64) int {
// --- OnesCount --- // --- OnesCount ---
// ones_count_8 returns the number of one bits ("population count") in x. // ones_count_8 returns the number of one bits ("population count") in x.
@[direct_array_access] @[inline]
pub fn ones_count_8(x u8) int { pub fn ones_count_8(x u8) int {
return ones_count_8_default(x)
}
@[direct_array_access; inline]
fn ones_count_8_default(x u8) int {
return int(pop_8_tab[x]) return int(pop_8_tab[x])
} }
// ones_count_16 returns the number of one bits ("population count") in x. // ones_count_16 returns the number of one bits ("population count") in x.
@[direct_array_access] @[inline]
pub fn ones_count_16(x u16) int { pub fn ones_count_16(x u16) int {
return ones_count_16_default(x)
}
@[direct_array_access; inline]
fn ones_count_16_default(x u16) int {
return int(pop_8_tab[x >> 8] + pop_8_tab[x & u16(0xff)]) return int(pop_8_tab[x >> 8] + pop_8_tab[x & u16(0xff)])
} }
// ones_count_32 returns the number of one bits ("population count") in x. // ones_count_32 returns the number of one bits ("population count") in x.
@[direct_array_access] @[inline]
pub fn ones_count_32(x u32) int { pub fn ones_count_32(x u32) int {
return ones_count_32_default(x)
}
@[direct_array_access; inline]
fn ones_count_32_default(x u32) int {
return int(pop_8_tab[x >> 24] + pop_8_tab[(x >> 16) & 0xff] + pop_8_tab[(x >> 8) & 0xff] + return int(pop_8_tab[x >> 24] + pop_8_tab[(x >> 16) & 0xff] + pop_8_tab[(x >> 8) & 0xff] +
pop_8_tab[x & u32(0xff)]) pop_8_tab[x & u32(0xff)])
} }
// ones_count_64 returns the number of one bits ("population count") in x. // ones_count_64 returns the number of one bits ("population count") in x.
@[inline]
pub fn ones_count_64(x u64) int { pub fn ones_count_64(x u64) int {
return ones_count_64_default(x)
}
fn ones_count_64_default(x u64) int {
// Implementation: Parallel summing of adjacent bits. // Implementation: Parallel summing of adjacent bits.
// See "Hacker's Delight", Chap. 5: Counting Bits. // See "Hacker's Delight", Chap. 5: Counting Bits.
// The following pattern shows the general approach: // The following pattern shows the general approach:
@ -376,7 +440,13 @@ const divide_error = 'Divide Error'
// half returned in lo. // half returned in lo.
// //
// This function's execution time does not depend on the inputs. // This function's execution time does not depend on the inputs.
@[inline]
pub fn mul_32(x u32, y u32) (u32, u32) { pub fn mul_32(x u32, y u32) (u32, u32) {
return mul_32_default(x, y)
}
@[inline]
fn mul_32_default(x u32, y u32) (u32, u32) {
tmp := u64(x) * u64(y) tmp := u64(x) * u64(y)
hi := u32(tmp >> 32) hi := u32(tmp >> 32)
lo := u32(tmp) lo := u32(tmp)
@ -388,7 +458,12 @@ pub fn mul_32(x u32, y u32) (u32, u32) {
// half returned in lo. // half returned in lo.
// //
// This function's execution time does not depend on the inputs. // This function's execution time does not depend on the inputs.
@[inline]
pub fn mul_64(x u64, y u64) (u64, u64) { pub fn mul_64(x u64, y u64) (u64, u64) {
return mul_64_default(x, y)
}
fn mul_64_default(x u64, y u64) (u64, u64) {
x0 := x & mask32 x0 := x & mask32
x1 := x >> 32 x1 := x >> 32
y0 := y & mask32 y0 := y & mask32
@ -403,12 +478,49 @@ pub fn mul_64(x u64, y u64) (u64, u64) {
return hi, lo return hi, lo
} }
// mul_add_32 returns the 64-bit result of x * y + z: (hi, lo) = x * y + z
// with the result bits' upper half returned in hi and the lower
// half returned in lo.
@[inline]
pub fn mul_add_32(x u32, y u32, z u32) (u32, u32) {
return mul_add_32_default(x, y, z)
}
@[inline]
fn mul_add_32_default(x u32, y u32, z u32) (u32, u32) {
tmp := u64(x) * u64(y) + u64(z)
hi := u32(tmp >> 32)
lo := u32(tmp)
return hi, lo
}
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
// with the result bits' upper half returned in hi and the lower
// half returned in lo.
@[inline]
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
return mul_add_64_default(x, y, z)
}
@[inline]
fn mul_add_64_default(x u64, y u64, z u64) (u64, u64) {
h, l := mul_64(x, y)
lo := l + z
hi := h + u64(lo < l)
return hi, lo
}
// --- Full-width divide --- // --- Full-width divide ---
// div_32 returns the quotient and remainder of (hi, lo) divided by y: // div_32 returns the quotient and remainder of (hi, lo) divided by y:
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper // quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
// half in parameter hi and the lower half in parameter lo. // half in parameter hi and the lower half in parameter lo.
// div_32 panics for y == 0 (division by zero) or y <= hi (quotient overflow). // div_32 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
@[inline]
pub fn div_32(hi u32, lo u32, y u32) (u32, u32) { pub fn div_32(hi u32, lo u32, y u32) (u32, u32) {
return div_32_default(hi, lo, y)
}
fn div_32_default(hi u32, lo u32, y u32) (u32, u32) {
if y != 0 && y <= hi { if y != 0 && y <= hi {
panic(overflow_error) panic(overflow_error)
} }
@ -422,7 +534,12 @@ pub fn div_32(hi u32, lo u32, y u32) (u32, u32) {
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper // quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
// half in parameter hi and the lower half in parameter lo. // half in parameter hi and the lower half in parameter lo.
// div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow). // div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
@[inline]
pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) { pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) {
return div_64_default(hi, lo, y1)
}
fn div_64_default(hi u64, lo u64, y1 u64) (u64, u64) {
mut y := y1 mut y := y1
if y == 0 { if y == 0 {
panic(overflow_error) panic(overflow_error)

View file

@ -17,6 +17,7 @@ fn test_bits() {
// C.printf("x:%02x lz: %d cmp: %d\n", i << x, leading_zeros_8(i << x), 7-x) // C.printf("x:%02x lz: %d cmp: %d\n", i << x, leading_zeros_8(i << x), 7-x)
assert leading_zeros_8(u8(u8(i) << x)) == 7 - x assert leading_zeros_8(u8(u8(i) << x)) == 7 - x
} }
assert leading_zeros_8(0) == 8
// 16 bit // 16 bit
i = 1 i = 1
@ -24,6 +25,7 @@ fn test_bits() {
// C.printf("x:%04x lz: %d cmp: %d\n", u16(i) << x, leading_zeros_16(u16(i) << x), 15-x) // C.printf("x:%04x lz: %d cmp: %d\n", u16(i) << x, leading_zeros_16(u16(i) << x), 15-x)
assert leading_zeros_16(u16(i) << x) == 15 - x assert leading_zeros_16(u16(i) << x) == 15 - x
} }
assert leading_zeros_16(0) == 16
// 32 bit // 32 bit
i = 1 i = 1
@ -31,6 +33,7 @@ fn test_bits() {
// C.printf("x:%08x lz: %d cmp: %d\n", u32(i) << x, leading_zeros_32(u32(i) << x), 31-x) // C.printf("x:%08x lz: %d cmp: %d\n", u32(i) << x, leading_zeros_32(u32(i) << x), 31-x)
assert leading_zeros_32(u32(i) << x) == 31 - x assert leading_zeros_32(u32(i) << x) == 31 - x
} }
assert leading_zeros_32(0) == 32
// 64 bit // 64 bit
i = 1 i = 1
@ -38,6 +41,39 @@ fn test_bits() {
// C.printf("x:%016llx lz: %llu cmp: %d\n", u64(i) << x, leading_zeros_64(u64(i) << x), 63-x) // C.printf("x:%016llx lz: %llu cmp: %d\n", u64(i) << x, leading_zeros_64(u64(i) << x), 63-x)
assert leading_zeros_64(u64(i) << x) == 63 - x assert leading_zeros_64(u64(i) << x) == 63 - x
} }
assert leading_zeros_64(0) == 64
//
// --- TrailingZeros ---
//
// 8 bit
i = 1
for x in 0 .. 8 {
assert trailing_zeros_8(u8(u8(i) << x)) == x
}
assert trailing_zeros_8(0) == 8
// 16 bit
i = 1
for x in 0 .. 16 {
assert trailing_zeros_16(u16(i) << x) == x
}
assert trailing_zeros_16(0) == 16
// 32 bit
i = 1
for x in 0 .. 32 {
assert trailing_zeros_32(u32(i) << x) == x
}
assert trailing_zeros_32(0) == 32
// 64 bit
i = 1
for x in 0 .. 64 {
assert trailing_zeros_64(u64(i) << x) == x
}
assert trailing_zeros_64(0) == 64
// //
// --- ones_count --- // --- ones_count ---
@ -50,6 +86,8 @@ fn test_bits() {
assert ones_count_8(u8(i)) == x assert ones_count_8(u8(i)) == x
i = int(u32(i) << 1) + 1 i = int(u32(i) << 1) + 1
} }
assert ones_count_8(0) == 0
assert ones_count_8(0xFF) == 8
// 16 bit // 16 bit
i = 0 i = 0
@ -58,6 +96,8 @@ fn test_bits() {
assert ones_count_16(u16(i)) == x assert ones_count_16(u16(i)) == x
i = int(u32(i) << 1) + 1 i = int(u32(i) << 1) + 1
} }
assert ones_count_16(0) == 0
assert ones_count_16(0xFFFF) == 16
// 32 bit // 32 bit
i = 0 i = 0
@ -66,6 +106,8 @@ fn test_bits() {
assert ones_count_32(u32(i)) == x assert ones_count_32(u32(i)) == x
i = int(u32(i) << 1) + 1 i = int(u32(i) << 1) + 1
} }
assert ones_count_32(0) == 0
assert ones_count_32(0xFFFF_FFFF) == 32
// 64 bit // 64 bit
i1 = 0 i1 = 0
@ -74,6 +116,8 @@ fn test_bits() {
assert ones_count_64(i1) == x assert ones_count_64(i1) == x
i1 = (i1 << 1) + 1 i1 = (i1 << 1) + 1
} }
assert ones_count_64(0) == 0
assert ones_count_64(0xFFFF_FFFF_FFFF_FFFF) == 64
// //
// --- rotate_left/right --- // --- rotate_left/right ---
@ -241,6 +285,9 @@ fn test_bits() {
v1 := v0 - 1 v1 := v0 - 1
hi, lo := mul_32(v0, v1) hi, lo := mul_32(v0, v1)
assert (u64(hi) << 32) | (u64(lo)) == u64(v0) * u64(v1) assert (u64(hi) << 32) | (u64(lo)) == u64(v0) * u64(v1)
v2 := u32(x)
h, l := mul_add_32(v0, v1, v2)
assert (u64(h) << 32) | (u64(l)) == u64(v0) * u64(v1) + u64(v2)
} }
// 64 bit // 64 bit
@ -252,6 +299,11 @@ fn test_bits() {
// C.printf("v0: %llu v1: %llu [%llu,%llu] tt: %llu\n", v0, v1, hi, lo, (v0 >> 32) * (v1 >> 32)) // C.printf("v0: %llu v1: %llu [%llu,%llu] tt: %llu\n", v0, v1, hi, lo, (v0 >> 32) * (v1 >> 32))
assert (hi & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000) assert (hi & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000)
assert (lo & 0x0000_0000_FFFF_FFFF) == (((v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF)) & 0x0000_0000_FFFF_FFFF) assert (lo & 0x0000_0000_FFFF_FFFF) == (((v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF)) & 0x0000_0000_FFFF_FFFF)
v2 := u64(x)
h, l := mul_add_64(v0, v1, v2)
assert (h & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000)
assert (l & 0x0000_0000_FFFF_FFFF) == ((
(v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF) + v2) & 0x0000_0000_FFFF_FFFF)
} }
// //