mirror of
https://github.com/vlang/v.git
synced 2025-09-13 14:32:26 +03:00
math.bits: add asm implementations for some 64 bit ops (#25020)
This commit is contained in:
parent
60108cdd33
commit
350747793b
5 changed files with 517 additions and 7 deletions
100
vlib/math/bits/bits.amd64.v
Normal file
100
vlib/math/bits/bits.amd64.v
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
|
||||||
|
// Use of this source code is governed by an MIT license
|
||||||
|
// that can be found in the LICENSE file.
|
||||||
|
module bits
|
||||||
|
|
||||||
|
fn C._umul128(x u64, y u64, result_hi &u64) u64
|
||||||
|
fn C._addcarry_u64(carry_in u8, a u64, b u64, out &u64) u8
|
||||||
|
fn C._udiv128(hi u64, lo u64, y u64, rem &u64) u64
|
||||||
|
|
||||||
|
// mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y
|
||||||
|
// with the product bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
//
|
||||||
|
// This function's execution time does not depend on the inputs.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_64(x u64, y u64) (u64, u64) {
|
||||||
|
mut hi := u64(0)
|
||||||
|
mut lo := u64(0)
|
||||||
|
$if msvc {
|
||||||
|
lo = C._umul128(x, y, &hi)
|
||||||
|
return hi, lo
|
||||||
|
} $else $if amd64 {
|
||||||
|
asm amd64 {
|
||||||
|
mulq rdx
|
||||||
|
; =a (lo)
|
||||||
|
=d (hi)
|
||||||
|
; a (x)
|
||||||
|
d (y)
|
||||||
|
; cc
|
||||||
|
}
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
// cross compile
|
||||||
|
return mul_64_default(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
|
||||||
|
// with the result bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
|
||||||
|
mut hi := u64(0)
|
||||||
|
mut lo := u64(0)
|
||||||
|
$if msvc {
|
||||||
|
lo = C._umul128(x, y, &hi)
|
||||||
|
carry := C._addcarry_u64(0, lo, z, &lo)
|
||||||
|
hi += carry
|
||||||
|
return hi, lo
|
||||||
|
} $else $if amd64 {
|
||||||
|
asm amd64 {
|
||||||
|
mulq rdx
|
||||||
|
addq rax, z
|
||||||
|
adcq rdx, 0
|
||||||
|
; =a (lo)
|
||||||
|
=d (hi)
|
||||||
|
; a (x)
|
||||||
|
d (y)
|
||||||
|
r (z)
|
||||||
|
; cc
|
||||||
|
}
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
// cross compile
|
||||||
|
return mul_add_64_default(x, y, z)
|
||||||
|
}
|
||||||
|
|
||||||
|
// div_64 returns the quotient and remainder of (hi, lo) divided by y:
|
||||||
|
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
|
||||||
|
// half in parameter hi and the lower half in parameter lo.
|
||||||
|
// div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
|
||||||
|
@[inline]
|
||||||
|
pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) {
|
||||||
|
mut y := y1
|
||||||
|
if y == 0 {
|
||||||
|
panic(overflow_error)
|
||||||
|
}
|
||||||
|
if y <= hi {
|
||||||
|
panic(overflow_error)
|
||||||
|
}
|
||||||
|
|
||||||
|
mut quo := u64(0)
|
||||||
|
mut rem := u64(0)
|
||||||
|
$if msvc {
|
||||||
|
quo = C._udiv128(hi, lo, y, &rem)
|
||||||
|
return quo, rem
|
||||||
|
} $else $if amd64 {
|
||||||
|
asm amd64 {
|
||||||
|
div y
|
||||||
|
; =a (quo)
|
||||||
|
=d (rem)
|
||||||
|
; d (hi)
|
||||||
|
a (lo)
|
||||||
|
r (y)
|
||||||
|
; cc
|
||||||
|
}
|
||||||
|
return quo, rem
|
||||||
|
}
|
||||||
|
// cross compile
|
||||||
|
return div_64_default(hi, lo, y1)
|
||||||
|
}
|
55
vlib/math/bits/bits.arm64.v
Normal file
55
vlib/math/bits/bits.arm64.v
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
|
||||||
|
// Use of this source code is governed by an MIT license
|
||||||
|
// that can be found in the LICENSE file.
|
||||||
|
module bits
|
||||||
|
|
||||||
|
// mul_64 returns the 128-bit product of x and y: (hi, lo) = x * y
|
||||||
|
// with the product bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
//
|
||||||
|
// This function's execution time does not depend on the inputs.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_64(x u64, y u64) (u64, u64) {
|
||||||
|
mut hi := u64(0)
|
||||||
|
mut lo := u64(0)
|
||||||
|
$if arm64 && !tinyc {
|
||||||
|
asm arm64 {
|
||||||
|
mul lo, x, y
|
||||||
|
umulh hi, x, y
|
||||||
|
; =&r (hi)
|
||||||
|
=&r (lo)
|
||||||
|
; r (x)
|
||||||
|
r (y)
|
||||||
|
; cc
|
||||||
|
}
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
// cross compile
|
||||||
|
return mul_64_default(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
|
||||||
|
// with the result bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
|
||||||
|
mut hi := u64(0)
|
||||||
|
mut lo := u64(0)
|
||||||
|
$if arm64 && !tinyc {
|
||||||
|
asm arm64 {
|
||||||
|
mul lo, x, y
|
||||||
|
umulh hi, x, y
|
||||||
|
adds lo, lo, z
|
||||||
|
adc hi, hi, xzr
|
||||||
|
; =&r (hi)
|
||||||
|
=&r (lo)
|
||||||
|
; r (x)
|
||||||
|
r (y)
|
||||||
|
r (z)
|
||||||
|
; cc
|
||||||
|
}
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
// cross compile
|
||||||
|
return mul_add_64_default(x, y, z)
|
||||||
|
}
|
186
vlib/math/bits/bits.c.v
Normal file
186
vlib/math/bits/bits.c.v
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
// Copyright (c) 2019-2024 Alexander Medvednikov. All rights reserved.
|
||||||
|
// Use of this source code is governed by an MIT license
|
||||||
|
// that can be found in the LICENSE file.
|
||||||
|
module bits
|
||||||
|
|
||||||
|
fn C.__builtin_clz(x u32) int
|
||||||
|
fn C.__builtin_clzll(x u64) int
|
||||||
|
fn C.__lzcnt(x u32) int
|
||||||
|
fn C.__lzcnt64(x u64) int
|
||||||
|
|
||||||
|
// --- LeadingZeros ---
|
||||||
|
// leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn leading_zeros_8(x u8) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 8
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
return C.__lzcnt(x) - 24
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_clz(x) - 24
|
||||||
|
}
|
||||||
|
return leading_zeros_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn leading_zeros_16(x u16) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 16
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
return C.__lzcnt(x) - 16
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_clz(x) - 16
|
||||||
|
}
|
||||||
|
return leading_zeros_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn leading_zeros_32(x u32) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 32
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
return C.__lzcnt(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_clz(x)
|
||||||
|
}
|
||||||
|
return leading_zeros_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn leading_zeros_64(x u64) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 64
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
return C.__lzcnt64(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_clzll(x)
|
||||||
|
}
|
||||||
|
return leading_zeros_64_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn C.__builtin_ctz(x u32) int
|
||||||
|
fn C.__builtin_ctzll(x u64) int
|
||||||
|
fn C._BitScanForward(pos &int, x u32) u8
|
||||||
|
fn C._BitScanForward64(pos &int, x u64) u8
|
||||||
|
|
||||||
|
// --- TrailingZeros ---
|
||||||
|
// trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn trailing_zeros_8(x u8) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 8
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
mut pos := 0
|
||||||
|
_ := C._BitScanForward(&pos, x)
|
||||||
|
return pos
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_ctz(x)
|
||||||
|
}
|
||||||
|
return trailing_zeros_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn trailing_zeros_16(x u16) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 16
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
mut pos := 0
|
||||||
|
_ := C._BitScanForward(&pos, x)
|
||||||
|
return pos
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_ctz(x)
|
||||||
|
}
|
||||||
|
return trailing_zeros_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn trailing_zeros_32(x u32) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 32
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
mut pos := 0
|
||||||
|
_ := C._BitScanForward(&pos, x)
|
||||||
|
return pos
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_ctz(x)
|
||||||
|
}
|
||||||
|
return trailing_zeros_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
||||||
|
@[inline]
|
||||||
|
pub fn trailing_zeros_64(x u64) int {
|
||||||
|
if x == 0 {
|
||||||
|
return 64
|
||||||
|
}
|
||||||
|
$if msvc {
|
||||||
|
mut pos := 0
|
||||||
|
_ := C._BitScanForward64(&pos, x)
|
||||||
|
return pos
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_ctzll(x)
|
||||||
|
}
|
||||||
|
return trailing_zeros_64_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn C.__builtin_popcount(x u32) int
|
||||||
|
fn C.__builtin_popcountll(x u64) int
|
||||||
|
fn C.__popcnt(x u32) int
|
||||||
|
fn C.__popcnt64(x u64) int
|
||||||
|
|
||||||
|
// --- OnesCount ---
|
||||||
|
// ones_count_8 returns the number of one bits ("population count") in x.
|
||||||
|
@[inline]
|
||||||
|
pub fn ones_count_8(x u8) int {
|
||||||
|
$if msvc {
|
||||||
|
return C.__popcnt(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_popcount(x)
|
||||||
|
}
|
||||||
|
return ones_count_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ones_count_16 returns the number of one bits ("population count") in x.
|
||||||
|
@[inline]
|
||||||
|
pub fn ones_count_16(x u16) int {
|
||||||
|
$if msvc {
|
||||||
|
return C.__popcnt(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_popcount(x)
|
||||||
|
}
|
||||||
|
return ones_count_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ones_count_32 returns the number of one bits ("population count") in x.
|
||||||
|
@[inline]
|
||||||
|
pub fn ones_count_32(x u32) int {
|
||||||
|
$if msvc {
|
||||||
|
return C.__popcnt(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_popcount(x)
|
||||||
|
}
|
||||||
|
return ones_count_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ones_count_64 returns the number of one bits ("population count") in x.
|
||||||
|
@[inline]
|
||||||
|
pub fn ones_count_64(x u64) int {
|
||||||
|
$if msvc {
|
||||||
|
return C.__popcnt64(x)
|
||||||
|
} $else $if !tinyc {
|
||||||
|
return C.__builtin_popcountll(x)
|
||||||
|
}
|
||||||
|
return ones_count_64_default(x)
|
||||||
|
}
|
|
@ -24,35 +24,69 @@ const m4 = u64(0x0000ffff0000ffff)
|
||||||
|
|
||||||
// --- LeadingZeros ---
|
// --- LeadingZeros ---
|
||||||
// leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0.
|
// leading_zeros_8 returns the number of leading zero bits in x; the result is 8 for x == 0.
|
||||||
|
@[inline]
|
||||||
pub fn leading_zeros_8(x u8) int {
|
pub fn leading_zeros_8(x u8) int {
|
||||||
|
return leading_zeros_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn leading_zeros_8_default(x u8) int {
|
||||||
return 8 - len_8(x)
|
return 8 - len_8(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0.
|
// leading_zeros_16 returns the number of leading zero bits in x; the result is 16 for x == 0.
|
||||||
|
@[inline]
|
||||||
pub fn leading_zeros_16(x u16) int {
|
pub fn leading_zeros_16(x u16) int {
|
||||||
|
return leading_zeros_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn leading_zeros_16_default(x u16) int {
|
||||||
return 16 - len_16(x)
|
return 16 - len_16(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0.
|
// leading_zeros_32 returns the number of leading zero bits in x; the result is 32 for x == 0.
|
||||||
|
@[inline]
|
||||||
pub fn leading_zeros_32(x u32) int {
|
pub fn leading_zeros_32(x u32) int {
|
||||||
|
return leading_zeros_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn leading_zeros_32_default(x u32) int {
|
||||||
return 32 - len_32(x)
|
return 32 - len_32(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0.
|
// leading_zeros_64 returns the number of leading zero bits in x; the result is 64 for x == 0.
|
||||||
|
@[inline]
|
||||||
pub fn leading_zeros_64(x u64) int {
|
pub fn leading_zeros_64(x u64) int {
|
||||||
|
return leading_zeros_64_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn leading_zeros_64_default(x u64) int {
|
||||||
return 64 - len_64(x)
|
return 64 - len_64(x)
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- TrailingZeros ---
|
// --- TrailingZeros ---
|
||||||
// trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
// trailing_zeros_8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn trailing_zeros_8(x u8) int {
|
pub fn trailing_zeros_8(x u8) int {
|
||||||
|
return trailing_zeros_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn trailing_zeros_8_default(x u8) int {
|
||||||
return int(ntz_8_tab[x])
|
return int(ntz_8_tab[x])
|
||||||
}
|
}
|
||||||
|
|
||||||
// trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
|
// trailing_zeros_16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn trailing_zeros_16(x u16) int {
|
pub fn trailing_zeros_16(x u16) int {
|
||||||
|
return trailing_zeros_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn trailing_zeros_16_default(x u16) int {
|
||||||
if x == 0 {
|
if x == 0 {
|
||||||
return 16
|
return 16
|
||||||
}
|
}
|
||||||
|
@ -61,8 +95,13 @@ pub fn trailing_zeros_16(x u16) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
|
// trailing_zeros_32 returns the number of trailing zero bits in x; the result is 32 for x == 0.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn trailing_zeros_32(x u32) int {
|
pub fn trailing_zeros_32(x u32) int {
|
||||||
|
return trailing_zeros_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn trailing_zeros_32_default(x u32) int {
|
||||||
if x == 0 {
|
if x == 0 {
|
||||||
return 32
|
return 32
|
||||||
}
|
}
|
||||||
|
@ -71,8 +110,13 @@ pub fn trailing_zeros_32(x u32) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
// trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
// trailing_zeros_64 returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn trailing_zeros_64(x u64) int {
|
pub fn trailing_zeros_64(x u64) int {
|
||||||
|
return trailing_zeros_64_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn trailing_zeros_64_default(x u64) int {
|
||||||
if x == 0 {
|
if x == 0 {
|
||||||
return 64
|
return 64
|
||||||
}
|
}
|
||||||
|
@ -92,26 +136,46 @@ pub fn trailing_zeros_64(x u64) int {
|
||||||
|
|
||||||
// --- OnesCount ---
|
// --- OnesCount ---
|
||||||
// ones_count_8 returns the number of one bits ("population count") in x.
|
// ones_count_8 returns the number of one bits ("population count") in x.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn ones_count_8(x u8) int {
|
pub fn ones_count_8(x u8) int {
|
||||||
|
return ones_count_8_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn ones_count_8_default(x u8) int {
|
||||||
return int(pop_8_tab[x])
|
return int(pop_8_tab[x])
|
||||||
}
|
}
|
||||||
|
|
||||||
// ones_count_16 returns the number of one bits ("population count") in x.
|
// ones_count_16 returns the number of one bits ("population count") in x.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn ones_count_16(x u16) int {
|
pub fn ones_count_16(x u16) int {
|
||||||
|
return ones_count_16_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn ones_count_16_default(x u16) int {
|
||||||
return int(pop_8_tab[x >> 8] + pop_8_tab[x & u16(0xff)])
|
return int(pop_8_tab[x >> 8] + pop_8_tab[x & u16(0xff)])
|
||||||
}
|
}
|
||||||
|
|
||||||
// ones_count_32 returns the number of one bits ("population count") in x.
|
// ones_count_32 returns the number of one bits ("population count") in x.
|
||||||
@[direct_array_access]
|
@[inline]
|
||||||
pub fn ones_count_32(x u32) int {
|
pub fn ones_count_32(x u32) int {
|
||||||
|
return ones_count_32_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[direct_array_access; inline]
|
||||||
|
fn ones_count_32_default(x u32) int {
|
||||||
return int(pop_8_tab[x >> 24] + pop_8_tab[(x >> 16) & 0xff] + pop_8_tab[(x >> 8) & 0xff] +
|
return int(pop_8_tab[x >> 24] + pop_8_tab[(x >> 16) & 0xff] + pop_8_tab[(x >> 8) & 0xff] +
|
||||||
pop_8_tab[x & u32(0xff)])
|
pop_8_tab[x & u32(0xff)])
|
||||||
}
|
}
|
||||||
|
|
||||||
// ones_count_64 returns the number of one bits ("population count") in x.
|
// ones_count_64 returns the number of one bits ("population count") in x.
|
||||||
|
@[inline]
|
||||||
pub fn ones_count_64(x u64) int {
|
pub fn ones_count_64(x u64) int {
|
||||||
|
return ones_count_64_default(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ones_count_64_default(x u64) int {
|
||||||
// Implementation: Parallel summing of adjacent bits.
|
// Implementation: Parallel summing of adjacent bits.
|
||||||
// See "Hacker's Delight", Chap. 5: Counting Bits.
|
// See "Hacker's Delight", Chap. 5: Counting Bits.
|
||||||
// The following pattern shows the general approach:
|
// The following pattern shows the general approach:
|
||||||
|
@ -376,7 +440,13 @@ const divide_error = 'Divide Error'
|
||||||
// half returned in lo.
|
// half returned in lo.
|
||||||
//
|
//
|
||||||
// This function's execution time does not depend on the inputs.
|
// This function's execution time does not depend on the inputs.
|
||||||
|
@[inline]
|
||||||
pub fn mul_32(x u32, y u32) (u32, u32) {
|
pub fn mul_32(x u32, y u32) (u32, u32) {
|
||||||
|
return mul_32_default(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn mul_32_default(x u32, y u32) (u32, u32) {
|
||||||
tmp := u64(x) * u64(y)
|
tmp := u64(x) * u64(y)
|
||||||
hi := u32(tmp >> 32)
|
hi := u32(tmp >> 32)
|
||||||
lo := u32(tmp)
|
lo := u32(tmp)
|
||||||
|
@ -388,7 +458,12 @@ pub fn mul_32(x u32, y u32) (u32, u32) {
|
||||||
// half returned in lo.
|
// half returned in lo.
|
||||||
//
|
//
|
||||||
// This function's execution time does not depend on the inputs.
|
// This function's execution time does not depend on the inputs.
|
||||||
|
@[inline]
|
||||||
pub fn mul_64(x u64, y u64) (u64, u64) {
|
pub fn mul_64(x u64, y u64) (u64, u64) {
|
||||||
|
return mul_64_default(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul_64_default(x u64, y u64) (u64, u64) {
|
||||||
x0 := x & mask32
|
x0 := x & mask32
|
||||||
x1 := x >> 32
|
x1 := x >> 32
|
||||||
y0 := y & mask32
|
y0 := y & mask32
|
||||||
|
@ -403,12 +478,49 @@ pub fn mul_64(x u64, y u64) (u64, u64) {
|
||||||
return hi, lo
|
return hi, lo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mul_add_32 returns the 64-bit result of x * y + z: (hi, lo) = x * y + z
|
||||||
|
// with the result bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_add_32(x u32, y u32, z u32) (u32, u32) {
|
||||||
|
return mul_add_32_default(x, y, z)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn mul_add_32_default(x u32, y u32, z u32) (u32, u32) {
|
||||||
|
tmp := u64(x) * u64(y) + u64(z)
|
||||||
|
hi := u32(tmp >> 32)
|
||||||
|
lo := u32(tmp)
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
|
||||||
|
// mul_add_64 returns the 128-bit result of x * y + z: (hi, lo) = x * y + z
|
||||||
|
// with the result bits' upper half returned in hi and the lower
|
||||||
|
// half returned in lo.
|
||||||
|
@[inline]
|
||||||
|
pub fn mul_add_64(x u64, y u64, z u64) (u64, u64) {
|
||||||
|
return mul_add_64_default(x, y, z)
|
||||||
|
}
|
||||||
|
|
||||||
|
@[inline]
|
||||||
|
fn mul_add_64_default(x u64, y u64, z u64) (u64, u64) {
|
||||||
|
h, l := mul_64(x, y)
|
||||||
|
lo := l + z
|
||||||
|
hi := h + u64(lo < l)
|
||||||
|
return hi, lo
|
||||||
|
}
|
||||||
|
|
||||||
// --- Full-width divide ---
|
// --- Full-width divide ---
|
||||||
// div_32 returns the quotient and remainder of (hi, lo) divided by y:
|
// div_32 returns the quotient and remainder of (hi, lo) divided by y:
|
||||||
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
|
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
|
||||||
// half in parameter hi and the lower half in parameter lo.
|
// half in parameter hi and the lower half in parameter lo.
|
||||||
// div_32 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
|
// div_32 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
|
||||||
|
@[inline]
|
||||||
pub fn div_32(hi u32, lo u32, y u32) (u32, u32) {
|
pub fn div_32(hi u32, lo u32, y u32) (u32, u32) {
|
||||||
|
return div_32_default(hi, lo, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn div_32_default(hi u32, lo u32, y u32) (u32, u32) {
|
||||||
if y != 0 && y <= hi {
|
if y != 0 && y <= hi {
|
||||||
panic(overflow_error)
|
panic(overflow_error)
|
||||||
}
|
}
|
||||||
|
@ -422,7 +534,12 @@ pub fn div_32(hi u32, lo u32, y u32) (u32, u32) {
|
||||||
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
|
// quo = (hi, lo)/y, rem = (hi, lo)%y with the dividend bits' upper
|
||||||
// half in parameter hi and the lower half in parameter lo.
|
// half in parameter hi and the lower half in parameter lo.
|
||||||
// div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
|
// div_64 panics for y == 0 (division by zero) or y <= hi (quotient overflow).
|
||||||
|
@[inline]
|
||||||
pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) {
|
pub fn div_64(hi u64, lo u64, y1 u64) (u64, u64) {
|
||||||
|
return div_64_default(hi, lo, y1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn div_64_default(hi u64, lo u64, y1 u64) (u64, u64) {
|
||||||
mut y := y1
|
mut y := y1
|
||||||
if y == 0 {
|
if y == 0 {
|
||||||
panic(overflow_error)
|
panic(overflow_error)
|
||||||
|
|
|
@ -17,6 +17,7 @@ fn test_bits() {
|
||||||
// C.printf("x:%02x lz: %d cmp: %d\n", i << x, leading_zeros_8(i << x), 7-x)
|
// C.printf("x:%02x lz: %d cmp: %d\n", i << x, leading_zeros_8(i << x), 7-x)
|
||||||
assert leading_zeros_8(u8(u8(i) << x)) == 7 - x
|
assert leading_zeros_8(u8(u8(i) << x)) == 7 - x
|
||||||
}
|
}
|
||||||
|
assert leading_zeros_8(0) == 8
|
||||||
|
|
||||||
// 16 bit
|
// 16 bit
|
||||||
i = 1
|
i = 1
|
||||||
|
@ -24,6 +25,7 @@ fn test_bits() {
|
||||||
// C.printf("x:%04x lz: %d cmp: %d\n", u16(i) << x, leading_zeros_16(u16(i) << x), 15-x)
|
// C.printf("x:%04x lz: %d cmp: %d\n", u16(i) << x, leading_zeros_16(u16(i) << x), 15-x)
|
||||||
assert leading_zeros_16(u16(i) << x) == 15 - x
|
assert leading_zeros_16(u16(i) << x) == 15 - x
|
||||||
}
|
}
|
||||||
|
assert leading_zeros_16(0) == 16
|
||||||
|
|
||||||
// 32 bit
|
// 32 bit
|
||||||
i = 1
|
i = 1
|
||||||
|
@ -31,6 +33,7 @@ fn test_bits() {
|
||||||
// C.printf("x:%08x lz: %d cmp: %d\n", u32(i) << x, leading_zeros_32(u32(i) << x), 31-x)
|
// C.printf("x:%08x lz: %d cmp: %d\n", u32(i) << x, leading_zeros_32(u32(i) << x), 31-x)
|
||||||
assert leading_zeros_32(u32(i) << x) == 31 - x
|
assert leading_zeros_32(u32(i) << x) == 31 - x
|
||||||
}
|
}
|
||||||
|
assert leading_zeros_32(0) == 32
|
||||||
|
|
||||||
// 64 bit
|
// 64 bit
|
||||||
i = 1
|
i = 1
|
||||||
|
@ -38,6 +41,39 @@ fn test_bits() {
|
||||||
// C.printf("x:%016llx lz: %llu cmp: %d\n", u64(i) << x, leading_zeros_64(u64(i) << x), 63-x)
|
// C.printf("x:%016llx lz: %llu cmp: %d\n", u64(i) << x, leading_zeros_64(u64(i) << x), 63-x)
|
||||||
assert leading_zeros_64(u64(i) << x) == 63 - x
|
assert leading_zeros_64(u64(i) << x) == 63 - x
|
||||||
}
|
}
|
||||||
|
assert leading_zeros_64(0) == 64
|
||||||
|
|
||||||
|
//
|
||||||
|
// --- TrailingZeros ---
|
||||||
|
//
|
||||||
|
|
||||||
|
// 8 bit
|
||||||
|
i = 1
|
||||||
|
for x in 0 .. 8 {
|
||||||
|
assert trailing_zeros_8(u8(u8(i) << x)) == x
|
||||||
|
}
|
||||||
|
assert trailing_zeros_8(0) == 8
|
||||||
|
|
||||||
|
// 16 bit
|
||||||
|
i = 1
|
||||||
|
for x in 0 .. 16 {
|
||||||
|
assert trailing_zeros_16(u16(i) << x) == x
|
||||||
|
}
|
||||||
|
assert trailing_zeros_16(0) == 16
|
||||||
|
|
||||||
|
// 32 bit
|
||||||
|
i = 1
|
||||||
|
for x in 0 .. 32 {
|
||||||
|
assert trailing_zeros_32(u32(i) << x) == x
|
||||||
|
}
|
||||||
|
assert trailing_zeros_32(0) == 32
|
||||||
|
|
||||||
|
// 64 bit
|
||||||
|
i = 1
|
||||||
|
for x in 0 .. 64 {
|
||||||
|
assert trailing_zeros_64(u64(i) << x) == x
|
||||||
|
}
|
||||||
|
assert trailing_zeros_64(0) == 64
|
||||||
|
|
||||||
//
|
//
|
||||||
// --- ones_count ---
|
// --- ones_count ---
|
||||||
|
@ -50,6 +86,8 @@ fn test_bits() {
|
||||||
assert ones_count_8(u8(i)) == x
|
assert ones_count_8(u8(i)) == x
|
||||||
i = int(u32(i) << 1) + 1
|
i = int(u32(i) << 1) + 1
|
||||||
}
|
}
|
||||||
|
assert ones_count_8(0) == 0
|
||||||
|
assert ones_count_8(0xFF) == 8
|
||||||
|
|
||||||
// 16 bit
|
// 16 bit
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -58,6 +96,8 @@ fn test_bits() {
|
||||||
assert ones_count_16(u16(i)) == x
|
assert ones_count_16(u16(i)) == x
|
||||||
i = int(u32(i) << 1) + 1
|
i = int(u32(i) << 1) + 1
|
||||||
}
|
}
|
||||||
|
assert ones_count_16(0) == 0
|
||||||
|
assert ones_count_16(0xFFFF) == 16
|
||||||
|
|
||||||
// 32 bit
|
// 32 bit
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -66,6 +106,8 @@ fn test_bits() {
|
||||||
assert ones_count_32(u32(i)) == x
|
assert ones_count_32(u32(i)) == x
|
||||||
i = int(u32(i) << 1) + 1
|
i = int(u32(i) << 1) + 1
|
||||||
}
|
}
|
||||||
|
assert ones_count_32(0) == 0
|
||||||
|
assert ones_count_32(0xFFFF_FFFF) == 32
|
||||||
|
|
||||||
// 64 bit
|
// 64 bit
|
||||||
i1 = 0
|
i1 = 0
|
||||||
|
@ -74,6 +116,8 @@ fn test_bits() {
|
||||||
assert ones_count_64(i1) == x
|
assert ones_count_64(i1) == x
|
||||||
i1 = (i1 << 1) + 1
|
i1 = (i1 << 1) + 1
|
||||||
}
|
}
|
||||||
|
assert ones_count_64(0) == 0
|
||||||
|
assert ones_count_64(0xFFFF_FFFF_FFFF_FFFF) == 64
|
||||||
|
|
||||||
//
|
//
|
||||||
// --- rotate_left/right ---
|
// --- rotate_left/right ---
|
||||||
|
@ -241,6 +285,9 @@ fn test_bits() {
|
||||||
v1 := v0 - 1
|
v1 := v0 - 1
|
||||||
hi, lo := mul_32(v0, v1)
|
hi, lo := mul_32(v0, v1)
|
||||||
assert (u64(hi) << 32) | (u64(lo)) == u64(v0) * u64(v1)
|
assert (u64(hi) << 32) | (u64(lo)) == u64(v0) * u64(v1)
|
||||||
|
v2 := u32(x)
|
||||||
|
h, l := mul_add_32(v0, v1, v2)
|
||||||
|
assert (u64(h) << 32) | (u64(l)) == u64(v0) * u64(v1) + u64(v2)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 64 bit
|
// 64 bit
|
||||||
|
@ -252,6 +299,11 @@ fn test_bits() {
|
||||||
// C.printf("v0: %llu v1: %llu [%llu,%llu] tt: %llu\n", v0, v1, hi, lo, (v0 >> 32) * (v1 >> 32))
|
// C.printf("v0: %llu v1: %llu [%llu,%llu] tt: %llu\n", v0, v1, hi, lo, (v0 >> 32) * (v1 >> 32))
|
||||||
assert (hi & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000)
|
assert (hi & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000)
|
||||||
assert (lo & 0x0000_0000_FFFF_FFFF) == (((v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF)) & 0x0000_0000_FFFF_FFFF)
|
assert (lo & 0x0000_0000_FFFF_FFFF) == (((v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF)) & 0x0000_0000_FFFF_FFFF)
|
||||||
|
v2 := u64(x)
|
||||||
|
h, l := mul_add_64(v0, v1, v2)
|
||||||
|
assert (h & 0xFFFF_FFFF_0000_0000) == (((v0 >> 32) * (v1 >> 32)) & 0xFFFF_FFFF_0000_0000)
|
||||||
|
assert (l & 0x0000_0000_FFFF_FFFF) == ((
|
||||||
|
(v0 & 0x0000_0000_FFFF_FFFF) * (v1 & 0x0000_0000_FFFF_FFFF) + v2) & 0x0000_0000_FFFF_FFFF)
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue