summaryrefslogtreecommitdiff
path: root/libgo/go/math/big/arith.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/math/big/arith.go')
-rw-r--r--libgo/go/math/big/arith.go103
1 files changed, 77 insertions, 26 deletions
diff --git a/libgo/go/math/big/arith.go b/libgo/go/math/big/arith.go
index c5ff4252d50..d7ea8381e7c 100644
--- a/libgo/go/math/big/arith.go
+++ b/libgo/go/math/big/arith.go
@@ -52,8 +52,6 @@ func subWW_g(x, y, c Word) (z1, z0 Word) {
}
// z1<<_W + z0 = x*y
-func mulWW(x, y Word) (z1, z0 Word) { return mulWW_g(x, y) }
-
// Adapted from Warren, Hacker's Delight, p. 132.
func mulWW_g(x, y Word) (z1, z0 Word) {
x0 := x & _M2
@@ -72,7 +70,7 @@ func mulWW_g(x, y Word) (z1, z0 Word) {
// z1<<_W + z0 = x*y + c
func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
- z1, zz0 := mulWW(x, y)
+ z1, zz0 := mulWW_g(x, y)
if z0 = zz0 + c; z0 < zz0 {
z1++
}
@@ -80,7 +78,6 @@ func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
}
// Length of x in bits.
-func bitLen(x Word) (n int) { return bitLen_g(x) }
func bitLen_g(x Word) (n int) {
for ; x >= 0x8000; x >>= 16 {
n += 16
@@ -110,21 +107,34 @@ func log2(x Word) int {
return bitLen(x) - 1
}
-// Number of leading zeros in x.
-func leadingZeros(x Word) uint {
+// nlz returns the number of leading zeros in x.
+func nlz(x Word) uint {
return uint(_W - bitLen(x))
}
-// q = (u1<<_W + u0 - r)/y
-func divWW(x1, x0, y Word) (q, r Word) { return divWW_g(x1, x0, y) }
+// nlz64 returns the number of leading zeros in x.
+func nlz64(x uint64) uint {
+ switch _W {
+ case 32:
+ w := x >> 32
+ if w == 0 {
+ return 32 + nlz(Word(x))
+ }
+ return nlz(Word(w))
+ case 64:
+ return nlz(Word(x))
+ }
+ panic("unreachable")
+}
+// q = (u1<<_W + u0 - r)/y
// Adapted from Warren, Hacker's Delight, p. 152.
func divWW_g(u1, u0, v Word) (q, r Word) {
if u1 >= v {
return 1<<_W - 1, 1<<_W - 1
}
- s := leadingZeros(v)
+ s := nlz(v)
v <<= s
vn1 := v >> _W2
@@ -159,41 +169,85 @@ func divWW_g(u1, u0, v Word) (q, r Word) {
return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s
}
-func addVV(z, x, y []Word) (c Word) { return addVV_g(z, x, y) }
+// Keep for performance debugging.
+// Using addWW_g is likely slower.
+const use_addWW_g = false
+
+// The resulting carry c is either 0 or 1.
func addVV_g(z, x, y []Word) (c Word) {
- for i := range z {
- c, z[i] = addWW_g(x[i], y[i], c)
+ if use_addWW_g {
+ for i := range z {
+ c, z[i] = addWW_g(x[i], y[i], c)
+ }
+ return
+ }
+
+ for i, xi := range x[:len(z)] {
+ yi := y[i]
+ zi := xi + yi + c
+ z[i] = zi
+ // see "Hacker's Delight", section 2-12 (overflow detection)
+ c = (xi&yi | (xi|yi)&^zi) >> (_W - 1)
}
return
}
-func subVV(z, x, y []Word) (c Word) { return subVV_g(z, x, y) }
+// The resulting carry c is either 0 or 1.
func subVV_g(z, x, y []Word) (c Word) {
- for i := range z {
- c, z[i] = subWW_g(x[i], y[i], c)
+ if use_addWW_g {
+ for i := range z {
+ c, z[i] = subWW_g(x[i], y[i], c)
+ }
+ return
+ }
+
+ for i, xi := range x[:len(z)] {
+ yi := y[i]
+ zi := xi - yi - c
+ z[i] = zi
+ // see "Hacker's Delight", section 2-12 (overflow detection)
+ c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1)
}
return
}
-func addVW(z, x []Word, y Word) (c Word) { return addVW_g(z, x, y) }
+// The resulting carry c is either 0 or 1.
func addVW_g(z, x []Word, y Word) (c Word) {
+ if use_addWW_g {
+ c = y
+ for i := range z {
+ c, z[i] = addWW_g(x[i], c, 0)
+ }
+ return
+ }
+
c = y
- for i := range z {
- c, z[i] = addWW_g(x[i], c, 0)
+ for i, xi := range x[:len(z)] {
+ zi := xi + c
+ z[i] = zi
+ c = xi &^ zi >> (_W - 1)
}
return
}
-func subVW(z, x []Word, y Word) (c Word) { return subVW_g(z, x, y) }
func subVW_g(z, x []Word, y Word) (c Word) {
+ if use_addWW_g {
+ c = y
+ for i := range z {
+ c, z[i] = subWW_g(x[i], c, 0)
+ }
+ return
+ }
+
c = y
- for i := range z {
- c, z[i] = subWW_g(x[i], c, 0)
+ for i, xi := range x[:len(z)] {
+ zi := xi - c
+ z[i] = zi
+ c = (zi &^ xi) >> (_W - 1)
}
return
}
-func shlVU(z, x []Word, s uint) (c Word) { return shlVU_g(z, x, s) }
func shlVU_g(z, x []Word, s uint) (c Word) {
if n := len(z); n > 0 {
ŝ := _W - s
@@ -209,7 +263,6 @@ func shlVU_g(z, x []Word, s uint) (c Word) {
return
}
-func shrVU(z, x []Word, s uint) (c Word) { return shrVU_g(z, x, s) }
func shrVU_g(z, x []Word, s uint) (c Word) {
if n := len(z); n > 0 {
ŝ := _W - s
@@ -225,7 +278,6 @@ func shrVU_g(z, x []Word, s uint) (c Word) {
return
}
-func mulAddVWW(z, x []Word, y, r Word) (c Word) { return mulAddVWW_g(z, x, y, r) }
func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
c = r
for i := range z {
@@ -234,7 +286,7 @@ func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
return
}
-func addMulVVW(z, x []Word, y Word) (c Word) { return addMulVVW_g(z, x, y) }
+// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g.
func addMulVVW_g(z, x []Word, y Word) (c Word) {
for i := range z {
z1, z0 := mulAddWWW_g(x[i], y, z[i])
@@ -244,7 +296,6 @@ func addMulVVW_g(z, x []Word, y Word) (c Word) {
return
}
-func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { return divWVW_g(z, xn, x, y) }
func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) {
r = xn
for i := len(z) - 1; i >= 0; i-- {