diff options
Diffstat (limited to 'libgo/go/math/big/arith.go')
-rw-r--r-- | libgo/go/math/big/arith.go | 103 |
1 files changed, 77 insertions, 26 deletions
diff --git a/libgo/go/math/big/arith.go b/libgo/go/math/big/arith.go index c5ff4252d50..d7ea8381e7c 100644 --- a/libgo/go/math/big/arith.go +++ b/libgo/go/math/big/arith.go @@ -52,8 +52,6 @@ func subWW_g(x, y, c Word) (z1, z0 Word) { } // z1<<_W + z0 = x*y -func mulWW(x, y Word) (z1, z0 Word) { return mulWW_g(x, y) } - // Adapted from Warren, Hacker's Delight, p. 132. func mulWW_g(x, y Word) (z1, z0 Word) { x0 := x & _M2 @@ -72,7 +70,7 @@ func mulWW_g(x, y Word) (z1, z0 Word) { // z1<<_W + z0 = x*y + c func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { - z1, zz0 := mulWW(x, y) + z1, zz0 := mulWW_g(x, y) if z0 = zz0 + c; z0 < zz0 { z1++ } @@ -80,7 +78,6 @@ func mulAddWWW_g(x, y, c Word) (z1, z0 Word) { } // Length of x in bits. -func bitLen(x Word) (n int) { return bitLen_g(x) } func bitLen_g(x Word) (n int) { for ; x >= 0x8000; x >>= 16 { n += 16 @@ -110,21 +107,34 @@ func log2(x Word) int { return bitLen(x) - 1 } -// Number of leading zeros in x. -func leadingZeros(x Word) uint { +// nlz returns the number of leading zeros in x. +func nlz(x Word) uint { return uint(_W - bitLen(x)) } -// q = (u1<<_W + u0 - r)/y -func divWW(x1, x0, y Word) (q, r Word) { return divWW_g(x1, x0, y) } +// nlz64 returns the number of leading zeros in x. +func nlz64(x uint64) uint { + switch _W { + case 32: + w := x >> 32 + if w == 0 { + return 32 + nlz(Word(x)) + } + return nlz(Word(w)) + case 64: + return nlz(Word(x)) + } + panic("unreachable") +} +// q = (u1<<_W + u0 - r)/y // Adapted from Warren, Hacker's Delight, p. 152. func divWW_g(u1, u0, v Word) (q, r Word) { if u1 >= v { return 1<<_W - 1, 1<<_W - 1 } - s := leadingZeros(v) + s := nlz(v) v <<= s vn1 := v >> _W2 @@ -159,41 +169,85 @@ func divWW_g(u1, u0, v Word) (q, r Word) { return q1*_B2 + q0, (un21*_B2 + un0 - q0*v) >> s } -func addVV(z, x, y []Word) (c Word) { return addVV_g(z, x, y) } +// Keep for performance debugging. +// Using addWW_g is likely slower. +const use_addWW_g = false + +// The resulting carry c is either 0 or 1. func addVV_g(z, x, y []Word) (c Word) { - for i := range z { - c, z[i] = addWW_g(x[i], y[i], c) + if use_addWW_g { + for i := range z { + c, z[i] = addWW_g(x[i], y[i], c) + } + return + } + + for i, xi := range x[:len(z)] { + yi := y[i] + zi := xi + yi + c + z[i] = zi + // see "Hacker's Delight", section 2-12 (overflow detection) + c = (xi&yi | (xi|yi)&^zi) >> (_W - 1) } return } -func subVV(z, x, y []Word) (c Word) { return subVV_g(z, x, y) } +// The resulting carry c is either 0 or 1. func subVV_g(z, x, y []Word) (c Word) { - for i := range z { - c, z[i] = subWW_g(x[i], y[i], c) + if use_addWW_g { + for i := range z { + c, z[i] = subWW_g(x[i], y[i], c) + } + return + } + + for i, xi := range x[:len(z)] { + yi := y[i] + zi := xi - yi - c + z[i] = zi + // see "Hacker's Delight", section 2-12 (overflow detection) + c = (yi&^xi | (yi|^xi)&zi) >> (_W - 1) } return } -func addVW(z, x []Word, y Word) (c Word) { return addVW_g(z, x, y) } +// The resulting carry c is either 0 or 1. func addVW_g(z, x []Word, y Word) (c Word) { + if use_addWW_g { + c = y + for i := range z { + c, z[i] = addWW_g(x[i], c, 0) + } + return + } + c = y - for i := range z { - c, z[i] = addWW_g(x[i], c, 0) + for i, xi := range x[:len(z)] { + zi := xi + c + z[i] = zi + c = xi &^ zi >> (_W - 1) } return } -func subVW(z, x []Word, y Word) (c Word) { return subVW_g(z, x, y) } func subVW_g(z, x []Word, y Word) (c Word) { + if use_addWW_g { + c = y + for i := range z { + c, z[i] = subWW_g(x[i], c, 0) + } + return + } + c = y - for i := range z { - c, z[i] = subWW_g(x[i], c, 0) + for i, xi := range x[:len(z)] { + zi := xi - c + z[i] = zi + c = (zi &^ xi) >> (_W - 1) } return } -func shlVU(z, x []Word, s uint) (c Word) { return shlVU_g(z, x, s) } func shlVU_g(z, x []Word, s uint) (c Word) { if n := len(z); n > 0 { ŝ := _W - s @@ -209,7 +263,6 @@ func shlVU_g(z, x []Word, s uint) (c Word) { return } -func shrVU(z, x []Word, s uint) (c Word) { return shrVU_g(z, x, s) } func shrVU_g(z, x []Word, s uint) (c Word) { if n := len(z); n > 0 { ŝ := _W - s @@ -225,7 +278,6 @@ func shrVU_g(z, x []Word, s uint) (c Word) { return } -func mulAddVWW(z, x []Word, y, r Word) (c Word) { return mulAddVWW_g(z, x, y, r) } func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { c = r for i := range z { @@ -234,7 +286,7 @@ func mulAddVWW_g(z, x []Word, y, r Word) (c Word) { return } -func addMulVVW(z, x []Word, y Word) (c Word) { return addMulVVW_g(z, x, y) } +// TODO(gri) Remove use of addWW_g here and then we can remove addWW_g and subWW_g. func addMulVVW_g(z, x []Word, y Word) (c Word) { for i := range z { z1, z0 := mulAddWWW_g(x[i], y, z[i]) @@ -244,7 +296,6 @@ func addMulVVW_g(z, x []Word, y Word) (c Word) { return } -func divWVW(z []Word, xn Word, x []Word, y Word) (r Word) { return divWVW_g(z, xn, x, y) } func divWVW_g(z []Word, xn Word, x []Word, y Word) (r Word) { r = xn for i := len(z) - 1; i >= 0; i-- { |