summaryrefslogtreecommitdiff
path: root/lib/ppc
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2009-06-26 16:47:03 +0000
committerDaniel Dunbar <daniel@zuster.org>2009-06-26 16:47:03 +0000
commitb3a6901e66f55b35aa9e01bcb24134e6a65ea004 (patch)
tree4d57d73575ea3f7e30bf430920d6e96fa2a369c1 /lib/ppc
parent8cbe163cba77c772621f89ddb33793ac170b1fa2 (diff)
Initial import of compiler-rt.
- git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@74292 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/ppc')
-rw-r--r--lib/ppc/DD.h46
-rw-r--r--lib/ppc/Makefile.mk22
-rw-r--r--lib/ppc/divtc3.c88
-rw-r--r--lib/ppc/fixtfdi.c100
-rw-r--r--lib/ppc/fixunstfdi.c58
-rw-r--r--lib/ppc/floatditf.c34
-rw-r--r--lib/ppc/floatunditf.c40
-rw-r--r--lib/ppc/gcc_qadd.c74
-rw-r--r--lib/ppc/gcc_qdiv.c53
-rw-r--r--lib/ppc/gcc_qmul.c51
-rw-r--r--lib/ppc/gcc_qsub.c74
-rw-r--r--lib/ppc/multc3.c92
-rw-r--r--lib/ppc/restFP.s43
-rw-r--r--lib/ppc/saveFP.s40
14 files changed, 815 insertions, 0 deletions
diff --git a/lib/ppc/DD.h b/lib/ppc/DD.h
new file mode 100644
index 000000000..32acecd5f
--- /dev/null
+++ b/lib/ppc/DD.h
@@ -0,0 +1,46 @@
+#ifndef __DD_HEADER
+#define __DD_HEADER
+
+#include <stdint.h>
+
+typedef union {
+ long double ld;
+ struct {
+ double hi;
+ double lo;
+ };
+} DD;
+
+typedef union {
+ double d;
+ uint64_t x;
+} doublebits;
+
+#define LOWORDER(xy,xHi,xLo,yHi,yLo) \
+ (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo))
+
+static inline double __attribute__((always_inline))
+fabs(double x)
+{
+ doublebits result = { .d = x };
+ result.x &= UINT64_C(0x7fffffffffffffff);
+ return result.d;
+}
+
+static inline double __attribute__((always_inline))
+high26bits(double x)
+{
+ doublebits result = { .d = x };
+ result.x &= UINT64_C(0xfffffffff8000000);
+ return result.d;
+}
+
+static inline int __attribute__((always_inline))
+different_sign(double x, double y)
+{
+ doublebits xsignbit = { .d = x }, ysignbit = { .d = y };
+ int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63);
+ return result;
+}
+
+#endif // __DD_HEADER
diff --git a/lib/ppc/Makefile.mk b/lib/ppc/Makefile.mk
new file mode 100644
index 000000000..5d0f2b3df
--- /dev/null
+++ b/lib/ppc/Makefile.mk
@@ -0,0 +1,22 @@
+#===- lib/ppc/Makefile.mk ----------------------------------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+Dir := lib/ppc
+SubDirs :=
+OnlyArchs := ppc
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.s),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.s=%.o)
+Target := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard $(Dir)/*.h)
+
+include make/subdir.mk
diff --git a/lib/ppc/divtc3.c b/lib/ppc/divtc3.c
new file mode 100644
index 000000000..bec6b5353
--- /dev/null
+++ b/lib/ppc/divtc3.c
@@ -0,0 +1,88 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+#include "DD.h"
+#include <math.h>
+
+#define makeFinite(x) { \
+ (x).hi = __builtin_copysign(isinf((x).hi) ? 1.0 : 0.0, (x).hi); \
+ (x).lo = 0.0; \
+ }
+
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+long double __gcc_qdiv(long double, long double);
+
+long double _Complex
+__divtc3(long double a, long double b, long double c, long double d)
+{
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ int ilogbw = 0;
+ const double logbw = logb(__builtin_fmax( __builtin_fabs(cDD.hi), __builtin_fabs(dDD.hi) ));
+
+ if (isfinite(logbw))
+ {
+ ilogbw = (int)logbw;
+
+ cDD.hi = scalbn(cDD.hi, -ilogbw);
+ cDD.lo = scalbn(cDD.lo, -ilogbw);
+ dDD.hi = scalbn(dDD.hi, -ilogbw);
+ dDD.lo = scalbn(dDD.lo, -ilogbw);
+ }
+
+ const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld));
+ const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld));
+ const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld));
+
+ DD real = { .ld = __gcc_qdiv(realNumerator, denom) };
+ DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) };
+
+ real.hi = scalbn(real.hi, -ilogbw);
+ real.lo = scalbn(real.lo, -ilogbw);
+ imag.hi = scalbn(imag.hi, -ilogbw);
+ imag.lo = scalbn(imag.lo, -ilogbw);
+
+ if (isnan(real.hi) && isnan(imag.hi))
+ {
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD rDD = { .ld = denom };
+
+ if ((rDD.hi == 0.0) && (!isnan(aDD.hi) || !isnan(bDD.hi)))
+ {
+ real.hi = __builtin_copysign(INFINITY,cDD.hi) * aDD.hi;
+ real.lo = 0.0;
+ imag.hi = __builtin_copysign(INFINITY,cDD.hi) * bDD.hi;
+ imag.lo = 0.0;
+ }
+
+ else if ((isinf(aDD.hi) || isinf(bDD.hi)) && isfinite(cDD.hi) && isfinite(dDD.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ real.hi = INFINITY * (aDD.hi*cDD.hi + bDD.hi*dDD.hi);
+ real.lo = 0.0;
+ imag.hi = INFINITY * (bDD.hi*cDD.hi - aDD.hi*dDD.hi);
+ imag.lo = 0.0;
+ }
+
+ else if ((isinf(cDD.hi) || isinf(dDD.hi)) && isfinite(aDD.hi) && isfinite(bDD.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ real.hi = __builtin_copysign(0.0,(aDD.hi*cDD.hi + bDD.hi*dDD.hi));
+ real.lo = 0.0;
+ imag.hi = __builtin_copysign(0.0,(bDD.hi*cDD.hi - aDD.hi*dDD.hi));
+ imag.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
diff --git a/lib/ppc/fixtfdi.c b/lib/ppc/fixtfdi.c
new file mode 100644
index 000000000..c95241701
--- /dev/null
+++ b/lib/ppc/fixtfdi.c
@@ -0,0 +1,100 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// int64_t __fixunstfdi(long double x);
+// This file implements the PowerPC 128-bit double-double -> int64_t conversion
+
+#include "DD.h"
+#include <stdint.h>
+
+uint64_t __fixtfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.hi };
+
+ const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff);
+ const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000);
+
+ // If (1.0 - tiny) <= input < 0x1.0p63:
+ if (UINT32_C(0x03f00000) > absHighWordMinusOne)
+ {
+ // Do an unsigned conversion of the absolute value, then restore the sign.
+ const int unbiasedHeadExponent = absHighWordMinusOne >> 20;
+
+ int64_t result = hibits.x & INT64_C(0x000fffffffffffff); // mantissa(hi)
+ result |= INT64_C(0x0010000000000000); // matissa(hi) with implicit bit
+ result <<= 10; // mantissa(hi) with one zero preceeding bit.
+
+ const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63;
+
+ // If the tail is non-zero, we need to patch in the tail bits.
+ if (0.0 != x.lo)
+ {
+ const doublebits lobits = { .d = x.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ // At this point we have the mantissa of |tail|
+ // We need to negate it if head and tail have different signs.
+ const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63;
+ const int64_t negationMask = loNegationMask ^ hiNegationMask;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ // Now we have the mantissa of tail as a signed 2s-complement integer
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ // Shift the tail mantissa into the right position, accounting for the
+ // bias of 10 that we shifted the head mantissa by.
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (62 - unbiasedHeadExponent);
+
+ // Restore the sign of the result and return
+ result = (result ^ hiNegationMask) - hiNegationMask;
+ return result;
+
+ }
+
+ // Edge cases handled here:
+
+ // |x| < 1, result is zero.
+ if (1.0 > __builtin_fabs(x.hi))
+ return INT64_C(0);
+
+ // x very close to INT64_MIN, care must be taken to see which side we are on.
+ if (x.hi == -0x1.0p63) {
+
+ int64_t result = INT64_MIN;
+
+ if (0.0 < x.lo)
+ {
+ // If the tail is positive, the correct result is something other than INT64_MIN.
+ // we'll need to figure out what it is.
+
+ const doublebits lobits = { .d = x.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ // Now we negate the tailMantissa
+ tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1);
+
+ // And shift it by the appropriate amount
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+ tailMantissa >>= 1075 - biasedTailExponent;
+
+ result -= tailMantissa;
+ }
+
+ return result;
+ }
+
+ // Signed overflows, infinities, and NaNs
+ if (x.hi > 0.0)
+ return INT64_MAX;
+ else
+ return INT64_MIN;
+}
diff --git a/lib/ppc/fixunstfdi.c b/lib/ppc/fixunstfdi.c
new file mode 100644
index 000000000..35ce3a912
--- /dev/null
+++ b/lib/ppc/fixunstfdi.c
@@ -0,0 +1,58 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// uint64_t __fixunstfdi(long double x);
+// This file implements the PowerPC 128-bit double-double -> uint64_t conversion
+
+#include "DD.h"
+#include <stdint.h>
+
+uint64_t __fixunstfdi(long double input)
+{
+ const DD x = { .ld = input };
+ const doublebits hibits = { .d = x.hi };
+
+ const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000);
+
+ // If (1.0 - tiny) <= input < 0x1.0p64:
+ if (UINT32_C(0x04000000) > highWordMinusOne)
+ {
+ const int unbiasedHeadExponent = highWordMinusOne >> 20;
+
+ uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); // mantissa(hi)
+ result |= UINT64_C(0x0010000000000000); // matissa(hi) with implicit bit
+ result <<= 11; // mantissa(hi) left aligned in the int64 field.
+
+ // If the tail is non-zero, we need to patch in the tail bits.
+ if (0.0 != x.lo)
+ {
+ const doublebits lobits = { .d = x.lo };
+ int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff);
+ tailMantissa |= INT64_C(0x0010000000000000);
+
+ // At this point we have the mantissa of |tail|
+
+ const int64_t negationMask = ((int64_t)(lobits.x)) >> 63;
+ tailMantissa = (tailMantissa ^ negationMask) - negationMask;
+
+ // Now we have the mantissa of tail as a signed 2s-complement integer
+
+ const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff;
+
+ // Shift the tail mantissa into the right position, accounting for the
+ // bias of 11 that we shifted the head mantissa by.
+ tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11)));
+
+ result += tailMantissa;
+ }
+
+ result >>= (63 - unbiasedHeadExponent);
+ return result;
+ }
+
+ // Edge cases are handled here, with saturation.
+ if (1.0 > x.hi)
+ return UINT64_C(0);
+ else
+ return UINT64_MAX;
+}
diff --git a/lib/ppc/floatditf.c b/lib/ppc/floatditf.c
new file mode 100644
index 000000000..081dc8cbe
--- /dev/null
+++ b/lib/ppc/floatditf.c
@@ -0,0 +1,34 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __floatditf(long long x);
+// This file implements the PowerPC long long -> long double conversion
+
+#include "DD.h"
+#include <stdint.h>
+
+long double __floatditf(int64_t a) {
+
+ static const double twop32 = 0x1.0p32;
+ static const double twop52 = 0x1.0p52;
+
+ doublebits low = { .d = twop52 };
+ low.x |= a & UINT64_C(0x00000000ffffffff); // 0x1.0p52 + low 32 bits of a.
+
+ const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52;
+
+ // At this point, we have two double precision numbers
+ // high_addend and low.d, and we wish to return their sum
+ // as a canonicalized long double:
+
+ // This implementation sets the inexact flag spuriously.
+ // This could be avoided, but at some substantial cost.
+
+ DD result;
+
+ result.hi = high_addend + low.d;
+ result.lo = (high_addend - result.hi) + low.d;
+
+ return result.ld;
+
+}
diff --git a/lib/ppc/floatunditf.c b/lib/ppc/floatunditf.c
new file mode 100644
index 000000000..63f0b4467
--- /dev/null
+++ b/lib/ppc/floatunditf.c
@@ -0,0 +1,40 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __floatunditf(unsigned long long x);
+// This file implements the PowerPC unsigned long long -> long double conversion
+
+#include "DD.h"
+#include <stdint.h>
+
+long double __floatunditf(uint64_t a) {
+
+ // Begins with an exact copy of the code from __floatundidf
+
+ static const double twop52 = 0x1.0p52;
+ static const double twop84 = 0x1.0p84;
+ static const double twop84_plus_twop52 = 0x1.00000001p84;
+
+ doublebits high = { .d = twop84 };
+ doublebits low = { .d = twop52 };
+
+ high.x |= a >> 32; // 0x1.0p84 + high 32 bits of a
+ low.x |= a & UINT64_C(0x00000000ffffffff); // 0x1.0p52 + low 32 bits of a
+
+ const double high_addend = high.d - twop84_plus_twop52;
+
+ // At this point, we have two double precision numbers
+ // high_addend and low.d, and we wish to return their sum
+ // as a canonicalized long double:
+
+ // This implementation sets the inexact flag spuriously.
+ // This could be avoided, but at some substantial cost.
+
+ DD result;
+
+ result.hi = high_addend + low.d;
+ result.lo = (high_addend - result.hi) + low.d;
+
+ return result.ld;
+
+}
diff --git a/lib/ppc/gcc_qadd.c b/lib/ppc/gcc_qadd.c
new file mode 100644
index 000000000..eb3fdd11e
--- /dev/null
+++ b/lib/ppc/gcc_qadd.c
@@ -0,0 +1,74 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __gcc_qadd(long double x, long double y);
+// This file implements the PowerPC 128-bit double-double add operation.
+// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+
+#include "DD.h"
+
+long double __gcc_qadd(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.hi, a = dst.lo,
+ B = src.hi, b = src.lo;
+
+ // If both operands are zero:
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.hi = A + B;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ // If either operand is NaN or infinity:
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.hi = A + B;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ // If the computation overflows:
+ // This may be playing things a little bit fast and loose, but it will do for a start.
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.hi = testForOverflow;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (fabs(A) <= fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (fabs(a) <= fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.hi = Y = W + w;
+ dst.lo = (W - Y) + w;
+
+ return dst.ld;
+}
diff --git a/lib/ppc/gcc_qdiv.c b/lib/ppc/gcc_qdiv.c
new file mode 100644
index 000000000..53e6c55ec
--- /dev/null
+++ b/lib/ppc/gcc_qdiv.c
@@ -0,0 +1,53 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __gcc_qdiv(long double x, long double y);
+// This file implements the PowerPC 128-bit double-double division operation.
+// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+
+#include "DD.h"
+
+long double __gcc_qdiv(long double a, long double b)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = a }, src = { .ld = b };
+
+ register double x = dst.hi, x1 = dst.lo,
+ y = src.hi, y1 = src.lo;
+
+ double yHi, yLo, qHi, qLo;
+ double yq, tmp, q;
+
+ q = x / y;
+
+ // Detect special cases
+ if (q == 0.0) {
+ dst.hi = q;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits qBits = { .d = q };
+ if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.hi = q;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ yHi = high26bits(y);
+ qHi = high26bits(q);
+
+ yq = y * q;
+ yLo = y - yHi;
+ qLo = q - qHi;
+
+ tmp = LOWORDER(yq, yHi, yLo, qHi, qLo);
+ tmp = (x - yq) - tmp;
+ tmp = ((tmp + x1) - y1 * q) / y;
+ x = q + tmp;
+
+ dst.lo = (q - x) + tmp;
+ dst.hi = x;
+
+ return dst.ld;
+}
diff --git a/lib/ppc/gcc_qmul.c b/lib/ppc/gcc_qmul.c
new file mode 100644
index 000000000..26d899ee8
--- /dev/null
+++ b/lib/ppc/gcc_qmul.c
@@ -0,0 +1,51 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __gcc_qmul(long double x, long double y);
+// This file implements the PowerPC 128-bit double-double multiply operation.
+// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+
+#include "DD.h"
+
+long double __gcc_qmul(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.hi, a = dst.lo,
+ B = src.hi, b = src.lo;
+
+ double aHi, aLo, bHi, bLo;
+ double ab, tmp, tau;
+
+ ab = A * B;
+
+ // Detect special cases
+ if (ab == 0.0) {
+ dst.hi = ab;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ const doublebits abBits = { .d = ab };
+ if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) {
+ dst.hi = ab;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ // Generic cases handled here.
+ aHi = high26bits(A);
+ bHi = high26bits(B);
+ aLo = A - aHi;
+ bLo = B - bHi;
+
+ tmp = LOWORDER(ab, aHi, aLo, bHi, bLo);
+ tmp += (A * b + a * B);
+ tau = ab + tmp;
+
+ dst.lo = (ab - tau) + tmp;
+ dst.hi = tau;
+
+ return dst.ld;
+}
diff --git a/lib/ppc/gcc_qsub.c b/lib/ppc/gcc_qsub.c
new file mode 100644
index 000000000..f77deaa4f
--- /dev/null
+++ b/lib/ppc/gcc_qsub.c
@@ -0,0 +1,74 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// long double __gcc_qsub(long double x, long double y);
+// This file implements the PowerPC 128-bit double-double add operation.
+// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!)
+
+#include "DD.h"
+
+long double __gcc_qsub(long double x, long double y)
+{
+ static const uint32_t infinityHi = UINT32_C(0x7ff00000);
+
+ DD dst = { .ld = x }, src = { .ld = y };
+
+ register double A = dst.hi, a = dst.lo,
+ B = -src.hi, b = -src.lo;
+
+ // If both operands are zero:
+ if ((A == 0.0) && (B == 0.0)) {
+ dst.hi = A + B;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ // If either operand is NaN or infinity:
+ const doublebits abits = { .d = A };
+ const doublebits bbits = { .d = B };
+ if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) ||
+ (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) {
+ dst.hi = A + B;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ // If the computation overflows:
+ // This may be playing things a little bit fast and loose, but it will do for a start.
+ const double testForOverflow = A + (B + (a + b));
+ const doublebits testbits = { .d = testForOverflow };
+ if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) {
+ dst.hi = testForOverflow;
+ dst.lo = 0.0;
+ return dst.ld;
+ }
+
+ double H, h;
+ double T, t;
+ double W, w;
+ double Y;
+
+ H = B + (A - (A + B));
+ T = b + (a - (a + b));
+ h = A + (B - (A + B));
+ t = a + (b - (a + b));
+
+ if (fabs(A) <= fabs(B))
+ w = (a + b) + h;
+ else
+ w = (a + b) + H;
+
+ W = (A + B) + w;
+ Y = (A + B) - W;
+ Y += w;
+
+ if (fabs(a) <= fabs(b))
+ w = t + Y;
+ else
+ w = T + Y;
+
+ dst.hi = Y = W + w;
+ dst.lo = (W - Y) + w;
+
+ return dst.ld;
+}
diff --git a/lib/ppc/multc3.c b/lib/ppc/multc3.c
new file mode 100644
index 000000000..d5a77b178
--- /dev/null
+++ b/lib/ppc/multc3.c
@@ -0,0 +1,92 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+#include "DD.h"
+#include <math.h>
+
+#define makeFinite(x) { \
+ (x).hi = __builtin_copysign(isinf((x).hi) ? 1.0 : 0.0, (x).hi); \
+ (x).lo = 0.0; \
+ }
+
+#define zeroNaN(x) { \
+ if (isnan((x).hi)) { \
+ (x).hi = __builtin_copysign(0.0, (x).hi); \
+ (x).lo = 0.0; \
+ } \
+ }
+
+long double __gcc_qadd(long double, long double);
+long double __gcc_qsub(long double, long double);
+long double __gcc_qmul(long double, long double);
+
+long double _Complex
+__multc3(long double a, long double b, long double c, long double d)
+{
+ long double ac = __gcc_qmul(a,c);
+ long double bd = __gcc_qmul(b,d);
+ long double ad = __gcc_qmul(a,d);
+ long double bc = __gcc_qmul(b,c);
+
+ DD real = { .ld = __gcc_qsub(ac,bd) };
+ DD imag = { .ld = __gcc_qadd(ad,bc) };
+
+ if (isnan(real.hi) && isnan(imag.hi))
+ {
+ int recalc = 0;
+
+ DD aDD = { .ld = a };
+ DD bDD = { .ld = b };
+ DD cDD = { .ld = c };
+ DD dDD = { .ld = d };
+
+ if (isinf(aDD.hi) || isinf(bDD.hi))
+ {
+ makeFinite(aDD);
+ makeFinite(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+
+ if (isinf(cDD.hi) || isinf(dDD.hi))
+ {
+ makeFinite(cDD);
+ makeFinite(dDD);
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ recalc = 1;
+ }
+
+ if (!recalc)
+ {
+ DD acDD = { .ld = ac };
+ DD bdDD = { .ld = bd };
+ DD adDD = { .ld = ad };
+ DD bcDD = { .ld = bc };
+
+ if (isinf(acDD.hi) || isinf(bdDD.hi) || isinf(adDD.hi) || isinf(bcDD.hi))
+ {
+ zeroNaN(aDD);
+ zeroNaN(bDD);
+ zeroNaN(cDD);
+ zeroNaN(dDD);
+ recalc = 1;
+ }
+ }
+
+ if (recalc)
+ {
+ real.hi = INFINITY * (aDD.hi*cDD.hi - bDD.hi*dDD.hi);
+ real.lo = 0.0;
+ imag.hi = INFINITY * (aDD.hi*dDD.hi + bDD.hi*cDD.hi);
+ imag.lo = 0.0;
+ }
+ }
+
+ long double _Complex z;
+ __real__ z = real.ld;
+ __imag__ z = imag.ld;
+
+ return z;
+}
diff --git a/lib/ppc/restFP.s b/lib/ppc/restFP.s
new file mode 100644
index 000000000..6b8428a57
--- /dev/null
+++ b/lib/ppc/restFP.s
@@ -0,0 +1,43 @@
+//===-- restFP.s - Implement restFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// Helper function used by compiler to restore ppc floating point registers at
+// the end of the function epilog. This function returns to the address
+// in the LR slot. So a function epilog must branch (b) not branch and link
+// (bl) to this function.
+// If the compiler wants to restore f27..f31, it does a "b restFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+ .globl restFP
+ .private_extern restFP
+restFP: stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ lwz r0,8(r1)
+ mtlr r0
+ blr
diff --git a/lib/ppc/saveFP.s b/lib/ppc/saveFP.s
new file mode 100644
index 000000000..41a912771
--- /dev/null
+++ b/lib/ppc/saveFP.s
@@ -0,0 +1,40 @@
+//===-- saveFP.s - Implement saveFP ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// Helper function used by compiler to save ppc floating point registers in
+// function prologs. This routines also saves r0 in the LR slot.
+// If the compiler wants to save f27..f31, it does a "bl saveFP+52"
+//
+// This function should never be exported by a shared library. Each linkage
+// unit carries its own copy of this function.
+//
+ .globl saveFP
+ .private_extern saveFP
+saveFP: stfd f14,-144(r1)
+ stfd f15,-136(r1)
+ stfd f16,-128(r1)
+ stfd f17,-120(r1)
+ stfd f18,-112(r1)
+ stfd f19,-104(r1)
+ stfd f20,-96(r1)
+ stfd f21,-88(r1)
+ stfd f22,-80(r1)
+ stfd f23,-72(r1)
+ stfd f24,-64(r1)
+ stfd f25,-56(r1)
+ stfd f26,-48(r1)
+ stfd f27,-40(r1)
+ stfd f28,-32(r1)
+ stfd f29,-24(r1)
+ stfd f30,-16(r1)
+ stfd f31,-8(r1)
+ stw r0,8(r1)
+ blr