diff options
author | Sean Fertile <sfertile@ca.ibm.com> | 2017-01-05 21:43:30 +0000 |
---|---|---|
committer | Sean Fertile <sfertile@ca.ibm.com> | 2017-01-05 21:43:30 +0000 |
commit | d65cd1f9424369c4ae7f945fac7fd9e4357451b2 (patch) | |
tree | 4e4b6ef1b603faaf773088eaf43f65e732f4a8f2 | |
parent | 1c9f404a2528fa5953a556c5adfe1219f94653a9 (diff) |
Add vec_insert4b and vec_extract4b functions to altivec.h
Add builtins for the functions and custom codegen mapping the builtins to their
corresponding intrinsics and handling the endian related swapping.
https://reviews.llvm.org/D26546
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@291179 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/BuiltinsPPC.def | 3 | ||||
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 84 | ||||
-rw-r--r-- | lib/Headers/altivec.h | 3 | ||||
-rw-r--r-- | test/CodeGen/builtins-ppc-error.c | 20 | ||||
-rw-r--r-- | test/CodeGen/builtins-ppc-extractword-error.c | 15 | ||||
-rw-r--r-- | test/CodeGen/builtins-ppc-insertword-error.c | 16 | ||||
-rw-r--r-- | test/CodeGen/builtins-ppc-p9vector.c | 47 |
7 files changed, 182 insertions, 6 deletions
diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def index 657ea4225a..f7cddc0313 100644 --- a/include/clang/Basic/BuiltinsPPC.def +++ b/include/clang/Basic/BuiltinsPPC.def @@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") +BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "") +BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 43ca74761f..4d34b3e922 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,11 @@ using namespace clang; using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t Value, int64_t Low, int64_t High) { + return std::min(High, std::max(Low, Value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + + case PPC::BI__builtin_vsx_insertword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + + // Third argument is a compile time constant int. It must be clamped to + // to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); + assert(ArgCI && + "Third arg to xxinsertw intrinsic must be constant integer"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + // The builtin semantics don't exactly match the xxinsertw instructions + // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the + // word from the first argument, and inserts it in the second argument. The + // instruction extracts the word from its second input register and inserts + // it into its first input register, so swap the first and second arguments. + std::swap(Ops[0], Ops[1]); + + // Need to cast the second argument from a vector of unsigned int to a + // vector of long long. + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + if (getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + // Reverse the double words in the vector we will extract from. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + + // Reverse the index. + Index = MaxIndex - Index; + } + + // Intrinsic expects the first arg to be a vector of int. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[2] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + + case PPC::BI__builtin_vsx_extractuword: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + + // Intrinsic expects the first argument to be a vector of doublewords. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + + // The second argument is a compile time constant int that needs to + // be clamped to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]); + assert(ArgCI && + "Second Arg to xxextractuw intrinsic must be a constant integer!"); + const int64_t MaxIndex = 12; + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + + if (getTarget().isLittleEndian()) { + // Reverse the index. + Index = MaxIndex - Index; + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops); + + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + return ShuffleCall; + } else { + Ops[1] = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, Ops); + } + } } } diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index d1d1d80263..a8618816d5 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -12574,6 +12574,9 @@ static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) { #ifdef __POWER9_VECTOR__ +#define vec_insert4b __builtin_vsx_insertword +#define vec_extract4b __builtin_vsx_extractuword + /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai diff --git a/test/CodeGen/builtins-ppc-error.c b/test/CodeGen/builtins-ppc-error.c new file mode 100644 index 0000000000..5860c4f9e7 --- /dev/null +++ b/test/CodeGen/builtins-ppc-error.c @@ -0,0 +1,20 @@ +// REQUIRES: powerpc-registered-target + +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64-unknown-unknown -fsyntax-only \ +// RUN: -Wall -Werror -verify %s + +// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \ +// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -Wall -Werror -verify %s + +#include <altivec.h> + +extern vector signed int vsi; +extern vector unsigned char vuc; + +void testInsertWord1(void) { + int index = 5; + vector unsigned char v1 = vec_insert4b(vsi, vuc, index); // expected-error {{argument to '__builtin_vsx_insertword' must be a constant integer}} + vector unsigned long long v2 = vec_extract4b(vuc, index); // expected-error {{argument to '__builtin_vsx_extractuword' must be a constant integer}} +} diff --git a/test/CodeGen/builtins-ppc-extractword-error.c b/test/CodeGen/builtins-ppc-extractword-error.c new file mode 100644 index 0000000000..238a80ae10 --- /dev/null +++ b/test/CodeGen/builtins-ppc-extractword-error.c @@ -0,0 +1,15 @@ +// REQUIRES: powerpc-registered-target +// XFAIL: powerpc + +// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \ +// RUN: -Wall -Wextra -c %s +// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \ +// RUN: -Wall -Wextra -c %s + +// Expect the compile to fail with "cannot compile this builtin function yet" +extern vector signed int vsi; +extern vector unsigned char vuc; + +vector unsigned long long testExtractWord(void) { + return __builtin_vsx_extractuword(vuc, 12); +} diff --git a/test/CodeGen/builtins-ppc-insertword-error.c b/test/CodeGen/builtins-ppc-insertword-error.c new file mode 100644 index 0000000000..6e75abd992 --- /dev/null +++ b/test/CodeGen/builtins-ppc-insertword-error.c @@ -0,0 +1,16 @@ +// REQUIRES: powerpc-registered-target +// XFAIL: powerpc + +// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \ +// RUN: -Wall -Werror -c %s + +// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \ +// RUN: -Wall -Werror -c %s + +// expect to fail with diagnostic: "cannot compile this builtin function yet" +extern vector signed int vsi; +extern vector unsigned char vuc; + +vector unsigned char testInsertWord(void) { + return __builtin_vsx_insertword(vsi, vuc, 0); +} diff --git a/test/CodeGen/builtins-ppc-p9vector.c b/test/CodeGen/builtins-ppc-p9vector.c index f70d2f9f15..bd0ad182f1 100644 --- a/test/CodeGen/builtins-ppc-p9vector.c +++ b/test/CodeGen/builtins-ppc-p9vector.c @@ -1166,17 +1166,52 @@ vector float test114(void) { // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> -// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) -// CHECK-LE-NEXT: ret <4 x float> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef> +// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shorth(vusa); } vector float test115(void) { // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef> -// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) -// CHECK-LE-NEXT: ret <4 x float> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef> +// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); } +vector unsigned char test116(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0> +// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32> +// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 5) +// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8> + return vec_insert4b(vuia, vuca, 7); +} +vector unsigned char test117(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 12) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0> +// CHECK-NEXT: [[T2:%.+]] = bitcast <2 x i64> [[T1]] to <4 x i32> +// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 0) +// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8> + return vec_insert4b(vuia, vuca, 13); +} +vector unsigned long long test118(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1) +// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0> +// CHECK-NEXT: ret <2 x i64> + return vec_extract4b(vuca, 11); +} +vector unsigned long long test119(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 0) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 12) +// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0> +// CHECK-NEXT: ret <2 x i64> + return vec_extract4b(vuca, -5); +} + |