Add vec_insert4b and vec_extract4b functions to altivec.h

Add builtins for the functions, along with custom codegen that maps the builtins to their corresponding intrinsics and handles the endian-related swapping. https://reviews.llvm.org/D26546 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@291179 91177308-0d34-0410-b5e6-96231b3b80d8
author: Sean Fertile <sfertile@ca.ibm.com> 2017-01-05 21:43:30 +0000
committer: Sean Fertile <sfertile@ca.ibm.com> 2017-01-05 21:43:30 +0000
commit: d65cd1f9424369c4ae7f945fac7fd9e4357451b2 (patch)
tree: 4e4b6ef1b603faaf773088eaf43f65e732f4a8f2
parent: 1c9f404a2528fa5953a556c5adfe1219f94653a9 (diff)
7 files changed, 182 insertions, 6 deletions
diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def
index 657ea4225a..f7cddc0313 100644
--- a/include/clang/Basic/BuiltinsPPC.def
+++ b/include/clang/Basic/BuiltinsPPC.def
@@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
 BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
 BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
 
+BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
+BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 43ca74761f..4d34b3e922 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -35,6 +35,11 @@ using namespace clang;
 using namespace CodeGen;
 using namespace llvm;
 
+static
+int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
+  return std::min(High, std::max(Low, Value));
+}
+
 /// getBuiltinLibFunction - Given a builtin id for a function like
 /// "__builtin_fabsf", return a Function* for "fabsf".
 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
@@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     llvm_unreachable("Unknown FMA operation");
     return nullptr; // Suppress no-return warning
   }
+
+  case PPC::BI__builtin_vsx_insertword: {
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
+
+    // Third argument is a compile time constant int. It must be clamped to
+    // the range [0, 12].
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third arg to xxinsertw intrinsic must be constant integer");
+    const int64_t MaxIndex = 12;
+    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+
+    // The builtin semantics don't exactly match the xxinsertw instruction's
+    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
+    // word from the first argument, and inserts it in the second argument. The
+    // instruction extracts the word from its second input register and inserts
+    // it into its first input register, so swap the first and second arguments.
+    std::swap(Ops[0], Ops[1]);
+
+    // Need to cast the second argument from a vector of unsigned int to a
+    // vector of long long.
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+
+    if (getTarget().isLittleEndian()) {
+      // Create a shuffle mask of (1, 0)
+      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
+                                   ConstantInt::get(Int32Ty, 0)
+                                 };
+      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+      // Reverse the double words in the vector we will extract from.
+      Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+      Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
+
+      // Reverse the index.
+      Index = MaxIndex - Index;
+    }
+
+    // Intrinsic expects the first arg to be a vector of int.
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+    Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
+    return Builder.CreateCall(F, Ops);
+  }
+
+  case PPC::BI__builtin_vsx_extractuword: {
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
+
+    // Intrinsic expects the first argument to be a vector of doublewords.
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+
+    // The second argument is a compile time constant int that needs to
+    // be clamped to the range [0, 12].
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
+    assert(ArgCI &&
+           "Second Arg to xxextractuw intrinsic must be a constant integer!");
+    const int64_t MaxIndex = 12;
+    int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+
+    if (getTarget().isLittleEndian()) {
+      // Reverse the index.
+      Index = MaxIndex - Index;
+      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
+
+      // Emit the call, then reverse the double words of the results vector.
+      Value *Call = Builder.CreateCall(F, Ops);
+
+      // Create a shuffle mask of (1, 0)
+      Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
+                                   ConstantInt::get(Int32Ty, 0)
+                                 };
+      Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+      Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
+      return ShuffleCall;
+    } else {
+      Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
+      return Builder.CreateCall(F, Ops);
+    }
+  }
   }
 }
 
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index d1d1d80263..a8618816d5 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h
@@ -12574,6 +12574,9 @@ static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
 
 #ifdef __POWER9_VECTOR__
 
+#define vec_insert4b __builtin_vsx_insertword
+#define vec_extract4b __builtin_vsx_extractuword
+
 /* vec_extract_exp */
 
 static __inline__ vector unsigned int __ATTRS_o_ai
diff --git a/test/CodeGen/builtins-ppc-error.c b/test/CodeGen/builtins-ppc-error.c
new file mode 100644
index 0000000000..5860c4f9e7
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-error.c
@@ -0,0 +1,20 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang_cc1 -faltivec -target-feature +power9-vector \
+// RUN:   -triple powerpc64-unknown-unknown -fsyntax-only   \
+// RUN: -Wall -Werror -verify %s
+
+// RUN: %clang_cc1 -faltivec -target-feature +power9-vector  \
+// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only    \
+// RUN: -Wall -Werror -verify %s
+
+#include <altivec.h>
+
+extern vector signed int vsi;
+extern vector unsigned char vuc;
+
+void testInsertWord1(void) {
+  int index = 5;
+  vector unsigned char v1 = vec_insert4b(vsi, vuc, index); // expected-error {{argument to '__builtin_vsx_insertword' must be a constant integer}}
+  vector unsigned long long v2 = vec_extract4b(vuc, index);   // expected-error {{argument to '__builtin_vsx_extractuword' must be a constant integer}}
+}
diff --git a/test/CodeGen/builtins-ppc-extractword-error.c b/test/CodeGen/builtins-ppc-extractword-error.c
new file mode 100644
index 0000000000..238a80ae10
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-extractword-error.c
@@ -0,0 +1,15 @@
+// REQUIRES: powerpc-registered-target
+// XFAIL: powerpc
+
+// RUN: %clang -faltivec -target powerpc64le-unknown-unknown  -mcpu=power8 \
+// RUN: -Wall -Wextra -c %s
+// RUN: %clang -faltivec -target powerpc64-unknown-unknown  -mcpu=power8 \
+// RUN: -Wall -Wextra -c %s
+
+// Expect the compile to fail with "cannot compile this builtin function yet"
+extern vector signed int vsi;
+extern vector unsigned char vuc;
+
+vector unsigned long long testExtractWord(void) {
+  return  __builtin_vsx_extractuword(vuc, 12);
+}
diff --git a/test/CodeGen/builtins-ppc-insertword-error.c b/test/CodeGen/builtins-ppc-insertword-error.c
new file mode 100644
index 0000000000..6e75abd992
--- /dev/null
+++ b/test/CodeGen/builtins-ppc-insertword-error.c
@@ -0,0 +1,16 @@
+// REQUIRES: powerpc-registered-target
+// XFAIL: powerpc
+
+// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \
+// RUN: -Wall -Werror -c %s
+
+// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \
+// RUN: -Wall -Werror -c %s
+
+// Expect the compile to fail with the diagnostic: "cannot compile this builtin function yet"
+extern vector signed int vsi;
+extern vector unsigned char vuc;
+
+vector  unsigned char testInsertWord(void) {
+  return __builtin_vsx_insertword(vsi, vuc, 0);
+}
diff --git a/test/CodeGen/builtins-ppc-p9vector.c b/test/CodeGen/builtins-ppc-p9vector.c
index f70d2f9f15..bd0ad182f1 100644
--- a/test/CodeGen/builtins-ppc-p9vector.c
+++ b/test/CodeGen/builtins-ppc-p9vector.c
@@ -1166,17 +1166,52 @@ vector float test114(void) {
 // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
 // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
 // CHECK-BE-NEXT: ret <4 x float>
-// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
-// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
-// CHECK-LE-NEXT: ret <4 x float>
+// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
+// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-NEXT: ret <4 x float>
   return vec_extract_fp32_from_shorth(vusa);
 }
 vector float test115(void) {
 // CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
 // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
 // CHECK-BE-NEXT: ret <4 x float>
-// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
-// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
-// CHECK-LE-NEXT: ret <4 x float>
+// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> <i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
+// CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}})
+// CHECK-NEXT: ret <4 x float>
   return vec_extract_fp32_from_shortl(vusa);
 }
+vector unsigned char test116(void) {
+// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7)
+// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
+// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: [[T2:%.+]] =  bitcast <2 x i64> [[T1]] to <4 x i32>
+// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 5)
+// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8>
+  return vec_insert4b(vuia, vuca, 7);
+}
+vector unsigned char test117(void) {
+// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 12)
+// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8>
+// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: [[T2:%.+]] =  bitcast <2 x i64> [[T1]] to <4 x i32>
+// CHECK-NEXT: [[T3:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> [[T2]], <2 x i64> {{.+}}, i32 0)
+// CHECK-NEXT: bitcast <4 x i32> [[T3]] to <16 x i8>
+  return vec_insert4b(vuia, vuca, 13);
+}
+vector unsigned long long test118(void) {
+// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11)
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1)
+// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_extract4b(vuca, 11);
+}
+vector unsigned long long test119(void) {
+// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 0)
+// CHECK-BE-NEXT: ret <2 x i64>
+// CHECK: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 12)
+// CHECK-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT: ret <2 x i64>
+  return vec_extract4b(vuca, -5);
+}
+
author	Sean Fertile <sfertile@ca.ibm.com>	2017-01-05 21:43:30 +0000
committer	Sean Fertile <sfertile@ca.ibm.com>	2017-01-05 21:43:30 +0000
commit	d65cd1f9424369c4ae7f945fac7fd9e4357451b2 (patch)
tree	4e4b6ef1b603faaf773088eaf43f65e732f4a8f2
parent	1c9f404a2528fa5953a556c5adfe1219f94653a9 (diff)