[PPC64] Add vector pack/unpack support from ISA 2.07

This patch adds support for the following new instructions in the Power ISA 2.07: vpksdss, vpksdus, vpkudus, vpkudum, vupkhsw, and vupklsw. These instructions are available through the vec_packs, vec_packsu, vec_unpackh, and vec_unpackl built-in interfaces. These are lane-sensitive instructions, so the built-ins have different implementations for big- and little-endian, and the instructions must be marked as killing the vector swap optimization for now. The first three instructions perform saturating pack operations. The fourth performs a modulo pack operation, which means it can be represented with a vector shuffle, and conversely the appropriate vector shuffles may cause this instruction to be generated. The other instructions are only generated via built-in support for now. Appropriate tests have been added. There is a companion patch to clang for the rest of this support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237499 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2015-05-16 01:02:12 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2015-05-16 01:02:12 +0000
commit: 24f0469865686894a86091505330b0170ab30634 (patch)
tree: 2d1d43cb3b20b8ca38cd24354522c595a1784f05 /test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
parent: 9e6378de5744088759f92a7503c7ca81d815c05d (diff)
1 files changed, 43 insertions, 0 deletions
diff --git a/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
new file mode 100644
index 00000000000..d81aa729f8e
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
+
+define void @VPKUDUM_unary(<2 x i64>* %A) { ; single-input case: every result word is taken from the vector at %A
+entry:
+        %tmp = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = bitcast <2 x i64> %tmp to <4 x i32> ; reinterpret the two doublewords as four words
+        %tmp3 = extractelement <4 x i32> %tmp2, i32 1 ; word 1 of the input
+        %tmp4 = extractelement <4 x i32> %tmp2, i32 3 ; word 3 of the input
+        %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0 ; assemble the result <w1, w3, w1, w3>
+        %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1
+        %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2 ; upper half repeats the same two words
+        %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3
+        %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64>
+        store <2 x i64> %tmp9, <2 x i64>* %A ; result overwrites the input in place
+        ret void
+}
+
+; CHECK-LABEL: @VPKUDUM_unary
+; CHECK-NOT:   vperm
+; CHECK:       vpkudum
+
+define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { ; two-input case: result words come from %A then %B
+entry:
+        %tmp = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = bitcast <2 x i64> %tmp to <4 x i32> ; reinterpret the first input as four words
+        %tmp3 = load <2 x i64>, <2 x i64>* %B
+        %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32> ; reinterpret the second input as four words
+        %tmp5 = extractelement <4 x i32> %tmp2, i32 1 ; words 1 and 3 of the first input
+        %tmp6 = extractelement <4 x i32> %tmp2, i32 3
+        %tmp7 = extractelement <4 x i32> %tmp4, i32 1 ; words 1 and 3 of the second input
+        %tmp8 = extractelement <4 x i32> %tmp4, i32 3
+        %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 ; assemble the result <a1, a3, b1, b3>
+        %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1
+        %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2
+        %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3
+        %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64>
+        store <2 x i64> %tmp13, <2 x i64>* %A ; result is written back over the first input
+        ret void
+}
+
+; CHECK-LABEL: @VPKUDUM
+; CHECK-NOT:   vperm
+; CHECK:       vpkudum
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2015-05-16 01:02:12 +0000
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2015-05-16 01:02:12 +0000
commit	24f0469865686894a86091505330b0170ab30634 (patch)
tree	2d1d43cb3b20b8ca38cd24354522c595a1784f05 /test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
parent	9e6378de5744088759f92a7503c7ca81d815c05d (diff)