[PowerPC] Cannonicalize applicable vector shift immediates as swaps

This patch corresponds to review: http://reviews.llvm.org/D21358 Vector shifts that have the same semantics as a vector swap are cannonicalized as such to provide additional opportunities for swap removal optimization to remove unnecessary swaps. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275168 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-07-12 12:16:27 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2016-07-12 12:16:27 +0000
commit: 695eea88e991443e69a980fed224ad1d9abf631e (patch)
tree: 6d5f087026fbc0a38c070be9f4d126451fdfa2ec
parent: dd79b0c35cd094f2daa68bceef11683e4ad6015f (diff)
6 files changed, 53 insertions, 11 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b617966c113..0a22a40c84a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1083,6 +1083,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
+  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
   case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
   case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
   case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
@@ -7356,6 +7357,14 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
     }
+
+    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
+    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
+      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
+    }
+
   }
 
   if (Subtarget.hasQPX()) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 4e511414966..8a48320b17e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -311,6 +311,10 @@ namespace llvm {
       /// of outputs.
       XXSWAPD,
 
+      /// An SDNode for swaps that are not associated with any loads/stores
+      /// and thereby have no chain.
+      SWAP_NO_CHAIN,
+
       /// QVFPERM = This corresponds to the QPX qvfperm instruction.
       QVFPERM,
 
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index f461f2bfcf2..8c05c7bb85a 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -71,6 +71,7 @@ def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
 def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
 def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
 def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
+def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
 
 multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
                     string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -963,6 +964,7 @@ def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
 
 // Selects.
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
@@ -1318,7 +1320,7 @@ def VectorExtractions {
                        (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
 
   // Word extraction
-  dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
+  dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
   dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
   dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
                              (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
@@ -1588,7 +1590,7 @@ let Predicates = [IsBigEndian, HasP8Vector] in {
   def : Pat<(f32 (vector_extract v4f32:$S, 1)),
             (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, 2)),
-            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+            (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, 3)),
             (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
@@ -1693,7 +1695,7 @@ let Predicates = [IsLittleEndian, HasP8Vector] in {
   def : Pat<(f32 (vector_extract v4f32:$S, 0)),
             (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, 1)),
-            (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+            (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, 2)),
             (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
   def : Pat<(f32 (vector_extract v4f32:$S, 3)),
diff --git a/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll b/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll
new file mode 100644
index 00000000000..68d4530e81e
--- /dev/null
+++ b/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+define <4 x i32> @test1(<4 x i32> %a) {
+entry:
+; CHECK-LABEL: test1
+; CHECK: xxswapd 34, 34
+  %vecins6 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  ret <4 x i32> %vecins6
+}
+
+define <8 x i16> @test2(<8 x i16> %a) #0 {
+entry:
+; CHECK-LABEL: test2
+; CHECK: xxswapd 34, 34
+  %vecins14 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x i16> %vecins14
+}
+
+define <16 x i8> @test3(<16 x i8> %a) #0 {
+entry:
+; CHECK-LABEL: test3
+; CHECK: xxswapd 34, 34
+  %vecins30 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i8> %vecins30
+}
diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 8da8df58a85..b3451843133 100644
--- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -1102,7 +1102,7 @@ entry:
 ; CHECK: mfvsrwz 3, [[SHL]]
 ; CHECK: extsw 3, 3
 ; CHECK-LE-LABEL: @getsi0
-; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK-LE: mfvsrwz 3, [[SHL]]
 ; CHECK-LE: extsw 3, 3
 }
@@ -1150,7 +1150,7 @@ entry:
   %vecext = extractelement <4 x i32> %0, i32 3
   ret i32 %vecext
 ; CHECK-LABEL: @getsi3
-; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK: mfvsrwz 3, [[SHL]]
 ; CHECK: extsw 3, 3
 ; CHECK-LE-LABEL: @getsi3
@@ -1172,7 +1172,7 @@ entry:
 ; CHECK: mfvsrwz 3, [[SHL]]
 ; CHECK: clrldi   3, 3, 32
 ; CHECK-LE-LABEL: @getui0
-; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK-LE: mfvsrwz 3, [[SHL]]
 ; CHECK-LE: clrldi   3, 3, 32
 }
@@ -1220,7 +1220,7 @@ entry:
   %vecext = extractelement <4 x i32> %0, i32 3
   ret i32 %vecext
 ; CHECK-LABEL: @getui3
-; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK: mfvsrwz 3, [[SHL]]
 ; CHECK: clrldi   3, 3, 32
 ; CHECK-LE-LABEL: @getui3
@@ -1380,7 +1380,7 @@ entry:
 ; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
 ; CHECK: xscvspdpn 1, [[SHL]]
 ; CHECK-LE-LABEL: @getf1
-; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK-LE: xscvspdpn 1, [[SHL]]
 }
 
@@ -1393,7 +1393,7 @@ entry:
   %vecext = extractelement <4 x float> %0, i32 2
   ret float %vecext
 ; CHECK-LABEL: @getf2
-; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: xxswapd [[SHL:[0-9]+]], 34
 ; CHECK: xscvspdpn 1, [[SHL]]
 ; CHECK-LE-LABEL: @getf2
 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
diff --git a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
index 3610c044b94..74dc1561d9f 100644
--- a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
+++ b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
@@ -9,8 +9,8 @@ entry:
   ret <2 x i32> %strided.vec
 
 ; CHECK-LABEL: @test1
-; CHECK: vsldoi [[TGT:[0-9]+]], 2, 2, 8
-; CHECK: vmrghw 2, 2, [[TGT]]
+; CHECK: xxswapd 35, 34
+; CHECK: vmrghw 2, 2, 3
 ; CHECK: blr
 }
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-07-12 12:16:27 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2016-07-12 12:16:27 +0000
commit	695eea88e991443e69a980fed224ad1d9abf631e (patch)
tree	6d5f087026fbc0a38c070be9f4d126451fdfa2ec
parent	dd79b0c35cd094f2daa68bceef11683e4ad6015f (diff)