diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-07-12 12:16:27 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-07-12 12:16:27 +0000 |
commit | 695eea88e991443e69a980fed224ad1d9abf631e (patch) | |
tree | 6d5f087026fbc0a38c070be9f4d126451fdfa2ec | |
parent | dd79b0c35cd094f2daa68bceef11683e4ad6015f (diff) |
[PowerPC] Canonicalize applicable vector shift immediates as swaps
This patch corresponds to review:
http://reviews.llvm.org/D21358
Vector shifts that have the same semantics as a vector swap are canonicalized
as swaps, giving the swap-removal optimization additional opportunities to
eliminate unnecessary swaps.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275168 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 9 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrVSX.td | 8 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll | 27 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll | 4 |
6 files changed, 53 insertions, 11 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b617966c113..0a22a40c84a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1083,6 +1083,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; + case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; @@ -7356,6 +7357,14 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, DAG.getConstant(SplatIdx, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); } + + // Left shifts of 8 bytes are actually swaps. Convert accordingly. + if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); + SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); + } + } if (Subtarget.hasQPX()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 4e511414966..8a48320b17e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -311,6 +311,10 @@ namespace llvm { /// of outputs. XXSWAPD, + /// An SDNode for swaps that are not associated with any loads/stores + /// and thereby have no chain. + SWAP_NO_CHAIN, + /// QVFPERM = This corresponds to the QPX qvfperm instruction. 
QVFPERM, diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index f461f2bfcf2..8c05c7bb85a 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -71,6 +71,7 @@ def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; +def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -963,6 +964,7 @@ def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; +def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; // Selects. 
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), @@ -1318,7 +1320,7 @@ def VectorExtractions { (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); // Word extraction - dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64)); dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); @@ -1588,7 +1590,7 @@ let Predicates = [IsBigEndian, HasP8Vector] in { def : Pat<(f32 (vector_extract v4f32:$S, 1)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), @@ -1693,7 +1695,7 @@ let Predicates = [IsLittleEndian, HasP8Vector] in { def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), - (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 2)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), diff --git a/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll b/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll new file mode 100644 index 00000000000..68d4530e81e --- /dev/null +++ b/test/CodeGen/PowerPC/cannonicalize-vector-shifts.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +define <4 x i32> @test1(<4 x i32> %a) { +entry: +; CHECK-LABEL: test1 +; CHECK: xxswapd 34, 34 + %vecins6 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x 
i32> <i32 2, i32 3, i32 0, i32 1> + ret <4 x i32> %vecins6 +} + +define <8 x i16> @test2(<8 x i16> %a) #0 { +entry: +; CHECK-LABEL: test2 +; CHECK: xxswapd 34, 34 + %vecins14 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> + ret <8 x i16> %vecins14 +} + +define <16 x i8> @test3(<16 x i8> %a) #0 { +entry: +; CHECK-LABEL: test3 +; CHECK: xxswapd 34, 34 + %vecins30 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <16 x i8> %vecins30 +} diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 8da8df58a85..b3451843133 100644 --- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1102,7 +1102,7 @@ entry: ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: extsw 3, 3 ; CHECK-LE-LABEL: @getsi0 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] ; CHECK-LE: extsw 3, 3 } @@ -1150,7 +1150,7 @@ entry: %vecext = extractelement <4 x i32> %0, i32 3 ret i32 %vecext ; CHECK-LABEL: @getsi3 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: extsw 3, 3 ; CHECK-LE-LABEL: @getsi3 @@ -1172,7 +1172,7 @@ entry: ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui0 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: mfvsrwz 3, [[SHL]] ; CHECK-LE: clrldi 3, 3, 32 } @@ -1220,7 +1220,7 @@ entry: %vecext = extractelement <4 x i32> %0, i32 3 ret i32 %vecext ; CHECK-LABEL: @getui3 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: mfvsrwz 3, [[SHL]] ; CHECK: clrldi 3, 3, 32 ; CHECK-LE-LABEL: @getui3 @@ -1380,7 +1380,7 @@ entry: ; CHECK: 
xxsldwi [[SHL:[0-9]+]], 34, 34, 1 ; CHECK: xscvspdpn 1, [[SHL]] ; CHECK-LE-LABEL: @getf1 -; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK-LE: xxswapd [[SHL:[0-9]+]], 34 ; CHECK-LE: xscvspdpn 1, [[SHL]] } @@ -1393,7 +1393,7 @@ entry: %vecext = extractelement <4 x float> %0, i32 2 ret float %vecext ; CHECK-LABEL: @getf2 -; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2 +; CHECK: xxswapd [[SHL:[0-9]+]], 34 ; CHECK: xscvspdpn 1, [[SHL]] ; CHECK-LE-LABEL: @getf2 ; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1 diff --git a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll index 3610c044b94..74dc1561d9f 100644 --- a/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll +++ b/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll @@ -9,8 +9,8 @@ entry: ret <2 x i32> %strided.vec ; CHECK-LABEL: @test1 -; CHECK: vsldoi [[TGT:[0-9]+]], 2, 2, 8 -; CHECK: vmrghw 2, 2, [[TGT]] +; CHECK: xxswapd 35, 34 +; CHECK: vmrghw 2, 2, 3 ; CHECK: blr } |