diff options
author | Craig Topper <craig.topper@intel.com> | 2018-07-15 06:03:19 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-07-15 06:03:19 +0000 |
commit | 9d61922fe7cc1212c4a2252b8bea711f6fea4ec6 (patch) | |
tree | 1cd882b7c20a0f37a0196675052bc6fb29305cd2 | |
parent | 7ed67c245acfbb9dbd99d0ef9d558d2962b47081 (diff) |
[X86] Add some optsize patterns for 256-bit X86vzmovl.
These patterns use VMOVSS/SD. Without optsize we use BLENDI instead.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337119 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 19 | ||||
-rw-r--r-- | test/CodeGen/X86/vector-shuffle-256-v4.ll | 64 |
2 files changed, 83 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5c8b612f49b..eb6a3323491 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -304,6 +304,25 @@ let Predicates = [UseAVX, OptForSize] in {
             (VMOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
             (VMOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
+
+  // Move low f32 and clear high bits.
+  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSSrr (v4f32 (V_SET0)),
+              (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>;
+  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSSrr (v4i32 (V_SET0)),
+              (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>;
+
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSDrr (v2f64 (V_SET0)),
+              (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (VMOVSDrr (v2i64 (V_SET0)),
+              (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>;
 }
 
 let Predicates = [UseSSE1] in {
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 03e159f3d44..e125601a588 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1882,3 +1882,67 @@ entry:
   %add = add <4 x i64> %shuffle, %shuffle1
   ret <4 x i64> %add
 }
+
+define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v4f64_0zzz_optsize:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0zzz_optsize:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512VL-NEXT:    retq
+  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x double> %b
+}
+
+define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v4i64_0zzz_optsize:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0zzz_optsize:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX512VL-NEXT:    retq
+  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i64> %b
+}
+
+define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512VL-NEXT:    retq
+  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x float> %b
+}
+
+define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
+; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX512VL-NEXT:    retq
+  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i32> %b
+}