diff options
author    Matt Arsenault <Matthew.Arsenault@amd.com>   2017-10-25 07:14:07 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>   2017-10-25 07:14:07 +0000
commit    7bb5f9ead4d2bbd386134e8a85caf881ad498443 (patch)
tree      96da17100215006e2df7f9ea8803b426630934cc /test
parent    b6397326b82bd04dc5e9cee45dd45e8de1f155a4 (diff)
DAG: Fix creating select with wrong condition type
This code added in r297930 assumed that it could create
a select with a condition type that is just an integer
bitcast of the selected type. For AMDGPU any vselect is
going to be scalarized (although the vector types are legal),
and all select conditions must be i1 (the same as getSetCCResultType).
This logic doesn't really make sense to me, but there's
never really been a consistent policy on what the select
condition mask type is supposed to be. Try to extend
the logic for skipping the transform for condition types
that aren't setccs. It doesn't seem quite right to me though,
but checking conditions that seem more sensible (like whether the
vselect is going to be expanded) doesn't work since this
seems to depend on that also.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316554 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/AMDGPU/widen-vselect-and-mask.ll | 52
-rw-r--r--  test/CodeGen/X86/avx512-vselect.ll            | 39
2 files changed, 62 insertions, 29 deletions
diff --git a/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll b/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
new file mode 100644
index 00000000000..4ae219949c2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Check that DAGTypeLegalizer::WidenVSELECTAndMask doesn't try to
+; create vselects with i64 condition masks.
+
+; FIXME: Should be able to avoid intermediate vselect
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4f64:
+; GCN: v_cmp_u_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4f64(<4 x double> %arg) #0 {
+bb:
+  %tmp = extractelement <4 x double> %arg, i64 0
+  %tmp1 = fcmp uno double %tmp, 0.000000e+00
+  %tmp2 = sext i1 %tmp1 to i64
+  %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+  %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+  %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+  %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+  %tmp7 = fcmp une <4 x double> %arg, zeroinitializer
+  %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %tmp9 = and <4 x i1> %tmp8, %tmp7
+  %tmp10 = select <4 x i1> %tmp9, <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, <4 x double> zeroinitializer
+  store <4 x double> %tmp10, <4 x double> addrspace(1)* null, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}widen_vselect_and_mask_v4i64:
+; GCN: v_cmp_eq_u64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]],
+; GCN: v_cndmask_b32_e64 v[[VSEL:[0-9]+]], 0, -1, [[CMP]]
+; GCN: v_mov_b32_e32 v[[VSEL_EXT:[0-9]+]], v[[VSEL]]
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[}}[[VSEL]]:[[VSEL_EXT]]{{\]}}
+define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
+bb:
+  %tmp = extractelement <4 x i64> %arg, i64 0
+  %tmp1 = icmp eq i64 %tmp, 0
+  %tmp2 = sext i1 %tmp1 to i64
+  %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 0
+  %tmp4 = insertelement <4 x i64> %tmp3, i64 undef, i32 1
+  %tmp5 = insertelement <4 x i64> %tmp4, i64 undef, i32 2
+  %tmp6 = insertelement <4 x i64> %tmp5, i64 undef, i32 3
+  %tmp7 = icmp ne <4 x i64> %arg, zeroinitializer
+  %tmp8 = icmp sgt <4 x i64> %tmp6, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %tmp9 = and <4 x i1> %tmp8, %tmp7
+  %tmp10 = select <4 x i1> %tmp9, <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i64> zeroinitializer
+  store <4 x i64> %tmp10, <4 x i64> addrspace(1)* null, align 32
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }
diff --git a/test/CodeGen/X86/avx512-vselect.ll b/test/CodeGen/X86/avx512-vselect.ll
index b9f240909d8..5fc84a0aa81 100644
--- a/test/CodeGen/X86/avx512-vselect.ll
+++ b/test/CodeGen/X86/avx512-vselect.ll
@@ -23,35 +23,16 @@ entry:
 ; both formulations of vselect. All of this trickery is because we can't
 ; directly form an SDAG input to the lowering.
 define <16 x double> @test2(<16 x float> %x, <16 x float> %y, <16 x double> %a, <16 x double> %b) {
-; CHECK-SKX-LABEL: test2:
-; CHECK-SKX:       # BB#0: # %entry
-; CHECK-SKX-NEXT:    vxorps %xmm6, %xmm6, %xmm6
-; CHECK-SKX-NEXT:    vcmpltps %zmm0, %zmm6, %k0
-; CHECK-SKX-NEXT:    vcmpltps %zmm6, %zmm1, %k1
-; CHECK-SKX-NEXT:    korw %k1, %k0, %k0
-; CHECK-SKX-NEXT:    kshiftrw $8, %k0, %k1
-; CHECK-SKX-NEXT:    vpmovm2q %k1, %zmm1
-; CHECK-SKX-NEXT:    vpmovm2q %k0, %zmm0
-; CHECK-SKX-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-SKX-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-SKX-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-SKX-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-SKX-NEXT:    retq
-;
-; CHECK-KNL-LABEL: test2:
-; CHECK-KNL:       # BB#0: # %entry
-; CHECK-KNL-NEXT:    vxorps %xmm6, %xmm6, %xmm6
-; CHECK-KNL-NEXT:    vcmpltps %zmm0, %zmm6, %k0
-; CHECK-KNL-NEXT:    vcmpltps %zmm6, %zmm1, %k1
-; CHECK-KNL-NEXT:    korw %k1, %k0, %k1
-; CHECK-KNL-NEXT:    kshiftrw $8, %k1, %k2
-; CHECK-KNL-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; CHECK-KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; CHECK-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
-; CHECK-KNL-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
-; CHECK-KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
-; CHECK-KNL-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
-; CHECK-KNL-NEXT:    retq
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vxorps %xmm6, %xmm6, %xmm6
+; CHECK-NEXT:    vcmpltps %zmm0, %zmm6, %k0
+; CHECK-NEXT:    vcmpltps %zmm6, %zmm1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k1
+; CHECK-NEXT:    vblendmpd %zmm2, %zmm4, %zmm0 {%k1}
+; CHECK-NEXT:    kshiftrw $8, %k1, %k1
+; CHECK-NEXT:    vblendmpd %zmm3, %zmm5, %zmm1 {%k1}
+; CHECK-NEXT:    retq
 entry:
   %gt.m = fcmp ogt <16 x float> %x, zeroinitializer
   %lt.m = fcmp olt <16 x float> %y, zeroinitializer