diff options
author | Craig Topper <craig.topper@intel.com> | 2017-10-15 16:41:17 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-10-15 16:41:17 +0000 |
commit | c2cf105c00c50efaa41acf5343c62aba621dbfb8 (patch) | |
tree | ac42b8205a16ad6b2d8bd38a0f3db13928c240ee /test | |
parent | 936d75b04b5e0fa86b3a5205cbcab5d15fafcd81 (diff) |
[AVX512] Don't mark EXTLOAD as legal with AVX512. Continue using custom lowering.
Summary:
This was impeding our ability to combine the extending shuffles with other shuffles, as you can see from the test changes.
There's one special case that needed to be added to use VZEXT directly for v8i8->v8i64 since the custom lowering requires v64i8.
Reviewers: RKSimon, zvi, delena
Reviewed By: delena
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38714
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315860 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/avx2-vbroadcast.ll | 70 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll | 59 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_int_to_fp.ll | 84 | ||||
-rw-r--r-- | test/CodeGen/X86/vector-shuffle-128-v4.ll | 32 |
4 files changed, 66 insertions, 179 deletions
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 08a11607eed..97b20b1e56d 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -235,65 +235,33 @@ entry: } define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) { -; X32-AVX2-LABEL: broadcast_mem_v4i16_v8i16: -; X32-AVX2: ## BB#0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X32-AVX2-NEXT: retl -; -; X64-AVX2-LABEL: broadcast_mem_v4i16_v8i16: -; X64-AVX2: ## BB#0: -; X64-AVX2-NEXT: vpbroadcastq (%rdi), %xmm0 -; X64-AVX2-NEXT: retq -; -; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16: -; X32-AVX512VL: ## BB#0: -; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X32-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13] -; X32-AVX512VL-NEXT: retl +; X32-LABEL: broadcast_mem_v4i16_v8i16: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X32-NEXT: retl ; -; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16: -; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X64-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13] -; X64-AVX512VL-NEXT: retq +; X64-LABEL: broadcast_mem_v4i16_v8i16: +; X64: ## BB#0: +; X64-NEXT: vpbroadcastq (%rdi), %xmm0 +; X64-NEXT: retq %load = load <4 x i16>, <4 x i16>* %ptr %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ret <8 x i16> %shuf } define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) { -; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16: -; X32-AVX2: ## BB#0: -; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X32-AVX2-NEXT: vbroadcastsd %xmm0, 
%ymm0 -; X32-AVX2-NEXT: retl -; -; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16: -; X64-AVX2: ## BB#0: -; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; X64-AVX2-NEXT: retq -; -; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: -; X32-AVX512VL: ## BB#0: -; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X32-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; X32-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; X32-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X32-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 -; X32-AVX512VL-NEXT: retl +; X32-LABEL: broadcast_mem_v4i16_v16i16: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X32-NEXT: vbroadcastsd %xmm0, %ymm0 +; X32-NEXT: retl ; -; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: -; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; X64-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; X64-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] -; X64-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 -; X64-AVX512VL-NEXT: retq +; X64-LABEL: broadcast_mem_v4i16_v16i16: +; X64: ## BB#0: +; X64-NEXT: vbroadcastsd (%rdi), %ymm0 +; X64-NEXT: retq %load = load <4 x i16>, <4 x i16>* %ptr %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ret <16 x i16> %shuf diff --git a/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll b/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll index ca954ee3280..4cf35868647 100644 --- a/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll +++ b/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll @@ -342,8 +342,7 @@ define 
<16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i3 define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) { ; CHECK-LABEL: test_2xi32_to_4xi32_mem: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2] +; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0 ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -352,10 +351,9 @@ define <4 x i32> @test_2xi32_to_4xi32_mem(<2 x i32>* %vp) { define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i32> %default, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm2[0,2,0,2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -367,10 +365,9 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i32> define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm1[0,2,0,2] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x 
i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -381,10 +378,9 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i3 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i32> %default, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm2[0,2,0,2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -396,10 +392,9 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i32> define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm1[0,2,0,2] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -410,10 +405,9 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i3 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i32> %default, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm3, 
%xmm3, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm2[0,2,0,2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -425,10 +419,9 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i32> define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm1[0,2,0,2] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -439,10 +432,9 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i3 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i32> %default, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm2[0,2,0,2] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -454,10 
+446,9 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i32> define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i32> %mask) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm1[0,2,0,2] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index a60576761fd..fbb2d14f23c 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -2633,34 +2633,10 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; SSE-NEXT: cvtdq2pd (%rdi), %xmm0 ; SSE-NEXT: retq ; -; VEX-LABEL: sitofp_load_2i32_to_2f64: -; VEX: # BB#0: -; VEX-NEXT: vcvtdq2pd (%rdi), %xmm0 -; VEX-NEXT: retq -; -; AVX512F-LABEL: sitofp_load_2i32_to_2f64: -; AVX512F: # BB#0: -; AVX512F-NEXT: vcvtdq2pd (%rdi), %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: sitofp_load_2i32_to_2f64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: sitofp_load_2i32_to_2f64: -; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vcvtdq2pd (%rdi), %xmm0 -; AVX512DQ-NEXT: retq -; -; AVX512VLDQ-LABEL: sitofp_load_2i32_to_2f64: -; AVX512VLDQ: # BB#0: -; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 -; 
AVX512VLDQ-NEXT: retq +; AVX-LABEL: sitofp_load_2i32_to_2f64: +; AVX: # BB#0: +; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0 +; AVX-NEXT: retq %ld = load <2 x i32>, <2 x i32> *%a %cvt = sitofp <2 x i32> %ld to <2 x double> ret <2 x double> %cvt @@ -2982,9 +2958,7 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; ; AVX512VL-LABEL: uitofp_load_2i32_to_2f64: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 +; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64: @@ -2997,9 +2971,7 @@ define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) { ; ; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64: ; AVX512VLDQ: # BB#0: -; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 +; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 ; AVX512VLDQ-NEXT: retq %ld = load <2 x i32>, <2 x i32> *%a %cvt = uitofp <2 x i32> %ld to <2 x double> @@ -3015,44 +2987,12 @@ define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) { ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; -; VEX-LABEL: uitofp_load_2i16_to_2f64: -; VEX: # BB#0: -; VEX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 -; VEX-NEXT: retq -; -; AVX512F-LABEL: uitofp_load_2i16_to_2f64: -; AVX512F: # BB#0: -; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: uitofp_load_2i16_to_2f64: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; AVX512VL-NEXT: 
vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: uitofp_load_2i16_to_2f64: -; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX512DQ-NEXT: retq -; -; AVX512VLDQ-LABEL: uitofp_load_2i16_to_2f64: -; AVX512VLDQ: # BB#0: -; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX512VLDQ-NEXT: retq +; AVX-LABEL: uitofp_load_2i16_to_2f64: +; AVX: # BB#0: +; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: retq %ld = load <2 x i16>, <2 x i16> *%a %cvt = uitofp <2 x i16> %ld to <2 x double> ret <2 x double> %cvt diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 32efb3cd703..c30c601c777 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -2168,17 +2168,11 @@ define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: insert_mem_lo_v4i32: -; AVX2: # BB#0: -; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: 
insert_mem_lo_v4i32: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,3] -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: insert_mem_lo_v4i32: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] +; AVX2OR512VL-NEXT: retq %a = load <2 x i32>, <2 x i32>* %ptr %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7> @@ -2210,17 +2204,11 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: insert_mem_hi_v4i32: -; AVX1OR2: # BB#0: -; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX1OR2-NEXT: retq -; -; AVX512VL-LABEL: insert_mem_hi_v4i32: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2] -; AVX512VL-NEXT: retq +; AVX-LABEL: insert_mem_hi_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: retq %a = load <2 x i32>, <2 x i32>* %ptr %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> |