author     Igor Breger <igor.breger@intel.com>      2016-02-15 08:25:28 +0000
committer  Igor Breger <igor.breger@intel.com>      2016-02-15 08:25:28 +0000
commit     0dd3e9d55e827d66a86622e8c6110efdaec10015 (patch)
tree       22ed616e1b40679b3886ecd20622d2a9ee09b60f /test
parent     c7b710ea8f5f26c8085dc6c2c72d42d391a20d85 (diff)
AVX512: Change store size of kmask. The store size of v8i1, v4i1, v2i1 and i1 is changed to 16 bits.
If KMOVB is not supported (it requires AVX512DQ), only KMOVW can be used, so the store size should be 2 bytes.
Differential Revision: http://reviews.llvm.org/D17138
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260878 91177308-0d34-0410-b5e6-96231b3b80d8
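For illustration only (not part of this commit): a minimal sketch of the case the message describes. Assuming a target with AVX512F but without AVX512DQ (for example -mcpu=knl; the triple, CPU choice, and function names below are hypothetical), an <8 x i1> mask that is live across a call cannot be spilled with KMOVB, so the expected spill and reload use KMOVW with a 16-bit (2-byte) stack slot:

; Illustrative sketch, not part of this commit.
; Assumed invocation: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=knl spill_v8i1.ll
; knl has AVX512F but not AVX512DQ, so spilling the v8i1 mask around the call
; is expected to use kmovw into a 2-byte slot rather than kmovb.
declare void @g()

define <8 x i1> @spill_v8i1(<8 x i32> %a, <8 x i32> %b) {
  %mask = icmp sgt <8 x i32> %a, %b   ; mask lives in a k register
  call void @g()                      ; forces the mask to be spilled across the call
  ret <8 x i1> %mask
}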
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/X86/avx512-intel-ocl.ll     8
-rw-r--r--  test/CodeGen/X86/avx512-mask-spills.ll   126
2 files changed, 130 insertions, 4 deletions
diff --git a/test/CodeGen/X86/avx512-intel-ocl.ll b/test/CodeGen/X86/avx512-intel-ocl.ll
index 2e1b27e4aec..70c86347fad 100644
--- a/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -68,10 +68,10 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; WIN64: vmovups {{.*(%rbp).*}}, %zmm21 # 64-byte Reload
 
 ; X64-LABEL: test_prolog_epilog
-; X64: kmovw %k7, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k6, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k5, {{.*}}(%rsp) ## 8-byte Folded Spill
-; X64: kmovw %k4, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k7, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k6, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k5, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovq %k4, {{.*}}(%rsp) ## 8-byte Folded Spill
 ; X64: vmovups %zmm31, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: vmovups %zmm16, {{.*}}(%rsp) ## 64-byte Spill
 ; X64: call
diff --git a/test/CodeGen/X86/avx512-mask-spills.ll b/test/CodeGen/X86/avx512-mask-spills.ll
new file mode 100644
index 00000000000..1be732a2e94
--- /dev/null
+++ b/test/CodeGen/X86/avx512-mask-spills.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
+declare void @f()
+define <4 x i1> @test_4i1(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_4i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: callq _f
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+
+  %cmp_res = icmp ugt <4 x i32> %a, %b
+  %cmp_res2 = icmp sgt <4 x i32> %a, %b
+  call void @f()
+  %res = or <4 x i1> %cmp_res, %cmp_res2
+  ret <4 x i1> %res
+}
+
+define <8 x i1> @test_8i1(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: test_8i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: callq _f
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT: korb %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+
+  %cmp_res = icmp ugt <8 x i32> %a, %b
+  %cmp_res2 = icmp sgt <8 x i32> %a, %b
+  call void @f()
+  %res = or <8 x i1> %cmp_res, %cmp_res2
+  ret <8 x i1> %res
+}
+
+define <16 x i1> @test_16i1(<16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: test_16i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; CHECK-NEXT: callq _f
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; CHECK-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %xmm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %cmp_res = icmp ugt <16 x i32> %a, %b
+  %cmp_res2 = icmp sgt <16 x i32> %a, %b
+  call void @f()
+  %res = or <16 x i1> %cmp_res, %cmp_res2
+  ret <16 x i1> %res
+}
+
+define <32 x i1> @test_32i1(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_32i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, {{[0-9]+}}(%rsp) ## 4-byte Folded Spill
+; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, (%rsp) ## 4-byte Folded Spill
+; CHECK-NEXT: callq _f
+; CHECK-NEXT: kmovd {{[0-9]+}}(%rsp), %k0 ## 4-byte Folded Reload
+; CHECK-NEXT: kmovd (%rsp), %k1 ## 4-byte Folded Reload
+; CHECK-NEXT: kord %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+  %cmp_res = icmp ugt <32 x i16> %a, %b
+  %cmp_res2 = icmp sgt <32 x i16> %a, %b
+  call void @f()
+  %res = or <32 x i1> %cmp_res, %cmp_res2
+  ret <32 x i1> %res
+}
+
+define <64 x i1> @test_64i1(<64 x i8> %a, <64 x i8> %b) {
+; CHECK-LABEL: test_64i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
+; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
+; CHECK-NEXT: callq _f
+; CHECK-NEXT: kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Folded Reload
+; CHECK-NEXT: kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Folded Reload
+; CHECK-NEXT: korq %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %zmm0
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+
+  %cmp_res = icmp ugt <64 x i8> %a, %b
+  %cmp_res2 = icmp sgt <64 x i8> %a, %b
+  call void @f()
+  %res = or <64 x i1> %cmp_res, %cmp_res2
+  ret <64 x i1> %res
+}