From a1b3df34e9efd6b17978356eb26b2c8f7286db4d Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Wed, 14 Feb 2018 10:51:00 +0000
Subject: Revert r319778 (and r319911) due to PR36357

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@325112 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/avx512-intrinsics-fast-isel.ll | 53 ----
 test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 14 -
 test/CodeGen/X86/avx512-intrinsics.ll | 15 +
 test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll | 371 ++++++++--------------
 test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 40 ---
 test/CodeGen/X86/avx512bw-intrinsics.ll | 49 +++
 6 files changed, 194 insertions(+), 348 deletions(-)

diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 50de773af00..80127f66bdf 100644
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -5,59 +5,6 @@

 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c

-define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) local_unnamed_addr #0 {
-; X32-LABEL: test_mm512_kunpackb:
-; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
-; X32-NEXT: kunpckbw %k0, %k1, %k1
-; X32-NEXT: vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
-; X32-NEXT: kmovw %k0, %eax
-; X32-NEXT: movzwl %ax, %eax
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm512_kunpackb:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; X64-NEXT: kunpckbw %k0, %k1, %k1
-; X64-NEXT: vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT: kmovw %k0, %eax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__A to <16 x i32>
- %1 = bitcast <8 x i64> %__B to <16 x i32>
- %2 = icmp ne <16 x i32> %0, %1
- %3 = bitcast <16 x i1> %2 to i16
- %4 = bitcast <8 x i64> %__C to <16 x i32>
- %5 = bitcast <8 x i64> %__D to <16 x i32>
- %6 = icmp ne <16 x i32> %4, %5
- %7 = bitcast <16 x i1> %6 to i16
- %8 = and i16 %7, 255
- %shl.i = shl i16 %3, 8
- %or.i = or i16 %8, %shl.i
- %9 = bitcast <8 x i64> %__E to <16 x i32>
- %10 = bitcast <8 x i64> %__F to <16 x i32>
- %11 = icmp ne <16 x i32> %9, %10
- %12 = bitcast i16 %or.i to <16 x i1>
- %13 = and <16 x i1> %11, %12
- %14 = bitcast <16 x i1> %13 to i16
- ret i16 %14
-}
-
 define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
 ; X32-LABEL: test_mm512_shuffle_f32x4:
 ; X32: # %bb.0: # %entry
diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index f3ca0644e46..378dbda2dc0 100644
--- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -1,20 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

-declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
-
-define i16 @unpckbw_test(i16 %a0, i16 %a1) {
-; 
CHECK-LABEL: unpckbw_test: -; CHECK: ## %bb.0: -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: shll $8, %esi -; CHECK-NEXT: orl %esi, %eax -; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax -; CHECK-NEXT: retq - %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) - ret i16 %res -} - define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512: ; CHECK: ## %bb.0: diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 5faa202c30f..628199d4ac9 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -96,6 +96,21 @@ define i16 @test_kor(i16 %a0, i16 %a1) { ret i16 %t2 } +declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone + +define i16 @unpckbw_test(i16 %a0, i16 %a1) { +; CHECK-LABEL: unpckbw_test: +; CHECK: ## %bb.0: +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: kunpckbw %k1, %k0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax +; CHECK-NEXT: retq + %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) + ret i16 %res +} + declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone ; TODO: the two kxnor instructions here a no op and should be elimintaed, ; probably by FoldConstantArithmetic in SelectionDAG. diff --git a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll index 1e754be6fe4..a56111f3453 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll @@ -4,117 +4,6 @@ ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c -define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) { -; X32-LABEL: test_mm512_kunpackd: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-64, %esp -; X32-NEXT: subl $64, %esp -; X32-NEXT: vmovdqa64 136(%ebp), %zmm3 -; X32-NEXT: vmovdqa64 72(%ebp), %zmm4 -; X32-NEXT: vmovdqa64 8(%ebp), %zmm5 -; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0 -; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; X32-NEXT: vpcmpneqb %zmm5, %zmm2, %k0 -; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; X32-NEXT: kunpckdq %k0, %k1, %k1 -; X32-NEXT: vpcmpneqb %zmm3, %zmm4, %k0 {%k1} -; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: vzeroupper -; X32-NEXT: retl -; -; X64-LABEL: test_mm512_kunpackd: -; X64: # %bb.0: # %entry -; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0 -; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1 -; X64-NEXT: kunpckdq %k0, %k1, %k1 -; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1} -; X64-NEXT: kmovq %k0, %rax -; X64-NEXT: vzeroupper -; X64-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__B to <64 x i8> - %1 = bitcast <8 x i64> %__A to <64 x i8> - %2 = icmp ne <64 x i8> %0, %1 - %3 = bitcast <64 x i1> %2 to i64 - %4 = bitcast <8 x i64> %__C to <64 x i8> - %5 = bitcast <8 x i64> %__D to <64 x i8> - %6 = icmp ne <64 x i8> %4, %5 - %7 = bitcast <64 x i1> %6 to i64 - %and.i = and i64 %7, 4294967295 - %shl.i = shl i64 %3, 32 - 
%or.i = or i64 %and.i, %shl.i - %8 = bitcast <8 x i64> %__E to <64 x i8> - %9 = bitcast <8 x i64> %__F to <64 x i8> - %10 = icmp ne <64 x i8> %8, %9 - %11 = bitcast i64 %or.i to <64 x i1> - %12 = and <64 x i1> %10, %11 - %13 = bitcast <64 x i1> %12 to i64 - ret i64 %13 -} - -define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) { -; X32-LABEL: test_mm512_kunpackw: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: .cfi_def_cfa_register %ebp -; X32-NEXT: andl $-64, %esp -; X32-NEXT: subl $64, %esp -; X32-NEXT: vmovdqa64 136(%ebp), %zmm3 -; X32-NEXT: vpcmpneqw %zmm0, %zmm1, %k0 -; X32-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1 -; X32-NEXT: kunpckwd %k0, %k1, %k1 -; X32-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1} -; X32-NEXT: kmovd %k0, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: vzeroupper -; X32-NEXT: retl -; -; X64-LABEL: test_mm512_kunpackw: -; X64: # %bb.0: # %entry -; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0 -; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1 -; X64-NEXT: kunpckwd %k0, %k1, %k1 -; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1} -; X64-NEXT: kmovd %k0, %eax -; X64-NEXT: vzeroupper -; X64-NEXT: retq -entry: - %0 = bitcast <8 x i64> %__B to <32 x i16> - %1 = bitcast <8 x i64> %__A to <32 x i16> - %2 = icmp ne <32 x i16> %0, %1 - %3 = bitcast <32 x i1> %2 to i32 - %4 = bitcast <8 x i64> %__C to <32 x i16> - %5 = bitcast <8 x i64> %__D to <32 x i16> - %6 = icmp ne <32 x i16> %4, %5 - %7 = bitcast <32 x i1> %6 to i32 - %and.i = and i32 %7, 65535 - %shl.i = shl i32 %3, 16 - %or.i = or i32 %and.i, %shl.i - %8 = bitcast <8 x i64> %__E to <32 x i16> - %9 = bitcast <8 x i64> %__F to <32 x i16> - %10 = icmp ne <32 x i16> %8, %9 - %11 = bitcast i32 %or.i to <32 x i1> - %12 = and <32 x i1> %10, %11 - %13 = bitcast <32 x i1> %12 to i32 - ret i32 %13 -} - - define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) { ; X32-LABEL: test_mm512_mask_set1_epi8: ; X32: # %bb.0: # %entry @@ -189,46 +78,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: movb %ch, %al ; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 ; X32-NEXT: andb $2, %al ; X32-NEXT: shrb %al ; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 ; X32-NEXT: movb %ch, %al ; X32-NEXT: andb $15, %al ; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $12, %eax -; X32-NEXT: andl $15, %eax -; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kmovd %eax, %k4 ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: shrl $13, %eax ; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $14, %eax -; X32-NEXT: andl $3, %eax -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $15, %eax -; 
X32-NEXT: andl $1, %eax ; X32-NEXT: kmovd %eax, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrl $16, %edx @@ -243,25 +105,52 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kmovd %eax, %k7 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $52, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $51, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $50, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $49, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $48, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -494,22 +383,14 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $43, %k0, %k1 ; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: andl $15, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: andl $3, %esi -; X32-NEXT: kmovd %esi, %k3 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $15, %esi -; X32-NEXT: andl $1, %esi -; X32-NEXT: kmovd %esi, %k4 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $20, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $19, %k1, %k1 @@ -520,12 +401,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kshiftrq $18, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $17, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; 
X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $16, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -551,8 +440,8 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k4 -; X32-NEXT: kshiftrq $52, %k4, %k0 +; X32-NEXT: kxorq %k0, %k1, %k3 +; X32-NEXT: kshiftrq $52, %k3, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl ; X32-NEXT: kmovd %edx, %k1 @@ -576,19 +465,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: kmovd %edx, %k4 ; X32-NEXT: kshiftlq $63, %k5, %k5 ; X32-NEXT: kshiftrq $11, %k5, %k5 -; X32-NEXT: kxorq %k4, %k5, %k4 -; X32-NEXT: kshiftrq $53, %k4, %k5 +; X32-NEXT: kxorq %k3, %k5, %k3 +; X32-NEXT: kshiftrq $53, %k3, %k5 ; X32-NEXT: kxorq %k6, %k5, %k5 ; X32-NEXT: kshiftlq $63, %k5, %k5 ; X32-NEXT: kshiftrq $10, %k5, %k5 -; X32-NEXT: kxorq %k4, %k5, %k5 -; X32-NEXT: kshiftrq $54, %k5, %k4 -; X32-NEXT: kxorq %k7, %k4, %k6 +; X32-NEXT: kxorq %k3, %k5, %k5 +; X32-NEXT: kshiftrq $54, %k5, %k3 +; X32-NEXT: kxorq %k7, %k3, %k6 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k4 +; X32-NEXT: kmovd %ecx, %k3 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl @@ -603,12 +492,6 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kxorq %k5, %k0, %k0 ; X32-NEXT: kshiftrq $56, %k0, %k5 ; X32-NEXT: kxorq %k1, %k5, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k6 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $7, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -618,17 +501,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kshiftrq $6, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $5, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $4, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $28, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $3, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -638,7 +524,10 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X32-NEXT: kshiftrq $2, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $30, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: shrl $31, %eax ; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al @@ -743,46 +632,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: movb %ch, %al ; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $55, %k1, %k1 -; X32-NEXT: 
kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $9, %k0, %k1 ; X32-NEXT: andb $2, %al ; X32-NEXT: shrb %al ; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $54, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $10, %k0, %k1 ; X32-NEXT: movb %ch, %al ; X32-NEXT: andb $15, %al ; X32-NEXT: movl %eax, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: kshiftlq $63, %k1, %k1 -; X32-NEXT: kshiftrq $53, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k0 -; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kmovd %edx, %k3 ; X32-NEXT: shrb $3, %al -; X32-NEXT: kmovd %eax, %k2 -; X32-NEXT: kxorq %k2, %k1, %k1 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $12, %eax -; X32-NEXT: andl $15, %eax -; X32-NEXT: kmovd %eax, %k2 +; X32-NEXT: kmovd %eax, %k4 ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: shrl $13, %eax ; X32-NEXT: andb $1, %al -; X32-NEXT: kmovd %eax, %k3 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $14, %eax -; X32-NEXT: andl $3, %eax -; X32-NEXT: kmovd %eax, %k4 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: shrl $15, %eax -; X32-NEXT: andl $1, %eax ; X32-NEXT: kmovd %eax, %k5 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrl $16, %edx @@ -797,25 +659,52 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kmovd %eax, %k7 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $55, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $9, %k0, %k1 +; X32-NEXT: kxorq %k2, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $54, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $10, %k0, %k1 +; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 +; X32-NEXT: kshiftrq $53, %k1, %k1 +; X32-NEXT: kxorq %k0, %k1, %k0 +; X32-NEXT: kshiftrq $11, %k0, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $52, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $12, %k0, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $51, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $13, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kxorq %k5, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $50, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $14, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $49, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $15, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: movl %ecx, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $48, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -1048,22 +937,14 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $43, %k0, %k1 ; X32-NEXT: kxorq %k4, %k1, %k1 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $12, %esi -; X32-NEXT: andl $15, %esi -; X32-NEXT: kmovd %esi, %k2 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $14, %esi -; X32-NEXT: andl $3, 
%esi -; X32-NEXT: kmovd %esi, %k3 -; X32-NEXT: movl %eax, %esi -; X32-NEXT: shrl $15, %esi -; X32-NEXT: andl $1, %esi -; X32-NEXT: kmovd %esi, %k4 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $20, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $44, %k0, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $12, %esi +; X32-NEXT: andl $15, %esi +; X32-NEXT: kmovd %esi, %k2 ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $19, %k1, %k1 @@ -1074,12 +955,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kshiftrq $18, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $46, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $14, %esi +; X32-NEXT: andl $3, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $17, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $47, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: movl %eax, %esi +; X32-NEXT: shrl $15, %esi +; X32-NEXT: andl $1, %esi +; X32-NEXT: kmovd %esi, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $16, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -1105,8 +994,8 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $12, %k1, %k1 -; X32-NEXT: kxorq %k0, %k1, %k4 -; X32-NEXT: kshiftrq $52, %k4, %k0 +; X32-NEXT: kxorq %k0, %k1, %k3 +; X32-NEXT: kshiftrq $52, %k3, %k0 ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $4, %dl ; X32-NEXT: kmovd %edx, %k1 @@ -1130,19 +1019,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: andb $15, %cl ; X32-NEXT: movl %ecx, %edx ; X32-NEXT: shrb $2, %dl -; X32-NEXT: kmovd %edx, %k3 +; X32-NEXT: kmovd %edx, %k4 ; X32-NEXT: kshiftlq $63, %k5, %k5 ; X32-NEXT: kshiftrq $11, %k5, %k5 -; X32-NEXT: kxorq %k4, %k5, %k4 -; X32-NEXT: kshiftrq $53, %k4, %k5 +; X32-NEXT: kxorq %k3, %k5, %k3 +; X32-NEXT: kshiftrq $53, %k3, %k5 ; X32-NEXT: kxorq %k6, %k5, %k5 ; X32-NEXT: kshiftlq $63, %k5, %k5 ; X32-NEXT: kshiftrq $10, %k5, %k5 -; X32-NEXT: kxorq %k4, %k5, %k5 -; X32-NEXT: kshiftrq $54, %k5, %k4 -; X32-NEXT: kxorq %k7, %k4, %k6 +; X32-NEXT: kxorq %k3, %k5, %k5 +; X32-NEXT: kshiftrq $54, %k5, %k3 +; X32-NEXT: kxorq %k7, %k3, %k6 ; X32-NEXT: shrb $3, %cl -; X32-NEXT: kmovd %ecx, %k4 +; X32-NEXT: kmovd %ecx, %k3 ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: shrl $29, %ecx ; X32-NEXT: andb $1, %cl @@ -1157,12 +1046,6 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kxorq %k5, %k0, %k0 ; X32-NEXT: kshiftrq $56, %k0, %k5 ; X32-NEXT: kxorq %k1, %k5, %k1 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $28, %ecx -; X32-NEXT: kmovd %ecx, %k5 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $30, %ecx -; X32-NEXT: kmovd %ecx, %k6 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $7, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -1172,17 +1055,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kshiftrq $6, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $58, %k0, %k1 -; X32-NEXT: kxorq %k3, %k1, %k1 +; X32-NEXT: kxorq %k4, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $5, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $59, %k0, %k1 -; X32-NEXT: kxorq %k4, %k1, %k1 +; X32-NEXT: kxorq %k3, 
%k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $4, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $60, %k0, %k1 -; X32-NEXT: kxorq %k5, %k1, %k1 +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $28, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: kshiftlq $63, %k1, %k1 ; X32-NEXT: kshiftrq $3, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 @@ -1192,7 +1078,10 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X32-NEXT: kshiftrq $2, %k1, %k1 ; X32-NEXT: kxorq %k0, %k1, %k0 ; X32-NEXT: kshiftrq $62, %k0, %k1 -; X32-NEXT: kxorq %k6, %k1, %k1 +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $30, %ecx +; X32-NEXT: kmovd %ecx, %k2 +; X32-NEXT: kxorq %k2, %k1, %k1 ; X32-NEXT: shrl $31, %eax ; X32-NEXT: kmovd %eax, %k2 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al diff --git a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index f19e09758f1..13aca464b9e 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -2,46 +2,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 -declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) - -define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: movzwl %di, %eax -; AVX512BW-NEXT: shll $16, %esi -; AVX512BW-NEXT: orl %esi, %eax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: shll $16, %eax -; AVX512F-32-NEXT: orl %ecx, %eax -; AVX512F-32-NEXT: retl - %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1) - ret i32 %res -} - -declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) - -define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { -; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd: -; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: movl %edi, %eax -; AVX512BW-NEXT: shlq $32, %rsi -; AVX512BW-NEXT: orq %rsi, %rax -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512F-32-NEXT: retl - %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) - ret i64 %res -} - declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) { diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 2fa7c2c5b8a..7b5cc5feff0 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1455,6 +1455,55 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> ret <8 x i64> %res2 } +declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) + +define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { +; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: kmovd %edi, %k0 +; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: 
test_int_x86_avx512_kunpck_wd:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+ ret i64 %res
+}
+
 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)

 define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {