summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author Hans Wennborg <hans@hanshq.net> 2018-02-14 10:51:00 +0000
committer Hans Wennborg <hans@hanshq.net> 2018-02-14 10:51:00 +0000
commit a1b3df34e9efd6b17978356eb26b2c8f7286db4d (patch)
tree dd0a621757b601b5fd3e03b0ad61625519ed1468 /test
parent 3af8845fb1af47a669ab753face6fbaaee9a44a1 (diff)
Revert r319778 (and r319911) due to PR36357
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@325112 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- test/CodeGen/X86/avx512-intrinsics-fast-isel.ll 53
-rw-r--r-- test/CodeGen/X86/avx512-intrinsics-upgrade.ll 14
-rw-r--r-- test/CodeGen/X86/avx512-intrinsics.ll 15
-rw-r--r-- test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll 371
-rw-r--r-- test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll 40
-rw-r--r-- test/CodeGen/X86/avx512bw-intrinsics.ll 49
6 files changed, 194 insertions, 348 deletions
diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 50de773af00..80127f66bdf 100644
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -5,59 +5,6 @@
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
-define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) local_unnamed_addr #0 {
-; X32-LABEL: test_mm512_kunpackb:
-; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
-; X32-NEXT: kunpckbw %k0, %k1, %k1
-; X32-NEXT: vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
-; X32-NEXT: kmovw %k0, %eax
-; X32-NEXT: movzwl %ax, %eax
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm512_kunpackb:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
-; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
-; X64-NEXT: kunpckbw %k0, %k1, %k1
-; X64-NEXT: vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT: kmovw %k0, %eax
-; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__A to <16 x i32>
- %1 = bitcast <8 x i64> %__B to <16 x i32>
- %2 = icmp ne <16 x i32> %0, %1
- %3 = bitcast <16 x i1> %2 to i16
- %4 = bitcast <8 x i64> %__C to <16 x i32>
- %5 = bitcast <8 x i64> %__D to <16 x i32>
- %6 = icmp ne <16 x i32> %4, %5
- %7 = bitcast <16 x i1> %6 to i16
- %8 = and i16 %7, 255
- %shl.i = shl i16 %3, 8
- %or.i = or i16 %8, %shl.i
- %9 = bitcast <8 x i64> %__E to <16 x i32>
- %10 = bitcast <8 x i64> %__F to <16 x i32>
- %11 = icmp ne <16 x i32> %9, %10
- %12 = bitcast i16 %or.i to <16 x i1>
- %13 = and <16 x i1> %11, %12
- %14 = bitcast <16 x i1> %13 to i16
- ret i16 %14
-}
-
define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
; X32-LABEL: test_mm512_shuffle_f32x4:
; X32: # %bb.0: # %entry
diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index f3ca0644e46..378dbda2dc0 100644
--- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -1,20 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
-
-define i16 @unpckbw_test(i16 %a0, i16 %a1) {
-; CHECK-LABEL: unpckbw_test:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: shll $8, %esi
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
-; CHECK-NEXT: retq
- %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
- ret i16 %res
-}
-
define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; CHECK: ## %bb.0:
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 5faa202c30f..628199d4ac9 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -96,6 +96,21 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
ret i16 %t2
}
+declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
+
+define i16 @unpckbw_test(i16 %a0, i16 %a1) {
+; CHECK-LABEL: unpckbw_test:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kunpckbw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
+ ret i16 %res
+}
+
declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here a no op and should be elimintaed,
; probably by FoldConstantArithmetic in SelectionDAG.
diff --git a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
index 1e754be6fe4..a56111f3453 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
@@ -4,117 +4,6 @@
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
-define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
-; X32-LABEL: test_mm512_kunpackd:
-; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT: vmovdqa64 72(%ebp), %zmm4
-; X32-NEXT: vmovdqa64 8(%ebp), %zmm5
-; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
-; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT: vpcmpneqb %zmm5, %zmm2, %k0
-; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: kunpckdq %k0, %k1, %k1
-; X32-NEXT: vpcmpneqb %zmm3, %zmm4, %k0 {%k1}
-; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm512_kunpackd:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
-; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1
-; X64-NEXT: kunpckdq %k0, %k1, %k1
-; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT: kmovq %k0, %rax
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__B to <64 x i8>
- %1 = bitcast <8 x i64> %__A to <64 x i8>
- %2 = icmp ne <64 x i8> %0, %1
- %3 = bitcast <64 x i1> %2 to i64
- %4 = bitcast <8 x i64> %__C to <64 x i8>
- %5 = bitcast <8 x i64> %__D to <64 x i8>
- %6 = icmp ne <64 x i8> %4, %5
- %7 = bitcast <64 x i1> %6 to i64
- %and.i = and i64 %7, 4294967295
- %shl.i = shl i64 %3, 32
- %or.i = or i64 %and.i, %shl.i
- %8 = bitcast <8 x i64> %__E to <64 x i8>
- %9 = bitcast <8 x i64> %__F to <64 x i8>
- %10 = icmp ne <64 x i8> %8, %9
- %11 = bitcast i64 %or.i to <64 x i1>
- %12 = and <64 x i1> %10, %11
- %13 = bitcast <64 x i1> %12 to i64
- ret i64 %13
-}
-
-define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
-; X32-LABEL: test_mm512_kunpackw:
-; X32: # %bb.0: # %entry
-; X32-NEXT: pushl %ebp
-; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %ebp, -8
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: .cfi_def_cfa_register %ebp
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
-; X32-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
-; X32-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1
-; X32-NEXT: kunpckwd %k0, %k1, %k1
-; X32-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
-; X32-NEXT: kmovd %k0, %eax
-; X32-NEXT: movl %ebp, %esp
-; X32-NEXT: popl %ebp
-; X32-NEXT: vzeroupper
-; X32-NEXT: retl
-;
-; X64-LABEL: test_mm512_kunpackw:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
-; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1
-; X64-NEXT: kunpckwd %k0, %k1, %k1
-; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
-; X64-NEXT: kmovd %k0, %eax
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
-entry:
- %0 = bitcast <8 x i64> %__B to <32 x i16>
- %1 = bitcast <8 x i64> %__A to <32 x i16>
- %2 = icmp ne <32 x i16> %0, %1
- %3 = bitcast <32 x i1> %2 to i32
- %4 = bitcast <8 x i64> %__C to <32 x i16>
- %5 = bitcast <8 x i64> %__D to <32 x i16>
- %6 = icmp ne <32 x i16> %4, %5
- %7 = bitcast <32 x i1> %6 to i32
- %and.i = and i32 %7, 65535
- %shl.i = shl i32 %3, 16
- %or.i = or i32 %and.i, %shl.i
- %8 = bitcast <8 x i64> %__E to <32 x i16>
- %9 = bitcast <8 x i64> %__F to <32 x i16>
- %10 = icmp ne <32 x i16> %8, %9
- %11 = bitcast i32 %or.i to <32 x i1>
- %12 = and <32 x i1> %10, %11
- %13 = bitcast <32 x i1> %12 to i32
- ret i32 %13
-}
-
-
define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
; X32-LABEL: test_mm512_mask_set1_epi8:
; X32: # %bb.0: # %entry
@@ -189,46 +78,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: movb %ch, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $55, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: andb $2, %al
; X32-NEXT: shrb %al
; X32-NEXT: kmovd %eax, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $54, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: movb %ch, %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $53, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %al
-; X32-NEXT: kmovd %eax, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $12, %eax
-; X32-NEXT: andl $15, %eax
-; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $13, %eax
; X32-NEXT: andb $1, %al
-; X32-NEXT: kmovd %eax, %k3
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $14, %eax
-; X32-NEXT: andl $3, %eax
-; X32-NEXT: kmovd %eax, %k4
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $15, %eax
-; X32-NEXT: andl $1, %eax
; X32-NEXT: kmovd %eax, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrl $16, %edx
@@ -243,25 +105,52 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kmovd %eax, %k7
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $52, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $51, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $13, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $50, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $14, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $49, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $15, %k0, %k1
-; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $48, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -494,22 +383,14 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $43, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $12, %esi
-; X32-NEXT: andl $15, %esi
-; X32-NEXT: kmovd %esi, %k2
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $14, %esi
-; X32-NEXT: andl $3, %esi
-; X32-NEXT: kmovd %esi, %k3
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $15, %esi
-; X32-NEXT: andl $1, %esi
-; X32-NEXT: kmovd %esi, %k4
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $20, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $19, %k1, %k1
@@ -520,12 +401,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $18, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $46, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $17, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $47, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $16, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -551,8 +440,8 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $12, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k4
-; X32-NEXT: kshiftrq $52, %k4, %k0
+; X32-NEXT: kxorq %k0, %k1, %k3
+; X32-NEXT: kshiftrq $52, %k3, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
@@ -576,19 +465,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: kmovd %edx, %k4
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $11, %k5, %k5
-; X32-NEXT: kxorq %k4, %k5, %k4
-; X32-NEXT: kshiftrq $53, %k4, %k5
+; X32-NEXT: kxorq %k3, %k5, %k3
+; X32-NEXT: kshiftrq $53, %k3, %k5
; X32-NEXT: kxorq %k6, %k5, %k5
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $10, %k5, %k5
-; X32-NEXT: kxorq %k4, %k5, %k5
-; X32-NEXT: kshiftrq $54, %k5, %k4
-; X32-NEXT: kxorq %k7, %k4, %k6
+; X32-NEXT: kxorq %k3, %k5, %k5
+; X32-NEXT: kshiftrq $54, %k5, %k3
+; X32-NEXT: kxorq %k7, %k3, %k6
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k4
+; X32-NEXT: kmovd %ecx, %k3
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
@@ -603,12 +492,6 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k5, %k0, %k0
; X32-NEXT: kshiftrq $56, %k0, %k5
; X32-NEXT: kxorq %k1, %k5, %k1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k5
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k6
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $7, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -618,17 +501,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $6, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $58, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $5, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $59, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $4, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $60, %k0, %k1
-; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $3, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -638,7 +524,10 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $2, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $62, %k0, %k1
-; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
@@ -743,46 +632,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: movb %ch, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $55, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: andb $2, %al
; X32-NEXT: shrb %al
; X32-NEXT: kmovd %eax, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $54, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: movb %ch, %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k1, %k1
-; X32-NEXT: kshiftrq $53, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k0
-; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %al
-; X32-NEXT: kmovd %eax, %k2
-; X32-NEXT: kxorq %k2, %k1, %k1
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $12, %eax
-; X32-NEXT: andl $15, %eax
-; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $13, %eax
; X32-NEXT: andb $1, %al
-; X32-NEXT: kmovd %eax, %k3
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $14, %eax
-; X32-NEXT: andl $3, %eax
-; X32-NEXT: kmovd %eax, %k4
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrl $15, %eax
-; X32-NEXT: andl $1, %eax
; X32-NEXT: kmovd %eax, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrl $16, %edx
@@ -797,25 +659,52 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kmovd %eax, %k7
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $52, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $51, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $13, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $50, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $14, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $49, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $15, %k0, %k1
-; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $48, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -1048,22 +937,14 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $43, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $12, %esi
-; X32-NEXT: andl $15, %esi
-; X32-NEXT: kmovd %esi, %k2
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $14, %esi
-; X32-NEXT: andl $3, %esi
-; X32-NEXT: kmovd %esi, %k3
-; X32-NEXT: movl %eax, %esi
-; X32-NEXT: shrl $15, %esi
-; X32-NEXT: andl $1, %esi
-; X32-NEXT: kmovd %esi, %k4
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $20, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $19, %k1, %k1
@@ -1074,12 +955,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $18, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $46, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $17, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $47, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $16, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -1105,8 +994,8 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $12, %k1, %k1
-; X32-NEXT: kxorq %k0, %k1, %k4
-; X32-NEXT: kshiftrq $52, %k4, %k0
+; X32-NEXT: kxorq %k0, %k1, %k3
+; X32-NEXT: kshiftrq $52, %k3, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
@@ -1130,19 +1019,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: kmovd %edx, %k4
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $11, %k5, %k5
-; X32-NEXT: kxorq %k4, %k5, %k4
-; X32-NEXT: kshiftrq $53, %k4, %k5
+; X32-NEXT: kxorq %k3, %k5, %k3
+; X32-NEXT: kshiftrq $53, %k3, %k5
; X32-NEXT: kxorq %k6, %k5, %k5
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $10, %k5, %k5
-; X32-NEXT: kxorq %k4, %k5, %k5
-; X32-NEXT: kshiftrq $54, %k5, %k4
-; X32-NEXT: kxorq %k7, %k4, %k6
+; X32-NEXT: kxorq %k3, %k5, %k5
+; X32-NEXT: kshiftrq $54, %k5, %k3
+; X32-NEXT: kxorq %k7, %k3, %k6
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k4
+; X32-NEXT: kmovd %ecx, %k3
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
@@ -1157,12 +1046,6 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k5, %k0, %k0
; X32-NEXT: kshiftrq $56, %k0, %k5
; X32-NEXT: kxorq %k1, %k5, %k1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k5
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k6
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $7, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -1172,17 +1055,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $6, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $58, %k0, %k1
-; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $5, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $59, %k0, %k1
-; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $4, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $60, %k0, %k1
-; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $3, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@@ -1192,7 +1078,10 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $2, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $62, %k0, %k1
-; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
diff --git a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index f19e09758f1..13aca464b9e 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -2,46 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
-declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
-
-define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
-; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movzwl %di, %eax
-; AVX512BW-NEXT: shll $16, %esi
-; AVX512BW-NEXT: orl %esi, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: shll $16, %eax
-; AVX512F-32-NEXT: orl %ecx, %eax
-; AVX512F-32-NEXT: retl
- %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
- ret i32 %res
-}
-
-declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
-
-define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
-; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movl %edi, %eax
-; AVX512BW-NEXT: shlq $32, %rsi
-; AVX512BW-NEXT: orq %rsi, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: retl
- %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
- ret i64 %res
-}
-
declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 2fa7c2c5b8a..7b5cc5feff0 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1455,6 +1455,55 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8>
ret <8 x i64> %res2
}
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
+
+define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512BW: ## %bb.0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+ ret i64 %res
+}
+
declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {