; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=avx512bw,avx512vl,avx512dq | FileCheck %s

; Codegen test for a 64-lane i1 mask on a 32-bit x86 target with AVX-512
; (bw/vl/dq).
;
; @pr34605(i8* %s, i32 %p):
;   * splats %p across <64 x i32> and compares it for equality with a
;     constant vector, yielding a <64 x i1> mask;
;   * ANDs that mask with a constant that keeps lane 0 only;
;   * zero-extends the result to <64 x i8> and stores it, unaligned, at %s;
;   * stores <64 x i8> zeroinitializer to each of the seven following
;     64-byte chunks (%s+64 through %s+448), so 512 bytes are written in all.
;
; The expected assembly builds the 64-bit mask from four 16-lane compares
; joined with kunpckwd/kunpckdq, then ANDs it with the immediate 1 moved
; into a mask register.
;
; NOTE(review): both constant vector operands below were missing from the
; copy of this file under review, which left the IR unparseable. The AND
; mask (lane 0 true, all other lanes false) is derived from the expected
; "movl $1" + "kandq" sequence. The compare constants 1..64 are presumed;
; the expected output only requires four different 16-lane constant-pool
; chunks. Confirm both against the upstream test before regenerating the
; assertions.

define void @pr34605(i8* nocapture %s, i32 %p) {
; CHECK-LABEL: pr34605:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm0
; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k0
; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k1
; CHECK-NEXT:    kunpckwd %k0, %k1, %k0
; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k1
; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k2
; CHECK-NEXT:    kunpckwd %k1, %k2, %k1
; CHECK-NEXT:    kunpckdq %k0, %k1, %k0
; CHECK-NEXT:    movl $1, %ecx
; CHECK-NEXT:    kmovd %ecx, %k1
; CHECK-NEXT:    kmovd %k1, %k1
; CHECK-NEXT:    kandq %k1, %k0, %k1
; CHECK-NEXT:    vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z}
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqu32 %zmm0, (%eax)
; CHECK-NEXT:    vmovups %zmm1, 64(%eax)
; CHECK-NEXT:    vmovups %zmm1, 128(%eax)
; CHECK-NEXT:    vmovups %zmm1, 192(%eax)
; CHECK-NEXT:    vmovups %zmm1, 256(%eax)
; CHECK-NEXT:    vmovups %zmm1, 320(%eax)
; CHECK-NEXT:    vmovups %zmm1, 384(%eax)
; CHECK-NEXT:    vmovups %zmm1, 448(%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
entry:
  ; Splat %p into all 64 lanes (insert into lane 0, then shuffle with an
  ; all-zero index mask).
  %broadcast.splatinsert = insertelement <64 x i32> undef, i32 %p, i32 0
  %broadcast.splat = shufflevector <64 x i32> %broadcast.splatinsert, <64 x i32> undef, <64 x i32> zeroinitializer

  ; Lane-wise equality against a non-splat constant; the 64 lanes are
  ; written 16 per row to mirror the four 16-lane compares expected above.
  %0 = icmp eq <64 x i32> %broadcast.splat, <
      i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16,
      i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32,
      i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48,
      i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 64>

  ; Keep lane 0 of the compare result and clear lanes 1..63.
  %1 = and <64 x i1> %0, <
      i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
      i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
      i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
      i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>

  ; Widen each mask bit to a byte (0 or 1) and store the 64 bytes at %s.
  %2 = zext <64 x i1> %1 to <64 x i8>
  %3 = bitcast i8* %s to <64 x i8>*
  store <64 x i8> %2, <64 x i8>* %3, align 1

  ; Zero the next seven 64-byte chunks: %s+64, +128, ..., +448.
  %4 = getelementptr inbounds i8, i8* %s, i32 64
  %5 = bitcast i8* %4 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %5, align 1
  %6 = getelementptr inbounds i8, i8* %s, i32 128
  %7 = bitcast i8* %6 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %7, align 1
  %8 = getelementptr inbounds i8, i8* %s, i32 192
  %9 = bitcast i8* %8 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %9, align 1
  %10 = getelementptr inbounds i8, i8* %s, i32 256
  %11 = bitcast i8* %10 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %11, align 1
  %12 = getelementptr inbounds i8, i8* %s, i32 320
  %13 = bitcast i8* %12 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %13, align 1
  %14 = getelementptr inbounds i8, i8* %s, i32 384
  %15 = bitcast i8* %14 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %15, align 1
  %16 = getelementptr inbounds i8, i8* %s, i32 448
  %17 = bitcast i8* %16 to <64 x i8>*
  store <64 x i8> zeroinitializer, <64 x i8>* %17, align 1
  ret void
}