; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s

; rdar://11314175: SD Scheduler, BuildSchedUnits assert:
;                  N->getNodeId() == -1 && "Node already inserted!"
;
; The function below is the input associated with the scheduler assertion
; quoted above. It takes no arguments, returns nothing, and is built almost
; entirely from undef operands and null/undef pointers, so only its successful
; compilation and the emitted instruction sequence are meaningful.
;
; NOTE(review): in the copy of this file that was reviewed, all line breaks
; had been collapsed and the constant vector operands / shuffle masks of the
; five shufflevector instructions were missing. The line structure has been
; restored, and the operands have been reconstructed so that they agree with
; the expected assembly listed inside the function (lane order of the 128-bit
; permute, the broadcast of the element at byte offset 32, the blend with a
; zeroed register, and the absence of a high-half extract before the 128-bit
; multiply). Confirm them against the upstream copy of this test and
; regenerate the assertions with utils/update_llc_test_checks.py.

define void @func() nounwind ssp {
; CHECK-LABEL: func:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups 0, %xmm0
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; CHECK-NEXT:    vbroadcastss 32, %xmm3
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; CHECK-NEXT:    vmulps %ymm0, %ymm2, %ymm2
; CHECK-NEXT:    vmulps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vmulps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; CHECK-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vsubps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vhaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vmovaps %ymm0, (%rax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
; CHECK-NEXT:    ## -- End function
  ; Two byte-aligned (align 1) 128-bit loads: one from the null pointer and
  ; one from element index 2 of a <4 x float> array based at null, i.e. byte
  ; offset 2 * 16 = 32.
  %tmp = load <4 x float>, <4 x float>* null, align 1
  %tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
  %tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1

  ; Widen %tmp to 8 lanes (upper lanes take the 0.0 element of the constant
  ; operand), then overwrite the upper 128 bits with undef.
  %tmp16 = shufflevector <4 x float> %tmp, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %tmp17 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp16, <4 x float> undef, i8 1)

  ; Byte-wise shuffle of %tmp: drop the lowest 4 bytes, leave the top 4 undef.
  %tmp18 = bitcast <4 x float> %tmp to <16 x i8>
  %tmp19 = shufflevector <16 x i8> %tmp18, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp20 = bitcast <16 x i8> %tmp19 to <4 x float>

  ; Byte-wise shuffle of %tmp15: its lowest 4 bytes land in the top 4 bytes of
  ; the result; every other byte is undef.
  %tmp21 = bitcast <4 x float> %tmp15 to <16 x i8>
  %tmp22 = shufflevector <16 x i8> undef, <16 x i8> %tmp21, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 17, i32 18, i32 19>
  %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>

  ; Widen the shifted %tmp the same way as %tmp16, then place the shifted
  ; %tmp15 in the upper 128 bits.
  %tmp24 = shufflevector <4 x float> %tmp20, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %tmp25 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp24, <4 x float> %tmp23, i8 1)

  ; 256-bit arithmetic in which one operand of nearly every operation is undef.
  %tmp26 = fmul <8 x float> %tmp17, undef
  %tmp27 = fmul <8 x float> %tmp25, undef
  %tmp28 = fadd <8 x float> %tmp26, %tmp27
  %tmp29 = fadd <8 x float> %tmp28, undef

  ; Take a 128-bit half of the sum, multiply it by undef, and re-insert the
  ; product as the upper half of an otherwise-zero 256-bit vector.
  %tmp30 = shufflevector <8 x float> %tmp29, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %tmp31 = fmul <4 x float> undef, %tmp30
  %tmp32 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> zeroinitializer, <4 x float> %tmp31, i8 1)

  ; Horizontal-add chain; the final result is stored through an undef pointer
  ; with 32-byte alignment.
  %tmp33 = fadd <8 x float> undef, %tmp32
  %tmp34 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %tmp33, <8 x float> undef) nounwind
  %tmp35 = fsub <8 x float> %tmp34, undef
  %tmp36 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> zeroinitializer, <8 x float> %tmp35) nounwind
  store <8 x float> %tmp36, <8 x float>* undef, align 32
  ret void
}

; AVX intrinsics called by @func.
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone