diff options
Diffstat (limited to 'test/CodeGen')
23 files changed, 448 insertions, 126 deletions
diff --git a/test/CodeGen/AArch64/branch-relax-cbz.ll b/test/CodeGen/AArch64/branch-relax-cbz.ll index c654b94e49c..d13c0f677bc 100644 --- a/test/CodeGen/AArch64/branch-relax-cbz.ll +++ b/test/CodeGen/AArch64/branch-relax-cbz.ll @@ -6,23 +6,22 @@ ; CHECK-NEXT: ; BB#1: ; %b3 ; CHECK: ldr [[LOAD:w[0-9]+]] -; CHECK: cbz [[LOAD]], [[SKIP_LONG_B:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: b [[B8:LBB[0-9]+_[0-9]+]] - -; CHECK-NEXT: [[SKIP_LONG_B]]: +; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: [[B8]]: ; %b8 +; CHECK-NEXT: ret + ; CHECK-NEXT: [[B2]]: ; %b2 ; CHECK: mov w{{[0-9]+}}, #93 ; CHECK: bl _extfunc ; CHECK: cbz w{{[0-9]+}}, [[B7]] - -; CHECK-NEXT: [[B8]]: ; %b8 -; CHECK-NEXT: ret +; CHECK-NEXT: b [[B8]] ; CHECK-NEXT: [[B7]]: ; %b7 ; CHECK: mov w{{[0-9]+}}, #13 ; CHECK: b _extfunc + define void @split_block_no_fallthrough(i64 %val) #0 { bb: %c0 = icmp sgt i64 %val, -5 diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index 86be3ccea1d..8c85fc3c68a 100644 --- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -264,7 +264,7 @@ declare void @do_something() #1 define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 { ; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ ; CHECK: cmn -; CHECK: b.gt +; CHECK: b.le ; CHECK: cmp ; CHECK: b.gt entry: diff --git a/test/CodeGen/AArch64/optimize-cond-branch.ll b/test/CodeGen/AArch64/optimize-cond-branch.ll index 4e3ca6f16e7..ab4ad5e2ce9 100644 --- a/test/CodeGen/AArch64/optimize-cond-branch.ll +++ b/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -11,7 +11,7 @@ target triple = "arm64--" ; ; CHECK-LABEL: func ; CHECK-NOT: and -; CHECK: tbnz +; CHECK: tbz define void @func() { %c0 = icmp sgt i64 0, 0 br i1 %c0, label %b1, label %b6 diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll index 104dd45e8a1..d4538ee2d25 100644 --- a/test/CodeGen/AMDGPU/basic-branch.ll +++ b/test/CodeGen/AMDGPU/basic-branch.ll @@ -8,13 +8,10 @@ ; GCNNOOPT: v_writelane_b32 ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]] - -; GCN: ; BB#1 ; GCNNOOPT: v_readlane_b32 ; GCNNOOPT: v_readlane_b32 ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated +; GCNNOOPT: s_endpgm ; GCN: {{^}}[[END]]: ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 3fd40521801..18bbc3e5f07 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -491,7 +491,8 @@ ret: ; GCN-LABEL: {{^}}long_branch_hang: ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 -; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]] +; GCN-NEXT: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}} +; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]] ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( diff --git a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index 0d919bbf85e..03d8cd9eeea 100644 --- a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s ; GCN-LABEL: {{^}}test_loop: -; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: +; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}} ; GCN: ds_read_b32 ; GCN: ds_write_b32 ; GCN: s_branch [[LABEL]] diff --git a/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/test/CodeGen/AMDGPU/convergent-inlineasm.ll index 755f439c686..c3d155ce747 100644 --- a/test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ b/test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -29,6 +29,7 @@ bb5: ; preds = %bb3, %bb ; GCN: v_cmp_ne_u32_e64 ; GCN: BB{{[0-9]+_[0-9]+}}: + define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) { bb: %tmp = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 37083fbbd3c..9c153841a63 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -439,7 +439,7 @@ entry: ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; GCN-NOHSA: buffer_store_dword [[ONE]] ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]] -; GCN; {{^}}[[EXIT]]: +; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) { bb3: ; preds = %bb2 diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll index 52cc37e2408..b8f2980be75 100644 --- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll +++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll @@ -12,11 +12,11 @@ ; CHECK: bl _quux ; CHECK-NOT: bl _quux -; NOMERGE: bl _baz -; NOMERGE: bl _baz +; NOMERGE-DAG: bl _baz +; NOMERGE-DAG: bl _baz -; NOMERGE: bl _quux -; NOMERGE: bl _quux +; NOMERGE-DAG: bl _quux +; NOMERGE-DAG: bl _quux ; ModuleID = 'tail.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll index 364bd5d1369..8a82af6a69b 100644 --- a/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -66,14 +66,14 @@ entry: ; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: ; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 -; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1 +; CHECK-ARMV7-NEXT: moveq r0, #1 ; CHECK-ARMV7-NEXT: bxeq lr ; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]] +; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0] +; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 ; CHECK-ARMV7-NEXT: beq [[HEAD]] ; CHECK-ARMV7-NEXT: clrex -; CHECK-ARMV7-NEXT: mov [[RES]], #0 +; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: bx lr ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll index 442459bc058..eb32ee54c09 100644 --- a/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/test/CodeGen/ARM/fold-stack-adjust.ll @@ -135,7 +135,7 @@ define void @test_fold_point(i1 %tst) minsize { ; Important to check for beginning of basic block, because if it gets ; if-converted the test is probably no longer checking what it should. -; CHECK: {{LBB[0-9]+_2}}: +; CHECK: %end ; CHECK-NEXT: vpop {d7, d8} ; CHECK-NEXT: pop {r4, pc} diff --git a/test/CodeGen/PowerPC/tail-dup-break-cfg.ll b/test/CodeGen/PowerPC/tail-dup-break-cfg.ll index 4be737e89a9..f19b11f2ae4 100644 --- a/test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ b/test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -16,14 +16,14 @@ target triple = "powerpc64le-grtev4-linux-gnu" ;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: b [[BODY2LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]] +;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit +;CHECK: blr ;CHECK-NEXT: [[BODY1LABEL]] ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 ;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[BODY2LABEL]] -;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit -;CHECK: blr +;CHECK-NEXT: [[BODY2LABEL:[._0-9A-Za-z]+]]: +;CHECK: b [[EXITLABEL]] define void @tail_dup_break_cfg(i32 %tag) { entry: br label %test1 @@ -79,7 +79,7 @@ body1: test2: %tagbit2 = and i32 %tag, 2 %tagbit2eq0 = icmp ne i32 %tagbit2, 0 - br i1 %tagbit2eq0, label %body2, label %exit, !prof !1 ; %body2 more likely + br i1 %tagbit2eq0, label %body2, label %exit, !prof !3 ; %body2 more likely body2: call void @b() call void @b() @@ -137,4 +137,4 @@ ret: !1 = !{!"branch_weights", i32 5, i32 3} !2 = !{!"branch_weights", i32 95, i32 5} -!3 = !{!"branch_weights", i32 7, i32 3} +!3 = !{!"branch_weights", i32 8, i32 3} diff --git a/test/CodeGen/PowerPC/tail-dup-layout.ll b/test/CodeGen/PowerPC/tail-dup-layout.ll index 6790aa8e944..95c23ade513 100644 --- a/test/CodeGen/PowerPC/tail-dup-layout.ll +++ b/test/CodeGen/PowerPC/tail-dup-layout.ll @@ -1,59 +1,59 @@ -; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s +; RUN: llc -O2 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" ; Intended layout: -; The outlining flag produces the layout +; The chain-based outlining produces the layout ; test1 ; test2 ; test3 ; test4 -; exit ; optional1 ; optional2 ; optional3 ; optional4 +; exit ; Tail duplication puts test n+1 at the end of optional n ; so optional1 includes a copy of test2 at the end, and branches ; to test3 (at the top) or falls through to optional 2. -; The CHECK statements check for the whole string of tests and exit block, +; The CHECK statements check for the whole string of tests ; and then check that the correct test has been duplicated into the end of ; the optional blocks and that the optional blocks are in the correct order. -;CHECK-LABEL: f: +;CHECK-LABEL: straight_test: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 ;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4 +;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]] -;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit +;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]] +;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit ;CHECK: blr -;CHECK-NEXT: [[OPT1LABEL]] +;CHECK-NEXT: .[[OPT1LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 -;CHECK-NEXT: beq 0, [[TEST3LABEL]] -;CHECK-NEXT: [[OPT2LABEL]] +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 -;CHECK-NEXT: beq 0, [[TEST4LABEL]] -;CHECK-NEXT: [[OPT3LABEL]] +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]]: ;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 -;CHECK-NEXT: beq 0, [[EXITLABEL]] -;CHECK-NEXT: [[OPT4LABEL]] -;CHECK: b [[EXITLABEL]] +;CHECK-NEXT: beq 0, .[[EXITLABEL]] +;CHECK-NEXT: .[[OPT4LABEL]]: +;CHECK: b .[[EXITLABEL]] -define void @f(i32 %tag) { +define void @straight_test(i32 %tag) { entry: br label %test1 test1: %tagbit1 = and i32 %tag, 1 %tagbit1eq0 = icmp eq i32 %tagbit1, 0 - br i1 %tagbit1eq0, label %test2, label %optional1 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 optional1: call void @a() call void @a() @@ -63,7 +63,7 @@ optional1: test2: %tagbit2 = and i32 %tag, 2 %tagbit2eq0 = icmp eq i32 %tagbit2, 0 - br i1 %tagbit2eq0, label %test3, label %optional2 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 optional2: call void @b() call void @b() @@ -73,7 +73,7 @@ optional2: test3: %tagbit3 = and i32 %tag, 4 %tagbit3eq0 = icmp eq i32 %tagbit3, 0 - br i1 %tagbit3eq0, label %test4, label %optional3 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 optional3: call void @c() call void @c() @@ -83,7 +83,7 @@ optional3: test4: %tagbit4 = and i32 %tag, 8 %tagbit4eq0 = icmp eq i32 %tagbit4, 0 - br i1 %tagbit4eq0, label %exit, label %optional4 + br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1 optional4: call void @d() call void @d() @@ -94,7 +94,333 @@ exit: ret void } +; Intended layout: +; The chain-based outlining produces the layout +; entry +; --- Begin loop --- +; for.latch +; for.check +; test1 +; test2 +; test3 +; test4 +; optional1 +; optional2 +; optional3 +; optional4 +; --- End loop --- +; exit +; The CHECK statements check for the whole string of tests and exit block, +; and then check that the correct test has been duplicated into the end of +; the optional blocks and that the optional blocks are in the correct order. +;CHECK-LABEL: loop_test: +;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4 +;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch +;CHECK: addi +;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check +;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]]) +;CHECK: # %test1 +;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 +;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: # %test2 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3 +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]] +;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}} +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]] +;CHECK: [[OPT1LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30 +;CHECK-NEXT: beq 0, .[[TEST3LABEL]] +;CHECK-NEXT: .[[OPT2LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29 +;CHECK-NEXT: beq 0, .[[TEST4LABEL]] +;CHECK-NEXT: .[[OPT3LABEL]] +;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28 +;CHECK-NEXT: beq 0, .[[LATCHLABEL]] +;CHECK: [[OPT4LABEL]]: +;CHECK: b .[[LATCHLABEL]] +define void @loop_test(i32* %tags, i32 %count) { +entry: + br label %for.check +for.check: + %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] + %done.count = icmp ugt i32 %count.loop, 0 + %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count + %tag = load i32, i32* %tag_ptr + %done.tag = icmp eq i32 %tag, 0 + %done = and i1 %done.count, %done.tag + br i1 %done, label %test1, label %exit, !prof !1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 +optional1: + call void @a() + call void @a() + call void @a() + call void @a() + br label %test2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 +optional2: + call void @b() + call void @b() + call void @b() + call void @b() + br label %test3 +test3: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 +optional3: + call void @c() + call void @c() + call void @c() + call void @c() + br label %test4 +test4: + %tagbit4 = and i32 %tag, 8 + %tagbit4eq0 = icmp eq i32 %tagbit4, 0 + br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 +optional4: + call void @d() + call void @d() + call void @d() + call void @d() + br label %for.latch +for.latch: + %count.sub = sub i32 %count.loop, 1 + br label %for.check +exit: + ret void +} + +; The block then2 is not unavoidable, meaning it does not dominate the exit. +; But since it can be tail-duplicated, it should be placed as a fallthrough from +; test2 and copied. The purpose here is to make sure that the tail-duplication +; code is independent of the outlining code, which works by choosing the +; "unavoidable" blocks. +; CHECK-LABEL: avoidable_test: +; CHECK: # %entry +; CHECK: andi. +; CHECK: # %test2 +; Make sure then2 falls through from test2 +; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}} +; CHECK: # %then2 +; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 +; CHECK: # %else1 +; CHECK: bl a +; CHECK: bl a +; Make sure then2 was copied into else1 +; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29 +; CHECK: # %end1 +; CHECK: bl d +; CHECK: # %else2 +; CHECK: bl c +; CHECK: # %end2 +define void @avoidable_test(i32 %tag) { +entry: + br label %test1 +test1: + %tagbit1 = and i32 %tag, 1 + %tagbit1eq0 = icmp eq i32 %tagbit1, 0 + br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely +else1: + call void @a() + call void @a() + br label %then2 +test2: + %tagbit2 = and i32 %tag, 2 + %tagbit2eq0 = icmp eq i32 %tagbit2, 0 + br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely +then2: + %tagbit3 = and i32 %tag, 4 + %tagbit3eq0 = icmp eq i32 %tagbit3, 0 + br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely +else2: + call void @c() + br label %end2 +end2: + ret void +end1: + call void @d() + ret void +} + +; CHECK-LABEL: trellis_test +; The number in the block labels is the expected block frequency given the +; probabilities annotated. There is a conflict in the b;c->d;e trellis that +; should be resolved as c->e;b->d. +; The d;e->f;g trellis should be resolved as e->g;d->f. +; The f;g->h;i trellis should be resolved as f->i;g->h. +; The h;i->j;ret trellis contains a triangle edge, and should be resolved as +; h->j->ret +; CHECK: # %entry +; CHECK: # %c10 +; CHECK: # %e9 +; CHECK: # %g10 +; CHECK: # %h10 +; CHECK: # %j8 +; CHECK: # %ret +; CHECK: # %b6 +; CHECK: # %d7 +; CHECK: # %f6 +; CHECK: # %i6 +define void @trellis_test(i32 %tag) { +entry: + br label %a16 +a16: + call void @a() + call void @a() + %tagbits.a = and i32 %tag, 3 + %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0 + br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6 +c10: + call void @c() + call void @c() + %tagbits.c = and i32 %tag, 12 + %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0 + ; Both of these edges should be hotter than the other incoming edge + ; for e9 or d7 + br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4 +e9: + call void @e() + call void @e() + %tagbits.e = and i32 %tag, 48 + %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0 + br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2 +g10: + call void @g() + call void @g() + %tagbits.g = and i32 %tag, 192 + %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0 + br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8 +i6: + call void @i() + call void @i() + %tagbits.i = and i32 %tag, 768 + %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0 + br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3) +b6: + call void @b() + call void @b() + %tagbits.b = and i32 %tag, 12 + %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8 + br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3) +d7: + call void @d() + call void @d() + %tagbits.d = and i32 %tag, 48 + %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32 + br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4 +f6: + call void @f() + call void @f() + %tagbits.f = and i32 %tag, 192 + %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128 + br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2 +h10: + call void @h() + call void @h() + %tagbits.h = and i32 %tag, 768 + %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512 + br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5) +j8: + call void @j() + call void @j() + br label %ret +ret: + ret void +} + +; Verify that we still consider tail-duplication opportunities if we find a +; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors +; of both F and G. The basic trellis algorithm picks the F->G edge, but after +; checking, it's profitable to duplicate G into F. The weights here are not +; really important. They are there to help make the test stable. +; CHECK-LABEL: trellis_then_dup_test +; CHECK: # %entry +; CHECK: # %b +; CHECK: # %d +; CHECK: # %g +; CHECK: # %ret1 +; CHECK: # %c +; CHECK: # %e +; CHECK: # %f +; CHECK: # %ret2 +; CHECK: # %ret +define void @trellis_then_dup_test(i32 %tag) { +entry: + br label %a +a: + call void @a() + call void @a() + %tagbits.a = and i32 %tag, 3 + %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0 + br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3 +b: + call void @b() + call void @b() + %tagbits.b = and i32 %tag, 12 + %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8 + br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3 +d: + call void @d() + call void @d() + %tagbits.d = and i32 %tag, 48 + %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32 + br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3 +f: + call void @f() + call void @f() + br label %g +g: + %tagbits.g = and i32 %tag, 192 + %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0 + br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced +c: + call void @c() + call void @c() + %tagbits.c = and i32 %tag, 12 + %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0 + br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3 +e: + call void @e() + call void @e() + %tagbits.e = and i32 %tag, 48 + %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0 + br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3 +ret1: + call void @a() + br label %ret +ret2: + call void @b() + br label %ret +ret: + ret void +} + declare void @a() declare void @b() declare void @c() declare void @d() +declare void @e() +declare void @f() +declare void @g() +declare void @h() +declare void @i() +declare void @j() + +!1 = !{!"branch_weights", i32 5, i32 3} +!2 = !{!"branch_weights", i32 50, i32 50} +!3 = !{!"branch_weights", i32 6, i32 4} +!4 = !{!"branch_weights", i32 7, i32 2} +!5 = !{!"branch_weights", i32 2, i32 8} +!6 = !{!"branch_weights", i32 3, i32 4} +!7 = !{!"branch_weights", i32 4, i32 2} diff --git a/test/CodeGen/SPARC/sjlj.ll b/test/CodeGen/SPARC/sjlj.ll index 647d8f2fd2c..459630f9255 100755 --- a/test/CodeGen/SPARC/sjlj.ll +++ b/test/CodeGen/SPARC/sjlj.ll @@ -67,14 +67,18 @@ return: ; preds = %if.end, %if.then ; CHECK: nop ; CHECK:.LBB1_1: ! %entry ; CHECK: mov %g0, %i0 +; CHECK: ! %entry ; CHECK: cmp %i0, 0 -; CHECK: bne .LBB1_4 -; CHECK: ba .LBB1_5 -; CHECK:.LBB1_2: ! Block address taken -; CHECK: mov 1, %i0 ; CHECK: be .LBB1_5 +; CHECK: nop ; CHECK:.LBB1_4: -; CHECK: ba .LBB1_6 +; CHECK: mov 1, %i0 +; CHECK: ba .LBB1_6 +; CHECK:.LBB1_2: ! Block address taken +; CHECK: mov 1, %i0 +; CHECK: cmp %i0, 0 +; CHECK: bne .LBB1_4 +; CHECK: nop } declare i8* @llvm.frameaddress(i32) #2 diff --git a/test/CodeGen/WebAssembly/mem-intrinsics.ll b/test/CodeGen/WebAssembly/mem-intrinsics.ll index 0ac1e1e182c..0e2f06b70b8 100644 --- a/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index 807dfe464cb..4969b188697 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -314,7 +314,7 @@ exit: define void @unnatural_cfg1() { ; Test that we can handle a loop with an inner unnatural loop at the end of ; a function. This is a gross CFG reduced out of the single source GCC. -; CHECK: unnatural_cfg1 +; CHECK-LABEL: unnatural_cfg1 ; CHECK: %entry ; CHECK: %loop.body1 ; CHECK: %loop.body2 @@ -352,17 +352,15 @@ define void @unnatural_cfg2() { ; Test that we can handle a loop with a nested natural loop *and* an unnatural ; loop. This was reduced from a crash on block placement when run over ; single-source GCC. -; CHECK: unnatural_cfg2 +; CHECK-LABEL: unnatural_cfg2 ; CHECK: %entry ; CHECK: %loop.body1 ; CHECK: %loop.body2 -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin -; The end block is folded with %loop.body3... -; CHECK-NOT: %loop.inner1.end ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin -; The loop.inner2.end block is folded +; CHECK: %loop.inner2.begin +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %loop.header ; CHECK: %bail @@ -559,7 +557,7 @@ define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx ; didn't correctly locate the fallthrough successor, assuming blindly that the ; first one was the fallthrough successor. As a result, we would add an ; erroneous jump to the landing pad thinking *that* was the default successor. -; CHECK: test_eh_lpad_successor +; CHECK-LABEL: test_eh_lpad_successor ; CHECK: %entry ; CHECK-NOT: jmp ; CHECK: %loop @@ -587,7 +585,7 @@ define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personal ; fallthrough simply won't occur. Make sure we don't crash trying to update ; terminators for such constructs. ; -; CHECK: test_eh_throw +; CHECK-LABEL: test_eh_throw ; CHECK: %entry ; CHECK: %cleanup @@ -609,7 +607,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() { ; attempt to merge onto the wrong end of the inner loop just because we find it ; first. This was reduced from a crasher in GCC's single source. ; -; CHECK: test_unnatural_cfg_backwards_inner_loop +; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry ; CHECK: %loop2b ; CHECK: %loop1 @@ -649,7 +647,7 @@ define void @unanalyzable_branch_to_loop_header() { ; fallthrough because that happens to always produce unanalyzable branches on ; x86. ; -; CHECK: unanalyzable_branch_to_loop_header +; CHECK-LABEL: unanalyzable_branch_to_loop_header ; CHECK: %entry ; CHECK: %loop ; CHECK: %exit @@ -673,7 +671,7 @@ define void @unanalyzable_branch_to_best_succ(i1 %cond) { ; This branch is now analyzable and hence the destination block becomes the ; hotter one. The right order is entry->bar->exit->foo. ; -; CHECK: unanalyzable_branch_to_best_succ +; CHECK-LABEL: unanalyzable_branch_to_best_succ ; CHECK: %entry ; CHECK: %bar ; CHECK: %exit @@ -699,7 +697,7 @@ define void @unanalyzable_branch_to_free_block(float %x) { ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the best free block in the CFG. ; -; CHECK: unanalyzable_branch_to_free_block +; CHECK-LABEL: unanalyzable_branch_to_free_block ; CHECK: %entry ; CHECK: %a ; CHECK: %b @@ -729,7 +727,7 @@ define void @many_unanalyzable_branches() { ; Ensure that we don't crash as we're building up many unanalyzable branches, ; blocks, and loops. ; -; CHECK: many_unanalyzable_branches +; CHECK-LABEL: many_unanalyzable_branches ; CHECK: %entry ; CHECK: %exit @@ -948,7 +946,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { ; strange layouts that are siginificantly less efficient, often times maing ; it discontiguous. ; -; CHECK: @benchmark_heapsort +; CHECK-LABEL: @benchmark_heapsort ; CHECK: %entry ; First rotated loop top. ; CHECK: .p2align diff --git a/test/CodeGen/X86/bypass-slow-division-32.ll b/test/CodeGen/X86/bypass-slow-division-32.ll index ea545d22385..9f266647d8a 100644 --- a/test/CodeGen/X86/bypass-slow-division-32.ll +++ b/test/CodeGen/X86/bypass-slow-division-32.ll @@ -95,20 +95,19 @@ define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: idivl %ebx ; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: testl $-256, %edi -; CHECK-NEXT: jne .LBB3_5 -; CHECK-NEXT: jmp .LBB3_4 -; CHECK-NEXT: .LBB3_1: -; CHECK-NEXT: movzbl %cl, %eax -; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def> -; CHECK-NEXT: divb %bl -; CHECK-NEXT: movzbl %al, %esi -; CHECK-NEXT: testl $-256, %edi ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: .LBB3_5: ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divl %ebx ; CHECK-NEXT: jmp .LBB3_6 +; CHECK-NEXT: .LBB3_1: +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def> +; CHECK-NEXT: divb %bl +; CHECK-NEXT: movzbl %al, %esi +; CHECK-NEXT: testl $-256, %edi +; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: movzbl %cl, %eax ; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def> diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll index 9488d6d2605..dfc1aefd31a 100644 --- a/test/CodeGen/X86/sse1.ll +++ b/test/CodeGen/X86/sse1.ll @@ -60,7 +60,13 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X32-NEXT: xorps %xmm1, %xmm1 ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X32-NEXT: jne .LBB1_5 -; X32-NEXT: jmp .LBB1_4 +; X32-NEXT: .LBB1_4: +; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X32-NEXT: jne .LBB1_8 +; X32-NEXT: .LBB1_7: +; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X32-NEXT: jmp .LBB1_9 ; X32-NEXT: .LBB1_1: ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) @@ -68,17 +74,9 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X32-NEXT: .LBB1_5: # %entry ; X32-NEXT: xorps %xmm2, %xmm2 ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; X32-NEXT: jne .LBB1_8 -; X32-NEXT: jmp .LBB1_7 -; X32-NEXT: .LBB1_4: -; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X32-NEXT: je .LBB1_7 ; X32-NEXT: .LBB1_8: # %entry ; X32-NEXT: xorps %xmm3, %xmm3 -; X32-NEXT: jmp .LBB1_9 -; X32-NEXT: .LBB1_7: -; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X32-NEXT: .LBB1_9: # %entry ; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] @@ -99,7 +97,13 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: testl %edx, %edx ; X64-NEXT: jne .LBB1_5 -; X64-NEXT: jmp .LBB1_4 +; X64-NEXT: .LBB1_4: +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: testl %r8d, %r8d +; X64-NEXT: jne .LBB1_8 +; X64-NEXT: .LBB1_7: +; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: jmp .LBB1_9 ; X64-NEXT: .LBB1_1: ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: testl %edx, %edx @@ -107,17 +111,9 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X64-NEXT: .LBB1_5: # %entry ; X64-NEXT: xorps %xmm2, %xmm2 ; X64-NEXT: testl %r8d, %r8d -; X64-NEXT: jne .LBB1_8 -; X64-NEXT: jmp .LBB1_7 -; X64-NEXT: .LBB1_4: -; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: testl %r8d, %r8d ; X64-NEXT: je .LBB1_7 ; X64-NEXT: .LBB1_8: # %entry ; X64-NEXT: xorps %xmm3, %xmm3 -; X64-NEXT: jmp .LBB1_9 -; X64-NEXT: .LBB1_7: -; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X64-NEXT: .LBB1_9: # %entry ; X64-NEXT: testl %esi, %esi ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] @@ -215,7 +211,7 @@ define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind { ret <4 x i32> %zext } -; Fragile test warning - we need to induce the generation of a vselect +; Fragile test warning - we need to induce the generation of a vselect ; post-legalization to cause the crash seen in: ; https://llvm.org/bugs/show_bug.cgi?id=31672 ; Is there a way to do that without an unsafe/fast sqrt intrinsic call? diff --git a/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 197fd72586a..80346018ff8 100644 --- a/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -6,13 +6,13 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: tail_dup_merge_loops ; CHECK: # %entry ; CHECK-NOT: # %{{[a-zA-Z_]+}} +; CHECK: # %exit +; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_exit ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_latch ; CHECK-NOT: # %{{[a-zA-Z_]+}} ; CHECK: # %inner_loop_test -; CHECK-NOT: # %{{[a-zA-Z_]+}} -; CHECK: # %exit define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 { entry: %notlhs674.i = icmp eq i32 %a, 0 diff --git a/test/CodeGen/X86/tail-dup-repeat.ll b/test/CodeGen/X86/tail-dup-repeat.ll index 21b48e16efb..7d9c0908e57 100644 --- a/test/CodeGen/X86/tail-dup-repeat.ll +++ b/test/CodeGen/X86/tail-dup-repeat.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s +; RUN: llc -O3 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll index e7797426d89..96ff33ff5f7 100644 --- a/test/CodeGen/X86/tail-opts.ll +++ b/test/CodeGen/X86/tail-opts.ll @@ -113,16 +113,15 @@ altret: ; CHECK-NEXT: jbe .LBB2_3 ; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: ja .LBB2_4 -; CHECK-NEXT: jmp .LBB2_2 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_3: ; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} ; CHECK-NEXT: jbe .LBB2_2 ; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: ret define i1 @dont_merge_oddly(float* %result) nounwind { entry: diff --git a/test/CodeGen/X86/twoaddr-coalesce-3.ll b/test/CodeGen/X86/twoaddr-coalesce-3.ll index 33c9d46f13c..f5a7326c970 100644 --- a/test/CodeGen/X86/twoaddr-coalesce-3.ll +++ b/test/CodeGen/X86/twoaddr-coalesce-3.ll @@ -19,7 +19,7 @@ for.body.lr.ph: ; preds = %entry ; Check that only one mov will be generated in the kernel loop. ; CHECK-LABEL: foo: -; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body +; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}} ; CHECK-NOT: mov ; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]] ; CHECK-NOT: mov @@ -56,7 +56,7 @@ for.body.lr.ph: ; preds = %entry ; Check that only two mov will be generated in the kernel loop. ; CHECK-LABEL: goo: -; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body +; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}} ; CHECK-NOT: mov ; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]] ; CHECK-NOT: mov diff --git a/test/CodeGen/X86/win-alloca-expander.ll b/test/CodeGen/X86/win-alloca-expander.ll index 45ca3b214ab..4b6e3bb18e6 100644 --- a/test/CodeGen/X86/win-alloca-expander.ll +++ b/test/CodeGen/X86/win-alloca-expander.ll @@ -115,34 +115,36 @@ define void @cfg(i1 %x, i1 %y) { ; Test that the blocks are analyzed in the correct order. ; CHECK-LABEL: cfg: entry: - br i1 %x, label %bb1, label %bb2 + br i1 %x, label %bb1, label %bb3 bb1: %p1 = alloca %struct.S ; CHECK: pushl %eax ; CHECK: subl $1020, %esp - br label %bb3 + br label %bb4 + bb2: - %p2 = alloca %struct.T + %p5 = alloca %struct.T ; CHECK: pushl %eax ; CHECK: subl $2996, %esp - br label %bb3 + call void @g(%struct.T* %p5) + ret void bb3: - br i1 %y, label %bb4, label %bb5 + %p2 = alloca %struct.T +; CHECK: pushl %eax +; CHECK: subl $2996, %esp + br label %bb4 bb4: + br i1 %y, label %bb5, label %bb2 + +bb5: %p4 = alloca %struct.S ; CHECK: subl $1024, %esp call void @f(%struct.S* %p4) ret void -bb5: - %p5 = alloca %struct.T -; CHECK: pushl %eax -; CHECK: subl $2996, %esp - call void @g(%struct.T* %p5) - ret void } |