Diffstat (limited to 'test/CodeGen')
 test/CodeGen/AArch64/branch-relax-cbz.ll           |  13
 test/CodeGen/AArch64/combine-comparisons-by-cse.ll |   2
 test/CodeGen/AArch64/optimize-cond-branch.ll       |   2
 test/CodeGen/AMDGPU/basic-branch.ll                |   5
 test/CodeGen/AMDGPU/branch-relaxation.ll           |   3
 test/CodeGen/AMDGPU/cf-loop-on-constant.ll         |   2
 test/CodeGen/AMDGPU/convergent-inlineasm.ll        |   1
 test/CodeGen/AMDGPU/salu-to-valu.ll                |   2
 test/CodeGen/ARM/2007-05-22-tailmerge-3.ll         |   8
 test/CodeGen/ARM/atomic-cmpxchg.ll                 |   8
 test/CodeGen/ARM/fold-stack-adjust.ll              |   2
 test/CodeGen/PowerPC/tail-dup-break-cfg.ll         |  14
 test/CodeGen/PowerPC/tail-dup-layout.ll            | 378
 test/CodeGen/SPARC/sjlj.ll                         |  14
 test/CodeGen/WebAssembly/mem-intrinsics.ll         |   2
 test/CodeGen/X86/block-placement.ll                |  28
 test/CodeGen/X86/bypass-slow-division-32.ll        |  15
 test/CodeGen/X86/sse1.ll                           |  34
 test/CodeGen/X86/tail-dup-merge-loop-headers.ll    |   4
 test/CodeGen/X86/tail-dup-repeat.ll                |   2
 test/CodeGen/X86/tail-opts.ll                      |   7
 test/CodeGen/X86/twoaddr-coalesce-3.ll             |   4
 test/CodeGen/X86/win-alloca-expander.ll            |  24
 23 files changed, 448 insertions(+), 126 deletions(-)
diff --git a/test/CodeGen/AArch64/branch-relax-cbz.ll b/test/CodeGen/AArch64/branch-relax-cbz.ll
index c654b94e49c..d13c0f677bc 100644
--- a/test/CodeGen/AArch64/branch-relax-cbz.ll
+++ b/test/CodeGen/AArch64/branch-relax-cbz.ll
@@ -6,23 +6,22 @@
; CHECK-NEXT: ; BB#1: ; %b3
; CHECK: ldr [[LOAD:w[0-9]+]]
-; CHECK: cbz [[LOAD]], [[SKIP_LONG_B:LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: b [[B8:LBB[0-9]+_[0-9]+]]
-
-; CHECK-NEXT: [[SKIP_LONG_B]]:
+; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: [[B8]]: ; %b8
+; CHECK-NEXT: ret
+
; CHECK-NEXT: [[B2]]: ; %b2
; CHECK: mov w{{[0-9]+}}, #93
; CHECK: bl _extfunc
; CHECK: cbz w{{[0-9]+}}, [[B7]]
-
-; CHECK-NEXT: [[B8]]: ; %b8
-; CHECK-NEXT: ret
+; CHECK-NEXT: b [[B8]]
; CHECK-NEXT: [[B7]]: ; %b7
; CHECK: mov w{{[0-9]+}}, #13
; CHECK: b _extfunc
+
define void @split_block_no_fallthrough(i64 %val) #0 {
bb:
%c0 = icmp sgt i64 %val, -5
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index 86be3ccea1d..8c85fc3c68a 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -264,7 +264,7 @@ declare void @do_something() #1
define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
; CHECK: cmn
-; CHECK: b.gt
+; CHECK: b.le
; CHECK: cmp
; CHECK: b.gt
entry:
diff --git a/test/CodeGen/AArch64/optimize-cond-branch.ll b/test/CodeGen/AArch64/optimize-cond-branch.ll
index 4e3ca6f16e7..ab4ad5e2ce9 100644
--- a/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -11,7 +11,7 @@ target triple = "arm64--"
;
; CHECK-LABEL: func
; CHECK-NOT: and
-; CHECK: tbnz
+; CHECK: tbz
define void @func() {
%c0 = icmp sgt i64 0, 0
br i1 %c0, label %b1, label %b6
diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll
index 104dd45e8a1..d4538ee2d25 100644
--- a/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/test/CodeGen/AMDGPU/basic-branch.ll
@@ -8,13 +8,10 @@
; GCNNOOPT: v_writelane_b32
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
-
-; GCN: ; BB#1
; GCNNOOPT: v_readlane_b32
; GCNNOOPT: v_readlane_b32
; GCN: buffer_store_dword
-; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
-; TODO: This waitcnt can be eliminated
+; GCNNOOPT: s_endpgm
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll
index 3fd40521801..18bbc3e5f07 100644
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -491,7 +491,8 @@ ret:
; GCN-LABEL: {{^}}long_branch_hang:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
-; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_scc1 {{BB[0-9]+_[0-9]+}}
+; GCN-NEXT: s_branch [[LONG_BR_0:BB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
diff --git a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index 0d919bbf85e..03d8cd9eeea 100644
--- a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s
; GCN-LABEL: {{^}}test_loop:
-; GCN: [[LABEL:BB[0-9+]_[0-9]+]]:
+; GCN: [[LABEL:BB[0-9+]_[0-9]+]]: ; %for.body{{$}}
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
diff --git a/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/test/CodeGen/AMDGPU/convergent-inlineasm.ll
index 755f439c686..c3d155ce747 100644
--- a/test/CodeGen/AMDGPU/convergent-inlineasm.ll
+++ b/test/CodeGen/AMDGPU/convergent-inlineasm.ll
@@ -29,6 +29,7 @@ bb5: ; preds = %bb3, %bb
; GCN: v_cmp_ne_u32_e64
; GCN: BB{{[0-9]+_[0-9]+}}:
+
define void @nonconvergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll
index 37083fbbd3c..9c153841a63 100644
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -439,7 +439,7 @@ entry:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN-NOHSA: buffer_store_dword [[ONE]]
; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]
-; GCN; {{^}}[[EXIT]]:
+; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
bb3: ; preds = %bb2
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 52cc37e2408..b8f2980be75 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -12,11 +12,11 @@
; CHECK: bl _quux
; CHECK-NOT: bl _quux
-; NOMERGE: bl _baz
-; NOMERGE: bl _baz
+; NOMERGE-DAG: bl _baz
+; NOMERGE-DAG: bl _baz
-; NOMERGE: bl _quux
-; NOMERGE: bl _quux
+; NOMERGE-DAG: bl _quux
+; NOMERGE-DAG: bl _quux
; ModuleID = 'tail.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index 364bd5d1369..8a82af6a69b 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -66,14 +66,14 @@ entry:
; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: moveq r0, #1
; CHECK-ARMV7-NEXT: bxeq lr
; CHECK-ARMV7-NEXT: [[TRY]]:
-; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
-; CHECK-ARMV7-NEXT: mov [[RES]], #0
+; CHECK-ARMV7-NEXT: mov r0, #0
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index 442459bc058..eb32ee54c09 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -135,7 +135,7 @@ define void @test_fold_point(i1 %tst) minsize {
; Important to check for beginning of basic block, because if it gets
; if-converted the test is probably no longer checking what it should.
-; CHECK: {{LBB[0-9]+_2}}:
+; CHECK: %end
; CHECK-NEXT: vpop {d7, d8}
; CHECK-NEXT: pop {r4, pc}
diff --git a/test/CodeGen/PowerPC/tail-dup-break-cfg.ll b/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
index 4be737e89a9..f19b11f2ae4 100644
--- a/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
+++ b/test/CodeGen/PowerPC/tail-dup-break-cfg.ll
@@ -16,14 +16,14 @@ target triple = "powerpc64le-grtev4-linux-gnu"
;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: beq 0, [[EXITLABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: b [[BODY2LABEL:[._0-9A-Za-z]+]]
+;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]]
+;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
+;CHECK: blr
;CHECK-NEXT: [[BODY1LABEL]]
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
;CHECK-NEXT: beq 0, [[EXITLABEL]]
-;CHECK-NEXT: [[BODY2LABEL]]
-;CHECK: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
-;CHECK: blr
+;CHECK-NEXT: [[BODY2LABEL:[._0-9A-Za-z]+]]:
+;CHECK: b [[EXITLABEL]]
define void @tail_dup_break_cfg(i32 %tag) {
entry:
br label %test1
@@ -79,7 +79,7 @@ body1:
test2:
%tagbit2 = and i32 %tag, 2
%tagbit2eq0 = icmp ne i32 %tagbit2, 0
- br i1 %tagbit2eq0, label %body2, label %exit, !prof !1 ; %body2 more likely
+ br i1 %tagbit2eq0, label %body2, label %exit, !prof !3 ; %body2 more likely
body2:
call void @b()
call void @b()
@@ -137,4 +137,4 @@ ret:
!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 95, i32 5}
-!3 = !{!"branch_weights", i32 7, i32 3}
+!3 = !{!"branch_weights", i32 8, i32 3}
diff --git a/test/CodeGen/PowerPC/tail-dup-layout.ll b/test/CodeGen/PowerPC/tail-dup-layout.ll
index 6790aa8e944..95c23ade513 100644
--- a/test/CodeGen/PowerPC/tail-dup-layout.ll
+++ b/test/CodeGen/PowerPC/tail-dup-layout.ll
@@ -1,59 +1,59 @@
-; RUN: llc -outline-optional-branches -O2 < %s | FileCheck %s
+; RUN: llc -O2 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"
; Intended layout:
-; The outlining flag produces the layout
+; The chain-based outlining produces the layout
; test1
; test2
; test3
; test4
-; exit
; optional1
; optional2
; optional3
; optional4
+; exit
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional 2.
-; The CHECK statements check for the whole string of tests and exit block,
+; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
-;CHECK-LABEL: f:
+;CHECK-LABEL: straight_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
-;CHECK-NEXT: bc 12, 1, [[OPT1LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST2LABEL:[._0-9A-Za-z]+]]: # %test2
+;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
+;CHECK-NEXT: # %test2
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: bne 0, [[OPT2LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
+;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
+;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[TEST4LABEL:[._0-9A-Za-z]+]]: # %test4
+;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
+;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: bne 0, .[[OPT4LABEL:[._0-9A-Za-z]+]]
-;CHECK-NEXT: [[EXITLABEL:[._0-9A-Za-z]+]]: # %exit
+;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
+;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
-;CHECK-NEXT: [[OPT1LABEL]]
+;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
-;CHECK-NEXT: beq 0, [[TEST3LABEL]]
-;CHECK-NEXT: [[OPT2LABEL]]
+;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
+;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
-;CHECK-NEXT: beq 0, [[TEST4LABEL]]
-;CHECK-NEXT: [[OPT3LABEL]]
+;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
+;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
-;CHECK-NEXT: beq 0, [[EXITLABEL]]
-;CHECK-NEXT: [[OPT4LABEL]]
-;CHECK: b [[EXITLABEL]]
+;CHECK-NEXT: beq 0, .[[EXITLABEL]]
+;CHECK-NEXT: .[[OPT4LABEL]]:
+;CHECK: b .[[EXITLABEL]]
-define void @f(i32 %tag) {
+define void @straight_test(i32 %tag) {
entry:
br label %test1
test1:
%tagbit1 = and i32 %tag, 1
%tagbit1eq0 = icmp eq i32 %tagbit1, 0
- br i1 %tagbit1eq0, label %test2, label %optional1
+ br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
call void @a()
call void @a()
@@ -63,7 +63,7 @@ optional1:
test2:
%tagbit2 = and i32 %tag, 2
%tagbit2eq0 = icmp eq i32 %tagbit2, 0
- br i1 %tagbit2eq0, label %test3, label %optional2
+ br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
call void @b()
call void @b()
@@ -73,7 +73,7 @@ optional2:
test3:
%tagbit3 = and i32 %tag, 4
%tagbit3eq0 = icmp eq i32 %tagbit3, 0
- br i1 %tagbit3eq0, label %test4, label %optional3
+ br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
call void @c()
call void @c()
@@ -83,7 +83,7 @@ optional3:
test4:
%tagbit4 = and i32 %tag, 8
%tagbit4eq0 = icmp eq i32 %tagbit4, 0
- br i1 %tagbit4eq0, label %exit, label %optional4
+ br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
optional4:
call void @d()
call void @d()
@@ -94,7 +94,333 @@ exit:
ret void
}
+; Intended layout:
+; The chain-based outlining produces the layout
+; entry
+; --- Begin loop ---
+; for.latch
+; for.check
+; test1
+; test2
+; test3
+; test4
+; optional1
+; optional2
+; optional3
+; optional4
+; --- End loop ---
+; exit
+; The CHECK statements check for the whole string of tests and exit block,
+; and then check that the correct test has been duplicated into the end of
+; the optional blocks and that the optional blocks are in the correct order.
+;CHECK-LABEL: loop_test:
+;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
+;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
+;CHECK: addi
+;CHECK: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
+;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
+;CHECK: # %test1
+;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
+;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
+;CHECK-NEXT: # %test2
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
+;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
+;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
+;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
+;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
+;CHECK: [[OPT1LABEL]]
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
+;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
+;CHECK-NEXT: .[[OPT2LABEL]]
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 29, 29
+;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
+;CHECK-NEXT: .[[OPT3LABEL]]
+;CHECK: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 28, 28
+;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
+;CHECK: [[OPT4LABEL]]:
+;CHECK: b .[[LATCHLABEL]]
+define void @loop_test(i32* %tags, i32 %count) {
+entry:
+ br label %for.check
+for.check:
+ %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
+ %done.count = icmp ugt i32 %count.loop, 0
+ %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
+ %tag = load i32, i32* %tag_ptr
+ %done.tag = icmp eq i32 %tag, 0
+ %done = and i1 %done.count, %done.tag
+ br i1 %done, label %test1, label %exit, !prof !1
+test1:
+ %tagbit1 = and i32 %tag, 1
+ %tagbit1eq0 = icmp eq i32 %tagbit1, 0
+ br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
+optional1:
+ call void @a()
+ call void @a()
+ call void @a()
+ call void @a()
+ br label %test2
+test2:
+ %tagbit2 = and i32 %tag, 2
+ %tagbit2eq0 = icmp eq i32 %tagbit2, 0
+ br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
+optional2:
+ call void @b()
+ call void @b()
+ call void @b()
+ call void @b()
+ br label %test3
+test3:
+ %tagbit3 = and i32 %tag, 4
+ %tagbit3eq0 = icmp eq i32 %tagbit3, 0
+ br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
+optional3:
+ call void @c()
+ call void @c()
+ call void @c()
+ call void @c()
+ br label %test4
+test4:
+ %tagbit4 = and i32 %tag, 8
+ %tagbit4eq0 = icmp eq i32 %tagbit4, 0
+ br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
+optional4:
+ call void @d()
+ call void @d()
+ call void @d()
+ call void @d()
+ br label %for.latch
+for.latch:
+ %count.sub = sub i32 %count.loop, 1
+ br label %for.check
+exit:
+ ret void
+}
+
+; The block then2 is not unavoidable, meaning it does not dominate the exit.
+; But since it can be tail-duplicated, it should be placed as a fallthrough from
+; test2 and copied. The purpose here is to make sure that the tail-duplication
+; code is independent of the outlining code, which works by choosing the
+; "unavoidable" blocks.
+; CHECK-LABEL: avoidable_test:
+; CHECK: # %entry
+; CHECK: andi.
+; CHECK: # %test2
+; Make sure then2 falls through from test2
+; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
+; CHECK: # %then2
+; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
+; CHECK: # %else1
+; CHECK: bl a
+; CHECK: bl a
+; Make sure then2 was copied into else1
+; CHECK: rlwinm. {{[0-9]+}}, {{[0-9]+}}, 0, 29, 29
+; CHECK: # %end1
+; CHECK: bl d
+; CHECK: # %else2
+; CHECK: bl c
+; CHECK: # %end2
+define void @avoidable_test(i32 %tag) {
+entry:
+ br label %test1
+test1:
+ %tagbit1 = and i32 %tag, 1
+ %tagbit1eq0 = icmp eq i32 %tagbit1, 0
+ br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
+else1:
+ call void @a()
+ call void @a()
+ br label %then2
+test2:
+ %tagbit2 = and i32 %tag, 2
+ %tagbit2eq0 = icmp eq i32 %tagbit2, 0
+ br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
+then2:
+ %tagbit3 = and i32 %tag, 4
+ %tagbit3eq0 = icmp eq i32 %tagbit3, 0
+ br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
+else2:
+ call void @c()
+ br label %end2
+end2:
+ ret void
+end1:
+ call void @d()
+ ret void
+}
+
+; CHECK-LABEL: trellis_test
+; The number in the block labels is the expected block frequency given the
+; probabilities annotated. There is a conflict in the b;c->d;e trellis that
+; should be resolved as c->e;b->d.
+; The d;e->f;g trellis should be resolved as e->g;d->f.
+; The f;g->h;i trellis should be resolved as f->i;g->h.
+; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
+; h->j->ret
+; CHECK: # %entry
+; CHECK: # %c10
+; CHECK: # %e9
+; CHECK: # %g10
+; CHECK: # %h10
+; CHECK: # %j8
+; CHECK: # %ret
+; CHECK: # %b6
+; CHECK: # %d7
+; CHECK: # %f6
+; CHECK: # %i6
+define void @trellis_test(i32 %tag) {
+entry:
+ br label %a16
+a16:
+ call void @a()
+ call void @a()
+ %tagbits.a = and i32 %tag, 3
+ %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
+ br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
+c10:
+ call void @c()
+ call void @c()
+ %tagbits.c = and i32 %tag, 12
+ %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
+ ; Both of these edges should be hotter than the other incoming edge
+ ; for e9 or d7
+ br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
+e9:
+ call void @e()
+ call void @e()
+ %tagbits.e = and i32 %tag, 48
+ %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
+ br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
+g10:
+ call void @g()
+ call void @g()
+ %tagbits.g = and i32 %tag, 192
+ %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
+ br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
+i6:
+ call void @i()
+ call void @i()
+ %tagbits.i = and i32 %tag, 768
+ %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
+ br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
+b6:
+ call void @b()
+ call void @b()
+ %tagbits.b = and i32 %tag, 12
+ %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
+ br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
+d7:
+ call void @d()
+ call void @d()
+ %tagbits.d = and i32 %tag, 48
+ %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
+ br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
+f6:
+ call void @f()
+ call void @f()
+ %tagbits.f = and i32 %tag, 192
+ %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
+ br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
+h10:
+ call void @h()
+ call void @h()
+ %tagbits.h = and i32 %tag, 768
+ %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
+ br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
+j8:
+ call void @j()
+ call void @j()
+ br label %ret
+ret:
+ ret void
+}
+
+; Verify that we still consider tail-duplication opportunities if we find a
+; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
+; of both F and G. The basic trellis algorithm picks the F->G edge, but after
+; checking, it's profitable to duplicate G into F. The weights here are not
+; really important. They are there to help make the test stable.
+; CHECK-LABEL: trellis_then_dup_test
+; CHECK: # %entry
+; CHECK: # %b
+; CHECK: # %d
+; CHECK: # %g
+; CHECK: # %ret1
+; CHECK: # %c
+; CHECK: # %e
+; CHECK: # %f
+; CHECK: # %ret2
+; CHECK: # %ret
+define void @trellis_then_dup_test(i32 %tag) {
+entry:
+ br label %a
+a:
+ call void @a()
+ call void @a()
+ %tagbits.a = and i32 %tag, 3
+ %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
+ br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
+b:
+ call void @b()
+ call void @b()
+ %tagbits.b = and i32 %tag, 12
+ %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
+ br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
+d:
+ call void @d()
+ call void @d()
+ %tagbits.d = and i32 %tag, 48
+ %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
+ br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
+f:
+ call void @f()
+ call void @f()
+ br label %g
+g:
+ %tagbits.g = and i32 %tag, 192
+ %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
+ br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
+c:
+ call void @c()
+ call void @c()
+ %tagbits.c = and i32 %tag, 12
+ %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
+ br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
+e:
+ call void @e()
+ call void @e()
+ %tagbits.e = and i32 %tag, 48
+ %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
+ br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
+ret1:
+ call void @a()
+ br label %ret
+ret2:
+ call void @b()
+ br label %ret
+ret:
+ ret void
+}
+
declare void @a()
declare void @b()
declare void @c()
declare void @d()
+declare void @e()
+declare void @f()
+declare void @g()
+declare void @h()
+declare void @i()
+declare void @j()
+
+!1 = !{!"branch_weights", i32 5, i32 3}
+!2 = !{!"branch_weights", i32 50, i32 50}
+!3 = !{!"branch_weights", i32 6, i32 4}
+!4 = !{!"branch_weights", i32 7, i32 2}
+!5 = !{!"branch_weights", i32 2, i32 8}
+!6 = !{!"branch_weights", i32 3, i32 4}
+!7 = !{!"branch_weights", i32 4, i32 2}
diff --git a/test/CodeGen/SPARC/sjlj.ll b/test/CodeGen/SPARC/sjlj.ll
index 647d8f2fd2c..459630f9255 100755
--- a/test/CodeGen/SPARC/sjlj.ll
+++ b/test/CodeGen/SPARC/sjlj.ll
@@ -67,14 +67,18 @@ return: ; preds = %if.end, %if.then
; CHECK: nop
; CHECK:.LBB1_1: ! %entry
; CHECK: mov %g0, %i0
+; CHECK: ! %entry
; CHECK: cmp %i0, 0
-; CHECK: bne .LBB1_4
-; CHECK: ba .LBB1_5
-; CHECK:.LBB1_2: ! Block address taken
-; CHECK: mov 1, %i0
; CHECK: be .LBB1_5
+; CHECK: nop
; CHECK:.LBB1_4:
-; CHECK: ba .LBB1_6
+; CHECK: mov 1, %i0
+; CHECK: ba .LBB1_6
+; CHECK:.LBB1_2: ! Block address taken
+; CHECK: mov 1, %i0
+; CHECK: cmp %i0, 0
+; CHECK: bne .LBB1_4
+; CHECK: nop
}
declare i8* @llvm.frameaddress(i32) #2
diff --git a/test/CodeGen/WebAssembly/mem-intrinsics.ll b/test/CodeGen/WebAssembly/mem-intrinsics.ll
index 0ac1e1e182c..0e2f06b70b8 100644
--- a/test/CodeGen/WebAssembly/mem-intrinsics.ll
+++ b/test/CodeGen/WebAssembly/mem-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0| FileCheck %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -tail-dup-placement=0 | FileCheck %s
; Test memcpy, memmove, and memset intrinsics.
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 807dfe464cb..4969b188697 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -314,7 +314,7 @@ exit:
define void @unnatural_cfg1() {
; Test that we can handle a loop with an inner unnatural loop at the end of
; a function. This is a gross CFG reduced out of the single source GCC.
-; CHECK: unnatural_cfg1
+; CHECK-LABEL: unnatural_cfg1
; CHECK: %entry
; CHECK: %loop.body1
; CHECK: %loop.body2
@@ -352,17 +352,15 @@ define void @unnatural_cfg2() {
; Test that we can handle a loop with a nested natural loop *and* an unnatural
; loop. This was reduced from a crash on block placement when run over
; single-source GCC.
-; CHECK: unnatural_cfg2
+; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
; CHECK: %loop.body1
; CHECK: %loop.body2
-; CHECK: %loop.body3
-; CHECK: %loop.inner1.begin
-; The end block is folded with %loop.body3...
-; CHECK-NOT: %loop.inner1.end
; CHECK: %loop.body4
; CHECK: %loop.inner2.begin
-; The loop.inner2.end block is folded
+; CHECK: %loop.inner2.begin
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
; CHECK: %loop.header
; CHECK: %bail
@@ -559,7 +557,7 @@ define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx
; didn't correctly locate the fallthrough successor, assuming blindly that the
; first one was the fallthrough successor. As a result, we would add an
; erroneous jump to the landing pad thinking *that* was the default successor.
-; CHECK: test_eh_lpad_successor
+; CHECK-LABEL: test_eh_lpad_successor
; CHECK: %entry
; CHECK-NOT: jmp
; CHECK: %loop
@@ -587,7 +585,7 @@ define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personal
; fallthrough simply won't occur. Make sure we don't crash trying to update
; terminators for such constructs.
;
-; CHECK: test_eh_throw
+; CHECK-LABEL: test_eh_throw
; CHECK: %entry
; CHECK: %cleanup
@@ -609,7 +607,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
; attempt to merge onto the wrong end of the inner loop just because we find it
; first. This was reduced from a crasher in GCC's single source.
;
-; CHECK: test_unnatural_cfg_backwards_inner_loop
+; CHECK-LABEL: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %loop2b
; CHECK: %loop1
@@ -649,7 +647,7 @@ define void @unanalyzable_branch_to_loop_header() {
; fallthrough because that happens to always produce unanalyzable branches on
; x86.
;
-; CHECK: unanalyzable_branch_to_loop_header
+; CHECK-LABEL: unanalyzable_branch_to_loop_header
; CHECK: %entry
; CHECK: %loop
; CHECK: %exit
@@ -673,7 +671,7 @@ define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; This branch is now analyzable and hence the destination block becomes the
; hotter one. The right order is entry->bar->exit->foo.
;
-; CHECK: unanalyzable_branch_to_best_succ
+; CHECK-LABEL: unanalyzable_branch_to_best_succ
; CHECK: %entry
; CHECK: %bar
; CHECK: %exit
@@ -699,7 +697,7 @@ define void @unanalyzable_branch_to_free_block(float %x) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the best free block in the CFG.
;
-; CHECK: unanalyzable_branch_to_free_block
+; CHECK-LABEL: unanalyzable_branch_to_free_block
; CHECK: %entry
; CHECK: %a
; CHECK: %b
@@ -729,7 +727,7 @@ define void @many_unanalyzable_branches() {
; Ensure that we don't crash as we're building up many unanalyzable branches,
; blocks, and loops.
;
-; CHECK: many_unanalyzable_branches
+; CHECK-LABEL: many_unanalyzable_branches
; CHECK: %entry
; CHECK: %exit
@@ -948,7 +946,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
; strange layouts that are significantly less efficient, often times making
; it discontiguous.
;
-; CHECK: @benchmark_heapsort
+; CHECK-LABEL: @benchmark_heapsort
; CHECK: %entry
; First rotated loop top.
; CHECK: .p2align
diff --git a/test/CodeGen/X86/bypass-slow-division-32.ll b/test/CodeGen/X86/bypass-slow-division-32.ll
index ea545d22385..9f266647d8a 100644
--- a/test/CodeGen/X86/bypass-slow-division-32.ll
+++ b/test/CodeGen/X86/bypass-slow-division-32.ll
@@ -95,20 +95,19 @@ define i32 @Test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
; CHECK-NEXT: idivl %ebx
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: testl $-256, %edi
-; CHECK-NEXT: jne .LBB3_5
-; CHECK-NEXT: jmp .LBB3_4
-; CHECK-NEXT: .LBB3_1:
-; CHECK-NEXT: movzbl %cl, %eax
-; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
-; CHECK-NEXT: divb %bl
-; CHECK-NEXT: movzbl %al, %esi
-; CHECK-NEXT: testl $-256, %edi
; CHECK-NEXT: je .LBB3_4
; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divl %ebx
; CHECK-NEXT: jmp .LBB3_6
+; CHECK-NEXT: .LBB3_1:
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
+; CHECK-NEXT: divb %bl
+; CHECK-NEXT: movzbl %al, %esi
+; CHECK-NEXT: testl $-256, %edi
+; CHECK-NEXT: jne .LBB3_5
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 9488d6d2605..dfc1aefd31a 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -60,7 +60,13 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: jne .LBB1_5
-; X32-NEXT: jmp .LBB1_4
+; X32-NEXT: .LBB1_4:
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: jne .LBB1_8
+; X32-NEXT: .LBB1_7:
+; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X32-NEXT: jmp .LBB1_9
; X32-NEXT: .LBB1_1:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
@@ -68,17 +74,9 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X32-NEXT: .LBB1_5: # %entry
; X32-NEXT: xorps %xmm2, %xmm2
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X32-NEXT: jne .LBB1_8
-; X32-NEXT: jmp .LBB1_7
-; X32-NEXT: .LBB1_4:
-; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: je .LBB1_7
; X32-NEXT: .LBB1_8: # %entry
; X32-NEXT: xorps %xmm3, %xmm3
-; X32-NEXT: jmp .LBB1_9
-; X32-NEXT: .LBB1_7:
-; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: .LBB1_9: # %entry
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
@@ -99,7 +97,13 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: testl %edx, %edx
; X64-NEXT: jne .LBB1_5
-; X64-NEXT: jmp .LBB1_4
+; X64-NEXT: .LBB1_4:
+; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: testl %r8d, %r8d
+; X64-NEXT: jne .LBB1_8
+; X64-NEXT: .LBB1_7:
+; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; X64-NEXT: jmp .LBB1_9
; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: testl %edx, %edx
@@ -107,17 +111,9 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: .LBB1_5: # %entry
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: testl %r8d, %r8d
-; X64-NEXT: jne .LBB1_8
-; X64-NEXT: jmp .LBB1_7
-; X64-NEXT: .LBB1_4:
-; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: testl %r8d, %r8d
; X64-NEXT: je .LBB1_7
; X64-NEXT: .LBB1_8: # %entry
; X64-NEXT: xorps %xmm3, %xmm3
-; X64-NEXT: jmp .LBB1_9
-; X64-NEXT: .LBB1_7:
-; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X64-NEXT: .LBB1_9: # %entry
; X64-NEXT: testl %esi, %esi
; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
@@ -215,7 +211,7 @@ define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
ret <4 x i32> %zext
}
-; Fragile test warning - we need to induce the generation of a vselect
+; Fragile test warning - we need to induce the generation of a vselect
; post-legalization to cause the crash seen in:
; https://llvm.org/bugs/show_bug.cgi?id=31672
; Is there a way to do that without an unsafe/fast sqrt intrinsic call?
diff --git a/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 197fd72586a..80346018ff8 100644
--- a/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -6,13 +6,13 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: tail_dup_merge_loops
; CHECK: # %entry
; CHECK-NOT: # %{{[a-zA-Z_]+}}
+; CHECK: # %exit
+; CHECK-NOT: # %{{[a-zA-Z_]+}}
; CHECK: # %inner_loop_exit
; CHECK-NOT: # %{{[a-zA-Z_]+}}
; CHECK: # %inner_loop_latch
; CHECK-NOT: # %{{[a-zA-Z_]+}}
; CHECK: # %inner_loop_test
-; CHECK-NOT: # %{{[a-zA-Z_]+}}
-; CHECK: # %exit
define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 {
entry:
%notlhs674.i = icmp eq i32 %a, 0
diff --git a/test/CodeGen/X86/tail-dup-repeat.ll b/test/CodeGen/X86/tail-dup-repeat.ll
index 21b48e16efb..7d9c0908e57 100644
--- a/test/CodeGen/X86/tail-dup-repeat.ll
+++ b/test/CodeGen/X86/tail-dup-repeat.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O2 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s
+; RUN: llc -O3 -tail-dup-placement-threshold=4 -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index e7797426d89..96ff33ff5f7 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -113,16 +113,15 @@ altret:
; CHECK-NEXT: jbe .LBB2_3
; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
; CHECK-NEXT: ja .LBB2_4
-; CHECK-NEXT: jmp .LBB2_2
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
; CHECK-NEXT: jbe .LBB2_2
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB2_2:
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: ret
define i1 @dont_merge_oddly(float* %result) nounwind {
entry:
diff --git a/test/CodeGen/X86/twoaddr-coalesce-3.ll b/test/CodeGen/X86/twoaddr-coalesce-3.ll
index 33c9d46f13c..f5a7326c970 100644
--- a/test/CodeGen/X86/twoaddr-coalesce-3.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce-3.ll
@@ -19,7 +19,7 @@ for.body.lr.ph: ; preds = %entry
; Check that only one mov will be generated in the kernel loop.
; CHECK-LABEL: foo:
-; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
+; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}}
; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]
; CHECK-NOT: mov
@@ -56,7 +56,7 @@ for.body.lr.ph: ; preds = %entry
; Check that only two mov will be generated in the kernel loop.
; CHECK-LABEL: goo:
-; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
+; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body{{$}}
; CHECK-NOT: mov
; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]
; CHECK-NOT: mov
diff --git a/test/CodeGen/X86/win-alloca-expander.ll b/test/CodeGen/X86/win-alloca-expander.ll
index 45ca3b214ab..4b6e3bb18e6 100644
--- a/test/CodeGen/X86/win-alloca-expander.ll
+++ b/test/CodeGen/X86/win-alloca-expander.ll
@@ -115,34 +115,36 @@ define void @cfg(i1 %x, i1 %y) {
; Test that the blocks are analyzed in the correct order.
; CHECK-LABEL: cfg:
entry:
- br i1 %x, label %bb1, label %bb2
+ br i1 %x, label %bb1, label %bb3
bb1:
%p1 = alloca %struct.S
; CHECK: pushl %eax
; CHECK: subl $1020, %esp
- br label %bb3
+ br label %bb4
+
bb2:
- %p2 = alloca %struct.T
+ %p5 = alloca %struct.T
; CHECK: pushl %eax
; CHECK: subl $2996, %esp
- br label %bb3
+ call void @g(%struct.T* %p5)
+ ret void
bb3:
- br i1 %y, label %bb4, label %bb5
+ %p2 = alloca %struct.T
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+ br label %bb4
bb4:
+ br i1 %y, label %bb5, label %bb2
+
+bb5:
%p4 = alloca %struct.S
; CHECK: subl $1024, %esp
call void @f(%struct.S* %p4)
ret void
-bb5:
- %p5 = alloca %struct.T
-; CHECK: pushl %eax
-; CHECK: subl $2996, %esp
- call void @g(%struct.T* %p5)
- ret void
}