summaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2018-01-17 16:33:44 +0000
committerHans Wennborg <hans@hanshq.net>2018-01-17 16:33:44 +0000
commita8191faa103d89e0988b13e4dafdcac559acd356 (patch)
treee79b522e770b8f8a216c300206e782d5b7334097 /test/Transforms
parent57ed2ef9cfdb90107904c705e7f84e816be22d92 (diff)
Merging r321751, r321806, and r321878:
------------------------------------------------------------------------ r321751 | arsenm | 2018-01-03 10:45:37 -0800 (Wed, 03 Jan 2018) | 25 lines StructurizeCFG: Fix broken backedge detection The work order was changed in r228186 from SCC order to RPO with an arbitrary sorting function. The sorting function attempted to move inner loop nodes earlier. This was apparently relying on an assumption that every block in a given loop / the same loop depth would be seen before visiting another loop. In the broken testcase, a block outside of the loop was encountered before moving onto another block in the same loop. The testcase would then structurize such that one block's unconditional successor could never be reached. Revert to plain RPO for the analysis phase. This fixes detecting edges as backedges that aren't really. The processing phase does use another visited set, and I'm unclear on whether the order there is as important. An arbitrary order doesn't work, and triggers some infinite loops. The reversed RPO list seems to work and is closer to the order that was used before, minus the arbitrary custom sorting. A few of the changed tests now produce smaller code, and a few are slightly worse looking. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r321806 | arsenm | 2018-01-04 09:23:24 -0800 (Thu, 04 Jan 2018) | 4 lines StructurizeCFG: xfail one of the testcases from r321751 It fails with -verify-region-info. This seems to be an issue with RegionInfo itself which existed before. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r321878 | arsenm | 2018-01-05 09:51:36 -0800 (Fri, 05 Jan 2018) | 4 lines RegionInfo: Use report_fatal_error instead of llvm_unreachable Otherwise when using -verify-region-info in a release build the error won't be emitted. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@322686 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll77
-rw-r--r--test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll163
-rw-r--r--test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg2
-rw-r--r--test/Transforms/StructurizeCFG/nested-loop-order.ll83
4 files changed, 298 insertions, 27 deletions
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
new file mode 100644
index 00000000000..e9c54151cf2
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll
@@ -0,0 +1,77 @@
+; XFAIL: *
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
+
+; FIXME: Merge into backedge-id-bug
+; Variant which has an issue with region construction
+
+define amdgpu_kernel void @loop_backedge_misidentified_alt(i32 addrspace(1)* %arg0) #0 {
+entry:
+ %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+ %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
+ %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
+ br label %LOOP.HEADER
+
+LOOP.HEADER:
+ %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+ call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+ %tmp12 = zext i32 %i to i64
+ %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
+ %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
+ %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+ %tmp16 = and i32 %tmp15, 65535
+ %tmp17 = icmp eq i32 %tmp16, 1
+ br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+ %tmp19 = extractelement <2 x i32> %tmp, i64 0
+ %tmp22 = lshr i32 %tmp19, 16
+ %tmp24 = urem i32 %tmp22, 52
+ %tmp25 = mul nuw nsw i32 %tmp24, 52
+ br label %INNER_LOOP
+
+INNER_LOOP:
+ %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+ call void asm sideeffect "; inner loop body", ""() #0
+ %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+ %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+ br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+ %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+ call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+ br label %END_ELSE_BLOCK
+
+bb62:
+ %load13 = icmp ult i32 %tmp16, 271
+ ;br i1 %load13, label %bb64, label %INCREMENT_I
+ ; branching directly to the return avoids the bug
+ br i1 %load13, label %RETURN, label %INCREMENT_I
+
+
+bb64:
+ call void asm sideeffect "s_nop 42", "~{memory}"() #0
+ br label %RETURN
+
+INCREMENT_I:
+ %inc.i = add i32 %i, 1
+ call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+ br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+ %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+ call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+ %cmp.end.else.block = icmp eq i32 %i.final, -1
+ br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+ call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+ store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
new file mode 100644
index 00000000000..9cddffdd179
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s
+
+; StructurizeCFG::orderNodes used an arbitrary and nonsensical sorting
+; function which broke the basic backedge identification algorithm. It
+; would use RPO order, but then do a weird partial sort by the loop
+; depth assuming blocks are sorted by loop. However a block can appear
+; in between blocks of a loop that is not part of a loop, breaking the
+; assumption of the sort.
+;
+; The collectInfos must be done in RPO order. The actual
+; structurization order I think is less important, but unless the loop
+; headers are identified in RPO order, it finds the wrong set of back
+; edges.
+
+define amdgpu_kernel void @loop_backedge_misidentified(i32 addrspace(1)* %arg0) #0 {
+; CHECK-LABEL: @loop_backedge_misidentified(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
+; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: LOOP.HEADER:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[FLOW4:%.*]] ]
+; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b
+; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
+; CHECK: Flow2:
+; CHECK-NEXT: br label [[FLOW]]
+; CHECK: bb18:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP19]], 16
+; CHECK-NEXT: [[TMP24:%.*]] = urem i32 [[TMP22]], 52
+; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
+; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
+; CHECK: Flow3:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
+; CHECK-NEXT: br i1 [[TMP2]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW4]]
+; CHECK: INNER_LOOP:
+; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: [[INNER_LOOP_J_INC]] = add nsw i32 [[INNER_LOOP_J]], 1
+; CHECK-NEXT: [[INNER_LOOP_CMP:%.*]] = icmp eq i32 [[INNER_LOOP_J]], 0
+; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
+; CHECK: INNER_LOOP_BREAK:
+; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
+; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+; CHECK-NEXT: br label [[FLOW3:%.*]]
+; CHECK: bb62:
+; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
+; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[LOAD13]], true
+; CHECK-NEXT: br i1 [[TMP3]], label [[INCREMENT_I:%.*]], label [[FLOW1:%.*]]
+; CHECK: Flow1:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I]] ], [ undef, [[BB62]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
+; CHECK-NEXT: br i1 [[TMP6]], label [[BB64:%.*]], label [[FLOW2:%.*]]
+; CHECK: bb64:
+; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0
+; CHECK-NEXT: br label [[FLOW2]]
+; CHECK: Flow:
+; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ]
+; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW3]]
+; CHECK: INCREMENT_I:
+; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1
+; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336
+; CHECK-NEXT: br label [[FLOW1]]
+; CHECK: END_ELSE_BLOCK:
+; CHECK-NEXT: [[I_FINAL:%.*]] = phi i32 [ [[TMP1]], [[FLOW3]] ]
+; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337
+; CHECK-NEXT: [[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[I_FINAL]], -1
+; CHECK-NEXT: br label [[FLOW4]]
+; CHECK: Flow4:
+; CHECK-NEXT: [[TMP10]] = phi i32 [ [[I_FINAL]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW3]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ]
+; CHECK-NEXT: br i1 [[TMP11]], label [[RETURN:%.*]], label [[LOOP_HEADER]]
+; CHECK: RETURN:
+; CHECK-NEXT: call void asm sideeffect "s_nop 0x99
+; CHECK-NEXT: store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
+ %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
+ %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
+ br label %LOOP.HEADER
+
+LOOP.HEADER:
+ %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
+ call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
+ %tmp12 = zext i32 %i to i64
+ %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
+ %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
+ %tmp15 = extractelement <4 x i32> %tmp14, i64 0
+ %tmp16 = and i32 %tmp15, 65535
+ %tmp17 = icmp eq i32 %tmp16, 1
+ br i1 %tmp17, label %bb18, label %bb62
+
+bb18:
+ %tmp19 = extractelement <2 x i32> %tmp, i64 0
+ %tmp22 = lshr i32 %tmp19, 16
+ %tmp24 = urem i32 %tmp22, 52
+ %tmp25 = mul nuw nsw i32 %tmp24, 52
+ br label %INNER_LOOP
+
+INNER_LOOP:
+ %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
+ call void asm sideeffect "; inner loop body", ""() #0
+ %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
+ %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
+ br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
+
+INNER_LOOP_BREAK:
+ %tmp59 = extractelement <4 x i32> %tmp14, i64 2
+ call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
+ br label %END_ELSE_BLOCK
+
+bb62:
+ %load13 = icmp ult i32 %tmp16, 271
+ br i1 %load13, label %bb64, label %INCREMENT_I
+
+bb64:
+ call void asm sideeffect "s_nop 42", "~{memory}"() #0
+ br label %RETURN
+
+INCREMENT_I:
+ %inc.i = add i32 %i, 1
+ call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
+ br label %END_ELSE_BLOCK
+
+END_ELSE_BLOCK:
+ %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
+ call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
+ %cmp.end.else.block = icmp eq i32 %i.final, -1
+ br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
+
+RETURN:
+ call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
+ store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
+ ret void
+}
+
+; The same function, except break to return block goes directly to the
+; return, which managed to hide the bug.
+; FIXME: Merge variant from backedge-id-bug-xfail
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
diff --git a/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
new file mode 100644
index 00000000000..2a665f06be7
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
index 58634d0d37d..7b5bd5acb62 100644
--- a/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -1,32 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
define void @main(float addrspace(1)* %out) {
-
-; CHECK: main_body:
-; CHECK: br label %LOOP.outer
+; CHECK-LABEL: @main(
+; CHECK-NEXT: main_body:
+; CHECK-NEXT: br label [[LOOP_OUTER:%.*]]
+; CHECK: LOOP.outer:
+; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ]
+; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: LOOP:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ]
+; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1
+; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true
+; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
+; CHECK: Flow2:
+; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ]
+; CHECK-NEXT: br label [[FLOW]]
+; CHECK: Flow3:
+; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]]
+; CHECK: ENDLOOP:
+; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3
+; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00
+; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]]
+; CHECK-NEXT: ret void
+; CHECK: ENDIF:
+; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true
+; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]]
+; CHECK: Flow1:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ]
+; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ]
+; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]]
+; CHECK: IF29:
+; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2
+; CHECK-NEXT: br label [[FLOW2]]
+; CHECK: Flow:
+; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ]
+; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ]
+; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]]
+; CHECK: ENDIF28:
+; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00
+; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2
+; CHECK-NEXT: br label [[FLOW1]]
+;
main_body:
br label %LOOP.outer
-; CHECK: LOOP.outer:
-; CHECK: br label %LOOP
LOOP.outer: ; preds = %ENDIF28, %main_body
%temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
%temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
br label %LOOP
-; CHECK: LOOP:
-; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
LOOP: ; preds = %IF29, %LOOP.outer
%temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
%tmp20 = add i32 %temp4.0, 1
%tmp22 = icmp sgt i32 %tmp20, 3
br i1 %tmp22, label %ENDLOOP, label %ENDIF
-; CHECK: Flow3
-; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
-
-; CHECK: ENDLOOP:
-; CHECK: ret void
ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
%temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
%tmp23 = icmp eq i32 %tmp20, 3
@@ -34,29 +78,14 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO
store float %.45, float addrspace(1)* %out
ret void
-; CHECK: ENDIF:
-; CHECK: br i1 %tmp31, label %IF29, label %Flow1
ENDIF: ; preds = %LOOP
%tmp31 = icmp sgt i32 %tmp20, 1
br i1 %tmp31, label %IF29, label %ENDIF28
-; CHECK: Flow:
-; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
-
-; CHECK: IF29:
-; CHECK: br label %Flow1
IF29: ; preds = %ENDIF
%tmp32 = icmp sgt i32 %tmp20, 2
br i1 %tmp32, label %ENDLOOP, label %LOOP
-; CHECK: Flow1:
-; CHECK: br label %Flow
-
-; CHECK: Flow2:
-; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
-
-; CHECK: ENDIF28:
-; CHECK: br label %Flow3
ENDIF28: ; preds = %ENDIF
%tmp35 = fadd float %temp8.0.ph, 1.0
%tmp36 = icmp sgt i32 %tmp20, 2