Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)

This is a re-commit of r235101, which also fixes the problems with the previous patch: - Switches with only a default case and non-fallthrough were handled incorrectly - The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here. > This is a major rewrite of the SelectionDAG switch lowering. The previous code > would lower switches as a binary tre, discovering clusters of cases > suitable for lowering by jump tables or bit tests as it went along. To increase > the likelihood of finding jump tables, the binary tree pivot was selected to > maximize case density on both sides of the pivot. > > By not selecting the pivot in the middle, the binary trees would not always > be balanced, leading to performance problems in the generated code. > > This patch rewrites the lowering to search for clusters of cases > suitable for jump tables or bit tests first, and then builds the binary > tree around those clusters. This way, the binary tree will always be balanced. > > This has the added benefit of decoupling the different aspects of the lowering: > tree building and jump table or bit tests finding are now easier to tweak > separately. > > For example, this will enable us to balance the tree based on profile info > in the future. > > The algorithm for finding jump tables is quadratic, whereas the previous algorithm > was O(n log n) for common cases, and quadratic only in the worst-case. This > doesn't seem to be major problem in practice, e.g. compiling a file consisting > of a 10k-case switch was only 30% slower, and such large switches should be rare > in practice. Compiling e.g. gcc.c showed no compile-time difference. If this > does turn out to be a problem, we could limit the search space of the algorithm. > > This commit also disables all optimizations during switch lowering in -O0. > > Differential Revision: http://reviews.llvm.org/D8649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235560 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hans Wennborg <hans@hanshq.net> 2015-04-22 23:14:56 +0000
committer: Hans Wennborg <hans@hanshq.net> 2015-04-22 23:14:56 +0000
commit: 395f4f4b2a3db77755fb0a08669b268e74f458c1 (patch)
tree: 80084f7516fa2db016d651d19212bba17ae05423 /test/CodeGen
parent: f9c92b069a1a6f73ec41b7741b2ac6b7740c36a6 (diff)
9 files changed, 409 insertions, 59 deletions
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 5da63dc5f02..e53d989ad52 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -4,8 +4,8 @@
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: t1:
-; CHECK: cmp r2, #1
-; CHECK: cmpne r2, #7
+; CHECK: cmp r2, #7
+; CHECK: cmpne r2, #1
 	switch i32 %c, label %cond_next [
 		 i32 1, label %cond_true
 		 i32 7, label %cond_true
diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll
index bca797d6dce..52f70fe1e0f 100644
--- a/test/CodeGen/ARM/struct-byval-frame-index.ll
+++ b/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -194,7 +194,7 @@ lor.lhs.false459:                                 ; preds = %if.end454
   %18 = load i32, i32* %mb_type, align 4
   switch i32 %18, label %for.inc503 [
     i32 9, label %if.then475
-    i32 10, label %if.then475
+    i32 11, label %if.then475
     i32 13, label %if.then475
     i32 14, label %if.then475
   ]
diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll
index 83277c98989..f10bd395abe 100644
--- a/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/test/CodeGen/Generic/MachineBranchProb.ll
@@ -17,9 +17,9 @@ entry:
 ; CHECK: BB#0: derived from LLVM BB %entry
 ; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
 ; CHECK: BB#4: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
+; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
 ; CHECK: BB#5: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
+; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
 
 sw.bb:
   br label %return
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll
index 7d3e225a1e2..52cf464b9fd 100644
--- a/test/CodeGen/PowerPC/early-ret.ll
+++ b/test/CodeGen/PowerPC/early-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -45,4 +45,37 @@ if.end3:                                          ; preds = %if.then, %if.then2,
 ; CHECK: blr
 }
 
+
+@.str0 = private unnamed_addr constant [2 x i8] c"a\00"
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00"
+@.str2 = private unnamed_addr constant [2 x i8] c"c\00"
+@.str3 = private unnamed_addr constant [2 x i8] c"d\00"
+@.str4 = private unnamed_addr constant [2 x i8] c"e\00"
+define i8* @dont_assert(i32 %x) {
+; LLVM would assert due to moving an early return into the jump table block and
+; removing one of its predecessors despite that block ending with an indirect
+; branch.
+entry:
+  switch i32 %x, label %sw.epilog [
+    i32 1, label %return
+    i32 2, label %sw.bb1
+    i32 3, label %sw.bb2
+    i32 4, label %sw.bb3
+    i32 255, label %sw.bb4
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.epilog: br label %return
+return:
+  %retval.0 = phi i8* [ null, %sw.epilog ],
+                      [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
+                      [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+                      [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+                      [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+                      [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
+  ret i8* %retval.0
+}
+
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
index 0c258459c91..19adbe5b7d9 100644
--- a/test/CodeGen/PowerPC/mcm-5.ll
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
-; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
 
 ; Test correct code generation for medium and large code model
 ; for loading the address of a jump table from the TOC.
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 46295cf3187..1ececf84926 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -3,6 +3,12 @@
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
 
+; Run jump table test separately since jump tables aren't generated at -O0.
+; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
+; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
+
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -92,6 +98,46 @@ entry:
 ; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
 ; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
 
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+  %0 = load i32, i32* @ti, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ti, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively declared variable ti.
+;
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+;
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+
+define i8* @test_fnaddr() nounwind {
+entry:
+  %func = alloca i32 (i32)*, align 8
+  store i32 (i32)* @foo, i32 (i32)** %func, align 8
+  %0 = load i32 (i32)*, i32 (i32)** %func, align 8
+  %1 = bitcast i32 (i32)* %0 to i8*
+  ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+;
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+
+
 define signext i32 @test_jump_table(i32 signext %i) nounwind {
 entry:
   %i.addr = alloca i32, align 4
@@ -139,47 +185,12 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a jump table address.
 ;
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-;
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-
-@ti = common global i32 0, align 4
-
-define signext i32 @test_tentative() nounwind {
-entry:
-  %0 = load i32, i32* @ti, align 4
-  %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* @ti, align 4
-  ret i32 %0
-}
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing tentatively declared variable ti.
-;
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-;
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-
-define i8* @test_fnaddr() nounwind {
-entry:
-  %func = alloca i32 (i32)*, align 8
-  store i32 (i32)* @foo, i32 (i32)** %func, align 8
-  %0 = load i32 (i32)*, i32 (i32)** %func, align 8
-  %1 = bitcast i32 (i32)* %0 to i8*
-  ret i8* %1
-}
-
-declare signext i32 @foo(i32 signext)
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing function address foo.
-;
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; MEDIUM-JT:      Relocations [
+; MEDIUM-JT:        Section ({{.*}}) .rela.text {
+; MEDIUM-JT-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; MEDIUM-JT-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
 ;
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; LARGE-JT:       Relocations [
+; LARGE-JT:         Section ({{.*}}) .rela.text {
+; LARGE-JT-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; LARGE-JT-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index dd0a8ac0431..8c1992a24ec 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -55,13 +55,15 @@ entry:
 	]
 
 bb:		; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+	call void @_Z3bari( i32 0 )
 	br label %bb1
 
 bb1:		; preds = %bb, %entry
+	call void @_Z3bari( i32 1 )
 	br label %bb2
 
 bb2:		; preds = %bb1, %entry
-	call void @_Z3bari( i32 1 )
+	call void @_Z3bari( i32 2 )
 	br label %bb11
 
 bb3:		; preds = %entry
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index 113782169b0..2cf3aafe547 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -140,19 +140,17 @@ sw.epilog:
 
 ; The balanced binary switch here would start with a comparison against 39, but
 ; it is currently starting with 29 because of the density-sum heuristic.
-; CHECK: cmpl $29
+; CHECK: cmpl $39
 ; CHECK: jg
 ; CHECK: cmpl $10
-; CHECK: jne
-; CHECK: cmpl $49
-; CHECK: jg
-; CHECK: cmpl $30
-; CHECK: jne
+; CHECK: je
 ; CHECK: cmpl $20
 ; CHECK: jne
+; CHECK: cmpl $40
+; CHECK: je
 ; CHECK: cmpl $50
 ; CHECK: jne
-; CHECK: cmpl $40
+; CHECK: cmpl $30
 ; CHECK: jne
 ; CHECK: cmpl $60
 ; CHECK: jne
diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll
new file mode 100644
index 00000000000..de9f38fc874
--- /dev/null
+++ b/test/CodeGen/X86/switch.ll
@@ -0,0 +1,306 @@
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
+
+declare void @g(i32)
+
+define void @basic(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 3, label %bb0
+    i32 1, label %bb1
+    i32 4, label %bb1
+    i32 5, label %bb0
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should be lowered as straight compares in -O0 mode.
+; NOOPT-LABEL: basic
+; NOOPT: subl $3, %eax
+; NOOPT: je
+; NOOPT: subl $1, %eax
+; NOOPT: je
+; NOOPT: subl $4, %eax
+; NOOPT: je
+; NOOPT: subl $5, %eax
+; NOOPT: je
+
+; Jump table otherwise.
+; CHECK-LABEL: basic
+; CHECK: decl
+; CHECK: cmpl $4
+; CHECK: ja
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @simple_ranges(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 0, label %bb0
+    i32 1, label %bb0
+    i32 2, label %bb0
+    i32 3, label %bb0
+    i32 100, label %bb1
+    i32 101, label %bb1
+    i32 102, label %bb1
+    i32 103, label %bb1
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should be lowered to two range checks.
+; CHECK-LABEL: simple_ranges
+; CHECK: leal -100
+; CHECK: cmpl $4
+; CHECK: jae
+; CHECK: cmpl $3
+; CHECK: ja
+}
+
+
+define void @jt_is_better(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 0, label %bb0
+    i32 2, label %bb0
+    i32 4, label %bb0
+    i32 1, label %bb1
+    i32 3, label %bb1
+    i32 5, label %bb1
+
+    i32 6, label %bb2
+    i32 7, label %bb3
+    i32 8, label %bb4
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+return: ret void
+
+; Cases 0-5 could be lowered with two bit tests,
+; but with 6-8, the whole switch is suitable for a jump table.
+; CHECK-LABEL: jt_is_better
+; CHECK: cmpl $8
+; CHECK: jbe
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @bt_is_better(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 0, label %bb0
+    i32 3, label %bb0
+    i32 6, label %bb0
+    i32 1, label %bb1
+    i32 4, label %bb1
+    i32 7, label %bb1
+    i32 2, label %bb2
+    i32 5, label %bb2
+    i32 8, label %bb2
+
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+return: ret void
+
+; This could be lowered as a jump table, but bit tests is more efficient.
+; CHECK-LABEL: bt_is_better
+; 73 = 2^0 + 2^3 + 2^6
+; CHECK: movl $73
+; CHECK: btl
+; 146 = 2^1 + 2^4 + 2^7
+; CHECK: movl $146
+; CHECK: btl
+; 292 = 2^2 + 2^5 + 2^8
+; CHECK: movl $292
+; CHECK: btl
+}
+
+
+define void @optimal_pivot1(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 100, label %bb0
+    i32 200, label %bb1
+    i32 300, label %bb0
+    i32 400, label %bb1
+    i32 500, label %bb0
+    i32 600, label %bb1
+
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should pivot around 400 for two subtrees of equal size.
+; CHECK-LABEL: optimal_pivot1
+; CHECK-NOT: cmpl
+; CHECK: cmpl $399
+}
+
+
+define void @optimal_pivot2(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 100, label %bb0   i32 101, label %bb1   i32 102, label %bb2   i32 103, label %bb3
+    i32 200, label %bb0   i32 201, label %bb1   i32 202, label %bb2   i32 203, label %bb3
+    i32 300, label %bb0   i32 301, label %bb1   i32 302, label %bb2   i32 303, label %bb3
+    i32 400, label %bb0   i32 401, label %bb1   i32 402, label %bb2   i32 403, label %bb3
+
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+return: ret void
+
+; Should pivot around 300 for two subtrees with two jump tables each.
+; CHECK-LABEL: optimal_pivot2
+; CHECK-NOT: cmpl
+; CHECK: cmpl $299
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table1(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 0,  label %bb0
+    i32 5,  label %bb1
+    i32 6,  label %bb2
+    i32 12, label %bb3
+    i32 13, label %bb4
+    i32 15, label %bb5
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
+; Expecting a jump table from 5 to 15.
+; CHECK-LABEL: optimal_jump_table1
+; CHECK: leal -5
+; CHECK: cmpl $10
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table2(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 0,  label %bb0
+    i32 1,  label %bb1
+    i32 2,  label %bb2
+    i32 9,  label %bb3
+    i32 14, label %bb4
+    i32 15, label %bb5
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Partitioning the cases to the minimum number of dense sets is not good enough.
+; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
+; should be preferred. Expecting a table from 0-9.
+; CHECK-LABEL: optimal_jump_table2
+; CHECK: cmpl $9
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table3(i32 %x) {
+entry:
+  switch i32 %x, label %return [
+    i32 1,  label %bb0
+    i32 2,  label %bb1
+    i32 3,  label %bb2
+    i32 10, label %bb3
+    i32 13, label %bb0
+    i32 14, label %bb1
+    i32 15, label %bb2
+    i32 20, label %bb3
+    i32 25, label %bb4
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+return: ret void
+
+; Splitting to maximize left-right density sum and gap size would split this
+; between 3 and 10, and then between 20 and 25. It's better to build a table
+; from 1-20.
+; CHECK-LABEL: optimal_jump_table3
+; CHECK: leal -1
+; CHECK: cmpl $19
+; CHECK: jmpq *.LJTI
+}
+
+%struct.S = type { %struct.S*, i32 }
+define void @phi_node_trouble(%struct.S* %s) {
+entry:
+  br label %header
+header:
+  %ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
+  %bool = icmp eq %struct.S* %ptr, null
+  br i1 %bool, label %exit, label %loop
+loop:
+  %nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
+  %next = load %struct.S*, %struct.S** %nextptr
+  %xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
+  %x = load i32, i32* %xptr
+  switch i32 %x, label %exit [
+    i32 4, label %header
+    i32 36, label %exit2
+    i32 69, label %exit2
+    i32 25, label %exit2
+  ]
+exit:
+  ret void
+exit2:
+  ret void
+
+; This will be lowered to a comparison with 4 and then bit tests. Make sure
+; that the phi node in %header gets a value from the comparison block.
+; CHECK-LABEL: phi_node_trouble
+; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
+; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
+; CHECK: cmpl $4, %[[REG2]]
+}
+
+
+define void @default_only(i32 %x) {
+entry:
+  br label %sw
+return:
+  ret void
+sw:
+  switch i32 %x, label %return [
+  ]
+
+; Branch directly to the default.
+; (In optimized builds the switch is removed earlier.)
+; NOOPT-LABEL: default_only
+; NOOPT: .[[L:[A-Z0-9_]+]]:
+; NOOPT-NEXT: retq
+; NOOPT: jmp .[[L]]
+}
author	Hans Wennborg <hans@hanshq.net>	2015-04-22 23:14:56 +0000
committer	Hans Wennborg <hans@hanshq.net>	2015-04-22 23:14:56 +0000
commit	395f4f4b2a3db77755fb0a08669b268e74f458c1 (patch)
tree	80084f7516fa2db016d651d19212bba17ae05423 /test/CodeGen
parent	f9c92b069a1a6f73ec41b7741b2ac6b7740c36a6 (diff)