diff options
author | Hans Wennborg <hans@hanshq.net> | 2015-04-22 23:14:56 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2015-04-22 23:14:56 +0000 |
commit | 395f4f4b2a3db77755fb0a08669b268e74f458c1 (patch) | |
tree | 80084f7516fa2db016d651d19212bba17ae05423 /test/CodeGen | |
parent | f9c92b069a1a6f73ec41b7741b2ac6b7740c36a6 (diff) |
Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)
This is a re-commit of r235101, which also fixes the problems with the previous patch:
- Switches with only a default case and non-fallthrough were handled incorrectly
- The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here.
> This is a major rewrite of the SelectionDAG switch lowering. The previous code
> would lower switches as a binary tre, discovering clusters of cases
> suitable for lowering by jump tables or bit tests as it went along. To increase
> the likelihood of finding jump tables, the binary tree pivot was selected to
> maximize case density on both sides of the pivot.
>
> By not selecting the pivot in the middle, the binary trees would not always
> be balanced, leading to performance problems in the generated code.
>
> This patch rewrites the lowering to search for clusters of cases
> suitable for jump tables or bit tests first, and then builds the binary
> tree around those clusters. This way, the binary tree will always be balanced.
>
> This has the added benefit of decoupling the different aspects of the lowering:
> tree building and jump table or bit tests finding are now easier to tweak
> separately.
>
> For example, this will enable us to balance the tree based on profile info
> in the future.
>
> The algorithm for finding jump tables is quadratic, whereas the previous algorithm
> was O(n log n) for common cases, and quadratic only in the worst-case. This
> doesn't seem to be major problem in practice, e.g. compiling a file consisting
> of a 10k-case switch was only 30% slower, and such large switches should be rare
> in practice. Compiling e.g. gcc.c showed no compile-time difference. If this
> does turn out to be a problem, we could limit the search space of the algorithm.
>
> This commit also disables all optimizations during switch lowering in -O0.
>
> Differential Revision: http://reviews.llvm.org/D8649
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235560 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/ARM/ifcvt3.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/ARM/struct-byval-frame-index.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/Generic/MachineBranchProb.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/early-ret.ll | 35 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/mcm-5.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/mcm-obj.ll | 97 | ||||
-rw-r--r-- | test/CodeGen/X86/pic_jumptable.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/switch-bt.ll | 12 | ||||
-rw-r--r-- | test/CodeGen/X86/switch.ll | 306 |
9 files changed, 409 insertions, 59 deletions
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index 5da63dc5f02..e53d989ad52 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -4,8 +4,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: t1: -; CHECK: cmp r2, #1 -; CHECK: cmpne r2, #7 +; CHECK: cmp r2, #7 +; CHECK: cmpne r2, #1 switch i32 %c, label %cond_next [ i32 1, label %cond_true i32 7, label %cond_true diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll index bca797d6dce..52f70fe1e0f 100644 --- a/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -194,7 +194,7 @@ lor.lhs.false459: ; preds = %if.end454 %18 = load i32, i32* %mb_type, align 4 switch i32 %18, label %for.inc503 [ i32 9, label %if.then475 - i32 10, label %if.then475 + i32 11, label %if.then475 i32 13, label %if.then475 i32 14, label %if.then475 ] diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll index 83277c98989..f10bd395abe 100644 --- a/test/CodeGen/Generic/MachineBranchProb.ll +++ b/test/CodeGen/Generic/MachineBranchProb.ll @@ -17,9 +17,9 @@ entry: ; CHECK: BB#0: derived from LLVM BB %entry ; CHECK: Successors according to CFG: BB#2(64) BB#4(14) ; CHECK: BB#4: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1(10) BB#5(4) +; CHECK: Successors according to CFG: BB#1(4) BB#5(10) ; CHECK: BB#5: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1(4) BB#3(7) +; CHECK: Successors according to CFG: BB#1(10) BB#3(7) sw.bb: br label %return diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll index 7d3e225a1e2..52cf464b9fd 100644 --- a/test/CodeGen/PowerPC/early-ret.ll +++ b/test/CodeGen/PowerPC/early-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2, ; CHECK: blr } + +@.str0 = private unnamed_addr constant [2 x i8] c"a\00" +@.str1 = private unnamed_addr constant [2 x i8] c"b\00" +@.str2 = private unnamed_addr constant [2 x i8] c"c\00" +@.str3 = private unnamed_addr constant [2 x i8] c"d\00" +@.str4 = private unnamed_addr constant [2 x i8] c"e\00" +define i8* @dont_assert(i32 %x) { +; LLVM would assert due to moving an early return into the jump table block and +; removing one of its predecessors despite that block ending with an indirect +; branch. +entry: + switch i32 %x, label %sw.epilog [ + i32 1, label %return + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb3 + i32 255, label %sw.bb4 + ] +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.epilog: br label %return +return: + %retval.0 = phi i8* [ null, %sw.epilog ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ] + ret i8* %retval.0 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll index 0c258459c91..19adbe5b7d9 100644 --- a/test/CodeGen/PowerPC/mcm-5.ll +++ b/test/CodeGen/PowerPC/mcm-5.ll @@ -1,5 +1,5 @@ -; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s -; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s ; Test correct code generation for medium and large code model ; for loading the address of a jump table from the TOC. diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll index 46295cf3187..1ececf84926 100644 --- a/test/CodeGen/PowerPC/mcm-obj.ll +++ b/test/CodeGen/PowerPC/mcm-obj.ll @@ -3,6 +3,12 @@ ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \ ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s +; Run jump table test separately since jump tables aren't generated at -O0. +; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \ +; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s +; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \ +; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s + ; FIXME: When asm-parse is available, could make this an assembly test. target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" @@ -92,6 +98,46 @@ entry: ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]] ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]] +@ti = common global i32 0, align 4 + +define signext i32 @test_tentative() nounwind { +entry: + %0 = load i32, i32* @ti, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @ti, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing tentatively declared variable ti. +; +; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]] +; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]] +; +; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]] +; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]] + +define i8* @test_fnaddr() nounwind { +entry: + %func = alloca i32 (i32)*, align 8 + store i32 (i32)* @foo, i32 (i32)** %func, align 8 + %0 = load i32 (i32)*, i32 (i32)** %func, align 8 + %1 = bitcast i32 (i32)* %0 to i8* + ret i8* %1 +} + +declare signext i32 @foo(i32 signext) + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for +; accessing function address foo. +; +; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]] +; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]] +; +; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]] +; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]] + + define signext i32 @test_jump_table(i32 signext %i) nounwind { entry: %i.addr = alloca i32, align 4 @@ -139,47 +185,12 @@ sw.epilog: ; preds = %sw.bb3, %sw.default ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for ; accessing a jump table address. ; -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]] -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]] -; -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]] -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]] - -@ti = common global i32 0, align 4 - -define signext i32 @test_tentative() nounwind { -entry: - %0 = load i32, i32* @ti, align 4 - %inc = add nsw i32 %0, 1 - store i32 %inc, i32* @ti, align 4 - ret i32 %0 -} - -; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for -; accessing tentatively declared variable ti. -; -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]] -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]] -; -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]] -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]] - -define i8* @test_fnaddr() nounwind { -entry: - %func = alloca i32 (i32)*, align 8 - store i32 (i32)* @foo, i32 (i32)** %func, align 8 - %0 = load i32 (i32)*, i32 (i32)** %func, align 8 - %1 = bitcast i32 (i32)* %0 to i8* - ret i8* %1 -} - -declare signext i32 @foo(i32 signext) - -; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for -; accessing function address foo. -; -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]] -; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]] +; MEDIUM-JT: Relocations [ +; MEDIUM-JT: Section ({{.*}}) .rela.text { +; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]] +; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]] ; -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]] -; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]] +; LARGE-JT: Relocations [ +; LARGE-JT: Section ({{.*}}) .rela.text { +; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]] +; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]] diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll index dd0a8ac0431..8c1992a24ec 100644 --- a/test/CodeGen/X86/pic_jumptable.ll +++ b/test/CodeGen/X86/pic_jumptable.ll @@ -55,13 +55,15 @@ entry: ] bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry + call void @_Z3bari( i32 0 ) br label %bb1 bb1: ; preds = %bb, %entry + call void @_Z3bari( i32 1 ) br label %bb2 bb2: ; preds = %bb1, %entry - call void @_Z3bari( i32 1 ) + call void @_Z3bari( i32 2 ) br label %bb11 bb3: ; preds = %entry diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll index 113782169b0..2cf3aafe547 100644 --- a/test/CodeGen/X86/switch-bt.ll +++ b/test/CodeGen/X86/switch-bt.ll @@ -140,19 +140,17 @@ sw.epilog: ; The balanced binary switch here would start with a comparison against 39, but ; it is currently starting with 29 because of the density-sum heuristic. -; CHECK: cmpl $29 +; CHECK: cmpl $39 ; CHECK: jg ; CHECK: cmpl $10 -; CHECK: jne -; CHECK: cmpl $49 -; CHECK: jg -; CHECK: cmpl $30 -; CHECK: jne +; CHECK: je ; CHECK: cmpl $20 ; CHECK: jne +; CHECK: cmpl $40 +; CHECK: je ; CHECK: cmpl $50 ; CHECK: jne -; CHECK: cmpl $40 +; CHECK: cmpl $30 ; CHECK: jne ; CHECK: cmpl $60 ; CHECK: jne diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll new file mode 100644 index 00000000000..de9f38fc874 --- /dev/null +++ b/test/CodeGen/X86/switch.ll @@ -0,0 +1,306 @@ +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s + +declare void @g(i32) + +define void @basic(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 3, label %bb0 + i32 1, label %bb1 + i32 4, label %bb1 + i32 5, label %bb0 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +return: ret void + +; Should be lowered as straight compares in -O0 mode. +; NOOPT-LABEL: basic +; NOOPT: subl $3, %eax +; NOOPT: je +; NOOPT: subl $1, %eax +; NOOPT: je +; NOOPT: subl $4, %eax +; NOOPT: je +; NOOPT: subl $5, %eax +; NOOPT: je + +; Jump table otherwise. +; CHECK-LABEL: basic +; CHECK: decl +; CHECK: cmpl $4 +; CHECK: ja +; CHECK: jmpq *.LJTI +} + + +define void @simple_ranges(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 1, label %bb0 + i32 2, label %bb0 + i32 3, label %bb0 + i32 100, label %bb1 + i32 101, label %bb1 + i32 102, label %bb1 + i32 103, label %bb1 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +return: ret void + +; Should be lowered to two range checks. +; CHECK-LABEL: simple_ranges +; CHECK: leal -100 +; CHECK: cmpl $4 +; CHECK: jae +; CHECK: cmpl $3 +; CHECK: ja +} + + +define void @jt_is_better(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 2, label %bb0 + i32 4, label %bb0 + i32 1, label %bb1 + i32 3, label %bb1 + i32 5, label %bb1 + + i32 6, label %bb2 + i32 7, label %bb3 + i32 8, label %bb4 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +return: ret void + +; Cases 0-5 could be lowered with two bit tests, +; but with 6-8, the whole switch is suitable for a jump table. +; CHECK-LABEL: jt_is_better +; CHECK: cmpl $8 +; CHECK: jbe +; CHECK: jmpq *.LJTI +} + + +define void @bt_is_better(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 3, label %bb0 + i32 6, label %bb0 + i32 1, label %bb1 + i32 4, label %bb1 + i32 7, label %bb1 + i32 2, label %bb2 + i32 5, label %bb2 + i32 8, label %bb2 + + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +return: ret void + +; This could be lowered as a jump table, but bit tests is more efficient. +; CHECK-LABEL: bt_is_better +; 73 = 2^0 + 2^3 + 2^6 +; CHECK: movl $73 +; CHECK: btl +; 146 = 2^1 + 2^4 + 2^7 +; CHECK: movl $146 +; CHECK: btl +; 292 = 2^2 + 2^5 + 2^8 +; CHECK: movl $292 +; CHECK: btl +} + + +define void @optimal_pivot1(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 100, label %bb0 + i32 200, label %bb1 + i32 300, label %bb0 + i32 400, label %bb1 + i32 500, label %bb0 + i32 600, label %bb1 + + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +return: ret void + +; Should pivot around 400 for two subtrees of equal size. +; CHECK-LABEL: optimal_pivot1 +; CHECK-NOT: cmpl +; CHECK: cmpl $399 +} + + +define void @optimal_pivot2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3 + i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3 + i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3 + i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3 + + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +return: ret void + +; Should pivot around 300 for two subtrees with two jump tables each. +; CHECK-LABEL: optimal_pivot2 +; CHECK-NOT: cmpl +; CHECK: cmpl $299 +; CHECK: jmpq *.LJTI +; CHECK: jmpq *.LJTI +; CHECK: jmpq *.LJTI +; CHECK: jmpq *.LJTI +} + + +define void @optimal_jump_table1(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 5, label %bb1 + i32 6, label %bb2 + i32 12, label %bb3 + i32 13, label %bb4 + i32 15, label %bb5 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +bb5: tail call void @g(i32 5) br label %return +return: ret void + +; Splitting in the largest gap (between 6 and 12) would yield suboptimal result. +; Expecting a jump table from 5 to 15. +; CHECK-LABEL: optimal_jump_table1 +; CHECK: leal -5 +; CHECK: cmpl $10 +; CHECK: jmpq *.LJTI +} + + +define void @optimal_jump_table2(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 0, label %bb0 + i32 1, label %bb1 + i32 2, label %bb2 + i32 9, label %bb3 + i32 14, label %bb4 + i32 15, label %bb5 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +bb5: tail call void @g(i32 5) br label %return +return: ret void + +; Partitioning the cases to the minimum number of dense sets is not good enough. +; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former +; should be preferred. Expecting a table from 0-9. +; CHECK-LABEL: optimal_jump_table2 +; CHECK: cmpl $9 +; CHECK: jmpq *.LJTI +} + + +define void @optimal_jump_table3(i32 %x) { +entry: + switch i32 %x, label %return [ + i32 1, label %bb0 + i32 2, label %bb1 + i32 3, label %bb2 + i32 10, label %bb3 + i32 13, label %bb0 + i32 14, label %bb1 + i32 15, label %bb2 + i32 20, label %bb3 + i32 25, label %bb4 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +return: ret void + +; Splitting to maximize left-right density sum and gap size would split this +; between 3 and 10, and then between 20 and 25. It's better to build a table +; from 1-20. +; CHECK-LABEL: optimal_jump_table3 +; CHECK: leal -1 +; CHECK: cmpl $19 +; CHECK: jmpq *.LJTI +} + +%struct.S = type { %struct.S*, i32 } +define void @phi_node_trouble(%struct.S* %s) { +entry: + br label %header +header: + %ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ] + %bool = icmp eq %struct.S* %ptr, null + br i1 %bool, label %exit, label %loop +loop: + %nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0 + %next = load %struct.S*, %struct.S** %nextptr + %xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1 + %x = load i32, i32* %xptr + switch i32 %x, label %exit [ + i32 4, label %header + i32 36, label %exit2 + i32 69, label %exit2 + i32 25, label %exit2 + ] +exit: + ret void +exit2: + ret void + +; This will be lowered to a comparison with 4 and then bit tests. Make sure +; that the phi node in %header gets a value from the comparison block. +; CHECK-LABEL: phi_node_trouble +; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]] +; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]] +; CHECK: cmpl $4, %[[REG2]] +} + + +define void @default_only(i32 %x) { +entry: + br label %sw +return: + ret void +sw: + switch i32 %x, label %return [ + ] + +; Branch directly to the default. +; (In optimized builds the switch is removed earlier.) +; NOOPT-LABEL: default_only +; NOOPT: .[[L:[A-Z0-9_]+]]: +; NOOPT-NEXT: retq +; NOOPT: jmp .[[L]] +} |