summaryrefslogtreecommitdiff
path: root/test/CodeGen/PowerPC
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2015-04-22 23:14:56 +0000
committerHans Wennborg <hans@hanshq.net>2015-04-22 23:14:56 +0000
commit395f4f4b2a3db77755fb0a08669b268e74f458c1 (patch)
tree80084f7516fa2db016d651d19212bba17ae05423 /test/CodeGen/PowerPC
parentf9c92b069a1a6f73ec41b7741b2ac6b7740c36a6 (diff)
Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)
This is a re-commit of r235101, which also fixes the problems with the previous patch: - Switches with only a default case and non-fallthrough were handled incorrectly - The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here. > This is a major rewrite of the SelectionDAG switch lowering. The previous code > would lower switches as a binary tre, discovering clusters of cases > suitable for lowering by jump tables or bit tests as it went along. To increase > the likelihood of finding jump tables, the binary tree pivot was selected to > maximize case density on both sides of the pivot. > > By not selecting the pivot in the middle, the binary trees would not always > be balanced, leading to performance problems in the generated code. > > This patch rewrites the lowering to search for clusters of cases > suitable for jump tables or bit tests first, and then builds the binary > tree around those clusters. This way, the binary tree will always be balanced. > > This has the added benefit of decoupling the different aspects of the lowering: > tree building and jump table or bit tests finding are now easier to tweak > separately. > > For example, this will enable us to balance the tree based on profile info > in the future. > > The algorithm for finding jump tables is quadratic, whereas the previous algorithm > was O(n log n) for common cases, and quadratic only in the worst-case. This > doesn't seem to be major problem in practice, e.g. compiling a file consisting > of a 10k-case switch was only 30% slower, and such large switches should be rare > in practice. Compiling e.g. gcc.c showed no compile-time difference. If this > does turn out to be a problem, we could limit the search space of the algorithm. > > This commit also disables all optimizations during switch lowering in -O0. > > Differential Revision: http://reviews.llvm.org/D8649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235560 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PowerPC')
-rw-r--r--test/CodeGen/PowerPC/early-ret.ll35
-rw-r--r--test/CodeGen/PowerPC/mcm-5.ll4
-rw-r--r--test/CodeGen/PowerPC/mcm-obj.ll97
3 files changed, 90 insertions, 46 deletions
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll
index 7d3e225a1e2..52cf464b9fd 100644
--- a/test/CodeGen/PowerPC/early-ret.ll
+++ b/test/CodeGen/PowerPC/early-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2,
; CHECK: blr
}
+
+@.str0 = private unnamed_addr constant [2 x i8] c"a\00"
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00"
+@.str2 = private unnamed_addr constant [2 x i8] c"c\00"
+@.str3 = private unnamed_addr constant [2 x i8] c"d\00"
+@.str4 = private unnamed_addr constant [2 x i8] c"e\00"
+define i8* @dont_assert(i32 %x) {
+; LLVM would assert due to moving an early return into the jump table block and
+; removing one of its predecessors despite that block ending with an indirect
+; branch.
+entry:
+ switch i32 %x, label %sw.epilog [
+ i32 1, label %return
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb3
+ i32 255, label %sw.bb4
+ ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.epilog: br label %return
+return:
+ %retval.0 = phi i8* [ null, %sw.epilog ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
+ ret i8* %retval.0
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
index 0c258459c91..19adbe5b7d9 100644
--- a/test/CodeGen/PowerPC/mcm-5.ll
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
-; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
; Test correct code generation for medium and large code model
; for loading the address of a jump table from the TOC.
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 46295cf3187..1ececf84926 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -3,6 +3,12 @@
; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
+; Run jump table test separately since jump tables aren't generated at -O0.
+; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
+; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
+
; FIXME: When asm-parse is available, could make this an assembly test.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -92,6 +98,46 @@ entry:
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+ %0 = load i32, i32* @ti, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ti, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively declared variable ti.
+;
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+;
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+
+define i8* @test_fnaddr() nounwind {
+entry:
+ %func = alloca i32 (i32)*, align 8
+ store i32 (i32)* @foo, i32 (i32)** %func, align 8
+ %0 = load i32 (i32)*, i32 (i32)** %func, align 8
+ %1 = bitcast i32 (i32)* %0 to i8*
+ ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+;
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+
+
define signext i32 @test_jump_table(i32 signext %i) nounwind {
entry:
%i.addr = alloca i32, align 4
@@ -139,47 +185,12 @@ sw.epilog: ; preds = %sw.bb3, %sw.default
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing a jump table address.
;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-
-@ti = common global i32 0, align 4
-
-define signext i32 @test_tentative() nounwind {
-entry:
- %0 = load i32, i32* @ti, align 4
- %inc = add nsw i32 %0, 1
- store i32 %inc, i32* @ti, align 4
- ret i32 %0
-}
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing tentatively declared variable ti.
-;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-
-define i8* @test_fnaddr() nounwind {
-entry:
- %func = alloca i32 (i32)*, align 8
- store i32 (i32)* @foo, i32 (i32)** %func, align 8
- %0 = load i32 (i32)*, i32 (i32)** %func, align 8
- %1 = bitcast i32 (i32)* %0 to i8*
- ret i8* %1
-}
-
-declare signext i32 @foo(i32 signext)
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing function address foo.
-;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; MEDIUM-JT: Relocations [
+; MEDIUM-JT: Section ({{.*}}) .rela.text {
+; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; LARGE-JT: Relocations [
+; LARGE-JT: Section ({{.*}}) .rela.text {
+; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]