diff options
author | Hans Wennborg <hans@hanshq.net> | 2015-04-22 23:14:56 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2015-04-22 23:14:56 +0000 |
commit | 395f4f4b2a3db77755fb0a08669b268e74f458c1 (patch) | |
tree | 80084f7516fa2db016d651d19212bba17ae05423 /test/CodeGen/PowerPC/early-ret.ll | |
parent | f9c92b069a1a6f73ec41b7741b2ac6b7740c36a6 (diff) |
Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)
This is a re-commit of r235101, which also fixes the problems with the previous patch:
- Switches with only a default case and non-fallthrough were handled incorrectly
- The previous patch tickled a bug in PowerPC Early-Return Creation which is fixed here.
> This is a major rewrite of the SelectionDAG switch lowering. The previous code
> would lower switches as a binary tree, discovering clusters of cases
> suitable for lowering by jump tables or bit tests as it went along. To increase
> the likelihood of finding jump tables, the binary tree pivot was selected to
> maximize case density on both sides of the pivot.
>
> By not selecting the pivot in the middle, the binary trees would not always
> be balanced, leading to performance problems in the generated code.
>
> This patch rewrites the lowering to search for clusters of cases
> suitable for jump tables or bit tests first, and then builds the binary
> tree around those clusters. This way, the binary tree will always be balanced.
>
> This has the added benefit of decoupling the different aspects of the lowering:
> tree building and jump table or bit tests finding are now easier to tweak
> separately.
>
> For example, this will enable us to balance the tree based on profile info
> in the future.
>
> The algorithm for finding jump tables is quadratic, whereas the previous algorithm
> was O(n log n) for common cases, and quadratic only in the worst-case. This
> doesn't seem to be a major problem in practice, e.g. compiling a file consisting
> of a 10k-case switch was only 30% slower, and such large switches should be rare
> in practice. Compiling e.g. gcc.c showed no compile-time difference. If this
> does turn out to be a problem, we could limit the search space of the algorithm.
>
> This commit also disables all optimizations during switch lowering in -O0.
>
> Differential Revision: http://reviews.llvm.org/D8649
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235560 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PowerPC/early-ret.ll')
-rw-r--r-- | test/CodeGen/PowerPC/early-ret.ll | 35 |
1 file changed, 34 insertions, 1 deletion
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll index 7d3e225a1e2..52cf464b9fd 100644 --- a/test/CodeGen/PowerPC/early-ret.ll +++ b/test/CodeGen/PowerPC/early-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2, ; CHECK: blr } + +@.str0 = private unnamed_addr constant [2 x i8] c"a\00" +@.str1 = private unnamed_addr constant [2 x i8] c"b\00" +@.str2 = private unnamed_addr constant [2 x i8] c"c\00" +@.str3 = private unnamed_addr constant [2 x i8] c"d\00" +@.str4 = private unnamed_addr constant [2 x i8] c"e\00" +define i8* @dont_assert(i32 %x) { +; LLVM would assert due to moving an early return into the jump table block and +; removing one of its predecessors despite that block ending with an indirect +; branch. +entry: + switch i32 %x, label %sw.epilog [ + i32 1, label %return + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb3 + i32 255, label %sw.bb4 + ] +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.epilog: br label %return +return: + %retval.0 = phi i8* [ null, %sw.epilog ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ], + [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ] + ret i8* %retval.0 +} + attributes #0 = { nounwind } |