summaryrefslogtreecommitdiff
path: root/test/Analysis/BlockFrequencyInfo
diff options
context:
space:
mode:
author Diego Novillo <dnovillo@google.com> 2015-04-01 17:42:27 +0000
committer Diego Novillo <dnovillo@google.com> 2015-04-01 17:42:27 +0000
commit 32d9020423f9b49c831bce1244218b575819029c (patch)
tree c19c5354446214363306d66e739b7e7832628e50 /test/Analysis/BlockFrequencyInfo
parent 6b2fe99659736dd2fcf8f82801ff0e5a299c348c (diff)
Remove 4,096 loop scale limitation.
Summary: This is part 1 of fixes to address the problems described in https://llvm.org/bugs/show_bug.cgi?id=22719. The restriction to limit loop scales to 4,096 does not really prevent overflows anymore, as the underlying algorithm has changed and does not seem to suffer from this problem. Additionally, artificially restricting loop scales to such a low number skews frequency information, making loops of equal hotness appear to have very different hotness properties. The only loops that are artificially restricted to a scale of 4096 are infinite loops (those loops with an exit mass of 0). This prevents infinite loops from skewing the frequencies of other regions in the CFG. At the end of propagation, frequencies are scaled to values that take no more than 64 bits to represent. When the range of frequencies to be represented fits within 61 bits, it pushes up the scaling factor to a minimum of 8 to better distinguish small frequency values. Otherwise, small frequency values are all saturated down at 1. Tested on x86_64. Reviewers: dexonsmith Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8718 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@233826 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis/BlockFrequencyInfo')
-rw-r--r-- test/Analysis/BlockFrequencyInfo/bad_input.ll 3
-rw-r--r-- test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll 204
2 files changed, 206 insertions, 1 deletion
diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
index da62dca67a8..e5b1f500e1e 100644
--- a/test/Analysis/BlockFrequencyInfo/bad_input.ll
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -32,7 +32,8 @@ define void @infinite_loop(i1 %x) {
entry:
br i1 %x, label %for.body, label %for.end, !prof !1
-; Check that the loop scale maxes out at 4096, giving 2048 here.
+; Check that the infinite loop is arbitrarily scaled to max out at 4096,
+; giving 2048 here.
; CHECK-NEXT: for.body: float = 2048.0,
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
new file mode 100644
index 00000000000..e5e3f64b649
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
@@ -0,0 +1,204 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+; This code contains three loops. One is triple-nested, the
+; second is double-nested and the third is a single loop. At
+; runtime, all three loops execute 1,000,000 times each. We used to
+; give different frequencies to each of the loops because loop
+; scales were limited to no more than 4,096.
+;
+; This was penalizing the hotness of the second and third loops
+; because BFI was reducing the loop scale for for.cond16 and
+; for.cond26 to a max of 4,096.
+;
+; Without this restriction, all loops are now correctly given the same
+; frequency values.
+;
+; Original C code:
+;
+;
+; int g;
+; __attribute__((noinline)) void bar() {
+; g++;
+; }
+;
+; extern int printf(const char*, ...);
+;
+; int main()
+; {
+; int i, j, k;
+;
+; g = 0;
+; for (i = 0; i < 100; i++)
+; for (j = 0; j < 100; j++)
+; for (k = 0; k < 100; k++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+;
+; for (i = 0; i < 100; i++)
+; for (j = 0; j < 10000; j++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+;
+;
+; for (i = 0; i < 1000000; i++)
+; bar();
+;
+; printf ("g = %d\n", g);
+; g = 0;
+; }
+
+@g = common global i32 0, align 4 ; counter 'g' from the C source; main stores 0 to it before the first nest and after each printf
+@.str = private unnamed_addr constant [8 x i8] c"g = %d\0A\00", align 1 ; format string handed to every printf call in main
+
+declare void @bar() ; declaration only in this test; the C original defines it as g++
+declare i32 @printf(i8*, ...)
+
+; CHECK: Printing analysis {{.*}} for function 'main':
+; CHECK-NEXT: block-frequency-info: main
+define i32 @main() { ; IR for the C main() quoted above: three loop nests executed in sequence
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %k = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc10, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 100 ; nest 1, outer header: i < 100
+ br i1 %cmp, label %for.body, label %for.end12, !prof !1 ; !1 = weights 101 (stay in loop) : 2 (exit)
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc7, %for.body
+ %1 = load i32, i32* %j, align 4
+ %cmp2 = icmp slt i32 %1, 100 ; nest 1, middle header: j < 100
+ br i1 %cmp2, label %for.body3, label %for.end9, !prof !2 ; !2 = weights 10001 (stay in loop) : 101 (exit)
+
+for.body3: ; preds = %for.cond1
+ store i32 0, i32* %k, align 4
+ br label %for.cond4
+
+for.cond4: ; preds = %for.inc, %for.body3
+ %2 = load i32, i32* %k, align 4
+ %cmp5 = icmp slt i32 %2, 100 ; nest 1, inner header: k < 100
+ br i1 %cmp5, label %for.body6, label %for.end, !prof !3 ; !3 = weights 1000001 (stay in loop) : 10001 (exit)
+
+; CHECK: - for.body6: float = 500000.5, int = 4000003
+for.body6: ; preds = %for.cond4
+ call void @bar() ; innermost body of the triple nest
+ br label %for.inc
+
+for.inc: ; preds = %for.body6
+ %3 = load i32, i32* %k, align 4
+ %inc = add nsw i32 %3, 1
+ store i32 %inc, i32* %k, align 4
+ br label %for.cond4
+
+for.end: ; preds = %for.cond4
+ br label %for.inc7
+
+for.inc7: ; preds = %for.end
+ %4 = load i32, i32* %j, align 4
+ %inc8 = add nsw i32 %4, 1
+ store i32 %inc8, i32* %j, align 4
+ br label %for.cond1
+
+for.end9: ; preds = %for.cond1
+ br label %for.inc10
+
+for.inc10: ; preds = %for.end9
+ %5 = load i32, i32* %i, align 4
+ %inc11 = add nsw i32 %5, 1
+ store i32 %inc11, i32* %i, align 4
+ br label %for.cond
+
+for.end12: ; preds = %for.cond
+ %6 = load i32, i32* @g, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %6)
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond13
+
+for.cond13: ; preds = %for.inc22, %for.end12
+ %7 = load i32, i32* %i, align 4
+ %cmp14 = icmp slt i32 %7, 100 ; nest 2, outer header: i < 100
+ br i1 %cmp14, label %for.body15, label %for.end24, !prof !1 ; same weights as nest 1's outer header (!1)
+
+for.body15: ; preds = %for.cond13
+ store i32 0, i32* %j, align 4
+ br label %for.cond16
+
+for.cond16: ; preds = %for.inc19, %for.body15
+ %8 = load i32, i32* %j, align 4
+ %cmp17 = icmp slt i32 %8, 10000 ; nest 2, inner header: j < 10000
+ br i1 %cmp17, label %for.body18, label %for.end21, !prof !4 ; !4 = weights 1000001 (stay in loop) : 101 (exit)
+
+; CHECK: - for.body18: float = 500000.5, int = 4000003
+for.body18: ; preds = %for.cond16
+ call void @bar() ; innermost body of the double nest
+ br label %for.inc19
+
+for.inc19: ; preds = %for.body18
+ %9 = load i32, i32* %j, align 4
+ %inc20 = add nsw i32 %9, 1
+ store i32 %inc20, i32* %j, align 4
+ br label %for.cond16
+
+for.end21: ; preds = %for.cond16
+ br label %for.inc22
+
+for.inc22: ; preds = %for.end21
+ %10 = load i32, i32* %i, align 4
+ %inc23 = add nsw i32 %10, 1
+ store i32 %inc23, i32* %i, align 4
+ br label %for.cond13
+
+for.end24: ; preds = %for.cond13
+ %11 = load i32, i32* @g, align 4
+ %call25 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %11)
+ store i32 0, i32* @g, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond26
+
+for.cond26: ; preds = %for.inc29, %for.end24
+ %12 = load i32, i32* %i, align 4
+ %cmp27 = icmp slt i32 %12, 1000000 ; loop 3, single header: i < 1000000
+ br i1 %cmp27, label %for.body28, label %for.end31, !prof !5 ; !5 = weights 1000001 (stay in loop) : 2 (exit)
+
+; CHECK: - for.body28: float = 500000.5, int = 4000003
+for.body28: ; preds = %for.cond26
+ call void @bar() ; body of the single loop
+ br label %for.inc29
+
+for.inc29: ; preds = %for.body28
+ %13 = load i32, i32* %i, align 4
+ %inc30 = add nsw i32 %13, 1
+ store i32 %inc30, i32* %i, align 4
+ br label %for.cond26
+
+for.end31: ; preds = %for.cond26
+ %14 = load i32, i32* @g, align 4
+ %call32 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %14)
+ store i32 0, i32* @g, align 4
+ %15 = load i32, i32* %retval ; %retval was set to 0 in entry and is never stored to again
+ ret i32 %15
+}
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
+!1 = !{!"branch_weights", i32 101, i32 2} ; attached to for.cond and for.cond13 (outer headers, bound 100)
+!2 = !{!"branch_weights", i32 10001, i32 101} ; attached to for.cond1 (middle header of the triple nest, bound 100)
+!3 = !{!"branch_weights", i32 1000001, i32 10001} ; attached to for.cond4 (inner header of the triple nest, bound 100)
+!4 = !{!"branch_weights", i32 1000001, i32 101} ; attached to for.cond16 (inner header of the double nest, bound 10000)
+!5 = !{!"branch_weights", i32 1000001, i32 2} ; attached to for.cond26 (single loop, bound 1000000)