summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeresa Johnson <tejohnson@google.com>2017-12-20 17:53:10 +0000
committerTeresa Johnson <tejohnson@google.com>2017-12-20 17:53:10 +0000
commit9274a9b246ca8970c9e1880756a4de8f8244540e (patch)
treea88c484fb80088f277165ad2f208809bbe63b859
parentd49ac948c39f26ce8e3a8c862f54a573779b44b5 (diff)
[PGO] Function section hotness prefix should look at all blocks
Summary: The function section prefix for PGO based layout (e.g. hot/unlikely) should look at the hotness of all blocks not just the entry BB. A function with a cold entry but a very hot loop should be placed in the hot section, for example, so that it is located close to other hot functions it may call. For SamplePGO it was already looking at the branch weights on calls, and I made that code conditional on whether this is SamplePGO since it was essentially a noop for instrumentation PGO anyway. Reviewers: davidxl Subscribers: eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D41395 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321197 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Analysis/ProfileSummaryInfo.h8
-rw-r--r--lib/Analysis/ProfileSummaryInfo.cpp58
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp15
-rw-r--r--test/Transforms/CodeGenPrepare/section.ll47
4 files changed, 84 insertions, 44 deletions
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index bd7b0037482..29303345842 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -92,12 +92,12 @@ public:
bool hasHugeWorkingSetSize();
/// \brief Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
- /// Returns true if \p F has hot function entry or hot call edge.
- bool isFunctionHotInCallGraph(const Function *F);
+ /// Returns true if \p F contains hot code.
+ bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F);
- /// Returns true if \p F has cold function entry or cold call edge.
- bool isFunctionColdInCallGraph(const Function *F);
+ /// Returns true if \p F contains only cold code.
+ bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
/// \brief Returns true if \p F is a hot function.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 671744f93fb..3bb4793c3ee 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -115,42 +115,62 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
return FunctionCount && isHotCount(FunctionCount.getValue());
}
-/// Returns true if the function's entry or total call edge count is hot.
+/// Returns true if the function contains hot code. This can include a hot
+/// function entry count, hot basic block, or (in the case of Sample PGO)
+/// hot total call edge count.
/// If it returns false, it either means it is not hot or it is unknown
-/// whether it is hot or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (isHotCount(FunctionCount.getValue()))
return true;
- uint64_t TotalCallCount = 0;
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (isHotCount(TotalCallCount))
+ return true;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isHotCount(TotalCallCount);
+ if (isHotBB(&BB, &BFI))
+ return true;
+ return false;
}
-/// Returns true if the function's entry and total call edge count is cold.
+/// Returns true if the function only contains cold code. This means that
+/// the function entry and blocks are all cold, and (in the case of Sample PGO)
+/// the total call edge count is cold.
/// If it returns false, it either means it is not cold or it is unknown
-/// whether it is cold or not (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) {
+/// (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
+ BlockFrequencyInfo &BFI) {
if (!F || !computeSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
if (!isColdCount(FunctionCount.getValue()))
return false;
-
- uint64_t TotalCallCount = 0;
+
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (!isColdCount(TotalCallCount))
+ return false;
+ }
for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(&I, nullptr))
- TotalCallCount += CallCount.getValue();
- return isColdCount(TotalCallCount);
+ if (!isColdBB(&BB, &BFI))
+ return false;
+ return true;
}
/// Returns true if the function's entry is a cold. If it returns false, it
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index afe1cedb9e3..d6f55bba716 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -352,8 +352,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
- BFI.reset();
- BPI.reset();
ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
@@ -365,14 +363,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
- if (PSI->isFunctionHotInCallGraph(&F))
+ if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionColdInCallGraph(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
F.setSectionPrefix(".unlikely");
}
@@ -652,13 +652,6 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (SameIncomingValueBBs.count(Pred))
return true;
- if (!BFI) {
- Function &F = *BB->getParent();
- LoopInfo LI{DominatorTree(F)};
- BPI.reset(new BranchProbabilityInfo(F, LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
-
BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
diff --git a/test/Transforms/CodeGenPrepare/section.ll b/test/Transforms/CodeGenPrepare/section.ll
index 4f3144e7fc7..30598ba7afb 100644
--- a/test/Transforms/CodeGenPrepare/section.ll
+++ b/test/Transforms/CodeGenPrepare/section.ll
@@ -4,33 +4,59 @@ target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get correct section prefix assigned
-; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; CHECK: hot_func1{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
-define void @hot_func() !prof !15 {
+define void @hot_func1() !prof !15 {
ret void
}
-; For instrumentation based PGO, we should only look at entry counts,
+; CHECK: hot_func2{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; Entry is cold but inner block is hot
+define void @hot_func2(i32 %n) !prof !16 {
+entry:
+ %n.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %n, i32* %n.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond:
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end, !prof !19
+
+for.body:
+ %2 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %2, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+; For instrumentation based PGO, we should only look at block counts,
; not call site VP metadata (which can exist on value profiled memcpy,
; or possibly left behind after static analysis based devirtualization).
; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
define void @cold_func1() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !17
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !17
ret void
}
-; CHECK: cold_func2{{.*}}!section_prefix
+; CHECK: cold_func2{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func2() !prof !16 {
- call void @hot_func(), !prof !17
- call void @hot_func(), !prof !18
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !17
+ call void @hot_func1(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
define void @cold_func3() !prof !16 {
- call void @hot_func(), !prof !18
+ call void @hot_func1(), !prof !18
ret void
}
@@ -55,3 +81,4 @@ define void @cold_func3() !prof !16 {
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
+!19 = !{!"branch_weights", i32 1000, i32 1}