summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <hubicka@ucw.cz>2019-12-05 19:12:51 +0100
committerJan Hubicka <hubicka@gcc.gnu.org>2019-12-05 18:12:51 +0000
commit34fbe3f0946f88828765184ed6581bda62cdf49f (patch)
tree5a3bf82ae6bb92203c6e4922a5d694198595edc6 /gcc
parent8575d5925226a8f92ee644d6d59a2b1b93840d94 (diff)
cgraphclones.c (localize_profile): New function.
* cgraphclones.c (localize_profile): New function. (cgraph_node::create_clone): Use it for partial profiles. * common.opt (fprofile-partial-training): New flag. * doc/invoke.texi (-fprofile-partial-training): Document. * ipa-cp.c (update_profiling_info): For partial profiles do not set function profile to zero. * profile.c (compute_branch_probabilities): With partial profile watch if edge count is zero and turn all probabilities to guessed. (compute_branch_probabilities): For partial profiles do not apply profile when entry count is zero. * tree-profile.c (tree_profiling): Only do value_profile_transformations when profile is read. From-SVN: r279013
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/cgraphclones.c26
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/doc/invoke.texi17
-rw-r--r--gcc/ipa-cp.c9
-rw-r--r--gcc/profile.c29
-rw-r--r--gcc/tree-profile.c3
7 files changed, 94 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 08aee8972c7..3cd05387def 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2019-12-05 Jan Hubicka <hubicka@ucw.cz>
+
+ * cgraphclones.c (localize_profile): New function.
+ (cgraph_node::create_clone): Use it for partial profiles.
+ * common.opt (fprofile-partial-training): New flag.
+ * doc/invoke.texi (-fprofile-partial-training): Document.
+ * ipa-cp.c (update_profiling_info): For partial profiles do not
+ set function profile to zero.
+ * profile.c (compute_branch_probabilities): With partial profile
+ watch if edge count is zero and turn all probabilities to guessed.
+ (compute_branch_probabilities): For partial profiles do not apply
+ profile when entry count is zero.
+ * tree-profile.c (tree_profiling): Only do value_profile_transformations
+ when profile is read.
+
2019-12-05 Sudakshina Das <sudi.das@arm.com>
* tree-vect-loop.c (vect_model_reduction_cost): Remove reduction_type
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index 81c5dfd194f..f2dfb4e2d67 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -307,6 +307,22 @@ dump_callgraph_transformation (const cgraph_node *original,
}
}
+/* Turn profile of N to local profile. */
+
+static void
+localize_profile (cgraph_node *n)
+{
+ n->count = n->count.guessed_local ();
+ for (cgraph_edge *e = n->callees; e; e=e->next_callee)
+ {
+ e->count = e->count.guessed_local ();
+ if (!e->inline_failed)
+ localize_profile (e->callee);
+ }
+ for (cgraph_edge *e = n->indirect_calls; e; e=e->next_callee)
+ e->count = e->count.guessed_local ();
+}
+
/* Create node representing clone of N executed COUNT times. Decrease
the execution counts from original node too.
The new clone will have decl set to DECL that may or may not be the same
@@ -340,6 +356,7 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
cgraph_edge *e;
unsigned i;
profile_count old_count = count;
+ bool nonzero = count.ipa ().nonzero_p ();
if (new_inlined_to)
dump_callgraph_transformation (this, new_inlined_to, "inlining to");
@@ -426,6 +443,15 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count,
if (call_duplication_hook)
symtab->call_cgraph_duplication_hooks (this, new_node);
+ /* With partial train run we do not want to assume that original's
+     count is zero whenever we redirect all executed edges to clone.
+ Simply drop profile to local one in this case. */
+ if (update_original
+ && opt_for_fn (decl, flag_profile_partial_training)
+ && nonzero
+ && count.ipa_p ()
+ && !count.ipa ().nonzero_p ())
+ localize_profile (this);
if (!new_inlined_to)
dump_callgraph_transformation (this, new_node, suffix);
diff --git a/gcc/common.opt b/gcc/common.opt
index 404b6aac298..7e47953f8dc 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2160,6 +2160,10 @@ fprofile-generate=
Common Joined RejectNegative
Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=.
+fprofile-partial-training
+Common Report Var(flag_profile_partial_training) Optimization
+Do not assume that functions never executed during the train run are cold.
+
fprofile-use
Common Var(flag_profile_use)
Enable common options for performing profile feedback directed optimizations.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d165f31a865..af3c7f2b910 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -453,8 +453,8 @@ Objective-C and Objective-C++ Dialects}.
-fpartial-inlining -fpeel-loops -fpredictive-commoning @gol
-fprefetch-loop-arrays @gol
-fprofile-correction @gol
--fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
--fprofile-reorder-functions @gol
+-fprofile-use -fprofile-use=@var{path} -fprofile-partial-training @gol
+-fprofile-values -fprofile-reorder-functions @gol
-freciprocal-math -free -frename-registers -freorder-blocks @gol
-freorder-blocks-algorithm=@var{algorithm} @gol
-freorder-blocks-and-partition -freorder-functions @gol
@@ -10634,6 +10634,19 @@ default, GCC emits an error message when an inconsistent profile is detected.
This option is enabled by @option{-fauto-profile}.
+@item -fprofile-partial-training
+@opindex fprofile-partial-training
+With @code{-fprofile-use} all portions of programs not executed during the
+train run are optimized aggressively for size rather than speed.  In some
+cases it is not practical to train all possible hot paths in the program.
+(For example, a program may contain functions specific to given hardware,
+and training may not cover all hardware configurations the program is run
+on.)  With @code{-fprofile-partial-training} profile feedback will be
+ignored for all functions not executed during the train run, leading them
+to be optimized as if they were compiled without profile feedback.  This
+leads to better performance when the train run is not representative, but
+also leads to significantly bigger code.
+
@item -fprofile-use
@itemx -fprofile-use=@var{path}
@opindex fprofile-use
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 693c7a2fdc5..14064ae0034 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4295,6 +4295,15 @@ update_profiling_info (struct cgraph_node *orig_node,
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
- new_sum.ipa ());
+
+ /* With partial train run we do not want to assume that original's
+     count is zero whenever we redirect all executed edges to clone.
+ Simply drop profile to local one in this case. */
+ if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
+ && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
+ && flag_profile_partial_training)
+ remainder = remainder.guessed_local ();
+
new_sum = orig_node_count.combine_with_ipa_count (new_sum);
new_node->count = new_sum;
orig_node->count = remainder;
diff --git a/gcc/profile.c b/gcc/profile.c
index 8d39a7d094e..7e2d7d3ca3e 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -635,9 +635,20 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
}
if (bb_gcov_count (bb))
{
+ bool set_to_guessed = false;
FOR_EACH_EDGE (e, ei, bb->succs)
- e->probability = profile_probability::probability_in_gcov_type
- (edge_gcov_count (e), bb_gcov_count (bb));
+ {
+ bool prev_never = e->probability == profile_probability::never ();
+ e->probability = profile_probability::probability_in_gcov_type
+ (edge_gcov_count (e), bb_gcov_count (bb));
+ if (e->probability == profile_probability::never ()
+ && !prev_never
+ && flag_profile_partial_training)
+ set_to_guessed = true;
+ }
+ if (set_to_guessed)
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->probability = e->probability.guessed ();
if (bb->index >= NUM_FIXED_BLOCKS
&& block_ends_with_condjump_p (bb)
&& EDGE_COUNT (bb->succs) >= 2)
@@ -697,17 +708,23 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
}
}
- if (exec_counts)
+ if (exec_counts
+ && (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun))
+ || !flag_profile_partial_training))
profile_status_for_fn (cfun) = PROFILE_READ;
/* If we have real data, use them! */
if (bb_gcov_count (ENTRY_BLOCK_PTR_FOR_FN (cfun))
|| !flag_guess_branch_prob)
FOR_ALL_BB_FN (bb, cfun)
- bb->count = profile_count::from_gcov_type (bb_gcov_count (bb));
+ if (bb_gcov_count (bb) || !flag_profile_partial_training)
+ bb->count = profile_count::from_gcov_type (bb_gcov_count (bb));
+ else
+ bb->count = profile_count::guessed_zero ();
/* If function was not trained, preserve local estimates including statically
determined zero counts. */
- else if (profile_status_for_fn (cfun) == PROFILE_READ)
+ else if (profile_status_for_fn (cfun) == PROFILE_READ
+ && !flag_profile_partial_training)
FOR_ALL_BB_FN (bb, cfun)
if (!(bb->count == profile_count::zero ()))
bb->count = bb->count.global0 ();
@@ -1417,7 +1434,7 @@ branch_prob (bool thunk)
/* At this moment we have precise loop iteration count estimates.
Record them to loop structure before the profile gets out of date. */
FOR_EACH_LOOP (loop, 0)
- if (loop->header->count > 0)
+ if (loop->header->count > 0 && loop->header->count.reliable_p ())
{
gcov_type nit = expected_loop_iterations_unbounded (loop);
widest_int bound = gcov_type_to_wide_int (nit);
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index b4435b9b2a8..df60eda665d 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -785,7 +785,8 @@ tree_profiling (void)
if (flag_branch_probabilities
&& !thunk
&& flag_profile_values
- && flag_value_profile_transformations)
+ && flag_value_profile_transformations
+ && profile_status_for_fn (cfun) == PROFILE_READ)
gimple_value_profile_transformations ();
/* The above could hose dominator info. Currently there is