summary | refs | log | tree | commit | diff
path: root/gcc/tree-loop-distribution.c
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2019-05-23 11:35:16 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2019-05-23 11:35:16 +0000
commit 5879ab5fafedc8f6f9bfe95a4cf8501b0df90edd (patch)
tree e886bbae2a64b230962aaf93a1c7ceb9b88ece6e /gcc/tree-loop-distribution.c
parent 32d941139e8144662e0dc182d64e3f3f9df89ad4 (diff)
re PR tree-optimization/88440 (size optimization of memcpy-like code)
2019-05-23 Richard Biener <rguenther@suse.de>

PR tree-optimization/88440
* opts.c (default_options_table): Enable -ftree-loop-distribute-patterns at -O[2s]+.
* tree-loop-distribution.c (generate_memset_builtin): Fold the generated call.
(generate_memcpy_builtin): Likewise.
(distribute_loop): Pass in whether to only distribute patterns.
(prepare_perfect_loop_nest): Also allow size optimization.
(pass_loop_distribution::execute): When optimizing a loop nest for size allow pattern replacement.

* gcc.dg/tree-ssa/ldist-37.c: New testcase.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns.
* gcc.dg/tree-ssa/ldist-37.c: Adjust.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* g++.dg/tree-ssa/pr78847.C: Likewise.
* gcc.dg/autopar/pr39500-1.c: Likewise.
* gcc.dg/autopar/reduc-1char.c: Likewise.
* gcc.dg/autopar/reduc-7.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-1.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-2.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-3.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-4.c: Likewise.
* gcc.dg/tree-ssa/prefetch-7.c: Likewise.
* gcc.dg/tree-ssa/prefetch-8.c: Likewise.
* gcc.dg/tree-ssa/prefetch-9.c: Likewise.
* gcc.dg/tree-ssa/scev-11.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
* gcc.target/i386/pr30970.c: Likewise.
* gcc.target/i386/vect-double-1.c: Likewise.
* gcc.target/i386/vect-double-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-26.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
* gfortran.dg/vect/vect-5.f90: Likewise.
* gfortran.dg/vect/vect-8.f90: Likewise.

From-SVN: r271553
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r-- gcc/tree-loop-distribution.c 24
1 file changed, 15 insertions, 9 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 3d8f13c4d14..5f03b87f9c1 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
+#include "gimple-fold.h"
#define MAX_DATAREFS_NUM \
@@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *loop, partition *partition)
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ fold_stmt (&gsi);
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *loop, partition *partition)
fn = build_fold_addr_expr (builtin_decl_implicit (kind));
fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ fold_stmt (&gsi);
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
static int
distribute_loop (struct loop *loop, vec<gimple *> stmts,
- control_dependences *cd, int *nb_calls, bool *destroy_p)
+ control_dependences *cd, int *nb_calls, bool *destroy_p,
+ bool only_patterns_p)
{
ddrs_table = new hash_table<ddr_hasher> (389);
struct graph *rdg;
@@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
/* If we are only distributing patterns but did not detect any,
simply bail out. */
- if (!flag_tree_loop_distribution
+ if (only_patterns_p
&& !any_builtin)
{
nbp = 0;
@@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
a loop into pieces, separated by builtin calls. That is, we
only want no or a single loop body remaining. */
struct partition *into;
- if (!flag_tree_loop_distribution)
+ if (only_patterns_p)
{
for (i = 0; partitions.iterate (i, &into); ++i)
if (!partition_builtin_p (into))
@@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *loop)
&& loop_outer (outer)
&& outer->inner == loop && loop->next == NULL
&& single_exit (outer)
- && optimize_loop_for_speed_p (outer)
&& !chrec_contains_symbols_defined_in_loop (niters, outer->num)
&& (niters = number_of_latch_executions (outer)) != NULL_TREE
&& niters != chrec_dont_know)
@@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (function *fun)
walking to innermost loops. */
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
{
- /* Don't distribute multiple exit edges loop, or cold loop. */
+ /* Don't distribute multiple exit edges loop, or cold loop when
+ not doing pattern detection. */
if (!single_exit (loop)
- || !optimize_loop_for_speed_p (loop))
+ || (!flag_tree_loop_distribute_patterns
+ && !optimize_loop_for_speed_p (loop)))
continue;
/* Don't distribute loop if niters is unknown. */
@@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (function *fun)
bool destroy_p;
int nb_generated_loops, nb_generated_calls;
- nb_generated_loops = distribute_loop (loop, work_list, cd,
- &nb_generated_calls,
- &destroy_p);
+ nb_generated_loops
+ = distribute_loop (loop, work_list, cd, &nb_generated_calls,
+ &destroy_p, (!optimize_loop_for_speed_p (loop)
+ || !flag_tree_loop_distribution));
if (destroy_p)
loops_to_be_destroyed.safe_push (loop);