From 7c9039cf2c981fe243d2807ce4305f30555c5da3 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Mon, 21 Jul 2014 12:33:40 +0200 Subject: Adding -fnoalias to delcare non-aliasing on function level. This allows optimization by assuring the compiler that pointers within the given function don't alias. The optimizations within this patch are: * pass_uninline_innermost_loops (new lowering pass) Uninline the innermost loop if noalias is enabled, by moving it into a separate function. * pass_peel_last_iteration (new optimization pass) Peel the last loop iteration if noalias is enabled. --- gcc/Makefile.in | 2 + gcc/cfgloopmanip.c | 11 +- gcc/common.opt | 4 + gcc/passes.def | 2 + gcc/peel-last-iteration.c | 442 +++++++++++++++++++++++++++++++++++++++ gcc/timevar.def | 2 + gcc/tree-pass.h | 2 + gcc/tree-ssa-loop-prefetch.c | 4 +- gcc/uninline-innermost-loops.c | 456 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 922 insertions(+), 3 deletions(-) create mode 100644 gcc/peel-last-iteration.c create mode 100644 gcc/uninline-innermost-loops.c diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f675e073ecc7..fa00ab4d06f5 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1404,6 +1404,7 @@ OBJS = \ options-save.o \ opts-global.o \ passes.o \ + peel-last-iteration.o \ plugin.o \ postreload-gcse.o \ postreload.o \ @@ -1561,6 +1562,7 @@ OBJS = \ tree-vrp.o \ tree.o \ typed-splay-tree.o \ + uninline-innermost-loops.o \ valtrack.o \ value-prof.o \ var-tracking.o \ diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 3e34aadd6911..979d9e64971f 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -1545,8 +1545,17 @@ create_preheader (struct loop *loop, int flags) || has_preds_from_loop (single_entry->src, loop))) need_forwarder_block = true; } + /* FIXME Use the flags in a more sensible way. */ if (! need_forwarder_block) - return NULL; + { + if (flags != 128) + return NULL; + else + { + if (dump_file) + fprintf (dump_file, "would return null\n"); + } + } } mfb_kj_edge = loop_latch_edge (loop); diff --git a/gcc/common.opt b/gcc/common.opt index 437db8e86152..e557af7c293a 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2626,6 +2626,10 @@ fsplit-loops Common Report Var(flag_split_loops) Optimization Perform loop splitting. +fnoalias= +Common Joined Var(noalias) Init(0) +Move innermost loops to separate function + funwind-tables Common Report Var(flag_unwind_tables) Optimization Just generate unwind tables for exception handling. diff --git a/gcc/passes.def b/gcc/passes.def index e2610a38f416..f3401d235955 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_sprintf_length, false); NEXT_PASS (pass_walloca, /*strict_mode_p=*/true); NEXT_PASS (pass_build_cgraph_edges); + NEXT_PASS (pass_uninline_innermost_loops); TERMINATE_PASS_LIST (all_lowering_passes) /* Interprocedural optimization passes. */ @@ -209,6 +210,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_thread_jumps); NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */); NEXT_PASS (pass_chkp_opt); + NEXT_PASS (pass_peel_last_iteration); NEXT_PASS (pass_dce); NEXT_PASS (pass_stdarg); NEXT_PASS (pass_call_cdce); diff --git a/gcc/peel-last-iteration.c b/gcc/peel-last-iteration.c new file mode 100644 index 000000000000..7d6333a7e079 --- /dev/null +++ b/gcc/peel-last-iteration.c @@ -0,0 +1,442 @@ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-set.h" +#include "machmode.h" +#include "vec.h" +#include "double-int.h" +#include "input.h" +#include "alias.h" +#include "symtab.h" +#include "options.h" +#include "wide-int.h" +#include "inchash.h" +#include "tree.h" +#include "fold-const.h" +#include "hashtab.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "function.h" +#include "rtl.h" +#include "flags.h" +#include "statistics.h" +#include "real.h" +#include "fixed-value.h" +#include "insn-config.h" +#include "expmed.h" +#include "dojump.h" +#include "explow.h" +#include "calls.h" +#include "varasm.h" +#include "stmt.h" +#include "expr.h" +#include "gimple-pretty-print.h" +#include "predict.h" +#include "dominance.h" +#include "cfg.h" +#include "cfgloop.h" +#include "cfghooks.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "tree-cfg.h" +#include "gimplify-me.h" +#include "gimple-ssa.h" +#include "tree-ssa-operands.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" +#include "dumpfile.h" +#include "tree-pass.h" + +gcond * +get_loop_exit_condition (const struct loop *loop); + +extern const char * uninlined_function_postfix; + +void initialize_original_copy_tables (void); + +/* FIXME Refactor to separate functions. */ +static unsigned int +peel_in_fn (void) +{ + struct loop *loop; + basic_block *bbs, *copied_bbs; + basic_block bb, copied_bb; + + edge exit_edge, copied_exit_edge; + edge e, copied_e; + edge_iterator ei, copied_ei; + edge entry_edge; + basic_block exit_block; + basic_block copied_header; + + edge true_edge; + edge false_edge; + + gimple_stmt_iterator phis; + gimple_stmt_iterator copied_phis; + + gimple_stmt_iterator def_iterator; + gimple_stmt_iterator copied_def_iterator; + + gimple_stmt_iterator cond_iterator; + gimple *last_stmt; + + gimple *phi, *copied_phi; + gimple *use_stmt; + + tree phi_op; + tree copied_phi_op; + tree phi_result, copied_phi_result; + + imm_use_iterator iter; + use_operand_p use_p; + basic_block use_bb; + basic_block copied_def_bb; + gimple *def_stmt; + gimple *copied_def_stmt; + + basic_block removed_edge_dest; + basic_block dom_bb; + + enum tree_code old_cond_code; + enum tree_code new_cond_code; + gimple *exit_cond; + + unsigned i; + unsigned j; + unsigned num_nodes; + + const char * fun_to_uninline; + int fun_to_uninline_len; + + int uninlined_function_postfix_len; + + gimple *return_stmt; + + char fun_to_peel[256]; + + int k; + + entry_edge = NULL; + copied_def_stmt = NULL; + + fun_to_uninline = noalias; + fun_to_uninline_len = strlen (fun_to_uninline); + uninlined_function_postfix_len = strlen (uninlined_function_postfix); + + /* FIXME Make this more safe. */ + snprintf(fun_to_peel, sizeof fun_to_peel, "%s.%s", fun_to_uninline, + uninlined_function_postfix); + + loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES | LOOPS_HAVE_PREHEADERS); + + record_loop_exits (); + initialize_original_copy_tables (); + k = -1; + FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) + { + k++; + if (strncmp (function_name (cfun), fun_to_peel, fun_to_uninline_len + 1 + uninlined_function_postfix_len) != 0) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): wrong function %s\n", + __FILE__, __LINE__, __func__, + function_name (cfun)); + continue; + } + if (k != 0) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): wrong loop\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + + if (dump_file) + fprintf (dump_file, "%s:%d (%s): found our loop\n", + __FILE__, __LINE__, __func__ + ); + + if (loop->header->preds->length () != 2) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): wrong preds length\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + + exit_edge = single_exit (loop); + if (!exit_edge) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): not single exit edge\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + if (exit_edge->src != loop->header) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): exit edge not from header\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + + exit_cond = get_loop_exit_condition (loop); + if (!exit_cond) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): exit condition not found\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + + old_cond_code = gimple_cond_code (exit_cond); + if (old_cond_code != LE_EXPR && old_cond_code != GE_EXPR) + { + if (dump_file) + fprintf (dump_file, "%s:%d (%s): cond code is wrong\n", + __FILE__, __LINE__, __func__ + ); + continue; + } + + if (dump_file) + fprintf (dump_file, "%s:%d (%s): about to copy loop\n", + __FILE__, __LINE__, __func__ + ); + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + { + return_stmt = gimple_build_return (NULL_TREE); + gsi_insert_on_edge (e, return_stmt); + } + gsi_commit_edge_inserts (); + + if (old_cond_code == LE_EXPR) + new_cond_code = LT_EXPR; + else + new_cond_code = GT_EXPR; + + num_nodes = loop->num_nodes; + + create_preheader (loop, 0); + copied_bbs = XNEWVEC (basic_block, num_nodes); + bbs = get_loop_body (loop); + + FOR_EACH_EDGE (e, ei, loop->header->preds) + { + if (e->src != loop->latch) + entry_edge = e; + } + + /* Do the copying. */ + copy_bbs (bbs, num_nodes, copied_bbs, &exit_edge, 1, + &copied_exit_edge, loop_outer (loop), exit_edge->dest, true); + + /* Fix the condition in the original. */ + gimple_cond_set_condition (as_a (exit_cond), new_cond_code, + gimple_cond_lhs (exit_cond), gimple_cond_rhs (exit_cond)); + + /* Redirect edges. */ + exit_block = exit_edge->dest; + + copied_header = copied_bbs[0]; + redirect_edge_and_branch (exit_edge, copied_header); + + set_immediate_dominator (CDI_DOMINATORS, exit_block, copied_exit_edge->src); + set_immediate_dominator (CDI_DOMINATORS, copied_header, exit_edge->src); + + for (i = 0; i < num_nodes; ++i) + { + bb = bbs[i]; + copied_bb = copied_bbs[i]; + + /* Fix phi node. */ + for (phis = gsi_start_phis (bb), + copied_phis = gsi_start_phis (copied_bb); + !gsi_end_p (phis); + gsi_next (&phis), gsi_next (&copied_phis)) + { + phi = gsi_stmt (phis); + copied_phi = gsi_stmt (copied_phis); + + phi_result = gimple_phi_result (phi); + + copied_phi_result = gimple_phi_result (copied_phi); + /* Fix uses. */ + FOR_EACH_IMM_USE_STMT (use_stmt, iter, phi_result) + { + use_bb = gimple_bb (use_stmt); + + if (dominated_by_p (CDI_DOMINATORS, use_bb, copied_bbs[0])) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, copied_phi_result); + } + } + + /* Only for the header. */ + if (i == 0) + { + /* FIXME This is not a generic solution, but in this case it is + * sufficient. */ + add_phi_arg (as_a (copied_phi), phi_result, + exit_edge, UNKNOWN_LOCATION); + } + + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (e == entry_edge) + continue; + + phi_op = PHI_ARG_DEF_FROM_EDGE (phi, e); + + if (TREE_CODE (phi_op) == SSA_NAME) + { + def_stmt = SSA_NAME_DEF_STMT (phi_op); + copied_def_bb = get_bb_copy (gimple_bb (def_stmt)); + + copied_def_stmt = NULL; + if (gimple_code (def_stmt) == GIMPLE_PHI) + { + def_iterator = gsi_start_phis (gimple_bb (def_stmt)), + copied_def_iterator = gsi_start_phis (copied_def_bb); + } + else + { + def_iterator = gsi_start_bb (gimple_bb (def_stmt)), + copied_def_iterator = gsi_start_bb (copied_def_bb); + } + + /* Find the statement that defines the value for the back edge. */ + for (; + !gsi_end_p (def_iterator); + gsi_next (&def_iterator), gsi_next (&copied_def_iterator)) + { + if (gsi_stmt (def_iterator) == def_stmt) + { + copied_def_stmt = gsi_stmt (copied_def_iterator); + break; + } + } + + if (gimple_code (copied_def_stmt) == GIMPLE_PHI) + { + copied_phi_op = gimple_phi_result (copied_def_stmt); + } + else if (virtual_operand_p (gimple_phi_result (phi))) + { + copied_phi_op = gimple_vdef (copied_def_stmt); + } + else + { + copied_phi_op = gimple_assign_lhs (copied_def_stmt); + } + + /* Lookup the corresponding copied edge. */ + /* FIXME This is ugly. */ + for (j = 0; j < num_nodes; ++j) + { + if (bbs[j] == e->src) + break; + } + FOR_EACH_EDGE (copied_e, copied_ei, copied_bb->preds) + { + if (copied_e->src == copied_bbs[j]) + break; + } + + /* Fix phi args */ + add_phi_arg (as_a (copied_phi), copied_phi_op, + copied_e, UNKNOWN_LOCATION); + } + } + } + } + + for (i = 1; i < num_nodes; ++i) + { + bb = bbs[i]; + + cond_iterator = gsi_last_bb (bb); + last_stmt = gsi_stmt (cond_iterator); + if (gimple_code (last_stmt) == GIMPLE_COND + && gimple_cond_lhs (exit_cond) == gimple_cond_lhs (last_stmt) + && gimple_cond_rhs (exit_cond) == gimple_cond_rhs (last_stmt) + && gimple_cond_code (exit_cond) == gimple_cond_code (last_stmt)) + { + extract_true_false_edges_from_block (bb, + &true_edge, + &false_edge); + removed_edge_dest = false_edge->dest; + + remove_edge (false_edge); + gsi_remove (&cond_iterator, true); + + true_edge->flags &= ~EDGE_FALSE_VALUE; + true_edge->flags |= EDGE_FALLTHRU; + + dom_bb = recompute_dominator (CDI_DOMINATORS, removed_edge_dest); + set_immediate_dominator (CDI_DOMINATORS, removed_edge_dest, dom_bb); + } + } + } + free_original_copy_tables (); + release_recorded_exits (cfun); + + return 0; +} + +static bool +gate_peel_last_iteration (void) +{ + /* Run this only as a post processing step for the uninlining pass. */ + return noalias != 0; +} + +namespace { + +const pass_data pass_data_peel_last_iteration = +{ + GIMPLE_PASS, /* type */ + "peel-last-iteration", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_PEEL_LAST_ITERATION, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + (TODO_update_ssa | TODO_cleanup_cfg), /* todo_flags_finish */ +}; + +class pass_peel_last_iteration: public gimple_opt_pass +{ +public: + pass_peel_last_iteration(gcc::context *ctxt) + : gimple_opt_pass (pass_data_peel_last_iteration, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return gate_peel_last_iteration (); } + virtual unsigned int execute (function *) { return peel_in_fn (); } + +}; + +} // anon namespace + +gimple_opt_pass * +make_pass_peel_last_iteration (gcc::context *ctxt) +{ + return new pass_peel_last_iteration (ctxt); +} diff --git a/gcc/timevar.def b/gcc/timevar.def index 51ec0357656b..275e23a041e1 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -189,7 +189,9 @@ DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") +DEFTIMEVAR (TV_UNINLINE_INNERMOST_LOOPS, "uninline innermost loops") DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization") +DEFTIMEVAR (TV_PEEL_LAST_ITERATION, "peel last iteration") DEFTIMEVAR (TV_GRAPHITE , "Graphite") DEFTIMEVAR (TV_GRAPHITE_TRANSFORMS , "Graphite loop transforms") DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS , "Graphite data dep analysis") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 85bfba7ac280..0d54f61e7001 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -380,6 +380,7 @@ extern gimple_opt_pass *make_pass_if_conversion (gcc::context *ctxt); extern gimple_opt_pass *make_pass_loop_distribution (gcc::context *ctxt); extern gimple_opt_pass *make_pass_vectorize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_simduid_cleanup (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_uninline_innermost_loops (gcc::context *ctxt); extern gimple_opt_pass *make_pass_slp_vectorize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_complete_unroll (gcc::context *ctxt); extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt); @@ -478,6 +479,7 @@ extern gimple_opt_pass *make_pass_gen_hsail (gcc::context *ctxt); extern gimple_opt_pass *make_pass_warn_nonnull_compare (gcc::context *ctxt); extern gimple_opt_pass *make_pass_sprintf_length (gcc::context *ctxt); extern gimple_opt_pass *make_pass_walloca (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_peel_last_iteration (gcc::context *ctxt); /* IPA Passes */ extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 339eeafcfee3..5613eff983c3 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -158,7 +158,7 @@ along with GCC; see the file COPYING3. If not see /* True if read can be prefetched by a write prefetch. */ #ifndef READ_CAN_USE_WRITE_PREFETCH -#define READ_CAN_USE_WRITE_PREFETCH 0 +#define READ_CAN_USE_WRITE_PREFETCH 1 #endif /* The size of the block loaded by a single prefetch. Usually, this is @@ -172,7 +172,7 @@ along with GCC; see the file COPYING3. If not see /* Do we have a forward hardware sequential prefetching? */ #ifndef HAVE_FORWARD_PREFETCH -#define HAVE_FORWARD_PREFETCH 0 +#define HAVE_FORWARD_PREFETCH 1 #endif /* Do we have a backward hardware sequential prefetching? */ diff --git a/gcc/uninline-innermost-loops.c b/gcc/uninline-innermost-loops.c new file mode 100644 index 000000000000..3726e6efeeec --- /dev/null +++ b/gcc/uninline-innermost-loops.c @@ -0,0 +1,456 @@ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-set.h" +#include "machmode.h" +#include "vec.h" +#include "double-int.h" +#include "input.h" +#include "alias.h" +#include "symtab.h" +#include "options.h" +#include "wide-int.h" +#include "inchash.h" +#include "tree.h" +#include "fold-const.h" +#include "hashtab.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "function.h" +#include "rtl.h" +#include "flags.h" +#include "statistics.h" +#include "real.h" +#include "fixed-value.h" +#include "insn-config.h" +#include "expmed.h" +#include "dojump.h" +#include "explow.h" +#include "calls.h" +#include "varasm.h" +#include "stmt.h" +#include "expr.h" +#include "gimple-pretty-print.h" +#include "predict.h" +#include "dominance.h" +#include "cfg.h" +#include "cfgloop.h" +#include "cfghooks.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "tree-cfg.h" +#include "gimplify-me.h" +#include "gimple-ssa.h" +#include "tree-ssa-operands.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" +#include "dumpfile.h" +#include "tree-pass.h" +#include "ipa-ref.h" +#include "plugin-api.h" +#include "cgraph.h" +#include "function.h" +#include "tree-ssa.h" + +void +print_generic_expr (FILE *file, tree t, int flags); + +typedef vec expr_vec; + +extern const char * uninlined_function_postfix; +const char * uninlined_function_postfix = "innermost_loop"; + +static bool +is_defined_in_stmt (gimple *stmt, tree var) +{ + tree lhs; + if (gimple_code (stmt) == GIMPLE_ASSIGN) + { + lhs = gimple_assign_lhs (stmt); + if (lhs == var) + return true; + } + return false; +} + +static bool +is_defined_in_bb (gimple_stmt_iterator gsi, tree var) +{ + for (; !gsi_end_p (gsi); gsi_prev (&gsi)) + { + if (is_defined_in_stmt (gsi_stmt (gsi), var)) + return true; + } + return false; +} + +static bool +is_defined_outside (gimple *use, tree var) +{ + gimple_stmt_iterator gsi; + gsi = gsi_for_stmt (use); + gsi_prev (&gsi); + /* FIXME Recursively search other also predecessor bbs. */ + return !is_defined_in_bb (gsi, var); +} + +/* FIXME Is there a standard function to do this? */ +static bool +contained (expr_vec v, tree var) +{ + int i = 0; + tree u; + + FOR_EACH_VEC_ELT (v, i, u) + { + if (u == var) + return true; + } + return false; +} + +typedef void (*op_callback) (void * g, tree op, gimple *stmt, unsigned index); + +static void +for_each_operand_in_loop (struct loop * loop, op_callback cb, void * g) +{ + basic_block * bbs; + basic_block bb; + unsigned num_nodes; + unsigned i; + unsigned j; + gimple *stmt; + tree op; + + bbs = get_loop_body (loop); + num_nodes = loop->num_nodes; + for (i = 0; i < num_nodes; i++) + { + bb = bbs[i]; + gimple_stmt_iterator iter = gsi_start_bb (bb); + while (!gsi_end_p (iter)) + { + stmt = gsi_stmt (iter); + for (j = 0; j < gimple_num_ops (stmt); j++) + { + op = gimple_op (stmt, j); + if (op) + { + /* Here is the callback. */ + cb (g, op, stmt, j); + } + } + gsi_next (&iter); + } + } + free (bbs); +} + +static void +collect_cb (void * g, tree op, gimple *stmt, unsigned index) +{ + expr_vec * v = (expr_vec *) g; + enum tree_code code; + + if (gimple_code (stmt) == GIMPLE_ASSIGN && index == 0) + { + /* I assume operand 0 of an assignment is the lhs. */ + return; + } + + code = TREE_CODE (op); + if ((code == VAR_DECL || code == PARM_DECL) && + ! contained (*v, op) && + is_defined_outside (stmt, op)) + { + v->safe_push (op); + } +} + +static expr_vec +collect_def_outside (struct loop * loop) +{ + expr_vec v = vNULL; + for_each_operand_in_loop (loop, collect_cb, (void *) & v); + return v; +} + +static void +contains_call_cb (void * r, tree, gimple *stmt, unsigned) +{ + bool * ret = (bool *) r; + if (gimple_code (stmt) == GIMPLE_CALL) + *ret |= true; + else + *ret |= false; +} + +static bool +contains_call (struct loop * loop) +{ + bool ret; + ret = false; + for_each_operand_in_loop (loop, contains_call_cb, &ret); + return ret; +} + +struct var_to_parm_cb_p +{ + tree d; + tree d_parm; +}; + +static void +var_to_parm_cb (void * g, tree op, gimple *stmt, unsigned index) +{ + struct var_to_parm_cb_p * p = (struct var_to_parm_cb_p *) g; + + if (p->d == op) + { + gimple_set_op (stmt, index, p->d_parm); + } +} + +static void +convert_var_to_parm_uses (struct loop * loop, tree d, tree d_parm) +{ + struct var_to_parm_cb_p c; + c.d = d; + c.d_parm = d_parm; + for_each_operand_in_loop (loop, var_to_parm_cb, (void *) & c); +} + +static void +body_to_fun (struct loop * loop, expr_vec uv) +{ + tree name, decl, type, t, block; + gimple_seq body = NULL; + gimple *bind; + int i; + vec args; + struct function *fun; + basic_block new_bb, preheader; + edge out; + basic_block in_bb, out_bb; + + /* Make the new function similar to create_omp_child_function. */ + name = clone_function_name (cfun->decl, uninlined_function_postfix); + decl = build_decl (input_location, FUNCTION_DECL, name, NULL_TREE); + + TREE_STATIC (decl) = 1; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_NAMELESS (decl) = 0; + DECL_IGNORED_P (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; + DECL_EXTERNAL (decl) = 0; + DECL_CONTEXT (decl) = NULL_TREE; + + block = make_node (BLOCK); + DECL_INITIAL (decl) = block; + + t = build_decl (DECL_SOURCE_LOCATION (decl), + RESULT_DECL, NULL_TREE, void_type_node); + + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_CONTEXT (t) = decl; + DECL_RESULT (decl) = t; + + tree d_parm = NULL; + tree d = NULL; + tree t_first = NULL; + t = NULL; + + tree type_new = NULL; + + tree types = NULL_TREE; + tree *pp = &types; + + args = vNULL; + + /* Prepare the arguments collected earlier on. */ + i = 0; + FOR_EACH_VEC_ELT (uv, i, d) + { + args.safe_push (d); + + type_new = copy_node (TREE_TYPE (d)); + + *pp = build_tree_list (NULL, type_new); + pp = &TREE_CHAIN (*pp); + + d_parm = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, + DECL_NAME (d), type_new); + DECL_ARG_TYPE (d_parm) = TREE_TYPE (d); + DECL_ARTIFICIAL (d_parm) = 1; + DECL_CONTEXT (d_parm) = decl; + TYPE_RESTRICT (TREE_TYPE (d_parm)) = 1; + convert_var_to_parm_uses (loop, d, d_parm); + + if (!t_first) + { + t = d_parm; + t_first = d_parm; + } + else + { + DECL_CHAIN (t) = d_parm; + t = d_parm; + } + } + + type = build_function_type (void_type_node, types); + + DECL_ARGUMENTS (decl) = t_first; + TREE_TYPE (decl) = type; + + push_struct_function (decl); + cfun->function_end_locus = DECL_SOURCE_LOCATION (decl); + pop_cfun (); + + /* Fill the function body. */ + bind = gimple_build_bind (NULL, body, block); + gimple_seq_add_stmt (&body, bind); + + gimple_set_body (decl, body); + + fun = DECL_STRUCT_FUNCTION (decl); + + init_tree_ssa (fun); + + out = single_exit (loop); + + out_bb = split_edge (out); + + /* FIXME I patched this function such that + it always creates the + preheader. + */ + preheader = create_preheader (loop, 128); + in_bb = preheader; + + new_bb = move_sese_region_to_fn (fun, in_bb, + out_bb, NULL_TREE); + + DECL_STRUCT_FUNCTION (decl)->curr_properties = cfun->curr_properties; + + /* Make the new function known. */ + cgraph_node::add_new_function (decl, true); + + gimple_stmt_iterator call_iter = gsi_start_bb (new_bb); + gimple *call_stmt; + call_stmt = gimple_build_call_vec (decl, args); + gsi_insert_after (&call_iter, call_stmt, + GSI_NEW_STMT); + + cgraph_edge::rebuild_edges (); + + push_cfun (fun); + cgraph_edge::rebuild_edges (); + pop_cfun (); +} + +static unsigned int +extract_loop_to_function (void) +{ + struct loop *loop; + struct loop *lu; + expr_vec v = vNULL; + bool cc; + + const char * fun_to_uninline; + unsigned n; + + lu = NULL; + + record_loop_exits (); + + fun_to_uninline = noalias; + + n = 0; + FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) + { + /* FIXME Try to generalize this. */ + /* We assume that we outline at most one + innermost loop. + We choose the biggest loop, assuming + it will bring the most benefit. + FIXME Make sure there flows no data out of + the loop. + */ + if (single_exit (loop) && + strcmp (function_name (cfun), fun_to_uninline) == 0) + { + cc = contains_call (loop); + if (!cc && n < loop->num_nodes) + { + n = loop->num_nodes; + lu = loop; + } + } + } + + if (lu) + { + if (dump_file) + fprintf (dump_file, + "%s:%d (%s): uninlining in %s\n", + __FILE__, __LINE__, __func__, fun_to_uninline); + v = collect_def_outside (lu); + body_to_fun (lu, v); + } + + release_recorded_exits (cfun); + + return 0; +} + +static bool +gate_uninline_innermost_loops (void) +{ + return noalias != 0; +} + +namespace { + +const pass_data pass_data_uninline_innermost_loops = +{ + GIMPLE_PASS, /* type */ + "uninline-innermost-loops", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_UNINLINE_INNERMOST_LOOPS, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_uninline_innermost_loops: public gimple_opt_pass +{ +public: + pass_uninline_innermost_loops (gcc::context *ctxt) + : gimple_opt_pass (pass_data_uninline_innermost_loops, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) { return gate_uninline_innermost_loops (); } + unsigned int execute (function *) { return extract_loop_to_function (); } + +}; + +} // anon namespace + +gimple_opt_pass * +make_pass_uninline_innermost_loops (gcc::context *ctxt) +{ + return new pass_uninline_innermost_loops (ctxt); +} -- cgit v1.2.3