diff options
author | Christoph Muellner <christoph.muellner@theobroma-systems.com> | 2018-08-08 11:40:22 +0200 |
---|---|---|
committer | Christoph Muellner <christoph.muellner@theobroma-systems.com> | 2019-08-30 20:10:53 +0200 |
commit | b73264e3d1eb934f48e1e99f709828590ae81c88 (patch) | |
tree | 8c582adc3fa58db0a93ca87e505421c1709b206c | |
parent | 53e60509c403cc02da5a324a5a9ec724ce2ea6ee (diff) |
tree-ssa-list-find-pipeline: Add pipelining loads for list finds.
This pass can be activated with -flist-find-pipeline.
This patch optimizes a common linked list idiom (example from
Coremark's 'core_list_find') of the following form:
while (list && (list->info->idx != info->idx))
list=list->next;
return list;
This idiom introduces a number of dependent-loads across the code path.
However, the dereference of list and the assignment of
list_{i+1} = list_{i}->next (i ... iteration)
only depends on the first condition (i.e. “list != NULL”) and can be
moved earlier.
The [list-find pass] is an experimental pass (to be generalised in a
next step) that provides a targeted implementation of a software pipeliner
for loops iterating over linked list and hoisting the list = list->next
dereference (for the next iteration) above the comparison of the index field.
In SSA form, the loop should thus become (all conditions need to be
inverted):
if (!list_{i})
return NULL; // forward-propagate from the
// if-condition
list_{i+1} = list_{i};
if (list_{i}->idx == info->idx)
return list{i};
which should be unrolled at least once to allow using two distinct registers
for list_{i} and list_{i+1} and avoids any additional register moves.
Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
-rw-r--r-- | gcc/Makefile.in | 1 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/passes.def | 1 | ||||
-rw-r--r-- | gcc/tree-pass.h | 1 | ||||
-rw-r--r-- | gcc/tree-ssa-list-find-pipeline.c | 555 |
5 files changed, 562 insertions, 0 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 20bee0494b1b..a8f4e407e5f4 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1569,6 +1569,7 @@ OBJS = \ tree-ssa-uninit.o \ tree-ssa.o \ tree-ssanames.o \ + tree-ssa-list-find-pipeline.o \ tree-stdarg.o \ tree-streamer.o \ tree-streamer-in.o \ diff --git a/gcc/common.opt b/gcc/common.opt index b52ef0b38c81..ffdc4300e668 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1798,6 +1798,10 @@ fleading-underscore Common Report Var(flag_leading_underscore) Init(-1) Give external symbols a leading underscore. +flist-find-pipeline +Common Report Var(list_find_pipe) Optimization +Pipeline the find operation in a linked list. + floop-optimize Common Ignore Does nothing. Preserved for backward compatibility. diff --git a/gcc/passes.def b/gcc/passes.def index 3ebcfc30349f..d8f8793549a1 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -76,6 +76,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_rebuild_cgraph_edges); NEXT_PASS (pass_local_fn_summary); + NEXT_PASS (pass_list_find_pipeline); NEXT_PASS (pass_early_inline); NEXT_PASS (pass_all_early_optimizations); PUSH_INSERT_PASSES_WITHIN (pass_all_early_optimizations) diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 93a6a99eb7a1..88a27838afd9 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt); extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_list_find_pipeline (gcc::context *ctxt); /* Current optimization pass. */ extern opt_pass *current_pass; diff --git a/gcc/tree-ssa-list-find-pipeline.c b/gcc/tree-ssa-list-find-pipeline.c new file mode 100644 index 000000000000..f871569de50c --- /dev/null +++ b/gcc/tree-ssa-list-find-pipeline.c @@ -0,0 +1,555 @@ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-set.h" +#include "machmode.h" +#include "vec.h" +#include "double-int.h" +#include "input.h" +#include "alias.h" +#include "symtab.h" +#include "options.h" +#include "wide-int.h" +#include "inchash.h" +#include "tree.h" +#include "fold-const.h" +#include "hashtab.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "function.h" +#include "rtl.h" +#include "flags.h" +#include "statistics.h" +#include "real.h" +#include "fixed-value.h" +#include "insn-config.h" +#include "profile-count.h" +#include "expmed.h" +#include "dojump.h" +#include "explow.h" +#include "calls.h" +#include "varasm.h" +#include "stmt.h" +#include "expr.h" +#include "gimple-pretty-print.h" +#include "predict.h" +#include "dominance.h" +#include "cfg.h" +#include "cfgloop.h" +#include "cfghooks.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "tree-cfg.h" +#include "gimplify-me.h" +#include "gimple-ssa.h" +#include "tree-ssa-operands.h" +#include "tree-phinodes.h" +#include "ssa-iterators.h" +#include "dumpfile.h" +#include "tree-pass.h" +#include "ipa-ref.h" +#include "plugin-api.h" +#include "cgraph.h" +#include "function.h" + +#include "stringpool.h" +#include "tree-vrp.h" +#include "tree-ssanames.h" +#include "tree-into-ssa.h" + +typedef vec <tree> expr_vec; + +// FIXME Copied from tree-ssa-math-opts.c +static void +build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, + tree type, tree val) +{ + gimple *use_stmt; + imm_use_iterator use_iter; + use_operand_p use_p; + + tree result = make_ssa_name (type, NULL); + + gimple *stmt = gimple_build_assign (result, NOP_EXPR, val); + gimple_set_location (stmt, loc); + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, val) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + SET_USE (use_p, result); + update_stmt (use_stmt); + } + + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + update_stmt (stmt); +} + +static void insert_casts (basic_block body, int n) +{ + gimple_stmt_iterator iter; + gimple *body_last_stmt; + tree cond_rhs; + + // FIXME Hardcoded loop id. + if (n == 2) + { + iter = gsi_last_bb (body); + body_last_stmt = gsi_stmt (iter); + cond_rhs = gimple_cond_rhs (body_last_stmt); + build_and_insert_cast (&iter, UNKNOWN_LOCATION, + short_integer_type_node, cond_rhs); + } + +} + +static unsigned int +num_phis (basic_block bb) +{ + gimple_stmt_iterator si; + unsigned int np = 0; + for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) + np++; + return np; +} + +// FIXME This pass is in a bad state. +// * Get rid of warnings. +// * Refactor to separate functions. +// * Minimize number of update_stmt. +// * Find where you can reduce duplicate work. +// * Think of consistent variable namings. +static unsigned int +list_find_pipeline (void) +{ + struct loop * loop; + edge e; + edge_iterator ei; + tree op0, op1, op01; + tree def_rhs, cond_rhs; + tree phi_op; + tree back_edge_phi_op; + + tree constant_null_pointer; + + gimple *use_stmt; + imm_use_iterator use_iter; + use_operand_p use_p; + + gimple *new_next_stmt; + // FIXME this is an awful name. + tree new_new_list; + + tree new_list; + tree old_list; + tree copy_lhs; + tree phi_result; + gimple *next_stmt; + gimple *next_copy; + gimple *phi_stmt; + gimple_stmt_iterator cond_iterator; + gimple *last_stmt; + gimple *exit_phi_stmt; + gimple *exit_first_stmt; + + tree exit_lhs; + tree exit_rhs; + + gimple *def_stmt; + + basic_block use_bb; + + edge entry_edge; + edge exit_edge = NULL; + edge back_edge; + edge body_entry_edge = NULL; + edge body_exit_edge = NULL; + edge body_latch_edge = NULL; + + edge new_exit_edge; + edge new_body_entry_edge = NULL; + edge new_body_exit_edge; + edge new_body_latch_edge = NULL; + + basic_block body; + basic_block latch; + + basic_block new_header; + basic_block new_body; + + gimple_stmt_iterator iter; + + bool complex_case; + + FILE * dump_f = dump_file; + + loop_optimizer_init (LOOPS_HAVE_SIMPLE_LATCHES | LOOPS_HAVE_PREHEADERS); + record_loop_exits (); + initialize_original_copy_tables (); + + FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) + { + if (loop->header->preds->length () != 2) + continue; + + if (loop->header->succs->length () != 2) + continue; + + // This will we overwritten so we keep it. + latch = loop->latch; + FOR_EACH_EDGE (e, ei, loop->header->preds) + { + if (e->src != latch) + entry_edge = e; + else + back_edge = e; + } + + FOR_EACH_EDGE (e, ei, loop->header->succs) + { + if (loop_exit_edge_p (loop, e)) + exit_edge = e; + else + body_entry_edge = e; + } + + // The predecessor must dominate the header. + if (entry_edge->src->succs->length () != 1) + continue; + + // We only want a loop of header, body and latch. + if (loop->num_nodes != 3) + continue; + + // We only want exactly one phi node. + if (num_phis (loop->header) != 1) + continue; + + // This is the basic block in the loop, + // where most of the work is done. + body = body_entry_edge->dest; + // Body must have exactly two successors. + if (body->succs->length () != 2) + continue; + + if (!can_duplicate_block_p (loop->header)) + continue; + if (!can_duplicate_block_p (body)) + continue; + + FOR_EACH_EDGE (e, ei, body->succs) + { + if (e->dest == latch) + body_latch_edge = e; + else + body_exit_edge = e; + } + + // Both exit edges must point to the same bb. + if (body_exit_edge->dest != exit_edge->dest) + continue; + + // Find and check the condition. + cond_iterator = gsi_last_bb (loop->header); + last_stmt = gsi_stmt (cond_iterator); + if (dump_f) + print_gimple_stmt (dump_f, last_stmt, 0, 0); + + if (gimple_code (last_stmt) != GIMPLE_COND) + continue; + + if (gimple_cond_code (last_stmt) != NE_EXPR) + continue; + + cond_rhs = gimple_cond_rhs (last_stmt); + if (!operand_equal_p (cond_rhs, null_pointer_node, 0)) + continue; + + new_list = gimple_cond_lhs (last_stmt); + + // Also the exit node must have only one phi node. + // FIXME This is not a sufficiently safe check. + iter = gsi_start_bb (exit_edge->dest); + exit_first_stmt = gsi_stmt (iter); + exit_lhs = gimple_assign_lhs (exit_first_stmt); + exit_rhs = gimple_assign_rhs1 (exit_first_stmt); + + if (exit_rhs == new_list) + { + if (exit_edge->dest->preds->length () != 2) + continue; + + // FIXME Do all alteration *after* the analysis phase. + gsi_remove (&iter, true); + exit_phi_stmt = create_phi_node (exit_lhs, exit_edge->dest); + + FOR_EACH_EDGE (e, ei, exit_edge->dest->preds) + { + SET_PHI_ARG_DEF (exit_phi_stmt, e->dest_idx, exit_rhs); + } + + if (dump_f) + fprintf (dump_f, "list-find-pipeline: replace assignment by phi node in loop number %d\n", loop->num); + + } + else if (num_phis (exit_edge->dest) != 1) + { + continue; + } + + // Find the defining statement. + def_stmt = SSA_NAME_DEF_STMT (new_list); + if (dump_f) + print_gimple_stmt (dump_f, def_stmt, 0, 0); + + if (gimple_code (def_stmt) == GIMPLE_ASSIGN) + { + // Is it the first statement in the header? + if (def_stmt != gsi_stmt (gsi_start_bb (loop->header))) + continue; + complex_case = true; + next_stmt = def_stmt; + } + else if (gimple_code (def_stmt) == GIMPLE_PHI) + { + // Is it the last statement in the latch? + back_edge_phi_op = PHI_ARG_DEF_FROM_EDGE (def_stmt, back_edge); + next_stmt = SSA_NAME_DEF_STMT (back_edge_phi_op); + if (next_stmt != gsi_stmt (gsi_last_bb (latch))) + continue; + complex_case = false; + new_list = gimple_assign_lhs (next_stmt); + } + else + continue; + + next_copy = gimple_copy (next_stmt); + + // Analyze the defining statement (acutally its copy). + // It should be of the form new_list = old_list->next. + def_rhs = gimple_assign_rhs1 (next_copy); + if (TREE_CODE (def_rhs) != COMPONENT_REF) + continue; + if (TREE_OPERAND_LENGTH (def_rhs) != 3) + continue; + + op1 = TREE_OPERAND (def_rhs, 1); + if (TREE_CODE (op1) != FIELD_DECL) + continue; + if (TREE_OPERAND_LENGTH (op1) != 0) + continue; + + op0 = TREE_OPERAND (def_rhs, 0); + if (TREE_CODE (op0) != MEM_REF) + continue; + if (TREE_OPERAND_LENGTH (op0) != 2) + continue; + + op01 = TREE_OPERAND (op0, 1); + if (TREE_CODE (op01) != INTEGER_CST) + continue; + if (!operand_equal_p (op01, integer_zero_node, 0)) + continue; + + old_list = TREE_OPERAND (op0, 0); + if (TREE_CODE (old_list) != SSA_NAME) + continue; + + if (TREE_OPERAND_LENGTH (old_list) != 0) + continue; + + // The old list must be defined by the phi node. + phi_stmt = SSA_NAME_DEF_STMT (old_list); + if (gimple_code (phi_stmt) != GIMPLE_PHI) + continue; + + if (gimple_phi_num_args (phi_stmt) != 2) + continue; + + if (dump_f) + print_gimple_stmt (dump_f, phi_stmt, 0, 0); + + if (!complex_case) + gcc_assert (phi_stmt == def_stmt); + + // The operand coming in by the entry edge. + phi_op = PHI_ARG_DEF_FROM_EDGE (phi_stmt, entry_edge); + if (TREE_CODE (phi_op) != SSA_NAME) + continue; + + // We found a feasible loop. + if (dump_f) + fprintf (dump_f, "list-find-pipeline: Found feasible loop number %d\n", loop->num); + + insert_casts (body, loop->num); + + phi_result = gimple_phi_result (phi_stmt); + + if (complex_case) + { + // Set the new pointer base in next_copy. + TREE_OPERAND (op0, 0) = phi_op; + + copy_lhs = copy_ssa_name (new_list, next_copy); + gimple_assign_set_lhs (next_copy, copy_lhs); + update_stmt (next_copy); + + // The next_copy must be at the end of the single predecessor block. + iter = gsi_last_bb (entry_edge->src); + gsi_insert_after (&iter, next_copy, GSI_NEW_STMT); + + SET_PHI_ARG_DEF (phi_stmt, entry_edge->dest_idx, copy_lhs); + + // Remove the original next_stmt temporarily, insert it later. + iter = gsi_for_stmt (next_stmt); + gsi_remove (&iter, false); + + // Set all uses of new_list in header and body to the + // name we get via the phi node. + // Also in the phi node of the block after the loop. + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, new_list) + { + use_bb = gimple_bb (use_stmt); + + if (use_bb == loop->header || + use_bb == body || + use_bb == exit_edge->dest) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + SET_USE (use_p, phi_result); + update_stmt (use_stmt); + } + } + + // Insert the next_stmt at the top of body. + iter = gsi_start_bb (body); + gsi_insert_before (&iter, next_stmt, GSI_NEW_STMT); + // Correct the phi argument. + SET_PHI_ARG_DEF (phi_stmt, back_edge->dest_idx, new_list); + + if (dump_f) + print_gimple_stmt (dump_f, next_copy, 0, 0); + } + else + { + // Remove the original next_stmt temporarily, insert it later. + iter = gsi_for_stmt (next_stmt); + gsi_remove (&iter, false); + iter = gsi_start_bb (body); + gsi_insert_before (&iter, next_stmt, GSI_NEW_STMT); + } + + // Unroll once. + new_header = duplicate_block (loop->header, body_latch_edge, body); + FOR_EACH_EDGE (e, ei, new_header->succs) + { + if (e->dest != exit_edge->dest) + new_body_entry_edge = e; + else + new_exit_edge = e; + } + new_body = duplicate_block (body, new_body_entry_edge, new_header); + FOR_EACH_EDGE (e, ei, new_body->succs) + { + if (e->dest != exit_edge->dest) + new_body_latch_edge = e; + else + new_body_exit_edge = e; + } + + redirect_edge_succ (new_body_latch_edge, latch); + + set_immediate_dominator (CDI_DOMINATORS, new_header, body); + set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); + set_immediate_dominator (CDI_DOMINATORS, latch, new_body); + + remove_phi_nodes (new_header); + + iter = gsi_start_phis (exit_edge->dest); + exit_phi_stmt = gsi_stmt (iter); + constant_null_pointer = null_pointer_node; + + SET_PHI_ARG_DEF (exit_phi_stmt, new_exit_edge->dest_idx, constant_null_pointer); + SET_PHI_ARG_DEF (exit_phi_stmt, new_body_exit_edge->dest_idx, new_list); + + update_ssa (TODO_update_ssa); + + SET_PHI_ARG_DEF (exit_phi_stmt, new_body_exit_edge->dest_idx, new_list); + + // We know this is the new list, since we put it there. + iter = gsi_start_bb (new_body); + new_next_stmt = gsi_stmt (iter); + new_new_list = gimple_assign_lhs (new_next_stmt); + SET_PHI_ARG_DEF (phi_stmt, back_edge->dest_idx, new_new_list); + update_stmt (phi_stmt); + update_stmt (new_next_stmt); + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, phi_result) + { + use_bb = gimple_bb (use_stmt); + + if (use_bb == new_header || + use_bb == new_body) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + { + SET_USE (use_p, new_list); + } + update_stmt (use_stmt); + } + } + } + + free_original_copy_tables (); + release_recorded_exits (cfun); + + + return 0; +} + +static bool +gate_list_find_pipeline (void) +{ + return list_find_pipe; +} + +namespace { + +const pass_data pass_data_list_find_pipeline = +{ + GIMPLE_PASS, /* type */ + "list-find-pipeline", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_list_find_pipeline: public gimple_opt_pass +{ +public: + pass_list_find_pipeline (gcc::context *ctxt) + : gimple_opt_pass (pass_data_list_find_pipeline, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) { return gate_list_find_pipeline (); } + unsigned int execute (function *) { return list_find_pipeline (); } + +}; + +} // anon namespace + +gimple_opt_pass * +make_pass_list_find_pipeline (gcc::context *ctxt) +{ + return new pass_list_find_pipeline (ctxt); +} |