-rw-r--r--  gcc/Makefile.in                 3
-rw-r--r--  gcc/common.opt                  4
-rw-r--r--  gcc/config/aarch64/aarch64.c   77
-rw-r--r--  gcc/passes.def                  1
-rw-r--r--  gcc/timevar.def                 1
-rw-r--r--  gcc/tree-pass.h                 1
-rw-r--r--  gcc/uncse.c                   437
7 files changed, 492 insertions, 32 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index b5275263c20b..e39125436ee1 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1564,6 +1564,7 @@ OBJS = \
tree.o \
typed-splay-tree.o \
uninline-innermost-loops.o \
+ uncse.o \
valtrack.o \
value-prof.o \
var-tracking.o \
@@ -2540,7 +2541,7 @@ ALL_GTFILES_H := $(sort $(GTFILES_H) $(GTFILES_LANG_H))
$(ALL_GTFILES_H) gtype-desc.c gtype-desc.h gtype.state: s-gtype ; @true
### Common flags to gengtype [e.g. -v or -B backupdir]
-GENGTYPE_FLAGS=
+GENGTYPE_FLAGS=
gtyp-input.list: s-gtyp-input ; @true
s-gtyp-input: Makefile
diff --git a/gcc/common.opt b/gcc/common.opt
index ce7c2e57d40d..05e0fabe9013 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2582,6 +2582,10 @@ Common Var(flag_unconstrained_commons) Optimization
Assume common declarations may be overridden with ones with a larger
trailing array.
+funcse
+Common Report Var(flag_uncse) Init(0) Optimization
+Undo common subexpression elimination (CSE).
+
funit-at-a-time
Common Report Var(flag_unit_at_a_time) Init(1)
Compile whole compilation unit at a time.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 862492cdcb2e..91bc84012fdd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -252,8 +252,8 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
1, /* pre_modify */
0, /* post_modify */
0, /* register_offset */
- 1, /* register_sextend */
- 1, /* register_zextend */
+ 0, /* register_sextend */
+ 0, /* register_zextend */
0, /* imm_offset */
};
@@ -14841,33 +14841,40 @@ xgene1_strip_extended_register (rtx op, int *cost, bool speed ATTRIBUTE_UNUSED,
inside another operation such as (plus (mult X const)). Instead,
aarch64.md recognizes it as an operation with an embedded shift,
and we charge a cost accordingly. */
- if (GET_CODE (op) == MULT)
+ if (GET_CODE (op) == MULT
+ || GET_CODE (op) == ASHIFT)
{
rtx op0 = XEXP (op, 0);
rtx op1 = XEXP (op, 1);
- if (CONST_INT_P (op1)
- && exact_log2 (INTVAL (op1)) > 0)
+ int shift = 0;
+
+ if (CONST_INT_P (op1))
{
- if (exact_log2 (INTVAL (op1)) <= 4)
- {
- *cost += COSTS_N_INSNS(1);
+ shift = INTVAL (op1);
- /* The extended register form can include a zero-
- or sign-extend for free. */
- if (GET_CODE (op0) == ZERO_EXTEND
- || GET_CODE (op1) == SIGN_EXTEND)
- return XEXP (op0, 0);
- else
- return op0;
- }
+ if (GET_CODE (op) == MULT)
+ shift = exact_log2 (shift);
+ }
+
+ if (shift > 4)
+ {
+ /* The shifted register form can support a larger
+ left shift, but cannot include a free extend. */
+ *cost += COSTS_N_INSNS(2);
+ return op0;
+ }
+ else if (shift > 0)
+ {
+ *cost += COSTS_N_INSNS(1);
+
+ /* The extended register form can include a zero-
+ or sign-extend for free. */
+ if (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND)
+ return XEXP (op0, 0);
else
- {
- /* The shifted register form can support a larger
- left shift, but cannot include a free extend. */
- *cost += COSTS_N_INSNS(2);
- return op0;
- }
+ return op0;
}
}
@@ -14929,7 +14936,14 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
if (x == const0_rtx)
*cost = 0;
else
- *cost += COSTS_N_INSNS(1); /* base cost */
+ {
+ /* To an approximation, building any other constant is
+ proportionally expensive to the number of instructions
+ required to build that constant. This is true whether we
+ are compiling for SPEED or otherwise. */
+ *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
+ (NULL_RTX, x, false, mode));
+ }
return true;
case CONST_DOUBLE:
@@ -14953,7 +14967,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
/* Add the cost of complex addressing modes. */
addr = XEXP(op0, 0);
- *cost += aarch64_address_cost(addr, word_mode, 0, speed);
+ *cost += COSTS_N_INSNS(aarch64_address_cost(addr, mode, 0, speed));
return true;
case SUBREG:
@@ -14969,7 +14983,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
previously calculated value of n_minus_1 is not
useful. */
n_minus_1 = (GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - 1) / UNITS_PER_WORD;
- *cost = COSTS_N_INSNS(n_minus_1 + 16);
+ *cost = COSTS_N_INSNS(n_minus_1 + 16); /* Why +16 instead of +1 */
return true;
}
else
@@ -14979,7 +14993,8 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
return true;
}
- case ZERO_EXTRACT:
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
/* Bit-field insertion. */
/* Strip any redundant widening of the RHS to meet the width
of the target. */
@@ -15028,7 +15043,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
/* Add the cost of complex addressing modes. */
addr = XEXP(x, 0);
- *cost += aarch64_address_cost(addr, word_mode, 0, speed);
+ *cost += aarch64_address_cost(addr, mode, 0, speed);
return true;
case COMPARE:
@@ -15093,7 +15108,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
}
goto cost_minus_int;
}
-
+
/* Support for CMP (FP) */
if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
{
@@ -15348,7 +15363,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
/* UBFM/SBFM can incorporate zero/sign extend for free. */
if (GET_CODE (op0) == ZERO_EXTEND
- || GET_CODE (op1) == SIGN_EXTEND)
+ || GET_CODE (op0) == SIGN_EXTEND)
op0 = XEXP (op0, 0);
*cost += rtx_cost (op0, mode, ASHIFT, 0, speed);
@@ -15532,7 +15547,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
{
/* There is no integer SQRT, so only DIV and UDIV can get
here. */
- /* Integer divide of a register is variable latency.
+ /* Integer divide of a register is variable latency.
Without data, I assume an average of 16 cycles. */
/* Integer divide of a constant has known latency
depending on the constant. However, GCC can't
@@ -15575,7 +15590,7 @@ xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED
return true;
}
}
- else {
+ else {
if ( (GET_CODE(op0) == EQ || GET_CODE(op0) == NE || GET_CODE(op0) == GT || GET_CODE(op0) == GTU
|| GET_CODE(op0) == LT || GET_CODE(op0) == LTU || GET_CODE(op0) == GE || GET_CODE(op0) == GEU
|| GET_CODE(op0) == LE || GET_CODE(op0) == LEU)
diff --git a/gcc/passes.def b/gcc/passes.def
index 3831a74b1adf..9637edb57e20 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -446,6 +446,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_initialize_regs);
NEXT_PASS (pass_ud_rtl_dce);
NEXT_PASS (pass_combine);
+ NEXT_PASS (pass_uncse);
NEXT_PASS (pass_if_after_combine);
NEXT_PASS (pass_partition_blocks);
NEXT_PASS (pass_outof_cfg_layout_mode);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 275e23a041e1..cf2d436e0639 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -259,6 +259,7 @@ DEFTIMEVAR (TV_RELOAD , "reload")
DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
DEFTIMEVAR (TV_REE , "ree")
+DEFTIMEVAR (TV_UNCSE , "uncse")
DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index ed017b8eec44..ade32c09617e 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -563,6 +563,7 @@ extern rtl_opt_pass *make_pass_inc_dec (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_combine (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_uncse (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_if_after_combine (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_ree (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_partition_blocks (gcc::context *ctxt);
diff --git a/gcc/uncse.c b/gcc/uncse.c
new file mode 100644
index 000000000000..6762606bb8d0
--- /dev/null
+++ b/gcc/uncse.c
@@ -0,0 +1,437 @@
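+/* Un-CSE: forward-propagate a definition into all of its uses when doing
+   so does not increase the cost of any user, and delete the definition
+   afterwards.  This effectively undoes CSE decisions that are not
+   profitable.  The pass runs right after combine and is enabled with
+   -funcse (see passes.def and common.opt).  */
+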
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "hash-set.h"
+#include "vec.h"
+#include "input.h"
+#include "double-int.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
+#include "tm.h"
+#include "hard-reg-set.h"
+#include "predict.h"
+#include "function.h"
+#include "dominance.h"
+#include "predict.h"
+#include "tree.h"
+#include "rtl.h"
+#include "df.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "tree-pass.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "cfgrtl.h"
+#include "rtl-iter.h"
+
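+/* Statistics: the number of definitions forwarded into their uses, and
+   how many of those had more than one use.  */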
+static int num_changes;
+static int num_mul_changes;
+
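+/* Whether the basic block currently being processed is optimized for
+   speed; set per block in find_uncse_poss.  */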
+static bool optimize_this_for_speed_p;
+
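+/* Move the REG_DEAD notes of FROM_INSN that mention a register occurring
+   in EXPR over to TO_INSN.  */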
+static void
+move_dead_notes (rtx_insn *to_insn, rtx_insn *from_insn, rtx expr)
+{
+ rtx note;
+ rtx next_note;
+ rtx prev_note = NULL;
+ rtx datum;
+
+ for (note = REG_NOTES (from_insn); note; note = next_note)
+ {
+ next_note = XEXP (note, 1);
+ datum = XEXP (note, 0);
+
+ if (REG_NOTE_KIND (note) == REG_DEAD && rtx_referenced_p (datum, expr))
+ {
+ XEXP (note, 1) = REG_NOTES (to_insn);
+ REG_NOTES (to_insn) = note;
+ if (prev_note)
+ XEXP (prev_note, 1) = next_note;
+ else
+ REG_NOTES (from_insn) = next_note;
+ }
+ else prev_note = note;
+ }
+}
+
+/* Check whether USE was modified between DEF_INSN and TARGET_INSN
+   (both exclusive).  This is trivial to determine if both insns are
+   in the same basic block.  If not, they need to be in the same
+   extended basic block.  Any other case is treated as if USE had
+   been modified.  */
+static bool
+use_killed_between (rtx use, rtx_insn *def_insn, rtx_insn *target_insn)
+{
+ basic_block def_bb = BLOCK_FOR_INSN (def_insn);
+ basic_block target_bb = BLOCK_FOR_INSN (target_insn);
+
+ if (def_bb == target_bb)
+ {
+ /* There are cases where the use is *before* the definition.
+ E.g. uninitialized variables. */
+ if (DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn))
+ return true;
+
+ else
+ return modified_between_p (use, def_insn, target_insn);
+ }
+
+ else {
+ basic_block loop_bb = target_bb;
+
+ if (modified_in_p (use, BB_HEAD (target_bb))
+ || modified_between_p (use, BB_HEAD (target_bb), target_insn))
+ return true;
+
+ while (single_pred_p (loop_bb)) {
+ loop_bb = single_pred (loop_bb);
+
+ if (loop_bb == def_bb)
+ return modified_between_p (use, def_insn, BB_END (def_bb))
+ || modified_in_p (use, BB_END (def_bb));
+ else if (modified_in_p (use, BB_HEAD (loop_bb))
+ || modified_between_p (use, BB_HEAD (loop_bb), BB_END (loop_bb))
+ || modified_in_p (use, BB_END (loop_bb)))
+ return true;
+ }
+
+ return true;
+ }
+}
+
+/* Determine the cost of INSN.  Invokes set_rtx_cost on the whole SET
+   pattern so that both the SRC and the DEST of the SET contribute to
+   the result; insn_rtx_cost would only return the cost of the SRC and
+   not factor in the cost of the DEST.  */
+static int
+insn_cost (rtx_insn *insn)
+{
+ int cost = set_rtx_cost (PATTERN (insn), optimize_this_for_speed_p);
+
+ if (dump_file && cost > 20)
+ {
+ fprintf (dump_file, "LARGE COST FOR INSN: %d\n", cost);
+ print_rtl_single (dump_file, insn);
+ }
+
+ return cost;
+}
+
+/* Return TRUE if the use of DEST_REG at *LOC in the insn of USE can be
+   replaced with the value set by DEF_INSN.  */
+static bool
+try_prop_def_to_use (rtx_insn *def_insn, rtx dest_reg, df_ref use, rtx *loc)
+{
+ rtx src = SET_SRC (PATTERN (def_insn));
+ rtx_insn *use_insn = DF_REF_INSN (use);
+
+ if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
+ return false;
+
+ if (DF_REF_IS_ARTIFICIAL (use))
+ return false;
+
+ rtx use_set = single_set (use_insn);
+
+ /* The insn needs to be either a single SET or a debug insn.  */
+ if (!use_set && !DEBUG_INSN_P (use_insn))
+ return false;
+
+ rtx reg = DF_REF_REG (use);
+
+ /* Only substitute if the use is a plain register of the same mode.  */
+ if (!REG_P (reg) || GET_MODE (reg) != GET_MODE (dest_reg))
+ return false;
+
+ /* Do not forward-propagate across a loop boundary (e.g. into a loop).  */
+ if (BLOCK_FOR_INSN (def_insn)->loop_father != BLOCK_FOR_INSN (use_insn)->loop_father)
+ return false;
+
+ df_ref use_iterator;
+
+ /* Bail out if any use of DEST_REG in USE_INSN has more than one
+    reaching definition.  */
+ FOR_EACH_INSN_INFO_USE (use_iterator, DF_INSN_INFO_GET (use_insn))
+ {
+ if (DF_REF_REGNO (use_iterator) == REGNO (dest_reg))
+ {
+ df_link *chain = DF_REF_CHAIN (use_iterator);
+
+ if (chain->next)
+ return false;
+ }
+ }
+
+ /* The register needs to occur in the SET_SRC of the use insn.  */
+ if (use_set && !reg_mentioned_p (DF_REF_REG (use), SET_SRC (use_set)))
+ return false;
+
+ /* Check whether SRC was modified between DEF_INSN and USE_INSN.  */
+ if (!CONSTANT_P (src) && use_killed_between (src, def_insn, use_insn))
+ return false;
+
+
+ int old_costs = insn_cost (use_insn);
+
+ validate_unshare_change (use_insn, loc, src, true);
+
+ if (!DEBUG_INSN_P (use_insn))
+ {
+ int code = recog_memoized (use_insn);
+
+ /* Check whether the changed instruction is no longer recognized.  */
+ if (code == -1)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "insn not recognized:\n");
+ print_rtl_single (dump_file, use_insn);
+ }
+
+ return false;
+ }
+
+ int new_costs = insn_cost (use_insn);
+
+ /* Only accept the change if it does not increase the cost.  */
+ return new_costs <= old_costs;
+ }
+
+ return true;
+}
+
+
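+/* Return the def-use chain of the definition of REG in INSN.  */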
+static struct df_link *
+get_uses (rtx_insn *insn, rtx reg)
+{
+ df_ref def;
+
+ FOR_EACH_INSN_DEF (def, insn)
+ {
+ if (REGNO (DF_REF_REG (def)) == REGNO (reg))
+ break;
+ }
+
+ gcc_assert (def != NULL);
+
+ return DF_REF_CHAIN (def);
+}
+
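+/* Return true if CUR is executed after LAST, either later within the same
+   basic block or in a block that has LAST's block among its (transitive)
+   single predecessors.  */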
+static bool
+insn_later (rtx_insn *cur, rtx_insn *last)
+{
+ basic_block cur_bb = BLOCK_FOR_INSN (cur);
+ basic_block last_bb = BLOCK_FOR_INSN (last);
+
+ if (cur_bb == last_bb)
+ return DF_INSN_LUID (cur) > DF_INSN_LUID (last);
+
+ /* If the insns are in two different basic blocks, we can assume that
+    one has the other among its (transitive) single predecessors.  Return
+    true if CUR comes after LAST, i.e. if CUR_BB reaches LAST_BB by
+    walking single predecessors.  */
+ else
+ {
+ basic_block loop_bb = cur_bb;
+
+ while (single_pred_p (loop_bb))
+ {
+ loop_bb = single_pred (loop_bb);
+
+ if (loop_bb == last_bb)
+ return true;
+ }
+
+ return false;
+ }
+}
+
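+/* Dump DEF_INSN and the use insns its definition was forwarded into.  */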
+static void
+dump_uncse (rtx_insn *def_insn)
+{
+ fprintf (dump_file, "uncse def\n");
+ print_rtl_single (dump_file, def_insn);
+
+ struct df_link *ref_chain, *ref_link;
+
+ fprintf (dump_file, "into\n");
+ bool first = true;
+
+ rtx dest = SET_DEST (PATTERN (def_insn));
+
+ ref_chain = get_uses (def_insn, dest);
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ if (DF_REF_INSN_INFO (ref_link->ref))
+ {
+ rtx_insn *target_insn = DF_REF_INSN (ref_link->ref);
+
+ if (!first)
+ fprintf (dump_file, "and\n");
+
+ print_rtl_single (dump_file, target_insn);
+ first = false;
+ }
+}
+
+/* Return TRUE if DEF_INSN could be forwarded into all of
+   its uses and was therefore deleted.  */
+static bool
+try_uncse_def (rtx_insn *def_insn)
+{
+ /* Only handle plain SET patterns.  */
+ if (!INSN_P(def_insn)
+ || GET_CODE (PATTERN (def_insn)) != SET)
+ return false;
+
+ rtx dest = SET_DEST (PATTERN (def_insn));
+
+ /* For now, only handle assignments to a register.  */
+ if (!REG_P (dest) || GET_MODE (dest) == VOIDmode)
+ return false;
+
+ rtx src = SET_SRC (PATTERN (def_insn));
+
+ /* Skip plain register-to-register copies.  */
+ if (REG_P (src))
+ return false;
+
+ /* Do not propagate a definition with side effects.  */
+ if (side_effects_p (src))
+ return false;
+
+ /* Ignore the definition if its source contains a (non-readonly)
+    memory access.  */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, src, ALL)
+ if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
+ return false;
+
+ struct df_link *ref_chain, *ref_link;
+ rtx_insn *last_insn = NULL;
+ bool replaced_all = true;
+ int number_replaced = 0;
+
+ ref_chain = get_uses (def_insn, dest);
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ {
+ if (DF_REF_INSN_INFO (ref_link->ref))
+ {
+ df_ref use = ref_link->ref;
+ rtx_insn *target_insn = DF_REF_INSN (use);
+
+ if (!target_insn->deleted()
+ && try_prop_def_to_use (def_insn, dest, use, DF_REF_LOC (use)))
+ {
+ number_replaced++;
+
+ /* Remember the last use insn so REG_DEAD notes can be moved there.  */
+ if (!last_insn || insn_later(target_insn, last_insn))
+ last_insn = target_insn;
+
+ continue;
+ }
+ }
+
+ replaced_all = false;
+ break;
+ }
+
+ if (replaced_all && apply_change_group ())
+ {
+ if (last_insn)
+ move_dead_notes (last_insn, def_insn, src);
+
+ num_changes++;
+
+ if (number_replaced > 1)
+ num_mul_changes++;
+
+ if (dump_file)
+ dump_uncse (def_insn);
+
+ delete_insn_and_edges (def_insn);
+
+ return true;
+ }
+
+ cancel_changes (0);
+ return false;
+}
+
+/* Walk all insns and try to forward each definition into all of its
+   uses.  Entry point of the uncse pass.  */
+static void
+find_uncse_poss ()
+{
+ num_changes = 0;
+ num_mul_changes = 0;
+
+ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
+ df_analyze ();
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ basic_block bb;
+ rtx_insn *insn;
+
+ int cleanup = 0;
+ const char *fname = current_function_name();
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ optimize_this_for_speed_p = optimize_bb_for_speed_p (bb);
+
+ FOR_BB_INSNS (bb, insn)
+ cleanup |= try_uncse_def (insn);
+ }
+
+ if (dump_file)
+ fprintf (dump_file,
+ "\nNumber changes in %s: %d %d (with multiple uses)\n\n",
+ fname, num_changes, num_mul_changes);
+}
+
+static unsigned int
+rest_of_handle_uncse ()
+{
+ find_uncse_poss();
+ return 0;
+}
+
+namespace {
+
+const pass_data pass_data_uncse =
+{
+ RTL_PASS, /* type */
+ "uncse", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_UNCSE, /* tv_id */
+ PROP_cfglayout, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+class pass_uncse : public rtl_opt_pass
+{
+public:
+ pass_uncse (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_uncse, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return optimize > 0 && flag_uncse; }
+
+ virtual unsigned int execute (function *)
+ {
+ return rest_of_handle_uncse ();
+ }
+
+}; // class pass_uncse
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_uncse (gcc::context *ctxt)
+{
+ return new pass_uncse (ctxt);
+}