-rw-r--r--  gcc/ChangeLog                            |   57
-rw-r--r--  gcc/builtins.c                           |   83
-rw-r--r--  gcc/builtins.def                         |    4
-rw-r--r--  gcc/defaults.h                           |    5
-rw-r--r--  gcc/doc/tm.texi                          |   27
-rw-r--r--  gcc/doc/tm.texi.in                       |   14
-rw-r--r--  gcc/expr.c                               | 1356
-rw-r--r--  gcc/expr.h                               |   21
-rw-r--r--  gcc/target.def                           |   17
-rw-r--r--  gcc/target.h                             |   11
-rw-r--r--  gcc/targhooks.c                          |   49
-rw-r--r--  gcc/targhooks.h                          |    1
-rw-r--r--  gcc/testsuite/ChangeLog                  |    6
-rw-r--r--  gcc/testsuite/gcc.dg/pr52171.c           |   12
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr52171.c  |   23
-rw-r--r--  gcc/tree-ssa-strlen.c                    |   87
-rw-r--r--  gcc/tree.c                               |    7
17 files changed, 1162 insertions(+), 618 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ff69cf53dd3..75b4c01ba68 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,60 @@
+2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
+
+ PR tree-optimization/52171
+ * builtins.c (expand_cmpstrn_or_cmpmem): Delete, moved elsewhere.
+ (expand_builtin_memcmp): New arg RESULT_EQ. All callers changed.
+ Look for constant strings. Move some code to emit_block_cmp_hints
+ and use it.
+ * builtins.def (BUILT_IN_MEMCMP_EQ): New.
+ * defaults.h (COMPARE_MAX_PIECES): New macro.
+ * expr.c (move_by_pieces_d, store_by_pieces_d): Remove old structs.
+ (move_by_pieces_1, store_by_pieces_1, store_by_pieces_2): Remove.
+ (clear_by_pieces_1): Don't declare. Move definition before use.
+ (can_do_by_pieces): New static function.
+ (can_move_by_pieces): Use it. Return bool.
+ (by_pieces_ninsns): Renamed from move_by_pieces_ninsns. New arg
+ OP. All callers changed. Handle COMPARE_BY_PIECES.
+ (class pieces_addr): New.
+ (pieces_addr::pieces_addr, pieces_addr::decide_autoinc,
+ pieces_addr::adjust, pieces_addr::increment_address,
+ pieces_addr::maybe_predec, pieces_addr::maybe_postinc): New member
+ functions for it.
+ (class op_by_pieces_d): New.
+ (op_by_pieces_d::op_by_pieces_d, op_by_pieces_d::run): New member
+ functions for it.
+ (class move_by_pieces_d, class compare_by_pieces_d,
+ class store_by_pieces_d): New subclasses of op_by_pieces_d.
+ (move_by_pieces_d::prepare_mode, move_by_pieces_d::generate,
+ move_by_pieces_d::finish_endp, store_by_pieces_d::prepare_mode,
+ store_by_pieces_d::generate, store_by_pieces_d::finish_endp,
+ compare_by_pieces_d::generate, compare_by_pieces_d::prepare_mode,
+ compare_by_pieces_d::finish_mode): New member functions.
+ (compare_by_pieces, emit_block_cmp_via_cmpmem): New static
+ functions.
+ (expand_cmpstrn_or_cmpmem): Moved here from builtins.c.
+ (emit_block_cmp_hints): New function.
+ (move_by_pieces, store_by_pieces, clear_by_pieces): Rewrite to just
+ use the newly defined classes.
+ * expr.h (by_pieces_constfn): New typedef.
+ (can_store_by_pieces, store_by_pieces): Use it in arg declarations.
+ (emit_block_cmp_hints, expand_cmpstrn_or_cmpmem): Declare.
+ (move_by_pieces_ninsns): Don't declare.
+ (can_move_by_pieces): Change return value to bool.
+ * target.def (TARGET_USE_BY_PIECES_INFRASTRUCTURE_P): Update docs.
+ (compare_by_pieces_branch_ratio): New hook.
+ * target.h (enum by_pieces_operation): Add COMPARE_BY_PIECES.
+ (by_pieces_ninsns): Declare.
+ * targhooks.c (default_use_by_pieces_infrastructure_p): Handle
+ COMPARE_BY_PIECES.
+ (default_compare_by_pieces_branch_ratio): New function.
+ * targhooks.h (default_compare_by_pieces_branch_ratio): Declare.
+ * doc/tm.texi.in (STORE_MAX_PIECES, COMPARE_MAX_PIECES): Document.
+ * doc/tm.texi: Regenerate.
+ * tree-ssa-strlen.c: Include "builtins.h".
+ (handle_builtin_memcmp): New static function.
+ (strlen_optimize_stmt): Call it for BUILT_IN_MEMCMP.
+ * tree.c (build_common_builtin_nodes): Create __builtin_memcmp_eq.
+
2016-06-03 Alan Hayward <alan.hayward@arm.com>

 * tree-vect-stmts.c (vect_stmt_relevant_p): Do not vectorize non live
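The motivating case for PR 52171 is a memcmp whose result is only ever tested for equality with zero; before this patch such calls always ended up in the memcmp library routine. A minimal sketch of the pattern the new code targets (illustrative only; the pr52171.c testcases named above are not reproduced in this excerpt):

#include <string.h>

struct key { unsigned long a, b; };

int
same_key (const struct key *x, const struct key *y)
{
  /* Only the zero/nonzero outcome is used, so tree-ssa-strlen can
     retarget the call to __builtin_memcmp_eq, and with a small
     constant length it can then be expanded inline by pieces.  */
  return memcmp (x, y, sizeof (struct key)) == 0;
}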
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 931d4a6b7bc..d5191761680 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3671,53 +3671,24 @@ expand_cmpstr (insn_code icode, rtx target, rtx arg1_rtx, rtx arg2_rtx,
return NULL_RTX;
}
-/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
- ARG3_TYPE is the type of ARG3_RTX. Return the result rtx on success,
- otherwise return null. */
-
-static rtx
-expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
- rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
- HOST_WIDE_INT align)
-{
- machine_mode insn_mode = insn_data[icode].operand[0].mode;
-
- if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
- target = NULL_RTX;
-
- struct expand_operand ops[5];
- create_output_operand (&ops[0], target, insn_mode);
- create_fixed_operand (&ops[1], arg1_rtx);
- create_fixed_operand (&ops[2], arg2_rtx);
- create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
- TYPE_UNSIGNED (arg3_type));
- create_integer_operand (&ops[4], align);
- if (maybe_expand_insn (icode, 5, ops))
- return ops[0].value;
- return NULL_RTX;
-}
-
/* Expand expression EXP, which is a call to the memcmp built-in function.
Return NULL_RTX if we failed and the caller should emit a normal call,
- otherwise try to get the result in TARGET, if convenient. */
+ otherwise try to get the result in TARGET, if convenient.
+ RESULT_EQ is true if we can relax the returned value to be either zero
+ or nonzero, without caring about the sign. */
static rtx
-expand_builtin_memcmp (tree exp, rtx target)
+expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
{
if (!validate_arglist (exp,
POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
return NULL_RTX;
- /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
- implementing memcmp because it will stop if it encounters two
- zero bytes. */
- insn_code icode = direct_optab_handler (cmpmem_optab, SImode);
- if (icode == CODE_FOR_nothing)
- return NULL_RTX;
-
tree arg1 = CALL_EXPR_ARG (exp, 0);
tree arg2 = CALL_EXPR_ARG (exp, 1);
tree len = CALL_EXPR_ARG (exp, 2);
+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+ location_t loc = EXPR_LOCATION (exp);
unsigned int arg1_align = get_pointer_alignment (arg1) / BITS_PER_UNIT;
unsigned int arg2_align = get_pointer_alignment (arg2) / BITS_PER_UNIT;
@@ -3726,22 +3697,38 @@ expand_builtin_memcmp (tree exp, rtx target)
if (arg1_align == 0 || arg2_align == 0)
return NULL_RTX;
- machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
- location_t loc = EXPR_LOCATION (exp);
rtx arg1_rtx = get_memory_rtx (arg1, len);
rtx arg2_rtx = get_memory_rtx (arg2, len);
- rtx arg3_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
+ rtx len_rtx = expand_normal (fold_convert_loc (loc, sizetype, len));
/* Set MEM_SIZE as appropriate. */
- if (CONST_INT_P (arg3_rtx))
+ if (CONST_INT_P (len_rtx))
{
- set_mem_size (arg1_rtx, INTVAL (arg3_rtx));
- set_mem_size (arg2_rtx, INTVAL (arg3_rtx));
+ set_mem_size (arg1_rtx, INTVAL (len_rtx));
+ set_mem_size (arg2_rtx, INTVAL (len_rtx));
}
- rtx result = expand_cmpstrn_or_cmpmem (icode, target, arg1_rtx, arg2_rtx,
- TREE_TYPE (len), arg3_rtx,
- MIN (arg1_align, arg2_align));
+ by_pieces_constfn constfn = NULL;
+
+ const char *src_str = c_getstr (arg1);
+ if (src_str == NULL)
+ src_str = c_getstr (arg2);
+ else
+ std::swap (arg1_rtx, arg2_rtx);
+
+ /* If SRC is a string constant and the block comparison would be done
+ by pieces, we can avoid loading the string from memory
+ and only store the computed constants. */
+ if (src_str
+ && CONST_INT_P (len_rtx)
+ && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1)
+ constfn = builtin_memcpy_read_str;
+
+ rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
+ TREE_TYPE (len), target,
+ result_eq, constfn,
+ CONST_CAST (char *, src_str));
+
if (result)
{
/* Return the value in the proper mode for this function. */
@@ -6073,9 +6060,15 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
case BUILT_IN_BCMP:
case BUILT_IN_MEMCMP:
- target = expand_builtin_memcmp (exp, target);
+ case BUILT_IN_MEMCMP_EQ:
+ target = expand_builtin_memcmp (exp, target, fcode == BUILT_IN_MEMCMP_EQ);
if (target)
return target;
+ if (fcode == BUILT_IN_MEMCMP_EQ)
+ {
+ tree newdecl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+ TREE_OPERAND (exp, 1) = build_fold_addr_expr (newdecl);
+ }
break;
case BUILT_IN_SETJMP:
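Two details of the expansion above are worth spelling out. The operand swap makes a constant string always the second comparison operand, since the by-pieces machinery only accepts a constfn for that side; and the length guard keeps builtin_memcpy_read_str from reading past the constant's terminating NUL. Hypothetical user code exercising both paths:

#include <string.h>

/* Constant already second: used directly as the constfn operand.  */
int f1 (const char *p) { return memcmp (p, "abcd", 5); }

/* Constant first: arg1_rtx/arg2_rtx are swapped during expansion.
   A length of 5 equals strlen ("abcd") + 1, so the guard holds.  */
int f2 (const char *p) { return memcmp ("abcd", p, 5); }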
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 2fc7f65d95a..527503800ff 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -864,6 +864,10 @@ DEF_BUILTIN_STUB (BUILT_IN_STACK_SAVE, "__builtin_stack_save")
DEF_BUILTIN_STUB (BUILT_IN_STACK_RESTORE, "__builtin_stack_restore")
DEF_BUILTIN_STUB (BUILT_IN_ALLOCA_WITH_ALIGN, "__builtin_alloca_with_align")
+/* An internal version of memcmp, used when the result is only tested for
+ equality with zero. */
+DEF_BUILTIN_STUB (BUILT_IN_MEMCMP_EQ, "__builtin_memcmp_eq")
+
/* Object size checking builtins. */
DEF_GCC_BUILTIN (BUILT_IN_OBJECT_SIZE, "object_size", BT_FN_SIZE_CONST_PTR_INT, ATTR_PURE_NOTHROW_LEAF_LIST)
DEF_EXT_LIB_BUILTIN_CHKP (BUILT_IN_MEMCPY_CHK, "__memcpy_chk", BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, ATTR_RET1_NOTHROW_NONNULL_LEAF)
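The handle_builtin_memcmp function added to tree-ssa-strlen.c (listed in the ChangeLog above; its hunk falls outside this excerpt) is what retargets calls to this stub. Conceptually the transformation is (a sketch, not literal GIMPLE):

/* Before: res might be used as a tri-state ordering result.  */
res = __builtin_memcmp (p, q, n);
if (res == 0) ...

/* After: every use of res is == or != 0, so the sign is dead and
   the cheaper equality-only expansion may be used.  */
res = __builtin_memcmp_eq (p, q, n);
if (res == 0) ...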
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 3e18338c99a..319a7dce6f5 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1039,6 +1039,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
#endif
+/* Likewise for block comparisons. */
+#ifndef COMPARE_MAX_PIECES
+#define COMPARE_MAX_PIECES MOVE_MAX_PIECES
+#endif
+
#ifndef MAX_MOVE_MAX
#define MAX_MOVE_MAX MOVE_MAX
#endif
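As with the other *_MAX_PIECES knobs, a port overrides this in its target header before defaults.h supplies the fallback. A hypothetical example (no in-tree port is implied):

/* In some <port>.h, assuming cheap 16-byte loads for comparisons:  */
#define COMPARE_MAX_PIECES 16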
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a343e913eda..b318615b7b3 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6315,8 +6315,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage
units.
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},
-@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.
-These describe the type of memory operation under consideration.
+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or
+@code{COMPARE_BY_PIECES}. These describe the type of memory operation
+under consideration.
The parameter @var{speed_p} is true if the code is currently being
optimized for speed rather than size.
@@ -6333,11 +6334,33 @@ in code size, for example where the number of insns emitted to perform a
move would be greater than that of a library call.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_COMPARE_BY_PIECES_BRANCH_RATIO (machine_mode @var{mode})
+When expanding a block comparison in @var{mode}, GCC can try to reduce the
+number of branches at the expense of more memory operations. This hook
+allows the target to override the default choice. It should return the
+factor by which branches should be reduced over the plain expansion with
+one comparison per @var{mode}-sized piece. A port can also prevent a
+particular mode from being used for block comparisons by returning a
+negative number from this hook.
+@end deftypefn
+
@defmac MOVE_MAX_PIECES
A C expression used by @code{move_by_pieces} to determine the largest unit
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
@end defmac
+@defmac STORE_MAX_PIECES
+A C expression used by @code{store_by_pieces} to determine the largest unit
+of a store used to write memory. Defaults to @code{MOVE_MAX_PIECES}, or two
+times the size of @code{HOST_WIDE_INT}, whichever is smaller.
+@end defmac
+
+@defmac COMPARE_MAX_PIECES
+A C expression used by @code{compare_by_pieces} to determine the largest unit
+of a load or store used to compare memory. Defaults to
+@code{MOVE_MAX_PIECES}.
+@end defmac
+
@defmac CLEAR_RATIO (@var{speed})
The threshold of number of scalar move insns, @emph{below} which a sequence
of insns should be generated to clear memory instead of a string clear insn
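To make the new hook concrete: a port could batch several word-mode compares per branch and fall back to the default elsewhere. A hypothetical sketch (the function and file names are invented; default_compare_by_pieces_branch_ratio is the fallback this patch declares in targhooks.h):

/* In a hypothetical <port>.c:  */
static int
example_compare_by_pieces_branch_ratio (machine_mode mode)
{
  if (mode == word_mode)
    return 4;	/* OR four pieces together, then branch once.  */
  return default_compare_by_pieces_branch_ratio (mode);
}

#undef TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
#define TARGET_COMPARE_BY_PIECES_BRANCH_RATIO \
  example_compare_by_pieces_branch_ratio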
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f963a586612..1e8423cb4e2 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4653,11 +4653,25 @@ If you don't define this, a reasonable default is used.
@hook TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+@hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
+
@defmac MOVE_MAX_PIECES
A C expression used by @code{move_by_pieces} to determine the largest unit
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
@end defmac
+@defmac STORE_MAX_PIECES
+A C expression used by @code{store_by_pieces} to determine the largest unit
+of a store used to write memory. Defaults to @code{MOVE_MAX_PIECES}, or two
+times the size of @code{HOST_WIDE_INT}, whichever is smaller.
+@end defmac
+
+@defmac COMPARE_MAX_PIECES
+A C expression used by @code{compare_by_pieces} to determine the largest unit
+of a load or store used to compare memory. Defaults to
+@code{MOVE_MAX_PIECES}.
+@end defmac
+
@defmac CLEAR_RATIO (@var{speed})
The threshold of number of scalar move insns, @emph{below} which a sequence
of insns should be generated to clear memory instead of a string clear insn
diff --git a/gcc/expr.c b/gcc/expr.c
index 3c7e71f7130..19af58fe03d 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -70,51 +70,12 @@ along with GCC; see the file COPYING3. If not see
the same indirect address eventually. */
int cse_not_expected;
-/* This structure is used by move_by_pieces to describe the move to
- be performed. */
-struct move_by_pieces_d
-{
- rtx to;
- rtx to_addr;
- int autinc_to;
- int explicit_inc_to;
- rtx from;
- rtx from_addr;
- int autinc_from;
- int explicit_inc_from;
- unsigned HOST_WIDE_INT len;
- HOST_WIDE_INT offset;
- int reverse;
-};
-
-/* This structure is used by store_by_pieces to describe the clear to
- be performed. */
-
-struct store_by_pieces_d
-{
- rtx to;
- rtx to_addr;
- int autinc_to;
- int explicit_inc_to;
- unsigned HOST_WIDE_INT len;
- HOST_WIDE_INT offset;
- rtx (*constfun) (void *, HOST_WIDE_INT, machine_mode);
- void *constfundata;
- int reverse;
-};
-
-static void move_by_pieces_1 (insn_gen_fn, machine_mode,
- struct move_by_pieces_d *);
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
-static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, machine_mode);
static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
-static void store_by_pieces_1 (struct store_by_pieces_d *, unsigned int);
-static void store_by_pieces_2 (insn_gen_fn, machine_mode,
- struct store_by_pieces_d *);
static rtx_insn *compress_float_constant (rtx, rtx);
static rtx get_subtarget (rtx);
static void store_constructor_field (rtx, unsigned HOST_WIDE_INT,
@@ -767,276 +728,799 @@ widest_int_mode_for_size (unsigned int size)
return mode;
}
+/* Determine whether an operation OP on LEN bytes with alignment ALIGN can
+ and should be performed piecewise. */
+
+static bool
+can_do_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align,
+ enum by_pieces_operation op)
+{
+ return targetm.use_by_pieces_infrastructure_p (len, align, op,
+ optimize_insn_for_speed_p ());
+}
+
/* Determine whether the LEN bytes can be moved by using several move
instructions. Return nonzero if a call to move_by_pieces should
succeed. */
-int
-can_move_by_pieces (unsigned HOST_WIDE_INT len,
- unsigned int align)
+bool
+can_move_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align)
{
- return targetm.use_by_pieces_infrastructure_p (len, align, MOVE_BY_PIECES,
- optimize_insn_for_speed_p ());
+ return can_do_by_pieces (len, align, MOVE_BY_PIECES);
}
-/* Generate several move instructions to copy LEN bytes from block FROM to
- block TO. (These are MEM rtx's with BLKmode).
+/* Return number of insns required to perform operation OP by pieces
+ for L bytes. ALIGN (in bits) is maximum alignment we can assume. */
- If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is
- used to push FROM to the stack.
+unsigned HOST_WIDE_INT
+by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
+ unsigned int max_size, by_pieces_operation op)
+{
+ unsigned HOST_WIDE_INT n_insns = 0;
- ALIGN is maximum stack alignment we can assume.
+ align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
- If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
- mempcpy, and if ENDP is 2 return memory the end minus one byte ala
- stpcpy. */
+ while (max_size > 1 && l > 0)
+ {
+ machine_mode mode;
+ enum insn_code icode;
-rtx
-move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
- unsigned int align, int endp)
+ mode = widest_int_mode_for_size (max_size);
+
+ if (mode == VOIDmode)
+ break;
+ unsigned int modesize = GET_MODE_SIZE (mode);
+
+ icode = optab_handler (mov_optab, mode);
+ if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
+ {
+ unsigned HOST_WIDE_INT n_pieces = l / modesize;
+ l %= modesize;
+ switch (op)
+ {
+ default:
+ n_insns += n_pieces;
+ break;
+
+ case COMPARE_BY_PIECES:
+ int batch = targetm.compare_by_pieces_branch_ratio (mode);
+ int batch_ops = 4 * batch - 1;
+ int full = n_pieces / batch;
+ n_insns += full * batch_ops;
+ if (n_pieces % batch != 0)
+ n_insns++;
+ break;
+
+ }
+ }
+ max_size = modesize;
+ }
+
+ gcc_assert (!l);
+ return n_insns;
+}
+
+/* Used when performing piecewise block operations, holds information
+ about one of the memory objects involved. The member functions
+ can be used to generate code for loading from the object and
+ updating the address when iterating. */
+
+class pieces_addr
{
- struct move_by_pieces_d data;
- machine_mode to_addr_mode;
- machine_mode from_addr_mode = get_address_mode (from);
- rtx to_addr, from_addr = XEXP (from, 0);
- unsigned int max_size = MOVE_MAX_PIECES + 1;
- enum insn_code icode;
+ /* The object being referenced, a MEM. Can be NULL_RTX to indicate
+ stack pushes. */
+ rtx m_obj;
+ /* The address of the object. Can differ from that seen in the
+ MEM rtx if we copied the address to a register. */
+ rtx m_addr;
+ /* Nonzero if the address of the object already has an autoincrement;
+ the sign says whether that is an increment or a decrement. */
+ signed char m_addr_inc;
+ /* Nonzero if we intend to use autoinc without the address already
+ having autoinc form. We will insert add insns around each memory
+ reference, expecting later passes to form autoinc addressing modes.
+ The only supported options are predecrement and postincrement. */
+ signed char m_explicit_inc;
+ /* True if we have either of the two possible cases of using
+ autoincrement. */
+ bool m_auto;
+ /* True if this is an address to be used for load operations rather
+ than stores. */
+ bool m_is_load;
+
+ /* Optionally, a function to obtain constants for any given offset into
+ the objects, and data associated with it. */
+ by_pieces_constfn m_constfn;
+ void *m_cfndata;
+public:
+ pieces_addr (rtx, bool, by_pieces_constfn, void *);
+ rtx adjust (machine_mode, HOST_WIDE_INT);
+ void increment_address (HOST_WIDE_INT);
+ void maybe_predec (HOST_WIDE_INT);
+ void maybe_postinc (HOST_WIDE_INT);
+ void decide_autoinc (machine_mode, bool, HOST_WIDE_INT);
+ int get_addr_inc ()
+ {
+ return m_addr_inc;
+ }
+};
- align = MIN (to ? MEM_ALIGN (to) : align, MEM_ALIGN (from));
+/* Initialize a pieces_addr structure from an object OBJ. IS_LOAD is
+ true if the operation to be performed on this object is a load
+ rather than a store. For stores, OBJ can be NULL, in which case we
+ assume the operation is a stack push. For loads, the optional
+ CONSTFN and its associated CFNDATA can be used in place of the
+ memory load. */
- data.offset = 0;
- data.from_addr = from_addr;
- if (to)
- {
- to_addr_mode = get_address_mode (to);
- to_addr = XEXP (to, 0);
- data.to = to;
- data.autinc_to
- = (GET_CODE (to_addr) == PRE_INC || GET_CODE (to_addr) == PRE_DEC
- || GET_CODE (to_addr) == POST_INC || GET_CODE (to_addr) == POST_DEC);
- data.reverse
- = (GET_CODE (to_addr) == PRE_DEC || GET_CODE (to_addr) == POST_DEC);
+pieces_addr::pieces_addr (rtx obj, bool is_load, by_pieces_constfn constfn,
+ void *cfndata)
+ : m_obj (obj), m_is_load (is_load), m_constfn (constfn), m_cfndata (cfndata)
+{
+ m_addr_inc = 0;
+ m_auto = false;
+ if (obj)
+ {
+ rtx addr = XEXP (obj, 0);
+ rtx_code code = GET_CODE (addr);
+ m_addr = addr;
+ bool dec = code == PRE_DEC || code == POST_DEC;
+ bool inc = code == PRE_INC || code == POST_INC;
+ m_auto = inc || dec;
+ if (m_auto)
+ m_addr_inc = dec ? -1 : 1;
+
+ /* While we have always looked for these codes here, the code
+ implementing the memory operation has never handled them.
+ Support could be added later if necessary or beneficial. */
+ gcc_assert (code != PRE_INC && code != POST_DEC);
}
else
{
- to_addr_mode = VOIDmode;
- to_addr = NULL_RTX;
- data.to = NULL_RTX;
- data.autinc_to = 1;
- if (STACK_GROWS_DOWNWARD)
- data.reverse = 1;
+ m_addr = NULL_RTX;
+ if (!is_load)
+ {
+ m_auto = true;
+ if (STACK_GROWS_DOWNWARD)
+ m_addr_inc = -1;
+ else
+ m_addr_inc = 1;
+ }
else
- data.reverse = 0;
+ gcc_assert (constfn != NULL);
}
- data.to_addr = to_addr;
- data.from = from;
- data.autinc_from
- = (GET_CODE (from_addr) == PRE_INC || GET_CODE (from_addr) == PRE_DEC
- || GET_CODE (from_addr) == POST_INC
- || GET_CODE (from_addr) == POST_DEC);
+ m_explicit_inc = 0;
+ if (constfn)
+ gcc_assert (is_load);
+}
+
+/* Decide whether to use autoinc for an address involved in a memory op.
+ MODE is the mode of the accesses, REVERSE is true if we've decided to
+ perform the operation starting from the end, and LEN is the length of
+ the operation. Don't override an earlier decision to set m_auto. */
+
+void
+pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
+ HOST_WIDE_INT len)
+{
+ if (m_auto || m_obj == NULL_RTX)
+ return;
- data.explicit_inc_from = 0;
- data.explicit_inc_to = 0;
- if (data.reverse) data.offset = len;
- data.len = len;
+ bool use_predec = (m_is_load
+ ? USE_LOAD_PRE_DECREMENT (mode)
+ : USE_STORE_PRE_DECREMENT (mode));
+ bool use_postinc = (m_is_load
+ ? USE_LOAD_POST_INCREMENT (mode)
+ : USE_STORE_POST_INCREMENT (mode));
+ machine_mode addr_mode = get_address_mode (m_obj);
+
+ if (use_predec && reverse)
+ {
+ m_addr = copy_to_mode_reg (addr_mode,
+ plus_constant (addr_mode,
+ m_addr, len));
+ m_auto = true;
+ m_explicit_inc = -1;
+ }
+ else if (use_postinc && !reverse)
+ {
+ m_addr = copy_to_mode_reg (addr_mode, m_addr);
+ m_auto = true;
+ m_explicit_inc = 1;
+ }
+ else if (CONSTANT_P (m_addr))
+ m_addr = copy_to_mode_reg (addr_mode, m_addr);
+}
+
+/* Adjust the address to refer to the data at OFFSET in MODE. If we
+ are using autoincrement for this address, we don't add the offset,
+ but we still modify the MEM's properties. */
+
+rtx
+pieces_addr::adjust (machine_mode mode, HOST_WIDE_INT offset)
+{
+ if (m_constfn)
+ return m_constfn (m_cfndata, offset, mode);
+ if (m_obj == NULL_RTX)
+ return NULL_RTX;
+ if (m_auto)
+ return adjust_automodify_address (m_obj, mode, m_addr, offset);
+ else
+ return adjust_address (m_obj, mode, offset);
+}
+
+/* Emit an add instruction to increment the address by SIZE. */
+
+void
+pieces_addr::increment_address (HOST_WIDE_INT size)
+{
+ rtx amount = gen_int_mode (size, GET_MODE (m_addr));
+ emit_insn (gen_add2_insn (m_addr, amount));
+}
+
+/* If we are supposed to decrement the address after each access, emit code
+ to do so now. Increment by SIZE (which has should have the correct sign
+ already). */
+
+void
+pieces_addr::maybe_predec (HOST_WIDE_INT size)
+{
+ if (m_explicit_inc >= 0)
+ return;
+ gcc_assert (HAVE_PRE_DECREMENT);
+ increment_address (size);
+}
+
+/* If we are supposed to increment the address after each access, emit code
+ to do so now. Increment by SIZE. */
+
+void
+pieces_addr::maybe_postinc (HOST_WIDE_INT size)
+{
+ if (m_explicit_inc <= 0)
+ return;
+ gcc_assert (HAVE_POST_INCREMENT);
+ increment_address (size);
+}
+
+/* This class describes an operation to be performed by pieces; the
+ derived classes below implement the specific operations. */
+
+class op_by_pieces_d
+{
+ protected:
+ pieces_addr m_to, m_from;
+ unsigned HOST_WIDE_INT m_len;
+ HOST_WIDE_INT m_offset;
+ unsigned int m_align;
+ unsigned int m_max_size;
+ bool m_reverse;
+
+ /* Virtual functions, overridden by derived classes for the specific
+ operation. */
+ virtual void generate (rtx, rtx, machine_mode) = 0;
+ virtual bool prepare_mode (machine_mode, unsigned int) = 0;
+ virtual void finish_mode (machine_mode)
+ {
+ }
+
+ public:
+ op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *,
+ unsigned HOST_WIDE_INT, unsigned int);
+ void run ();
+};
+
+/* The constructor for an op_by_pieces_d structure. We require two
+ objects named TO and FROM, which are identified as loads or stores
+ by TO_LOAD and FROM_LOAD. If FROM is a load, the optional FROM_CFN
+ and its associated FROM_CFN_DATA can be used to replace loads with
+ constant values. LEN describes the length of the operation. */
+
+op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
+ rtx from, bool from_load,
+ by_pieces_constfn from_cfn,
+ void *from_cfn_data,
+ unsigned HOST_WIDE_INT len,
+ unsigned int align)
+ : m_to (to, to_load, NULL, NULL),
+ m_from (from, from_load, from_cfn, from_cfn_data),
+ m_len (len), m_max_size (MOVE_MAX_PIECES + 1)
+{
+ int toi = m_to.get_addr_inc ();
+ int fromi = m_from.get_addr_inc ();
+ if (toi >= 0 && fromi >= 0)
+ m_reverse = false;
+ else if (toi <= 0 && fromi <= 0)
+ m_reverse = true;
+ else
+ gcc_unreachable ();
+
+ m_offset = m_reverse ? len : 0;
+ align = MIN (to ? MEM_ALIGN (to) : align,
+ from ? MEM_ALIGN (from) : align);
/* If copying requires more than two move insns,
copy addresses to registers (to make displacements shorter)
and use post-increment if available. */
- if (!(data.autinc_from && data.autinc_to)
- && move_by_pieces_ninsns (len, align, max_size) > 2)
+ if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2)
{
- /* Find the mode of the largest move...
- MODE might not be used depending on the definitions of the
- USE_* macros below. */
- machine_mode mode ATTRIBUTE_UNUSED
- = widest_int_mode_for_size (max_size);
+ /* Find the mode of the widest access we might use. */
+ machine_mode mode = widest_int_mode_for_size (m_max_size);
- if (USE_LOAD_PRE_DECREMENT (mode) && data.reverse && ! data.autinc_from)
- {
- data.from_addr = copy_to_mode_reg (from_addr_mode,
- plus_constant (from_addr_mode,
- from_addr, len));
- data.autinc_from = 1;
- data.explicit_inc_from = -1;
- }
- if (USE_LOAD_POST_INCREMENT (mode) && ! data.autinc_from)
- {
- data.from_addr = copy_to_mode_reg (from_addr_mode, from_addr);
- data.autinc_from = 1;
- data.explicit_inc_from = 1;
- }
- if (!data.autinc_from && CONSTANT_P (from_addr))
- data.from_addr = copy_to_mode_reg (from_addr_mode, from_addr);
- if (USE_STORE_PRE_DECREMENT (mode) && data.reverse && ! data.autinc_to)
- {
- data.to_addr = copy_to_mode_reg (to_addr_mode,
- plus_constant (to_addr_mode,
- to_addr, len));
- data.autinc_to = 1;
- data.explicit_inc_to = -1;
- }
- if (USE_STORE_POST_INCREMENT (mode) && ! data.reverse && ! data.autinc_to)
- {
- data.to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
- data.autinc_to = 1;
- data.explicit_inc_to = 1;
- }
- if (!data.autinc_to && CONSTANT_P (to_addr))
- data.to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
+ m_from.decide_autoinc (mode, m_reverse, len);
+ m_to.decide_autoinc (mode, m_reverse, len);
}
align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
+ m_align = align;
+}
- /* First move what we can in the largest integer mode, then go to
- successively smaller modes. */
+/* This function contains the main loop used for expanding a block
+ operation. First move what we can in the largest integer mode,
+ then go to successively smaller modes. For every access, the
+ derived class's generate function is called with the two operands. */
- while (max_size > 1 && data.len > 0)
+void
+op_by_pieces_d::run ()
+{
+ while (m_max_size > 1 && m_len > 0)
{
- machine_mode mode = widest_int_mode_for_size (max_size);
+ machine_mode mode = widest_int_mode_for_size (m_max_size);
if (mode == VOIDmode)
break;
- icode = optab_handler (mov_optab, mode);
- if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
- move_by_pieces_1 (GEN_FCN (icode), mode, &data);
+ if (prepare_mode (mode, m_align))
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ rtx to1 = NULL_RTX, from1;
+
+ while (m_len >= size)
+ {
+ if (m_reverse)
+ m_offset -= size;
+
+ to1 = m_to.adjust (mode, m_offset);
+ from1 = m_from.adjust (mode, m_offset);
+
+ m_to.maybe_predec (-(HOST_WIDE_INT)size);
+ m_from.maybe_predec (-(HOST_WIDE_INT)size);
+
+ generate (to1, from1, mode);
+
+ m_to.maybe_postinc (size);
+ m_from.maybe_postinc (size);
+
+ if (!m_reverse)
+ m_offset += size;
- max_size = GET_MODE_SIZE (mode);
+ m_len -= size;
+ }
+
+ finish_mode (mode);
+ }
+
+ m_max_size = GET_MODE_SIZE (mode);
}
/* The code above should have handled everything. */
- gcc_assert (!data.len);
+ gcc_assert (!m_len);
+}
+
+/* Derived class from op_by_pieces_d, providing support for block move
+ operations. */
+
+class move_by_pieces_d : public op_by_pieces_d
+{
+ insn_gen_fn m_gen_fun;
+ void generate (rtx, rtx, machine_mode);
+ bool prepare_mode (machine_mode, unsigned int);
+
+ public:
+ move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
+ unsigned int align)
+ : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align)
+ {
+ }
+ rtx finish_endp (int);
+};
+
+/* Return true if MODE can be used for a set of copies, given an
+ alignment ALIGN. Prepare whatever data is necessary for later
+ calls to generate. */
+
+bool
+move_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
+{
+ insn_code icode = optab_handler (mov_optab, mode);
+ m_gen_fun = GEN_FCN (icode);
+ return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode);
+}
+
+/* A callback used when iterating for a move_by_pieces_operation.
+ OP0 and OP1 are the destination and source of the copy, to be moved
+ in MODE. If OP0 is NULL, this means we should generate a push;
+ otherwise the move is emitted with the insn gen function saved by
+ prepare_mode. */
+
+void
+move_by_pieces_d::generate (rtx op0, rtx op1, machine_mode mode)
+{
+#ifdef PUSH_ROUNDING
+ if (op0 == NULL_RTX)
+ {
+ emit_single_push_insn (mode, op1, NULL);
+ return;
+ }
+#endif
+ emit_insn (m_gen_fun (op0, op1));
+}
+
+/* Perform the final adjustment at the end of a string to obtain the
+ correct return value for the block operation. If ENDP is 1 return
+ memory at the end ala mempcpy, and if ENDP is 2 return memory the
+ end minus one byte ala stpcpy. */
+
+rtx
+move_by_pieces_d::finish_endp (int endp)
+{
+ gcc_assert (!m_reverse);
+ if (endp == 2)
+ {
+ m_to.maybe_postinc (-1);
+ --m_offset;
+ }
+ return m_to.adjust (QImode, m_offset);
+}
+
+/* Generate several move instructions to copy LEN bytes from block FROM to
+ block TO. (These are MEM rtx's with BLKmode).
+
+ If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is
+ used to push FROM to the stack.
+
+ ALIGN is maximum stack alignment we can assume.
+
+ If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
+ mempcpy, and if ENDP is 2 return memory the end minus one byte ala
+ stpcpy. */
+
+rtx
+move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
+ unsigned int align, int endp)
+{
+#ifndef PUSH_ROUNDING
+ if (to == NULL)
+ gcc_unreachable ();
+#endif
+
+ move_by_pieces_d data (to, from, len, align);
+
+ data.run ();
if (endp)
+ return data.finish_endp (endp);
+ else
+ return to;
+}
+
+/* Derived class from op_by_pieces_d, providing support for block store
+ operations. */
+
+class store_by_pieces_d : public op_by_pieces_d
+{
+ insn_gen_fn m_gen_fun;
+ void generate (rtx, rtx, machine_mode);
+ bool prepare_mode (machine_mode, unsigned int);
+
+ public:
+ store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
+ unsigned HOST_WIDE_INT len, unsigned int align)
+ : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align)
+ {
+ }
+ rtx finish_endp (int);
+};
+
+/* Return true if MODE can be used for a set of stores, given an
+ alignment ALIGN. Prepare whatever data is necessary for later
+ calls to generate. */
+
+bool
+store_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
+{
+ insn_code icode = optab_handler (mov_optab, mode);
+ m_gen_fun = GEN_FCN (icode);
+ return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode);
+}
+
+/* A callback used when iterating for a store_by_pieces_operation.
+ OP0 is the destination and OP1 the constant to be stored in MODE,
+ obtained from the constfn; the store is emitted with the insn gen
+ function saved by prepare_mode. */
+
+void
+store_by_pieces_d::generate (rtx op0, rtx op1, machine_mode)
+{
+ emit_insn (m_gen_fun (op0, op1));
+}
+
+/* Perform the final adjustment at the end of a string to obtain the
+ correct return value for the block operation. If ENDP is 1 return
+ memory at the end ala mempcpy, and if ENDP is 2 return memory the
+ end minus one byte ala stpcpy. */
+
+rtx
+store_by_pieces_d::finish_endp (int endp)
+{
+ gcc_assert (!m_reverse);
+ if (endp == 2)
{
- rtx to1;
+ m_to.maybe_postinc (-1);
+ --m_offset;
+ }
+ return m_to.adjust (QImode, m_offset);
+}
+
+/* Determine whether the LEN bytes generated by CONSTFUN can be
+ stored to memory using several move instructions. CONSTFUNDATA is
+ a pointer which will be passed as argument in every CONSTFUN call.
+ ALIGN is maximum alignment we can assume. MEMSETP is true if this is
+ a memset operation and false if it's a copy of a constant string.
+ Return nonzero if a call to store_by_pieces should succeed. */
+
+int
+can_store_by_pieces (unsigned HOST_WIDE_INT len,
+ rtx (*constfun) (void *, HOST_WIDE_INT, machine_mode),
+ void *constfundata, unsigned int align, bool memsetp)
+{
+ unsigned HOST_WIDE_INT l;
+ unsigned int max_size;
+ HOST_WIDE_INT offset = 0;
+ machine_mode mode;
+ enum insn_code icode;
+ int reverse;
+ /* cst is set but not used if LEGITIMATE_CONSTANT doesn't use it. */
+ rtx cst ATTRIBUTE_UNUSED;
+
+ if (len == 0)
+ return 1;
+
+ if (!targetm.use_by_pieces_infrastructure_p (len, align,
+ memsetp
+ ? SET_BY_PIECES
+ : STORE_BY_PIECES,
+ optimize_insn_for_speed_p ()))
+ return 0;
+
+ align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
+
+ /* We would first store what we can in the largest integer mode, then go to
+ successively smaller modes. */
- gcc_assert (!data.reverse);
- if (data.autinc_to)
+ for (reverse = 0;
+ reverse <= (HAVE_PRE_DECREMENT || HAVE_POST_DECREMENT);
+ reverse++)
+ {
+ l = len;
+ max_size = STORE_MAX_PIECES + 1;
+ while (max_size > 1 && l > 0)
{
- if (endp == 2)
+ mode = widest_int_mode_for_size (max_size);
+
+ if (mode == VOIDmode)
+ break;
+
+ icode = optab_handler (mov_optab, mode);
+ if (icode != CODE_FOR_nothing
+ && align >= GET_MODE_ALIGNMENT (mode))
{
- if (HAVE_POST_INCREMENT && data.explicit_inc_to > 0)
- emit_insn (gen_add2_insn (data.to_addr, constm1_rtx));
- else
- data.to_addr = copy_to_mode_reg (to_addr_mode,
- plus_constant (to_addr_mode,
- data.to_addr,
- -1));
+ unsigned int size = GET_MODE_SIZE (mode);
+
+ while (l >= size)
+ {
+ if (reverse)
+ offset -= size;
+
+ cst = (*constfun) (constfundata, offset, mode);
+ if (!targetm.legitimate_constant_p (mode, cst))
+ return 0;
+
+ if (!reverse)
+ offset += size;
+
+ l -= size;
+ }
}
- to1 = adjust_automodify_address (data.to, QImode, data.to_addr,
- data.offset);
- }
- else
- {
- if (endp == 2)
- --data.offset;
- to1 = adjust_address (data.to, QImode, data.offset);
+
+ max_size = GET_MODE_SIZE (mode);
}
- return to1;
+
+ /* The code above should have handled everything. */
+ gcc_assert (!l);
}
+
+ return 1;
+}
+
+/* Generate several move instructions to store LEN bytes generated by
+ CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
+ pointer which will be passed as argument in every CONSTFUN call.
+ ALIGN is maximum alignment we can assume. MEMSETP is true if this is
+ a memset operation and false if it's a copy of a constant string.
+ If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
+ mempcpy, and if ENDP is 2 return memory the end minus one byte ala
+ stpcpy. */
+
+rtx
+store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
+ rtx (*constfun) (void *, HOST_WIDE_INT, machine_mode),
+ void *constfundata, unsigned int align, bool memsetp, int endp)
+{
+ if (len == 0)
+ {
+ gcc_assert (endp != 2);
+ return to;
+ }
+
+ gcc_assert (targetm.use_by_pieces_infrastructure_p
+ (len, align,
+ memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
+ optimize_insn_for_speed_p ()));
+
+ store_by_pieces_d data (to, constfun, constfundata, len, align);
+ data.run ();
+
+ if (endp)
+ return data.finish_endp (endp);
else
- return data.to;
+ return to;
}
-/* Return number of insns required to move L bytes by pieces.
- ALIGN (in bits) is maximum alignment we can assume. */
+/* Callback routine for clear_by_pieces.
+ Return const0_rtx unconditionally. */
-unsigned HOST_WIDE_INT
-move_by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
- unsigned int max_size)
+static rtx
+clear_by_pieces_1 (void *, HOST_WIDE_INT, machine_mode)
{
- unsigned HOST_WIDE_INT n_insns = 0;
+ return const0_rtx;
+}
- align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
+/* Generate several move instructions to clear LEN bytes of block TO. (A MEM
+ rtx with BLKmode). ALIGN is maximum alignment we can assume. */
- while (max_size > 1 && l > 0)
- {
- machine_mode mode;
- enum insn_code icode;
+static void
+clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
+{
+ if (len == 0)
+ return;
- mode = widest_int_mode_for_size (max_size);
+ store_by_pieces_d data (to, clear_by_pieces_1, NULL, len, align);
+ data.run ();
+}
- if (mode == VOIDmode)
- break;
+/* Derived class from op_by_pieces_d, providing support for block
+ compare operations. It stores the fail label to jump to in case of
+ miscomparison, and for branch ratios greater than 1, an accumulator
+ and the current and maximum counts before emitting another branch. */
- icode = optab_handler (mov_optab, mode);
- if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
- n_insns += l / GET_MODE_SIZE (mode), l %= GET_MODE_SIZE (mode);
+class compare_by_pieces_d : public op_by_pieces_d
+{
+ rtx_code_label *m_fail_label;
+ rtx m_accumulator;
+ int m_count, m_batch;
+
+ void generate (rtx, rtx, machine_mode);
+ bool prepare_mode (machine_mode, unsigned int);
+ void finish_mode (machine_mode);
+ public:
+ compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
+ void *op1_cfn_data, HOST_WIDE_INT len, int align,
+ rtx_code_label *fail_label)
+ : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align)
+ {
+ m_fail_label = fail_label;
+ }
+};
- max_size = GET_MODE_SIZE (mode);
+/* A callback used when iterating for a compare_by_pieces operation.
+ OP0 and OP1 are the values that have been loaded and should be
+ compared in MODE. The member variables hold the fail label and the
+ batching state. */
+
+void
+compare_by_pieces_d::generate (rtx op0, rtx op1, machine_mode mode)
+{
+ if (m_batch > 1)
+ {
+ rtx temp = expand_binop (mode, sub_optab, op0, op1, NULL_RTX,
+ true, OPTAB_LIB_WIDEN);
+ if (m_count != 0)
+ temp = expand_binop (mode, ior_optab, m_accumulator, temp, temp,
+ true, OPTAB_LIB_WIDEN);
+ m_accumulator = temp;
+
+ if (++m_count < m_batch)
+ return;
+
+ m_count = 0;
+ op0 = m_accumulator;
+ op1 = const0_rtx;
+ m_accumulator = NULL_RTX;
}
+ do_compare_rtx_and_jump (op0, op1, NE, true, mode, NULL_RTX, NULL,
+ m_fail_label, -1);
+}
- gcc_assert (!l);
- return n_insns;
+/* Return true if MODE can be used for a set of moves and comparisons,
+ given an alignment ALIGN. Prepare whatever data is necessary for
+ later calls to generate. */
+
+bool
+compare_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
+{
+ insn_code icode = optab_handler (mov_optab, mode);
+ if (icode == CODE_FOR_nothing
+ || align < GET_MODE_ALIGNMENT (mode)
+ || !can_compare_p (EQ, mode, ccp_jump))
+ return false;
+ m_batch = targetm.compare_by_pieces_branch_ratio (mode);
+ if (m_batch < 0)
+ return false;
+ m_accumulator = NULL_RTX;
+ m_count = 0;
+ return true;
}
-/* Subroutine of move_by_pieces. Move as many bytes as appropriate
- with move instructions for mode MODE. GENFUN is the gen_... function
- to make a move insn for that mode. DATA has all the other info. */
+/* Called after expanding a series of comparisons in MODE. If we have
+ accumulated results for which we haven't emitted a branch yet, do
+ so now. */
-static void
-move_by_pieces_1 (insn_gen_fn genfun, machine_mode mode,
- struct move_by_pieces_d *data)
+void
+compare_by_pieces_d::finish_mode (machine_mode mode)
{
- unsigned int size = GET_MODE_SIZE (mode);
- rtx to1 = NULL_RTX, from1;
+ if (m_accumulator != NULL_RTX)
+ do_compare_rtx_and_jump (m_accumulator, const0_rtx, NE, true, mode,
+ NULL_RTX, NULL, m_fail_label, -1);
+}
- while (data->len >= size)
- {
- if (data->reverse)
- data->offset -= size;
+/* Generate several move instructions to compare LEN bytes from blocks
+ ARG0 and ARG1. (These are MEM rtx's with BLKmode).
- if (data->to)
- {
- if (data->autinc_to)
- to1 = adjust_automodify_address (data->to, mode, data->to_addr,
- data->offset);
- else
- to1 = adjust_address (data->to, mode, data->offset);
- }
- if (data->autinc_from)
- from1 = adjust_automodify_address (data->from, mode, data->from_addr,
- data->offset);
- else
- from1 = adjust_address (data->from, mode, data->offset);
-
- if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
- emit_insn (gen_add2_insn (data->to_addr,
- gen_int_mode (-(HOST_WIDE_INT) size,
- GET_MODE (data->to_addr))));
- if (HAVE_PRE_DECREMENT && data->explicit_inc_from < 0)
- emit_insn (gen_add2_insn (data->from_addr,
- gen_int_mode (-(HOST_WIDE_INT) size,
- GET_MODE (data->from_addr))));
-
- if (data->to)
- emit_insn ((*genfun) (to1, from1));
- else
- {
-#ifdef PUSH_ROUNDING
- emit_single_push_insn (mode, from1, NULL);
-#else
- gcc_unreachable ();
-#endif
- }
+ ALIGN is the maximum alignment we can assume.
- if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
- emit_insn (gen_add2_insn (data->to_addr,
- gen_int_mode (size,
- GET_MODE (data->to_addr))));
- if (HAVE_POST_INCREMENT && data->explicit_inc_from > 0)
- emit_insn (gen_add2_insn (data->from_addr,
- gen_int_mode (size,
- GET_MODE (data->from_addr))));
+ Optionally, the caller can pass a constfn and associated data in A1_CFN
+ and A1_CFN_DATA, describing that the second operand being compared is a
+ known constant and how to obtain its data. */
- if (! data->reverse)
- data->offset += size;
+static rtx
+compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
+ rtx target, unsigned int align,
+ by_pieces_constfn a1_cfn, void *a1_cfn_data)
+{
+ rtx_code_label *fail_label = gen_label_rtx ();
+ rtx_code_label *end_label = gen_label_rtx ();
- data->len -= size;
- }
+ if (target == NULL_RTX
+ || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ target = gen_reg_rtx (TYPE_MODE (integer_type_node));
+
+ compare_by_pieces_d data (arg0, arg1, a1_cfn, a1_cfn_data, len, align,
+ fail_label);
+
+ data.run ();
+
+ emit_move_insn (target, const0_rtx);
+ emit_jump (end_label);
+ emit_barrier ();
+ emit_label (fail_label);
+ emit_move_insn (target, const1_rtx);
+ emit_label (end_label);
+
+ return target;
}
/* Emit code to move a block Y to a block X. This may be done with
@@ -1066,8 +1550,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
unsigned int align;
gcc_assert (size);
- if (CONST_INT_P (size)
- && INTVAL (size) == 0)
+ if (CONST_INT_P (size) && INTVAL (size) == 0)
return 0;
switch (method)
@@ -1394,6 +1877,99 @@ emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src,
return expand_call (call_expr, NULL_RTX, false);
}
+
+/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
+ ARG3_TYPE is the type of ARG3_RTX. Return the result rtx on success,
+ otherwise return null. */
+
+rtx
+expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
+ rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
+ HOST_WIDE_INT align)
+{
+ machine_mode insn_mode = insn_data[icode].operand[0].mode;
+
+ if (target && (!REG_P (target) || HARD_REGISTER_P (target)))
+ target = NULL_RTX;
+
+ struct expand_operand ops[5];
+ create_output_operand (&ops[0], target, insn_mode);
+ create_fixed_operand (&ops[1], arg1_rtx);
+ create_fixed_operand (&ops[2], arg2_rtx);
+ create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
+ TYPE_UNSIGNED (arg3_type));
+ create_integer_operand (&ops[4], align);
+ if (maybe_expand_insn (icode, 5, ops))
+ return ops[0].value;
+ return NULL_RTX;
+}
+
+/* Expand a block compare between X and Y with length LEN using the
+ cmpmem optab, placing the result in TARGET. LEN_TYPE is the type
+ of the expression that was used to calculate the length. ALIGN
+ gives the known minimum common alignment. */
+
+static rtx
+emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
+ unsigned align)
+{
+ /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
+ implementing memcmp because it will stop if it encounters two
+ zero bytes. */
+ insn_code icode = direct_optab_handler (cmpmem_optab, SImode);
+
+ if (icode == CODE_FOR_nothing)
+ return NULL_RTX;
+
+ return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, align);
+}
+
+/* Emit code to compare a block Y to a block X. This may be done with
+ string-compare instructions, with multiple scalar instructions,
+ or with a library call.
+
+ Both X and Y must be MEM rtx's. LEN is an rtx that says how long
+ they are. LEN_TYPE is the type of the expression that was used to
+ calculate it.
+
+ If EQUALITY_ONLY is true, it means we don't have to return the tri-state
+ value of a normal memcmp call, instead we can just compare for equality.
+
+ Optionally, the caller can pass a constfn and associated data in Y_CFN
+ and Y_CFN_DATA, describing that the second operand being compared is a
+ known constant and how to obtain its data.
+ Return the result of the comparison, or NULL_RTX if we failed to
+ perform the operation. */
+
+rtx
+emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target,
+ bool equality_only, by_pieces_constfn y_cfn,
+ void *y_cfndata)
+{
+ rtx result = 0;
+
+ if (CONST_INT_P (len) && INTVAL (len) == 0)
+ return const0_rtx;
+
+ gcc_assert (MEM_P (x) && MEM_P (y));
+ unsigned int align = MIN (MEM_ALIGN (x), MEM_ALIGN (y));
+ gcc_assert (align >= BITS_PER_UNIT);
+
+ x = adjust_address (x, BLKmode, 0);
+ y = adjust_address (y, BLKmode, 0);
+
+ if (equality_only
+ && CONST_INT_P (len)
+ && can_do_by_pieces (INTVAL (len), align, COMPARE_BY_PIECES))
+ result = compare_by_pieces (x, y, INTVAL (len), target, align,
+ y_cfn, y_cfndata);
+ else
+ result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
+
+ return result;
+}
/* Copy all or part of a value X into registers starting at REGNO.
The number of registers to be filled is NREGS. */
@@ -2330,308 +2906,6 @@ get_def_for_expr_class (tree name, enum tree_code_class tclass)
return def_stmt;
}
-
-/* Determine whether the LEN bytes generated by CONSTFUN can be
- stored to memory using several move instructions. CONSTFUNDATA is
- a pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. MEMSETP is true if this is
- a memset operation and false if it's a copy of a constant string.
- Return nonzero if a call to store_by_pieces should succeed. */
-
-int
-can_store_by_pieces (unsigned HOST_WIDE_INT len,
- rtx (*constfun) (void *, HOST_WIDE_INT, machine_mode),
- void *constfundata, unsigned int align, bool memsetp)
-{
- unsigned HOST_WIDE_INT l;
- unsigned int max_size;
- HOST_WIDE_INT offset = 0;
- machine_mode mode;
- enum insn_code icode;
- int reverse;
- /* cst is set but not used if LEGITIMATE_CONSTANT doesn't use it. */
- rtx cst ATTRIBUTE_UNUSED;
-
- if (len == 0)
- return 1;
-
- if (!targetm.use_by_pieces_infrastructure_p (len, align,
- memsetp
- ? SET_BY_PIECES
- : STORE_BY_PIECES,
- optimize_insn_for_speed_p ()))
- return 0;
-
- align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
-
- /* We would first store what we can in the largest integer mode, then go to
- successively smaller modes. */
-
- for (reverse = 0;
- reverse <= (HAVE_PRE_DECREMENT || HAVE_POST_DECREMENT);
- reverse++)
- {
- l = len;
- max_size = STORE_MAX_PIECES + 1;
- while (max_size > 1 && l > 0)
- {
- mode = widest_int_mode_for_size (max_size);
-
- if (mode == VOIDmode)
- break;
-
- icode = optab_handler (mov_optab, mode);
- if (icode != CODE_FOR_nothing
- && align >= GET_MODE_ALIGNMENT (mode))
- {
- unsigned int size = GET_MODE_SIZE (mode);
-
- while (l >= size)
- {
- if (reverse)
- offset -= size;
-
- cst = (*constfun) (constfundata, offset, mode);
- if (!targetm.legitimate_constant_p (mode, cst))
- return 0;
-
- if (!reverse)
- offset += size;
-
- l -= size;
- }
- }
-
- max_size = GET_MODE_SIZE (mode);
- }
-
- /* The code above should have handled everything. */
- gcc_assert (!l);
- }
-
- return 1;
-}
-
-/* Generate several move instructions to store LEN bytes generated by
- CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a
- pointer which will be passed as argument in every CONSTFUN call.
- ALIGN is maximum alignment we can assume. MEMSETP is true if this is
- a memset operation and false if it's a copy of a constant string.
- If ENDP is 0 return to, if ENDP is 1 return memory at the end ala
- mempcpy, and if ENDP is 2 return memory the end minus one byte ala
- stpcpy. */
-
-rtx
-store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
- rtx (*constfun) (void *, HOST_WIDE_INT, machine_mode),
- void *constfundata, unsigned int align, bool memsetp, int endp)
-{
- machine_mode to_addr_mode = get_address_mode (to);
- struct store_by_pieces_d data;
-
- if (len == 0)
- {
- gcc_assert (endp != 2);
- return to;
- }
-
- gcc_assert (targetm.use_by_pieces_infrastructure_p
- (len, align,
- memsetp
- ? SET_BY_PIECES
- : STORE_BY_PIECES,
- optimize_insn_for_speed_p ()));
-
- data.constfun = constfun;
- data.constfundata = constfundata;
- data.len = len;
- data.to = to;
- store_by_pieces_1 (&data, align);
- if (endp)
- {
- rtx to1;
-
- gcc_assert (!data.reverse);
- if (data.autinc_to)
- {
- if (endp == 2)
- {
- if (HAVE_POST_INCREMENT && data.explicit_inc_to > 0)
- emit_insn (gen_add2_insn (data.to_addr, constm1_rtx));
- else
- data.to_addr = copy_to_mode_reg (to_addr_mode,
- plus_constant (to_addr_mode,
- data.to_addr,
- -1));
- }
- to1 = adjust_automodify_address (data.to, QImode, data.to_addr,
- data.offset);
- }
- else
- {
- if (endp == 2)
- --data.offset;
- to1 = adjust_address (data.to, QImode, data.offset);
- }
- return to1;
- }
- else
- return data.to;
-}
-
-/* Generate several move instructions to clear LEN bytes of block TO. (A MEM
- rtx with BLKmode). ALIGN is maximum alignment we can assume. */
-
-static void
-clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
-{
- struct store_by_pieces_d data;
-
- if (len == 0)
- return;
-
- data.constfun = clear_by_pieces_1;
- data.constfundata = NULL;
- data.len = len;
- data.to = to;
- store_by_pieces_1 (&data, align);
-}
-
-/* Callback routine for clear_by_pieces.
- Return const0_rtx unconditionally. */
-
-static rtx
-clear_by_pieces_1 (void *data ATTRIBUTE_UNUSED,
- HOST_WIDE_INT offset ATTRIBUTE_UNUSED,
- machine_mode mode ATTRIBUTE_UNUSED)
-{
- return const0_rtx;
-}
-
-/* Subroutine of clear_by_pieces and store_by_pieces.
- Generate several move instructions to store LEN bytes of block TO. (A MEM
- rtx with BLKmode). ALIGN is maximum alignment we can assume. */
-
-static void
-store_by_pieces_1 (struct store_by_pieces_d *data ATTRIBUTE_UNUSED,
- unsigned int align ATTRIBUTE_UNUSED)
-{
- machine_mode to_addr_mode = get_address_mode (data->to);
- rtx to_addr = XEXP (data->to, 0);
- unsigned int max_size = STORE_MAX_PIECES + 1;
- enum insn_code icode;
-
- data->offset = 0;
- data->to_addr = to_addr;
- data->autinc_to
- = (GET_CODE (to_addr) == PRE_INC || GET_CODE (to_addr) == PRE_DEC
- || GET_CODE (to_addr) == POST_INC || GET_CODE (to_addr) == POST_DEC);
-
- data->explicit_inc_to = 0;
- data->reverse
- = (GET_CODE (to_addr) == PRE_DEC || GET_CODE (to_addr) == POST_DEC);
- if (data->reverse)
- data->offset = data->len;
-
- /* If storing requires more than two move insns,
- copy addresses to registers (to make displacements shorter)
- and use post-increment if available. */
- if (!data->autinc_to
- && move_by_pieces_ninsns (data->len, align, max_size) > 2)
- {
- /* Determine the main mode we'll be using.
- MODE might not be used depending on the definitions of the
- USE_* macros below. */
- machine_mode mode ATTRIBUTE_UNUSED
- = widest_int_mode_for_size (max_size);
-
- if (USE_STORE_PRE_DECREMENT (mode) && data->reverse && ! data->autinc_to)
- {
- data->to_addr = copy_to_mode_reg (to_addr_mode,
- plus_constant (to_addr_mode,
- to_addr,
- data->len));
- data->autinc_to = 1;
- data->explicit_inc_to = -1;
- }
-
- if (USE_STORE_POST_INCREMENT (mode) && ! data->reverse
- && ! data->autinc_to)
- {
- data->to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
- data->autinc_to = 1;
- data->explicit_inc_to = 1;
- }
-
- if ( !data->autinc_to && CONSTANT_P (to_addr))
- data->to_addr = copy_to_mode_reg (to_addr_mode, to_addr);
- }
-
- align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
-
- /* First store what we can in the largest integer mode, then go to
- successively smaller modes. */
-
- while (max_size > 1 && data->len > 0)
- {
- machine_mode mode = widest_int_mode_for_size (max_size);
-
- if (mode == VOIDmode)
- break;
-
- icode = optab_handler (mov_optab, mode);
- if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
- store_by_pieces_2 (GEN_FCN (icode), mode, data);
-
- max_size = GET_MODE_SIZE (mode);
- }
-
- /* The code above should have handled everything. */
- gcc_assert (!data->len);
-}
-
-/* Subroutine of store_by_pieces_1. Store as many bytes as appropriate
- with move instructions for mode MODE. GENFUN is the gen_... function
- to make a move insn for that mode. DATA has all the other info. */
-
-static void
-store_by_pieces_2 (insn_gen_fn genfun, machine_mode mode,
- struct store_by_pieces_d *data)
-{
- unsigned int size = GET_MODE_SIZE (mode);
- rtx to1, cst;
-
- while (data->len >= size)
- {
- if (data->reverse)
- data->offset -= size;
-
- if (data->autinc_to)
- to1 = adjust_automodify_address (data->to, mode, data->to_addr,
- data->offset);
- else
- to1 = adjust_address (data->to, mode, data->offset);
-
- if (HAVE_PRE_DECREMENT && data->explicit_inc_to < 0)
- emit_insn (gen_add2_insn (data->to_addr,
- gen_int_mode (-(HOST_WIDE_INT) size,
- GET_MODE (data->to_addr))));
-
- cst = (*data->constfun) (data->constfundata, data->offset, mode);
- emit_insn ((*genfun) (to1, cst));
-
- if (HAVE_POST_INCREMENT && data->explicit_inc_to > 0)
- emit_insn (gen_add2_insn (data->to_addr,
- gen_int_mode (size,
- GET_MODE (data->to_addr))));
-
- if (! data->reverse)
- data->offset += size;
-
- data->len -= size;
- }
-}
-
/* Write zeros through the storage of OBJECT. If OBJECT has BLKmode, SIZE is
its length in bytes. */
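The batching in compare_by_pieces_d::generate above is what the 4 * batch - 1 estimate in by_pieces_ninsns models: per piece, a load of each operand, a subtract, and an IOR into the accumulator, with a single compare-and-branch closing each full batch. For memcmp (a, b, 16) == 0 with 8-byte pieces and a branch ratio of 2, the emitted RTL corresponds roughly to this C sketch (illustrative, not actual compiler output):

/* Pseudo-C for the equality-only expansion of a 16-byte compare.  */
int
memcmp_eq_16 (const unsigned long *a, const unsigned long *b)
{
  unsigned long t0 = a[0] - b[0];	/* sub_optab, piece 0 */
  unsigned long t1 = a[1] - b[1];	/* sub_optab, piece 1 */
  if ((t0 | t1) != 0)			/* ior_optab, one branch per batch */
    return 1;				/* fail_label: any nonzero value */
  return 0;				/* all pieces compared equal */
}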
diff --git a/gcc/expr.h b/gcc/expr.h
index fea69a27e3a..bd0da5ea364 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -103,12 +103,16 @@ enum block_op_methods
BLOCK_OP_TAILCALL
};
+typedef rtx (*by_pieces_constfn) (void *, HOST_WIDE_INT, machine_mode);
+
extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
+extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
+ by_pieces_constfn, void *);
extern bool emit_storent_insn (rtx to, rtx from);
/* Copy all or part of a value X into registers starting at REGNO.
@@ -173,6 +177,11 @@ extern void use_regs (rtx *, int, int);
/* Mark a PARALLEL as holding a parameter for the next CALL_INSN. */
extern void use_group_regs (rtx *, rtx);
+#ifdef GCC_INSN_CODES_H
+extern rtx expand_cmpstrn_or_cmpmem (insn_code, rtx, rtx, rtx, tree, rtx,
+ HOST_WIDE_INT);
+#endif
+
/* Write zeros through the storage of OBJECT.
If OBJECT has BLKmode, SIZE is its length in bytes. */
extern rtx clear_storage (rtx, rtx, enum block_op_methods);
@@ -191,10 +200,6 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
-extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
- unsigned int,
- unsigned int);
-
/* Return nonzero if it is desirable to store LEN bytes generated by
CONSTFUN with several move instructions by store_by_pieces
function. CONSTFUNDATA is a pointer which will be passed as argument
@@ -203,8 +208,7 @@ extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
MEMSETP is true if this is a real memset/bzero, not a copy
of a const string. */
extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
- rtx (*) (void *, HOST_WIDE_INT,
- machine_mode),
+ by_pieces_constfn,
void *, unsigned int, bool);
/* Generate several move instructions to store LEN bytes generated by
@@ -213,8 +217,7 @@ extern int can_store_by_pieces (unsigned HOST_WIDE_INT,
ALIGN is maximum alignment we can assume.
MEMSETP is true if this is a real memset/bzero, not a copy.
Returns TO + LEN. */
-extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT,
- rtx (*) (void *, HOST_WIDE_INT, machine_mode),
+extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, by_pieces_constfn,
void *, unsigned int, bool, int);
/* Emit insns to set X from Y. */
@@ -295,7 +298,7 @@ rtx get_personality_function (tree);
/* Determine whether the LEN bytes can be moved by using several move
instructions. Return nonzero if a call to move_by_pieces should
succeed. */
-extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
+extern bool can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
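[Note: the by_pieces_constfn typedef added above names the callback that materializes the constant for each piece of a store- or compare-by-pieces expansion. A minimal sketch of such a callback, assuming DATA points at a string constant and modeled on the existing builtin_memcpy_read_str in builtins.c; the name example_read_str is made up for illustration:

    static rtx
    example_read_str (void *data, HOST_WIDE_INT offset, machine_mode mode)
    {
      const char *str = (const char *) data;
      /* c_readstr packs GET_MODE_SIZE (mode) bytes starting at
         str + offset into an immediate of mode MODE.  */
      return c_readstr (str + offset, mode);
    }

Callers hand such a function plus its DATA cookie to store_by_pieces or emit_block_cmp_hints, which invoke it once per piece.]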
diff --git a/gcc/target.def b/gcc/target.def
index 5285e57345c..a4df363698c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3397,8 +3397,9 @@ Both @var{size} and @var{alignment} are measured in terms of storage\n\
units.\n\
\n\
The parameter @var{op} is one of: @code{CLEAR_BY_PIECES},\n\
-@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES}.\n\
-These describe the type of memory operation under consideration.\n\
+@code{MOVE_BY_PIECES}, @code{SET_BY_PIECES}, @code{STORE_BY_PIECES} or\n\
+@code{COMPARE_BY_PIECES}. These describe the type of memory operation\n\
+under consideration.\n\
\n\
The parameter @var{speed_p} is true if the code is currently being\n\
optimized for speed rather than size.\n\
@@ -3418,6 +3419,18 @@ move would be greater than that of a library call.",
default_use_by_pieces_infrastructure_p)
DEFHOOK
+(compare_by_pieces_branch_ratio,
+ "When expanding a block comparison in MODE, gcc can try to reduce the\n\
+number of branches at the expense of more memory operations. This hook\n\
+allows the target to override the default choice. It should return the\n\
+factor by which branches should be reduced over the plain expansion with\n\
+one comparison per @var{mode}-sized piece. A port can also prevent a\n\
+particular mode from being used for block comparisons by returning a\n\
+negative number from this hook.",
+ int, (machine_mode mode),
+ default_compare_by_pieces_branch_ratio)
+
+DEFHOOK
(optab_supported_p,
"Return true if the optimizers should use optab @var{op} with\n\
modes @var{mode1} and @var{mode2} for optimization type @var{opt_type}.\n\
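[Note: to illustrate the contract of the new hook, here is a hypothetical port override, not part of this patch: a negative return vetoes the mode for block comparisons, 1 requests the plain compare-and-branch per piece, and a larger value asks the expander to fold that many pieces into each branch:

    /* Hypothetical TARGET_COMPARE_BY_PIECES_BRANCH_RATIO override.  */
    static int
    example_compare_by_pieces_branch_ratio (machine_mode mode)
    {
      if (mode == QImode)
        return -1;   /* Never compare byte-sized pieces.  */
      return 4;      /* Fold up to four compares per branch.  */
    }
]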
diff --git a/gcc/target.h b/gcc/target.h
index 43022bd92c3..f4fed68ba32 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -79,16 +79,23 @@ enum print_switch_type
};
/* Types of memory operation understood by the "by_pieces" infrastructure.
- Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook. */
+ Used by the TARGET_USE_BY_PIECES_INFRASTRUCTURE_P target hook and
+ internally by the functions in expr.c. */
enum by_pieces_operation
{
CLEAR_BY_PIECES,
MOVE_BY_PIECES,
SET_BY_PIECES,
- STORE_BY_PIECES
+ STORE_BY_PIECES,
+ COMPARE_BY_PIECES
};
+extern unsigned HOST_WIDE_INT by_pieces_ninsns (unsigned HOST_WIDE_INT,
+ unsigned int,
+ unsigned int,
+ by_pieces_operation);
+
typedef int (* print_switch_fn_type) (print_switch_type, const char *);
/* An example implementation for ELF targets. Defined in varasm.c */
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 6b4601b719a..95980f547bd 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1482,25 +1482,40 @@ default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
switch (op)
{
- case CLEAR_BY_PIECES:
- max_size = STORE_MAX_PIECES;
- ratio = CLEAR_RATIO (speed_p);
- break;
- case MOVE_BY_PIECES:
- max_size = MOVE_MAX_PIECES;
- ratio = get_move_ratio (speed_p);
- break;
- case SET_BY_PIECES:
- max_size = STORE_MAX_PIECES;
- ratio = SET_RATIO (speed_p);
- break;
- case STORE_BY_PIECES:
- max_size = STORE_MAX_PIECES;
- ratio = get_move_ratio (speed_p);
- break;
+ case CLEAR_BY_PIECES:
+ max_size = STORE_MAX_PIECES;
+ ratio = CLEAR_RATIO (speed_p);
+ break;
+ case MOVE_BY_PIECES:
+ max_size = MOVE_MAX_PIECES;
+ ratio = get_move_ratio (speed_p);
+ break;
+ case SET_BY_PIECES:
+ max_size = STORE_MAX_PIECES;
+ ratio = SET_RATIO (speed_p);
+ break;
+ case STORE_BY_PIECES:
+ max_size = STORE_MAX_PIECES;
+ ratio = get_move_ratio (speed_p);
+ break;
+ case COMPARE_BY_PIECES:
+ max_size = COMPARE_MAX_PIECES;
+ /* Pick a likely default, just as in get_move_ratio. */
+ ratio = speed_p ? 15 : 3;
+ break;
}
- return move_by_pieces_ninsns (size, alignment, max_size + 1) < ratio;
+ return by_pieces_ninsns (size, alignment, max_size + 1, op) < ratio;
+}
+
+/* This hook controls code generation for expanding a memcmp operation by
+ pieces. Return 1 for the normal pattern of compare/jump after each pair
+ of loads, or a higher number to reduce the number of branches. */
+
+int
+default_compare_by_pieces_branch_ratio (machine_mode)
+{
+ return 1;
}
bool
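[Note: to make the branch-ratio tradeoff concrete, here is roughly what the two expansion shapes look like for a 16-byte equality check done in 4-byte pieces, written as illustrative C; the expander emits RTL directly, and the uint32_t loads stand in for mode-sized moves:

    #include <stdint.h>
    #include <string.h>

    /* ratio == 1: compare and branch after every piece.  */
    static int eq16_ratio1 (const void *p, const void *q)
    {
      for (int i = 0; i < 16; i += 4)
        {
          uint32_t a, b;
          memcpy (&a, (const char *) p + i, 4);
          memcpy (&b, (const char *) q + i, 4);
          if (a != b)
            return 0;          /* One branch per piece.  */
        }
      return 1;
    }

    /* ratio == 4: OR the differences together, branch once.  */
    static int eq16_ratio4 (const void *p, const void *q)
    {
      uint32_t acc = 0;
      for (int i = 0; i < 16; i += 4)
        {
          uint32_t a, b;
          memcpy (&a, (const char *) p + i, 4);
          memcpy (&b, (const char *) q + i, 4);
          acc |= a ^ b;        /* Accumulate differences, no branch.  */
        }
      return acc == 0;         /* Single branch at the end.  */
    }
]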
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 7687c39b53b..d6581cfab89 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -199,6 +199,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
unsigned int,
enum by_pieces_operation,
bool);
+extern int default_compare_by_pieces_branch_ratio (machine_mode);
extern bool default_profile_before_prologue (void);
extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c567f756408..784bf205fca 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-06-03 Bernd Schmidt <bschmidt@redhat.com>
+
+ PR tree-optimization/52171
+ * gcc.dg/pr52171.c: New test.
+ * gcc.target/i386/pr52171.c: New test.
+
2016-06-03 Jan Hubicka <jh@suse.cz>
* g++.dg/tree-ssa/pred-1.C: New testcase
diff --git a/gcc/testsuite/gcc.dg/pr52171.c b/gcc/testsuite/gcc.dg/pr52171.c
new file mode 100644
index 00000000000..45aeff6b8eb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr52171.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "memcmp" } } */
+#include <string.h>
+struct A { int x; } a, b;
+
+extern char s[], t[];
+
+int foo ()
+{
+ return memcmp (&a, &b, sizeof (struct A)) == 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr52171.c b/gcc/testsuite/gcc.target/i386/pr52171.c
new file mode 100644
index 00000000000..50cc520b60c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr52171.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "memcmp" } } */
+/* { dg-final { scan-assembler "1752394086" } } */
+
+/* This should turn into four compare/jump pairs with -m32, within the
+ limit of what the tuning considers acceptable for -O2. */
+int cmp (char *p, char *q)
+{
+ char *pa = __builtin_assume_aligned (p, 4);
+ char *qa = __builtin_assume_aligned (q, 4);
+ if (__builtin_memcmp (pa, qa, 16) != 0)
+ return 1;
+ return 0;
+}
+/* Since x86 has fast unaligned access, this should become a single
+   comparison against a constant; the constant is 1752394086.  */
+int cmp2 (char *p)
+{
+ if (__builtin_memcmp (p, "fish", 4) != 0)
+ return 1;
+ return 0;
+}
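[Note: sanity check on the magic number in the scan-assembler directive above: the bytes 'f' 'i' 's' 'h' are 0x66 0x69 0x73 0x68, which read as a little-endian 32-bit word give 0x68736966 = 1752394086, the immediate the folded comparison uses.]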
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index d27b60754d3..5e2d7dbe7b1 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h"
#include "ipa-chkp.h"
#include "tree-hash-traits.h"
+#include "builtins.h"
/* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
is an index into strinfo vector, negative value stands for
@@ -1843,6 +1844,88 @@ handle_builtin_memset (gimple_stmt_iterator *gsi)
return false;
}
+/* Handle a call to memcmp.  We try to handle small comparisons by
+   converting them to a single load and compare, and we replace calls
+   whose result is only tested for equality against zero with calls to
+   __builtin_memcmp_eq where possible.  */
+
+static bool
+handle_builtin_memcmp (gimple_stmt_iterator *gsi)
+{
+ gcall *stmt2 = as_a <gcall *> (gsi_stmt (*gsi));
+ tree res = gimple_call_lhs (stmt2);
+ tree arg1 = gimple_call_arg (stmt2, 0);
+ tree arg2 = gimple_call_arg (stmt2, 1);
+ tree len = gimple_call_arg (stmt2, 2);
+ unsigned HOST_WIDE_INT leni;
+ use_operand_p use_p;
+ imm_use_iterator iter;
+
+ if (!res)
+ return true;
+
+ FOR_EACH_IMM_USE_FAST (use_p, iter, res)
+ {
+ gimple *ustmt = USE_STMT (use_p);
+
+ if (gimple_code (ustmt) == GIMPLE_ASSIGN)
+ {
+ gassign *asgn = as_a <gassign *> (ustmt);
+ tree_code code = gimple_assign_rhs_code (asgn);
+ if ((code != EQ_EXPR && code != NE_EXPR)
+ || !integer_zerop (gimple_assign_rhs2 (asgn)))
+ return true;
+ }
+ else if (gimple_code (ustmt) == GIMPLE_COND)
+ {
+ tree_code code = gimple_cond_code (ustmt);
+ if ((code != EQ_EXPR && code != NE_EXPR)
+ || !integer_zerop (gimple_cond_rhs (ustmt)))
+ return true;
+ }
+ else
+ return true;
+ }
+
+ if (tree_fits_uhwi_p (len)
+ && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
+ && exact_log2 (leni) != -1)
+ {
+ leni *= CHAR_TYPE_SIZE;
+ unsigned align1 = get_pointer_alignment (arg1);
+ unsigned align2 = get_pointer_alignment (arg2);
+ unsigned align = MIN (align1, align2);
+ machine_mode mode = mode_for_size (leni, MODE_INT, 1);
+ if (mode != BLKmode
+ && (align >= leni || !SLOW_UNALIGNED_ACCESS (mode, align)))
+ {
+ location_t loc = gimple_location (stmt2);
+ tree type, off;
+ type = build_nonstandard_integer_type (leni, 1);
+ gcc_assert (GET_MODE_BITSIZE (TYPE_MODE (type)) == leni);
+ tree ptrtype = build_pointer_type_for_mode (char_type_node,
+ ptr_mode, true);
+ off = build_int_cst (ptrtype, 0);
+ arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
+ arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
+ tree tem1 = fold_const_aggregate_ref (arg1);
+ if (tem1)
+ arg1 = tem1;
+ tree tem2 = fold_const_aggregate_ref (arg2);
+ if (tem2)
+ arg2 = tem2;
+ res = fold_convert_loc (loc, TREE_TYPE (res),
+ fold_build2_loc (loc, NE_EXPR,
+ boolean_type_node,
+ arg1, arg2));
+ gimplify_and_update_call_from_tree (gsi, res);
+ return false;
+ }
+ }
+
+ gimple_call_set_fndecl (stmt2, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
+ return false;
+}
+
/* Handle a POINTER_PLUS_EXPR statement.
For p = "abcd" + 2; compute associated length, or if
p = q + off is pointing to a '\0' character of a string, call
@@ -2100,6 +2183,10 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
if (!handle_builtin_memset (gsi))
return false;
break;
+ case BUILT_IN_MEMCMP:
+ if (!handle_builtin_memcmp (gsi))
+ return false;
+ break;
default:
break;
}
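[Note: the net effect of handle_builtin_memcmp, sketched at the source level; the pass actually rewrites GIMPLE, so the casts below are only illustrative:

    /* Before: a memcmp whose result is only ever tested against zero.  */
    if (memcmp (p, q, 4) == 0) ...

    /* After, when the length is a power of two no larger than a word
       and the operands are aligned enough (or unaligned access is
       cheap): a direct word comparison built from MEM_REFs.  */
    if (*(const unsigned int *) p == *(const unsigned int *) q) ...

    /* Otherwise: the call is retargeted so the expander knows only
       equality matters.  */
    if (__builtin_memcmp_eq (p, q, n) == 0) ...
]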
diff --git a/gcc/tree.c b/gcc/tree.c
index 7511d0a6442..2e01eac6aca 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10601,6 +10601,13 @@ build_common_builtin_nodes (void)
BUILT_IN_STACK_RESTORE,
"__builtin_stack_restore", ECF_NOTHROW | ECF_LEAF);
+ ftype = build_function_type_list (integer_type_node, const_ptr_type_node,
+ const_ptr_type_node, size_type_node,
+ NULL_TREE);
+ local_define_builtin ("__builtin_memcmp_eq", ftype, BUILT_IN_MEMCMP_EQ,
+ "__builtin_memcmp_eq",
+ ECF_PURE | ECF_NOTHROW | ECF_LEAF);
+
/* If there's a possibility that we might use the ARM EABI, build the
alternate __cxa_end_cleanup node used to resume from C++ and Java. */
if (targetm.arm_eabi_unwinder)
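[Note: in C terms, the node built above declares the equivalent of

    int __builtin_memcmp_eq (const void *, const void *, __SIZE_TYPE__);

marked pure, nothrow and leaf to match the ECF flags passed to local_define_builtin. It is only ever introduced by the compiler itself (here, by tree-ssa-strlen.c) and behaves like memcmp, except that callers promise to test the result only against zero.]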