summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author claziss <claziss@138bc75d-0d04-0410-961f-82ee72b054a4> 2018-07-16 16:17:02 +0000
committer claziss <claziss@138bc75d-0d04-0410-961f-82ee72b054a4> 2018-07-16 16:17:02 +0000
commit ed2711622a551546b51d4cc6001d19316ae3c650 (patch)
tree a72e42c2a28b90b4bcb1b60801b456d422a62af6
parent 58a930f8e5a39d133d3efdb2c2f49defef892636 (diff)
[ARC] Reimplement return padding
2018-07-16 Claudiu Zissulescu <claziss@synopsys.com> Backport from mainline 2018-06-12 Claudiu Zissulescu <claziss@synopsys.com> * config/arc/arc-protos.h (arc_pad_return): Remove. * config/arc/arc.c (machine_function): Remove force_short_suffix and size_reason. (arc_print_operand): Adjust printing of '&'. (arc_verify_short): Remove conditional printing of short suffix. (arc_final_prescan_insn): Remove reference to size_reason. (pad_return): New function. (arc_reorg): Call pad_return. (arc_pad_return): Remove. (arc_init_machine_status): Remove reference to force_short_suffix. * config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE. (attr length): When attribute iscompact is true force to 2 regardless; in the case of maybe check if we want to force the instruction to have 4 bytes length. (nopv): Change it to generate 4 byte long nop as well. (blockage): New pattern. (simple_return): Remove call to arc_pad_return. (p_return_i): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-8-branch@262738 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 24
-rw-r--r-- gcc/config/arc/arc-protos.h 1
-rw-r--r-- gcc/config/arc/arc.c 156
-rw-r--r-- gcc/config/arc/arc.md 26
-rw-r--r-- gcc/config/arc/t-multilib 4
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.target/arc/pr9001107555.c 51
7 files changed, 174 insertions, 95 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d37fcac446ed..a977a81bd16f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,27 @@
+2018-07-16 Claudiu Zissulescu <claziss@synopsys.com>
+
+ Backport from mainline
+ 2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * config/arc/arc-protos.h (arc_pad_return): Remove.
+ * config/arc/arc.c (machine_function): Remove force_short_suffix
+ and size_reason.
+ (arc_print_operand): Adjust printing of '&'.
+ (arc_verify_short): Remove conditional printing of short suffix.
+ (arc_final_prescan_insn): Remove reference to size_reason.
+ (pad_return): New function.
+ (arc_reorg): Call pad_return.
+ (arc_pad_return): Remove.
+ (arc_init_machine_status): Remove reference to force_short_suffix.
+ * config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
+ (attr length): When attribute iscompact is true force to 2
+ regardless; in the case of maybe check if we want to force the
+ instruction to have 4 bytes length.
+ (nopv): Change it to generate 4 byte long nop as well.
+ (blockage): New pattern.
+ (simple_return): Remove call to arc_pad_return.
+ (p_return_i): Likewise.
+
2018-07-19 Richard Biener <rguenther@suse.de>
Backport from mainline
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 0ba6871628ad..cb5909564eac 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -93,7 +93,6 @@ extern void arc_clear_unalign (void);
extern void arc_toggle_unalign (void);
extern void split_addsi (rtx *);
extern void split_subsi (rtx *);
-extern void arc_pad_return (void);
extern void arc_split_move (rtx *);
extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
extern rtx arc_regno_use_in (unsigned int, rtx);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 2e6fbcb70c6c..77099899eb72 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -2564,8 +2564,6 @@ typedef struct GTY (()) machine_function
struct arc_frame_info frame_info;
/* To keep track of unalignment caused by short insns. */
int unalign;
- int force_short_suffix; /* Used when disgorging return delay slot insns. */
- const char *size_reason;
struct arc_ccfsm ccfsm_current;
/* Map from uid to ccfsm state during branch shortening. */
rtx ccfsm_current_insn;
@@ -4220,7 +4218,7 @@ arc_print_operand (FILE *file, rtx x, int code)
}
break;
case '&':
- if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
+ if (TARGET_ANNOTATE_ALIGN)
fprintf (file, "; unalign: %d", cfun->machine->unalign);
return;
case '+':
@@ -4906,7 +4904,6 @@ static int
arc_verify_short (rtx_insn *insn, int, int check_attr)
{
enum attr_iscompact iscompact;
- struct machine_function *machine;
if (check_attr > 0)
{
@@ -4914,10 +4911,6 @@ arc_verify_short (rtx_insn *insn, int, int check_attr)
if (iscompact == ISCOMPACT_FALSE)
return 0;
}
- machine = cfun->machine;
-
- if (machine->force_short_suffix >= 0)
- return machine->force_short_suffix;
return (get_attr_length (insn) & 2) != 0;
}
@@ -4956,8 +4949,6 @@ arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
cfun->machine->prescan_initialized = 1;
}
arc_ccfsm_advance (insn, &arc_ccfsm_current);
-
- cfun->machine->size_reason = 0;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
@@ -7599,6 +7590,76 @@ jli_call_scan (void)
}
}
+/* Add padding if necessary to avoid a mispredict. A return could
+ happen immediately after the function start. A call/return and
+ return/return must be 6 bytes apart to avoid mispredict. */
+
+static void
+pad_return (void)
+{
+ rtx_insn *insn;
+ long offset;
+
+ if (!TARGET_PAD_RETURN)
+ return;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx_insn *prev0 = prev_active_insn (insn);
+ bool wantlong = false;
+
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
+ continue;
+
+ if (!prev0)
+ {
+ prev0 = emit_insn_before (gen_nopv (), insn);
+ /* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
+ so it is safe to reuse it for forcing a particular length
+ for an instruction. */
+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
+ emit_insn_before (gen_nopv (), insn);
+ continue;
+ }
+ offset = get_attr_length (prev0);
+
+ if (get_attr_length (prev0) == 2
+ && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
+ {
+ /* Force long version of the insn. */
+ wantlong = true;
+ offset += 2;
+ }
+
+ rtx_insn *prev = prev_active_insn (prev0);
+ if (prev)
+ offset += get_attr_length (prev);
+
+ prev = prev_active_insn (prev);
+ if (prev)
+ offset += get_attr_length (prev);
+
+ switch (offset)
+ {
+ case 2:
+ prev = emit_insn_before (gen_nopv (), insn);
+ add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
+ break;
+ case 4:
+ emit_insn_before (gen_nopv (), insn);
+ break;
+ default:
+ continue;
+ }
+
+ if (wantlong)
+ add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
+
+ /* Emit a blockage to avoid delay slot scheduling. */
+ emit_insn_before (gen_blockage (), insn);
+ }
+}
+
static int arc_reorg_in_progress = 0;
/* ARC's machine specific reorg function. */
@@ -7624,6 +7685,7 @@ arc_reorg (void)
workaround_arc_anomaly ();
jli_call_scan ();
+ pad_return ();
/* FIXME: should anticipate ccfsm action, generate special patterns for
to-be-deleted branches that have no delay slot and have at least the
@@ -9332,79 +9394,6 @@ arc_branch_size_unknown_p (void)
return !optimize_size && arc_reorg_in_progress;
}
-/* We are about to output a return insn. Add padding if necessary to avoid
- a mispredict. A return could happen immediately after the function
- start, but after a call we know that there will be at least a blink
- restore. */
-
-void
-arc_pad_return (void)
-{
- rtx_insn *insn = current_output_insn;
- rtx_insn *prev = prev_active_insn (insn);
- int want_long;
-
- if (!prev)
- {
- fputs ("\tnop_s\n", asm_out_file);
- cfun->machine->unalign ^= 2;
- want_long = 1;
- }
- /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
- because after a call, we'd have to restore blink first. */
- else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
- return;
- else
- {
- want_long = (get_attr_length (prev) == 2);
- prev = prev_active_insn (prev);
- }
- if (!prev
- || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
- ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
- NON_SIBCALL)
- : CALL_ATTR (prev, NON_SIBCALL)))
- {
- if (want_long)
- cfun->machine->size_reason
- = "call/return and return/return must be 6 bytes apart to avoid mispredict";
- else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
- {
- cfun->machine->size_reason
- = "Long unaligned jump avoids non-delay slot penalty";
- want_long = 1;
- }
- /* Disgorge delay insn, if there is any, and it may be moved. */
- if (final_sequence
- /* ??? Annulled would be OK if we can and do conditionalize
- the delay slot insn accordingly. */
- && !INSN_ANNULLED_BRANCH_P (insn)
- && (get_attr_cond (insn) != COND_USE
- || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
- XVECEXP (final_sequence, 0, 1))))
- {
- prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
- gcc_assert (!prev_real_insn (insn)
- || !arc_hazard (prev_real_insn (insn), prev));
- cfun->machine->force_short_suffix = !want_long;
- rtx save_pred = current_insn_predicate;
- final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
- cfun->machine->force_short_suffix = -1;
- prev->set_deleted ();
- current_output_insn = insn;
- current_insn_predicate = save_pred;
- }
- else if (want_long)
- fputs ("\tnop\n", asm_out_file);
- else
- {
- fputs ("\tnop_s\n", asm_out_file);
- cfun->machine->unalign ^= 2;
- }
- }
- return;
-}
-
/* The usual; we set up our machine_function data. */
static struct machine_function *
@@ -9413,7 +9402,6 @@ arc_init_machine_status (void)
struct machine_function *machine;
machine = ggc_cleared_alloc<machine_function> ();
machine->fn_type = ARC_FUNCTION_UNKNOWN;
- machine->force_short_suffix = -1;
return machine;
}
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index d19e99daca72..fcc6e0692dd1 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -161,6 +161,7 @@
VUNSPEC_ARC_CAS
VUNSPEC_ARC_SC
VUNSPEC_ARC_LL
+ VUNSPEC_ARC_BLOCKAGE
])
(define_constants
@@ -384,13 +385,18 @@
;; and insn lengths: insns with shimm values cannot be conditionally executed.
(define_attr "length" ""
(cond
- [(eq_attr "iscompact" "true,maybe")
+ [(eq_attr "iscompact" "true")
+ (const_int 2)
+
+ (eq_attr "iscompact" "maybe")
(cond
[(eq_attr "type" "sfunc")
(cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
(const_int 12)]
(const_int 10))
- (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)]
+ (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)
+ (match_test "find_reg_note (insn, REG_SAVE_NOTE, GEN_INT (1))")
+ (const_int 4)]
(const_int 2))
(eq_attr "iscompact" "true_limm")
@@ -4438,8 +4444,16 @@
""
"nop%?"
[(set_attr "type" "misc")
- (set_attr "iscompact" "true")
- (set_attr "length" "2")])
+ (set_attr "iscompact" "maybe")
+ (set_attr "length" "*")])
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] VUNSPEC_ARC_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "block")]
+)
;; Split up troublesome insns for better scheduling.
@@ -4984,8 +4998,6 @@
{
return \"rtie\";
}
- if (TARGET_PAD_RETURN)
- arc_pad_return ();
output_asm_insn (\"j%!%* [%0]%&\", &reg);
return \"\";
}
@@ -5029,8 +5041,6 @@
arc_return_address_register (arc_compute_function_type
(cfun)));
- if (TARGET_PAD_RETURN)
- arc_pad_return ();
output_asm_insn (\"j%d0%!%# [%1]%&\", xop);
/* record the condition in case there is a delay insn. */
arc_ccfsm_record_condition (xop[0], false, insn, 0);
diff --git a/gcc/config/arc/t-multilib b/gcc/config/arc/t-multilib
index d9ab0ca1da3c..08add5492f0b 100644
--- a/gcc/config/arc/t-multilib
+++ b/gcc/config/arc/t-multilib
@@ -21,9 +21,9 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
+MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400
-MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
+MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400
# Aliases:
MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b8bc37428e77..77e1714b5d34 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>
+
+ Backport from mainline
+ 2018-06-12 Claudiu Zissulescu <claziss@synopsys.com>
+
+ * gcc.target/arc/pr9001107555.c: New file.
+
2018-07-19 Richard Biener <rguenther@suse.de>
Backport from mainline
diff --git a/gcc/testsuite/gcc.target/arc/pr9001107555.c b/gcc/testsuite/gcc.target/arc/pr9001107555.c
new file mode 100644
index 000000000000..420fa835fa41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/pr9001107555.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble } */
+/* { dg-skip-if "" { ! { clmcpu } } } */
+/* { dg-options "-O3 -funroll-loops -mno-sdata -mcpu=arc700" } */
+
+typedef long long a __attribute__((__mode__(__DI__)));
+typedef struct c c;
+
+struct b
+{
+ int d;
+ c *e;
+};
+
+enum { f };
+
+typedef struct
+{
+ a g;
+ a h;
+ int i;
+} j;
+
+struct c
+{
+ int count;
+ int current;
+};
+
+int k;
+
+extern void bar (int, long long);
+int foo (struct b *demux, __builtin_va_list args)
+{
+ c m = *demux->e;
+ j *n;
+ switch (k)
+ case f:
+ {
+ a o = __builtin_va_arg(args, a);
+ m.current = 0;
+ while (m.current < m.count)
+ {
+ if (n[m.current].h > o) {
+ bar (demux->d, 4 + 128LL * n[m.current].i);
+ break;
+ }
+ m.current++;
+ }
+ return 0;
+ }
+}