author     Dominik Inführ <dominik.infuehr@theobroma-systems.com>	2017-04-21 11:36:58 +0200
committer  Christoph Muellner <christoph.muellner@theobroma-systems.com>	2018-04-27 10:17:26 +0200
commit     43881c3d5e9a4fb1e68481d6a4fa3bfa2489f2f0 (patch)
tree       7f2688c76f9ec5b67894ea95d98fa6873c8681db
parent     ba5a3780dd9937d0f3adee051239ea3aa65dd148 (diff)
aarch64: Xgene: Procedural cost-model for X-Gene processors.
Bypass the table-based cost model with a procedural one to more closely match the Xgene microarchitecture.
-rw-r--r--	gcc/config/aarch64/aarch64.c	| 891
1 file changed, 890 insertions(+), 1 deletion(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3ff8e65ba6da..0cf58768775a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -152,6 +152,8 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
+static bool xgene1_rtx_costs (rtx, machine_mode, int, int, int, int*, bool);
+
/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;
@@ -4277,7 +4279,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
index = SUBREG_REG (index);
if ((shift == 0 ||
- (shift > 0 && shift <= 3
+ (shift > 0 && shift <= 4
&& (1 << shift) == GET_MODE_SIZE (mode)))
&& REG_P (index)
&& aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
@@ -4620,6 +4622,9 @@ aarch64_float_const_zero_rtx_p (rtx x)
if (GET_MODE (x) == VOIDmode)
return false;
+ if (!CONST_DOUBLE_P (x))
+ return false;
+
if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
return !HONOR_SIGNED_ZEROS (GET_MODE (x));
return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
@@ -6556,6 +6561,19 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
above this default. */
*cost = COSTS_N_INSNS (1);
+ /* TODO: The cost infrastructure currently does not handle
+ vector operations. Assume that all vector operations
+ are equally expensive. */
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
+
+ if (selected_cpu->ident == xgene1)
+ return xgene1_rtx_costs (x, mode, code, outer, param, cost, speed);
+
switch (code)
{
case SET:
@@ -14800,6 +14818,877 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
return true;
}
+/* This function aids the processing of an add/sub instruction that
+ may use the "extended register" or "shifted register" form. For
+ many such cases, we can simply process the extend/shift as if it
+ were a separate instruction, since the op cost is the same.
+ However, certain cases must be handled separately when the ops are
+ integrated into a single instruction.
+
+ Returns the inner operand if successful, or the original expression
+ on failure. Also updates the cost if successful. */
+static rtx
+xgene1_strip_extended_register (rtx op, int *cost, bool speed ATTRIBUTE_UNUSED, bool separate)
+{
+ /* If the operand is zero-extended from 32-bits, it is free. */
+ if (!separate
+ && GET_CODE (op) == ZERO_EXTEND
+ && GET_MODE (XEXP (op, 0)) == SImode)
+ return XEXP (op, 0);
+
+ /* A stand-alone multiply costs 4 or 5, so GCC will choose a cheaper
+ shift if it can. But GCC will not transform a multiply embedded
+ inside another operation such as (plus (mult X const)). Instead,
+ aarch64.md recognizes it as an operation with an embedded shift,
+ and we charge a cost accordingly. */
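+ /* For example, (plus (mult X (const_int 8)) Y) is recognized as
+ an ADD with an embedded LSL #3, since exact_log2 (8) == 3. */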
+ if (GET_CODE (op) == MULT)
+ {
+ rtx op0 = XEXP (op, 0);
+ rtx op1 = XEXP (op, 1);
+
+ if (CONST_INT_P (op1)
+ && exact_log2 (INTVAL (op1)) > 0)
+ {
+ if (exact_log2 (INTVAL (op1)) <= 4)
+ {
+ *cost += COSTS_N_INSNS(1);
+
+ /* The extended register form can include a zero-
+ or sign-extend for free. */
+ if (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND)
+ return XEXP (op0, 0);
+ else
+ return op0;
+ }
+ else
+ {
+ /* The shifted register form can support a larger
+ left shift, but cannot include a free extend. */
+ *cost += COSTS_N_INSNS(2);
+ return op0;
+ }
+ }
+ }
+
+ /* No candidates found. Return op unchanged. */
+ return op;
+}
+
+/* Calculate the cost of calculating X, storing it in *COST. Result
+ is true if the total cost of the operation has now been calculated. */
+static bool
+xgene1_rtx_costs (rtx x, machine_mode mode, int code, int outer ATTRIBUTE_UNUSED,
+ int param ATTRIBUTE_UNUSED, int *cost, bool speed)
+{
+ rtx op0, op1, op2, addr;
+ int n_minus_1;
+
+ /* Throw away the default cost and start over. */
+ /* A size N times larger than UNITS_PER_WORD (rounded up) probably
+ needs N times as many ops, so it executes in N-1 extra
+ cycles. */
+ n_minus_1 = (GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD;
+ /* If the mode size is no larger than UNITS_PER_WORD, then n_minus_1
+ is 0, and the starting cost is 0. This is the default. Instructions
+ then add cost above and beyond that value. */
+ *cost = COSTS_N_INSNS(n_minus_1);
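+ /* For example, a TImode op with UNITS_PER_WORD == 8 gives
+ n_minus_1 == (16 - 1) / 8 == 1, i.e. one extra cycle over a
+ word-sized op. */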
+
+ switch (code)
+ {
+ case REG:
+ /* Warning: rtx_cost won't actually ask for the cost of a
+ register. It just assumes that the cost is 0. So this code
+ may be useless. */
+ /* A register has zero cost when used as part of an expression,
+ but it has a cost when copied to another register. */
+ if (outer != SET)
+ *cost = 0;
+ else if (FLOAT_MODE_P (mode))
+ *cost += COSTS_N_INSNS(3); /* base cost */
+ else if (VECTOR_MODE_P (mode))
+ *cost += COSTS_N_INSNS(2); /* base cost */
+ else
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ return true;
+
+ case CONST_INT:
+ /* If an instruction can incorporate a constant within the
+ instruction, the instruction's expression avoids calling
+ rtx_cost() on the constant. If rtx_cost() is called on a
+ constant, then it's usually because the constant must be
+ moved into a register by one or more instructions.
+
+ The exception is constant 0, which usually can be expressed
+ as XZR/WZR with zero cost. const0 occasionally has positive
+ cost, but we can't tell that here. In particular, setting a
+ register to const0 costs an instruction, but that case
+ doesn't call this function anyway. One compelling reason to
+ pretend that setting a register to 0 costs nothing is to get
+ the desired results in synth_mult() in expmed.c. */
+ if (x == const0_rtx)
+ *cost = 0;
+ else
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ return true;
+
+ case CONST_DOUBLE:
+ if (aarch64_float_const_representable_p(x))
+ *cost += COSTS_N_INSNS(3); /* MOVI when used by FP */
+ else
+ *cost += COSTS_N_INSNS(5); /* GCC loads the constant from
+ memory. */
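+ /* For example, 1.0 is encodable as an FMOV immediate, while 0.1
+ is not representable and must come from the constant pool. */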
+ return true;
+
+ case SET:
+ op0 = SET_DEST (x);
+ op1 = SET_SRC (x);
+
+ switch (GET_CODE (op0))
+ {
+ case MEM:
+ /* If the store data is not already in a register, get the
+ cost to prepare it. */
+ *cost += rtx_cost (op1, mode, SET, 1, speed);
+
+ /* Add the cost of complex addressing modes. */
+ addr = XEXP(op0, 0);
+ *cost += aarch64_address_cost(addr, word_mode, 0, speed);
+ return true;
+
+ case SUBREG:
+ if (! REG_P (SUBREG_REG (op0)))
+ *cost += rtx_cost (SUBREG_REG (op0), mode, SET, 0, speed);
+ /* Fall through. */
+
+ case REG:
+ if (GET_CODE (op1) == REG)
+ {
+ /* The cost is 1 per register copied. */
+ /* Note that SET does not itself have a mode, so the
+ previously calculated value of n_minus_1 is not
+ useful. */
+ n_minus_1 = (GET_MODE_SIZE (GET_MODE (SET_DEST (x))) - 1) / UNITS_PER_WORD;
+ *cost = COSTS_N_INSNS(n_minus_1 + 1);
+ return true;
+ }
+ else
+ {
+ /* Cost is just the cost of the RHS of the set (min 1). */
+ *cost = rtx_cost (op1, mode, SET, 0, speed);
+ return true;
+ }
+
+ case ZERO_EXTRACT:
+ /* Bit-field insertion. */
+ /* Strip any redundant widening of the RHS to meet the width
+ of the target. */
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+ if ((GET_CODE (op1) == ZERO_EXTEND
+ || GET_CODE (op1) == SIGN_EXTEND)
+ && GET_CODE (XEXP (op0, 1)) == CONST_INT
+ && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
+ >= INTVAL (XEXP (op0, 1))))
+ op1 = XEXP (op1, 0);
+
+ if (CONST_INT_P (op1))
+ {
+ /* It must be a MOVK. */
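+ /* For example, (set (zero_extract:DI X (const_int 16) (const_int 16))
+ (const_int N)) maps to MOVK Xd, #N, LSL #16. */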
+ *cost += COSTS_N_INSNS(1);
+ return true;
+ }
+ else
+ {
+ /* It must be a BFM. */
+ *cost += COSTS_N_INSNS(2);
+ *cost += rtx_cost (op1, mode, ZERO_EXTRACT, 1, speed);
+ return true;
+ }
+
+ default:
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ return false;
+ }
+
+ case MEM:
+ /* The base cost is the load latency. */
+ if (GET_MODE_CLASS(GET_MODE(x)) == MODE_INT)
+ {
+ *cost += COSTS_N_INSNS(5);
+ }
+ else if (GET_MODE_CLASS(GET_MODE(x)) == MODE_FLOAT)
+ {
+ *cost += COSTS_N_INSNS(10);
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(8); /* default cost */
+ }
+
+ /* Add the cost of complex addressing modes. */
+ addr = XEXP(x, 0);
+ *cost += aarch64_address_cost(addr, word_mode, 0, speed);
+ return true;
+
+ case COMPARE:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* We only get here if the compare is being used to set the CC
+ flags. Compares within other instructions (e.g. cbz) are
+ subexpressions of if_then_else and are handled there. */
+
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ {
+ /* A write to the CC flags costs extra. */
+ *cost += COSTS_N_INSNS(2); /* base cost */
+
+ /* Support for ANDS. */
+ if (GET_CODE (op0) == AND)
+ {
+ x = op0;
+ goto cost_logic;
+ }
+
+ /* Support for TST that looks like zero extract. */
+ if (GET_CODE (op0) == ZERO_EXTRACT)
+ {
+ *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 1, speed);
+ return true;
+ }
+
+ /* Support for ADDS (and CMN alias). */
+ if (GET_CODE (op0) == PLUS)
+ {
+ x = op0;
+ goto cost_plus;
+ }
+
+ /* Support for SUBS. */
+ if (GET_CODE (op0) == MINUS)
+ {
+ x = op0;
+ goto cost_minus;
+ }
+
+ /* Support for CMN. */
+ if (GET_CODE (op1) == NEG)
+ {
+ *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
+ *cost += rtx_cost (XEXP (op1, 0), mode, ZERO_EXTRACT, 1, speed);
+ return true;
+ }
+
+ /* Support for CMP (integer) */
+ /* Compare can freely swap the order of operands, and
+ canonicalization puts the more complex operation first.
+ But the integer MINUS logic expects the shift/extend
+ operation in op1. */
+ if (! (REG_P (op0)
+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
+ {
+ op0 = XEXP (x, 1);
+ op1 = XEXP (x, 0);
+ }
+ goto cost_minus_int;
+ }
+
+ /* Support for CMP (FP) */
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
+ {
+ *cost += COSTS_N_INSNS(11);
+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
+ {
+ /* fcmp supports constant 0.0 for no extra cost. */
+ return true;
+ }
+ return false;
+ }
+
+ *cost += COSTS_N_INSNS(2); /* default cost */
+ return false;
+
+ case NEG:
+ op0 = XEXP (x, 0);
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ *cost += COSTS_N_INSNS(1); /* base cost */
+
+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+ {
+ /* This looks like CSETM. */
+ *cost += rtx_cost (XEXP (op0, 0), mode, NEG, 0, speed);
+ return true;
+ }
+
+ op0 = CONST0_RTX (GET_MODE (x));
+ op1 = XEXP (x, 0);
+ goto cost_minus_int;
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* Support (neg(fma...)) as a single instruction only if
+ sign of zeros is unimportant. This matches the decision
+ making in aarch64.md. */
+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
+ {
+ *cost += rtx_cost (op0, mode, NEG, 0, speed);
+ return true;
+ }
+
+ *cost += COSTS_N_INSNS(3); /* FNEG when used by FP */
+ return false;
+ }
+
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ return false;
+
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ *cost += COSTS_N_INSNS(1); /* base cost */
+
+ cost_minus: /* the base cost must be set before entry here */
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ cost_minus_int: /* the base cost must be set before entry here */
+ if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
+ {
+ /* A SUB instruction cannot combine a shift/extend
+ operation with an immediate, so we assume that the
+ shift/extend is a separate instruction. */
+ *cost += rtx_cost (op0, mode, MINUS, 1, speed);
+ return true;
+ }
+
+ /* Unlike ADD, we normally expect MINUS to have the
+ shift/extend operand in op1. */
+ op1 = xgene1_strip_extended_register (op1, cost, speed, false);
+
+ /* However, expmed.c performs some cost tests of shifted
+ register minus register. Since this will require the
+ shift to take place in a separate instruction, we'd
+ normally evaluate the cost of the shift subexpression
+ independently. However, expmed codes the shift as a
+ multiply, and we don't want to change the cost of an
+ independent multiply. So instead we treat it as an
+ integrated subexpression, with the caveat that zero
+ extend is not free. */
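+ /* For example, synth_mult may cost X * 7 as
+ (minus (mult X (const_int 8)) X), i.e. a SUB with an embedded
+ LSL #3. */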
+ op0 = xgene1_strip_extended_register (op0, cost, speed, true);
+
+ *cost += rtx_cost (op0, mode, PLUS, 0, speed);
+ *cost += rtx_cost (op1, mode, PLUS, 1, speed);
+ return true;
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ *cost += COSTS_N_INSNS(5); /* base cost */
+ return false;
+ }
+
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ return false;
+
+ case PLUS:
+ if (FLOAT_MODE_P (mode))
+ {
+ *cost += COSTS_N_INSNS(5); /* base cost */
+ return false;
+ }
+ else if (VECTOR_MODE_P (mode))
+ {
+ *cost += COSTS_N_INSNS(3); /* base cost */
+ return false;
+ }
+ if (SCALAR_INT_MODE_P(mode))
+ {
+ *cost += COSTS_N_INSNS(1); /* base cost */
+
+ cost_plus: /* the base cost must be set before entry here */
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+ {
+ /* This looks like CINC. */
+ *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
+ *cost += rtx_cost (op1, mode, PLUS, 1, speed);
+ return true;
+ }
+
+ if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
+ {
+ /* An ADD instruction cannot combine a shift/extend
+ operation with an immediate, so we assume that the
+ shift/extend is a separate instruction. */
+ *cost += rtx_cost (op0, mode, PLUS, 0, speed);
+ return true;
+ }
+
+ /* We could handle multiply-add here, but the cost is the
+ same as handling them separately. (At least, it is for
+ integers.) */
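+ /* For example, (plus (mult A B) C) could be a single MADD, but
+ MUL followed by ADD has the same cost here. */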
+
+ op0 = xgene1_strip_extended_register (op0, cost, speed, false);
+
+ *cost += rtx_cost (op0, mode, PLUS, 0, speed);
+ *cost += rtx_cost (op1, mode, PLUS, 1, speed);
+ return true;
+ }
+
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ return false;
+
+ case XOR:
+ case AND:
+ *cost += COSTS_N_INSNS(1); /* base cost */
+
+ cost_logic: /* the base cost must be set before entry here */
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* Depending on the immediates, (and (mult X mult_imm) and_imm)
+ may be translated to UBFM/SBFM, so we set the cost
+ accordingly. */
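+ /* For example, (and (mult X (const_int 4)) (const_int 1020))
+ is a UBFIZ, since 1020 == 0xff << 2. */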
+ if (code == AND
+ && GET_CODE (op0) == MULT
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (op1)
+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
+ INTVAL (op1)) != 0)
+ {
+ /* This UBFM/SBFM form can be implemented with a
+ single-cycle op. */
+ *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
+ return true;
+ }
+
+ if (CONST_INT_P (op1)
+ && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
+ {
+ /* A logical instruction cannot combine a NOT operation with
+ an immediate, so we assume that the NOT operation is a
+ separate instruction. */
+ *cost += rtx_cost (op0, mode, AND, 0, speed);
+ return true;
+ }
+
+ /* Handle ORN, EON, or BIC. */
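+ /* For example, (and (not X) Y) maps to BIC. */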
+ if (GET_CODE (op0) == NOT)
+ op0 = XEXP (op0, 0);
+
+ /* The logical instruction could have the shifted register form,
+ but the cost is the same if the shift is processed as a
+ separate instruction, so we don't bother with it here. */
+
+ *cost += rtx_cost (op0, mode, AND, 0, speed);
+ *cost += rtx_cost (op1, mode, AND, 1, speed);
+ return true;
+
+ case NOT:
+ *cost += COSTS_N_INSNS(1); /* default cost */
+
+ /* The logical instruction could have the shifted register form,
+ but the cost is the same if the shift is processed as a separate
+ instruction, so we don't bother with it here. */
+ return false;
+
+ case ZERO_EXTEND:
+ if (GET_MODE (x) == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode
+ && outer == SET)
+ {
+ /* All ops that produce a 32-bit result can zero extend to 64-bits for free
+ when writing to a register. */
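+ /* For example, ADD Wd, Wn, Wm already zeroes bits 32-63 of Xd,
+ so the enclosing zero_extend costs nothing extra. */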
+ *cost = rtx_cost (XEXP (x, 0), mode, SET, param, speed);
+
+ /* If we're simply zero extending a register,
+ that still costs a minimum of one instruction. */
+ if (*cost == 0) *cost = COSTS_N_INSNS(1);
+ return true;
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MEM)
+ {
+ /* All loads can zero extend to any size for free. */
+ *cost = rtx_cost (XEXP (x, 0), mode, SET, param, speed);
+ return true;
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ return false;
+ }
+
+ case SIGN_EXTEND:
+ /* If sign extension isn't under a shift operation and thus
+ handled specially, then the sign extension always requires a
+ separate 1-cycle op. */
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ return false;
+
+ case ASHIFT:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ /* (ashift (extend X) shift_imm)
+ may be translated to UBFM/SBFM, which can also perform the extend. */
+ if (CONST_INT_P (op1))
+ {
+ if (INTVAL (op1) <= 4)
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ else
+ *cost += COSTS_N_INSNS(2); /* base cost */
+
+ /* UBFM/SBFM can incorporate zero/sign extend for free. */
+ if (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND)
+ op0 = XEXP (op0, 0);
+
+ *cost += rtx_cost (op0, mode, ASHIFT, 0, speed);
+ return true;
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(2); /* base cost */
+ return false;
+ }
+
+ case ROTATE:
+ case ROTATERT:
+ case LSHIFTRT:
+ case ASHIFTRT:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ *cost += COSTS_N_INSNS(2); /* base cost */
+
+ if (CONST_INT_P (op1))
+ {
+ *cost += rtx_cost (op0, mode, ASHIFT, 0, speed);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+ case HIGH:
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ if (!CONSTANT_P (XEXP (x, 0)))
+ *cost += rtx_cost (XEXP (x, 0), mode, HIGH, 0, speed);
+ return true;
+
+ case LO_SUM:
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ if (!CONSTANT_P (XEXP (x, 1)))
+ *cost += rtx_cost (XEXP (x, 1), mode, LO_SUM, 1, speed);
+ *cost += rtx_cost (XEXP (x, 0), mode, LO_SUM, 0, speed);
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ /* (extract (mult X mult_imm) extract_imm (const_int 0))
+ may be translated to UBFM/SBFM depending on the respective immediates. */
+ /* For whatever reason, I never see this stand-alone, and I never see it
+ with zero_extract. But "(sign_extract (mult ..." sometimes shows up
+ as part of a larger expression, e.g. under "(plus ...". This includes
+ using it as part of memory addressing. */
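+ /* For example, (sign_extract (mult X (const_int 4)) (const_int 34)
+ (const_int 0)) can represent a sign-extend from 32 bits followed
+ by a left shift of 2, as in ADD's SXTW #2 operand form. */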
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ op2 = XEXP (x, 2);
+ if (GET_CODE (op0) == MULT
+ && CONST_INT_P (op1)
+ && op2 == const0_rtx)
+ {
+ rtx mult_reg = XEXP (op0, 0);
+ rtx mult_imm = XEXP (op0, 1);
+ if (CONST_INT_P (mult_imm)
+ && aarch64_is_extend_from_extract (GET_MODE (x),
+ mult_imm,
+ op1))
+ {
+ /* This UBFM/SBFM form can be implemented with a single-cycle op. */
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ *cost += rtx_cost (mult_reg, mode, ZERO_EXTRACT, 0, speed);
+ return true;
+ }
+ }
+
+ if (CONST_INT_P (op1)
+ && CONST_INT_P (op2))
+ {
+ /* This can be implemented with a UBFM/SBFM. If it was a simple
+ zero- or sign-extend, then it would use code ZERO_EXTEND or
+ SIGN_EXTEND. Since it doesn't, it must be something more
+ complex, so it requires 2-cycle latency. */
+ *cost += COSTS_N_INSNS(2); /* base cost */
+ *cost += rtx_cost (XEXP (x, 0), mode, ZERO_EXTRACT, 0, speed);
+ return true;
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(2); /* default cost */
+ return false;
+ }
+
+ case MULT:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* FP multiply */
+ *cost += COSTS_N_INSNS(5); /* base cost */
+
+ /* FNMUL is free. */
+ if (GET_CODE (op0) == NEG)
+ op0 = XEXP (op0, 0);
+
+ *cost += rtx_cost (op0, mode, MULT, 0, speed);
+ *cost += rtx_cost (op1, mode, MULT, 1, speed);
+ return true;
+ }
+ else if (GET_MODE (x) == DImode)
+ {
+ if (((GET_CODE (op0) == ZERO_EXTEND
+ && GET_CODE (op1) == ZERO_EXTEND)
+ || (GET_CODE (op0) == SIGN_EXTEND
+ && GET_CODE (op1) == SIGN_EXTEND))
+ && GET_MODE (XEXP (op0, 0)) == SImode
+ && GET_MODE (XEXP (op1, 0)) == SImode)
+ {
+ /* 32-bit integer multiply with 64-bit result */
+ *cost += COSTS_N_INSNS(4);
+ *cost += rtx_cost (XEXP (op0, 0), mode, MULT, 0, speed);
+ *cost += rtx_cost (XEXP (op1, 0), mode, MULT, 1, speed);
+ return true;
+ }
+
+ if (GET_CODE (op0) == NEG
+ && ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
+ && GET_CODE (op1) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
+ && GET_CODE (op1) == SIGN_EXTEND))
+ && GET_MODE (XEXP (XEXP (op0, 0), 0)) == SImode
+ && GET_MODE (XEXP (op1, 0)) == SImode)
+ {
+ /* 32-bit integer multiply with negated 64-bit result */
+ *cost += COSTS_N_INSNS(5);
+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), mode, MULT, 0, speed);
+ *cost += rtx_cost (XEXP (op1, 0), mode, MULT, 1, speed);
+ return true;
+ }
+
+ /* 64-bit integer multiply */
+ *cost += COSTS_N_INSNS(5); /* base cost */
+ }
+ else if (GET_MODE (x) == SImode)
+ {
+ /* 32-bit integer multiply */
+ *cost += COSTS_N_INSNS(4); /* base cost */
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(5); /* default cost */
+ }
+ return false; /* All arguments need to be in registers. */
+
+ case MOD:
+ case UMOD:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ /* integer mod = divide + mult + sub */
+ /* See DIV for notes on variable-latency divide. */
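+ /* For example, X % Y expands to SDIV T, X, Y followed by
+ MSUB R, T, Y, X (R = X - T * Y). */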
+ if (GET_MODE (x) == SImode)
+ *cost += COSTS_N_INSNS (16 + 4 + 1);
+ else
+ *cost += COSTS_N_INSNS (16 + 5 + 1);
+ }
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* FP mod = divide + round + mult-sub */
+ if (GET_MODE (x) == SFmode)
+ *cost += COSTS_N_INSNS (22+1 + 5+1 + 5);
+ else
+ *cost += COSTS_N_INSNS (28+1 + 5+1 + 5);
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(16 + 5 + 1); /* default cost */
+ }
+ return false; /* All arguments need to be in registers. */
+
+ case DIV:
+ case UDIV:
+ case SQRT:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ /* There is no integer SQRT, so only DIV and UDIV can get
+ here. */
+ /* Integer divide of a register is variable latency.
+ Without data, I assume an average of 16 cycles. */
+ /* Integer divide of a constant has known latency
+ depending on the constant. However, GCC won't pick a
+ different instruction based on the cost, so we don't
+ model it. */
+ *cost += COSTS_N_INSNS (16);
+ }
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ if (GET_MODE (x) == SFmode)
+ *cost += COSTS_N_INSNS (22+1);
+ else
+ *cost += COSTS_N_INSNS (28+1);
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS(16); /* default cost */
+ }
+ return false; /* All arguments need to be in registers. */
+
+ case IF_THEN_ELSE:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ op2 = XEXP (x, 2);
+
+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
+ {
+ /* conditional branch */
+ if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
+ {
+ /* Regular conditional branch. */
+ *cost += COSTS_N_INSNS (1); /* base cost */
+ return true;
+ }
+ else
+ {
+ /* The branch is not based on the condition codes, so it must be
+ a compare and branch (cbz/cbnz or tbz/tbnz). */
+ *cost += COSTS_N_INSNS (3); /* base cost */
+ return true;
+ }
+ }
+ else
+ {
+ if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE
+ || GET_CODE (op0) == GT || GET_CODE (op0) == GTU
+ || GET_CODE (op0) == LT || GET_CODE (op0) == LTU
+ || GET_CODE (op0) == GE || GET_CODE (op0) == GEU
+ || GET_CODE (op0) == LE || GET_CODE (op0) == LEU)
+ && GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
+ {
+ /* It's a conditional operation based on the status flags,
+ so it must be some flavor of CSEL. */
+ *cost += COSTS_N_INSNS (1); /* base cost */
+
+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
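+ /* For example, (if_then_else (eq ...) (neg X) Y) becomes a
+ single CSNEG. */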
+ if (GET_CODE (op1) == NEG
+ || GET_CODE (op1) == NOT
+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
+ op1 = XEXP (op1, 0);
+
+ /* If the remaining parameters are not registers,
+ get the cost to put them into registers. */
+ *cost += rtx_cost (op1, mode, IF_THEN_ELSE, 1, speed);
+ *cost += rtx_cost (op2, mode, IF_THEN_ELSE, 2, speed);
+ return true;
+ }
+ else
+ {
+ *cost += COSTS_N_INSNS (1); /* default cost */
+ return true;
+ }
+ }
+
+ case EQ:
+ case NE:
+ case GT:
+ case GTU:
+ case LT:
+ case LTU:
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* This looks like a CSET. */
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ *cost += COSTS_N_INSNS(1); /* base cost */
+ return false; /* All arguments need to be in registers. */
+ }
+
+ *cost += COSTS_N_INSNS(1); /* default cost */
+ return false;
+
+ case FMA:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ op2 = XEXP (x, 2);
+
+ *cost += COSTS_N_INSNS(5); /* base cost */
+
+ /* FMSUB, FNMADD, and FNMSUB are free. */
+ if (GET_CODE (op0) == NEG)
+ op0 = XEXP (op0, 0);
+
+ if (GET_CODE (op2) == NEG)
+ op2 = XEXP (op2, 0);
+
+ /* If the remaining parameters are not registers,
+ get the cost to put them into registers. */
+ *cost += rtx_cost (op0, mode, FMA, 0, speed);
+ *cost += rtx_cost (op1, mode, FMA, 1, speed);
+ *cost += rtx_cost (op2, mode, FMA, 2, speed);
+ return true;
+
+ case FLOAT_EXTEND:
+ case FLOAT_TRUNCATE:
+ *cost += COSTS_N_INSNS (6); /* base cost */
+ return false;
+
+ case ABS:
+ *cost += COSTS_N_INSNS(3); /* FABS when used by FP */
+ return false;
+
+ case SMAX:
+ case SMIN:
+ *cost += COSTS_N_INSNS(3); /* base cost */
+ return false;
+
+ case TRUNCATE:
+ if (mode == DImode
+ && GET_MODE (XEXP (x, 0)) == TImode
+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode)
+ {
+ /* umulh/smulh */
+ *cost += COSTS_N_INSNS(5);
+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), mode, MULT, 0, speed);
+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), mode, MULT, 1, speed);
+ return true;
+ }
+
+ *cost += COSTS_N_INSNS(1); /* default */
+ return false;
+
+ default:
+ *cost += COSTS_N_INSNS (1); /* default cost */
+ return false;
+ }
+}
+
/* Return 1 if pseudo register should be created and used to hold
GOT address for PIC code. */