diff options
52 files changed, 790 insertions, 823 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b82ea04f0f4..821442fa724 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,135 @@ 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> + * Makefile.in (OBJS): Add vec-perm-indices.o. + * vec-perm-indices.h: New file. + * vec-perm-indices.c: Likewise. + * target.h (vec_perm_indices): Replace with a forward class + declaration. + (auto_vec_perm_indices): Move to vec-perm-indices.h. + * optabs.h: Include vec-perm-indices.h. + (expand_vec_perm): Delete. + (selector_fits_mode_p, expand_vec_perm_var): Declare. + (expand_vec_perm_const): Declare. + * target.def (vec_perm_const_ok): Replace with... + (vec_perm_const): ...this new hook. + * doc/tm.texi.in (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Replace with... + (TARGET_VECTORIZE_VEC_PERM_CONST): ...this new hook. + * doc/tm.texi: Regenerate. + * optabs.def (vec_perm_const): Delete. + * doc/md.texi (vec_perm_const): Likewise. + (vec_perm): Refer to TARGET_VECTORIZE_VEC_PERM_CONST. + * expr.c (expand_expr_real_2): Use expand_vec_perm_const rather than + expand_vec_perm for constant permutation vectors. Assert that + the mode of variable permutation vectors is the integer equivalent + of the mode that is being permuted. + * optabs-query.h (selector_fits_mode_p): Declare. + * optabs-query.c: Include vec-perm-indices.h. + (selector_fits_mode_p): New function. + (can_vec_perm_const_p): Check whether targetm.vectorize.vec_perm_const + is defined, instead of checking whether the vec_perm_const_optab + exists. Use targetm.vectorize.vec_perm_const instead of + targetm.vectorize.vec_perm_const_ok. Check whether the indices + fit in the vector mode before using a variable permute. + * optabs.c (shift_amt_for_vec_perm_mask): Take a mode and a + vec_perm_indices instead of an rtx. + (expand_vec_perm): Replace with... + (expand_vec_perm_const): ...this new function. Take the selector + as a vec_perm_indices rather than an rtx. Also take the mode of + the selector. 
Update call to shift_amt_for_vec_perm_mask. + Use targetm.vectorize.vec_perm_const instead of vec_perm_const_optab. + Use vec_perm_indices::new_expanded_vector to expand the original + selector into bytes. Check whether the indices fit in the vector + mode before using a variable permute. + (expand_vec_perm_var): Make global. + (expand_mult_highpart): Use expand_vec_perm_const. + * fold-const.c: Include vec-perm-indices.h. + * tree-ssa-forwprop.c: Likewise. + * tree-vect-data-refs.c: Likewise. + * tree-vect-generic.c: Likewise. + * tree-vect-loop.c: Likewise. + * tree-vect-slp.c: Likewise. + * tree-vect-stmts.c: Likewise. + * config/aarch64/aarch64-protos.h (aarch64_expand_vec_perm_const): + Delete. + * config/aarch64/aarch64-simd.md (vec_perm_const<mode>): Delete. + * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const) + (aarch64_vectorize_vec_perm_const_ok): Fuse into... + (aarch64_vectorize_vec_perm_const): ...this new function. + (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. + (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. + * config/arm/arm-protos.h (arm_expand_vec_perm_const): Delete. + * config/arm/vec-common.md (vec_perm_const<mode>): Delete. + * config/arm/arm.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. + (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. + (arm_expand_vec_perm_const, arm_vectorize_vec_perm_const_ok): Merge + into... + (arm_vectorize_vec_perm_const): ...this new function. Explicitly + check for NEON modes. + * config/i386/i386-protos.h (ix86_expand_vec_perm_const): Delete. + * config/i386/sse.md (VEC_PERM_CONST, vec_perm_const<mode>): Delete. + * config/i386/i386.c (ix86_expand_vec_perm_const_1): Update comment. + (ix86_expand_vec_perm_const, ix86_vectorize_vec_perm_const_ok): Merge + into... + (ix86_vectorize_vec_perm_const): ...this new function. Incorporate + the old VEC_PERM_CONST conditions. + * config/ia64/ia64-protos.h (ia64_expand_vec_perm_const): Delete. + * config/ia64/vect.md (vec_perm_const<mode>): Delete. 
+ * config/ia64/ia64.c (ia64_expand_vec_perm_const) + (ia64_vectorize_vec_perm_const_ok): Merge into... + (ia64_vectorize_vec_perm_const): ...this new function. + * config/mips/loongson.md (vec_perm_const<mode>): Delete. + * config/mips/mips-msa.md (vec_perm_const<mode>): Delete. + * config/mips/mips-ps-3d.md (vec_perm_constv2sf): Delete. + * config/mips/mips-protos.h (mips_expand_vec_perm_const): Delete. + * config/mips/mips.c (mips_expand_vec_perm_const) + (mips_vectorize_vec_perm_const_ok): Merge into... + (mips_vectorize_vec_perm_const): ...this new function. + * config/powerpcspe/altivec.md (vec_perm_constv16qi): Delete. + * config/powerpcspe/paired.md (vec_perm_constv2sf): Delete. + * config/powerpcspe/spe.md (vec_perm_constv2si): Delete. + * config/powerpcspe/vsx.md (vec_perm_const<mode>): Delete. + * config/powerpcspe/powerpcspe-protos.h (altivec_expand_vec_perm_const) + (rs6000_expand_vec_perm_const): Delete. + * config/powerpcspe/powerpcspe.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): + Delete. + (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. + (altivec_expand_vec_perm_const_le): Take each operand individually. + Operate on constant selectors rather than rtxes. + (altivec_expand_vec_perm_const): Likewise. Update call to + altivec_expand_vec_perm_const_le. + (rs6000_expand_vec_perm_const): Delete. + (rs6000_vectorize_vec_perm_const_ok): Delete. + (rs6000_vectorize_vec_perm_const): New function. + (rs6000_do_expand_vec_perm): Take a vec_perm_builder instead of + an element count and rtx array. + (rs6000_expand_extract_even): Update call accordingly. + (rs6000_expand_interleave): Likewise. + * config/rs6000/altivec.md (vec_perm_constv16qi): Delete. + * config/rs6000/paired.md (vec_perm_constv2sf): Delete. + * config/rs6000/vsx.md (vec_perm_const<mode>): Delete. + * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_const) + (rs6000_expand_vec_perm_const): Delete. + * config/rs6000/rs6000.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): Delete. 
+ (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. + (altivec_expand_vec_perm_const_le): Take each operand individually. + Operate on constant selectors rather than rtxes. + (altivec_expand_vec_perm_const): Likewise. Update call to + altivec_expand_vec_perm_const_le. + (rs6000_expand_vec_perm_const): Delete. + (rs6000_vectorize_vec_perm_const_ok): Delete. + (rs6000_vectorize_vec_perm_const): New function. Remove stray + reference to the SPE evmerge instructions. + (rs6000_do_expand_vec_perm): Take a vec_perm_builder instead of + an element count and rtx array. + (rs6000_expand_extract_even): Update call accordingly. + (rs6000_expand_interleave): Likewise. + * config/sparc/sparc.md (vec_perm_constv8qi): Delete in favor of... + * config/sparc/sparc.c (sparc_vectorize_vec_perm_const): ...this + new function. + (TARGET_VECTORIZE_VEC_PERM_CONST): Redefine. + +2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> + * optabs.c (expand_vec_perm_1): Assert that SEL has an integer vector mode and that that mode matches the mode of the data being permuted. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d9f27de0de3..3b0ba276951 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1585,6 +1585,7 @@ OBJS = \ var-tracking.o \ varasm.o \ varpool.o \ + vec-perm-indices.o \ vmsdbgout.o \ vr-values.o \ vtable-verify.o \ diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 002ac330c6d..3bc3756563a 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -488,8 +488,6 @@ extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int); extern bool aarch64_madd_needs_nop (rtx_insn *); extern void aarch64_final_prescan_insn (rtx_insn *); -extern bool -aarch64_expand_vec_perm_const (rtx, rtx, rtx, rtx, unsigned int); void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); int aarch64_ccmp_mode_to_code (machine_mode mode); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 84c4f8286c0..e04a9883892 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5385,20 +5385,6 @@ ;; vec_perm support -(define_expand "vec_perm_const<mode>" - [(match_operand:VALL_F16 0 "register_operand") - (match_operand:VALL_F16 1 "register_operand") - (match_operand:VALL_F16 2 "register_operand") - (match_operand:<V_INT_EQUIV> 3)] - "TARGET_SIMD" -{ - if (aarch64_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3], <nunits>)) - DONE; - else - FAIL; -}) - (define_expand "vec_perm<mode>" [(match_operand:VB 0 "register_operand") (match_operand:VB 1 "register_operand") diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1da313f57e0..05b82bcd615 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -143,8 +143,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; 
static void aarch64_override_options_after_change (void); static bool aarch64_vector_mode_supported_p (machine_mode); -static bool aarch64_vectorize_vec_perm_const_ok (machine_mode, - vec_perm_indices); static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, @@ -13670,29 +13668,27 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* Expand a vec_perm_const pattern with the operands given by TARGET, - OP0, OP1 and SEL. NELT is the number of elements in the vector. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -bool -aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, - unsigned int nelt) +static bool +aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned int i, which; + d.vmode = vmode; d.target = target; d.op0 = op0; d.op1 = op1; + d.testing_p = !target; - d.vmode = GET_MODE (target); - gcc_assert (VECTOR_MODE_P (d.vmode)); - d.testing_p = false; - + /* Calculate whether all elements are in one vector. */ + unsigned int nelt = sel.length (); d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int ei = INTVAL (e) & (2 * nelt - 1); + unsigned int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 
1 : 2); d.perm.quick_push (ei); } @@ -13704,7 +13700,7 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, case 3: d.one_vector_p = false; - if (!rtx_equal_p (op0, op1)) + if (d.testing_p || !rtx_equal_p (op0, op1)) break; /* The elements of PERM do not suggest that only the first operand @@ -13725,37 +13721,8 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, break; } - return aarch64_expand_vec_perm_const_1 (&d); -} - -static bool -aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.testing_p = true; - d.perm.safe_splice (sel); - - /* Calculate whether all elements are in one vector. */ - nelt = sel.length (); - for (i = which = 0; i < nelt; ++i) - { - unsigned int e = d.perm[i]; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* If all elements are from the second vector, reindex as if from the - first vector. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to a single vector. */ - d.one_vector_p = (which != 3); + if (!d.testing_p) + return aarch64_expand_vec_perm_const_1 (&d); d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); @@ -13763,7 +13730,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); start_sequence (); - ret = aarch64_expand_vec_perm_const_1 (&d); + bool ret = aarch64_expand_vec_perm_const_1 (&d); end_sequence (); return ret; @@ -15515,9 +15482,9 @@ aarch64_libgcc_floating_mode_supported_p /* vec_perm support. 
*/ -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - aarch64_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST \ + aarch64_vectorize_vec_perm_const #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 0c977429c12..24a4ab870c7 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -357,7 +357,6 @@ extern bool arm_validize_comparison (rtx *, rtx *, rtx *); extern bool arm_gen_setmem (rtx *); extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); -extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 2aa64917e4c..2173d95dd6d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -290,7 +290,8 @@ static int arm_cortex_a5_branch_cost (bool, bool); static int arm_cortex_m_branch_cost (bool, bool); static int arm_cortex_m7_branch_cost (bool, bool); -static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); +static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); @@ -736,9 +737,8 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - arm_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ @@ -29383,28 +29383,31 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* 
Expand a vec_perm_const pattern. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -bool -arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +static bool +arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct expand_vec_perm_d d; int i, nelt, which; + if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode)) + return false; + d.target = target; d.op0 = op0; d.op1 = op1; - d.vmode = GET_MODE (target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); - d.testing_p = false; + d.testing_p = !target; nelt = GET_MODE_NUNITS (d.vmode); d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); d.perm.quick_push (ei); } @@ -29416,7 +29419,7 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) case 3: d.one_vector_p = false; - if (!rtx_equal_p (op0, op1)) + if (d.testing_p || !rtx_equal_p (op0, op1)) break; /* The elements of PERM do not suggest that only the first operand @@ -29437,38 +29440,8 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) break; } - return arm_expand_vec_perm_const_1 (&d); -} - -/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ - -static bool -arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.testing_p = true; - d.perm.safe_splice (sel); - - /* Categorize the set of elements in the selector. */ - nelt = GET_MODE_NUNITS (d.vmode); - for (i = which = 0; i < nelt; ++i) - { - unsigned int e = d.perm[i]; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. 
*/ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_vector_p = (which != 3); + if (!d.testing_p) + return arm_expand_vec_perm_const_1 (&d); d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); @@ -29476,7 +29449,7 @@ arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); start_sequence (); - ret = arm_expand_vec_perm_const_1 (&d); + bool ret = arm_expand_vec_perm_const_1 (&d); end_sequence (); return ret; diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 813341b157f..20ae24fed56 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -109,35 +109,6 @@ { }) -(define_expand "vec_perm_const<mode>" - [(match_operand:VALL 0 "s_register_operand" "") - (match_operand:VALL 1 "s_register_operand" "") - (match_operand:VALL 2 "s_register_operand" "") - (match_operand:<V_cmp_result> 3 "" "")] - "TARGET_NEON - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ - if (arm_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3])) - DONE; - else - FAIL; -}) - -(define_expand "vec_perm_const<mode>" - [(match_operand:VH 0 "s_register_operand") - (match_operand:VH 1 "s_register_operand") - (match_operand:VH 2 "s_register_operand") - (match_operand:<V_cmp_result> 3)] - "TARGET_NEON" -{ - if (arm_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3])) - DONE; - else - FAIL; -}) - (define_expand "vec_perm<mode>" [(match_operand:VE 0 "s_register_operand" "") (match_operand:VE 1 "s_register_operand" "") diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index f5755f0d363..287b0198589 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -133,7 +133,6 @@ extern bool ix86_expand_fp_movcc 
(rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_vec_perm (rtx[]); -extern bool ix86_expand_vec_perm_const (rtx[]); extern bool ix86_expand_mask_vec_cmp (rtx[]); extern bool ix86_expand_int_vec_cmp (rtx[]); extern bool ix86_expand_fp_vec_cmp (rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9ff9ca4e37f..1acb2c6ab83 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -47605,9 +47605,8 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) return true; } -/* The guts of ix86_expand_vec_perm_const, also used by the ok hook. - With all of the interface bits taken care of, perform the expansion - in D and return true on success. */ +/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits + taken care of, perform the expansion in D and return true on success. */ static bool ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) @@ -47742,69 +47741,29 @@ canonicalize_perm (struct expand_vec_perm_d *d) return (which == 3); } -bool -ix86_expand_vec_perm_const (rtx operands[4]) +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ + +static bool +ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; - int i, nelt; + unsigned int i, nelt, which; bool two_args; - rtx sel; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.testing_p = !target; - gcc_assert (GET_CODE (sel) == CONST_VECTOR); - gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_assert (sel.length () == nelt); gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); - for (i = 0; i < nelt; ++i) - { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); - d.perm[i] = ei; - perm[i] = ei; - } - - two_args = canonicalize_perm (&d); - - if (ix86_expand_vec_perm_const_1 (&d)) - return true; - - /* If the selector says both arguments are needed, but the operands are the - same, the above tried to expand with one_operand_p and flattened selector. - If that didn't work, retry without one_operand_p; we succeeded with that - during testing. */ - if (two_args && d.one_operand_p) - { - d.one_operand_p = false; - memcpy (d.perm, perm, sizeof (perm)); - return ix86_expand_vec_perm_const_1 (&d); - } - - return false; -} - -/* Implement targetm.vectorize.vec_perm_const_ok. */ - -static bool -ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - /* Given sufficient ISA support we can just return true here for selected vector modes. 
*/ switch (d.vmode) @@ -47813,17 +47772,23 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V16SImode: case E_V8DImode: case E_V8DFmode: - if (TARGET_AVX512F) - /* All implementable with a single vperm[it]2 insn. */ + if (!TARGET_AVX512F) + return false; + /* All implementable with a single vperm[it]2 insn. */ + if (d.testing_p) return true; break; case E_V32HImode: - if (TARGET_AVX512BW) + if (!TARGET_AVX512BW) + return false; + if (d.testing_p) /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V64QImode: - if (TARGET_AVX512BW) + if (!TARGET_AVX512BW) + return false; + if (d.testing_p) /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ return true; break; @@ -47831,73 +47796,108 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V8SFmode: case E_V4DFmode: case E_V4DImode: - if (TARGET_AVX512VL) + if (!TARGET_AVX) + return false; + if (d.testing_p && TARGET_AVX512VL) /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V16HImode: - if (TARGET_AVX2) + if (!TARGET_SSE2) + return false; + if (d.testing_p && TARGET_AVX2) /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ return true; break; case E_V32QImode: - if (TARGET_AVX2) + if (!TARGET_SSE2) + return false; + if (d.testing_p && TARGET_AVX2) /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ return true; break; - case E_V4SImode: - case E_V4SFmode: case E_V8HImode: case E_V16QImode: + if (!TARGET_SSE2) + return false; + /* Fall through. */ + case E_V4SImode: + case E_V4SFmode: + if (!TARGET_SSE) + return false; /* All implementable with a single vpperm insn. */ - if (TARGET_XOP) + if (d.testing_p && TARGET_XOP) return true; /* All implementable with 2 pshufb + 1 ior. 
*/ - if (TARGET_SSSE3) + if (d.testing_p && TARGET_SSSE3) return true; break; case E_V2DImode: case E_V2DFmode: + if (!TARGET_SSE) + return false; /* All implementable with shufpd or unpck[lh]pd. */ - return true; + if (d.testing_p) + return true; + break; default: return false; } - /* Extract the values from the vector CST into the permutation - array in D. */ for (i = which = 0; i < nelt; ++i) { unsigned char e = sel[i]; gcc_assert (e < 2 * nelt); d.perm[i] = e; + perm[i] = e; which |= (e < nelt ? 1 : 2); } - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; + if (d.testing_p) + { + /* For all elements from second vector, fold the elements to first. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; - /* Check whether the mask can be applied to the vector type. */ - d.one_operand_p = (which != 3); + /* Check whether the mask can be applied to the vector type. */ + d.one_operand_p = (which != 3); - /* Implementable with shufps or pshufd. */ - if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) - return true; + /* Implementable with shufps or pshufd. */ + if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) + return true; - /* Otherwise we have to go through the motions and see if we can - figure out how to generate the requested permutation. */ - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_operand_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + /* Otherwise we have to go through the motions and see if we can + figure out how to generate the requested permutation. 
*/ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - start_sequence (); - ret = ix86_expand_vec_perm_const_1 (&d); - end_sequence (); + start_sequence (); + bool ret = ix86_expand_vec_perm_const_1 (&d); + end_sequence (); - return ret; + return ret; + } + + two_args = canonicalize_perm (&d); + + if (ix86_expand_vec_perm_const_1 (&d)) + return true; + + /* If the selector says both arguments are needed, but the operands are the + same, the above tried to expand with one_operand_p and flattened selector. + If that didn't work, retry without one_operand_p; we succeeded with that + during testing. */ + if (two_args && d.one_operand_p) + { + d.one_operand_p = false; + memcpy (d.perm, perm, sizeof (perm)); + return ix86_expand_vec_perm_const_1 (&d); + } + + return false; } void @@ -50549,9 +50549,8 @@ ix86_run_selftests (void) #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ ix86_builtin_vectorization_cost -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - ix86_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ ix86_preferred_simd_mode diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 105b5cf6092..76c150fe8ec 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11498,30 +11498,6 @@ DONE; }) -(define_mode_iterator VEC_PERM_CONST - [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") - (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") - (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") - (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") - (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") - (V16SI 
"TARGET_AVX512F") (V8DI "TARGET_AVX512F") - (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) - -(define_expand "vec_perm_const<mode>" - [(match_operand:VEC_PERM_CONST 0 "register_operand") - (match_operand:VEC_PERM_CONST 1 "register_operand") - (match_operand:VEC_PERM_CONST 2 "register_operand") - (match_operand:<sseintvecmode> 3)] - "" -{ - if (ix86_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel bitwise logical operations diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index cbabbd3b757..71e55e47557 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -62,7 +62,6 @@ extern const char *get_bundle_name (int); extern const char *output_probe_stack_range (rtx, rtx); extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int); -extern bool ia64_expand_vec_perm_const (rtx op[4]); extern void ia64_expand_vec_setv2sf (rtx op[3]); #endif /* RTX_CODE */ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index f99bea98d21..d2ce1a49fb9 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -335,7 +335,8 @@ static fixed_size_mode ia64_get_reg_raw_mode (int regno); static section * ia64_hpux_function_section (tree, enum node_frequency, bool, bool); -static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); +static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); @@ -654,8 +655,8 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_DELAY_VARTRACK #define TARGET_DELAY_VARTRACK true -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok +#undef 
TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p @@ -11743,32 +11744,31 @@ ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -bool -ia64_expand_vec_perm_const (rtx operands[4]) +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + +static bool +ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; - int i, nelt, which; - rtx sel; + unsigned int i, nelt, which; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.testing_p = !target; - gcc_assert (GET_CODE (sel) == CONST_VECTOR); - gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_assert (sel.length () == nelt); gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + unsigned int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); d.perm[i] = ei; @@ -11781,7 +11781,7 @@ ia64_expand_vec_perm_const (rtx operands[4]) gcc_unreachable(); case 3: - if (!rtx_equal_p (d.op0, d.op1)) + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) { d.one_operand_p = false; break; @@ -11809,6 +11809,22 @@ ia64_expand_vec_perm_const (rtx operands[4]) break; } + if (d.testing_p) + { + /* We have to go through the motions and see if we can + figure out how to generate the requested permutation. 
*/ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + bool ret = ia64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; + } + if (ia64_expand_vec_perm_const_1 (&d)) return true; @@ -11825,51 +11841,6 @@ ia64_expand_vec_perm_const (rtx operands[4]) return false; } -/* Implement targetm.vectorize.vec_perm_const_ok. */ - -static bool -ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - - /* Extract the values from the vector CST into the permutation - array in D. */ - for (i = which = 0; i < nelt; ++i) - { - unsigned char e = sel[i]; - d.perm[i] = e; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_operand_p = (which != 3); - - /* Otherwise we have to go through the motions and see if we can - figure out how to generate the requested permutation. 
*/ - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_operand_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); - ret = ia64_expand_vec_perm_const_1 (&d); - end_sequence (); - - return ret; -} - void ia64_expand_vec_setv2sf (rtx operands[3]) { diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index 20e260ccfba..68ac05c0e8a 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -1549,19 +1549,6 @@ DONE; }) -(define_expand "vec_perm_const<mode>" - [(match_operand:VEC 0 "register_operand" "") - (match_operand:VEC 1 "register_operand" "") - (match_operand:VEC 2 "register_operand" "") - (match_operand:<vecint> 3 "" "")] - "" -{ - if (ia64_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Missing operations ;; fprcpa ;; fpsqrta diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index b48dfa0dc71..c75ce2cca5b 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -784,19 +784,6 @@ "punpcklwd\t%0,%1,%2" [(set_attr "type" "fcvt")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VWHB 0 "register_operand" "") - (match_operand:VWHB 1 "register_operand" "") - (match_operand:VWHB 2 "register_operand" "") - (match_operand:VWHB 3 "" "")] - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "vec_unpacks_lo_<mode>" [(match_operand:<V_stretch_half> 0 "register_operand" "") (match_operand:VHB 1 "register_operand" "")] diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md index 87d889d7296..73f38d3f5d4 100644 --- a/gcc/config/mips/mips-msa.md +++ b/gcc/config/mips/mips-msa.md @@ -558,19 +558,6 @@ [(set_attr "type" "simd_copy") (set_attr "mode" "<MODE>")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:MSA 0 "register_operand") - (match_operand:MSA 1 
"register_operand") - (match_operand:MSA 2 "register_operand") - (match_operand:<VIMODE> 3 "")] - "ISA_HAS_MSA" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "abs<mode>2" [(match_operand:IMSA 0 "register_operand" "=f") (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))] diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 1c4167a836a..8eab7c58114 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -348,7 +348,6 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs, rtx, rtx, rtx, rtx); extern void mips_expand_vector_init (rtx, rtx); -extern bool mips_expand_vec_perm_const (rtx op[4]); extern void mips_expand_vec_unpack (rtx op[2], bool, bool); extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx)); extern void mips_expand_vec_minmax (rtx, rtx, rtx, diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 81820b13b11..05d58e9968f 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -164,19 +164,6 @@ [(set_attr "type" "fmove") (set_attr "mode" "SF")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "register_operand" "") - (match_operand:V2SF 1 "register_operand" "") - (match_operand:V2SF 2 "register_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Expanders for builtins. The instruction: ;; ;; P[UL][UL].PS <result>, <a>, <b> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 8f2f6e09824..966e7ce0891 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -21379,34 +21379,32 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* Expand a vec_perm_const pattern. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ -bool -mips_expand_vec_perm_const (rtx operands[4]) +static bool +mips_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; int i, nelt, which; unsigned char orig_perm[MAX_VECT_LEN]; - rtx sel; bool ok; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); - gcc_assert (VECTOR_MODE_P (d.vmode)); - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.vmode = vmode; + gcc_assert (VECTOR_MODE_P (vmode)); + d.nelt = nelt = GET_MODE_NUNITS (vmode); + d.testing_p = !target; /* This is overly conservative, but ensures we don't get an uninitialized warning on ORIG_PERM. */ memset (orig_perm, 0, MAX_VECT_LEN); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); orig_perm[i] = ei; } @@ -21419,7 +21417,7 @@ mips_expand_vec_perm_const (rtx operands[4]) case 3: d.one_vector_p = false; - if (!rtx_equal_p (d.op0, d.op1)) + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) break; /* FALLTHRU */ @@ -21436,6 +21434,19 @@ mips_expand_vec_perm_const (rtx operands[4]) break; } + if (d.testing_p) + { + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ok = mips_expand_vec_perm_const_1 (&d); + end_sequence (); + return ok; + } + ok = mips_expand_vec_perm_const_1 (&d); /* If we were given a two-vector permutation which just happened to @@ -21447,8 +21458,8 @@ mips_expand_vec_perm_const (rtx operands[4]) the original permutation. 
*/ if (!ok && which == 3) { - d.op0 = operands[1]; - d.op1 = operands[2]; + d.op0 = op0; + d.op1 = op1; d.one_vector_p = false; memcpy (d.perm, orig_perm, MAX_VECT_LEN); ok = mips_expand_vec_perm_const_1 (&d); @@ -21468,48 +21479,6 @@ mips_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, return 1; } -/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ - -static bool -mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - - /* Categorize the set of elements in the selector. */ - for (i = which = 0; i < nelt; ++i) - { - unsigned char e = sel[i]; - d.perm[i] = e; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_vector_p = (which != 3); - - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_vector_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); - ret = mips_expand_vec_perm_const_1 (&d); - end_sequence (); - - return ret; -} - /* Expand an integral vector unpack operation. 
*/ void @@ -22591,8 +22560,8 @@ mips_starting_frame_offset (void) #undef TARGET_PREPARE_PCH_SAVE #define TARGET_PREPARE_PCH_SAVE mips_prepare_pch_save -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST mips_vectorize_vec_perm_const #undef TARGET_SCHED_REASSOCIATION_WIDTH #define TARGET_SCHED_REASSOCIATION_WIDTH mips_sched_reassociation_width diff --git a/gcc/config/powerpcspe/altivec.md b/gcc/config/powerpcspe/altivec.md index 81373f581d1..2f85e369c3e 100644 --- a/gcc/config/powerpcspe/altivec.md +++ b/gcc/config/powerpcspe/altivec.md @@ -2080,19 +2080,6 @@ } }) -(define_expand "vec_perm_constv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "") - (match_operand:V16QI 3 "" "")] - "TARGET_ALTIVEC" -{ - if (altivec_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "*altivec_vpermr_<mode>_internal" [(set (match_operand:VM 0 "register_operand" "=v,?wo") (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") diff --git a/gcc/config/powerpcspe/paired.md b/gcc/config/powerpcspe/paired.md index e12f07fc9b8..e950e465861 100644 --- a/gcc/config/powerpcspe/paired.md +++ b/gcc/config/powerpcspe/paired.md @@ -313,19 +313,6 @@ "ps_merge11 %0, %1, %2" [(set_attr "type" "fp")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "gpc_reg_operand" "") - (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_PAIRED_FLOAT" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "paired_sum0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (vec_concat:V2SF (plus:SF (vec_select:SF diff --git a/gcc/config/powerpcspe/powerpcspe-protos.h b/gcc/config/powerpcspe/powerpcspe-protos.h index 
78baeecad38..b9baae8a680 100644 --- a/gcc/config/powerpcspe/powerpcspe-protos.h +++ b/gcc/config/powerpcspe/powerpcspe-protos.h @@ -64,9 +64,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern void rs6000_split_v4si_init (rtx []); -extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_vec_perm_le (rtx op[4]); -extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned); diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c index bf90cc5cd7d..9133125a3ea 100644 --- a/gcc/config/powerpcspe/powerpcspe.c +++ b/gcc/config/powerpcspe/powerpcspe.c @@ -1938,8 +1938,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost @@ -38313,6 +38313,9 @@ rs6000_emit_parity (rtx dst, rtx src) } /* Expand an Altivec constant permutation for little endian mode. + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. + There are two issues: First, the two input operands must be swapped so that together they form a double-wide array in LE order. 
Second, the vperm instruction has surprising behavior @@ -38354,22 +38357,18 @@ rs6000_emit_parity (rtx dst, rtx src) vr9 = 00000006 00000004 00000002 00000000. */ -void -altivec_expand_vec_perm_const_le (rtx operands[4]) +static void +altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { unsigned int i; rtx perm[16]; rtx constv, unspec; - rtx target = operands[0]; - rtx op0 = operands[1]; - rtx op1 = operands[2]; - rtx sel = operands[3]; /* Unpack and adjust the constant selector. */ for (i = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int elt = 31 - (INTVAL (e) & 31); + unsigned int elt = 31 - (sel[i] & 31); perm[i] = GEN_INT (elt); } @@ -38451,10 +38450,14 @@ altivec_expand_vec_perm_le (rtx operands[4]) } /* Expand an Altivec constant permutation. Return true if we match - an efficient implementation; false to fall back to VPERM. */ + an efficient implementation; false to fall back to VPERM. -bool -altivec_expand_vec_perm_const (rtx operands[4]) + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. */ + +static bool +altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct altivec_perm_insn { HOST_WIDE_INT mask; @@ -38498,19 +38501,13 @@ altivec_expand_vec_perm_const (rtx operands[4]) unsigned int i, j, elt, which; unsigned char perm[16]; - rtx target, op0, op1, sel, x; + rtx x; bool one_vec; - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - /* Unpack the constant selector. */ for (i = which = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - elt = INTVAL (e) & 31; + elt = sel[i] & 31; which |= (elt < 16 ? 
1 : 2); perm[i] = elt; } @@ -38666,7 +38663,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) if (!BYTES_BIG_ENDIAN) { - altivec_expand_vec_perm_const_le (operands); + altivec_expand_vec_perm_const_le (target, op0, op1, sel); return true; } @@ -38726,60 +38723,54 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, return true; } -bool -rs6000_expand_vec_perm_const (rtx operands[4]) -{ - rtx target, op0, op1, sel; - unsigned char perm0, perm1; - - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - - /* Unpack the constant selector. */ - perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; - perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; - - return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); -} - -/* Test whether a constant permutation is supported. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) +rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { + bool testing_p = !target; + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ - if (TARGET_ALTIVEC) + if (TARGET_ALTIVEC && testing_p) return true; - /* Check for ps_merge* or evmerge* insns. */ - if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode) - || (TARGET_SPE && vmode == V2SImode)) + /* Check for ps_merge*, evmerge* or xxperm* insns. 
*/ + if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT) + || (vmode == V2SImode && TARGET_SPE) + || ((vmode == V2DFmode || vmode == V2DImode) + && VECTOR_MEM_VSX_P (vmode))) + { + if (testing_p) + { + op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + } + if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) + return true; + } + + if (TARGET_ALTIVEC) { - rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); - rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); - return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + /* Force the target-independent code to lower to V16QImode. */ + if (vmode != V16QImode) + return false; + if (altivec_expand_vec_perm_const (target, op0, op1, sel)) + return true; } return false; } -/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. + OP0 and OP1 are the input vectors and TARGET is the output vector. + PERM specifies the constant permutation vector. 
*/ static void rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, - machine_mode vmode, unsigned nelt, rtx perm[]) + machine_mode vmode, const vec_perm_builder &perm) { - machine_mode imode; - rtx x; - - imode = vmode; - if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) - imode = mode_for_int_vector (vmode).require (); - - x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); - x = expand_vec_perm (vmode, op0, op1, x, target); + rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); if (x != target) emit_move_insn (target, x); } @@ -38791,12 +38782,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) { machine_mode vmode = GET_MODE (target); unsigned i, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); for (i = 0; i < nelt; i++) - perm[i] = GEN_INT (i * 2); + perm.quick_push (i * 2); - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Expand a vector interleave operation. */ @@ -38806,16 +38797,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) { machine_mode vmode = GET_MODE (target); unsigned i, high, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); high = (highp ? 0 : nelt / 2); for (i = 0; i < nelt / 2; i++) { - perm[i * 2] = GEN_INT (i + high); - perm[i * 2 + 1] = GEN_INT (i + nelt + high); + perm.quick_push (i + high); + perm.quick_push (i + nelt + high); } - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ diff --git a/gcc/config/powerpcspe/spe.md b/gcc/config/powerpcspe/spe.md index 2351152dc24..56acfdd86d0 100644 --- a/gcc/config/powerpcspe/spe.md +++ b/gcc/config/powerpcspe/spe.md @@ -511,19 +511,6 @@ [(set_attr "type" "vecsimple") (set_attr "length" "4")]) -(define_expand "vec_perm_constv2si" - [(match_operand:V2SI 0 "gpc_reg_operand" "") - (match_operand:V2SI 1 "gpc_reg_operand" "") - (match_operand:V2SI 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_SPE" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "spe_evmergehi" [(match_operand:V2SI 0 "register_operand" "") (match_operand:V2SI 1 "register_operand" "") diff --git a/gcc/config/powerpcspe/vsx.md b/gcc/config/powerpcspe/vsx.md index b669764ce8f..794ff446b8a 100644 --- a/gcc/config/powerpcspe/vsx.md +++ b/gcc/config/powerpcspe/vsx.md @@ -2543,19 +2543,6 @@ } [(set_attr "type" "vecperm")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VSX_D 0 "vsx_register_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "") - (match_operand:V2DI 3 "" "")] - "VECTOR_MEM_VSX_P (<MODE>mode)" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Extraction of a single element in a small integer vector. Until ISA 3.0, ;; none of the small types were allowed in a vector register, so we had to ;; extract to a DImode and either do a direct move or store. 
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7122f99bffd..5d10c829103 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2198,19 +2198,6 @@ } }) -(define_expand "vec_perm_constv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "") - (match_operand:V16QI 3 "" "")] - "TARGET_ALTIVEC" -{ - if (altivec_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "*altivec_vpermr_<mode>_internal" [(set (match_operand:VM 0 "register_operand" "=v,?wo") (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md index b0aa329d7b8..584a791d431 100644 --- a/gcc/config/rs6000/paired.md +++ b/gcc/config/rs6000/paired.md @@ -313,19 +313,6 @@ "ps_merge11 %0, %1, %2" [(set_attr "type" "fp")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "gpc_reg_operand" "") - (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_PAIRED_FLOAT" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "paired_sum0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (vec_concat:V2SF (plus:SF (vec_select:SF diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9264aa2fd26..90107ea8821 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -63,9 +63,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern void rs6000_split_v4si_init (rtx []); -extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_vec_perm_le (rtx op[4]); -extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern 
void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 42704d34d98..7d10b44fff9 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1910,8 +1910,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost @@ -35570,6 +35570,9 @@ rs6000_emit_parity (rtx dst, rtx src) } /* Expand an Altivec constant permutation for little endian mode. + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. + There are two issues: First, the two input operands must be swapped so that together they form a double-wide array in LE order. Second, the vperm instruction has surprising behavior @@ -35611,22 +35614,18 @@ rs6000_emit_parity (rtx dst, rtx src) vr9 = 00000006 00000004 00000002 00000000. */ -void -altivec_expand_vec_perm_const_le (rtx operands[4]) +static void +altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { unsigned int i; rtx perm[16]; rtx constv, unspec; - rtx target = operands[0]; - rtx op0 = operands[1]; - rtx op1 = operands[2]; - rtx sel = operands[3]; /* Unpack and adjust the constant selector. 
*/ for (i = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int elt = 31 - (INTVAL (e) & 31); + unsigned int elt = 31 - (sel[i] & 31); perm[i] = GEN_INT (elt); } @@ -35708,10 +35707,14 @@ altivec_expand_vec_perm_le (rtx operands[4]) } /* Expand an Altivec constant permutation. Return true if we match - an efficient implementation; false to fall back to VPERM. */ + an efficient implementation; false to fall back to VPERM. -bool -altivec_expand_vec_perm_const (rtx operands[4]) + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. */ + +static bool +altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct altivec_perm_insn { HOST_WIDE_INT mask; @@ -35759,19 +35762,13 @@ altivec_expand_vec_perm_const (rtx operands[4]) unsigned int i, j, elt, which; unsigned char perm[16]; - rtx target, op0, op1, sel, x; + rtx x; bool one_vec; - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - /* Unpack the constant selector. */ for (i = which = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - elt = INTVAL (e) & 31; + elt = sel[i] & 31; which |= (elt < 16 ? 1 : 2); perm[i] = elt; } @@ -35927,7 +35924,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) if (!BYTES_BIG_ENDIAN) { - altivec_expand_vec_perm_const_le (operands); + altivec_expand_vec_perm_const_le (target, op0, op1, sel); return true; } @@ -35987,59 +35984,53 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, return true; } -bool -rs6000_expand_vec_perm_const (rtx operands[4]) -{ - rtx target, op0, op1, sel; - unsigned char perm0, perm1; - - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - - /* Unpack the constant selector. 
*/ - perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; - perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; - - return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); -} - -/* Test whether a constant permutation is supported. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) +rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { + bool testing_p = !target; + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ - if (TARGET_ALTIVEC) + if (TARGET_ALTIVEC && testing_p) return true; - /* Check for ps_merge* or evmerge* insns. */ - if (TARGET_PAIRED_FLOAT && vmode == V2SFmode) + /* Check for ps_merge* or xxpermdi insns. */ + if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT) + || ((vmode == V2DFmode || vmode == V2DImode) + && VECTOR_MEM_VSX_P (vmode))) + { + if (testing_p) + { + op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + } + if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) + return true; + } + + if (TARGET_ALTIVEC) { - rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); - rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); - return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + /* Force the target-independent code to lower to V16QImode. */ + if (vmode != V16QImode) + return false; + if (altivec_expand_vec_perm_const (target, op0, op1, sel)) + return true; } return false; } -/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. + OP0 and OP1 are the input vectors and TARGET is the output vector. + PERM specifies the constant permutation vector. 
*/ static void rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, - machine_mode vmode, unsigned nelt, rtx perm[]) + machine_mode vmode, const vec_perm_builder &perm) { - machine_mode imode; - rtx x; - - imode = vmode; - if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) - imode = mode_for_int_vector (vmode).require (); - - x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); - x = expand_vec_perm (vmode, op0, op1, x, target); + rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); if (x != target) emit_move_insn (target, x); } @@ -36051,12 +36042,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) { machine_mode vmode = GET_MODE (target); unsigned i, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); for (i = 0; i < nelt; i++) - perm[i] = GEN_INT (i * 2); + perm.quick_push (i * 2); - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Expand a vector interleave operation. */ @@ -36066,16 +36057,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) { machine_mode vmode = GET_MODE (target); unsigned i, high, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); high = (highp ? 0 : nelt / 2); for (i = 0; i < nelt / 2; i++) { - perm[i * 2] = GEN_INT (i + high); - perm[i * 2 + 1] = GEN_INT (i + nelt + high); + perm.quick_push (i + high); + perm.quick_push (i + nelt + high); } - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f6f2bd48363..7c6bb17a33b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3189,19 +3189,6 @@ } [(set_attr "type" "vecperm")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VSX_D 0 "vsx_register_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "") - (match_operand:V2DI 3 "" "")] - "VECTOR_MEM_VSX_P (<MODE>mode)" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Extraction of a single element in a small integer vector. Until ISA 3.0, ;; none of the small types were allowed in a vector register, so we had to ;; extract to a DImode and either do a direct move or store. diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 288264f0e8c..62bc492229e 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -688,6 +688,8 @@ static bool sparc_modes_tieable_p (machine_mode, machine_mode); static bool sparc_can_change_mode_class (machine_mode, machine_mode, reg_class_t); static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT); +static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); #ifdef SUBTARGET_ATTRIBUTE_TABLE /* Table of valid machine attributes. */ @@ -932,6 +934,9 @@ char sparc_hard_reg_printed[8]; #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const + struct gcc_target targetm = TARGET_INITIALIZER; /* Return the memory reference contained in X if any, zero otherwise. */ @@ -12813,6 +12818,32 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); } +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ + +static bool +sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) +{ + /* All permutes are supported. */ + if (!target) + return true; + + /* Force target-independent code to convert constant permutations on other + modes down to V8QI. Rely on this to avoid the complexity of the byte + order of the permutation. */ + if (vmode != V8QImode) + return false; + + unsigned int i, mask; + for (i = mask = 0; i < 8; ++i) + mask |= (sel[i] & 0xf) << (28 - i*4); + rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode)); + + emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx)); + emit_insn (gen_bshufflev8qi_vis (target, op0, op1)); + return true; +} + /* Implement TARGET_FRAME_POINTER_REQUIRED. */ static bool diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index d1af68034dd..f7dff435642 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -9327,28 +9327,6 @@ visl") (set_attr "subtype" "other") (set_attr "fptype" "double")]) -;; The rtl expanders will happily convert constant permutations on other -;; modes down to V8QI. Rely on this to avoid the complexity of the byte -;; order of the permutation. 
-(define_expand "vec_perm_constv8qi" - [(match_operand:V8QI 0 "register_operand" "") - (match_operand:V8QI 1 "register_operand" "") - (match_operand:V8QI 2 "register_operand" "") - (match_operand:V8QI 3 "" "")] - "TARGET_VIS2" -{ - unsigned int i, mask; - rtx sel = operands[3]; - - for (i = mask = 0; i < 8; ++i) - mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4); - sel = force_reg (SImode, gen_int_mode (mask, SImode)); - - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); - emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2])); - DONE; -}) - ;; Unlike constant permutation, we can vastly simplify the compression of ;; the 64-bit selector input to the 32-bit %gsr value by knowing what the ;; width of the input is. diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index a131e5972df..85c180082ee 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -4996,20 +4996,8 @@ where @var{q} is a vector of @code{QImode} of the same width as @var{m}, the middle-end will lower the mode @var{m} @code{VEC_PERM_EXPR} to mode @var{q}. -@cindex @code{vec_perm_const@var{m}} instruction pattern -@item @samp{vec_perm_const@var{m}} -Like @samp{vec_perm} except that the permutation is a compile-time -constant. That is, operand 3, the @dfn{selector}, is a @code{CONST_VECTOR}. - -Some targets cannot perform a permutation with a variable selector, -but can efficiently perform a constant permutation. Further, the -target hook @code{vec_perm_ok} is queried to determine if the -specific constant permutation is available efficiently; the named -pattern is never expanded without @code{vec_perm_ok} returning true. - -There is no need for a target to supply both @samp{vec_perm@var{m}} -and @samp{vec_perm_const@var{m}} if the former can trivially implement -the operation with, say, the vector constant loaded into a register. +See also @code{TARGET_VECTORIZE_VEC_PERM_CONST}, which performs +the analogous operation for constant selectors. 
@cindex @code{push@var{m}1} instruction pattern @item @samp{push@var{m}1} diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 3a4229f3c6f..9793a0ed230 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5811,8 +5811,24 @@ correct for most targets. Return true if vector alignment is reachable (by peeling N iterations) for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (machine_mode, @var{vec_perm_indices}) -Return true if a vector created for @code{vec_perm_const} is valid. +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) +This hook is used to test whether the target can permute up to two +vectors of mode @var{mode} using the permutation vector @var{sel}, and +also to emit such a permutation. In the former case @var{in0}, @var{in1} +and @var{output} are all null. In the latter case @var{in0} and @var{in1} are +the source vectors and @var{output} is the destination vector; all three are +registers of mode @var{mode}. @var{in1} is the same as @var{in0} if +@var{sel} describes a permutation on one vector instead of two. + +Return true if the operation is possible, emitting instructions for it +if rtxes are provided. + +@cindex @code{vec_perm@var{m}} instruction pattern +If the hook returns false for a mode with multibyte elements, GCC will +try the equivalent byte operation. If that also fails, it will try forcing +the selector into a register and using the @samp{vec_perm@var{mode}} +instruction pattern. There is no need for the hook to handle these two +implementation approaches itself. 
@end deftypefn @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_CONVERSION (unsigned @var{code}, tree @var{dest_type}, tree @var{src_type}) diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index c1d2a8cdc1e..7bcfb37572a 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4081,7 +4081,7 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE -@hook TARGET_VECTORIZE_VEC_PERM_CONST_OK +@hook TARGET_VECTORIZE_VEC_PERM_CONST @hook TARGET_VECTORIZE_BUILTIN_CONVERSION diff --git a/gcc/expr.c b/gcc/expr.c index bae853c00cc..74a32802bc4 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9515,28 +9515,24 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, goto binop; case VEC_PERM_EXPR: - expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL); - op2 = expand_normal (treeop2); - - /* Careful here: if the target doesn't support integral vector modes, - a constant selection vector could wind up smooshed into a normal - integral constant. 
*/ - if (CONSTANT_P (op2) && !VECTOR_MODE_P (GET_MODE (op2))) - { - tree sel_type = TREE_TYPE (treeop2); - machine_mode vmode - = mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (sel_type)), - TYPE_VECTOR_SUBPARTS (sel_type)).require (); - gcc_assert (GET_MODE_CLASS (vmode) == MODE_VECTOR_INT); - op2 = simplify_subreg (vmode, op2, TYPE_MODE (sel_type), 0); - gcc_assert (op2 && GET_CODE (op2) == CONST_VECTOR); - } - else - gcc_assert (GET_MODE_CLASS (GET_MODE (op2)) == MODE_VECTOR_INT); - - temp = expand_vec_perm (mode, op0, op1, op2, target); - gcc_assert (temp); - return temp; + { + expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL); + vec_perm_builder sel; + if (TREE_CODE (treeop2) == VECTOR_CST + && tree_to_vec_perm_builder (&sel, treeop2)) + { + machine_mode sel_mode = TYPE_MODE (TREE_TYPE (treeop2)); + temp = expand_vec_perm_const (mode, op0, op1, sel, + sel_mode, target); + } + else + { + op2 = expand_normal (treeop2); + temp = expand_vec_perm_var (mode, op0, op1, op2, target); + } + gcc_assert (temp); + return temp; + } case DOT_PROD_EXPR: { diff --git a/gcc/fold-const.c b/gcc/fold-const.c index d9430ab473b..421bdd3c68e 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -82,6 +82,7 @@ along with GCC; see the file COPYING3. If not see #include "stringpool.h" #include "attribs.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* Nonzero if we are folding constants inside an initializer; zero otherwise. */ diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c index 90925908d67..4060b4fc4db 100644 --- a/gcc/optabs-query.c +++ b/gcc/optabs-query.c @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. 
If not see #include "insn-config.h" #include "rtl.h" #include "recog.h" +#include "vec-perm-indices.h" struct target_optabs default_target_optabs; struct target_optabs *this_fn_optabs = &default_target_optabs; @@ -361,6 +362,17 @@ qimode_for_vec_perm (machine_mode mode) return opt_machine_mode (); } +/* Return true if selector SEL can be represented in the integer + equivalent of vector mode MODE. */ + +bool +selector_fits_mode_p (machine_mode mode, const vec_perm_indices &sel) +{ + unsigned HOST_WIDE_INT mask = GET_MODE_MASK (GET_MODE_INNER (mode)); + return (mask == HOST_WIDE_INT_M1U + || sel.all_in_range_p (0, mask + 1)); +} + /* Return true if VEC_PERM_EXPRs with variable selector operands can be expanded using SIMD extensions of the CPU. MODE is the mode of the vectors being permuted. */ @@ -415,7 +427,7 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, return false; /* It's probably cheaper to test for the variable case first. */ - if (allow_variable_p) + if (allow_variable_p && selector_fits_mode_p (mode, sel)) { if (direct_optab_handler (vec_perm_optab, mode) != CODE_FOR_nothing) return true; @@ -424,20 +436,28 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, related computing the QImode selector, since that happens at compile time. 
*/ machine_mode qimode; - if (qimode_for_vec_perm (mode).exists (&qimode) - && direct_optab_handler (vec_perm_optab, qimode) != CODE_FOR_nothing) - return true; + if (qimode_for_vec_perm (mode).exists (&qimode)) + { + vec_perm_indices qimode_indices; + qimode_indices.new_expanded_vector (sel, GET_MODE_UNIT_SIZE (mode)); + if (selector_fits_mode_p (qimode, qimode_indices) + && (direct_optab_handler (vec_perm_optab, qimode) + != CODE_FOR_nothing)) + return true; + } } - if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing) + if (targetm.vectorize.vec_perm_const != NULL) { - if (targetm.vectorize.vec_perm_const_ok == NULL - || targetm.vectorize.vec_perm_const_ok (mode, sel)) + if (targetm.vectorize.vec_perm_const (mode, NULL_RTX, NULL_RTX, + NULL_RTX, sel)) return true; /* ??? For completeness, we ought to check the QImode version of vec_perm_const_optab. But all users of this implicit lowering - feature implement the variable vec_perm_optab. */ + feature implement the variable vec_perm_optab, and the ia64 + port specifically doesn't want us to lower V2SF operations + into integer operations. 
*/ } return false; diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index 28f20e7d0fd..5b9d0721f36 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -175,6 +175,7 @@ enum insn_code can_float_p (machine_mode, machine_mode, int); enum insn_code can_fix_p (machine_mode, machine_mode, int, bool *); bool can_conditionally_move_p (machine_mode mode); opt_machine_mode qimode_for_vec_perm (machine_mode); +bool selector_fits_mode_p (machine_mode, const vec_perm_indices &); bool can_vec_perm_var_p (machine_mode); bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &, bool = true); diff --git a/gcc/optabs.c b/gcc/optabs.c index 3549b4a8495..9099ba29143 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -5387,35 +5387,33 @@ vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, return gen_rtx_fmt_ee (rcode, cmp_mode, ops[0].value, ops[1].value); } -/* Checks if vec_perm mask SEL is a constant equivalent to a shift of the first - vec_perm operand, assuming the second operand is a constant vector of zeroes. - Return the shift distance in bits if so, or NULL_RTX if the vec_perm is not a - shift. */ +/* Check if vec_perm mask SEL is a constant equivalent to a shift of + the first vec_perm operand, assuming the second operand is a constant + vector of zeros. Return the shift distance in bits if so, or NULL_RTX + if the vec_perm is not a shift. MODE is the mode of the value being + shifted. 
*/ static rtx -shift_amt_for_vec_perm_mask (rtx sel) +shift_amt_for_vec_perm_mask (machine_mode mode, const vec_perm_indices &sel) { - unsigned int i, first, nelt = GET_MODE_NUNITS (GET_MODE (sel)); - unsigned int bitsize = GET_MODE_UNIT_BITSIZE (GET_MODE (sel)); + unsigned int i, first, nelt = GET_MODE_NUNITS (mode); + unsigned int bitsize = GET_MODE_UNIT_BITSIZE (mode); - if (GET_CODE (sel) != CONST_VECTOR) - return NULL_RTX; - - first = INTVAL (CONST_VECTOR_ELT (sel, 0)); + first = sel[0]; if (first >= nelt) return NULL_RTX; for (i = 1; i < nelt; i++) { - int idx = INTVAL (CONST_VECTOR_ELT (sel, i)); + int idx = sel[i]; unsigned int expected = i + first; /* Indices into the second vector are all equivalent. */ if (idx < 0 || (MIN (nelt, (unsigned) idx) != MIN (nelt, expected))) return NULL_RTX; } - return gen_int_shift_amount (GET_MODE (sel), first * bitsize); + return gen_int_shift_amount (mode, first * bitsize); } -/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */ +/* A subroutine of expand_vec_perm_var for expanding one vec_perm insn. */ static rtx expand_vec_perm_1 (enum insn_code icode, rtx target, @@ -5453,38 +5451,32 @@ expand_vec_perm_1 (enum insn_code icode, rtx target, return NULL_RTX; } -static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx); - /* Implement a permutation of vectors v0 and v1 using the permutation vector in SEL and return the result. Use TARGET to hold the result if nonnull and convenient. - MODE is the mode of the vectors being permuted (V0 and V1). */ + MODE is the mode of the vectors being permuted (V0 and V1). SEL_MODE + is the TYPE_MODE associated with SEL, or BLKmode if SEL isn't known + to have a particular mode. 
*/ rtx -expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) +expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, + const vec_perm_builder &sel, machine_mode sel_mode, + rtx target) { - enum insn_code icode; - machine_mode qimode; - unsigned int i, w, e, u; - rtx tmp, sel_qi = NULL; - rtvec vec; - - if (GET_CODE (sel) != CONST_VECTOR) - return expand_vec_perm_var (mode, v0, v1, sel, target); - - if (!target || GET_MODE (target) != mode) + if (!target || !register_operand (target, mode)) target = gen_reg_rtx (mode); - w = GET_MODE_SIZE (mode); - e = GET_MODE_NUNITS (mode); - u = GET_MODE_UNIT_SIZE (mode); - /* Set QIMODE to a different vector mode with byte elements. If no such mode, or if MODE already has byte elements, use VOIDmode. */ + machine_mode qimode; if (!qimode_for_vec_perm (mode).exists (&qimode)) qimode = VOIDmode; + rtx_insn *last = get_last_insn (); + + bool single_arg_p = rtx_equal_p (v0, v1); + /* See if this can be handled with a vec_shr. We only do this if the second vector is all zeroes. 
*/ insn_code shift_code = optab_handler (vec_shr_optab, mode); @@ -5496,7 +5488,7 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) && (shift_code != CODE_FOR_nothing || shift_code_qi != CODE_FOR_nothing)) { - rtx shift_amt = shift_amt_for_vec_perm_mask (sel); + rtx shift_amt = shift_amt_for_vec_perm_mask (mode, sel); if (shift_amt) { struct expand_operand ops[3]; @@ -5520,65 +5512,81 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) } } - icode = direct_optab_handler (vec_perm_const_optab, mode); - if (icode != CODE_FOR_nothing) + if (targetm.vectorize.vec_perm_const != NULL) { - tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); - if (tmp) - return tmp; + v0 = force_reg (mode, v0); + if (single_arg_p) + v1 = v0; + else + v1 = force_reg (mode, v1); + + if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, sel)) + return target; } /* Fall back to a constant byte-based permutation. */ + vec_perm_indices qimode_indices; + rtx target_qi = NULL_RTX, v0_qi = NULL_RTX, v1_qi = NULL_RTX; if (qimode != VOIDmode) { - vec = rtvec_alloc (w); - for (i = 0; i < e; ++i) - { - unsigned int j, this_e; + qimode_indices.new_expanded_vector (sel, GET_MODE_UNIT_SIZE (mode)); + target_qi = gen_reg_rtx (qimode); + v0_qi = gen_lowpart (qimode, v0); + v1_qi = gen_lowpart (qimode, v1); + if (targetm.vectorize.vec_perm_const != NULL + && targetm.vectorize.vec_perm_const (qimode, target_qi, v0_qi, + v1_qi, qimode_indices)) + return gen_lowpart (mode, target_qi); + } - this_e = INTVAL (CONST_VECTOR_ELT (sel, i)); - this_e &= 2 * e - 1; - this_e *= u; + /* Otherwise expand as a fully variable permutation. */ - for (j = 0; j < u; ++j) - RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j); - } - sel_qi = gen_rtx_CONST_VECTOR (qimode, vec); + /* The optabs are only defined for selectors with the same width + as the values being permuted. 
*/ + machine_mode required_sel_mode; + if (!mode_for_int_vector (mode).exists (&required_sel_mode) + || !VECTOR_MODE_P (required_sel_mode)) + { + delete_insns_since (last); + return NULL_RTX; + } - icode = direct_optab_handler (vec_perm_const_optab, qimode); - if (icode != CODE_FOR_nothing) + /* We know that it is semantically valid to treat SEL as having SEL_MODE. + If that isn't the mode we want then we need to prove that using + REQUIRED_SEL_MODE is OK. */ + if (sel_mode != required_sel_mode) + { + if (!selector_fits_mode_p (required_sel_mode, sel)) { - tmp = gen_reg_rtx (qimode); - tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), - gen_lowpart (qimode, v1), sel_qi); - if (tmp) - return gen_lowpart (mode, tmp); + delete_insns_since (last); + return NULL_RTX; } + sel_mode = required_sel_mode; } - /* Otherwise expand as a fully variable permuation. */ - - icode = direct_optab_handler (vec_perm_optab, mode); + insn_code icode = direct_optab_handler (vec_perm_optab, mode); if (icode != CODE_FOR_nothing) { - rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); + rtx sel_rtx = vec_perm_indices_to_rtx (sel_mode, sel); + rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel_rtx); if (tmp) return tmp; } - if (qimode != VOIDmode) + if (qimode != VOIDmode + && selector_fits_mode_p (qimode, qimode_indices)) { icode = direct_optab_handler (vec_perm_optab, qimode); if (icode != CODE_FOR_nothing) { - rtx tmp = gen_reg_rtx (qimode); - tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0), - gen_lowpart (qimode, v1), sel_qi); + rtx sel_qi = vec_perm_indices_to_rtx (qimode, qimode_indices); + rtx tmp = expand_vec_perm_1 (icode, target_qi, v0_qi, v1_qi, sel_qi); if (tmp) return gen_lowpart (mode, tmp); } } + delete_insns_since (last); return NULL_RTX; } @@ -5590,7 +5598,7 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) SEL must have the integer equivalent of MODE and is known to be unsuitable for permutes with a constant 
permutation vector. */ -static rtx +rtx expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) { enum insn_code icode; @@ -5633,17 +5641,16 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) gcc_assert (sel != NULL); /* Broadcast the low byte each element into each of its bytes. */ - vec = rtvec_alloc (w); + vec_perm_builder const_sel (w); for (i = 0; i < w; ++i) { int this_e = i / u * u; if (BYTES_BIG_ENDIAN) this_e += u - 1; - RTVEC_ELT (vec, i) = GEN_INT (this_e); + const_sel.quick_push (this_e); } - tmp = gen_rtx_CONST_VECTOR (qimode, vec); sel = gen_lowpart (qimode, sel); - sel = expand_vec_perm (qimode, sel, sel, tmp, NULL); + sel = expand_vec_perm_const (qimode, sel, sel, const_sel, qimode, NULL); gcc_assert (sel != NULL); /* Add the byte offset to each byte element. */ @@ -5838,9 +5845,8 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1, enum insn_code icode; int method, i, nunits; machine_mode wmode; - rtx m1, m2, perm; + rtx m1, m2; optab tab1, tab2; - rtvec v; method = can_mult_highpart_p (mode, uns_p); switch (method) @@ -5883,21 +5889,20 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1, expand_insn (optab_handler (tab2, mode), 3, eops); m2 = gen_lowpart (mode, eops[0].value); - v = rtvec_alloc (nunits); + auto_vec_perm_indices sel (nunits); if (method == 2) { for (i = 0; i < nunits; ++i) - RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1) - + ((i & 1) ? nunits : 0)); - perm = gen_rtx_CONST_VECTOR (mode, v); + sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1) + + ((i & 1) ? nunits : 0)); } else { - int base = BYTES_BIG_ENDIAN ? 0 : 1; - perm = gen_const_vec_series (mode, GEN_INT (base), GEN_INT (2)); + for (i = 0; i < nunits; ++i) + sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 
0 : 1)); } - return expand_vec_perm (mode, m1, m2, perm, target); + return expand_vec_perm_const (mode, m1, m2, sel, BLKmode, target); } /* Helper function to find the MODE_CC set in a sync_compare_and_swap diff --git a/gcc/optabs.def b/gcc/optabs.def index dc587f821f8..7fbc30ca6d6 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -302,7 +302,6 @@ OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a") OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a") -OPTAB_D (vec_perm_const_optab, "vec_perm_const$a") OPTAB_D (vec_perm_optab, "vec_perm$a") OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a") OPTAB_D (vec_set_optab, "vec_set$a") diff --git a/gcc/optabs.h b/gcc/optabs.h index 388f828428c..e968132dce0 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include "optabs-query.h" #include "optabs-libfuncs.h" +#include "vec-perm-indices.h" /* Generate code for a widening multiply. */ extern rtx expand_widening_mult (machine_mode, rtx, rtx, rtx, int, optab); @@ -301,7 +302,9 @@ extern int have_insn_for (enum rtx_code, machine_mode); extern rtx_insn *gen_cond_trap (enum rtx_code, rtx, rtx, rtx); /* Generate code for VEC_PERM_EXPR. */ -extern rtx expand_vec_perm (machine_mode, rtx, rtx, rtx, rtx); +extern rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx); +extern rtx expand_vec_perm_const (machine_mode, rtx, rtx, + const vec_perm_builder &, machine_mode, rtx); /* Generate code for vector comparison. */ extern rtx expand_vec_cmp_expr (tree, tree, rtx); diff --git a/gcc/target.def b/gcc/target.def index 27bfcfc070f..e9eacc891ed 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1841,12 +1841,27 @@ DEFHOOK bool, (const_tree type, bool is_packed), default_builtin_vector_alignment_reachable) -/* Return true if a vector created for vec_perm_const is valid. 
- A NULL indicates that all constants are valid permutations. */ DEFHOOK -(vec_perm_const_ok, - "Return true if a vector created for @code{vec_perm_const} is valid.", - bool, (machine_mode, vec_perm_indices), +(vec_perm_const, + "This hook is used to test whether the target can permute up to two\n\ +vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\ +also to emit such a permutation. In the former case @var{in0}, @var{in1}\n\ +and @var{out} are all null. In the latter case @var{in0} and @var{in1} are\n\ +the source vectors and @var{out} is the destination vector; all three are\n\ +registers of mode @var{mode}. @var{in1} is the same as @var{in0} if\n\ +@var{sel} describes a permutation on one vector instead of two.\n\ +\n\ +Return true if the operation is possible, emitting instructions for it\n\ +if rtxes are provided.\n\ +\n\ +@cindex @code{vec_perm@var{m}} instruction pattern\n\ +If the hook returns false for a mode with multibyte elements, GCC will\n\ +try the equivalent byte operation. If that also fails, it will try forcing\n\ +the selector into a register and using the @var{vec_perm@var{mode}}\n\ +instruction pattern. There is no need for the hook to handle these two\n\ +implementation approaches itself.", + bool, (machine_mode mode, rtx output, rtx in0, rtx in1, + const vec_perm_indices &sel), NULL) /* Return true if the target supports misaligned store/load of a diff --git a/gcc/target.h b/gcc/target.h index 9696b4d61e1..429285937b0 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -193,13 +193,7 @@ enum vect_cost_model_location { vect_epilogue = 2 }; -/* The type to use for vector permutes with a constant permute vector. - Each entry is an index into the concatenated input vectors. */ -typedef vec<unsigned short> vec_perm_indices; - -/* Same, but can be used to construct local permute vectors that are - automatically freed. */ -typedef auto_vec<unsigned short, 32> auto_vec_perm_indices; +class vec_perm_indices; /* The target structure. 
This holds all the backend hooks. */ #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c index 783ceb689b8..d6e3d986255 100644 --- a/gcc/tree-ssa-forwprop.c +++ b/gcc/tree-ssa-forwprop.c @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "cfganal.h" #include "optabs-tree.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. It's basically a specialized diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index bbbeef6f816..367b08541fa 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "tree-cfg.h" #include "tree-hash-traits.h" +#include "vec-perm-indices.h" /* Return true if load- or store-lanes optab OPTAB is implemented for COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 7e78df8b5dd..7daf9aebd97 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see #include "gimplify.h" #include "tree-cfg.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" static void expand_vector_operations_1 (gimple_stmt_iterator *); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 02f6f7f2c76..81060e03f34 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-if-conv.h" #include "internal-fn.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* Loop Vectorization Pass. diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 0f6005338df..9240fcd5d0b 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. 
If not see #include "gimple-walk.h" #include "dbgcnt.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 85167d3123a..71929d9ff1c 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "internal-fn.h" #include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" diff --git a/gcc/vec-perm-indices.c b/gcc/vec-perm-indices.c new file mode 100644 index 00000000000..20299fab963 --- /dev/null +++ b/gcc/vec-perm-indices.c @@ -0,0 +1,93 @@ +/* A representation of vector permutation indices. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "vec-perm-indices.h" +#include "tree.h" +#include "backend.h" +#include "rtl.h" +#include "memmodel.h" +#include "emit-rtl.h" + +/* Switch to a new permutation vector that selects the same input elements + as ORIG, but with each element split into FACTOR pieces. For example, + if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is + { 2, 3, 4, 5, 0, 1, 6, 7 }. 
*/ + +void +vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig, + unsigned int factor) +{ + truncate (0); + reserve (orig.length () * factor); + for (unsigned int i = 0; i < orig.length (); ++i) + { + element_type base = orig[i] * factor; + for (unsigned int j = 0; j < factor; ++j) + quick_push (base + j); + } +} + +/* Return true if all elements of the permutation vector are in the range + [START, START + SIZE). */ + +bool +vec_perm_indices::all_in_range_p (element_type start, element_type size) const +{ + for (unsigned int i = 0; i < length (); ++i) + if ((*this)[i] < start || ((*this)[i] - start) >= size) + return false; + return true; +} + +/* Try to read the contents of VECTOR_CST CST as a constant permutation + vector. Return true and add the elements to BUILDER on success, + otherwise return false without modifying BUILDER. */ + +bool +tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst) +{ + unsigned int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)); + for (unsigned int i = 0; i < nelts; ++i) + if (!tree_fits_shwi_p (vector_cst_elt (cst, i))) + return false; + + builder->reserve (nelts); + for (unsigned int i = 0; i < nelts; ++i) + builder->quick_push (tree_to_shwi (vector_cst_elt (cst, i)) + & (2 * nelts - 1)); + return true; +} + +/* Return a CONST_VECTOR of mode MODE that contains the elements of + INDICES. 
*/ + +rtx +vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) +{ + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && GET_MODE_NUNITS (mode) == indices.length ()); + unsigned int nelts = indices.length (); + rtvec v = rtvec_alloc (nelts); + for (unsigned int i = 0; i < nelts; ++i) + RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); + return gen_rtx_CONST_VECTOR (mode, v); +} diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h new file mode 100644 index 00000000000..6892bb75d14 --- /dev/null +++ b/gcc/vec-perm-indices.h @@ -0,0 +1,49 @@ +/* A representation of vector permutation indices. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_VEC_PERN_INDICES_H +#define GCC_VEC_PERN_INDICES_H 1 + +/* This class represents a constant permutation vector, such as that used + as the final operand to a VEC_PERM_EXPR. 
*/ +class vec_perm_indices : public auto_vec<unsigned short, 32> +{ + typedef unsigned short element_type; + typedef auto_vec<element_type, 32> parent_type; + +public: + vec_perm_indices () {} + vec_perm_indices (unsigned int nunits) : parent_type (nunits) {} + + void new_expanded_vector (const vec_perm_indices &, unsigned int); + + bool all_in_range_p (element_type, element_type) const; + +private: + vec_perm_indices (const vec_perm_indices &); +}; + +/* Temporary. */ +typedef vec_perm_indices vec_perm_builder; +typedef vec_perm_indices auto_vec_perm_indices; + +bool tree_to_vec_perm_builder (vec_perm_builder *, tree); +rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &); + +#endif |