diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-02 18:27:05 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-02 18:27:05 +0000 |
commit | d980067b1e9394b2b8482b3fc888ac5e8e3ebe59 (patch) | |
tree | ebde117c5d31c80df9b86ffd24c029b7dfc3f5db /gcc/optabs.c | |
parent | 736d0f28783f12fa042892bc186866dd5101088f (diff) |
Use explicit encodings for simple permutes
This patch makes users of vec_perm_builder use the compressed encoding
where possible, which means that they also work with variable-length vectors.
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* optabs.c (expand_vec_perm_var): Use an explicit encoding for
the broadcast of the low byte.
(expand_mult_highpart): Use an explicit encoding for the permutes.
* optabs-query.c (can_mult_highpart_p): Likewise.
* tree-vect-loop.c (calc_vec_perm_mask_for_shift): Likewise.
* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
(vectorizable_bswap): Likewise.
* tree-vect-data-refs.c (vect_grouped_store_supported): Use an
explicit encoding for the power-of-2 permutes.
(vect_permute_store_chain): Likewise.
(vect_grouped_load_supported): Likewise.
(vect_permute_load_chain): Likewise.
From-SVN: r256097
Diffstat (limited to 'gcc/optabs.c')
-rw-r--r-- | gcc/optabs.c | 27 |
1 file changed, 15 insertions, 12 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c index db13a25b15a..e9ce42fb9ee 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -5646,15 +5646,14 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) NULL, 0, OPTAB_DIRECT); gcc_assert (sel != NULL); - /* Broadcast the low byte each element into each of its bytes. */ - vec_perm_builder const_sel (w, w, 1); - for (i = 0; i < w; ++i) - { - int this_e = i / u * u; - if (BYTES_BIG_ENDIAN) - this_e += u - 1; - const_sel.quick_push (this_e); - } + /* Broadcast the low byte each element into each of its bytes. + The encoding has U interleaved stepped patterns, one for each + byte of an element. */ + vec_perm_builder const_sel (w, u, 3); + unsigned int low_byte_in_u = BYTES_BIG_ENDIAN ? u - 1 : 0; + for (i = 0; i < 3; ++i) + for (unsigned int j = 0; j < u; ++j) + const_sel.quick_push (i * u + low_byte_in_u); sel = gen_lowpart (qimode, sel); sel = expand_vec_perm_const (qimode, sel, sel, const_sel, qimode, NULL); gcc_assert (sel != NULL); @@ -5895,16 +5894,20 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1, expand_insn (optab_handler (tab2, mode), 3, eops); m2 = gen_lowpart (mode, eops[0].value); - vec_perm_builder sel (nunits, nunits, 1); + vec_perm_builder sel; if (method == 2) { - for (i = 0; i < nunits; ++i) + /* The encoding has 2 interleaved stepped patterns. */ + sel.new_vector (nunits, 2, 3); + for (i = 0; i < 6; ++i) sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0)); } else { - for (i = 0; i < nunits; ++i) + /* The encoding has a single interleaved stepped pattern. */ + sel.new_vector (nunits, 1, 3); + for (i = 0; i < 3; ++i) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); } |