summaryrefslogtreecommitdiff
path: root/gcc/optabs.c
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org>, 2018-01-02 18:27:05 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org>, 2018-01-02 18:27:05 +0000
commit: d980067b1e9394b2b8482b3fc888ac5e8e3ebe59 (patch)
tree: ebde117c5d31c80df9b86ffd24c029b7dfc3f5db /gcc/optabs.c
parent: 736d0f28783f12fa042892bc186866dd5101088f (diff)
Use explicit encodings for simple permutes
This patch makes users of vec_perm_builders use the compressed encoding
where possible.  This means that they work with variable-length vectors.

2018-01-02  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* optabs.c (expand_vec_perm_var): Use an explicit encoding for
	the broadcast of the low byte.
	(expand_mult_highpart): Use an explicit encoding for the permutes.
	* optabs-query.c (can_mult_highpart_p): Likewise.
	* tree-vect-loop.c (calc_vec_perm_mask_for_shift): Likewise.
	* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
	(vectorizable_bswap): Likewise.
	* tree-vect-data-refs.c (vect_grouped_store_supported): Use an
	explicit encoding for the power-of-2 permutes.
	(vect_permute_store_chain): Likewise.
	(vect_grouped_load_supported): Likewise.
	(vect_permute_load_chain): Likewise.

From-SVN: r256097
Diffstat (limited to 'gcc/optabs.c')
-rw-r--r--gcc/optabs.c27
1 files changed, 15 insertions, 12 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c
index db13a25b15a..e9ce42fb9ee 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5646,15 +5646,14 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
NULL, 0, OPTAB_DIRECT);
gcc_assert (sel != NULL);
- /* Broadcast the low byte each element into each of its bytes. */
- vec_perm_builder const_sel (w, w, 1);
- for (i = 0; i < w; ++i)
- {
- int this_e = i / u * u;
- if (BYTES_BIG_ENDIAN)
- this_e += u - 1;
- const_sel.quick_push (this_e);
- }
+ /* Broadcast the low byte of each element into each of its bytes.
+ The encoding has U interleaved stepped patterns, one for each
+ byte of an element. */
+ vec_perm_builder const_sel (w, u, 3);
+ unsigned int low_byte_in_u = BYTES_BIG_ENDIAN ? u - 1 : 0;
+ for (i = 0; i < 3; ++i)
+ for (unsigned int j = 0; j < u; ++j)
+ const_sel.quick_push (i * u + low_byte_in_u);
sel = gen_lowpart (qimode, sel);
sel = expand_vec_perm_const (qimode, sel, sel, const_sel, qimode, NULL);
gcc_assert (sel != NULL);
@@ -5895,16 +5894,20 @@ expand_mult_highpart (machine_mode mode, rtx op0, rtx op1,
expand_insn (optab_handler (tab2, mode), 3, eops);
m2 = gen_lowpart (mode, eops[0].value);
- vec_perm_builder sel (nunits, nunits, 1);
+ vec_perm_builder sel;
if (method == 2)
{
- for (i = 0; i < nunits; ++i)
+ /* The encoding has 2 interleaved stepped patterns. */
+ sel.new_vector (nunits, 2, 3);
+ for (i = 0; i < 6; ++i)
sel.quick_push (!BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits : 0));
}
else
{
- for (i = 0; i < nunits; ++i)
+ /* The encoding has a single interleaved stepped pattern. */
+ sel.new_vector (nunits, 1, 3);
+ for (i = 0; i < 3; ++i)
sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
}