author     James Greenhalgh <james.greenhalgh@arm.com>    2017-12-21 16:39:43 +0000
committer  James Greenhalgh <jgreenhalgh@gcc.gnu.org>     2017-12-21 16:39:43 +0000
commit     6b6d8f38f7b3bd8a2f4e4dbeb3014ba1339afa89 (patch)
tree       a26bfcca0fadf5099804a4c2887409b687957d8e /gcc/simplify-rtx.c
parent     c587c0a9c8cbe80a64461fe9cab0a23d3ff35211 (diff)
[patch AArch64] Do not perform a vector splat for vector initialisation if it is not useful
Our current vector initialisation code will first duplicate the first
element to both lanes, then overwrite the top lane with a new value.
This duplication can be clunky and wasteful.

Better would be to simply use the fact that we will always be overwriting
the remaining bits, and simply move the first element to the correct place
(implicitly zeroing all other bits).

We also need a new pattern in simplify-rtx.c:simplify_ternary_operation, to
ensure we can still simplify:

  (vec_merge:OUTER
     (vec_duplicate:OUTER x:INNER)
     (subreg:OUTER y:INNER 0)
     (const_int N))

To:

  (vec_concat:OUTER x:INNER y:INNER) or (vec_concat y x)

---

gcc/

        * config/aarch64/aarch64.c (aarch64_expand_vector_init): Modify
        code generation for cases where splatting a value is not useful.
        * simplify-rtx.c (simplify_ternary_operation): Simplify
        vec_merge across a vec_duplicate and a paradoxical subreg
        forming a vector mode to a vec_concat.

gcc/testsuite/

        * gcc.target/aarch64/vect-slp-dup.c: New.

From-SVN: r255946
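As a concrete illustration of the initialisation pattern the patch targets
(this example is editorial, not taken from the patch or its testsuite; the
function name and the use of GNU vector extensions are assumptions), consider
building a two-element vector from two distinct scalars:

    /* Hypothetical example, not from the patch: a two-element vector built
       from two scalar values.  Before this change the AArch64 expander would
       splat A into both lanes and then overwrite the top lane with B; with
       the change, A is simply moved into place and B inserted, avoiding the
       redundant duplication described in the commit message above.  */
    typedef double v2df __attribute__ ((vector_size (16)));

    v2df
    make_v2df (double a, double b)
    {
      return (v2df) { a, b };
    }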
Diffstat (limited to 'gcc/simplify-rtx.c')
-rw-r--r--    gcc/simplify-rtx.c    51
1 file changed, 51 insertions(+), 0 deletions(-)
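The new test, gcc.target/aarch64/vect-slp-dup.c, is not part of this view
(the diff is limited to simplify-rtx.c).  Purely as a hypothetical sketch,
an SLP-style loop of the following shape is the kind of code that exercises
two-element vector initialisation; the committed test may look quite
different:

    /* Hypothetical sketch -- not the committed testcase.  The stores to
       out[i] and out[i + 1] form an SLP group, so the vectoriser builds a
       two-element vector from the scalars A and B, which is exactly the
       initialisation pattern the patch streamlines.  */
    void
    store_pairs (double *restrict out, double a, double b, int n)
    {
      for (int i = 0; i < n; i += 2)
        {
          out[i] = a;
          out[i + 1] = b;
        }
    }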
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 4f9796c7c84..6b163f91699 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5860,6 +5860,57 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
              return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
            }
+          /* Replace:
+
+              (vec_merge:outer (vec_duplicate:outer x:inner)
+                               (subreg:outer y:inner 0)
+                               (const_int N))
+
+             with (vec_concat:outer x:inner y:inner) if N == 1,
+             or (vec_concat:outer y:inner x:inner) if N == 2.
+
+             We assume that degenerate cases (N == 0 or N == 3), which
+             represent taking all elements from either input, are handled
+             elsewhere.
+
+             Implicitly, this means we have a paradoxical subreg, but such
+             a check is cheap, so make it anyway.
+
+             Only applies for vectors of two elements.  */
+
+          if ((GET_CODE (op0) == VEC_DUPLICATE
+               || GET_CODE (op1) == VEC_DUPLICATE)
+              && GET_MODE (op0) == GET_MODE (op1)
+              && GET_MODE_NUNITS (GET_MODE (op0)) == 2
+              && GET_MODE_NUNITS (GET_MODE (op1)) == 2
+              && IN_RANGE (sel, 1, 2))
+            {
+              rtx newop0 = op0, newop1 = op1;
+
+              /* Canonicalize locally such that the VEC_DUPLICATE is always
+                 the first operand.  */
+              if (GET_CODE (newop1) == VEC_DUPLICATE)
+                {
+                  std::swap (newop0, newop1);
+                  /* If we swap the operand order, we also need to swap
+                     the selector mask.  */
+                  sel = sel == 1 ? 2 : 1;
+                }
+
+              if (GET_CODE (newop1) == SUBREG
+                  && paradoxical_subreg_p (newop1)
+                  && subreg_lowpart_p (newop1)
+                  && GET_MODE (SUBREG_REG (newop1))
+                     == GET_MODE (XEXP (newop0, 0)))
+                {
+                  newop0 = XEXP (newop0, 0);
+                  newop1 = SUBREG_REG (newop1);
+                  if (sel == 2)
+                    std::swap (newop0, newop1);
+                  return simplify_gen_binary (VEC_CONCAT, mode,
+                                              newop0, newop1);
+                }
+            }
+
          /* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
             (const_int n))
             with (vec_concat x y) or (vec_concat y x) depending on value