summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>2018-05-14 16:29:13 +0000
committerktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4>2018-05-14 16:29:13 +0000
commit34f8174d1dd0f89a6dc8d852cc634f598583d6d7 (patch)
treeeca5aa957a2478f1838484727fc83df4398d9efb
parent600d3f1ae24b1798df3cea77b209399b9d6bfc95 (diff)
[AArch64] Add combine pattern to fuse AESE/AESMC instructions
When the AESE,AESD and AESMC, AESMC instructions are generated through the appropriate arm_neon.h intrinsics we really want to keep them together when the AESE feeds into an AESMC and fusion is supported by the target CPU. We have macro-fusion hooks and scheduling model forwarding paths defined to facilitate that. It is, however, not always enough. This patch adds another mechanism for doing that. When we can detect during combine that the required dependency is exists (AESE -> AESMC, AESD -> AESIMC) just keep them together with a combine pattern throughout the rest of compilation. We won't ever want to split them. The testcases generate 4 AESE(D) instructions in a block followed by 4 AES(I)MC instructions that consume the corresponding results and it also adds a bunch of computations in-between so that the AESE and AESMC instructions are not trivially back-to-back, thus exercising the compiler's ability to bring them together. With this patch all 4 pairs are fused whereas before a couple of fusions would be missed due to intervening arithmetic and memory instructions. * config/aarch64/aarch64-simd.md (*aarch64_crypto_aese_fused): New pattern. (aarch64_crypto_aesd_fused): Likewise. * gcc.target/aarch64/crypto-fuse-1.c: New test. * gcc.target/aarch64/crypto-fuse-2.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@260234 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/config/aarch64/aarch64-simd.md38
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c45
-rw-r--r--gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c45
5 files changed, 139 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 553c21e13705..0ad13682cfd6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2018-05-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/aarch64-simd.md (*aarch64_crypto_aese_fused):
+ New pattern.
+ (aarch64_crypto_aesd_fused): Likewise.
+
2018-05-14 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.md (mov<mode>): Remove '*' in alternatives.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1154fc3d58de..9cfd4d30515a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5821,6 +5821,44 @@
(const_string "yes")])]
)
+;; When AESE/AESMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves inbetween.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aese_fused"
+ [(set (match_operand:V16QI 0 "register_operand" "=&w")
+ (unspec:V16QI
+ [(unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
+ ] UNSPEC_AESMC))]
+ "TARGET_SIMD && TARGET_AES
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
+
+;; When AESD/AESIMC fusion is enabled we really want to keep the two together
+;; and enforce the register dependency without scheduling or register
+;; allocation messing up the order or introducing moves inbetween.
+;; Mash the two together during combine.
+
+(define_insn "*aarch64_crypto_aesd_fused"
+ [(set (match_operand:V16QI 0 "register_operand" "=&w")
+ (unspec:V16QI
+ [(unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
+ ] UNSPEC_AESIMC))]
+ "TARGET_SIMD && TARGET_AES
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
+ "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
+ [(set_attr "type" "crypto_aese")
+ (set_attr "length" "8")]
+)
+
;; sha1
(define_insn "aarch64_crypto_sha1hsi"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 259d578eb0a3..09d97e0364c0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-05-14 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/aarch64/crypto-fuse-1.c: New test.
+ * gcc.target/aarch64/crypto-fuse-2.c: Likewise.
+
2018-05-14 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.target/aarch64/vmov_n_1.c: Update test.
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c
new file mode 100644
index 000000000000..d8adc89466c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+
+#include <arm_neon.h>
+
+#define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
+#define AESMC(r, i) (r = vaesmcq_u8 (i))
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t e;
+
+void
+foo (void)
+{
+ AESE (a, a, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (b, b, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (c, c, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (d, d, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ AESMC (a, a);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (b, b);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (c, c);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (d, d);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
new file mode 100644
index 000000000000..b12df2d3ee4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+
+#include <arm_neon.h>
+
+#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESMC(r, i) (r = vaesimcq_u8 (i))
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t e;
+
+void
+foo (void)
+{
+ AESE (a, a, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (b, b, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (c, c, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESE (d, d, e);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ AESMC (a, a);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (b, b);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (c, c);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (d, d);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */
+