summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2020-05-11 11:16:31 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2020-05-11 11:16:31 +0200
commit: 7c355156aa20eaec7401d7c66f6a6cfbe597abc2 (patch)
tree: a9f8173b3d26182fb35cf752dfb7a22637550fb9
parent: dbeaa7ab81a37acadc9af6e7990332604252de20 (diff)
i386: Vectorize basic V2SFmode operations [PR95046]
Enable V2SFmode vectorization and vectorize V2SFmode PLUS,
MINUS, MULT, MIN and MAX operations using XMM registers.

To avoid unwanted secondary effects (e.g. exceptions), load values
to XMM registers using MOVQ that clears high bits of the XMM
register outside V2SFmode.

The compiler now vectorizes e.g.:

float r[2], a[2], b[2];

void
test_plus (void)
{
  for (int i = 0; i < 2; i++)
    r[i] = a[i] + b[i];
}

to:

    movq a(%rip), %xmm0
    movq b(%rip), %xmm1
    addps %xmm1, %xmm0
    movlps %xmm0, r(%rip)
    ret

gcc/ChangeLog:

    PR target/95046
    * config/i386/i386.c (ix86_vector_mode_supported_p):
    Vectorize 3dNOW! vector modes for TARGET_MMX_WITH_SSE.
    * config/i386/mmx.md (*mov<mode>_internal): Do not set
    mode of alternative 13 to V2SF for TARGET_MMX_WITH_SSE.

    (mmx_addv2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (addv2sf3): New expander.
    (*mmx_addv2sf3): Add SSE/AVX alternatives. Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_subv2sf3): Change operand predicate from
    nonimmediate_operand to register_mmxmem_operand.
    (mmx_subrv2sf3): Ditto.
    (subv2sf3): New expander.
    (*mmx_subv2sf3): Add SSE/AVX alternatives. Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_mulv2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (mulv2sf3): New expander.
    (*mmx_mulv2sf3): Add SSE/AVX alternatives. Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.

    (mmx_<code>v2sf3): Change operand predicates from
    nonimmediate_operand to register_mmxmem_operand.
    (<code>v2sf3): New expander.
    (*mmx_<code>v2sf3): Add SSE/AVX alternatives. Change operand
    predicates from nonimmediate_operand to register_mmxmem_operand.
    Enable instruction pattern for TARGET_MMX_WITH_SSE.
    (mmx_ieee_<ieee_maxmin>v2sf3): Ditto.

testsuite/ChangeLog:

    PR target/95046
    * gcc.target/i386/pr95046-1.c: New test.
-rw-r--r-- gcc/ChangeLog 38
-rw-r--r-- gcc/config/i386/i386.c 6
-rw-r--r-- gcc/config/i386/mmx.md 179
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr95046-1.c 51
5 files changed, 229 insertions, 52 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 05aa9edc968..0a98c7441e1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,41 @@
+2020-05-11 Uroš Bizjak <ubizjak@gmail.com>
+
+ PR target/95046
+ * config/i386/i386.c (ix86_vector_mode_supported_p):
+ Vectorize 3dNOW! vector modes for TARGET_MMX_WITH_SSE.
+ * config/i386/mmx.md (*mov<mode>_internal): Do not set
+ mode of alternative 13 to V2SF for TARGET_MMX_WITH_SSE.
+
+ (mmx_addv2sf3): Change operand predicates from
+ nonimmediate_operand to register_mmxmem_operand.
+ (addv2sf3): New expander.
+ (*mmx_addv2sf3): Add SSE/AVX alternatives. Change operand
+ predicates from nonimmediate_operand to register_mmxmem_operand.
+ Enable instruction pattern for TARGET_MMX_WITH_SSE.
+
+ (mmx_subv2sf3): Change operand predicate from
+ nonimmediate_operand to register_mmxmem_operand.
+ (mmx_subrv2sf3): Ditto.
+ (subv2sf3): New expander.
+ (*mmx_subv2sf3): Add SSE/AVX alternatives. Change operand
+ predicates from nonimmediate_operand to register_mmxmem_operand.
+ Enable instruction pattern for TARGET_MMX_WITH_SSE.
+
+ (mmx_mulv2sf3): Change operand predicates from
+ nonimmediate_operand to register_mmxmem_operand.
+ (mulv2sf3): New expander.
+ (*mmx_mulv2sf3): Add SSE/AVX alternatives. Change operand
+ predicates from nonimmediate_operand to register_mmxmem_operand.
+ Enable instruction pattern for TARGET_MMX_WITH_SSE.
+
+ (mmx_<code>v2sf3): Change operand predicates from
+ nonimmediate_operand to register_mmxmem_operand.
+ (<code>v2sf3): New expander.
+ (*mmx_<code>v2sf3): Add SSE/AVX alternatives. Change operand
+ predicates from nonimmediate_operand to register_mmxmem_operand.
+ Enable instruction pattern for TARGET_MMX_WITH_SSE.
+ (mmx_ieee_<ieee_maxmin>v2sf3): Ditto.
+
2020-05-11 Martin Liska <mliska@suse.cz>
PR c/95040
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b40f443ba8a..d1c0e354162 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21007,9 +21007,11 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
- if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
+ if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode))
return true;
- if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE_3DNOW (mode))
return true;
return false;
}
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 472f90f9bc1..d3e0004d3a0 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -175,7 +175,13 @@
]
(const_string "TI"))
- (and (eq_attr "alternative" "13,14")
+ (and (eq_attr "alternative" "13")
+ (ior (and (match_test "<MODE>mode == V2SFmode")
+ (not (match_test "TARGET_MMX_WITH_SSE")))
+ (not (match_test "TARGET_SSE2"))))
+ (const_string "V2SF")
+
+ (and (eq_attr "alternative" "14")
(ior (match_test "<MODE>mode == V2SFmode")
(not (match_test "TARGET_SSE2"))))
(const_string "V2SF")
@@ -235,67 +241,112 @@
(define_expand "mmx_addv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(plus:V2SF
- (match_operand:V2SF 1 "nonimmediate_operand")
- (match_operand:V2SF 2 "nonimmediate_operand")))]
+ (match_operand:V2SF 1 "register_mmxmem_operand")
+ (match_operand:V2SF 2 "register_mmxmem_operand")))]
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+(define_expand "addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
(define_insn "*mmx_addv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
- (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
- "pfadd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+ "@
+ pfadd\t{%2, %0|%0, %2}
+ addps\t{%2, %0|%0, %2}
+ vaddps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_expand "mmx_subv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(minus:V2SF (match_operand:V2SF 1 "register_operand")
- (match_operand:V2SF 2 "nonimmediate_operand")))]
+ (match_operand:V2SF 2 "register_mmxmem_operand")))]
"TARGET_3DNOW")
(define_expand "mmx_subrv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(minus:V2SF (match_operand:V2SF 2 "register_operand")
- (match_operand:V2SF 1 "nonimmediate_operand")))]
+ (match_operand:V2SF 1 "register_mmxmem_operand")))]
"TARGET_3DNOW")
+(define_expand "subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (minus:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MINUS, V2SFmode, operands);")
+
(define_insn "*mmx_subv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y,y")
- (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
- "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y,x,Yv")
+ (minus:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "0,ym,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,0,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
pfsub\t{%2, %0|%0, %2}
- pfsubr\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ pfsubr\t{%1, %0|%0, %1}
+ subps\t{%2, %0|%0, %2}
+ vsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,native,*,*")
+ (set_attr "type" "mmxadd,mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,1,*,*")
+ (set_attr "mode" "V2SF,V2SF,V4SF,V4SF")])
(define_expand "mmx_mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
- (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
- (match_operand:V2SF 2 "nonimmediate_operand")))]
+ (mult:V2SF (match_operand:V2SF 1 "register_mmxmem_operand")
+ (match_operand:V2SF 2 "register_mmxmem_operand")))]
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+(define_expand "mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (mult:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
(define_insn "*mmx_mulv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
- (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
- "pfmul\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ (mult:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+ "@
+ pfmul\t{%2, %0|%0, %2}
+ mulps\t{%2, %0|%0, %2}
+ vmulps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "btver2_decode" "*,direct,double")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_expand "mmx_<code>v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(smaxmin:V2SF
- (match_operand:V2SF 1 "nonimmediate_operand")
- (match_operand:V2SF 2 "nonimmediate_operand")))]
+ (match_operand:V2SF 1 "register_mmxmem_operand")
+ (match_operand:V2SF 2 "register_mmxmem_operand")))]
"TARGET_3DNOW"
{
if (!flag_finite_math_only || flag_signed_zeros)
@@ -309,21 +360,45 @@
ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
})
+(define_expand "<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ if (!flag_finite_math_only || flag_signed_zeros)
+ {
+ emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+ (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
(define_insn "*mmx_<code>v2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(smaxmin:V2SF
- (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
- "pf<maxmin_float>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+ "@
+ pf<maxmin_float>\t{%2, %0|%0, %2}
+ <maxmin_float>ps\t{%2, %0|%0, %2}
+ v<maxmin_float>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "btver2_sse_attr" "*,maxmin,maxmin")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
@@ -332,16 +407,22 @@
;; presence of -0.0 and NaN.
(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(unspec:V2SF
- [(match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ [(match_operand:V2SF 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")]
IEEE_MAXMIN))]
- "TARGET_3DNOW"
- "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ "TARGET_3DNOW || TARGET_MMX_WITH_SSE"
+ "@
+ pf<ieee_maxmin>\t{%2, %0|%0, %2}
+ <ieee_maxmin>ps\t{%2, %0|%0, %2}
+ v<ieee_maxmin>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "btver2_sse_attr" "*,maxmin,maxmin")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_insn "mmx_rcpv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ea329c740a3..c35e084b366 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
+2020-05-11 Uroš Bizjak <ubizjak@gmail.com>
+
+ PR target/95046
+ * gcc.target/i386/pr95046-1.c: New test.
+
2020-05-11 Mark Eggleston <markeggleston@gcc.gnu.org>
-
+
PR fortran/59107
* gfortran.dg/pr59107.f90: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr95046-1.c b/gcc/testsuite/gcc.target/i386/pr95046-1.c
new file mode 100644
index 00000000000..f93d9e1a507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95046-1.c
@@ -0,0 +1,51 @@
+/* PR target/95046 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -ffast-math -msse2" } */
+
+
+float r[2], a[2], b[2];
+
+void
+test_plus (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-assembler "addps" } } */
+
+void
+test_minus (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] - b[i];
+}
+
+/* { dg-final { scan-assembler "subps" } } */
+
+void
+test_mult (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] * b[i];
+}
+
+/* { dg-final { scan-assembler "mulps" } } */
+
+void
+test_min (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] < b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "minps" } } */
+
+void
+test_max (void)
+{
+ for (int i = 0; i < 2; i++)
+ r[i] = a[i] > b[i] ? a[i] : b[i];
+}
+
+/* { dg-final { scan-assembler "maxps" } } */