diff options
author | Xionghu Luo <luoxhu@linux.ibm.com> | 2020-05-10 21:06:20 -0500 |
---|---|---|
committer | Xionghu Luo <luoxhu@linux.ibm.com> | 2020-05-10 21:12:46 -0500 |
commit | 0447929f11e6a3e1b076841712b90a8b6bc7d33a (patch) | |
tree | ffc4b06c5fda9efeb839ecdea5cea6986b92169d /gcc/testsuite | |
parent | e7ae6d32c7df009973616d62829a431a6d206ccc (diff) |
Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination (PR83403)
Use determine_value_range to get value range info when folding conversion
expressions whose inner operation is PLUS_EXPR/MINUS_EXPR/MULT_EXPR, provided
the operation does not overflow in the wrapping inner type, e.g.:
(long unsigned int)((unsigned int)n * 10 + 1)
=>
(long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
With this patch for affine combination, load/store motion can detect
more independent address references and promote some memory expressions to
registers within loops.
PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST)"
with "(T1)(X + CST) as (T1)X + (T1)CST" for wrapping overflow.
Bootstrapped and regression tested on Power8-LE.
gcc/ChangeLog
2020-05-11 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
determine_value_range, add fold conversion of MULT_EXPR, fix the
previous PLUS_EXPR.
gcc/testsuite/ChangeLog
2020-05-11 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* gcc.dg/tree-ssa/pr83403-1.c: New test.
* gcc.dg/tree-ssa/pr83403-2.c: New test.
* gcc.dg/tree-ssa/pr83403.h: New header.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 |
4 files changed, 53 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 791c62aeab8..aa11ca3fd99 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-05-11 Xiong Hu Luo <luoxhu@linux.ibm.com> + + PR tree-optimization/83403 + * gcc.dg/tree-ssa/pr83403-1.c: New test. + * gcc.dg/tree-ssa/pr83403-2.c: New test. + * gcc.dg/tree-ssa/pr83403.h: New header. + 2020-05-10 Harald Anlauf <anlauf@gmx.de> PR fortran/93499 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c new file mode 100644 index 00000000000..748375b03af --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE unsigned int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c new file mode 100644 index 00000000000..ca2e6bbd61c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h new file mode 100644 index 00000000000..0da8a835b5f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h @@ -0,0 +1,30 @@ +__attribute__ ((noinline)) void +calculate (const double *__restrict__ A, const double *__restrict__ B, + double *__restrict__ C) +{ + TYPE m = 0; + TYPE n = 0; + TYPE k = 0; + + A = (const double *) __builtin_assume_aligned (A, 16); + B = (const double *) __builtin_assume_aligned (B, 16); + C = (double *) __builtin_assume_aligned (C, 16); + + for (n = 0; n < 9; n++) + { + for (m = 0; m < 10; m++) + { + C[(n * 10) 
+ m] = 0.0; + } + + for (k = 0; k < 17; k++) + { +#pragma simd + for (m = 0; m < 10; m++) + { + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; + } + } + } +} + |