diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2017-06-24 07:07:56 +0000 |
---|---|---|
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2017-06-24 07:07:56 +0000 |
commit | e56e3fda6092548f3cd5336d131b412be986b1e6 (patch) | |
tree | 2c28733ee096cbdb641e42b511b18f7c28205330 /libgfortran/generated/cshift1_8.c | |
parent | ef5b7d19b688bb92178f41f504133f0ee51bffc6 (diff) |
re PR fortran/52473 (CSHIFT slow - inline it?)
2017-06-24 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/52473
* Makefile.am: Add i_cshift1a_c. Add rules to generate files
from cshift1a.m4.
* Makefile.in: Regenerated.
* m4/cshift1a.m4: New file.
* m4/cshift.m4 (cshift1): Split up inner loop by removing
condition. Use memcpy where possible. Call helper functions
based on dtype.
* libgfortran.h: Add prototypes for cshift1_16_c10,
cshift1_16_c16, cshift1_16_c4, cshift1_16_c8, cshift1_16_i1,
cshift1_16_i16, cshift1_16_i2, cshift1_16_i4, cshift1_16_i8,
cshift1_16_r10, cshift1_16_r16, cshift1_16_r4, cshift1_16_r8,
cshift1_4_c10, cshift1_4_c16, cshift1_4_c4, cshift1_4_c8,
cshift1_4_i1, cshift1_4_i16, cshift1_4_i2, cshift1_4_i4,
cshift1_4_i8, cshift1_4_r10, cshift1_4_r16, cshift1_4_r4,
cshift1_4_r8, cshift1_8_c10, cshift1_8_c16, cshift1_8_c4,
cshift1_8_c8, cshift1_8_i1, cshift1_8_i16, cshift1_8_i2,
cshift1_8_i4, cshift1_8_i8, cshift1_8_r10, cshift1_8_r16,
cshift1_8_r4 and cshift1_8_r8.
* generated/cshift1_16_c10.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_c16.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_c4.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_c8.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_i1.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_i16.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_i2.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_i4.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_i8.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_r10.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_r16.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_r4.c: New file, generated from cshift1a.m4.
* generated/cshift1_16_r8.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_c10.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_c16.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_c4.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_c8.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_i1.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_i16.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_i2.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_i4.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_i8.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_r10.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_r16.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_r4.c: New file, generated from cshift1a.m4.
* generated/cshift1_4_r8.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_c10.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_c16.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_c4.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_c8.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_i1.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_i16.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_i2.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_i4.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_i8.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_r10.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_r16.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_r4.c: New file, generated from cshift1a.m4.
* generated/cshift1_8_r8.c: New file, generated from cshift1a.m4.
2017-06-24 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/52473
* gfortran.dg/cshift_2.f90: New test.
From-SVN: r249620
Diffstat (limited to 'libgfortran/generated/cshift1_8.c')
-rw-r--r-- | libgfortran/generated/cshift1_8.c | 136 |
1 files changed, 124 insertions, 12 deletions
```diff
diff --git a/libgfortran/generated/cshift1_8.c b/libgfortran/generated/cshift1_8.c
index bd4f2c3a94c..1acfd3895ae 100644
--- a/libgfortran/generated/cshift1_8.c
+++ b/libgfortran/generated/cshift1_8.c
@@ -61,12 +61,13 @@ cshift1 (gfc_array_char * const restrict ret,
   GFC_INTEGER_8 sh;
   index_type arraysize;
   index_type size;
-
+  index_type type_size;
+
   if (pwhich)
     which = *pwhich - 1;
   else
     which = 0;
-
+
   if (which < 0 || (which + 1) > GFC_DESCRIPTOR_RANK (array))
     runtime_error ("Argument 'DIM' is out of range in call to 'CSHIFT'");
 
@@ -111,6 +112,98 @@ cshift1 (gfc_array_char * const restrict ret,
   if (arraysize == 0)
     return;
 
+  /* See if we should dispatch to a helper function. */
+
+  type_size = GFC_DTYPE_TYPE_SIZE (array);
+
+  switch (type_size)
+  {
+    case GFC_DTYPE_LOGICAL_1:
+    case GFC_DTYPE_INTEGER_1:
+    case GFC_DTYPE_DERIVED_1:
+      cshift1_8_i1 ((gfc_array_i1 *)ret, (gfc_array_i1 *) array,
+                    h, pwhich);
+      return;
+
+    case GFC_DTYPE_LOGICAL_2:
+    case GFC_DTYPE_INTEGER_2:
+      cshift1_8_i2 ((gfc_array_i2 *)ret, (gfc_array_i2 *) array,
+                    h, pwhich);
+      return;
+
+    case GFC_DTYPE_LOGICAL_4:
+    case GFC_DTYPE_INTEGER_4:
+      cshift1_8_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array,
+                    h, pwhich);
+      return;
+
+    case GFC_DTYPE_LOGICAL_8:
+    case GFC_DTYPE_INTEGER_8:
+      cshift1_8_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array,
+                    h, pwhich);
+      return;
+
+#if defined (HAVE_INTEGER_16)
+    case GFC_DTYPE_LOGICAL_16:
+    case GFC_DTYPE_INTEGER_16:
+      cshift1_8_i16 ((gfc_array_i16 *)ret, (gfc_array_i16 *) array,
+                     h, pwhich);
+      return;
+#endif
+
+    case GFC_DTYPE_REAL_4:
+      cshift1_8_r4 ((gfc_array_r4 *)ret, (gfc_array_r4 *) array,
+                    h, pwhich);
+      return;
+
+    case GFC_DTYPE_REAL_8:
+      cshift1_8_r8 ((gfc_array_r8 *)ret, (gfc_array_r8 *) array,
+                    h, pwhich);
+      return;
+
+#if defined (HAVE_REAL_10)
+    case GFC_DTYPE_REAL_10:
+      cshift1_8_r10 ((gfc_array_r10 *)ret, (gfc_array_r10 *) array,
+                     h, pwhich);
+      return;
+#endif
+
+#if defined (HAVE_REAL_16)
+    case GFC_DTYPE_REAL_16:
+      cshift1_8_r16 ((gfc_array_r16 *)ret, (gfc_array_r16 *) array,
+                     h, pwhich);
+      return;
+#endif
+
+    case GFC_DTYPE_COMPLEX_4:
+      cshift1_8_c4 ((gfc_array_c4 *)ret, (gfc_array_c4 *) array,
+                    h, pwhich);
+      return;
+
+    case GFC_DTYPE_COMPLEX_8:
+      cshift1_8_c8 ((gfc_array_c8 *)ret, (gfc_array_c8 *) array,
+                    h, pwhich);
+      return;
+
+#if defined (HAVE_COMPLEX_10)
+    case GFC_DTYPE_COMPLEX_10:
+      cshift1_8_c10 ((gfc_array_c10 *)ret, (gfc_array_c10 *) array,
+                     h, pwhich);
+      return;
+#endif
+
+#if defined (HAVE_COMPLEX_16)
+    case GFC_DTYPE_COMPLEX_16:
+      cshift1_8_c16 ((gfc_array_c16 *)ret, (gfc_array_c16 *) array,
+                     h, pwhich);
+      return;
+#endif
+
+    default:
+      break;
+
+  }
+
   extent[0] = 1;
   count[0] = 0;
   n = 0;
@@ -162,22 +255,41 @@ cshift1 (gfc_array_char * const restrict ret,
         {
           /* Do the shift for this dimension. */
           sh = *hptr;
-          sh = (div (sh, len)).rem;
+          /* Normal case should be -len < sh < len; try to
+             avoid the expensive remainder operation if possible. */
           if (sh < 0)
             sh += len;
+          if (unlikely (sh >= len || sh < 0))
+            {
+              sh = sh % len;
+              if (sh < 0)
+                sh += len;
+            }
 
           src = &sptr[sh * soffset];
           dest = rptr;
-
-          for (n = 0; n < len; n++)
+          if (soffset == size && roffset == size)
+          {
+            size_t len1 = sh * size;
+            size_t len2 = (len - sh) * size;
+            memcpy (rptr, sptr + len1, len2);
+            memcpy (rptr + len2, sptr, len1);
+          }
+          else
             {
-              memcpy (dest, src, size);
-              dest += roffset;
-              if (n == len - sh - 1)
-                src = sptr;
-              else
-                src += soffset;
-            }
+              for (n = 0; n < len - sh; n++)
+                {
+                  memcpy (dest, src, size);
+                  dest += roffset;
+                  src += soffset;
+                }
+              for (src = sptr, n = 0; n < sh; n++)
+                {
+                  memcpy (dest, src, size);
+                  dest += roffset;
+                  src += soffset;
+                }
+            }
 
           /* Advance to the next section. */
           rptr += rstride0;
```