summaryrefslogtreecommitdiff
path: root/gcc/tree-ssa-sccvn.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2020-02-24 12:56:39 +0100
committerJakub Jelinek <jakub@redhat.com>2020-02-24 12:56:39 +0100
commit7f5617b00445dcc861a498a4cecc8aaa59e05b8c (patch)
tree5d7841580c0462c271e5028ddf10cbcdbe38a5b4 /gcc/tree-ssa-sccvn.c
parent2bd8c3ff3511df8781dd9f3777efab20572d29ab (diff)
sccvn: Handle bitfields in push_partial_def [PR93582]
The following patch adds support for bitfields to push_partial_def. Previously pd.offset and pd.size were counted in bytes and maxsizei in bits, now everything is counted in bits. Not really sure how much of the further code can be outlined and moved, e.g. the full def and partial def code doesn't have pretty much anything in common (the partial defs case basically have some load bit range and a set of store bit ranges that at least partially overlap and we need to handle all the different cases, like negative pd.offset or non-negative, little vs. bit endian, size so small that we need to preserve original bits on both sides of the byte, size that fits or is too large. Perhaps the storing of some value into a middle of existing buffer (i.e. what push_partial_def now does in the loop) could, but the candidate for sharing would be most likely store-merging rather than the other spots in sccvn, and I think it is better not to touch store-merging at this stage. Yes, I've thought about trying to do everything in place, but the code is quite hard to understand and get right already now and if we tried to do the optimize on the fly, it would need more special cases and would for gcov coverage need more testcases to cover it. Most of the time the sizes will be small. Furthermore, for bitfields native_encode_expr stores actually number of bytes in the mode and not say actual bitsize rounded up to bytes, so it wouldn't be just a matter of saving/restoring bytes at the start and end, but we might need even 7 further bytes e.g. for __int128 bitfields. Perhaps we could have just a fast path for the case where everything is byte aligned and (for integral types the mode bitsize is equal to the size too)? 2020-02-24 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/93582 * tree-ssa-sccvn.c (vn_walk_cb_data::push_partial_def): Consider pd.offset and pd.size to be counted in bits rather than bytes, add support for maxsizei that is not a multiple of BITS_PER_UNIT and handle bitfield stores and loads. (vn_reference_lookup_3): Don't call ranges_known_overlap_p with uncomparable quantities - bytes vs. bits. Allow push_partial_def on offsets/sizes that aren't multiple of BITS_PER_UNIT and adjust pd.offset/pd.size to be counted in bits rather than bytes. Formatting fix. Rename shadowed len variable to buflen. * gcc.dg/tree-ssa/pr93582-4.c: New test. * gcc.dg/tree-ssa/pr93582-5.c: New test. * gcc.dg/tree-ssa/pr93582-6.c: New test. * gcc.dg/tree-ssa/pr93582-7.c: New test. * gcc.dg/tree-ssa/pr93582-8.c: New test.
Diffstat (limited to 'gcc/tree-ssa-sccvn.c')
-rw-r--r--gcc/tree-ssa-sccvn.c239
1 files changed, 184 insertions, 55 deletions
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 30518abfddd..a3fba2878f8 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -1774,7 +1774,11 @@ vn_walk_cb_data::push_partial_def (const pd_data &pd,
const HOST_WIDE_INT bufsize = 64;
/* We're using a fixed buffer for encoding so fail early if the object
we want to interpret is bigger. */
- if (maxsizei > bufsize * BITS_PER_UNIT)
+ if (maxsizei > bufsize * BITS_PER_UNIT
+ || CHAR_BIT != 8
+ || BITS_PER_UNIT != 8
+ /* Not prepared to handle PDP endian. */
+ || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return (void *)-1;
bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR
@@ -1854,41 +1858,39 @@ vn_walk_cb_data::push_partial_def (const pd_data &pd,
/* Now we have merged newr into the range tree. When we have covered
[offseti, sizei] then the tree will contain exactly one node which has
the desired properties and it will be 'r'. */
- if (!known_subrange_p (0, maxsizei / BITS_PER_UNIT, r->offset, r->size))
+ if (!known_subrange_p (0, maxsizei, r->offset, r->size))
/* Continue looking for partial defs. */
return NULL;
/* Now simply native encode all partial defs in reverse order. */
unsigned ndefs = partial_defs.length ();
/* We support up to 512-bit values (for V8DFmode). */
- unsigned char buffer[bufsize];
+ unsigned char buffer[bufsize + 1];
+ unsigned char this_buffer[bufsize + 1];
int len;
+ memset (buffer, 0, bufsize + 1);
+ unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT;
while (!partial_defs.is_empty ())
{
pd_data pd = partial_defs.pop ();
- gcc_checking_assert (pd.offset < bufsize);
+ unsigned int amnt;
if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
- /* Empty CONSTRUCTOR. */
- memset (buffer + MAX (0, pd.offset),
- 0, MIN (bufsize - MAX (0, pd.offset),
- pd.size + MIN (0, pd.offset)));
+ {
+ /* Empty CONSTRUCTOR. */
+ if (pd.size >= needed_len * BITS_PER_UNIT)
+ len = needed_len;
+ else
+ len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT;
+ memset (this_buffer, 0, len);
+ }
else
{
- unsigned pad = 0;
- if (BYTES_BIG_ENDIAN
- && is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (pd.rhs))))
- {
- /* On big-endian the padding is at the 'front' so just skip
- the initial bytes. */
- fixed_size_mode mode
- = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (pd.rhs)));
- pad = GET_MODE_SIZE (mode) - pd.size;
- }
- len = native_encode_expr (pd.rhs, buffer + MAX (0, pd.offset),
- bufsize - MAX (0, pd.offset),
- MAX (0, -pd.offset) + pad);
- if (len <= 0 || len < (pd.size - MAX (0, -pd.offset)))
+ len = native_encode_expr (pd.rhs, this_buffer, bufsize,
+ MAX (0, -pd.offset) / BITS_PER_UNIT);
+ if (len <= 0
+ || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT
+ - MAX (0, -pd.offset) / BITS_PER_UNIT))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Failed to encode %u "
@@ -1896,6 +1898,125 @@ vn_walk_cb_data::push_partial_def (const pd_data &pd,
return (void *)-1;
}
}
+
+ unsigned char *p = buffer;
+ HOST_WIDE_INT size = pd.size;
+ if (pd.offset < 0)
+ size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT);
+ this_buffer[len] = 0;
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* LSB of this_buffer[len - 1] byte should be at
+ pd.offset + pd.size - 1 bits in buffer. */
+ amnt = ((unsigned HOST_WIDE_INT) pd.offset
+ + pd.size) % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_right (this_buffer, len + 1, amnt);
+ unsigned char *q = this_buffer;
+ unsigned int off = 0;
+ if (pd.offset >= 0)
+ {
+ unsigned int msk;
+ off = pd.offset / BITS_PER_UNIT;
+ gcc_assert (off < needed_len);
+ p = buffer + off;
+ if (size <= amnt)
+ {
+ msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt);
+ *p = (*p & ~msk) | (this_buffer[len] & msk);
+ size = 0;
+ }
+ else
+ {
+ if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
+ q = (this_buffer + len
+ - (ROUND_UP (size - amnt, BITS_PER_UNIT)
+ / BITS_PER_UNIT));
+ if (pd.offset % BITS_PER_UNIT)
+ {
+ msk = -1U << (BITS_PER_UNIT
+ - (pd.offset % BITS_PER_UNIT));
+ *p = (*p & msk) | (*q & ~msk);
+ p++;
+ q++;
+ off++;
+ size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT);
+ gcc_assert (size >= 0);
+ }
+ }
+ }
+ else if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
+ {
+ q = (this_buffer + len
+ - (ROUND_UP (size - amnt, BITS_PER_UNIT)
+ / BITS_PER_UNIT));
+ if (pd.offset % BITS_PER_UNIT)
+ {
+ q++;
+ size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset
+ % BITS_PER_UNIT);
+ gcc_assert (size >= 0);
+ }
+ }
+ if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off
+ > needed_len)
+ size = (needed_len - off) * BITS_PER_UNIT;
+ memcpy (p, q, size / BITS_PER_UNIT);
+ if (size % BITS_PER_UNIT)
+ {
+ unsigned int msk
+ = -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT));
+ p += size / BITS_PER_UNIT;
+ q += size / BITS_PER_UNIT;
+ *p = (*q & msk) | (*p & ~msk);
+ }
+ }
+ else
+ {
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
+ if (pd.offset >= 0)
+ {
+ /* LSB of this_buffer[0] byte should be at pd.offset bits
+ in buffer. */
+ unsigned int msk;
+ amnt = pd.offset % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_left (this_buffer, len + 1, amnt);
+ unsigned int off = pd.offset / BITS_PER_UNIT;
+ gcc_assert (off < needed_len);
+ p = buffer + off;
+ if (amnt + size < BITS_PER_UNIT)
+ {
+ /* Low amnt bits come from *p, then size bits
+ from this_buffer[0] and the remaining again from
+ *p. */
+ msk = ((1 << size) - 1) << amnt;
+ *p = (*p & ~msk) | (this_buffer[0] & msk);
+ size = 0;
+ }
+ else if (amnt)
+ {
+ msk = -1U << amnt;
+ *p = (*p & ~msk) | (this_buffer[0] & msk);
+ p++;
+ size -= (BITS_PER_UNIT - amnt);
+ }
+ }
+ else
+ {
+ amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_left (this_buffer, len + 1, amnt);
+ }
+ memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);
+ p += size / BITS_PER_UNIT;
+ if (size % BITS_PER_UNIT)
+ {
+ unsigned int msk = -1U << (size % BITS_PER_UNIT);
+ *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT]
+ & ~msk) | (*p & msk);
+ }
+ }
}
tree type = vr->type;
@@ -1903,7 +2024,26 @@ vn_walk_cb_data::push_partial_def (const pd_data &pd,
access size. */
if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type))
type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type, buffer, maxsizei / BITS_PER_UNIT);
+ tree val;
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned sz = needed_len;
+ if (maxsizei % BITS_PER_UNIT)
+ shift_bytes_in_array_right (buffer, needed_len,
+ BITS_PER_UNIT
+ - (maxsizei % BITS_PER_UNIT));
+ if (INTEGRAL_TYPE_P (type))
+ sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+ if (sz > needed_len)
+ {
+ memcpy (this_buffer + (sz - needed_len), buffer, needed_len);
+ val = native_interpret_expr (type, this_buffer, sz);
+ }
+ else
+ val = native_interpret_expr (type, buffer, needed_len);
+ }
+ else
+ val = native_interpret_expr (type, buffer, bufsize);
/* If we chop off bits because the types precision doesn't match the memory
access size this is ok when optimizing reads but not when called from
the DSE code during elimination. */
@@ -2478,8 +2618,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
tree val;
if (integer_zerop (gimple_call_arg (def_stmt, 1)))
val = build_zero_cst (vr->type);
- else if (INTEGRAL_TYPE_P (vr->type)
- && known_eq (ref->size, 8))
+ else if (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8))
{
gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR,
vr->type, gimple_call_arg (def_stmt, 1));
@@ -2491,11 +2630,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
}
else
{
- unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
- unsigned char *buf = XALLOCAVEC (unsigned char, len);
+ unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
+ unsigned char *buf = XALLOCAVEC (unsigned char, buflen);
memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)),
- len);
- val = native_interpret_expr (vr->type, buf, len);
+ buflen);
+ val = native_interpret_expr (vr->type, buf, buflen);
if (!val)
return (void *)-1;
}
@@ -2506,15 +2645,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& integer_zerop (gimple_call_arg (def_stmt, 1))
&& tree_fits_poly_int64_p (len)
&& tree_to_poly_int64 (len).is_constant (&leni)
+ && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT
&& offset.is_constant (&offseti)
&& offset2.is_constant (&offset2i)
&& maxsize.is_constant (&maxsizei)
- && ranges_known_overlap_p (offseti, maxsizei, offset2i, leni))
+ && ranges_known_overlap_p (offseti, maxsizei, offset2i,
+ leni << LOG2_BITS_PER_UNIT))
{
pd_data pd;
pd.rhs = build_constructor (NULL_TREE, NULL);
- pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
- pd.size = leni;
+ pd.offset = offset2i - offseti;
+ pd.size = leni << LOG2_BITS_PER_UNIT;
return data->push_partial_def (pd, 0, maxsizei);
}
}
@@ -2558,13 +2699,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
}
else if (known_eq (ref->size, maxsize)
&& maxsize.is_constant (&maxsizei)
- && maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0
&& offset2.is_constant (&offset2i)
- && offset2i % BITS_PER_UNIT == 0
&& size2.is_constant (&size2i)
- && size2i % BITS_PER_UNIT == 0
&& ranges_known_overlap_p (offseti, maxsizei,
offset2i, size2i))
{
@@ -2573,8 +2710,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
by a later def. */
pd_data pd;
pd.rhs = gimple_assign_rhs1 (def_stmt);
- pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
- pd.size = size2i / BITS_PER_UNIT;
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
}
}
@@ -2719,19 +2856,15 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
}
}
else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
- size2i)
- && maxsizei % BITS_PER_UNIT == 0
- && offseti % BITS_PER_UNIT == 0
- && size2i % BITS_PER_UNIT == 0
- && offset2i % BITS_PER_UNIT == 0)
+ size2i))
{
pd_data pd;
tree rhs = gimple_assign_rhs1 (def_stmt);
if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs);
pd.rhs = rhs;
- pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
- pd.size = size2i / BITS_PER_UNIT;
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
}
}
@@ -2803,19 +2936,15 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
return data->finish (get_alias_set (lhs), val);
}
else if (maxsize.is_constant (&maxsizei)
- && maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0
&& offset2.is_constant (&offset2i)
- && offset2i % BITS_PER_UNIT == 0
&& size2.is_constant (&size2i)
- && size2i % BITS_PER_UNIT == 0
&& ranges_known_overlap_p (offset, maxsize, offset2, size2))
{
pd_data pd;
pd.rhs = SSA_VAL (def_rhs);
- pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
- pd.size = size2i / BITS_PER_UNIT;
+ pd.offset = offset2i - offseti;
+ pd.size = size2i;
return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
}
}
@@ -2945,14 +3074,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
coming from targets that like to gimplify init-ctors as
aggregate copies from constant data like aarch64 for
PR83518. */
- if (maxsize.is_constant (&maxsizei)
- && known_eq (ref->size, maxsize))
+ if (maxsize.is_constant (&maxsizei) && known_eq (ref->size, maxsize))
{
pd_data pd;
pd.rhs = val;
pd.offset = 0;
- pd.size = maxsizei / BITS_PER_UNIT;
- return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
+ pd.size = maxsizei;
+ return data->push_partial_def (pd, get_alias_set (lhs),
+ maxsizei);
}
}