path: root/gcc/match.pd
author      Jakub Jelinek <jakub@redhat.com>  2020-05-05 11:36:47 +0200
committer   Jakub Jelinek <jakub@redhat.com>  2020-05-05 11:36:47 +0200
commit      144aee70b80de50f96a97ee64edd2f1c237c4906 (patch)
tree        a0b80710604088be014e425ee8ef0cb9a64cf5bc /gcc/match.pd
parent      7f916201ac390a5e1c88562bb91b1b4ab2852f22 (diff)
match.pd: Canonicalize (x + (x << cst)) into (x * cst2) [PR94800]
The popcount* testcases show yet another creative way to write popcount, but rather than adjusting the popcount matcher to deal with it, I think we should just canonicalize X + (X << C) to X * (1 + (1 << C)) and (X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2)), because for multiplication we already have simplification rules that can handle nested multiplication (X * CST1 * CST2), while for the shifts and adds we have nothing like that. And the user could have written the multiplication anyway, so if we don't emit the fastest or smallest code for the multiplication by constant, we should improve that. At least on the testcases the emitted code seems reasonable according to cost, except that perhaps we could in some cases try to improve expansion of vector multiplication by a uniform constant.

2020-05-05  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94800
	* match.pd (X + (X << C) to X * (1 + (1 << C)),
	(X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New
	canonicalizations.

	* gcc.dg/tree-ssa/pr94800.c: New test.
	* gcc.dg/tree-ssa/popcount5.c: New test.
	* gcc.dg/tree-ssa/popcount5l.c: New test.
	* gcc.dg/tree-ssa/popcount5ll.c: New test.
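For illustration, a minimal sketch (not the actual pr94800.c or popcount5*.c testcases; function names here are made up) of the kind of shift-and-add expressions the new patterns canonicalize into a single multiplication, which the existing X * CST1 * CST2 folding can then simplify further. Note that when the original type has undefined overflow, the patterns perform the multiplication in the corresponding unsigned type and convert back.

unsigned int
scale_by_9 (unsigned int x)
{
  /* x + (x << 3)  ==>  x * (1 + (1 << 3))  ==  x * 9  */
  return x + (x << 3);
}

unsigned int
scale_by_12 (unsigned int x)
{
  /* (x << 2) + (x << 3)  ==>  x * ((1 << 2) + (1 << 3))  ==  x * 12  */
  return (x << 2) + (x << 3);
}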
Diffstat (limited to 'gcc/match.pd')
-rw-r--r--  gcc/match.pd  35
1 file changed, 35 insertions(+), 0 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index c45e94c0c6a..d9575179655 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2570,6 +2570,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& single_use (@3))
(mult (plusminus @2 { build_one_cst (type); }) @0))))))
+#if GIMPLE
+/* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
+ (X << C1) + (X << C2) into X * ((1 << C1) + (1 << C2)). */
+(simplify
+ (plus:c @0 (lshift:s @0 INTEGER_CST@1))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1),
+ element_precision (type));
+ w += 1;
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+(simplify
+ (plus (lshift:s @0 INTEGER_CST@1) (lshift:s @0 INTEGER_CST@2))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type)
+ && tree_fits_uhwi_p (@2)
+ && tree_to_uhwi (@2) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ unsigned int prec = element_precision (type);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), prec);
+ w += wi::set_bit_in_zero (tree_to_uhwi (@2), prec);
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+#endif
+
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
(for minmax (min max FMIN_ALL FMAX_ALL)