summaryrefslogtreecommitdiff
path: root/gcc/omp-expand.c
diff options
context:
space:
mode:
author	Jakub Jelinek <jakub@redhat.com>	2019-07-03 07:03:58 +0200
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2019-07-03 07:03:58 +0200
commit2f6bb511d1003d31ec1213081b6c2514cc10f0f9 (patch)
tree2389139c9c8169da0772490aa58f0cc7c40f8599 /gcc/omp-expand.c
parent83eb9522087c0f1f152873da00ade34e5f3e67e5 (diff)
tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause. * tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of OMP_CLAUSE__CONDTEMP_ as range's upper bound. (OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define. * tree.c (omp_clause_num_ops, omp_clause_code_name): Add OMP_CLAUSE__SCANTEMP_ entry. (walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_. * tree-pretty-print.c (dump_omp_clause): Likewise. * tree-nested.c (convert_nonlocal_omp_clauses, convert_local_omp_clauses): Likewise. * omp-general.h (struct omp_for_data): Add have_scantemp and have_nonctrl_scantemp members. * omp-general.c (omp_extract_for_data): Initialize them. * omp-low.c (struct omp_context): Add scan_exclusive member. (scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind result again with GF_OMP_FOR_KIND_MASK. Initialize also ctx->scan_exclusive. (lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead of !ctx->scan_inclusive. (lower_rec_input_clauses): Simplify gimplification of dtors using gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing loops. Don't add barrier for reduction_omp_orig_ref if ctx->scan_??xclusive. (lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive. (lower_omp_scan): Use ctx->scan_exclusive instead of !ctx->scan_inclusive. Handle worksharing loops with inscan reductions. Use new_vard != new_var instead of repeated omp_is_reference calls. (omp_find_scan, lower_omp_for_scan): New functions. (lower_omp_for): Call lower_omp_for_scan for worksharing loops with inscan reductions. * omp-expand.c (expand_omp_scantemp_alloc): New function. (expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp and fd->have_scantemp. * c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message. * c-c++-common/gomp/scan-5.c (foo): Likewise. * testsuite/libgomp.c++/scan-1.C: New test. * testsuite/libgomp.c++/scan-2.C: New test. 
* testsuite/libgomp.c++/scan-3.C: New test. * testsuite/libgomp.c++/scan-4.C: New test. * testsuite/libgomp.c++/scan-5.C: New test. * testsuite/libgomp.c++/scan-6.C: New test. * testsuite/libgomp.c++/scan-7.C: New test. * testsuite/libgomp.c++/scan-8.C: New test. * testsuite/libgomp.c/scan-1.c: New test. * testsuite/libgomp.c/scan-2.c: New test. * testsuite/libgomp.c/scan-3.c: New test. * testsuite/libgomp.c/scan-4.c: New test. * testsuite/libgomp.c/scan-5.c: New test. * testsuite/libgomp.c/scan-6.c: New test. * testsuite/libgomp.c/scan-7.c: New test. * testsuite/libgomp.c/scan-8.c: New test. From-SVN: r272958
Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r--	gcc/omp-expand.c	292
1 files changed, 278 insertions, 14 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 6902425a3d6..9236b3665e4 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_region *region,
}
}
+/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
+   compute the needed allocation size; otherwise lay the arrays out at PTR.
+   If ALLOC is false, handle the per-team allocations; if ALLOC is true,
+   handle the per-thread allocation.  SZ is the initial needed size for
+   other purposes, ALLOC_ALIGN the guaranteed alignment of the allocation
+   in bytes, CNT the number of elements of each array: for !ALLOC this is
+   omp_get_num_threads (), for ALLOC the number of iterations handled by
+   the current thread.  If PTR is non-NULL, it is the start of the
+   allocation and this routine shall assign to OMP_CLAUSE_DECL (c) of
+   those _scantemp_ clauses pointers to the corresponding arrays.  */
+
+static tree
+expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
+ unsigned HOST_WIDE_INT alloc_align, tree cnt,
+ gimple_stmt_iterator *gsi, bool alloc)
+{
+ tree eltsz = NULL_TREE;
+ unsigned HOST_WIDE_INT preval = 0;
+ /* Skip over the SZ bytes reserved at the start for other purposes.  */
+ if (ptr && sz)
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+ ptr, size_int (sz));
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
+ && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
+ {
+ tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+ unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
+ /* When the element size is a compile-time constant, cap the
+ alignment at its lowest set bit — consecutive array starts can
+ only be multiples of that anyway.  */
+ if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+ {
+ unsigned HOST_WIDE_INT szl
+ = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
+ szl = least_bit_hwi (szl);
+ if (szl)
+ al = MIN (al, szl);
+ }
+ /* Size-computation mode: accumulate one element's size per array;
+ it is multiplied by CNT at the end.  */
+ if (ptr == NULL_TREE)
+ {
+ if (eltsz == NULL_TREE)
+ eltsz = TYPE_SIZE_UNIT (pointee_type);
+ else
+ eltsz = size_binop (PLUS_EXPR, eltsz,
+ TYPE_SIZE_UNIT (pointee_type));
+ }
+ /* For the first array (preval == 0) with alignment covered by
+ ALLOC_ALIGN, the offset is known statically: round SZ up to AL
+ and advance PTR by the padding.  */
+ if (preval == 0 && al <= alloc_align)
+ {
+ unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
+ sz += diff;
+ if (diff && ptr)
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+ ptr, size_int (diff));
+ }
+ /* Otherwise, if this array needs more alignment than the previous
+ one guarantees, align PTR at runtime ((ptr + al-1) & -al), or in
+ size-computation mode over-allocate AL - 1 bytes of slack.  */
+ else if (al > preval)
+ {
+ if (ptr)
+ {
+ ptr = fold_convert (pointer_sized_int_node, ptr);
+ ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
+ build_int_cst (pointer_sized_int_node,
+ al - 1));
+ ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
+ build_int_cst (pointer_sized_int_node,
+ -(HOST_WIDE_INT) al));
+ ptr = fold_convert (ptr_type_node, ptr);
+ }
+ else
+ sz += al - 1;
+ }
+ /* Remember the alignment the end of this array guarantees for the
+ next one; a variable-sized element guarantees only byte
+ alignment.  */
+ if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+ preval = al;
+ else
+ preval = 1;
+ /* Layout mode: publish this array's start in the clause's decl and
+ step PTR past its CNT elements.  */
+ if (ptr)
+ {
+ expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
+ ptr = OMP_CLAUSE_DECL (c);
+ ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
+ size_binop (MULT_EXPR, cnt,
+ TYPE_SIZE_UNIT (pointee_type)));
+ }
+ }
+
+ /* In size-computation mode return CNT * sum-of-element-sizes plus the
+ fixed SZ prefix (which already includes any alignment slack); in
+ layout mode return the pointer past the last array.  */
+ if (ptr == NULL_TREE)
+ {
+ eltsz = size_binop (MULT_EXPR, eltsz, cnt);
+ if (sz)
+ eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
+ return eltsz;
+ }
+ else
+ return ptr;
+}
+
/* A subroutine of expand_omp_for. Generate code for a parallel
loop with static schedule and no specified chunk size. Given
parameters:
@@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct omp_region *region,
struct omp_for_data *fd,
gimple *inner_stmt)
{
- tree n, q, s0, e0, e, t, tt, nthreads, threadid;
+ tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
tree type, itype, vmain, vback;
basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
basic_block body_bb, cont_bb, collapse_bb = NULL;
- basic_block fin_bb;
+ basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
+ basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
gimple_stmt_iterator gsi, gsip;
edge ep;
bool broken_loop = region->cont == NULL;
@@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct omp_region *region,
c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
cond_var = OMP_CLAUSE_DECL (c);
}
- if (fd->have_reductemp || fd->have_pointer_condtemp)
+ if (fd->have_reductemp
+ || fd->have_pointer_condtemp
+ || fd->have_nonctrl_scantemp)
{
tree t1 = build_int_cst (long_integer_type_node, 0);
tree t2 = build_int_cst (long_integer_type_node, 1);
@@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gimple_stmt_iterator gsi2 = gsi_none ();
gimple *g = NULL;
tree mem = null_pointer_node, memv = NULL_TREE;
+ unsigned HOST_WIDE_INT condtemp_sz = 0;
+ unsigned HOST_WIDE_INT alloc_align = 0;
if (fd->have_reductemp)
{
+ gcc_assert (!fd->have_nonctrl_scantemp);
tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
reductions = OMP_CLAUSE_DECL (c);
gcc_assert (TREE_CODE (reductions) == SSA_NAME);
@@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gsi2 = gsip;
reductions = null_pointer_node;
}
- if (fd->have_pointer_condtemp)
+ if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
{
- tree type = TREE_TYPE (condtemp);
+ tree type;
+ if (fd->have_pointer_condtemp)
+ type = TREE_TYPE (condtemp);
+ else
+ type = ptr_type_node;
memv = create_tmp_var (type);
TREE_ADDRESSABLE (memv) = 1;
- unsigned HOST_WIDE_INT sz
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
- sz *= fd->lastprivate_conditional;
- expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
- false);
+ unsigned HOST_WIDE_INT sz = 0;
+ tree size = NULL_TREE;
+ if (fd->have_pointer_condtemp)
+ {
+ sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+ sz *= fd->lastprivate_conditional;
+ condtemp_sz = sz;
+ }
+ if (fd->have_nonctrl_scantemp)
+ {
+ nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
+ gimple *g = gimple_build_call (nthreads, 0);
+ nthreads = create_tmp_var (integer_type_node);
+ gimple_call_set_lhs (g, nthreads);
+ gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
+ nthreads = fold_convert (sizetype, nthreads);
+ alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
+ size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
+ alloc_align, nthreads, NULL,
+ false);
+ size = fold_convert (type, size);
+ }
+ else
+ size = build_int_cst (type, sz);
+ expand_omp_build_assign (&gsi2, memv, size, false);
mem = build_fold_addr_expr (memv);
}
tree t
@@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct omp_region *region,
true, GSI_SAME_STMT);
if (fd->have_pointer_condtemp)
expand_omp_build_assign (&gsi2, condtemp, memv, false);
+ if (fd->have_nonctrl_scantemp)
+ {
+ tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
+ expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
+ alloc_align, nthreads, &gsi2, false);
+ }
if (fd->have_reductemp)
{
gsi_remove (&gsi2, true);
@@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gsi = gsi_last_nondebug_bb (third_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+ if (fd->have_nonctrl_scantemp)
+ {
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ tree controlp = NULL_TREE, controlb = NULL_TREE;
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+ {
+ if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+ controlb = OMP_CLAUSE_DECL (c);
+ else
+ controlp = OMP_CLAUSE_DECL (c);
+ if (controlb && controlp)
+ break;
+ }
+ gcc_assert (controlp && controlb);
+ tree cnt = create_tmp_var (sizetype);
+ gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
+ tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
+ alloc_align, cnt, NULL, true);
+ tree size = create_tmp_var (sizetype);
+ expand_omp_build_assign (&gsi, size, sz, false);
+ tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
+ size, size_int (16384));
+ expand_omp_build_assign (&gsi, controlb, cmp);
+ g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ fourth_bb = split_block (third_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (fourth_bb);
+ /* FIXME: Once we have allocators, this should use allocator. */
+ g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
+ gimple_call_set_lhs (g, controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
+ &gsi, true);
+ gsi_prev (&gsi);
+ g = gsi_stmt (gsi);
+ fifth_bb = split_block (fourth_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (fifth_bb);
+
+ g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
+ gimple_call_set_lhs (g, controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__ALLOC (c))
+ {
+ tree tmp = create_tmp_var (sizetype);
+ tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+ g = gimple_build_assign (tmp, MULT_EXPR, cnt,
+ TYPE_SIZE_UNIT (pointee_type));
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ g = gimple_build_call (alloca_decl, 2, tmp,
+ size_int (TYPE_ALIGN (pointee_type)));
+ gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ }
+
+ sixth_bb = split_block (fifth_bb, g)->dest;
+ gsi = gsi_last_nondebug_bb (sixth_bb);
+ }
+
t = build2 (MULT_EXPR, itype, q, threadid);
t = build2 (PLUS_EXPR, itype, t, tt);
s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
@@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct omp_region *region,
if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
{
t = gimple_omp_return_lhs (gsi_stmt (gsi));
- if (fd->have_reductemp || fd->have_pointer_condtemp)
+ if (fd->have_reductemp
+ || ((fd->have_pointer_condtemp || fd->have_scantemp)
+ && !fd->have_nonctrl_scantemp))
{
tree fn;
if (t)
@@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct omp_region *region,
gcall *g = gimple_build_call (fn, 0);
gsi_insert_after (&gsi, g, GSI_SAME_STMT);
}
+ if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
+ {
+ tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+ tree controlp = NULL_TREE, controlb = NULL_TREE;
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+ && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+ {
+ if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+ controlb = OMP_CLAUSE_DECL (c);
+ else
+ controlp = OMP_CLAUSE_DECL (c);
+ if (controlb && controlp)
+ break;
+ }
+ gcc_assert (controlp && controlb);
+ gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit1_bb = split_block (exit_bb, g)->dest;
+ gsi = gsi_after_labels (exit1_bb);
+ g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
+ controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit2_bb = split_block (exit1_bb, g)->dest;
+ gsi = gsi_after_labels (exit2_bb);
+ g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
+ controlp);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+ exit3_bb = split_block (exit2_bb, g)->dest;
+ gsi = gsi_after_labels (exit3_bb);
+ }
gsi_remove (&gsi, true);
/* Connect all the blocks. */
@@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct omp_region *region,
ep = find_edge (entry_bb, second_bb);
ep->flags = EDGE_TRUE_VALUE;
ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
- find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
- find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+ if (fourth_bb)
+ {
+ ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (third_bb, fourth_bb);
+ ep->flags = EDGE_TRUE_VALUE;
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (fourth_bb, fifth_bb);
+ redirect_edge_and_branch (ep, sixth_bb);
+ }
+ else
+ sixth_bb = third_bb;
+ find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
+ find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+ if (exit1_bb)
+ {
+ ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (exit_bb, exit1_bb);
+ ep->flags = EDGE_TRUE_VALUE;
+ ep->probability
+ = profile_probability::guessed_always ().apply_scale (1, 2);
+ ep = find_edge (exit1_bb, exit2_bb);
+ redirect_edge_and_branch (ep, exit3_bb);
+ }
if (!broken_loop)
{
@@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct omp_region *region,
set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
- set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
+ if (fourth_bb)
+ {
+ set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
+ set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
+ }
+ set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
set_immediate_dominator (CDI_DOMINATORS, body_bb,
recompute_dominator (CDI_DOMINATORS, body_bb));
set_immediate_dominator (CDI_DOMINATORS, fin_bb,
recompute_dominator (CDI_DOMINATORS, fin_bb));
+ if (exit1_bb)
+ {
+ set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
+ set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
+ }
struct loop *loop = body_bb->loop_father;
if (loop != entry_bb->loop_father)