diff options
author | Jakub Jelinek <jakub@redhat.com> | 2019-07-03 07:03:58 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2019-07-03 07:03:58 +0200 |
commit | 2f6bb511d1003d31ec1213081b6c2514cc10f0f9 (patch) | |
tree | 2389139c9c8169da0772490aa58f0cc7c40f8599 /gcc/omp-expand.c | |
parent | 83eb9522087c0f1f152873da00ade34e5f3e67e5 (diff) |
tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_ clause.
* tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__SCANTEMP_
clause.
* tree.h (OMP_CLAUSE_DECL): Use OMP_CLAUSE__SCANTEMP_ instead of
OMP_CLAUSE__CONDTEMP_ as range's upper bound.
(OMP_CLAUSE__SCANTEMP__ALLOC, OMP_CLAUSE__SCANTEMP__CONTROL): Define.
* tree.c (omp_clause_num_ops, omp_clause_code_name): Add
OMP_CLAUSE__SCANTEMP_ entry.
(walk_tree_1): Handle OMP_CLAUSE__SCANTEMP_.
* tree-pretty-print.c (dump_omp_clause): Likewise.
* tree-nested.c (convert_nonlocal_omp_clauses,
convert_local_omp_clauses): Likewise.
* omp-general.h (struct omp_for_data): Add have_scantemp and
have_nonctrl_scantemp members.
* omp-general.c (omp_extract_for_data): Initialize them.
* omp-low.c (struct omp_context): Add scan_exclusive member.
(scan_omp_1_stmt): Don't unnecessarily mask gimple_omp_for_kind
result again with GF_OMP_FOR_KIND_MASK. Initialize also
ctx->scan_exclusive.
(lower_rec_simd_input_clauses): Use ctx->scan_exclusive instead
of !ctx->scan_inclusive.
(lower_rec_input_clauses): Simplify gimplification of dtors using
gimplify_and_add. For non-is_simd test OMP_CLAUSE_REDUCTION_INSCAN
rather than rvarp. Handle OMP_CLAUSE_REDUCTION_INSCAN in worksharing
loops. Don't add barrier for reduction_omp_orig_ref if
ctx->scan_??xclusive.
(lower_reduction_clauses): Don't do anything for ctx->scan_??xclusive.
(lower_omp_scan): Use ctx->scan_exclusive instead
of !ctx->scan_inclusive. Handle worksharing loops with inscan
reductions. Use new_vard != new_var instead of repeated
omp_is_reference calls.
(omp_find_scan, lower_omp_for_scan): New functions.
(lower_omp_for): Call lower_omp_for_scan for worksharing loops with
inscan reductions.
* omp-expand.c (expand_omp_scantemp_alloc): New function.
(expand_omp_for_static_nochunk): Handle fd->have_nonctrl_scantemp
and fd->have_scantemp.
* c-c++-common/gomp/scan-3.c (f1): Don't expect a sorry message.
* c-c++-common/gomp/scan-5.c (foo): Likewise.
* testsuite/libgomp.c++/scan-1.C: New test.
* testsuite/libgomp.c++/scan-2.C: New test.
* testsuite/libgomp.c++/scan-3.C: New test.
* testsuite/libgomp.c++/scan-4.C: New test.
* testsuite/libgomp.c++/scan-5.C: New test.
* testsuite/libgomp.c++/scan-6.C: New test.
* testsuite/libgomp.c++/scan-7.C: New test.
* testsuite/libgomp.c++/scan-8.C: New test.
* testsuite/libgomp.c/scan-1.c: New test.
* testsuite/libgomp.c/scan-2.c: New test.
* testsuite/libgomp.c/scan-3.c: New test.
* testsuite/libgomp.c/scan-4.c: New test.
* testsuite/libgomp.c/scan-5.c: New test.
* testsuite/libgomp.c/scan-6.c: New test.
* testsuite/libgomp.c/scan-7.c: New test.
* testsuite/libgomp.c/scan-8.c: New test.
From-SVN: r272958
Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r-- | gcc/omp-expand.c | 292 |
1 file changed, 278 insertions, 14 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 6902425a3d6..9236b3665e4 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -3502,6 +3502,98 @@ expand_omp_for_generic (struct omp_region *region, } } +/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, + compute needed allocation size. If !ALLOC of team allocations, + if ALLOC of thread allocation. SZ is the initial needed size for + other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, + CNT number of elements of each array, for !ALLOC this is + omp_get_num_threads (), for ALLOC number of iterations handled by the + current thread. If PTR is non-NULL, it is the start of the allocation + and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ + clauses pointers to the corresponding arrays. */ + +static tree +expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz, + unsigned HOST_WIDE_INT alloc_align, tree cnt, + gimple_stmt_iterator *gsi, bool alloc) +{ + tree eltsz = NULL_TREE; + unsigned HOST_WIDE_INT preval = 0; + if (ptr && sz) + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), + ptr, size_int (sz)); + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && !OMP_CLAUSE__SCANTEMP__CONTROL (c) + && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc) + { + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); + unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type); + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) + { + unsigned HOST_WIDE_INT szl + = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type)); + szl = least_bit_hwi (szl); + if (szl) + al = MIN (al, szl); + } + if (ptr == NULL_TREE) + { + if (eltsz == NULL_TREE) + eltsz = TYPE_SIZE_UNIT (pointee_type); + else + eltsz = size_binop (PLUS_EXPR, eltsz, + TYPE_SIZE_UNIT (pointee_type)); + } + if (preval == 0 && al <= alloc_align) + { + unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz; + sz += diff; + if 
(diff && ptr) + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), + ptr, size_int (diff)); + } + else if (al > preval) + { + if (ptr) + { + ptr = fold_convert (pointer_sized_int_node, ptr); + ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr, + build_int_cst (pointer_sized_int_node, + al - 1)); + ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr, + build_int_cst (pointer_sized_int_node, + -(HOST_WIDE_INT) al)); + ptr = fold_convert (ptr_type_node, ptr); + } + else + sz += al - 1; + } + if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type))) + preval = al; + else + preval = 1; + if (ptr) + { + expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false); + ptr = OMP_CLAUSE_DECL (c); + ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr, + size_binop (MULT_EXPR, cnt, + TYPE_SIZE_UNIT (pointee_type))); + } + } + + if (ptr == NULL_TREE) + { + eltsz = size_binop (MULT_EXPR, eltsz, cnt); + if (sz) + eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz)); + return eltsz; + } + else + return ptr; +} + /* A subroutine of expand_omp_for. Generate code for a parallel loop with static schedule and no specified chunk size. 
Given parameters: @@ -3544,11 +3636,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, struct omp_for_data *fd, gimple *inner_stmt) { - tree n, q, s0, e0, e, t, tt, nthreads, threadid; + tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid; tree type, itype, vmain, vback; basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; basic_block body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; + basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL; + basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL; gimple_stmt_iterator gsi, gsip; edge ep; bool broken_loop = region->cont == NULL; @@ -3650,7 +3743,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); cond_var = OMP_CLAUSE_DECL (c); } - if (fd->have_reductemp || fd->have_pointer_condtemp) + if (fd->have_reductemp + || fd->have_pointer_condtemp + || fd->have_nonctrl_scantemp) { tree t1 = build_int_cst (long_integer_type_node, 0); tree t2 = build_int_cst (long_integer_type_node, 1); @@ -3660,8 +3755,11 @@ expand_omp_for_static_nochunk (struct omp_region *region, gimple_stmt_iterator gsi2 = gsi_none (); gimple *g = NULL; tree mem = null_pointer_node, memv = NULL_TREE; + unsigned HOST_WIDE_INT condtemp_sz = 0; + unsigned HOST_WIDE_INT alloc_align = 0; if (fd->have_reductemp) { + gcc_assert (!fd->have_nonctrl_scantemp); tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); reductions = OMP_CLAUSE_DECL (c); gcc_assert (TREE_CODE (reductions) == SSA_NAME); @@ -3678,16 +3776,40 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi2 = gsip; reductions = null_pointer_node; } - if (fd->have_pointer_condtemp) + if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) { - tree type = TREE_TYPE (condtemp); + tree type; + if (fd->have_pointer_condtemp) + type = TREE_TYPE (condtemp); + else + type = ptr_type_node; memv = create_tmp_var (type); TREE_ADDRESSABLE (memv) = 1; - 
unsigned HOST_WIDE_INT sz - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); - sz *= fd->lastprivate_conditional; - expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), - false); + unsigned HOST_WIDE_INT sz = 0; + tree size = NULL_TREE; + if (fd->have_pointer_condtemp) + { + sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); + sz *= fd->lastprivate_conditional; + condtemp_sz = sz; + } + if (fd->have_nonctrl_scantemp) + { + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + gimple *g = gimple_build_call (nthreads, 0); + nthreads = create_tmp_var (integer_type_node); + gimple_call_set_lhs (g, nthreads); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + nthreads = fold_convert (sizetype, nthreads); + alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); + size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, + alloc_align, nthreads, NULL, + false); + size = fold_convert (type, size); + } + else + size = build_int_cst (type, sz); + expand_omp_build_assign (&gsi2, memv, size, false); mem = build_fold_addr_expr (memv); } tree t @@ -3698,6 +3820,12 @@ expand_omp_for_static_nochunk (struct omp_region *region, true, GSI_SAME_STMT); if (fd->have_pointer_condtemp) expand_omp_build_assign (&gsi2, condtemp, memv, false); + if (fd->have_nonctrl_scantemp) + { + tree ptr = fd->have_pointer_condtemp ? 
condtemp : memv; + expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, + alloc_align, nthreads, &gsi2, false); + } if (fd->have_reductemp) { gsi_remove (&gsi2, true); @@ -3788,6 +3916,72 @@ expand_omp_for_static_nochunk (struct omp_region *region, gsi = gsi_last_nondebug_bb (third_bb); gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + if (fd->have_nonctrl_scantemp) + { + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + tree controlp = NULL_TREE, controlb = NULL_TREE; + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) + { + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) + controlb = OMP_CLAUSE_DECL (c); + else + controlp = OMP_CLAUSE_DECL (c); + if (controlb && controlp) + break; + } + gcc_assert (controlp && controlb); + tree cnt = create_tmp_var (sizetype); + gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); + tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, + alloc_align, cnt, NULL, true); + tree size = create_tmp_var (sizetype); + expand_omp_build_assign (&gsi, size, sz, false); + tree cmp = fold_build2 (GT_EXPR, boolean_type_node, + size, size_int (16384)); + expand_omp_build_assign (&gsi, controlb, cmp); + g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + fourth_bb = split_block (third_bb, g)->dest; + gsi = gsi_last_nondebug_bb (fourth_bb); + /* FIXME: Once we have allocators, this should use allocator. 
*/ + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size); + gimple_call_set_lhs (g, controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, + &gsi, true); + gsi_prev (&gsi); + g = gsi_stmt (gsi); + fifth_bb = split_block (fourth_bb, g)->dest; + gsi = gsi_last_nondebug_bb (fifth_bb); + + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); + gimple_call_set_lhs (g, controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__ALLOC (c)) + { + tree tmp = create_tmp_var (sizetype); + tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); + g = gimple_build_assign (tmp, MULT_EXPR, cnt, + TYPE_SIZE_UNIT (pointee_type)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + g = gimple_build_call (alloca_decl, 2, tmp, + size_int (TYPE_ALIGN (pointee_type))); + gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } + + sixth_bb = split_block (fifth_bb, g)->dest; + gsi = gsi_last_nondebug_bb (sixth_bb); + } + t = build2 (MULT_EXPR, itype, q, threadid); t = build2 (PLUS_EXPR, itype, t, tt); s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); @@ -4018,7 +4212,9 @@ expand_omp_for_static_nochunk (struct omp_region *region, if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) { t = gimple_omp_return_lhs (gsi_stmt (gsi)); - if (fd->have_reductemp || fd->have_pointer_condtemp) + if (fd->have_reductemp + || ((fd->have_pointer_condtemp || fd->have_scantemp) + && !fd->have_nonctrl_scantemp)) { tree fn; if (t) @@ -4045,6 +4241,38 @@ expand_omp_for_static_nochunk (struct omp_region *region, gcall *g = gimple_build_call (fn, 0); gsi_insert_after (&gsi, g, GSI_SAME_STMT); } + if (fd->have_scantemp && 
!fd->have_nonctrl_scantemp) + { + tree clauses = gimple_omp_for_clauses (fd->for_stmt); + tree controlp = NULL_TREE, controlb = NULL_TREE; + for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ + && OMP_CLAUSE__SCANTEMP__CONTROL (c)) + { + if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) + controlb = OMP_CLAUSE_DECL (c); + else + controlp = OMP_CLAUSE_DECL (c); + if (controlb && controlp) + break; + } + gcc_assert (controlp && controlb); + gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit1_bb = split_block (exit_bb, g)->dest; + gsi = gsi_after_labels (exit1_bb); + g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, + controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit2_bb = split_block (exit1_bb, g)->dest; + gsi = gsi_after_labels (exit2_bb); + g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1, + controlp); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + exit3_bb = split_block (exit2_bb, g)->dest; + gsi = gsi_after_labels (exit3_bb); + } gsi_remove (&gsi, true); /* Connect all the blocks. 
*/ @@ -4053,8 +4281,34 @@ expand_omp_for_static_nochunk (struct omp_region *region, ep = find_edge (entry_bb, second_bb); ep->flags = EDGE_TRUE_VALUE; ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + if (fourth_bb) + { + ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (third_bb, fourth_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (fourth_bb, fifth_bb); + redirect_edge_and_branch (ep, sixth_bb); + } + else + sixth_bb = third_bb; + find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + if (exit1_bb) + { + ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (exit_bb, exit1_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability + = profile_probability::guessed_always ().apply_scale (1, 2); + ep = find_edge (exit1_bb, exit2_bb); + redirect_edge_and_branch (ep, exit3_bb); + } if (!broken_loop) { @@ -4082,12 +4336,22 @@ expand_omp_for_static_nochunk (struct omp_region *region, set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); + if (fourth_bb) + { + set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); + set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); + } + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); set_immediate_dominator (CDI_DOMINATORS, body_bb, recompute_dominator (CDI_DOMINATORS, body_bb)); set_immediate_dominator (CDI_DOMINATORS, fin_bb, recompute_dominator (CDI_DOMINATORS, 
fin_bb)); + if (exit1_bb) + { + set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); + set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); + } struct loop *loop = body_bb->loop_father; if (loop != entry_bb->loop_father) |