summaryrefslogtreecommitdiff
path: root/gcc/omp-offload.c
diff options
context:
space:
mode:
authorMartin Jambor <mjambor@suse.cz>2016-12-14 23:30:41 +0100
committerMartin Jambor <jamborm@gcc.gnu.org>2016-12-14 23:30:41 +0100
commit629b3d75c8c5a244d891a9c292bca6912d4b0dd9 (patch)
tree21a84ad4210bfa6213a06a75a3311f6c824b1b65 /gcc/omp-offload.c
parentcfce1a4a42a9f76477e732fbe7408459742a92a2 (diff)
Split omp-low into multiple files
2016-12-14 Martin Jambor <mjambor@suse.cz> * omp-general.h: New file. * omp-general.c: New file. * omp-expand.h: Likewise. * omp-expand.c: Likewise. * omp-offload.h: Likewise. * omp-offload.c: Likewise. * omp-grid.c: Likewise. * omp-grid.c: Likewise. * omp-low.h: Include omp-general.h and omp-grid.h. Removed includes of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h, ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h, varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h, tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h, alloc-pool.h, cfghooks.h, rtl.h and memmodel.h. (omp_find_combined_for): Declare. (find_omp_clause): Renamed to omp_find_clause and moved to omp-general.h. (free_omp_regions): Renamed to omp_free_regions and moved to omp-expand.h. (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved to omp-general.h. (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to omp-general.h. (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and moved to omp-general.h. (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to omp-general.h. (oacc_fn_attrib_kernels_p): Moved to omp-general.h. (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to omp-general.c. (omp_expand_local): Moved to omp-expand.h. (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to omp-expand.h. (omp_finish_file): Moved to omp-offload.h. (default_goacc_validate_dims): Renamed to oacc_default_goacc_validate_dims and moved to omp-offload.h. (offload_funcs, offload_vars): Moved to omp-offload.h. * omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h. (omp_region): Moved to omp-expand.c. (omp_for_data_loop): Moved to omp-general.h. (omp_for_data): Likewise. (oacc_loop): Moved to omp-offload.c. (oacc_loop_flags): Moved to omp-general.h. (offload_funcs, offload_vars): Moved to omp-offload.c. (root_omp_region): Moved to omp-expand.c. 
(omp_any_child_fn_dumped): Likewise. (find_omp_clause): Renamed to omp_find_clause and moved to omp-general.c. (is_combined_parallel): Moved to omp-expand.c. (is_reference): Renamed to omp_is_reference and and moved to omp-general.c. (adjust_for_condition): Renamed to omp_adjust_for_condition and moved to omp-general.c. (get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr and moved to omp-general.c. (extract_omp_for_data): Renamed to omp_extract_for_data and moved to omp-general.c. (workshare_safe_to_combine_p): Moved to omp-expand.c. (omp_adjust_chunk_size): Likewise. (get_ws_args_for): Likewise. (get_base_type): Removed. (dump_omp_region): Moved to omp-expand.c. (debug_omp_region): Likewise. (debug_all_omp_regions): Likewise. (new_omp_region): Likewise. (free_omp_region_1): Likewise. (free_omp_regions): Renamed to omp_free_regions and moved to omp-expand.c. (find_combined_for): Renamed to omp_find_combined_for, made global. (build_omp_barrier): Renamed to omp_build_barrier and moved to omp-general.c. (omp_max_vf): Moved to omp-general.c. (omp_max_simt_vf): Likewise. (gimple_build_cond_empty): Moved to omp-expand.c. (parallel_needs_hsa_kernel_p): Likewise. (expand_omp_build_assign): Moved declaration to omp-expand.c. (expand_parallel_call): Moved to omp-expand.c. (expand_cilk_for_call): Likewise. (expand_task_call): Likewise. (vec2chain): Likewise. (remove_exit_barrier): Likewise. (remove_exit_barriers): Likewise. (optimize_omp_library_calls): Likewise. (expand_omp_regimplify_p): Likewise. (expand_omp_build_assign): Likewise. (expand_omp_taskreg): Likewise. (oacc_collapse): Likewise. (expand_oacc_collapse_init): Likewise. (expand_oacc_collapse_vars): Likewise. (expand_omp_for_init_counts): Likewise. (expand_omp_for_init_vars): Likewise. (extract_omp_for_update_vars): Likewise. (expand_omp_ordered_source): Likewise. (expand_omp_ordered_sink): Likewise. (expand_omp_ordered_source_sink): Likewise. (expand_omp_for_ordered_loops): Likewise. 
(expand_omp_for_generic): Likewise. (expand_omp_for_static_nochunk): Likewise. (find_phi_with_arg_on_edge): Likewise. (expand_omp_for_static_chunk): Likewise. (expand_cilk_for): Likewise. (expand_omp_simd): Likewise. (expand_omp_taskloop_for_outer): Likewise. (expand_omp_taskloop_for_inner): Likewise. (expand_oacc_for): Likewise. (expand_omp_for): Likewise. (expand_omp_sections): Likewise. (expand_omp_single): Likewise. (expand_omp_synch): Likewise. (expand_omp_atomic_load): Likewise. (expand_omp_atomic_store): Likewise. (expand_omp_atomic_fetch_op): Likewise. (expand_omp_atomic_pipeline): Likewise. (expand_omp_atomic_mutex): Likewise. (expand_omp_atomic): Likewise. (oacc_launch_pack): and moved to omp-general.c, made public. (OACC_FN_ATTRIB): Likewise. (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved to omp-general.c. (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to omp-general.c. (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and moved to omp-general.c. (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to omp-general.c. (oacc_fn_attrib_kernels_p): Moved to omp-general.c. (oacc_fn_attrib_level): Moved to omp-offload.c. (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to omp-general.c. (get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to omp-general.c. (mark_loops_in_oacc_kernels_region): Moved to omp-expand.c. (grid_launch_attributes_trees): Likewise. (grid_attr_trees): Likewise. (grid_create_kernel_launch_attr_types): Likewise. (grid_insert_store_range_dim): Likewise. (grid_get_kernel_launch_attributes): Likewise. (get_target_argument_identifier_1): Likewise. (get_target_argument_identifier): Likewise. (get_target_argument_value): Likewise. (push_target_argument_according_to_value): Likewise. (get_target_arguments): Likewise. (expand_omp_target): Likewise. (grid_expand_omp_for_loop): Moved to omp-grid.c. (grid_arg_decl_map): Likewise. 
(grid_remap_kernel_arg_accesses): Likewise. (grid_expand_target_grid_body): Likewise. (expand_omp): Renamed to omp_expand and moved to omp-expand.c. (build_omp_regions_1): Moved to omp-expand.c. (build_omp_regions_root): Likewise. (omp_expand_local): Likewise. (build_omp_regions): Likewise. (execute_expand_omp): Likewise. (pass_data_expand_omp): Likewise. (pass_expand_omp): Likewise. (make_pass_expand_omp): Likewise. (pass_data_expand_omp_ssa): Likewise. (pass_expand_omp_ssa): Likewise. (make_pass_expand_omp_ssa): Likewise. (grid_lastprivate_predicate): Renamed to omp_grid_lastprivate_predicate and moved to omp-grid.c, made public. (grid_prop): Moved to omp-grid.c. (GRID_MISSED_MSG_PREFIX): Likewise. (grid_safe_assignment_p): Likewise. (grid_seq_only_contains_local_assignments): Likewise. (grid_find_single_omp_among_assignments_1): Likewise. (grid_find_single_omp_among_assignments): Likewise. (grid_find_ungridifiable_statement): Likewise. (grid_parallel_clauses_gridifiable): Likewise. (grid_inner_loop_gridifiable_p): Likewise. (grid_dist_follows_simple_pattern): Likewise. (grid_gfor_follows_tiling_pattern): Likewise. (grid_call_permissible_in_distribute_p): Likewise. (grid_handle_call_in_distribute): Likewise. (grid_dist_follows_tiling_pattern): Likewise. (grid_target_follows_gridifiable_pattern): Likewise. (grid_remap_prebody_decls): Likewise. (grid_var_segment): Likewise. (grid_mark_variable_segment): Likewise. (grid_copy_leading_local_assignments): Likewise. (grid_process_grid_body): Likewise. (grid_eliminate_combined_simd_part): Likewise. (grid_mark_tiling_loops): Likewise. (grid_mark_tiling_parallels_and_loops): Likewise. (grid_process_kernel_body_copy): Likewise. (grid_attempt_target_gridification): Likewise. (grid_gridify_all_targets_stmt): Likewise. (grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets and moved to omp-grid.c, made public. (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to omp-expand.c. 
(add_decls_addresses_to_decl_constructor): Moved to omp-offload.c. (omp_finish_file): Likewise. (oacc_thread_numbers): Likewise. (oacc_xform_loop): Likewise. (oacc_default_dims, oacc_min_dims): Likewise. (oacc_parse_default_dims): Likewise. (oacc_validate_dims): Likewise. (new_oacc_loop_raw): Likewise. (new_oacc_loop_outer): Likewise. (new_oacc_loop): Likewise. (new_oacc_loop_routine): Likewise. (finish_oacc_loop): Likewise. (free_oacc_loop): Likewise. (dump_oacc_loop_part): Likewise. (dump_oacc_loop): Likewise. (debug_oacc_loop): Likewise. (oacc_loop_discover_walk): Likewise. (oacc_loop_sibling_nreverse): Likewise. (oacc_loop_discovery): Likewise. (oacc_loop_xform_head_tail): Likewise. (oacc_loop_xform_loop): Likewise. (oacc_loop_process): Likewise. (oacc_loop_fixed_partitions): Likewise. (oacc_loop_auto_partitions): Likewise. (oacc_loop_partition): Likewise. (default_goacc_fork_join): Likewise. (default_goacc_reduction): Likewise. (execute_oacc_device_lower): Likewise. (default_goacc_validate_dims): Likewise. (default_goacc_dim_limit): Likewise. (pass_data_oacc_device_lower): Likewise. (pass_oacc_device_lower): Likewise. (make_pass_oacc_device_lower): Likewise. (execute_omp_device_lower): Likewise. (pass_data_omp_device_lower): Likewise. (pass_omp_device_lower): Likewise. (make_pass_omp_device_lower): Likewise. (pass_data_omp_target_link): Likewise. (pass_omp_target_link): Likewise. (find_link_var_op): Likewise. (pass_omp_target_link::execute): Likewise. (make_pass_omp_target_link): Likewise. * Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o and omp-grid.o. (GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed omp-low.h. * gimple-fold.c: Include omp-general.h instead of omp-low.h. (fold_internal_goacc_dim): Adjusted calls to get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names. * gimplify.c: Include omp-low.h. (omp_notice_variable): Adjust the call to get_oacc_fn_attrib to use its new name. 
(gimplify_omp_task): Adjusted calls to find_omp_clause to use its new name. (gimplify_omp_for): Likewise. * lto-cgraph.c: Include omp-offload.h instead of omp-low.h. * toplev.c: Include omp-offload.h instead of omp-low.h. * tree-cfg.c: Include omp-general.h instead of omp-low.h. Also include omp-expand.h. (make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its new name. (make_edges): Adjust the call to free_omp_regions to use its new name. * tree-parloops.c: Include omp-general.h. (create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use its new name. (parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use its new name. * tree-ssa-loop.c: Include omp-general.h instead of omp-low.h. (gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use its new name. * tree-vrp.c: Include omp-general.h instead of omp-low.h. (extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names. * varpool.c: Include omp-offload.h instead of omp-low.h. * gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in ifiles. * config/nvptx/nvptx.c: Include omp-general.c. (nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use its new name. (nvptx_reorg): Likewise. (nvptx_record_offload_symbol): Likewise. gcc/c-family: * c-omp.c: Include omp-general.h instead of omp-low.h. (c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new name. gcc/c/ * c-parser.c: Include omp-general.h and omp-offload.h instead of omp-low.h. (c_finish_oacc_routine): Adjusted call to get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib to use their new names. (c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to use its new name. (c_parser_oacc_update): Likewise. (c_parser_omp_simd): Likewise. (c_parser_omp_target_update): Likewise. * c-typeck.c: Include omp-general.h instead of omp-low.h. 
(c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new name. (c_finish_omp_cancellation_point): Likewise. * gimple-parser.c: Do not include omp-low.h. gcc/cp/ * parser.c: Include omp-general.h and omp-offload.h instead of omp-low.h. (cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new name. (cp_parser_omp_target_update): Likewise. (cp_parser_oacc_declare): Likewise. (cp_parser_oacc_enter_exit_data): Likewise. (cp_parser_oacc_update): Likewise. (cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib to use their new names. * semantics.c: Include omp-general.h instead of omp-low.h. (finish_omp_for): Adjusted calls to find_omp_clause to use its new name. (finish_omp_cancel): Likewise. (finish_omp_cancellation_point): Likewise. fortran/ * trans-openmp.c: Include omp-general.h. From-SVN: r243673
Diffstat (limited to 'gcc/omp-offload.c')
-rw-r--r--gcc/omp-offload.c1718
1 files changed, 1718 insertions, 0 deletions
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
new file mode 100644
index 00000000000..fabdf2d21d9
--- /dev/null
+++ b/gcc/omp-offload.c
@@ -0,0 +1,1718 @@
+/* Bits of OpenMP and OpenACC handling that is specific to device offloading
+ and a lowering pass for OpenACC device directives.
+
+ Copyright (C) 2005-2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "cgraph.h"
+#include "pretty-print.h"
+#include "diagnostic-core.h"
+#include "fold-const.h"
+#include "internal-fn.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-walk.h"
+#include "tree-cfg.h"
+#include "tree-into-ssa.h"
+#include "common/common-target.h"
+#include "omp-general.h"
+#include "omp-offload.h"
+#include "lto-section-names.h"
+#include "gomp-constants.h"
+#include "gimple-pretty-print.h"
+
+/* Describe the OpenACC looping structure of a function.  The entire
+   function is held in a 'NULL' loop (one with a NULL parent and no
+   marker).  Loops form a tree via parent/child/sibling links.  */
+
+struct oacc_loop
+{
+  oacc_loop *parent;   /* Containing loop.  */
+
+  oacc_loop *child;    /* First inner loop.  */
+
+  oacc_loop *sibling;  /* Next loop within same parent.  */
+
+  location_t loc;      /* Location of the loop start.  */
+
+  gcall *marker;       /* Initial head marker call.  */
+
+  gcall *heads[GOMP_DIM_MAX];  /* Head marker functions, one per dimension.  */
+  gcall *tails[GOMP_DIM_MAX];  /* Tail marker functions, one per dimension.  */
+
+  tree routine;        /* Pseudo-loop enclosing a routine (its FUNCTION_DECL).  */
+
+  unsigned mask;       /* Partitioning mask (GOMP_DIM_MASK bits).  */
+  unsigned inner;      /* Partitioning of inner loops.  */
+  unsigned flags;      /* Partitioning flags (OLF_*).  */
+  unsigned ifns;       /* Count of contained loop abstraction functions.  */
+  tree chunk_size;     /* Chunk size for gang-static partitioning.  */
+  gcall *head_end;     /* Final marker of head sequence.  */
+};
+
+/* Holds offload tables with decls: outlined offload functions and
+   "omp declare target" variables.  Consumed by omp_finish_file below
+   to emit the host/target lookup tables.  */
+vec<tree, va_gc> *offload_funcs, *offload_vars;
+
+/* Return level at which oacc routine may spawn a partitioned loop, or
+   -1 if it is not a routine (i.e. is an offload fn).
+
+   ATTR is the "oacc function" attribute; its TREE_VALUE is a list with
+   one node per partitioning dimension.  */
+
+static int
+oacc_fn_attrib_level (tree attr)
+{
+  tree pos = TREE_VALUE (attr);
+
+  /* An offload region (not a routine) has no TREE_PURPOSE on the
+     first dimension node.  */
+  if (!TREE_PURPOSE (pos))
+    return -1;
+
+  /* Find the first dimension with a non-zero TREE_PURPOSE; that is the
+     outermost level at which the routine may partition.  (The original
+     declared IX with a redundant initializer that the loop immediately
+     overwrote; initialize it once, in the loop header.)  */
+  int ix;
+  for (ix = 0; ix != GOMP_DIM_MAX;
+       ix++, pos = TREE_CHAIN (pos))
+    if (!integer_zerop (TREE_PURPOSE (pos)))
+      break;
+
+  return ix;
+}
+
+/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
+   adds their addresses and sizes to constructor-vector V_CTOR.
+
+   Functions contribute one element (their address); variables contribute
+   two (address, size).  */
+
+static void
+add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
+					 vec<constructor_elt, va_gc> *v_ctor)
+{
+  unsigned len = vec_safe_length (v_decls);
+  for (unsigned i = 0; i < len; i++)
+    {
+      tree it = (*v_decls)[i];
+      bool is_var = VAR_P (it);
+      /* On the accelerator compiler a link var must additionally have had
+	 a VALUE_EXPR attached (pointing at its link pointer).  */
+      bool is_link_var
+	= is_var
+#ifdef ACCEL_COMPILER
+	  && DECL_HAS_VALUE_EXPR_P (it)
+#endif
+	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
+
+      tree size = NULL_TREE;
+      if (is_var)
+	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
+
+      tree addr;
+      if (!is_link_var)
+	addr = build_fold_addr_expr (it);
+      else
+	{
+#ifdef ACCEL_COMPILER
+	  /* For "omp declare target link" vars add address of the pointer to
+	     the target table, instead of address of the var.  */
+	  tree value_expr = DECL_VALUE_EXPR (it);
+	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
+	  varpool_node::finalize_decl (link_ptr_decl);
+	  addr = build_fold_addr_expr (link_ptr_decl);
+#else
+	  addr = build_fold_addr_expr (it);
+#endif
+
+	  /* Most significant bit of the size marks "omp declare target link"
+	     vars in host and target tables.  */
+	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
+	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
+			    * BITS_PER_UNIT - 1);
+	  size = wide_int_to_tree (const_ptr_type_node, isize);
+	}
+
+      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
+      if (is_var)
+	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
+    }
+}
+
+/* Create new symbols containing (address, size) pairs for global variables,
+   marked with "omp declare target" attribute, as well as addresses for the
+   functions, which are outlined offloading regions.
+
+   When the target supports named sections, emits two array decls into
+   dedicated sections (so the linker concatenates tables from all object
+   files); otherwise records each symbol via the target hook.  */
+void
+omp_finish_file (void)
+{
+  unsigned num_funcs = vec_safe_length (offload_funcs);
+  unsigned num_vars = vec_safe_length (offload_vars);
+
+  if (num_funcs == 0 && num_vars == 0)
+    return;
+
+  if (targetm_common.have_named_sections)
+    {
+      vec<constructor_elt, va_gc> *v_f, *v_v;
+      vec_alloc (v_f, num_funcs);
+      /* Each variable contributes two elements: address and size.  */
+      vec_alloc (v_v, num_vars * 2);
+
+      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
+      add_decls_addresses_to_decl_constructor (offload_vars, v_v);
+
+      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
+						    num_vars * 2);
+      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
+						     num_funcs);
+      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
+      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
+      tree ctor_v = build_constructor (vars_decl_type, v_v);
+      tree ctor_f = build_constructor (funcs_decl_type, v_f);
+      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
+      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
+      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+				    get_identifier (".offload_func_table"),
+				    funcs_decl_type);
+      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+				   get_identifier (".offload_var_table"),
+				   vars_decl_type);
+      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
+      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
+	 otherwise a joint table in a binary will contain padding between
+	 tables from multiple object files.  */
+      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
+      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
+      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
+      DECL_INITIAL (funcs_decl) = ctor_f;
+      DECL_INITIAL (vars_decl) = ctor_v;
+      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
+      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
+
+      varpool_node::finalize_decl (vars_decl);
+      varpool_node::finalize_decl (funcs_decl);
+    }
+  else
+    {
+      /* No named sections: record each offload symbol individually via
+	 the target hook.  */
+      for (unsigned i = 0; i < num_funcs; i++)
+	{
+	  tree it = (*offload_funcs)[i];
+	  targetm.record_offload_symbol (it);
+	}
+      for (unsigned i = 0; i < num_vars; i++)
+	{
+	  tree it = (*offload_vars)[i];
+	  targetm.record_offload_symbol (it);
+	}
+    }
+}
+
+/* Find the number of threads (POS = false), or thread number (POS =
+   true) for an OpenACC region partitioned as MASK.  Setup code
+   required for the calculation is added to SEQ.
+
+   Sizes are accumulated by multiplying IFN_GOACC_DIM_SIZE results;
+   positions by the usual mixed-radix expansion over the selected
+   dimensions (outer index scaled by inner size, plus inner index).  */
+
+static tree
+oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
+{
+  /* Size queries start from the multiplicative identity; position
+     queries build up from the first selected dimension.  */
+  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
+  unsigned ix;
+
+  /* Start at gang level, and examine relevant dimension indices.  */
+  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
+    if (GOMP_DIM_MASK (ix) & mask)
+      {
+	tree arg = build_int_cst (unsigned_type_node, ix);
+
+	if (res)
+	  {
+	    /* We had an outer index, so scale that by the size of
+	       this dimension.  */
+	    tree n = create_tmp_var (integer_type_node);
+	    gimple *call
+	      = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
+
+	    gimple_call_set_lhs (call, n);
+	    gimple_seq_add_stmt (seq, call);
+	    res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
+	  }
+	if (pos)
+	  {
+	    /* Determine index in this dimension.  */
+	    tree id = create_tmp_var (integer_type_node);
+	    gimple *call = gimple_build_call_internal
+	      (IFN_GOACC_DIM_POS, 1, arg);
+
+	    gimple_call_set_lhs (call, id);
+	    gimple_seq_add_stmt (seq, call);
+	    if (res)
+	      res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
+	    else
+	      res = id;
+	  }
+      }
+
+  /* MASK selected no dimensions: single thread, thread number zero.  */
+  if (res == NULL_TREE)
+    res = integer_zero_node;
+
+  return res;
+}
+
+/* Transform IFN_GOACC_LOOP calls to actual code.  See
+   expand_oacc_for for where these are generated.  At the vector
+   level, we stride loops, such that each member of a warp will
+   operate on adjacent iterations.  At the worker and gang level,
+   each gang/warp executes a set of contiguous iterations.  Chunking
+   can override this such that each iteration engine executes a
+   contiguous chunk, and then moves on to stride to the next chunk.
+
+   Call argument layout (as read below): 0 = loop kind, 1 = direction,
+   2 = range, 3 = step, 4 = chunk size, 5 = partitioning mask,
+   6 = chunk number (OFFSET) or offset (BOUND) -- kind-dependent.  */
+
+static void
+oacc_xform_loop (gcall *call)
+{
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  enum ifn_goacc_loop_kind code
+    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
+  tree dir = gimple_call_arg (call, 1);
+  tree range = gimple_call_arg (call, 2);
+  tree step = gimple_call_arg (call, 3);
+  tree chunk_size = NULL_TREE;
+  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
+  tree lhs = gimple_call_lhs (call);
+  tree type = TREE_TYPE (lhs);
+  tree diff_type = TREE_TYPE (range);
+  tree r = NULL_TREE;
+  gimple_seq seq = NULL;
+  bool chunking = false, striding = true;
+  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
+					    // (lowest set bit of MASK).
+  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
+
+#ifdef ACCEL_COMPILER
+  /* Only the accelerator compiler consults the requested chunk size;
+     the host compiler keeps the striding defaults set above.  */
+  chunk_size = gimple_call_arg (call, 4);
+  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
+      || integer_zerop (chunk_size))   /* Default (also static).  */
+    {
+      /* If we're at the gang level, we want each to execute a
+	 contiguous run of iterations.  Otherwise we want each element
+	 to stride.  */
+      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
+      chunking = false;
+    }
+  else
+    {
+      /* Chunk of size 1 is striding.  */
+      striding = integer_onep (chunk_size);
+      chunking = !striding;
+    }
+#endif
+
+  /* striding=true, chunking=true
+       -> invalid.
+     striding=true, chunking=false
+       -> chunks=1
+     striding=false,chunking=true
+       -> chunks=ceil (range/(chunksize*threads*step))
+     striding=false,chunking=false
+       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
+  push_gimplify_context (true);
+
+  switch (code)
+    {
+    default: gcc_unreachable ();
+
+    case IFN_GOACC_LOOP_CHUNKS:
+      if (!chunking)
+	r = build_int_cst (type, 1);
+      else
+	{
+	  /* chunk_max
+	     = (range - dir) / (chunks * step * num_threads) + dir  */
+	  tree per = oacc_thread_numbers (false, mask, &seq);
+	  per = fold_convert (type, per);
+	  chunk_size = fold_convert (type, chunk_size);
+	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
+	  per = fold_build2 (MULT_EXPR, type, per, step);
+	  r = build2 (MINUS_EXPR, type, range, dir);
+	  r = build2 (PLUS_EXPR, type, r, per);
+	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
+	}
+      break;
+
+    case IFN_GOACC_LOOP_STEP:
+      {
+	/* If striding, step by the entire compute volume, otherwise
+	   step by the inner volume.  */
+	unsigned volume = striding ? mask : inner_mask;
+
+	r = oacc_thread_numbers (false, volume, &seq);
+	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
+      }
+      break;
+
+    case IFN_GOACC_LOOP_OFFSET:
+      if (striding)
+	{
+	  /* Striding: offset is simply this thread's number.  */
+	  r = oacc_thread_numbers (true, mask, &seq);
+	  r = fold_convert (diff_type, r);
+	}
+      else
+	{
+	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
+	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
+	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
+				     inner_size, outer_size);
+
+	  volume = fold_convert (diff_type, volume);
+	  if (chunking)
+	    chunk_size = fold_convert (diff_type, chunk_size);
+	  else
+	    {
+	      /* No chunking: one chunk spanning the whole space,
+		 chunk_size = ceil ((range - dir) / per) rounded up.  */
+	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+
+	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
+	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
+	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
+	    }
+
+	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
+			      fold_convert (diff_type, inner_size));
+	  r = oacc_thread_numbers (true, outer_mask, &seq);
+	  r = fold_convert (diff_type, r);
+	  r = build2 (MULT_EXPR, diff_type, r, span);
+
+	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
+	  inner = fold_convert (diff_type, inner);
+	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
+
+	  if (chunking)
+	    {
+	      /* Argument 6 is the current chunk number; advance by
+		 volume * chunk_size per chunk.  */
+	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
+	      tree per
+		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
+	      per = build2 (MULT_EXPR, diff_type, per, chunk);
+
+	      r = build2 (PLUS_EXPR, diff_type, r, per);
+	    }
+	}
+      r = fold_build2 (MULT_EXPR, diff_type, r, step);
+      if (type != diff_type)
+	r = fold_convert (type, r);
+      break;
+
+    case IFN_GOACC_LOOP_BOUND:
+      if (striding)
+	r = range;
+      else
+	{
+	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
+	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
+	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
+				     inner_size, outer_size);
+
+	  volume = fold_convert (diff_type, volume);
+	  if (chunking)
+	    chunk_size = fold_convert (diff_type, chunk_size);
+	  else
+	    {
+	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+
+	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
+	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
+	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
+	    }
+
+	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
+			      fold_convert (diff_type, inner_size));
+
+	  r = fold_build2 (MULT_EXPR, diff_type, span, step);
+
+	  /* Argument 6 here is the offset computed by the matching
+	     IFN_GOACC_LOOP_OFFSET call.  */
+	  tree offset = gimple_call_arg (call, 6);
+	  r = build2 (PLUS_EXPR, diff_type, r,
+		      fold_convert (diff_type, offset));
+	  /* Clamp the bound to RANGE, direction-dependently.  */
+	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
+		      diff_type, r, range);
+	}
+      if (diff_type != type)
+	r = fold_convert (type, r);
+      break;
+    }
+
+  gimplify_assign (lhs, r, &seq);
+
+  pop_gimplify_context (NULL);
+
+  /* Replace the IFN_GOACC_LOOP call with the generated sequence.  */
+  gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Default partitioned and minimum partitioned dimensions.  Both are
+   filled in by oacc_parse_default_dims below: defaults start at -1
+   (unset), minima at 1.  */
+
+static int oacc_default_dims[GOMP_DIM_MAX];
+static int oacc_min_dims[GOMP_DIM_MAX];
+
+/* Parse the default dimension parameter.  This is a set of
+   :-separated optional compute dimensions.  Each specified dimension
+   is a positive integer.  When device type support is added, it is
+   planned to be a comma separated list of such compute dimensions,
+   with all but the first prefixed by the colon-terminated device
+   type.  */
+
+static void
+oacc_parse_default_dims (const char *dims)
+{
+  int ix;
+
+  /* Reset: defaults unset (-1), minima 1.  */
+  for (ix = GOMP_DIM_MAX; ix--;)
+    {
+      oacc_default_dims[ix] = -1;
+      oacc_min_dims[ix] = 1;
+    }
+
+#ifndef ACCEL_COMPILER
+  /* Cannot be overridden on the host.  */
+  dims = NULL;
+#endif
+  if (dims)
+    {
+      const char *pos = dims;
+
+      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
+	{
+	  /* Dimensions after the first are preceded by ':'.  */
+	  if (ix)
+	    {
+	      if (*pos != ':')
+		goto malformed;
+	      pos++;
+	    }
+
+	  /* An empty component (next char is another ':') leaves this
+	     dimension unset.  */
+	  if (*pos != ':')
+	    {
+	      long val;
+	      const char *eptr;
+
+	      /* Clear errno so a strtol range error is detectable.  */
+	      errno = 0;
+	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
+	      /* Reject range errors, non-positive values, and values
+		 that do not fit in an int.  */
+	      if (errno || val <= 0 || (int) val != val)
+		goto malformed;
+	      pos = eptr;
+	      oacc_default_dims[ix] = (int) val;
+	    }
+	}
+      if (*pos)
+	{
+	malformed:
+	  error_at (UNKNOWN_LOCATION,
+		    "-fopenacc-dim operand is malformed at '%s'", pos);
+	}
+    }
+
+  /* Allow the backend to validate the dimensions.  */
+  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
+  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
+}
+
+/* Validate and update the dimensions for offloaded FN.  ATTRS is the
+   raw attribute.  DIMS is an array of dimensions, which is filled in.
+   LEVEL is the partitioning level of a routine, or -1 for an offload
+   region itself.  USED is the mask of partitioned execution in the
+   function.  */
+
+static void
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
+{
+  tree purpose[GOMP_DIM_MAX];
+  unsigned ix;
+  tree pos = TREE_VALUE (attrs);
+  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
+
+  /* Make sure the attribute creator attached the dimension
+     information.  */
+  gcc_assert (pos);
+
+  /* Unpack the attribute's TREE_LIST: TREE_VALUE is the dimension size
+     (NULL meaning unset, recorded as -1); TREE_PURPOSE is preserved so
+     the attribute can be rebuilt below.  */
+  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+    {
+      purpose[ix] = TREE_PURPOSE (pos);
+      tree val = TREE_VALUE (pos);
+      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
+      pos = TREE_CHAIN (pos);
+    }
+
+  bool changed = targetm.goacc.validate_dims (fn, dims, level);
+
+  /* Default anything left to 1 or a partitioned default.  */
+  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
+    if (dims[ix] < 0)
+      {
+	/* The OpenACC spec says 'If the [num_gangs] clause is not
+	   specified, an implementation-defined default will be used;
+	   the default may depend on the code within the construct.'
+	   (2.5.6).  Thus an implementation is free to choose
+	   non-unity default for a parallel region that doesn't have
+	   any gang-partitioned loops.  However, it appears that there
+	   is a sufficient body of user code that expects non-gang
+	   partitioned regions to not execute in gang-redundant mode.
+	   So we (a) don't warn about the non-portability and (b) pick
+	   the minimum permissible dimension size when there is no
+	   partitioned execution.  Otherwise we pick the global
+	   default for the dimension, which the user can control.  The
+	   same wording and logic applies to num_workers and
+	   vector_length, however the worker- or vector- single
+	   execution doesn't have the same impact as gang-redundant
+	   execution.  (If the minimum gang-level partioning is not 1,
+	   the target is probably too confusing.)  */
+	dims[ix] = (used & GOMP_DIM_MASK (ix)
+		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
+	changed = true;
+      }
+
+  if (changed)
+    {
+      /* Replace the attribute with new values.  */
+      pos = NULL_TREE;
+      for (ix = GOMP_DIM_MAX; ix--;)
+	{
+	  pos = tree_cons (purpose[ix],
+			   build_int_cst (integer_type_node, dims[ix]),
+			   pos);
+	  /* NOTE(review): TREE_PUBLIC on a TREE_LIST node appears to be
+	     (ab)used as a spare flag marking kernels regions -- confirm
+	     against oacc_fn_attrib_kernels_p.  */
+	  if (is_kernel)
+	    TREE_PUBLIC (pos) = 1;
+	}
+      oacc_replace_fn_attrib (fn, pos);
+    }
+}
+
+/* Create an empty OpenACC loop structure at LOC, linked as the new
+   first child of PARENT (if any).  */
+
+static oacc_loop *
+new_oacc_loop_raw (oacc_loop *parent, location_t loc)
+{
+  oacc_loop *loop = XCNEW (oacc_loop);
+
+  loop->parent = parent;
+  loop->child = loop->sibling = NULL;
+
+  /* Push onto the front of PARENT's child list.  */
+  if (parent)
+    {
+      loop->sibling = parent->child;
+      parent->child = loop;
+    }
+
+  loop->loc = loc;
+  loop->marker = NULL;
+  memset (loop->heads, 0, sizeof (loop->heads));
+  memset (loop->tails, 0, sizeof (loop->tails));
+  loop->routine = NULL_TREE;
+
+  /* NOTE(review): XCNEW presumably returns zeroed storage, which would
+     make the explicit clearing below redundant but self-documenting.  */
+  loop->mask = loop->flags = loop->inner = 0;
+  loop->ifns = 0;
+  loop->chunk_size = 0;
+  loop->head_end = NULL;
+
+  return loop;
+}
+
+/* Create an outermost, dummy OpenACC loop for offloaded function
+   DECL.  The loop has no parent and carries the function's source
+   location.  */
+
+static oacc_loop *
+new_oacc_loop_outer (tree decl)
+{
+  location_t fn_loc = DECL_SOURCE_LOCATION (decl);
+
+  return new_oacc_loop_raw (NULL, fn_loc);
+}
+
+/* Start a new OpenACC loop structure beginning at head marker HEAD.
+ Link into PARENT loop. Return the new loop. */
+
+static oacc_loop *
+new_oacc_loop (oacc_loop *parent, gcall *marker)
+{
+ oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
+
+ loop->marker = marker;
+
+ /* TODO: This is where device_type flattening would occur for the loop
+ flags. */
+
+ loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
+
+ tree chunk_size = integer_zero_node;
+ if (loop->flags & OLF_GANG_STATIC)
+ chunk_size = gimple_call_arg (marker, 4);
+ loop->chunk_size = chunk_size;
+
+ return loop;
+}
+
+/* Create a dummy loop encompassing a call to a openACC routine.
+ Extract the routine's partitioning requirements. */
+
+static void
+new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
+{
+ oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
+ int level = oacc_fn_attrib_level (attrs);
+
+ gcc_assert (level >= 0);
+
+ loop->marker = call;
+ loop->routine = decl;
+ loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
+ ^ (GOMP_DIM_MASK (level) - 1));
+}
+
+/* Finish off the current OpenACC loop ending at tail marker TAIL.
+ Return the parent loop. */
+
+static oacc_loop *
+finish_oacc_loop (oacc_loop *loop)
+{
+ /* If the loop has been collapsed, don't partition it. */
+ if (!loop->ifns)
+ loop->mask = loop->flags = 0;
+ return loop->parent;
+}
+
+/* Free all OpenACC loop structures within LOOP (inclusive). */
+
+static void
+free_oacc_loop (oacc_loop *loop)
+{
+ if (loop->sibling)
+ free_oacc_loop (loop->sibling);
+ if (loop->child)
+ free_oacc_loop (loop->child);
+
+ free (loop);
+}
+
+/* Dump out the OpenACC loop head or tail beginning at FROM. */
+
+static void
+dump_oacc_loop_part (FILE *file, gcall *from, int depth,
+ const char *title, int level)
+{
+ enum ifn_unique_kind kind
+ = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
+
+ fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ /* Stop at the next IFN_UNIQUE marker of the same kind. */
+ if (gimple_call_internal_p (stmt, IFN_UNIQUE))
+ {
+ enum ifn_unique_kind k
+ = ((enum ifn_unique_kind) TREE_INT_CST_LOW
+ (gimple_call_arg (stmt, 0)));
+
+ if (k == kind && stmt != from)
+ break;
+ }
+ print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
+
+ gsi_next (&gsi);
+ /* The marker sequence may continue into a single fall-through
+ successor block. */
+ while (gsi_end_p (gsi))
+ gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
+ }
+}
+
+/* Dump OpenACC loops LOOP, its siblings and its children. */
+
+static void
+dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
+{
+ int ix;
+
+ fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
+ loop->flags, loop->mask,
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
+
+ if (loop->marker)
+ print_gimple_stmt (file, loop->marker, depth * 2, 0);
+
+ if (loop->routine)
+ fprintf (file, "%*sRoutine %s:%u:%s\n",
+ depth * 2, "", DECL_SOURCE_FILE (loop->routine),
+ DECL_SOURCE_LINE (loop->routine),
+ IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
+
+ /* Heads are dumped outermost-in, tails innermost-out. */
+ for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
+ if (loop->heads[ix])
+ dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
+ for (ix = GOMP_DIM_MAX; ix--;)
+ if (loop->tails[ix])
+ dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
+
+ if (loop->child)
+ dump_oacc_loop (file, loop->child, depth + 1);
+ if (loop->sibling)
+ dump_oacc_loop (file, loop->sibling, depth);
+}
+
+void debug_oacc_loop (oacc_loop *);
+
+/* Dump loops to stderr. */
+
+DEBUG_FUNCTION void
+debug_oacc_loop (oacc_loop *loop)
+{
+ dump_oacc_loop (stderr, loop, 0);
+}
+
+/* DFS walk of basic blocks BB onwards, creating OpenACC loop
+ structures as we go. By construction these loops are properly
+ nested. */
+
+static void
+oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
+{
+ int marker = 0; /* Markers seen so far in the current sequence. */
+ int remaining = 0; /* Markers still expected in the sequence. */
+
+ if (bb->flags & BB_VISITED)
+ return;
+
+ follow:
+ bb->flags |= BB_VISITED;
+
+ /* Scan for loop markers. */
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (!is_gimple_call (stmt))
+ continue;
+
+ gcall *call = as_a <gcall *> (stmt);
+
+ /* If this is a routine, make a dummy loop for it. */
+ if (tree decl = gimple_call_fndecl (call))
+ if (tree attrs = oacc_get_fn_attrib (decl))
+ {
+ gcc_assert (!marker);
+ new_oacc_loop_routine (loop, call, decl, attrs);
+ }
+
+ if (!gimple_call_internal_p (call))
+ continue;
+
+ switch (gimple_call_internal_fn (call))
+ {
+ default:
+ break;
+
+ case IFN_GOACC_LOOP:
+ /* Count the goacc loop abstraction fns, to determine if the
+ loop was collapsed already. */
+ loop->ifns++;
+ break;
+
+ case IFN_UNIQUE:
+ enum ifn_unique_kind kind
+ = (enum ifn_unique_kind) (TREE_INT_CST_LOW
+ (gimple_call_arg (call, 0)));
+ if (kind == IFN_UNIQUE_OACC_HEAD_MARK
+ || kind == IFN_UNIQUE_OACC_TAIL_MARK)
+ {
+ /* A 2-argument marker terminates the head or tail
+ sequence. */
+ if (gimple_call_num_args (call) == 2)
+ {
+ gcc_assert (marker && !remaining);
+ marker = 0;
+ if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
+ loop = finish_oacc_loop (loop);
+ else
+ loop->head_end = call;
+ }
+ else
+ {
+ /* Argument 2 carries the total marker count, which
+ must be consistent across the sequence. */
+ int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+
+ if (!marker)
+ {
+ if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
+ loop = new_oacc_loop (loop, call);
+ remaining = count;
+ }
+ gcc_assert (count == remaining);
+ if (remaining)
+ {
+ remaining--;
+ /* Heads fill forwards, tails fill backwards, so
+ heads[i] and tails[i] describe the same level. */
+ if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
+ loop->heads[marker] = call;
+ else
+ loop->tails[remaining] = call;
+ }
+ marker++;
+ }
+ }
+ }
+ }
+ /* A partially-consumed marker sequence must continue in the single
+ successor block. */
+ if (remaining || marker)
+ {
+ bb = single_succ (bb);
+ gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
+ goto follow;
+ }
+
+ /* Walk successor blocks. */
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ oacc_loop_discover_walk (loop, e->dest);
+}
+
+/* LOOP is the first sibling. Reverse the order in place and return
+ the new first sibling. Recurse to child loops. */
+
+static oacc_loop *
+oacc_loop_sibling_nreverse (oacc_loop *loop)
+{
+ oacc_loop *last = NULL;
+ do
+ {
+ if (loop->child)
+ loop->child = oacc_loop_sibling_nreverse (loop->child);
+
+ oacc_loop *next = loop->sibling;
+ loop->sibling = last;
+ last = loop;
+ loop = next;
+ }
+ while (loop);
+
+ return last;
+}
+
+/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
+ the current function. */
+
+static oacc_loop *
+oacc_loop_discovery ()
+{
+ /* Clear basic block flags, in particular BB_VISITED which we're going to use
+ in the following. */
+ clear_bb_flags ();
+
+ oacc_loop *top = new_oacc_loop_outer (current_function_decl);
+ oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
+
+ /* The siblings were constructed in reverse order, reverse them so
+ that diagnostics come out in an unsurprising order. */
+ top = oacc_loop_sibling_nreverse (top);
+
+ return top;
+}
+
+/* Transform the abstract internal function markers starting at FROM
+ to be for partitioning level LEVEL. Stop when we meet another HEAD
+ or TAIL marker. */
+
+static void
+oacc_loop_xform_head_tail (gcall *from, int level)
+{
+ enum ifn_unique_kind kind
+ = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
+ tree replacement = build_int_cst (unsigned_type_node, level);
+
+ for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (gimple_call_internal_p (stmt, IFN_UNIQUE))
+ {
+ enum ifn_unique_kind k
+ = ((enum ifn_unique_kind)
+ TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
+
+ /* Patch the level argument of fork/join; stop at the next
+ marker of the same kind as FROM. */
+ if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
+ *gimple_call_arg_ptr (stmt, 2) = replacement;
+ else if (k == kind && stmt != from)
+ break;
+ }
+ else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
+ *gimple_call_arg_ptr (stmt, 3) = replacement;
+
+ gsi_next (&gsi);
+ /* The sequence may continue into a single fall-through
+ successor block. */
+ while (gsi_end_p (gsi))
+ gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
+ }
+}
+
+/* Transform the IFN_GOACC_LOOP internal functions by providing the
+ determined partitioning mask and chunking argument. END_MARKER
+ points at the end IFN_HEAD_TAIL call introducing the loop. IFNS
+ is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is
+ the replacement partitioning mask and CHUNK_ARG is the replacement
+ chunking arg. */
+
+static void
+oacc_loop_xform_loop (gcall *end_marker, unsigned ifns,
+ tree mask_arg, tree chunk_arg)
+{
+ gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
+
+ gcc_checking_assert (ifns);
+ for (;;)
+ {
+ for (; !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (!is_gimple_call (stmt))
+ continue;
+
+ gcall *call = as_a <gcall *> (stmt);
+
+ if (!gimple_call_internal_p (call))
+ continue;
+
+ if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
+ continue;
+
+ *gimple_call_arg_ptr (call, 5) = mask_arg;
+ *gimple_call_arg_ptr (call, 4) = chunk_arg;
+ ifns--;
+ /* Return once all expected IFN_GOACC_LOOP calls are patched. */
+ if (!ifns)
+ return;
+ }
+
+ /* The LOOP_BOUND ifn could be in the single successor
+ block. */
+ basic_block bb = single_succ (gsi_bb (gsi));
+ gsi = gsi_start_bb (bb);
+ }
+}
+
+/* Process the discovered OpenACC loops, setting the correct
+ partitioning level etc. */
+
+static void
+oacc_loop_process (oacc_loop *loop)
+{
+ if (loop->child)
+ oacc_loop_process (loop->child);
+
+ if (loop->mask && !loop->routine)
+ {
+ int ix;
+ unsigned mask = loop->mask;
+ unsigned dim = GOMP_DIM_GANG;
+ tree mask_arg = build_int_cst (unsigned_type_node, mask);
+ tree chunk_arg = loop->chunk_size;
+
+ oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg);
+
+ /* Map the IX'th set bit of MASK to its dimension and patch the
+ corresponding head/tail marker sequences. */
+ for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
+ {
+ while (!(GOMP_DIM_MASK (dim) & mask))
+ dim++;
+
+ oacc_loop_xform_head_tail (loop->heads[ix], dim);
+ oacc_loop_xform_head_tail (loop->tails[ix], dim);
+
+ mask ^= GOMP_DIM_MASK (dim);
+ }
+ }
+
+ if (loop->sibling)
+ oacc_loop_process (loop->sibling);
+}
+
+/* Walk the OpenACC loop hierarchy checking and assigning the
+ programmer-specified partitionings. OUTER_MASK is the partitioning
+ this loop is contained within. Return mask of partitioning
+ encountered. If any auto loops are discovered, set GOMP_DIM_MAX
+ bit. */
+
+static unsigned
+oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
+{
+ unsigned this_mask = loop->mask;
+ unsigned mask_all = 0;
+ bool noisy = true;
+
+#ifdef ACCEL_COMPILER
+ /* When device_type is supported, we want the device compiler to be
+ noisy, if the loop parameters are device_type-specific. */
+ noisy = false;
+#endif
+
+ if (!loop->routine)
+ {
+ bool auto_par = (loop->flags & OLF_AUTO) != 0;
+ bool seq_par = (loop->flags & OLF_SEQ) != 0;
+
+ /* Extract the explicit gang/worker/vector bits from the loop
+ flags. */
+ this_mask = ((loop->flags >> OLF_DIM_BASE)
+ & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
+
+ /* At most one of explicit partitioning, auto and seq may be
+ specified. */
+ if ((this_mask != 0) + auto_par + seq_par > 1)
+ {
+ if (noisy)
+ error_at (loop->loc,
+ seq_par
+ ? "%<seq%> overrides other OpenACC loop specifiers"
+ : "%<auto%> conflicts with other OpenACC loop "
+ "specifiers");
+ auto_par = false;
+ loop->flags &= ~OLF_AUTO;
+ if (seq_par)
+ {
+ loop->flags &=
+ ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
+ this_mask = 0;
+ }
+ }
+ if (auto_par && (loop->flags & OLF_INDEPENDENT))
+ mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
+ }
+
+ if (this_mask & outer_mask)
+ {
+ const oacc_loop *outer;
+ for (outer = loop->parent; outer; outer = outer->parent)
+ if (outer->mask & this_mask)
+ break;
+
+ if (noisy)
+ {
+ if (outer)
+ {
+ error_at (loop->loc,
+ "%s uses same OpenACC parallelism as containing loop",
+ loop->routine ? "routine call" : "inner loop");
+ inform (outer->loc, "containing loop here");
+ }
+ else
+ error_at (loop->loc,
+ "%s uses OpenACC parallelism disallowed by containing "
+ "routine", loop->routine ? "routine call" : "loop");
+
+ if (loop->routine)
+ inform (DECL_SOURCE_LOCATION (loop->routine),
+ "routine %qD declared here", loop->routine);
+ }
+ this_mask &= ~outer_mask;
+ }
+ else
+ {
+ unsigned outermost = least_bit_hwi (this_mask);
+
+ /* The loop's outermost axis must be inside all axes already
+ claimed by containing loops. */
+ if (outermost && outermost <= outer_mask)
+ {
+ if (noisy)
+ {
+ error_at (loop->loc,
+ "incorrectly nested OpenACC loop parallelism");
+
+ /* NOTE(review): comparing FLAGS against a partition mask
+ looks suspicious here — verify this shouldn't test
+ outer->mask instead. */
+ const oacc_loop *outer;
+ for (outer = loop->parent;
+ outer->flags && outer->flags < outermost;
+ outer = outer->parent)
+ continue;
+ inform (outer->loc, "containing loop here");
+ }
+
+ this_mask &= ~outermost;
+ }
+ }
+
+ loop->mask = this_mask;
+ mask_all |= this_mask;
+
+ if (loop->child)
+ {
+ loop->inner = oacc_loop_fixed_partitions (loop->child,
+ outer_mask | this_mask);
+ mask_all |= loop->inner;
+ }
+
+ if (loop->sibling)
+ mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
+
+ return mask_all;
+}
+
+/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
+ OUTER_MASK is the partitioning this loop is contained within.
+ Return the cumulative partitioning used by this loop, siblings and
+ children. */
+
+static unsigned
+oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
+{
+ bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
+ bool noisy = true;
+
+#ifdef ACCEL_COMPILER
+ /* When device_type is supported, we want the device compiler to be
+ noisy, if the loop parameters are device_type-specific. */
+ noisy = false;
+#endif
+
+ if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
+ {
+ /* Allocate the outermost loop at the outermost available
+ level. */
+ unsigned this_mask = outer_mask + 1;
+
+ if (!(this_mask & loop->inner))
+ loop->mask = this_mask;
+ }
+
+ if (loop->child)
+ {
+ unsigned child_mask = outer_mask | loop->mask;
+
+ if (loop->mask || assign)
+ child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);
+
+ loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
+ }
+
+ if (assign && !loop->mask)
+ {
+ /* Allocate the loop at the innermost available level. */
+ unsigned this_mask = 0;
+
+ /* Determine the outermost partitioning used within this loop. */
+ this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
+ this_mask = least_bit_hwi (this_mask);
+
+ /* Pick the partitioning just inside that one. */
+ this_mask >>= 1;
+
+ /* And avoid picking one used by an outer loop. */
+ this_mask &= ~outer_mask;
+
+ if (!this_mask && noisy)
+ warning_at (loop->loc, 0,
+ "insufficient partitioning available to parallelize loop");
+
+ loop->mask = this_mask;
+ }
+
+ if (assign && dump_file)
+ fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
+ loop->mask);
+
+ unsigned inner_mask = 0;
+
+ if (loop->sibling)
+ inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
+
+ inner_mask |= loop->inner | loop->mask;
+
+ return inner_mask;
+}
+
+/* Walk the OpenACC loop hierarchy to check and assign partitioning
+ axes. Return mask of partitioning. */
+
+static unsigned
+oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
+{
+ unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
+
+ /* GOMP_DIM_MAX bit signals that auto loops were discovered; clear
+ it and run the auto-assignment pass. */
+ if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
+ {
+ mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
+ mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
+ }
+ return mask_all;
+}
+
+/* Default fork/join early expander. Delete the function calls if
+ there is no RTL expander. */
+
+bool
+default_goacc_fork_join (gcall *ARG_UNUSED (call),
+ const int *ARG_UNUSED (dims), bool is_fork)
+{
+ /* Keep the call only if the target provides the corresponding RTL
+ expander. */
+ if (is_fork)
+ return targetm.have_oacc_fork ();
+ else
+ return targetm.have_oacc_join ();
+}
+
+/* Default goacc.reduction early expander.
+
+ LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
+ If RES_PTR is not integer-zerop:
+ SETUP - emit 'LHS = *RES_PTR', LHS = NULL
+ TEARDOWN - emit '*RES_PTR = VAR'
+ If LHS is not NULL
+ emit 'LHS = VAR' */
+
+void
+default_goacc_reduction (gcall *call)
+{
+ unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
+ gimple_stmt_iterator gsi = gsi_for_stmt (call);
+ tree lhs = gimple_call_lhs (call);
+ tree var = gimple_call_arg (call, 2);
+ gimple_seq seq = NULL;
+
+ if (code == IFN_GOACC_REDUCTION_SETUP
+ || code == IFN_GOACC_REDUCTION_TEARDOWN)
+ {
+ /* Setup and Teardown need to copy from/to the receiver object,
+ if there is one. */
+ tree ref_to_res = gimple_call_arg (call, 1);
+
+ if (!integer_zerop (ref_to_res))
+ {
+ tree dst = build_simple_mem_ref (ref_to_res);
+ tree src = var;
+
+ if (code == IFN_GOACC_REDUCTION_SETUP)
+ {
+ /* SETUP copies from the receiver object into the call's
+ LHS; clearing LHS suppresses the final LHS = VAR copy. */
+ src = dst;
+ dst = lhs;
+ lhs = NULL;
+ }
+ gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
+ }
+ }
+
+ /* Copy VAR to LHS, if there is an LHS. */
+ if (lhs)
+ gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
+
+ /* SEQ may be empty, in which case the call is simply removed. */
+ gsi_replace_with_seq (&gsi, seq, true);
+}
+
+/* Main entry point for oacc transformations which run on the device
+ compiler after LTO, so we know what the target device is at this
+ point (including the host fallback). */
+
+static unsigned int
+execute_oacc_device_lower ()
+{
+ tree attrs = oacc_get_fn_attrib (current_function_decl);
+
+ if (!attrs)
+ /* Not an offloaded function. */
+ return 0;
+
+ /* Parse the default dim argument exactly once. The flag variable
+ doubles as the done-sentinel: after parsing it is pointed at
+ itself. */
+ if ((const void *)flag_openacc_dims != &flag_openacc_dims)
+ {
+ oacc_parse_default_dims (flag_openacc_dims);
+ flag_openacc_dims = (char *)&flag_openacc_dims;
+ }
+
+ /* Discover, partition and process the loops. */
+ oacc_loop *loops = oacc_loop_discovery ();
+ int fn_level = oacc_fn_attrib_level (attrs);
+
+ if (dump_file)
+ fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
+ ? "Function is kernels offload\n"
+ : fn_level < 0 ? "Function is parallel offload\n"
+ : "Function is routine level %d\n", fn_level);
+
+ /* A routine at level L may not use partitioning outside L. */
+ unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
+ unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+ int dims[GOMP_DIM_MAX];
+
+ oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
+
+ if (dump_file)
+ {
+ const char *comma = "Compute dimensions [";
+ for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
+ fprintf (dump_file, "%s%d", comma, dims[ix]);
+ fprintf (dump_file, "]\n");
+ }
+
+ oacc_loop_process (loops);
+ if (dump_file)
+ {
+ fprintf (dump_file, "OpenACC loops\n");
+ dump_oacc_loop (dump_file, loops, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Offloaded targets may introduce new basic blocks, which require
+ dominance information to update SSA. */
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ /* Now lower internal loop functions to target-specific code
+ sequences. */
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_call (stmt))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ gcall *call = as_a <gcall *> (stmt);
+ if (!gimple_call_internal_p (call))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ /* Rewind to allow rescan. The expanders may replace CALL with
+ a sequence we want to iterate over again. */
+ gsi_prev (&gsi);
+ bool rescan = false, remove = false;
+ enum internal_fn ifn_code = gimple_call_internal_fn (call);
+
+ switch (ifn_code)
+ {
+ default: break;
+
+ case IFN_GOACC_LOOP:
+ oacc_xform_loop (call);
+ rescan = true;
+ break;
+
+ case IFN_GOACC_REDUCTION:
+ /* Mark the function for SSA renaming. */
+ mark_virtual_operands_for_renaming (cfun);
+
+ /* If the level is -1, this ended up being an unused
+ axis. Handle as a default. */
+ if (integer_minus_onep (gimple_call_arg (call, 3)))
+ default_goacc_reduction (call);
+ else
+ targetm.goacc.reduction (call);
+ rescan = true;
+ break;
+
+ case IFN_UNIQUE:
+ {
+ enum ifn_unique_kind kind
+ = ((enum ifn_unique_kind)
+ TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
+
+ switch (kind)
+ {
+ default:
+ gcc_unreachable ();
+
+ case IFN_UNIQUE_OACC_FORK:
+ case IFN_UNIQUE_OACC_JOIN:
+ /* Level -1 marks an unused axis; otherwise let the
+ target decide whether to keep the call. */
+ if (integer_minus_onep (gimple_call_arg (call, 2)))
+ remove = true;
+ else if (!targetm.goacc.fork_join
+ (call, dims, kind == IFN_UNIQUE_OACC_FORK))
+ remove = true;
+ break;
+
+ case IFN_UNIQUE_OACC_HEAD_MARK:
+ case IFN_UNIQUE_OACC_TAIL_MARK:
+ remove = true;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (gsi_end_p (gsi))
+ /* We rewound past the beginning of the BB. */
+ gsi = gsi_start_bb (bb);
+ else
+ /* Undo the rewind. */
+ gsi_next (&gsi);
+
+ if (remove)
+ {
+ if (gimple_vdef (call))
+ replace_uses_by (gimple_vdef (call), gimple_vuse (call));
+ if (gimple_call_lhs (call))
+ {
+ /* Propagate the data dependency var. */
+ gimple *ass = gimple_build_assign (gimple_call_lhs (call),
+ gimple_call_arg (call, 1));
+ gsi_replace (&gsi, ass, false);
+ }
+ else
+ gsi_remove (&gsi, true);
+ }
+ else if (!rescan)
+ /* If not rescanning, advance over the call. */
+ gsi_next (&gsi);
+ }
+
+ free_oacc_loop (loops);
+
+ return 0;
+}
+
+/* Default launch dimension validator. Force everything to 1. A
+ backend that wants to provide larger dimensions must override this
+ hook. Returns true iff any dimension was changed. */
+
+bool
+default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
+ int ARG_UNUSED (fn_level))
+{
+ bool changed = false;
+
+ for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
+ {
+ if (dims[ix] != 1)
+ {
+ dims[ix] = 1;
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+/* Default dimension bound is unknown on accelerator and 1 on host. */
+
+int
+default_goacc_dim_limit (int ARG_UNUSED (axis))
+{
+#ifdef ACCEL_COMPILER
+ return 0; /* Unknown. */
+#else
+ return 1; /* Host fallback is single-threaded per axis. */
+#endif
+}
+
+namespace {
+
+const pass_data pass_data_oacc_device_lower =
+{
+ GIMPLE_PASS, /* type */
+ "oaccdevlow", /* name */
+ OPTGROUP_OPENMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+/* Device lowering of OpenACC offloaded functions; gated on
+ -fopenacc. */
+
+class pass_oacc_device_lower : public gimple_opt_pass
+{
+public:
+ pass_oacc_device_lower (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return flag_openacc; };
+
+ virtual unsigned int execute (function *)
+ {
+ return execute_oacc_device_lower ();
+ }
+
+}; // class pass_oacc_device_lower
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_oacc_device_lower (gcc::context *ctxt)
+{
+ return new pass_oacc_device_lower (ctxt);
+}
+
+/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
+ VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
+ LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
+ internal functions on non-SIMT targets, and likewise some SIMD internal
+ functions on SIMT targets. */
+
+static unsigned int
+execute_omp_device_lower ()
+{
+ int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
+ basic_block bb;
+ gimple_stmt_iterator gsi;
+ FOR_EACH_BB_FN (bb, cfun)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
+ continue;
+ tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
+ tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
+ /* A NULL RHS below (with a non-NULL LHS) means the call is kept
+ for later expansion. */
+ switch (gimple_call_internal_fn (stmt))
+ {
+ case IFN_GOMP_USE_SIMT:
+ rhs = vf == 1 ? integer_zero_node : integer_one_node;
+ break;
+ case IFN_GOMP_SIMT_LANE:
+ case IFN_GOMP_SIMT_LAST_LANE:
+ rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
+ break;
+ case IFN_GOMP_SIMT_VF:
+ rhs = build_int_cst (type, vf);
+ break;
+ case IFN_GOMP_SIMT_ORDERED_PRED:
+ rhs = vf == 1 ? integer_zero_node : NULL_TREE;
+ if (rhs || !lhs)
+ unlink_stmt_vdef (stmt);
+ break;
+ case IFN_GOMP_SIMT_VOTE_ANY:
+ case IFN_GOMP_SIMT_XCHG_BFLY:
+ case IFN_GOMP_SIMT_XCHG_IDX:
+ rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
+ break;
+ case IFN_GOMP_SIMD_LANE:
+ case IFN_GOMP_SIMD_LAST_LANE:
+ rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
+ break;
+ case IFN_GOMP_SIMD_VF:
+ rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
+ break;
+ default:
+ continue;
+ }
+ if (lhs && !rhs)
+ continue;
+ stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
+ gsi_replace (&gsi, stmt, false);
+ }
+ if (vf != 1)
+ cfun->has_force_vectorize_loops = false;
+ return 0;
+}
+
+namespace {
+
+const pass_data pass_data_omp_device_lower =
+{
+ GIMPLE_PASS, /* type */
+ "ompdevlow", /* name */
+ OPTGROUP_OPENMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_cfg, /* properties_required */
+ PROP_gimple_lomp_dev, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_omp_device_lower : public gimple_opt_pass
+{
+public:
+ pass_omp_device_lower (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *ARG_UNUSED (fun))
+ {
+ /* FIXME: this should use PROP_gimple_lomp_dev. */
+#ifdef ACCEL_COMPILER
+ return true;
+#else
+ return ENABLE_OFFLOADING && (flag_openmp || in_lto_p);
+#endif
+ }
+ virtual unsigned int execute (function *)
+ {
+ return execute_omp_device_lower ();
+ }
+
+}; // class pass_omp_device_lower
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_device_lower (gcc::context *ctxt)
+{
+ return new pass_omp_device_lower (ctxt);
+}
+
+/* "omp declare target link" handling pass. */
+
+namespace {
+
+const pass_data pass_data_omp_target_link =
+{
+ GIMPLE_PASS, /* type */
+ "omptargetlink", /* name */
+ OPTGROUP_OPENMP, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_omp_target_link : public gimple_opt_pass
+{
+public:
+ pass_omp_target_link (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_omp_target_link, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *fun)
+ {
+ /* Only active in the accelerator compiler, for functions marked
+ for offloading. */
+#ifdef ACCEL_COMPILER
+ tree attrs = DECL_ATTRIBUTES (fun->decl);
+ return lookup_attribute ("omp declare target", attrs)
+ || lookup_attribute ("omp target entrypoint", attrs);
+#else
+ (void) fun;
+ return false;
+#endif
+ }
+
+ virtual unsigned execute (function *);
+};
+
+/* Callback for walk_gimple_stmt used to scan for link var operands. */
+
+static tree
+find_link_var_op (tree *tp, int *walk_subtrees, void *)
+{
+ tree t = *tp;
+
+ /* Look for variables with both a DECL_VALUE_EXPR and the "omp
+ declare target link" attribute. */
+ if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)
+ && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
+ {
+ *walk_subtrees = 0;
+ return t;
+ }
+
+ return NULL_TREE;
+}
+
+unsigned
+pass_omp_target_link::execute (function *fun)
+{
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ /* Re-gimplify statements mentioning a link var, presumably so
+ its DECL_VALUE_EXPR gets expanded — confirm against the omp
+ lowering that sets the value expr. */
+ if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
+ gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
+ }
+
+ return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_target_link (gcc::context *ctxt)
+{
+ return new pass_omp_target_link (ctxt);
+}