On 09/30/15 09:27, Bernd Schmidt wrote:
The function name and/or comment should mention OpenACC, it could be confusing otherwise.
Done (both)
I don't know what style to use for unused args now that we have C++. I'm fine with this, and it presumably will be changed anyway.
Correct.
I feel the documentation should be expanded to say what FN_LEVEL does. I guess it's one of gang/worker/vector. Without real code to look at yet I'm left wondering how the hook would use it.
Expanded documentation.
dims is not unused in this function.
Fixed.
Otherwise it looks ok to me.
For avoidance of doubt, is this approval, or 'LGTM, but needs Jakub's approval'? nathan
2015-09-30 Nathan Sidwell <nat...@codesourcery.com> Cesar Philippidis <ce...@codesourcery.com> gcc/ * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): New. (TARGET_GOACC_VALIDATE_DIMS): Override. * target.def (TARGET_GOACC): New target hook prefix. (validate_dims): New hook. * targhooks.h (default_goacc_validate_dims): New. * omp-low.c (oacc_validate_dims): New. (execute_oacc_device_lower): New. (default_goacc_validate_dims): New. (pass_data_oacc_device_lower): New. (pass_oacc_device_lower): New pass. (make_pass_oacc_device_lower): New. * tree-pass.h (make_pass_oacc_device_lower): Declare. * passes.def (pass_oacc_device_lower): Add it. * doc/tm.texi: Rebuilt. * doc/tm.texi.in (TARGET_GOACC_VALIDATE_DIMS): Add hook. * doc/invoke.texi (oaccdevlow): Document tree dump flag. Index: gcc/targhooks.h =================================================================== --- gcc/targhooks.h (revision 228245) +++ gcc/targhooks.h (working copy) @@ -107,6 +107,9 @@ extern unsigned default_add_stmt_cost (v extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *); extern void default_destroy_cost_data (void *); +/* OpenACC hooks. */ +extern bool default_goacc_validate_dims (tree, int [], int); + /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ Index: gcc/target.def =================================================================== --- gcc/target.def (revision 228245) +++ gcc/target.def (working copy) @@ -1639,6 +1639,27 @@ int, (struct cgraph_node *), NULL) HOOK_VECTOR_END (simd_clone) +/* Functions relating to openacc. */ +#undef HOOK_PREFIX +#define HOOK_PREFIX "TARGET_GOACC_" +HOOK_VECTOR (TARGET_GOACC, goacc) + +DEFHOOK +(validate_dims, +"This hook should check the launch dimensions provided for an OpenACC\n\ +compute region, or routine. Defaulted values are represented as -1\n\ +and non-constant values as 0. The @var{fn_level} is negative for the\n\ +function corresponding to the compute region. For a routine is is the\n\ +outermost level at which partitioned execution may be spawned. It\n\ +should fill in anything that needs to default to non-unity and verify\n\ +non-defaults. Diagnostics should be issued as appropriate. Return\n\ +true, if changes have been made. You must override this hook to\n\ +provide dimensions larger than 1.", +bool, (tree decl, int dims[], int fn_level), +default_goacc_validate_dims) + +HOOK_VECTOR_END (goacc) + /* Functions relating to vectorization. */ #undef HOOK_PREFIX #define HOOK_PREFIX "TARGET_VECTORIZE_" Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 228245) +++ gcc/tree-pass.h (working copy) @@ -406,6 +406,7 @@ extern gimple_opt_pass *make_pass_lower_ extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt); extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt); extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt); Index: gcc/omp-low.c =================================================================== --- gcc/omp-low.c (revision 228245) +++ gcc/omp-low.c (working copy) @@ -8867,7 +8867,6 @@ expand_omp_atomic (struct omp_region *re expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); } - /* Encode an oacc launc argument. This matches the GOMP_LAUNCH_PACK macro on gomp-constants.h. We do not check for overflow. */ @@ -14020,4 +14019,146 @@ omp_finish_file (void) } } +/* Validate and update the dimensions for offloaded FN. ATTRS is the + raw attribute. DIMS is an array of dimensions, which is returned. + Returns the function level dimensionality -- the level at which an + offload routine wishes to partition a loop. */ + +static int +oacc_validate_dims (tree fn, tree attrs, int *dims) +{ + tree purpose[GOMP_DIM_MAX]; + unsigned ix; + tree pos = TREE_VALUE (attrs); + int fn_level = -1; + + /* Make sure the attribute creator attached the dimension + information. */ + gcc_assert (pos); + + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + { + purpose[ix] = TREE_PURPOSE (pos); + + if (purpose[ix]) + { + if (integer_zerop (purpose[ix])) + fn_level = ix + 1; + else if (fn_level < 0) + fn_level = ix; + } + + tree val = TREE_VALUE (pos); + dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; + pos = TREE_CHAIN (pos); + } + + bool changed = targetm.goacc.validate_dims (fn, dims, fn_level); + + /* Default anything left to 1. */ + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + if (dims[ix] < 0) + { + dims[ix] = 1; + changed = true; + } + + if (changed) + { + /* Replace the attribute with new values. */ + pos = NULL_TREE; + for (ix = GOMP_DIM_MAX; ix--;) + pos = tree_cons (purpose[ix], + build_int_cst (integer_type_node, dims[ix]), + pos); + replace_oacc_fn_attrib (fn, pos); + } + + return fn_level; +} + +/* Main entry point for oacc transformations which run on the device + compiler after LTO, so we know what the target device is at this + point (including the host fallback). */ + +static unsigned int +execute_oacc_device_lower () +{ + tree attrs = get_oacc_fn_attrib (current_function_decl); + int dims[GOMP_DIM_MAX]; + + if (!attrs) + /* Not an offloaded function. */ + return 0; + + oacc_validate_dims (current_function_decl, attrs, dims); + + return 0; +} + +/* Default launch dimension validator. Force everything to 1. A + backend that wants to provide larger dimensions must override this + hook. */ + +bool +default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims, + int ARG_UNUSED (fn_level)) +{ + bool changed = false; + + for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) + { + if (dims[ix] != 1) + { + dims[ix] = 1; + changed = true; + } + } + + return changed; +} + +namespace { + +const pass_data pass_data_oacc_device_lower = +{ + GIMPLE_PASS, /* type */ + "oaccdevlow", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ +}; + +class pass_oacc_device_lower : public gimple_opt_pass +{ +public: + pass_oacc_device_lower (gcc::context *ctxt) + : gimple_opt_pass (pass_data_oacc_device_lower, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { + bool gate = (flag_openacc != 0 && !seen_error ()); + + if (!gate) + return 0; + + return execute_oacc_device_lower (); + } + +}; // class pass_oacc_transform + +} // anon namespace + +gimple_opt_pass * +make_pass_oacc_device_lower (gcc::context *ctxt) +{ + return new pass_oacc_device_lower (ctxt); +} + #include "gt-omp-low.h" Index: gcc/passes.def =================================================================== --- gcc/passes.def (revision 228245) +++ gcc/passes.def (working copy) @@ -148,6 +148,7 @@ along with GCC; see the file COPYING3. INSERT_PASSES_AFTER (all_passes) NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_lower_eh_dispatch); + NEXT_PASS (pass_oacc_device_lower); NEXT_PASS (pass_all_optimizations); PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations) NEXT_PASS (pass_remove_cgraph_callee_edges); Index: gcc/doc/tm.texi.in =================================================================== --- gcc/doc/tm.texi.in (revision 228245) +++ gcc/doc/tm.texi.in (working copy) @@ -4247,6 +4247,8 @@ address; but often a machine-dependent @hook TARGET_SIMD_CLONE_USABLE +@hook TARGET_GOACC_VALIDATE_DIMS + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 228245) +++ gcc/doc/invoke.texi (working copy) @@ -7247,6 +7247,11 @@ is made by appending @file{.slp} to the Dump each function after Value Range Propagation (VRP). The file name is made by appending @file{.vrp} to the source file name. +@item oaccdevlow +@opindex fdump-tree-oaccdevlow +Dump each function after applying device-specific OpenACC transformations. +The file name is made by appending @file{.oaccdevlow} to the source file name. + @item all @opindex fdump-tree-all Enable all the available tree dumps with the flags provided in this option. Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi (revision 228245) +++ gcc/doc/tm.texi (working copy) @@ -5748,6 +5748,18 @@ usable. In that case, the smaller the n to use it. @end deftypefn +@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int @var{dims[]}, int @var{fn_level}) +This hook should check the launch dimensions provided for an OpenACC +compute region, or routine. Defaulted values are represented as -1 +and non-constant values as 0. The @var{fn_level} is negative for the +function corresponding to the compute region. For a routine is is the +outermost level at which partitioned execution may be spawned. It +should fill in anything that needs to default to non-unity and verify +non-defaults. Diagnostics should be issued as appropriate. Return +true, if changes have been made. You must override this hook to +provide dimensions larger than 1. +@end deftypefn + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 228245) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -2141,6 +2141,22 @@ nvptx_file_end (void) fputs (func_decls.str().c_str(), asm_out_file); } +/* Validate compute dimensions of an OpenACC offload or routine, fill + in non-unity defaults. FN_LEVEL indicates the level at which a + routine might spawn a loop. It is negative for non-routines. */ + +static bool +nvptx_goacc_validate_dims (tree ARG_UNUSED (decl), int *ARG_UNUSED (dims), + int ARG_UNUSED (fn_level)) +{ + bool changed = false; + + /* TODO: Leave dimensions unaltered. Partitioned execution needs + porting before filtering dimensions makes sense. */ + + return changed; +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override @@ -2227,6 +2243,9 @@ nvptx_file_end (void) #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment +#undef TARGET_GOACC_VALIDATE_DIMS +#define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h"