Hello,

Here's an alternative patch that does not depend on exposing the shared-memory address space, and does not try to use pass_late_lower_omp. It's based on Bernd's suggestion to transform

    (use .omp_data_o)
    GOMP_parallel (fn, &.omp_data_o, ...);
    .omp_data_o = {CLOBBER};

to

    .omp_data_o_ptr = __internal_omp_alloc_shared (&.omp_data_o, sizeof ...);
    (use (*.omp_data_o_ptr) instead of .omp_data_o)
    GOMP_parallel (fn, .omp_data_o_ptr, ...);
    __internal_omp_free_shared (.omp_data_o_ptr);
    .omp_data_o = {CLOBBER};

Every target except nvptx can lower free_shared to nothing and alloc_shared to just returning the first argument, and nvptx can select storage in shared memory or global memory. For now the nvptx path simply uses malloc/free.

Sanity-checked by running the libgomp testsuite.

I realize the #ifdef in internal-fn.c is not appropriate: it's there only to make the patch smaller, and I'll replace it with a target hook if this approach is otherwise OK.

Thanks.
Alexander

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index bf0f23e..3145a8d 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -175,6 +175,38 @@ expand_GOMP_SIMD_LAST_LANE (gcall *) gcc_unreachable (); } +static void +expand_GOMP_ALLOC_SHARED (gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + + /* XXX PoC only, needs to be a target hook. */ +#ifdef GCC_NVPTX_H + tree fndecl = builtin_decl_explicit (BUILT_IN_MALLOC); + tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 1)); + + expand_call (t, target, 0); +#else + tree rhs = gimple_call_arg (stmt, 0); + + rtx src = expand_normal (rhs); + + emit_move_insn (target, src); +#endif +} + +static void +expand_GOMP_FREE_SHARED (gcall *stmt) +{ +#ifdef GCC_NVPTX_H + tree fndecl = builtin_decl_explicit (BUILT_IN_FREE); + tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 0)); + + expand_call (t, NULL_RTX, 1); +#endif +} + /* This should get expanded in the sanopt pass. 
*/ static void diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 0db03f1..0c8e76a 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL) DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_ALLOC_SHARED, ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_FREE_SHARED, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL) DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 696889d..225bf20 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -5870,7 +5870,8 @@ expand_omp_taskreg (struct omp_region *region) a function call that has been inlined, the original PARM_DECL .OMP_DATA_I may have been converted into a different local variable. In which case, we need to keep the assignment. */ - if (gimple_omp_taskreg_data_arg (entry_stmt)) + tree data_arg = gimple_omp_taskreg_data_arg (entry_stmt); + if (data_arg) { basic_block entry_succ_bb = single_succ_p (entry_bb) ? single_succ (entry_bb) @@ -5894,9 +5895,10 @@ expand_omp_taskreg (struct omp_region *region) /* We're ignore the subcode because we're effectively doing a STRIP_NOPS. 
*/ - if (TREE_CODE (arg) == ADDR_EXPR - && TREE_OPERAND (arg, 0) - == gimple_omp_taskreg_data_arg (entry_stmt)) + if ((TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) == data_arg) + || (TREE_CODE (data_arg) == INDIRECT_REF + && TREE_OPERAND (data_arg, 0) == arg)) { parcopy_stmt = stmt; break; @@ -11835,27 +11837,44 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) record_vars_into (ctx->block_vars, child_fn); record_vars_into (gimple_bind_vars (par_bind), child_fn); + ilist = NULL; + tree sender_decl = NULL_TREE; + if (ctx->record_type) { - ctx->sender_decl + sender_decl = create_tmp_var (ctx->srecord_type ? ctx->srecord_type : ctx->record_type, ".omp_data_o"); - DECL_NAMELESS (ctx->sender_decl) = 1; - TREE_ADDRESSABLE (ctx->sender_decl) = 1; + DECL_NAMELESS (sender_decl) = 1; + TREE_ADDRESSABLE (sender_decl) = 1; + + /* Instead of using the automatic variable .omp_data_o directly, build + .omp_data_o_ptr = GOMP_ALLOC_SHARED (&.omp_data_o, sizeof .omp_data_o) + ... and replace SENDER_DECL with indirect ref *.omp_data_o_ptr. */ + tree ae = build_fold_addr_expr (sender_decl); + tree sz = TYPE_SIZE_UNIT (TREE_TYPE (sender_decl)); + gimple g = gimple_build_call_internal (IFN_GOMP_ALLOC_SHARED, 2, ae, sz); + gimple_seq_add_stmt (&ilist, g); + tree result = create_tmp_var (TREE_TYPE (ae), ".omp_data_o_ptr"); + gimple_call_set_lhs (g, result); + ctx->sender_decl = build_fold_indirect_ref (result); gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl); } olist = NULL; - ilist = NULL; lower_send_clauses (clauses, &ilist, &olist, ctx); lower_send_shared_vars (&ilist, &olist, ctx); if (ctx->record_type) { - tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL); + /* GOMP_FREE_SHARED (.omp_data_o_ptr). */ + tree ae = build_fold_addr_expr (ctx->sender_decl); + gimple g = gimple_build_call_internal (IFN_GOMP_FREE_SHARED, 1, ae); + gimple_seq_add_stmt (&olist, g); + /* Clobber the original stack variable. 
*/ + tree clobber = build_constructor (TREE_TYPE (sender_decl), NULL); TREE_THIS_VOLATILE (clobber) = 1; - gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl, - clobber)); + gimple_seq_add_stmt (&olist, gimple_build_assign (sender_decl, clobber)); } /* Once all the expansions are done, sequence all the different