On 09/25/2015 04:22 PM, Martin Liška wrote: > Hello. > > In the following patch HSA is capable of handling various OMP builtins > that are utilized to set or get the number of threads. > > Martin >
Hello. This patch is a small follow-up which preserves hsa_num_threads among kernel dispatches. Martin
>From 2897bc5c5485430f1102688a437785fdf2a80add Mon Sep 17 00:00:00 2001 From: marxin <mli...@suse.cz> Date: Fri, 25 Sep 2015 17:01:00 +0200 Subject: [PATCH] HSA: distribute hsa_num_threads among kernel dispatches. libgomp/ChangeLog: 2015-09-25 Martin Liska <mli...@suse.cz> * hsa-traits.h: Add omp_num_threads to hsa_kernel_dispatch structure. * plugin/plugin-hsa.c (print_kernel_dispatch): Print the struct field. (create_kernel_dispatch_recursive): Set default value to omp_num_threads (GOMP_OFFLOAD_run): Add shadow_reg to all kernel dispatches. gcc/ChangeLog: 2015-09-25 Martin Liska <mli...@suse.cz> * hsa-gen.c (struct hsa_kernel_dispatch): New field. (gen_hsa_insns_for_kernel_call): Distribute hsa_num_threads for a kernel dispatch. (init_omp_in_prologue): Emit loading of shadow argument. (gen_body_from_gimple): Remove usage of init_omp_in_prologue. (generate_hsa): Move it to this function. --- gcc/hsa-gen.c | 42 +++++++++++++++++++++++++++++++++++------- libgomp/hsa-traits.h | 2 ++ libgomp/plugin/plugin-hsa.c | 16 ++++++++-------- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 6f45bfe..185b9cc 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -101,6 +101,8 @@ struct hsa_kernel_dispatch uint32_t group_segment_size; /* Number of children kernel dispatches. */ uint64_t kernel_dispatch_count; + /* Number of threads. */ + uint32_t omp_num_threads; /* Debug purpose argument. */ uint64_t debug; /* Kernel dispatch structures created for children kernel dispatches. */ @@ -3523,6 +3525,16 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call) addr); hbb->append_insn (mem); + /* Write to shadow_reg->omp_num_threads = hsa_num_threads. */ + hbb->append_insn (new hsa_insn_comment + ("set shadow_reg->omp_num_threads = hsa_num_threads")); + + addr = new hsa_op_address (shadow_reg, offsetof (hsa_kernel_dispatch, + omp_num_threads)); + hbb->append_insn + (new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads_reg->type, + hsa_num_threads_reg, addr)); + /* Write to packet->workgroup_size_x. */ hbb->append_insn (new hsa_insn_comment ("set packet->workgroup_size_x = hsa_num_threads")); @@ -4507,12 +4519,27 @@ hsa_init_new_bb (basic_block bb) /* Initialize OMP in an HSA basic block PROLOGUE. */ static void -init_omp_in_prologue (hsa_bb *prologue) +init_omp_in_prologue (void) { - BrigType16_t t = hsa_num_threads->type; - prologue->append_insn - (new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (64, t), - new hsa_op_address (hsa_num_threads))); + if (!hsa_cfun->kern_p) + return; + + hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + /* Load a default value from shadow argument. */ + hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg (); + hsa_op_address *addr = new hsa_op_address + (shadow_reg_ptr, offsetof (hsa_kernel_dispatch, omp_num_threads)); + + hsa_op_reg *threads = new hsa_op_reg (BRIG_TYPE_U32); + hsa_insn_basic *basic = new hsa_insn_mem + (BRIG_OPCODE_LD, threads->type, threads, addr); + prologue->append_insn (basic); + + /* Save it to private variable hsa_num_threads. */ + basic = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->type, threads, + new hsa_op_address (hsa_num_threads)); + prologue->append_insn (basic); } /* Go over gimple representation and generate our internal HSA one. SSA_MAP @@ -4554,8 +4581,6 @@ gen_body_from_gimple (vec <hsa_op_reg_p> *ssa_map) } } - init_omp_in_prologue (hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))); - FOR_EACH_BB_FN (bb, cfun) { gimple_stmt_iterator gsi; @@ -5012,6 +5037,9 @@ generate_hsa (bool kernel) gen_function_def_parameters (hsa_cfun, &ssa_map); if (seen_error ()) goto fail; + + init_omp_in_prologue (); + gen_body_from_gimple (&ssa_map); if (seen_error ()) goto fail; diff --git a/libgomp/hsa-traits.h b/libgomp/hsa-traits.h index 3b20008..6fb7e48 100644 --- a/libgomp/hsa-traits.h +++ b/libgomp/hsa-traits.h @@ -43,6 +43,8 @@ struct hsa_kernel_dispatch uint32_t group_segment_size; /* Number of children kernel dispatches. */ uint64_t kernel_dispatch_count; + /* Number of threads. */ + uint32_t omp_num_threads; /* Debug purpose argument. */ uint64_t debug; /* Kernel dispatch structures created for children kernel dispatches. */ diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index f9be015..76a3b45 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -743,6 +743,9 @@ print_kernel_dispatch (struct hsa_kernel_dispatch *dispatch, unsigned indent) indent_stream (stderr, indent); fprintf (stderr, "children dispatches: %lu\n", dispatch->kernel_dispatch_count); + indent_stream (stderr, indent); + fprintf (stderr, "omp_num_threads: %u\n", + dispatch->omp_num_threads); fprintf (stderr, "\n"); for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++) @@ -761,6 +764,7 @@ create_kernel_dispatch_recursive (struct kernel_info *kernel, struct hsa_kernel_dispatch *shadow = create_kernel_dispatch (kernel, omp_data_size); + shadow->omp_num_threads = 64; shadow->debug = 0; for (unsigned i = 0; i < kernel->dependencies_count; i++) @@ -926,15 +930,11 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch) hsa_signal_store_relaxed (s, 1); memcpy (shadow->kernarg_address, &vars, sizeof (vars)); - /* Append shadow pointer to kernel arguments. */ - if (kernel->dependencies_count > 0) - { - memcpy (shadow->kernarg_address + sizeof (vars), &shadow, - sizeof (struct hsa_kernel_runtime *)); + memcpy (shadow->kernarg_address + sizeof (vars), &shadow, + sizeof (struct hsa_kernel_runtime *)); - if (debug) - fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n"); - } + if (debug) + fprintf (stderr, "Copying kernel runtime pointer to kernarg_address\n"); uint16_t header; header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; -- 2.5.1