Hello.

Following patch fixes up HSA kernel from kernel dispatching mechanism,
where we forgot to wait in a loop for the dispatched child kernel. Apart from 
that,
there's a small follow-up which changes naming scheme for HSA modules.

Martin
>From eca686b6495bf7faa32ecb292f94558c6bfdbdce Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Tue, 20 Oct 2015 11:32:58 +0200
Subject: [PATCH 1/2] HSA: fix kernel-from-kernel dispatch mechanism

libgomp/ChangeLog:

2015-10-20  Martin Liska  <mli...@suse.cz>

	* plugin/plugin-hsa.c (struct agent_info): Add new
	kernel_dispatch_command_q member.
	(GOMP_OFFLOAD_init_device): Initialize the queue.
	(init_single_kernel): Verify that kernel dispatching
	is not run with a big depth.
	(create_kernel_dispatch): Rename from
	create_kernel_dispatch_recursive.
	(GOMP_OFFLOAD_run): Add signal waiting workaround.
	(GOMP_OFFLOAD_fini_device): Destroy the newly added queue.

gcc/ChangeLog:

2015-10-20  Martin Liska  <mli...@suse.cz>

	* hsa-gen.c (gen_hsa_insns_for_kernel_call): Add waiting
	for the signal in a loop.
	(gen_body_from_gimple): Skip HSA basic blocks with already
	created HBB.
---
 gcc/hsa-gen.c               | 38 ++++++++++++++++++++++++------
 libgomp/plugin/plugin-hsa.c | 56 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 78 insertions(+), 16 deletions(-)

diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index ab27eb1..c36c9e0 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -3943,22 +3943,41 @@ gen_hsa_insns_for_kernel_call (hsa_bb *hbb, gcall *call)
   signal->m_memoryscope = BRIG_MEMORY_SCOPE_SYSTEM;
   hbb->append_insn (signal);
 
+  /* Prepare CFG for waiting loop.  */
+  edge e = split_block (hbb->m_bb, call);
+
+  basic_block dest = split_edge (e);
+  edge false_e = EDGE_SUCC (dest, 0);
+
+  false_e->flags &= ~EDGE_FALLTHRU;
+  false_e->flags |= EDGE_FALSE_VALUE;
+
+  make_edge (e->dest, dest, EDGE_TRUE_VALUE);
+
   /* Emit blocking signal waiting instruction.  */
+  hsa_bb *new_hbb = hsa_init_new_bb (dest);
+
   hbb->append_insn (new hsa_insn_comment ("wait for the signal"));
 
-  hsa_op_reg *signal_result_reg = new hsa_op_reg (BRIG_TYPE_U64);
+  hsa_op_reg *signal_result_reg = new hsa_op_reg (BRIG_TYPE_S64);
   c = new hsa_op_immed (1, BRIG_TYPE_S64);
-  hsa_op_immed *c2 = new hsa_op_immed (UINT64_MAX, BRIG_TYPE_U64);
 
-  signal = new hsa_insn_signal (4, BRIG_OPCODE_SIGNAL,
-				BRIG_ATOMIC_WAITTIMEOUT_LT, BRIG_TYPE_S64);
+  signal = new hsa_insn_signal (3, BRIG_OPCODE_SIGNAL,
+				BRIG_ATOMIC_WAIT_LT, BRIG_TYPE_S64);
   signal->m_memoryorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
   signal->m_memoryscope = BRIG_MEMORY_SCOPE_SYSTEM;
   signal->set_op (0, signal_result_reg);
   signal->set_op (1, signal_reg);
   signal->set_op (2, c);
-  signal->set_op (3, c2);
-  hbb->append_insn (signal);
+  new_hbb->append_insn (signal);
+
+  hsa_op_reg *ctrl = new hsa_op_reg (BRIG_TYPE_B1);
+  hsa_insn_cmp *cmp = new hsa_insn_cmp
+    (BRIG_COMPARE_EQ, ctrl->m_type, ctrl, signal_result_reg,
+     new hsa_op_immed (1, signal_result_reg->m_type));
+
+  new_hbb->append_insn (cmp);
+  new_hbb->append_insn (new hsa_insn_br (ctrl));
 
   hsa_cfun->m_kernel_dispatch_count++;
 }
@@ -4763,7 +4782,11 @@ gen_body_from_gimple (vec <hsa_op_reg_p> *ssa_map)
   FOR_EACH_BB_FN (bb, cfun)
     {
       gimple_stmt_iterator gsi;
-      hsa_bb *hbb = hsa_init_new_bb (bb);
+      hsa_bb *hbb = hsa_bb_for_bb (bb);
+      if (hbb)
+	continue;
+
+      hbb = hsa_init_new_bb (bb);
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 	{
@@ -4777,6 +4800,7 @@ gen_body_from_gimple (vec <hsa_op_reg_p> *ssa_map)
     {
       gimple_stmt_iterator gsi;
       hsa_bb *hbb = hsa_bb_for_bb (bb);
+      gcc_assert (hbb != NULL);
 
       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 	if (!virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index ed3573f..45dfe9b 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -220,6 +220,8 @@ struct agent_info
   hsa_isa_t isa;
   /* Command queue of the agent.  */
   hsa_queue_t* command_q;
+  /* Kernel from kernel dispatch command queue.  */
+  hsa_queue_t* kernel_dispatch_command_q;
   /* The HSA memory region from which to allocate kernel arguments.  */
   hsa_region_t kernarg_region;
 
@@ -447,6 +449,12 @@ GOMP_OFFLOAD_init_device (int n)
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error creating command queue", status);
 
+  status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
+			     queue_callback, NULL, UINT32_MAX, UINT32_MAX,
+			     &agent->kernel_dispatch_command_q);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Error creating kernel dispatch command queue", status);
+
   agent->kernarg_region.handle = (uint64_t) -1;
   status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
 				      &agent->kernarg_region);
@@ -455,6 +463,8 @@ GOMP_OFFLOAD_init_device (int n)
 		       "arguments");
   HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
 	     (long long unsigned) agent->command_q->id);
+  HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
+	     (long long unsigned) agent->kernel_dispatch_command_q->id);
   agent->initialized = true;
 }
 
@@ -739,7 +749,8 @@ final:
 /* Create kernel dispatch data structure for given KERNEL.  */
 
 static struct hsa_kernel_dispatch *
-create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
+create_single_kernel_dispatch (struct kernel_info *kernel,
+			       unsigned omp_data_size)
 {
   struct agent_info *agent = kernel->agent;
   struct hsa_kernel_dispatch *shadow = GOMP_PLUGIN_malloc_cleared
@@ -861,6 +872,12 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
 	  goto failure;
 	}
 
+      if (dependency->dependencies_count > 0)
+	{
+	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
+		     "a depth bigger than one\n")
+	  goto failure;
+	}
 
       init_single_kernel (dependency, max_omp_data_size);
     }
@@ -919,20 +936,23 @@ print_kernel_dispatch (struct hsa_kernel_dispatch *dispatch, unsigned indent)
    dependencies.  */
 
 static struct hsa_kernel_dispatch *
-create_kernel_dispatch_recursive (struct kernel_info *kernel,
-				  unsigned omp_data_size)
+create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
 {
-  struct hsa_kernel_dispatch *shadow = create_kernel_dispatch (kernel,
-							       omp_data_size);
+  struct hsa_kernel_dispatch *shadow = create_single_kernel_dispatch
+    (kernel, omp_data_size);
   shadow->omp_num_threads = 64;
   shadow->debug = 0;
 
+  /* Create kernel dispatch data structures.  We do not allow to have
+     a kernel dispatch with depth bigger than one.  */
   for (unsigned i = 0; i < kernel->dependencies_count; i++)
     {
       struct kernel_info *dependency = get_kernel_for_agent
 	(kernel->agent, kernel->dependencies[i]);
-      shadow->children_dispatches[i] = create_kernel_dispatch_recursive
+      shadow->children_dispatches[i] = create_single_kernel_dispatch
 	(dependency, omp_data_size);
+      shadow->children_dispatches[i]->queue =
+	kernel->agent->kernel_dispatch_command_q;
     }
 
   return shadow;
@@ -1069,7 +1089,7 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch)
   if (!kernel->initialized)
     GOMP_PLUGIN_fatal ("Called kernel must be initialized");
 
-  struct hsa_kernel_dispatch *shadow = create_kernel_dispatch_recursive
+  struct hsa_kernel_dispatch *shadow = create_kernel_dispatch
     (kernel, kernel->max_omp_data_size);
 
   if (debug)
@@ -1129,9 +1149,24 @@ GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, const void* kern_launch)
   __atomic_store_n ((uint16_t*)(&packet->header), header, __ATOMIC_RELEASE);
   hsa_signal_store_release (agent->command_q->doorbell_signal, index);
 
+  /* TODO: fixup, following workaround is necessary to run kernel from
+     kernel dispatch mechanism on a Carrizo machine.  */
+
+  for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
+    {
+      hsa_signal_t child_s;
+      child_s.handle = shadow->children_dispatches[i]->signal;
+
+      HSA_DEBUG ("Waiting for children completion signal: %lu\n",
+		 shadow->children_dispatches[i]->signal);
+      while (hsa_signal_wait_acquire
+	     (child_s, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX,
+	      HSA_WAIT_STATE_BLOCKED) != 0);
+    }
+
   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
-  hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1,
-			   UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+  while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1,
+				  UINT64_MAX, HSA_WAIT_STATE_BLOCKED) != 0);
 
   release_kernel_dispatch (shadow);
 
@@ -1214,6 +1249,9 @@ GOMP_OFFLOAD_fini_device (int n)
   hsa_status_t status = hsa_queue_destroy (agent->command_q);
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error destroying command queue", status);
+  status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Error destroying kernel dispatch command queue", status);
   if (pthread_mutex_destroy (&agent->prog_mutex))
     GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
   if (pthread_rwlock_destroy (&agent->modules_rwlock))
-- 
2.6.0

>From f11e79721e71ca7682bff136e300594001d70b8e Mon Sep 17 00:00:00 2001
From: marxin <mli...@suse.cz>
Date: Tue, 20 Oct 2015 13:41:23 +0200
Subject: [PATCH 2/2] HSA: prepend unique prefix for names of HSA modules.

gcc/ChangeLog:

2015-10-20  Martin Liska  <mli...@suse.cz>

	* hsa-brig.c (brig_init): Prepand '__hsa_module_' prefix
	to all names of HSA modules.
---
 gcc/hsa-brig.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index c4ec007..be7ef59 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -427,7 +427,7 @@ brig_init (void)
 	part = main_input_filename;
       else
 	part++;
-      asprintf (&modname, "&%s", part);
+      asprintf (&modname, "&__hsa_module_%s", part);
       char* extension = strchr (modname, '.');
       if (extension)
 	*extension = '\0';
@@ -451,7 +451,7 @@ brig_init (void)
       free (modname);
     }
   else
-    moddir.name = brig_emit_string ("unnamed_brig_module", '&');
+    moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
   moddir.base.kind = htole16 (BRIG_KIND_DIRECTIVE_MODULE);
   moddir.hsailMajor = htole32 (BRIG_VERSION_HSAIL_MAJOR) ;
   moddir.hsailMinor = htole32 (BRIG_VERSION_HSAIL_MINOR);
-- 
2.6.0

Reply via email to