Hi Andrew,
Thanks for the review! I have attached a revised patch containing the changes 
that you suggested.

On 20.01.20 11:00, Andrew Stubbs wrote:

> On 20/01/2020 06:57, Harwath, Frederik wrote:
>> Is it ok to commit this patch to the master branch?
> 
> I can't see anything significantly wrong with the code of the patch, however 
> I have some minor issues I'd like fixed in the text.
> 
> [...] Please move the functions down into the "Utility functions" group. The 
> const static variables should probably go with them.

Done.

>> @@ -3294,7 +3415,11 @@ GOMP_OFFLOAD_init_device (int n)
>>                        &buf);
>>    if (status != HSA_STATUS_SUCCESS)
>>      return hsa_error ("Error querying the name of the agent", status);
>> -  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);
>> +  agent->gfx900_p = (strncmp (buf, gcn_gfx900_s, gcn_isa_name_len) == 0);
>> +
>> +  agent->device_isa = isa_code (buf);
>> +  if (agent->device_isa < 0)
>> +    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);
> 
> Can device_isa not just replace gfx900_p? I think it's only tested in one 
> place, and that would be easily substituted.
> 

Yes, I have changed that one place to use agent->device_isa.

I would commit the patch then if nobody objects :-). The other approaches (fat 
binaries etc.) that have been discussed in
this thread seem to be long-term projects and until something like this gets 
implemented the early error checking
implemented by this patch seems to be better than nothing.

Frederik
From 470892454bf0d67ea71c2399f5819713592e46a0 Mon Sep 17 00:00:00 2001
From: Frederik Harwath <frede...@codesourcery.com>
Date: Mon, 20 Jan 2020 07:45:43 +0100
Subject: [PATCH] Add runtime ISA check for amdgcn offloading

When executing code that uses amdgcn GPU offloading, the ISA of the GPU must
match the ISA for which the code has been compiled.  So far, the libgomp amdgcn
plugin did not attempt to verify this.  In case of a mismatch, the user is
confronted with an unhelpful error message produced by the HSA runtime.

This commit implements a runtime ISA check. In the case of a ISA mismatch, the
execution is aborted with a clear error message and a hint at the correct
compilation parameters for the GPU on which the execution has been attempted.

libgomp/
	* plugin/plugin-gcn.c (EF_AMDGPU_MACH): New enum.
	* (EF_AMDGPU_MACH_MASK): New constant.
	* (gcn_isa): New typedef.
	* (gcn_gfx801_s): New constant.
	* (gcn_gfx803_s): New constant.
	* (gcn_gfx900_s): New constant.
	* (gcn_gfx906_s): New constant.
	* (gcn_isa_name_len): New constant.
	* (elf_gcn_isa_field): New function.
	* (isa_hsa_name): New function.
	* (isa_gcc_name): New function.
	* (isa_code): New function.
	* (struct agent_info): Add field "device_isa" and remove field
	"gfx900_p".
	* (GOMP_OFFLOAD_init_device): Adapt agent init to "agent_info"
	field changes, fail if device has unknown ISA.
	* (parse_target_attributes): Replace "gfx900_p" by "device_isa".
	* (isa_matches_agent): New function ...
	* (create_and_finalize_hsa_program): ... used from here to check
	that the GPU ISA and the code-object ISA match.
---
 libgomp/plugin/plugin-gcn.c | 131 ++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 4 deletions(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 16ce251f3a5..de470a3dd33 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -396,6 +396,20 @@ struct gcn_image_desc
   struct global_var_info *global_variables;
 };
 
+/* This enum mirrors the corresponding LLVM enum's values for all ISAs that we
+   support.
+   See https://llvm.org/docs/AMDGPUUsage.html#amdgpu-ef-amdgpu-mach-table */
+
+typedef enum {
+  EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028,
+  EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
+  EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
+  EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+} EF_AMDGPU_MACH;
+
+const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
+typedef EF_AMDGPU_MACH gcn_isa;
+
 /* Description of an HSA GPU agent (device) and the program associated with
    it.  */
 
@@ -408,8 +422,9 @@ struct agent_info
   /* Whether the agent has been initialized.  The fields below are usable only
      if it has been.  */
   bool initialized;
-  /* Precomputed check for problem architectures.  */
-  bool gfx900_p;
+
+  /* The instruction set architecture of the device. */
+  gcn_isa device_isa;
 
   /* Command queues of the agent.  */
   hsa_queue_t *sync_queue;
@@ -1232,7 +1247,8 @@ parse_target_attributes (void **input,
 
   if (gcn_dims_found)
     {
-      if (agent->gfx900_p && gcn_threads == 0 && override_z_dim == 0)
+      if (agent->device_isa == EF_AMDGPU_MACH_AMDGCN_GFX900
+	  && gcn_threads == 0 && override_z_dim == 0)
 	{
 	  gcn_threads = 4;
 	  GCN_WARNING ("VEGA BUG WORKAROUND: reducing default number of "
@@ -1578,6 +1594,74 @@ get_data_memory_region (hsa_region_t region, void *data)
 			    HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED);
 }
 
+static int
+elf_gcn_isa_field (Elf64_Ehdr *image)
+{
+  return image->e_flags & EF_AMDGPU_MACH_MASK;
+}
+
+const static char *gcn_gfx801_s = "gfx801";
+const static char *gcn_gfx803_s = "gfx803";
+const static char *gcn_gfx900_s = "gfx900";
+const static char *gcn_gfx906_s = "gfx906";
+const static int gcn_isa_name_len = 6;
+
+/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
+   support the ISA. */
+
+static const char*
+isa_hsa_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return gcn_gfx801_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return gcn_gfx803_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX900:
+      return gcn_gfx900_s;
+    case EF_AMDGPU_MACH_AMDGCN_GFX906:
+      return gcn_gfx906_s;
+    }
+  return NULL;
+}
+
+/* Returns the user-facing name that GCC uses to identify the architecture (e.g.
+   with -march) or NULL if we do not support the ISA.
+   Keep in sync with /gcc/config/gcn/gcn.{c,opt}.  */
+
+static const char*
+isa_gcc_name (int isa) {
+  switch(isa)
+    {
+    case EF_AMDGPU_MACH_AMDGCN_GFX801:
+      return "carrizo";
+    case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      return "fiji";
+    default:
+      return isa_hsa_name (isa);
+    }
+}
+
+/* Returns the code which is used in the GCN object code to identify the ISA with
+   the given name (as used by the HSA runtime).  */
+
+static gcn_isa
+isa_code(const char *isa) {
+  if (!strncmp (isa, gcn_gfx801_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX801;
+
+  if (!strncmp (isa, gcn_gfx803_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX803;
+
+  if (!strncmp (isa, gcn_gfx900_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX900;
+
+  if (!strncmp (isa, gcn_gfx906_s, gcn_isa_name_len))
+    return EF_AMDGPU_MACH_AMDGCN_GFX906;
+
+  return -1;
+}
+
 /* }}}  */
 /* {{{ Run  */
 
@@ -2257,6 +2341,39 @@ find_load_offset (Elf64_Addr *load_offset, struct agent_info *agent,
   return res;
 }
 
+/* Check that the GCN ISA of the given image matches the ISA of the agent. */
+
+static bool
+isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image)
+{
+  int isa_field = elf_gcn_isa_field (image);
+  const char* isa_s = isa_hsa_name (isa_field);
+  if (!isa_s)
+    {
+      hsa_error ("Unsupported ISA in GCN code object.", HSA_STATUS_ERROR);
+      return false;
+    }
+
+  if (isa_field != agent->device_isa)
+    {
+      char msg[120];
+      const char *agent_isa_s = isa_hsa_name (agent->device_isa);
+      const char *agent_isa_gcc_s = isa_gcc_name (agent->device_isa);
+      assert (agent_isa_s);
+      assert (agent_isa_gcc_s);
+
+      snprintf (msg, sizeof msg,
+		"GCN code object ISA '%s' does not match GPU ISA '%s'.\n"
+		"Try to recompile with '-foffload=-march=%s'.\n",
+		isa_s, agent_isa_s, agent_isa_gcc_s);
+
+      hsa_error (msg, HSA_STATUS_ERROR);
+      return false;
+    }
+
+  return true;
+}
+
 /* Create and finalize the program consisting of all loaded modules.  */
 
 static bool
@@ -2289,6 +2406,9 @@ create_and_finalize_hsa_program (struct agent_info *agent)
     {
       Elf64_Ehdr *image = (Elf64_Ehdr *)module->image_desc->gcn_image->image;
 
+      if (!isa_matches_agent (agent, image))
+	goto fail;
+
       /* Hide relocations from the HSA runtime loader.
 	 Keep a copy of the unmodified section headers to use later.  */
       Elf64_Shdr *image_sections = (Elf64_Shdr *)((char *)image
@@ -3294,7 +3414,10 @@ GOMP_OFFLOAD_init_device (int n)
 					  &buf);
   if (status != HSA_STATUS_SUCCESS)
     return hsa_error ("Error querying the name of the agent", status);
-  agent->gfx900_p = (strncmp (buf, "gfx900", 6) == 0);
+
+  agent->device_isa = isa_code (buf);
+  if (agent->device_isa < 0)
+    return hsa_error ("Unknown GCN agent architecture.", HSA_STATUS_ERROR);
 
   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
 					HSA_QUEUE_TYPE_MULTI,
-- 
2.17.1

Reply via email to