This patch restricts non-kernel functions to using a maximum of 64 SGPRs and 24 VGPRs.

Kernels can request various pieces of information from the HSA runtime, and these will be loaded into the registers consecutively before the kernel executes. These registers are normally fixed. Since non-kernel functions cannot make these requests, they have to assume that the default set of information has been requested. If a non-leaf kernel requests information not in the defaults, a warning is now emitted as pieces of info needed by callees may have shifted locations. A leaf kernel can do whatever it wants.

I have setup FIXED_REGISTERS for the default case now - if a different set of startup info is requested (which should be rare), then the set of fixed registers will be adjusted accordingly by gcn_conditional_register_usage. Compared to before, v0, s2 and s3 are now unfixed (due to the newlib patch 'Stash reent marker in upper bits of s1 on AMD GCN' and the first patch in this series).

Okay to commit?

Kwok


2019-11-14  Kwok Cheung Yeung  <k...@codesourcery.com>

        gcc/
        * config/gcn/gcn.c (default_requested_args): New.
        (gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
        set with default_requested_args.
        (gcn_conditional_register_usage): Limit register usage of non-kernel
        functions.  Reassign fixed registers if a non-standard set of args is
        requested.
        * config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
---
gcc/config/gcn/gcn.c | 63 ++++++++++++++++++++++++++++++----------------------
 gcc/config/gcn/gcn.h |  6 ++---
 2 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 09dfabb..8a2f7d7 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -191,6 +191,17 @@ static const struct gcn_kernel_arg_type
   {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
 };

+static const long default_requested_args
+       = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
+         | (1 << DISPATCH_PTR_ARG)
+         | (1 << QUEUE_PTR_ARG)
+         | (1 << KERNARG_SEGMENT_PTR_ARG)
+         | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
+         | (1 << WORKGROUP_ID_X_ARG)
+         | (1 << WORK_ITEM_ID_X_ARG)
+         | (1 << WORK_ITEM_ID_Y_ARG)
+         | (1 << WORK_ITEM_ID_Z_ARG);
+
 /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
    This function also sets the default values for some arguments.

@@ -201,10 +212,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
                                       tree list)
 {
   bool err = false;
-  args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
-                    | (1 << QUEUE_PTR_ARG)
-                    | (1 << KERNARG_SEGMENT_PTR_ARG)
-                    | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
+  args->requested = default_requested_args;
   args->nargs = 0;

   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -242,8 +250,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
       args->requested |= (1 << a);
       args->order[args->nargs++] = a;
     }
-  args->requested |= (1 << WORKGROUP_ID_X_ARG);
-  args->requested |= (1 << WORK_ITEM_ID_Z_ARG);

/* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
      WORK_ITEM_ID_Y_ARG.  Similarly, requesting WORK_ITEM_ID_Y_ARG implies
@@ -253,10 +259,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
   if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
     args->requested |= (1 << WORK_ITEM_ID_X_ARG);

-  /* Always enable this so that kernargs is in a predictable place for
-     gomp_print, etc.  */
-  args->requested |= (1 << DISPATCH_PTR_ARG);
-
   int sgpr_regno = FIRST_SGPR_REG;
   args->nsgprs = 0;
   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -2041,27 +2043,34 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
 static void
 gcn_conditional_register_usage (void)
 {
-  int i;
+  if (!cfun || !cfun->machine)
+    return;

-  /* FIXME: Do we need to reset fixed_regs?  */
+  if (cfun->machine->normal_function)
+    {
+ /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
+      for (int i = SGPR_REGNO (62); i <= LAST_SGPR_REG; i++)
+       fixed_regs[i] = 1, call_used_regs[i] = 1;

-/* Limit ourselves to 1/16 the register file for maximimum sized workgroups.
-   There are enough SGPRs not to limit those.
-   TODO: Adjust this more dynamically.  */
-  for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
-    fixed_regs[i] = 1, call_used_regs[i] = 1;
+      for (int i = VGPR_REGNO (24); i <= LAST_VGPR_REG; i++)
+       fixed_regs[i] = 1, call_used_regs[i] = 1;

-  if (!cfun || !cfun->machine || cfun->machine->normal_function)
-    {
-      /* Normal functions can't know what kernel argument registers are
-         live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs.  */
-      for (i = 0; i < 16; i++)
-       fixed_regs[FIRST_SGPR_REG + i] = 1;
-      for (i = 0; i < 3; i++)
-       fixed_regs[FIRST_VGPR_REG + i] = 1;
       return;
     }

+  /* If the set of requested args is the default set, nothing more needs to
+     be done.  */
+  if (cfun->machine->args.requested == default_requested_args)
+    return;
+
+ /* Requesting a set of args different from the default violates the ABI. */
+  if (!leaf_function_p ())
+    warning (0, "A non-default set of initial values has been requested, "
+               "which violates the ABI!");
+
+  for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
+    fixed_regs[i] = 0;
+
   /* Fix the runtime argument register containing values that may be
      needed later.  DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
      needed after the prologue so there's no need to fix them.  */
@@ -2069,10 +2078,10 @@ gcn_conditional_register_usage (void)

fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
   if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
     {
+      /* The upper 32-bits of the 64-bit descriptor are not used, so allow
+       the containing registers to be used for other purposes.  */
       fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1; - fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1; - fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1;
     }
   if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
     {
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index b3b2d1a..dd3789b 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -160,9 +160,9 @@
 
 #define FIXED_REGISTERS {                          \
     /* Scalars.  */                                \
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                  \
+    1, 1, 0, 0, 1, 1, 0, 0, 1, 1,                  \
 /*             fp    sp    lr.  */                 \
-    0, 0, 0, 0, 1, 1, 1, 1, 0, 0,                  \
+    1, 1, 0, 0, 1, 1, 1, 1, 0, 0,                  \
 /*  exec_save, cc_save */                          \
     1, 1, 1, 1, 0, 0, 0, 0, 0, 0,                  \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                  \
@@ -180,7 +180,7 @@
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                  \
     /* VGRPs */                                            \
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
--
2.8.1

Reply via email to