This patch restricts non-kernel functions to using a maximum of 64 SGPRs
and 24 VGPRs.
Kernels can request various pieces of information from the HSA runtime,
and these are loaded into consecutive registers before the kernel
executes. These registers are normally fixed. Since non-kernel
functions cannot make these requests, they have to assume that the
default set of information has been requested. If a non-leaf kernel
requests a non-default set of information, a warning is now emitted,
because the pieces of info needed by its callees may no longer be in
the registers those callees expect. A leaf kernel has no callees, so
it can request whatever it wants.
I have set up FIXED_REGISTERS for the default case. If a different set
of startup info is requested (which should be rare), the set of fixed
registers is adjusted accordingly by gcn_conditional_register_usage.
Compared to before, v0, s2 and s3 are now unfixed (due to the newlib
patch 'Stash reent marker in upper bits of s1 on AMD GCN' and the
first patch in this series).
Okay to commit?
Kwok
2019-11-14 Kwok Cheung Yeung <k...@codesourcery.com>
gcc/
* config/gcn/gcn.c (default_requested_args): New.
(gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
set with default_requested_args.
(gcn_conditional_register_usage): Limit register usage of non-kernel
functions. Reassign fixed registers if a non-standard set of args is
requested.
* config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
---
gcc/config/gcn/gcn.c | 63 ++++++++++++++++++++++++++++++----------------------
gcc/config/gcn/gcn.h | 6 ++---
2 files changed, 39 insertions(+), 30 deletions(-)
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 09dfabb..8a2f7d7 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -191,6 +191,17 @@ static const struct gcn_kernel_arg_type
{"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};
+static const long default_requested_args
+ = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
+ | (1 << DISPATCH_PTR_ARG)
+ | (1 << QUEUE_PTR_ARG)
+ | (1 << KERNARG_SEGMENT_PTR_ARG)
+ | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
+ | (1 << WORKGROUP_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_Y_ARG)
+ | (1 << WORK_ITEM_ID_Z_ARG);
+
/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
This function also sets the default values for some arguments.
@@ -201,10 +212,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct
gcn_kernel_args *args,
tree list)
{
bool err = false;
- args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
- | (1 << QUEUE_PTR_ARG)
- | (1 << KERNARG_SEGMENT_PTR_ARG)
- | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
+ args->requested = default_requested_args;
args->nargs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -242,8 +250,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct
gcn_kernel_args *args,
args->requested |= (1 << a);
args->order[args->nargs++] = a;
}
- args->requested |= (1 << WORKGROUP_ID_X_ARG);
- args->requested |= (1 << WORK_ITEM_ID_Z_ARG);
/* Requesting WORK_ITEM_ID_Z_ARG implies requesting
WORK_ITEM_ID_X_ARG and
WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
@@ -253,10 +259,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct
gcn_kernel_args *args,
if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
args->requested |= (1 << WORK_ITEM_ID_X_ARG);
- /* Always enable this so that kernargs is in a predictable place for
- gomp_print, etc. */
- args->requested |= (1 << DISPATCH_PTR_ARG);
-
int sgpr_regno = FIRST_SGPR_REG;
args->nsgprs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -2041,27 +2043,34 @@ gcn_secondary_reload (bool in_p, rtx x,
reg_class_t rclass,
static void
gcn_conditional_register_usage (void)
{
- int i;
+ if (!cfun || !cfun->machine)
+ return;
- /* FIXME: Do we need to reset fixed_regs? */
+ if (cfun->machine->normal_function)
+ {
+ /* Restrict the set of SGPRs and VGPRs used by non-kernel
functions. */
+ for (int i = SGPR_REGNO (62); i <= LAST_SGPR_REG; i++)
+ fixed_regs[i] = 1, call_used_regs[i] = 1;
-/* Limit ourselves to 1/16 the register file for maximimum sized
workgroups.
- There are enough SGPRs not to limit those.
- TODO: Adjust this more dynamically. */
- for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
- fixed_regs[i] = 1, call_used_regs[i] = 1;
+ for (int i = VGPR_REGNO (24); i <= LAST_VGPR_REG; i++)
+ fixed_regs[i] = 1, call_used_regs[i] = 1;
- if (!cfun || !cfun->machine || cfun->machine->normal_function)
- {
- /* Normal functions can't know what kernel argument registers are
- live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs. */
- for (i = 0; i < 16; i++)
- fixed_regs[FIRST_SGPR_REG + i] = 1;
- for (i = 0; i < 3; i++)
- fixed_regs[FIRST_VGPR_REG + i] = 1;
return;
}
+ /* If the set of requested args is the default set, nothing more needs to
+ be done. */
+ if (cfun->machine->args.requested == default_requested_args)
+ return;
+
+ /* Requesting a set of args different from the default violates the
ABI. */
+ if (!leaf_function_p ())
+ warning (0, "A non-default set of initial values has been requested, "
+ "which violates the ABI!");
+
+ for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
+ fixed_regs[i] = 0;
+
/* Fix the runtime argument register containing values that may be
needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
needed after the prologue so there's no need to fix them. */
@@ -2069,10 +2078,10 @@ gcn_conditional_register_usage (void)
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
{
+ /* The upper 32-bits of the 64-bit descriptor are not used, so allow
+ the containing registers to be used for other purposes. */
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] +
1] = 1;
- fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] +
2] = 1;
- fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] +
3] = 1;
}
if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
{
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index b3b2d1a..dd3789b 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -160,9 +160,9 @@
#define FIXED_REGISTERS { \
/* Scalars. */ \
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, \
/* fp sp lr. */ \
- 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, \
+ 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, \
/* exec_save, cc_save */ \
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -180,7 +180,7 @@
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
/* VGRPs */ \
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
--
2.8.1