Hi,
this patch introduces a new target hook that allows the target's
INITIAL_ELIMINATION_OFFSET function to use cached values instead of
re-computing the frame layout every time.
I have updated the documentation a bit and hope it is clearer this time.
It still needs a review by ARM port maintainers.
If the ARM port maintainers find this patch useful, that would be fine.
Thanks
Bernd.
On 06/21/16 23:29, Jeff Law wrote:
> On 06/16/2016 08:47 AM, Bernd Edlinger wrote:
>> Hi!
>>
>>
>> By the design of the target hook INITIAL_ELIMINATION_OFFSET
>> it is necessary to call this function several times with
>> different register combinations.
>> Most targets use a cached data structure that describes the
>> exact frame layout of the current function.
>>
>> It is safe to skip the computation when reload_completed = true,
>> and most targets do that already.
>>
>> However while reload is doing its work, it is not clear when to
>> do the computation and when not. This results in unnecessary
>> work. Computing the frame layout can be a simple function or an
>> arbitrarily complex one, that walks all instructions of the current
>> function for instance, which is more or less the common case.
>>
>>
>> This patch adds a new optional target hook that can be used
>> by the target to factor the INITIAL_ELIMINATION_OFFSET-hook
>> into a O(n) computation part, and a O(1) result function.
>>
>> The patch implements a compute_frame_layout target hook just
>> for ARM in the moment, to show the principle.
>> Other targets may also implement that hook, if it seems appropriate.
>>
>>
>> Boot-strapped and reg-tested on arm-linux-gnueabihf.
>> OK for trunk?
>>
>>
>> Thanks
>> Bernd.
>>
>>
>> changelog-frame-layout.txt
>>
>>
>> 2016-06-16 Bernd Edlinger <[email protected]>
>>
>> * target.def (compute_frame_layout): New optional target hook.
>> * doc/tm.texi.in (TARGET_COMPUTE_FRAME_LAYOUT): Add hook.
>> * doc/tm.texi (TARGET_COMPUTE_FRAME_LAYOUT): Add documentation.
>> * lra-eliminations.c (update_reg_eliminate): Call
>> compute_frame_layout
>> target hook.
>> * reload1.c (verify_initial_elim_offsets): Likewise.
>> * config/arm/arm.c (TARGET_COMPUTE_FRAME_LAYOUT): Define.
>> (use_simple_return_p): Call arm_compute_frame_layout if needed.
>> (arm_get_frame_offsets): Split up into this ...
>> (arm_compute_frame_layout): ... and this function.
> The ARM maintainers would need to chime in on the ARM specific changes
> though.
>
>
>
>> Index: gcc/target.def
>> ===================================================================
>> --- gcc/target.def (Revision 233176)
>> +++ gcc/target.def (Arbeitskopie)
>> @@ -5245,8 +5245,19 @@ five otherwise. This is best for most machines.",
>> unsigned int, (void),
>> default_case_values_threshold)
>>
>> -/* Retutn true if a function must have and use a frame pointer. */
> s/Retutn/Return
>
>> +/* Optional callback to advise the target to compute the frame
>> layout. */
>> DEFHOOK
>> +(compute_frame_layout,
>> + "This target hook is called immediately before reload wants to call\n\
>> +@code{INITIAL_ELIMINATION_OFFSET} and allows the target to cache the
>> frame\n\
>> +layout instead of re-computing it on every invocation. This is
>> particularly\n\
>> +useful for targets that have an O(n) frame layout function.
>> Implementing\n\
>> +this callback is optional.",
>> + void, (void),
>> + hook_void_void)
> So the docs say "immediately before", but that's not actually reality in
> lra-eliminations. I think you can just say "This target hook is called
> before reload or lra-eliminations calls
> @code{INITIAL_ELIMINATION_OFFSET} and allows ..."
>
>
> How does this macro interact with INITIAL_FRAME_POINTER_OFFSET?
>
> I'm OK with this conceptually. I think you need a minor doc update and
> OK from the ARM maintainers before it can be installed though.
>
> jeff
2016-06-16 Bernd Edlinger <[email protected]>
* target.def (compute_frame_layout): New optional target hook.
* doc/tm.texi.in (TARGET_COMPUTE_FRAME_LAYOUT): Add hook.
* doc/tm.texi (TARGET_COMPUTE_FRAME_LAYOUT): Add documentation.
* lra-eliminations.c (update_reg_eliminate): Call compute_frame_layout
target hook.
* reload1.c (verify_initial_elim_offsets): Likewise.
* config/arm/arm.c (TARGET_COMPUTE_FRAME_LAYOUT): Define.
(use_simple_return_p): Call arm_compute_frame_layout if needed.
(arm_get_frame_offsets): Split up into this ...
(arm_compute_frame_layout): ... and this function.
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 239144)
+++ gcc/config/arm/arm.c (working copy)
@@ -81,6 +81,7 @@ static bool arm_const_not_ok_for_debug_p (rtx);
static bool arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
+static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
unsigned HOST_WIDE_INT, rtx, rtx, int, int);
@@ -663,6 +664,9 @@ static const struct attribute_spec arm_attribute_t
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
+#undef TARGET_COMPUTE_FRAME_LAYOUT
+#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
+
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
@@ -3880,6 +3884,10 @@ use_simple_return_p (void)
{
arm_stack_offsets *offsets;
+ /* Note this function can be called before or after reload. */
+ if (!reload_completed)
+ arm_compute_frame_layout ();
+
offsets = arm_get_frame_offsets ();
return offsets->outgoing_args != 0;
}
@@ -19370,7 +19378,7 @@ arm_compute_static_chain_stack_bytes (void)
/* Compute a bit mask of which registers need to be
saved on the stack for the current function.
- This is used by arm_get_frame_offsets, which may add extra registers. */
+ This is used by arm_compute_frame_layout, which may add extra registers. */
static unsigned long
arm_compute_save_reg_mask (void)
@@ -20926,12 +20934,25 @@ any_sibcall_could_use_r3 (void)
alignment. */
+/* Return cached stack offsets. */
+
+static arm_stack_offsets *
+arm_get_frame_offsets (void)
+{
+ struct arm_stack_offsets *offsets;
+
+ offsets = &cfun->machine->stack_offsets;
+
+ return offsets;
+}
+
+
/* Calculate stack offsets. These are used to calculate register elimination
offsets and in prologue/epilogue code. Also calculates which registers
should be saved. */
-static arm_stack_offsets *
-arm_get_frame_offsets (void)
+static void
+arm_compute_frame_layout (void)
{
struct arm_stack_offsets *offsets;
unsigned long func_type;
@@ -20943,19 +20964,6 @@ any_sibcall_could_use_r3 (void)
offsets = &cfun->machine->stack_offsets;
- /* We need to know if we are a leaf function. Unfortunately, it
- is possible to be called after start_sequence has been called,
- which causes get_insns to return the insns for the sequence,
- not the function, which will cause leaf_function_p to return
- the incorrect result.
-
- to know about leaf functions once reload has completed, and the
- frame size cannot be changed after that time, so we can safely
- use the cached value. */
-
- if (reload_completed)
- return offsets;
-
/* Initially this is the size of the local variables. It will translated
into an offset once we have determined the size of preceding data. */
frame_size = ROUND_UP_WORD (get_frame_size ());
@@ -21022,7 +21030,7 @@ any_sibcall_could_use_r3 (void)
{
offsets->outgoing_args = offsets->soft_frame;
offsets->locals_base = offsets->soft_frame;
- return offsets;
+ return;
}
/* Ensure SFP has the correct alignment. */
@@ -21098,8 +21106,6 @@ any_sibcall_could_use_r3 (void)
offsets->outgoing_args += 4;
gcc_assert (!(offsets->outgoing_args & 7));
}
-
- return offsets;
}
Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi (revision 239144)
+++ gcc/doc/tm.texi (working copy)
@@ -3693,6 +3693,14 @@ registers. This macro must be defined if @code{EL
defined.
@end defmac
+@deftypefn {Target Hook} void TARGET_COMPUTE_FRAME_LAYOUT (void)
+This target hook allows the target to compute the frame layout once and
+make use of the cached frame layout in @code{INITIAL_ELIMINATION_OFFSET}
+instead of re-computing it on every invocation. This is particularly
+useful for targets that have an expensive frame layout function.
+Implementing this callback is optional.
+@end deftypefn
+
@node Stack Arguments
@subsection Passing Function Arguments on the Stack
@cindex arguments on stack
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in (revision 239144)
+++ gcc/doc/tm.texi.in (working copy)
@@ -3227,6 +3227,8 @@ registers. This macro must be defined if @code{EL
defined.
@end defmac
+@hook TARGET_COMPUTE_FRAME_LAYOUT
+
@node Stack Arguments
@subsection Passing Function Arguments on the Stack
@cindex arguments on stack
Index: gcc/lra-eliminations.c
===================================================================
--- gcc/lra-eliminations.c (revision 239144)
+++ gcc/lra-eliminations.c (working copy)
@@ -1203,6 +1203,10 @@ update_reg_eliminate (bitmap insns_with_changed_of
struct lra_elim_table *ep, *ep1;
HARD_REG_SET temp_hard_reg_set;
+#ifdef ELIMINABLE_REGS
+ targetm.compute_frame_layout ();
+#endif
+
/* Clear self elimination offsets. */
for (ep = reg_eliminate; ep < ®_eliminate[NUM_ELIMINABLE_REGS]; ep++)
self_elim_offsets[ep->from] = 0;
Index: gcc/reload1.c
===================================================================
--- gcc/reload1.c (revision 239144)
+++ gcc/reload1.c (working copy)
@@ -3831,6 +3831,7 @@ verify_initial_elim_offsets (void)
{
struct elim_table *ep;
+ targetm.compute_frame_layout ();
for (ep = reg_eliminate; ep < ®_eliminate[NUM_ELIMINABLE_REGS]; ep++)
{
INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, t);
@@ -3855,6 +3856,7 @@ set_initial_elim_offsets (void)
struct elim_table *ep = reg_eliminate;
#ifdef ELIMINABLE_REGS
+ targetm.compute_frame_layout ();
for (; ep < ®_eliminate[NUM_ELIMINABLE_REGS]; ep++)
{
INITIAL_ELIMINATION_OFFSET (ep->from, ep->to, ep->initial_offset);
Index: gcc/target.def
===================================================================
--- gcc/target.def (revision 239144)
+++ gcc/target.def (working copy)
@@ -5269,8 +5269,19 @@ five otherwise. This is best for most machines.",
unsigned int, (void),
default_case_values_threshold)
-/* Retutn true if a function must have and use a frame pointer. */
+/* Optional callback to advise the target to compute the frame layout. */
DEFHOOK
+(compute_frame_layout,
+ "This target hook allows the target to compute the frame layout once and\n\
+make use of the cached frame layout in @code{INITIAL_ELIMINATION_OFFSET}\n\
+instead of re-computing it on every invocation. This is particularly\n\
+useful for targets that have an expensive frame layout function.\n\
+Implementing this callback is optional.",
+ void, (void),
+ hook_void_void)
+
+/* Return true if a function must have and use a frame pointer. */
+DEFHOOK
(frame_pointer_required,
"This target hook should return @code{true} if a function must have and use\n\
a frame pointer. This target hook is called in the reload pass. If its return\n\