> Am 19.08.2024 um 20:56 schrieb Richard Sandiford <richard.sandif...@arm.com>:
> 
> Prathamesh Kulkarni <prathame...@nvidia.com> writes:
>> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
>> index cbf6041fd68..0420183faf8 100644
>> --- a/gcc/lto-streamer-in.cc
>> +++ b/gcc/lto-streamer-in.cc
>> @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
>> #include "debug.h"
>> #include "alloc-pool.h"
>> #include "toplev.h"
>> +#include "stor-layout.h"
>> 
>> /* Allocator used to hold string slot entries for line map streaming.  */
>> static struct object_allocator<struct string_slot> *string_slot_allocator;
>> @@ -1752,6 +1753,17 @@ lto_read_tree_1 (class lto_input_block *ib, class data_in *data_in, tree expr)
>>     with -g1, see for example PR113488.  */
>>       else if (DECL_P (expr) && DECL_ABSTRACT_ORIGIN (expr) == expr)
>>    DECL_ABSTRACT_ORIGIN (expr) = NULL_TREE;
>> +
>> +#ifdef ACCEL_COMPILER
>> +      /* For decl with aggregate type, host streams out VOIDmode.
>> +     Compute the correct DECL_MODE by calling relayout_decl.  */
>> +      if ((VAR_P (expr)
>> +       || TREE_CODE (expr) == PARM_DECL
>> +       || TREE_CODE (expr) == FIELD_DECL)
>> +      && AGGREGATE_TYPE_P (TREE_TYPE (expr))
>> +      && DECL_MODE (expr) == VOIDmode)
>> +    relayout_decl (expr);
>> +#endif
> 
> Genuine question, but: is relayout_decl safe in this context?  It does
> a lot more than just reset the mode.  It also applies the target ABI's
> preferences wrt alignment, padding, and so on, rather than preserving
> the host's.

It would be better to just recompute the mode here.

Richard 

> Thanks,
> Richard
> 
> 
>>     }
>> }
>> 
>> diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
>> index 10c0809914c..0ff8bd1171e 100644
>> --- a/gcc/stor-layout.cc
>> +++ b/gcc/stor-layout.cc
>> @@ -2396,6 +2396,32 @@ finish_builtin_struct (tree type, const char *name, tree fields,
>>   layout_decl (TYPE_NAME (type), 0);
>> }
>> 
>> +/* Compute TYPE_MODE for TYPE (which is ARRAY_TYPE).  */
>> +
>> +void compute_array_mode (tree type)
>> +{
>> +  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
>> +
>> +  SET_TYPE_MODE (type, BLKmode);
>> +  if (TYPE_SIZE (type) != 0
>> +      && ! targetm.member_type_forces_blk (type, VOIDmode)
>> +      /* BLKmode elements force BLKmode aggregate;
>> +     else extract/store fields may lose.  */
>> +      && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
>> +      || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
>> +    {
>> +      SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
>> +                       TYPE_SIZE (type)));
>> +      if (TYPE_MODE (type) != BLKmode
>> +      && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
>> +      && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
>> +    {
>> +      TYPE_NO_FORCE_BLK (type) = 1;
>> +      SET_TYPE_MODE (type, BLKmode);
>> +    }
>> +    }
>> +}
>> +
>> /* Calculate the mode, size, and alignment for TYPE.
>>    For an array type, calculate the element separation as well.
>>    Record TYPE on the chain of permanent or temporary types
>> @@ -2709,24 +2735,7 @@ layout_type (tree type)
>>    align = MAX (align, BITS_PER_UNIT);
>> #endif
>>    SET_TYPE_ALIGN (type, align);
>> -    SET_TYPE_MODE (type, BLKmode);
>> -    if (TYPE_SIZE (type) != 0
>> -        && ! targetm.member_type_forces_blk (type, VOIDmode)
>> -        /* BLKmode elements force BLKmode aggregate;
>> -           else extract/store fields may lose.  */
>> -        && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
>> -        || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
>> -      {
>> -        SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
>> -                         TYPE_SIZE (type)));
>> -        if (TYPE_MODE (type) != BLKmode
>> -        && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
>> -        && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
>> -          {
>> -        TYPE_NO_FORCE_BLK (type) = 1;
>> -        SET_TYPE_MODE (type, BLKmode);
>> -          }
>> -      }
>> +    compute_array_mode (type);
>>    if (AGGREGATE_TYPE_P (element))
>>      TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (element);
>>    /* When the element size is constant, check that it is at least as
>> diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
>> index 096ca811762..9d9b8c385f6 100644
>> --- a/gcc/stor-layout.h
>> +++ b/gcc/stor-layout.h
>> @@ -34,6 +34,7 @@ extern tree rli_size_so_far (record_layout_info);
>> extern void normalize_rli (record_layout_info);
>> extern void place_field (record_layout_info, tree);
>> extern void compute_record_mode (tree);
>> +extern void compute_array_mode (tree);
>> extern void finish_bitfield_layout (tree);
>> extern void finish_record_layout (record_layout_info, int);
>> extern void finalize_size_functions (void);
>> diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc
>> index 40029437199..329d218e7d4 100644
>> --- a/gcc/tree-streamer-in.cc
>> +++ b/gcc/tree-streamer-in.cc
>> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>> #include "attribs.h"
>> #include "asan.h"
>> #include "opts.h"
>> +#include "stor-layout.h"
>> 
>> 
>> /* Read a STRING_CST from the string table in DATA_IN using input
>> @@ -395,6 +396,17 @@ unpack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
>> #ifdef ACCEL_COMPILER
>>   if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
>>     SET_TYPE_ALIGN (expr, targetm.absolute_biggest_alignment);
>> +
>> +  /* Host streams out VOIDmode for aggregate type. */
>> +  if (AGGREGATE_TYPE_P (expr) && TYPE_MODE (expr) == VOIDmode)
>> +    {
>> +      if (TREE_CODE (expr) == ARRAY_TYPE)
>> +    compute_array_mode (expr);
>> +      else if (RECORD_OR_UNION_TYPE_P (expr))
>> +    compute_record_mode (expr);
>> +      else
>> +    gcc_unreachable ();
>> +    }
>> #endif
>> }
>> 
>> diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc
>> index b7205287ffb..7de4447a1b5 100644
>> --- a/gcc/tree-streamer-out.cc
>> +++ b/gcc/tree-streamer-out.cc
>> @@ -187,7 +187,17 @@ pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, tree expr)
>> static void
>> pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
>> {
>> -  bp_pack_machine_mode (bp, DECL_MODE (expr));
>> +  /* Similar to TYPE_MODE, avoid streaming out host-specific DECL_MODE
>> +     for aggregate type with offloading enabled, and while streaming-in
>> +     recompute appropriate DECL_MODE for accelerator.  */
>> +  if (lto_stream_offload_p
>> +      && (VAR_P (expr)
>> +      || TREE_CODE (expr) == PARM_DECL
>> +      || TREE_CODE (expr) == FIELD_DECL)
>> +      && AGGREGATE_TYPE_P (TREE_TYPE (expr)))
>> +    bp_pack_machine_mode (bp, VOIDmode);
>> +  else
>> +    bp_pack_machine_mode (bp, DECL_MODE (expr));
>>   bp_pack_value (bp, DECL_NONLOCAL (expr), 1);
>>   bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1);
>>   bp_pack_value (bp, DECL_IGNORED_P (expr), 1);
>> @@ -317,10 +327,18 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr)
>> static void
>> pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
>> {
>> +  /* For offloading, avoid streaming out TYPE_MODE for aggregate type since
>> +     it may be host-specific. For eg, aarch64 uses OImode for ARRAY_TYPE
>> +     whose size is 256-bits, which is not representable on accelerator.
>> +     Instead stream out VOIDmode, and while streaming-in, recompute
>> +     appropriate TYPE_MODE for accelerator.  */
>> +  if (lto_stream_offload_p && AGGREGATE_TYPE_P (expr))
>> +    bp_pack_machine_mode (bp, VOIDmode);
>>   /* for VECTOR_TYPE, TYPE_MODE reevaluates the mode using target_flags
>>      not necessary valid in a global context.
>>      Use the raw value previously set by layout_type.  */
>> -  bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
>> +  else
>> +    bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
>>   /* TYPE_NO_FORCE_BLK is private to stor-layout and need
>>      no streaming.  */
>>   bp_pack_value (bp, TYPE_PACKED (expr), 1);

Reply via email to