> Am 19.08.2024 um 20:56 schrieb Richard Sandiford <richard.sandif...@arm.com>:
>
> Prathamesh Kulkarni <prathame...@nvidia.com> writes:
>> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
>> index cbf6041fd68..0420183faf8 100644
>> --- a/gcc/lto-streamer-in.cc
>> +++ b/gcc/lto-streamer-in.cc
>> @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "debug.h"
>> #include "alloc-pool.h"
>> #include "toplev.h"
>> +#include "stor-layout.h"
>>
>> /* Allocator used to hold string slot entries for line map streaming. */
>> static struct object_allocator<struct string_slot> *string_slot_allocator;
>> @@ -1752,6 +1753,17 @@ lto_read_tree_1 (class lto_input_block *ib, class data_in *data_in, tree expr)
>> with -g1, see for example PR113488. */
>> else if (DECL_P (expr) && DECL_ABSTRACT_ORIGIN (expr) == expr)
>> DECL_ABSTRACT_ORIGIN (expr) = NULL_TREE;
>> +
>> +#ifdef ACCEL_COMPILER
>> + /* For decl with aggregate type, host streams out VOIDmode.
>> + Compute the correct DECL_MODE by calling relayout_decl. */
>> + if ((VAR_P (expr)
>> + || TREE_CODE (expr) == PARM_DECL
>> + || TREE_CODE (expr) == FIELD_DECL)
>> + && AGGREGATE_TYPE_P (TREE_TYPE (expr))
>> + && DECL_MODE (expr) == VOIDmode)
>> + relayout_decl (expr);
>> +#endif
>
> Genuine question, but: is relayout_decl safe in this context? It does
> a lot more than just reset the mode. It also applies the target ABI's
> preferences wrt alignment, padding, and so on, rather than preserving
> those of the host's.
It would be better to just recompute the mode here.
Richard
> Thanks,
> Richard
>
>
>> }
>> }
>>
>> diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
>> index 10c0809914c..0ff8bd1171e 100644
>> --- a/gcc/stor-layout.cc
>> +++ b/gcc/stor-layout.cc
>> @@ -2396,6 +2396,32 @@ finish_builtin_struct (tree type, const char *name, tree fields,
>> layout_decl (TYPE_NAME (type), 0);
>> }
>>
>> +/* Compute TYPE_MODE for TYPE (which is ARRAY_TYPE). */
>> +
>> +void compute_array_mode (tree type)
>> +{
>> + gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
>> +
>> + SET_TYPE_MODE (type, BLKmode);
>> + if (TYPE_SIZE (type) != 0
>> + && ! targetm.member_type_forces_blk (type, VOIDmode)
>> + /* BLKmode elements force BLKmode aggregate;
>> + else extract/store fields may lose. */
>> + && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
>> + || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
>> + {
>> + SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
>> + TYPE_SIZE (type)));
>> + if (TYPE_MODE (type) != BLKmode
>> + && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
>> + && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
>> + {
>> + TYPE_NO_FORCE_BLK (type) = 1;
>> + SET_TYPE_MODE (type, BLKmode);
>> + }
>> + }
>> +}
>> +
>> /* Calculate the mode, size, and alignment for TYPE.
>> For an array type, calculate the element separation as well.
>> Record TYPE on the chain of permanent or temporary types
>> @@ -2709,24 +2735,7 @@ layout_type (tree type)
>> align = MAX (align, BITS_PER_UNIT);
>> #endif
>> SET_TYPE_ALIGN (type, align);
>> - SET_TYPE_MODE (type, BLKmode);
>> - if (TYPE_SIZE (type) != 0
>> - && ! targetm.member_type_forces_blk (type, VOIDmode)
>> - /* BLKmode elements force BLKmode aggregate;
>> - else extract/store fields may lose. */
>> - && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
>> - || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
>> - {
>> - SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
>> - TYPE_SIZE (type)));
>> - if (TYPE_MODE (type) != BLKmode
>> - && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
>> - && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
>> - {
>> - TYPE_NO_FORCE_BLK (type) = 1;
>> - SET_TYPE_MODE (type, BLKmode);
>> - }
>> - }
>> + compute_array_mode (type);
>> if (AGGREGATE_TYPE_P (element))
>> TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (element);
>> /* When the element size is constant, check that it is at least as
>> diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
>> index 096ca811762..9d9b8c385f6 100644
>> --- a/gcc/stor-layout.h
>> +++ b/gcc/stor-layout.h
>> @@ -34,6 +34,7 @@ extern tree rli_size_so_far (record_layout_info);
>> extern void normalize_rli (record_layout_info);
>> extern void place_field (record_layout_info, tree);
>> extern void compute_record_mode (tree);
>> +extern void compute_array_mode (tree);
>> extern void finish_bitfield_layout (tree);
>> extern void finish_record_layout (record_layout_info, int);
>> extern void finalize_size_functions (void);
>> diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc
>> index 40029437199..329d218e7d4 100644
>> --- a/gcc/tree-streamer-in.cc
>> +++ b/gcc/tree-streamer-in.cc
>> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "attribs.h"
>> #include "asan.h"
>> #include "opts.h"
>> +#include "stor-layout.h"
>>
>>
>> /* Read a STRING_CST from the string table in DATA_IN using input
>> @@ -395,6 +396,17 @@ unpack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
>> #ifdef ACCEL_COMPILER
>> if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
>> SET_TYPE_ALIGN (expr, targetm.absolute_biggest_alignment);
>> +
>> + /* Host streams out VOIDmode for aggregate type. */
>> + if (AGGREGATE_TYPE_P (expr) && TYPE_MODE (expr) == VOIDmode)
>> + {
>> + if (TREE_CODE (expr) == ARRAY_TYPE)
>> + compute_array_mode (expr);
>> + else if (RECORD_OR_UNION_TYPE_P (expr))
>> + compute_record_mode (expr);
>> + else
>> + gcc_unreachable ();
>> + }
>> #endif
>> }
>>
>> diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc
>> index b7205287ffb..7de4447a1b5 100644
>> --- a/gcc/tree-streamer-out.cc
>> +++ b/gcc/tree-streamer-out.cc
>> @@ -187,7 +187,17 @@ pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, tree expr)
>> static void
>> pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
>> {
>> - bp_pack_machine_mode (bp, DECL_MODE (expr));
>> + /* Similar to TYPE_MODE, avoid streaming out host-specific DECL_MODE
>> + for aggregate type with offloading enabled, and while streaming-in
>> + recompute appropriate DECL_MODE for accelerator. */
>> + if (lto_stream_offload_p
>> + && (VAR_P (expr)
>> + || TREE_CODE (expr) == PARM_DECL
>> + || TREE_CODE (expr) == FIELD_DECL)
>> + && AGGREGATE_TYPE_P (TREE_TYPE (expr)))
>> + bp_pack_machine_mode (bp, VOIDmode);
>> + else
>> + bp_pack_machine_mode (bp, DECL_MODE (expr));
>> bp_pack_value (bp, DECL_NONLOCAL (expr), 1);
>> bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1);
>> bp_pack_value (bp, DECL_IGNORED_P (expr), 1);
>> @@ -317,10 +327,18 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr)
>> static void
>> pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
>> {
>> + /* For offloading, avoid streaming out TYPE_MODE for aggregate type since
>> + it may be host-specific. E.g., aarch64 uses OImode for ARRAY_TYPE
>> + whose size is 256 bits, which is not representable on accelerator.
>> + Instead stream out VOIDmode, and while streaming-in, recompute
>> + appropriate TYPE_MODE for accelerator. */
>> + if (lto_stream_offload_p && AGGREGATE_TYPE_P (expr))
>> + bp_pack_machine_mode (bp, VOIDmode);
>> /* for VECTOR_TYPE, TYPE_MODE reevaluates the mode using target_flags
>> not necessary valid in a global context.
>> Use the raw value previously set by layout_type. */
>> - bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
>> + else
>> + bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
>> /* TYPE_NO_FORCE_BLK is private to stor-layout and need
>> no streaming. */
>> bp_pack_value (bp, TYPE_PACKED (expr), 1);