Prathamesh Kulkarni <prathame...@nvidia.com> writes: > diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc > index cbf6041fd68..0420183faf8 100644 > --- a/gcc/lto-streamer-in.cc > +++ b/gcc/lto-streamer-in.cc > @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see > #include "debug.h" > #include "alloc-pool.h" > #include "toplev.h" > +#include "stor-layout.h" > > /* Allocator used to hold string slot entries for line map streaming. */ > static struct object_allocator<struct string_slot> *string_slot_allocator; > @@ -1752,6 +1753,17 @@ lto_read_tree_1 (class lto_input_block *ib, class > data_in *data_in, tree expr) > with -g1, see for example PR113488. */ > else if (DECL_P (expr) && DECL_ABSTRACT_ORIGIN (expr) == expr) > DECL_ABSTRACT_ORIGIN (expr) = NULL_TREE; > + > +#ifdef ACCEL_COMPILER > + /* For decl with aggregate type, host streams out VOIDmode. > + Compute the correct DECL_MODE by calling relayout_decl. */ > + if ((VAR_P (expr) > + || TREE_CODE (expr) == PARM_DECL > + || TREE_CODE (expr) == FIELD_DECL) > + && AGGREGATE_TYPE_P (TREE_TYPE (expr)) > + && DECL_MODE (expr) == VOIDmode) > + relayout_decl (expr); > +#endif
Genuine question, but: is relayout_decl safe in this context? It does a lot more than just reset the mode. It also applies the target ABI's preferences wrt alignment, padding, and so on, rather than preserving those of the host. Thanks, Richard > } > } > > diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc > index 10c0809914c..0ff8bd1171e 100644 > --- a/gcc/stor-layout.cc > +++ b/gcc/stor-layout.cc > @@ -2396,6 +2396,32 @@ finish_builtin_struct (tree type, const char *name, > tree fields, > layout_decl (TYPE_NAME (type), 0); > } > > +/* Compute TYPE_MODE for TYPE (which is ARRAY_TYPE). */ > + > +void compute_array_mode (tree type) > +{ > + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); > + > + SET_TYPE_MODE (type, BLKmode); > + if (TYPE_SIZE (type) != 0 > + && ! targetm.member_type_forces_blk (type, VOIDmode) > + /* BLKmode elements force BLKmode aggregate; > + else extract/store fields may lose. */ > + && (TYPE_MODE (TREE_TYPE (type)) != BLKmode > + || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) > + { > + SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), > + TYPE_SIZE (type))); > + if (TYPE_MODE (type) != BLKmode > + && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT > + && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) > + { > + TYPE_NO_FORCE_BLK (type) = 1; > + SET_TYPE_MODE (type, BLKmode); > + } > + } > +} > + > /* Calculate the mode, size, and alignment for TYPE. > For an array type, calculate the element separation as well. > Record TYPE on the chain of permanent or temporary types > @@ -2709,24 +2735,7 @@ layout_type (tree type) > align = MAX (align, BITS_PER_UNIT); > #endif > SET_TYPE_ALIGN (type, align); > - SET_TYPE_MODE (type, BLKmode); > - if (TYPE_SIZE (type) != 0 > - && ! targetm.member_type_forces_blk (type, VOIDmode) > - /* BLKmode elements force BLKmode aggregate; > - else extract/store fields may lose. 
*/ > - && (TYPE_MODE (TREE_TYPE (type)) != BLKmode > - || TYPE_NO_FORCE_BLK (TREE_TYPE (type)))) > - { > - SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type), > - TYPE_SIZE (type))); > - if (TYPE_MODE (type) != BLKmode > - && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT > - && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type))) > - { > - TYPE_NO_FORCE_BLK (type) = 1; > - SET_TYPE_MODE (type, BLKmode); > - } > - } > + compute_array_mode (type); > if (AGGREGATE_TYPE_P (element)) > TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (element); > /* When the element size is constant, check that it is at least as > diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h > index 096ca811762..9d9b8c385f6 100644 > --- a/gcc/stor-layout.h > +++ b/gcc/stor-layout.h > @@ -34,6 +34,7 @@ extern tree rli_size_so_far (record_layout_info); > extern void normalize_rli (record_layout_info); > extern void place_field (record_layout_info, tree); > extern void compute_record_mode (tree); > +extern void compute_array_mode (tree); > extern void finish_bitfield_layout (tree); > extern void finish_record_layout (record_layout_info, int); > extern void finalize_size_functions (void); > diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc > index 40029437199..329d218e7d4 100644 > --- a/gcc/tree-streamer-in.cc > +++ b/gcc/tree-streamer-in.cc > @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see > #include "attribs.h" > #include "asan.h" > #include "opts.h" > +#include "stor-layout.h" > > > /* Read a STRING_CST from the string table in DATA_IN using input > @@ -395,6 +396,17 @@ unpack_ts_type_common_value_fields (struct bitpack_d > *bp, tree expr) > #ifdef ACCEL_COMPILER > if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment) > SET_TYPE_ALIGN (expr, targetm.absolute_biggest_alignment); > + > + /* Host streams out VOIDmode for aggregate type. 
*/ > + if (AGGREGATE_TYPE_P (expr) && TYPE_MODE (expr) == VOIDmode) > + { > + if (TREE_CODE (expr) == ARRAY_TYPE) > + compute_array_mode (expr); > + else if (RECORD_OR_UNION_TYPE_P (expr)) > + compute_record_mode (expr); > + else > + gcc_unreachable (); > + } > #endif > } > > diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc > index b7205287ffb..7de4447a1b5 100644 > --- a/gcc/tree-streamer-out.cc > +++ b/gcc/tree-streamer-out.cc > @@ -187,7 +187,17 @@ pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, > tree expr) > static void > pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr) > { > - bp_pack_machine_mode (bp, DECL_MODE (expr)); > + /* Similar to TYPE_MODE, avoid streaming out host-specific DECL_MODE > + for aggregate type with offloading enabled, and while streaming-in > + recompute appropriate DECL_MODE for accelerator. */ > + if (lto_stream_offload_p > + && (VAR_P (expr) > + || TREE_CODE (expr) == PARM_DECL > + || TREE_CODE (expr) == FIELD_DECL) > + && AGGREGATE_TYPE_P (TREE_TYPE (expr))) > + bp_pack_machine_mode (bp, VOIDmode); > + else > + bp_pack_machine_mode (bp, DECL_MODE (expr)); > bp_pack_value (bp, DECL_NONLOCAL (expr), 1); > bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1); > bp_pack_value (bp, DECL_IGNORED_P (expr), 1); > @@ -317,10 +327,18 @@ pack_ts_function_decl_value_fields (struct bitpack_d > *bp, tree expr) > static void > pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr) > { > + /* For offloading, avoid streaming out TYPE_MODE for aggregate type since > + it may be host-specific. For eg, aarch64 uses OImode for ARRAY_TYPE > + whose size is 256-bits, which is not representable on accelerator. > + Instead stream out VOIDmode, and while streaming-in, recompute > + appropriate TYPE_MODE for accelerator. 
*/ > + if (lto_stream_offload_p && AGGREGATE_TYPE_P (expr)) > + bp_pack_machine_mode (bp, VOIDmode); > /* for VECTOR_TYPE, TYPE_MODE reevaluates the mode using target_flags > not necessary valid in a global context. > Use the raw value previously set by layout_type. */ > - bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr)); > + else > + bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr)); > /* TYPE_NO_FORCE_BLK is private to stor-layout and need > no streaming. */ > bp_pack_value (bp, TYPE_PACKED (expr), 1);