Prathamesh Kulkarni <prathame...@nvidia.com> writes:
> diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
> index cbf6041fd68..0420183faf8 100644
> --- a/gcc/lto-streamer-in.cc
> +++ b/gcc/lto-streamer-in.cc
> @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "debug.h"
>  #include "alloc-pool.h"
>  #include "toplev.h"
> +#include "stor-layout.h"
>  
>  /* Allocator used to hold string slot entries for line map streaming.  */
>  static struct object_allocator<struct string_slot> *string_slot_allocator;
> @@ -1752,6 +1753,17 @@ lto_read_tree_1 (class lto_input_block *ib, class 
> data_in *data_in, tree expr)
>        with -g1, see for example PR113488.  */
>        else if (DECL_P (expr) && DECL_ABSTRACT_ORIGIN (expr) == expr)
>       DECL_ABSTRACT_ORIGIN (expr) = NULL_TREE;
> +
> +#ifdef ACCEL_COMPILER
> +      /* For decl with aggregate type, host streams out VOIDmode.
> +      Compute the correct DECL_MODE by calling relayout_decl.  */
> +      if ((VAR_P (expr)
> +        || TREE_CODE (expr) == PARM_DECL
> +        || TREE_CODE (expr) == FIELD_DECL)
> +       && AGGREGATE_TYPE_P (TREE_TYPE (expr))
> +       && DECL_MODE (expr) == VOIDmode)
> +     relayout_decl (expr);
> +#endif

Genuine question, but: is relayout_decl safe in this context?  It does
a lot more than just reset the mode.  It also applies the target ABI's
preferences with respect to alignment, padding, and so on, rather than
preserving those of the host.

Thanks,
Richard


>      }
>  }
>  
> diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
> index 10c0809914c..0ff8bd1171e 100644
> --- a/gcc/stor-layout.cc
> +++ b/gcc/stor-layout.cc
> @@ -2396,6 +2396,32 @@ finish_builtin_struct (tree type, const char *name, 
> tree fields,
>    layout_decl (TYPE_NAME (type), 0);
>  }
>  
> +/* Compute TYPE_MODE for TYPE (which is ARRAY_TYPE).  */
> +
> +void compute_array_mode (tree type)
> +{
> +  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
> +
> +  SET_TYPE_MODE (type, BLKmode);
> +  if (TYPE_SIZE (type) != 0
> +      && ! targetm.member_type_forces_blk (type, VOIDmode)
> +      /* BLKmode elements force BLKmode aggregate;
> +      else extract/store fields may lose.  */
> +      && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
> +       || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
> +    {
> +      SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
> +                                        TYPE_SIZE (type)));
> +      if (TYPE_MODE (type) != BLKmode
> +       && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
> +       && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
> +     {
> +       TYPE_NO_FORCE_BLK (type) = 1;
> +       SET_TYPE_MODE (type, BLKmode);
> +     }
> +    }
> +}
> +
>  /* Calculate the mode, size, and alignment for TYPE.
>     For an array type, calculate the element separation as well.
>     Record TYPE on the chain of permanent or temporary types
> @@ -2709,24 +2735,7 @@ layout_type (tree type)
>       align = MAX (align, BITS_PER_UNIT);
>  #endif
>       SET_TYPE_ALIGN (type, align);
> -     SET_TYPE_MODE (type, BLKmode);
> -     if (TYPE_SIZE (type) != 0
> -         && ! targetm.member_type_forces_blk (type, VOIDmode)
> -         /* BLKmode elements force BLKmode aggregate;
> -            else extract/store fields may lose.  */
> -         && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
> -             || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
> -       {
> -         SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
> -                                              TYPE_SIZE (type)));
> -         if (TYPE_MODE (type) != BLKmode
> -             && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
> -             && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
> -           {
> -             TYPE_NO_FORCE_BLK (type) = 1;
> -             SET_TYPE_MODE (type, BLKmode);
> -           }
> -       }
> +     compute_array_mode (type);
>       if (AGGREGATE_TYPE_P (element))
>         TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (element);
>       /* When the element size is constant, check that it is at least as
> diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
> index 096ca811762..9d9b8c385f6 100644
> --- a/gcc/stor-layout.h
> +++ b/gcc/stor-layout.h
> @@ -34,6 +34,7 @@ extern tree rli_size_so_far (record_layout_info);
>  extern void normalize_rli (record_layout_info);
>  extern void place_field (record_layout_info, tree);
>  extern void compute_record_mode (tree);
> +extern void compute_array_mode (tree);
>  extern void finish_bitfield_layout (tree);
>  extern void finish_record_layout (record_layout_info, int);
>  extern void finalize_size_functions (void);
> diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc
> index 40029437199..329d218e7d4 100644
> --- a/gcc/tree-streamer-in.cc
> +++ b/gcc/tree-streamer-in.cc
> @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "attribs.h"
>  #include "asan.h"
>  #include "opts.h"
> +#include "stor-layout.h"
>  
>  
>  /* Read a STRING_CST from the string table in DATA_IN using input
> @@ -395,6 +396,17 @@ unpack_ts_type_common_value_fields (struct bitpack_d 
> *bp, tree expr)
>  #ifdef ACCEL_COMPILER
>    if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
>      SET_TYPE_ALIGN (expr, targetm.absolute_biggest_alignment);
> +
> +  /* Host streams out VOIDmode for aggregate type. */
> +  if (AGGREGATE_TYPE_P (expr) && TYPE_MODE (expr) == VOIDmode)
> +    {
> +      if (TREE_CODE (expr) == ARRAY_TYPE)
> +     compute_array_mode (expr);
> +      else if (RECORD_OR_UNION_TYPE_P (expr))
> +     compute_record_mode (expr);
> +      else
> +     gcc_unreachable ();
> +    }
>  #endif
>  }
>  
> diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc
> index b7205287ffb..7de4447a1b5 100644
> --- a/gcc/tree-streamer-out.cc
> +++ b/gcc/tree-streamer-out.cc
> @@ -187,7 +187,17 @@ pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, 
> tree expr)
>  static void
>  pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
>  {
> -  bp_pack_machine_mode (bp, DECL_MODE (expr));
> +  /* Similar to TYPE_MODE, avoid streaming out host-specific DECL_MODE
> +     for aggregate type with offloading enabled, and while streaming-in
> +     recompute appropriate DECL_MODE for accelerator.  */
> +  if (lto_stream_offload_p
> +      && (VAR_P (expr)
> +       || TREE_CODE (expr) == PARM_DECL
> +       || TREE_CODE (expr) == FIELD_DECL)
> +      && AGGREGATE_TYPE_P (TREE_TYPE (expr)))
> +    bp_pack_machine_mode (bp, VOIDmode);
> +  else
> +    bp_pack_machine_mode (bp, DECL_MODE (expr));
>    bp_pack_value (bp, DECL_NONLOCAL (expr), 1);
>    bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1);
>    bp_pack_value (bp, DECL_IGNORED_P (expr), 1);
> @@ -317,10 +327,18 @@ pack_ts_function_decl_value_fields (struct bitpack_d 
> *bp, tree expr)
>  static void
>  pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
>  {
> +  /* For offloading, avoid streaming out TYPE_MODE for aggregate type since
> +     it may be host-specific. For eg, aarch64 uses OImode for ARRAY_TYPE
> +     whose size is 256-bits, which is not representable on accelerator.
> +     Instead stream out VOIDmode, and while streaming-in, recompute
> +     appropriate TYPE_MODE for accelerator.  */
> +  if (lto_stream_offload_p && AGGREGATE_TYPE_P (expr))
> +    bp_pack_machine_mode (bp, VOIDmode);
>    /* for VECTOR_TYPE, TYPE_MODE reevaluates the mode using target_flags
>       not necessary valid in a global context.
>       Use the raw value previously set by layout_type.  */
> -  bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
> +  else
> +    bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
>    /* TYPE_NO_FORCE_BLK is private to stor-layout and need
>       no streaming.  */
>    bp_pack_value (bp, TYPE_PACKED (expr), 1);

Reply via email to