Hi Richard,
As mentioned in RFC email, for the following test:
int main()
{
long c[4];
#pragma omp target map(c)
c[0] = 0;
return 0;
}
Compiling for AArch64 host with -O2 -fopenmp -foffload=nvptx-none results in:
lto1: fatal error: nvptx-none - 256-bit integer numbers unsupported (mode 'OI')
compilation terminated.
nvptx mkoffload: fatal error:
../install/bin/aarch64-unknown-linux-gnu-accel-nvptx-none-gcc returned 1 exit
status compilation terminated.
This happens because AArch64 uses OImode for ARRAY_TYPE whose size fits
256-bits, which is not supported on nvptx, and thus
emits the above diagnostic.
Following your suggestion, the attached patch streams out VOIDmode from host
for TYPE_MODE and DECL_MODE for aggregate types
with offloading enabled, and while streaming-in on accel side, it recomputes
TYPE_MODE and DECL_MODE, which fixes the issue.
Patch survives AArch64->nvptx offload testing for libgomp and bootstrap+test on
aarch64-linux-gnu.
Does the patch look in the right direction ?
Signed-off-by: Prathamesh Kulkarni <[email protected]>
Thanks,
Prathamesh
Recompute TYPE_MODE and DECL_MODE for aggregate type for acclerator.
The patch streams out VOIDmode for aggregate types with offloading enabled,
and recomputes appropriate TYPE_MODE and DECL_MODE while streaming-in on accel
side. The rationale for this change is to avoid streaming out host-specific
modes that may be used for aggregate types, which may not be representable on
the accelerator. For eg, AArch64 uses OImode for ARRAY_TYPE whose size is
256-bits,
and nvptx doesn't have OImode, and thus ends up emitting an error from
lto_input_mode_table.
gcc/ChangeLog:
* lto-streamer-in.cc: Include stor-layout.h.
(lto_read_tree_1): Call relayout_decl if
offloading is enabled.
* stor-layout.cc (layout_type): Move computation of mode for
ARRAY_TYPE from ...
(compute_array_mode): ... to here.
* stor-layout.h (compute_array_mode): Declare.
* tree-streamer-in.cc: Include stor-layout.h.
(unpack_ts_common_value_fields): Call compute_array_mode if offloading
is enabled.
* tree-streamer-out.cc (pack_ts_fixed_cst_value_fields): Stream out
VOIDmode if decl has aggregate type and offloading is enabled.
(pack_ts_type_common_value_fields): Stream out VOIDmode for aggregate
type if offloading is enabled.
Signed-off-by: Prathamesh Kulkarni <[email protected]>
diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index cbf6041fd68..0420183faf8 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see
#include "debug.h"
#include "alloc-pool.h"
#include "toplev.h"
+#include "stor-layout.h"
/* Allocator used to hold string slot entries for line map streaming. */
static struct object_allocator<struct string_slot> *string_slot_allocator;
@@ -1752,6 +1753,17 @@ lto_read_tree_1 (class lto_input_block *ib, class
data_in *data_in, tree expr)
with -g1, see for example PR113488. */
else if (DECL_P (expr) && DECL_ABSTRACT_ORIGIN (expr) == expr)
DECL_ABSTRACT_ORIGIN (expr) = NULL_TREE;
+
+#ifdef ACCEL_COMPILER
+ /* For decl with aggregate type, host streams out VOIDmode.
+ Compute the correct DECL_MODE by calling relayout_decl. */
+ if ((VAR_P (expr)
+ || TREE_CODE (expr) == PARM_DECL
+ || TREE_CODE (expr) == FIELD_DECL)
+ && AGGREGATE_TYPE_P (TREE_TYPE (expr))
+ && DECL_MODE (expr) == VOIDmode)
+ relayout_decl (expr);
+#endif
}
}
diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc
index 10c0809914c..0ff8bd1171e 100644
--- a/gcc/stor-layout.cc
+++ b/gcc/stor-layout.cc
@@ -2396,6 +2396,32 @@ finish_builtin_struct (tree type, const char *name, tree
fields,
layout_decl (TYPE_NAME (type), 0);
}
+/* Compute TYPE_MODE for TYPE (which is ARRAY_TYPE). */
+
+void compute_array_mode (tree type)
+{
+ gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
+
+ SET_TYPE_MODE (type, BLKmode);
+ if (TYPE_SIZE (type) != 0
+ && ! targetm.member_type_forces_blk (type, VOIDmode)
+ /* BLKmode elements force BLKmode aggregate;
+ else extract/store fields may lose. */
+ && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
+ || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
+ {
+ SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
+ TYPE_SIZE (type)));
+ if (TYPE_MODE (type) != BLKmode
+ && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
+ && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
+ {
+ TYPE_NO_FORCE_BLK (type) = 1;
+ SET_TYPE_MODE (type, BLKmode);
+ }
+ }
+}
+
/* Calculate the mode, size, and alignment for TYPE.
For an array type, calculate the element separation as well.
Record TYPE on the chain of permanent or temporary types
@@ -2709,24 +2735,7 @@ layout_type (tree type)
align = MAX (align, BITS_PER_UNIT);
#endif
SET_TYPE_ALIGN (type, align);
- SET_TYPE_MODE (type, BLKmode);
- if (TYPE_SIZE (type) != 0
- && ! targetm.member_type_forces_blk (type, VOIDmode)
- /* BLKmode elements force BLKmode aggregate;
- else extract/store fields may lose. */
- && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
- || TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
- {
- SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
- TYPE_SIZE (type)));
- if (TYPE_MODE (type) != BLKmode
- && STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
- && TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
- {
- TYPE_NO_FORCE_BLK (type) = 1;
- SET_TYPE_MODE (type, BLKmode);
- }
- }
+ compute_array_mode (type);
if (AGGREGATE_TYPE_P (element))
TYPE_TYPELESS_STORAGE (type) = TYPE_TYPELESS_STORAGE (element);
/* When the element size is constant, check that it is at least as
diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
index 096ca811762..9d9b8c385f6 100644
--- a/gcc/stor-layout.h
+++ b/gcc/stor-layout.h
@@ -34,6 +34,7 @@ extern tree rli_size_so_far (record_layout_info);
extern void normalize_rli (record_layout_info);
extern void place_field (record_layout_info, tree);
extern void compute_record_mode (tree);
+extern void compute_array_mode (tree);
extern void finish_bitfield_layout (tree);
extern void finish_record_layout (record_layout_info, int);
extern void finalize_size_functions (void);
diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc
index 40029437199..329d218e7d4 100644
--- a/gcc/tree-streamer-in.cc
+++ b/gcc/tree-streamer-in.cc
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
#include "attribs.h"
#include "asan.h"
#include "opts.h"
+#include "stor-layout.h"
/* Read a STRING_CST from the string table in DATA_IN using input
@@ -395,6 +396,17 @@ unpack_ts_type_common_value_fields (struct bitpack_d *bp,
tree expr)
#ifdef ACCEL_COMPILER
if (TYPE_ALIGN (expr) > targetm.absolute_biggest_alignment)
SET_TYPE_ALIGN (expr, targetm.absolute_biggest_alignment);
+
+ /* Host streams out VOIDmode for aggregate type. */
+ if (AGGREGATE_TYPE_P (expr) && TYPE_MODE (expr) == VOIDmode)
+ {
+ if (TREE_CODE (expr) == ARRAY_TYPE)
+ compute_array_mode (expr);
+ else if (RECORD_OR_UNION_TYPE_P (expr))
+ compute_record_mode (expr);
+ else
+ gcc_unreachable ();
+ }
#endif
}
diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc
index b7205287ffb..7de4447a1b5 100644
--- a/gcc/tree-streamer-out.cc
+++ b/gcc/tree-streamer-out.cc
@@ -187,7 +187,17 @@ pack_ts_fixed_cst_value_fields (struct bitpack_d *bp, tree
expr)
static void
pack_ts_decl_common_value_fields (struct bitpack_d *bp, tree expr)
{
- bp_pack_machine_mode (bp, DECL_MODE (expr));
+ /* Similar to TYPE_MODE, avoid streaming out host-specific DECL_MODE
+ for aggregate type with offloading enabled, and while streaming-in
+ recompute appropriate DECL_MODE for accelerator. */
+ if (lto_stream_offload_p
+ && (VAR_P (expr)
+ || TREE_CODE (expr) == PARM_DECL
+ || TREE_CODE (expr) == FIELD_DECL)
+ && AGGREGATE_TYPE_P (TREE_TYPE (expr)))
+ bp_pack_machine_mode (bp, VOIDmode);
+ else
+ bp_pack_machine_mode (bp, DECL_MODE (expr));
bp_pack_value (bp, DECL_NONLOCAL (expr), 1);
bp_pack_value (bp, DECL_VIRTUAL_P (expr), 1);
bp_pack_value (bp, DECL_IGNORED_P (expr), 1);
@@ -317,10 +327,18 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp,
tree expr)
static void
pack_ts_type_common_value_fields (struct bitpack_d *bp, tree expr)
{
+ /* For offloading, avoid streaming out TYPE_MODE for aggregate type since
+ it may be host-specific. For eg, aarch64 uses OImode for ARRAY_TYPE
+ whose size is 256-bits, which is not representable on accelerator.
+ Instead stream out VOIDmode, and while streaming-in, recompute
+ appropriate TYPE_MODE for accelerator. */
+ if (lto_stream_offload_p && AGGREGATE_TYPE_P (expr))
+ bp_pack_machine_mode (bp, VOIDmode);
/* for VECTOR_TYPE, TYPE_MODE reevaluates the mode using target_flags
not necessary valid in a global context.
Use the raw value previously set by layout_type. */
- bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
+ else
+ bp_pack_machine_mode (bp, TYPE_MODE_RAW (expr));
/* TYPE_NO_FORCE_BLK is private to stor-layout and need
no streaming. */
bp_pack_value (bp, TYPE_PACKED (expr), 1);