[PATCH 01/25] Handle vectors that don't fit in an integer.

2018-09-05 Thread ams

GCN vector sizes range between 64 and 512 bytes, none of which have
correspondingly sized integer modes.  This breaks a number of assumptions
throughout the compiler, but I don't really want to create modes just for this
purpose.

Instead, this patch fixes up the cases that I've found, so far, such that the
compiler tries something else, or fails to optimize, rather than just ICE.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Jan Hubicka  
Martin Jambor  

gcc/
* combine.c (gen_lowpart_or_truncate): Return clobber if there is
no integer mode of the same size as x.
(gen_lowpart_for_combine): Fail if there is no integer mode of the
same size.
* expr.c (expand_expr_real_1): Force first operand to be in memory
if it is a vector register and the result is in BLKmode.
* tree-vect-stmts.c (vectorizable_store): Don't ICE when
int_mode_for_size fails.
(vectorizable_load): Likewise.
---
 gcc/combine.c | 13 -
 gcc/expr.c|  8 
 gcc/tree-vect-stmts.c |  8 
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/gcc/combine.c b/gcc/combine.c
index a2649b6..cbf9dae 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -8621,7 +8621,13 @@ gen_lowpart_or_truncate (machine_mode mode, rtx x)
 {
   /* Bit-cast X into an integer mode.  */
   if (!SCALAR_INT_MODE_P (GET_MODE (x)))
-	x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+	{
+	  enum machine_mode imode =
+	int_mode_for_mode (GET_MODE (x)).require ();
+	  if (imode == BLKmode)
+	return gen_rtx_CLOBBER (mode, const0_rtx);
+	  x = gen_lowpart (imode, x);
+	}
   x = simplify_gen_unary (TRUNCATE, int_mode_for_mode (mode).require (),
 			  x, GET_MODE (x));
 }
@@ -11698,6 +11704,11 @@ gen_lowpart_for_combine (machine_mode omode, rtx x)
   if (omode == imode)
 return x;
 
+  /* This can happen when there is no integer mode corresponding
+ to a size of vector mode.  */
+  if (omode == BLKmode)
+goto fail;
+
   /* We can only support MODE being wider than a word if X is a
  constant integer or has a mode the same size.  */
   if (maybe_gt (GET_MODE_SIZE (omode), UNITS_PER_WORD)
diff --git a/gcc/expr.c b/gcc/expr.c
index cd5cf12..776254a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -10569,6 +10569,14 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 			  || maybe_gt (bitpos + bitsize,
    GET_MODE_BITSIZE (mode2)));
 
+	/* If the result is in BLKmode and the underlying object is a
+	   vector in a register, and the size of the vector is larger than
+	   the largest integer mode, then we must force OP0 to be in memory
+	   as this is assumed in later code.  */
+	if (REG_P (op0) && VECTOR_MODE_P (mode2) && mode == BLKmode
+	&& maybe_gt (bitsize, MAX_FIXED_MODE_SIZE))
+	  must_force_mem = 1;
+
 	/* Handle CONCAT first.  */
 	if (GET_CODE (op0) == CONCAT && !must_force_mem)
 	  {
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8d94fca..607a2bd 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6702,12 +6702,12 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		 supported.  */
 		  unsigned lsize
 		= group_size * GET_MODE_BITSIZE (elmode);
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		 element extracts from the original vector type and
 		 element size stores.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		  && mode_for_vector (elmode, lnunits).exists (&vmode)
 		  && VECTOR_MODE_P (vmode)
 		  && targetm.vector_mode_supported_p (vmode)
 		  && (convert_optab_handler (vec_extract_optab,
@@ -7839,11 +7839,11 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		 to a larger load.  */
 		  unsigned lsize
 		= group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-		  elmode = int_mode_for_size (lsize, 0).require ();
 		  unsigned int lnunits = const_nunits / group_size;
 		  /* If we can't construct such a vector fall back to
 		 element loads of the original vector type.  */
-		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
+		  if (int_mode_for_size (lsize, 0).exists (&elmode)
+		  && mode_for_vector (elmode, lnunits).exists (&vmode)
 		  && VECTOR_MODE_P (vmode)
 		  && targetm.vector_mode_supported_p (vmode)
 		  && (convert_optab_handler (vec_init_optab, vmode, elmode)


[PATCH 02/25] Propagate address spaces to builtins.

2018-09-05 Thread ams

At present, pointers passed to builtin functions, including atomic operators,
are stripped of their address space properties.  This doesn't seem to be
deliberate; the code simply omits to copy them.

Not only that, but it forces pointer sizes to Pmode, which isn't appropriate
for all address spaces.

This patch attempts to correct both issues.  It works for GCN atomics and
GCN OpenACC gang-private variables.

2018-09-05  Andrew Stubbs  
Julian Brown  

gcc/
* builtins.c (get_builtin_sync_mem): Handle address spaces.
---
 gcc/builtins.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 58ea747..361361c 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5781,14 +5781,21 @@ static rtx
 get_builtin_sync_mem (tree loc, machine_mode mode)
 {
   rtx addr, mem;
+  int addr_space = TYPE_ADDR_SPACE (POINTER_TYPE_P (TREE_TYPE (loc))
+? TREE_TYPE (TREE_TYPE (loc))
+: TREE_TYPE (loc));
+  scalar_int_mode addr_mode = targetm.addr_space.address_mode (addr_space);
 
-  addr = expand_expr (loc, NULL_RTX, ptr_mode, EXPAND_SUM);
-  addr = convert_memory_address (Pmode, addr);
+  addr = expand_expr (loc, NULL_RTX, addr_mode, EXPAND_SUM);
 
   /* Note that we explicitly do not want any alias information for this
  memory, so that we kill all other live memories.  Otherwise we don't
  satisfy the full barrier semantics of the intrinsic.  */
-  mem = validize_mem (gen_rtx_MEM (mode, addr));
+  mem = gen_rtx_MEM (mode, addr);
+
+  set_mem_addr_space (mem, addr_space);
+
+  mem = validize_mem (mem);
 
   /* The alignment needs to be at least according to that of the mode.  */
   set_mem_align (mem, MAX (GET_MODE_ALIGNMENT (mode),


[PATCH 00/25] AMD GCN Port

2018-09-05 Thread ams
Hi All,

This patch series contains the non-OpenACC/OpenMP portions of a port to
AMD GCN3 and GCN5 GPU processors.  It's sufficient to build
single-threaded programs, with vectorization in the usual way.  C and
Fortran are supported, C++ is not supported, and the other front-ends
have not been tested.  The OpenACC/OpenMP/libgomp portion will follow,
once this is committed, eventually.

If the Steering Committee approve the port and the patches are accepted
then I'd like to see the port make it into GCC 9, please.

The patches, as they are, are not perfect; I still want to massage the
test results a little, but I'd like to find out about big review issues
sooner rather than later.

I've posted the middle-end patches first.  Some of these are target
independent issues, but are included in the series because they are
required for GCN to work properly.

I've then split the back-end patches into libgfortran, libgcc, and the
back-end proper.

Finally I have the testsuite tweaks and fix ups.  I don't have any
GCN-specific tests as yet; the existing tests serve to demonstrate
correctness, and I anticipate future GCN tests being largely
optimization issues, such as instruction selection and vectorization
coverage.

I'm aware that I still need to make the necessary documentation
adjustments.

Thanks in advance

-- 
Andrew Stubbs
Mentor Graphics / CodeSourcery


[PATCH 07/25] [pr82089] Don't sign-extend SFV 1 in BImode

2018-09-05 Thread ams

This is an update of the patch posted to PR82089 long ago.  We ran into the
same bug on GCN, so we need this fixed as part of this series.

2018-09-05  Andrew Stubbs  
Tom de Vries  

PR82089

gcc/
* expmed.c (emit_cstore): Fix handling of result_mode == BImode and
STORE_FLAG_VALUE == 1.
---
 gcc/expmed.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 29ce10b..0b87fdc 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -5464,11 +5464,18 @@ emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
  If STORE_FLAG_VALUE does not have the sign bit set when
  interpreted in MODE, we can do this conversion as unsigned, which
  is usually more efficient.  */
-  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
+  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode)
+  || (result_mode == BImode && int_target_mode != BImode))
 {
-  convert_move (target, subtarget,
-		val_signbit_known_clear_p (result_mode,
-	   STORE_FLAG_VALUE));
+  gcc_assert (GET_MODE_SIZE (result_mode) != 1
+		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
+  bool unsignedp
+	= (GET_MODE_SIZE (result_mode) == 1
+	   ? STORE_FLAG_VALUE == 1
+	   : val_signbit_known_clear_p (result_mode, STORE_FLAG_VALUE));
+
+  convert_move (target, subtarget, unsignedp);
+
   op0 = target;
   result_mode = int_target_mode;
 }


[PATCH 04/25] SPECIAL_REGNO_P

2018-09-05 Thread ams

GCN has some registers which are special purpose, but not "fixed" because we
want the register allocator to track their usage and select alternatives that
use different special registers (e.g. scalar cc vs. vector cc).

Sometimes this leads the regrename pass to ICE.  Quite how it gets confused is
not well understood, but considering such registers for renaming is surely not
useful.

This patch creates a new macro SPECIAL_REGNO_P which disables regrename.  In
other words, the register is fixed once allocated.

2018-09-05  Kwok Cheung Yeung  

gcc/
* defaults.h (SPECIAL_REGNO_P): Define to false by default.
* regrename.c (check_new_reg_p): Do not rename to a special register.
(rename_chains): Do not rename special registers.
---
 gcc/defaults.h  | 4 
 gcc/regrename.c | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/gcc/defaults.h b/gcc/defaults.h
index 9035b33..40ecf61 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1198,6 +1198,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define NO_FUNCTION_CSE false
 #endif
 
+#ifndef SPECIAL_REGNO_P
+#define SPECIAL_REGNO_P(REGNO) false
+#endif
+
 #ifndef HARD_REGNO_RENAME_OK
 #define HARD_REGNO_RENAME_OK(FROM, TO) true
 #endif
diff --git a/gcc/regrename.c b/gcc/regrename.c
index 8424093..92e403e 100644
--- a/gcc/regrename.c
+++ b/gcc/regrename.c
@@ -320,6 +320,7 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg,
 if (TEST_HARD_REG_BIT (this_unavailable, new_reg + i)
 	|| fixed_regs[new_reg + i]
 	|| global_regs[new_reg + i]
+	|| SPECIAL_REGNO_P (new_reg + i)
 	/* Can't use regs which aren't saved by the prologue.  */
 	|| (! df_regs_ever_live_p (new_reg + i)
 	&& ! call_used_regs[new_reg + i])
@@ -480,6 +481,7 @@ rename_chains (void)
 	continue;
 
   if (fixed_regs[reg] || global_regs[reg]
+	  || SPECIAL_REGNO_P (reg)
 	  || (!HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed
 	  && reg == HARD_FRAME_POINTER_REGNUM)
 	  || (HARD_FRAME_POINTER_IS_FRAME_POINTER && frame_pointer_needed


[PATCH 03/25] Improve TARGET_MANGLE_DECL_ASSEMBLER_NAME.

2018-09-05 Thread ams

The HSA GPU drivers can't cope with binaries that have the same symbol defined
multiple times, even though the names are not exported.  This happens whenever
there are file-scope static variables with matching names.  I believe it's also
an issue with switch tables.

This is a bug, but outside our control, so we must work around it when multiple
translation units have the same symbol defined.

Therefore, we've implemented name mangling via
TARGET_MANGLE_DECL_ASSEMBLER_NAME, but found some places where the middle-end
assumes that the decl name matches the name in the source.

This patch fixes up those cases by falling back to comparing the unmangled
name, when a lookup fails.

2018-09-05  Julian Brown  

gcc/
* cgraphunit.c (handle_alias_pairs): Scan for aliases by DECL_NAME if
decl assembler name doesn't match.

gcc/c-family/
* c-pragma.c (maybe_apply_pending_pragma_weaks): Scan for aliases with
DECL_NAME if decl assembler name doesn't match.
---
 gcc/c-family/c-pragma.c | 14 ++
 gcc/cgraphunit.c| 15 +++
 2 files changed, 29 insertions(+)

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 84e4341..1c0be0c 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -323,6 +323,20 @@ maybe_apply_pending_pragma_weaks (void)
 	continue;
 
   target = symtab_node::get_for_asmname (id);
+
+  /* Try again if ID didn't match an assembler name by looking through
+	 decl names.  */
+  if (!target)
+	{
+	  symtab_node *node;
+	  FOR_EACH_SYMBOL (node)
+	if (strcmp (IDENTIFIER_POINTER (id), node->name ()) == 0)
+	  {
+	target = node;
+		break;
+	  }
+	}
+
   decl = build_decl (UNKNOWN_LOCATION,
 			 target ? TREE_CODE (target->decl) : FUNCTION_DECL,
 			 alias_id, default_function_type);
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index ec490d7..fc3f34e 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1393,6 +1393,21 @@ handle_alias_pairs (void)
 {
   symtab_node *target_node = symtab_node::get_for_asmname (p->target);
 
+  /* If the alias target didn't match a symbol's assembler name (e.g.
+	 because it has been mangled by TARGET_MANGLE_DECL_ASSEMBLER_NAME),
+	 try again with the unmangled decl name.  */
+  if (!target_node)
+	{
+	  symtab_node *node;
+	  FOR_EACH_SYMBOL (node)
+	if (strcmp (IDENTIFIER_POINTER (p->target),
+			node->name ()) == 0)
+	  {
+		target_node = node;
+		break;
+	  }
+	}
+
   /* Weakrefs with target not defined in current unit are easy to handle:
 	 they behave just as external variables except we need to note the
 	 alias flag to later output the weakref pseudo op into asm file.  */


[PATCH 05/25] Add sorry_at diagnostic function.

2018-09-05 Thread ams

The plain "sorry" diagnostic only gives the "current" location, which is
typically the last line of the function or translation unit by the time we get to
the back end.

GCN uses "sorry" to report unsupported language features, such as static
constructors, so it's useful to have a "sorry_at" variant.

This patch implements "sorry_at" according to the pattern of the other "at"
variants.

2018-09-05  Andrew Stubbs  

gcc/
* diagnostic-core.h (sorry_at): New prototype.
* diagnostic.c (sorry_at): New function.
---
 gcc/diagnostic-core.h |  1 +
 gcc/diagnostic.c  | 11 +++
 2 files changed, 12 insertions(+)

diff --git a/gcc/diagnostic-core.h b/gcc/diagnostic-core.h
index e4ebe00..80ff395 100644
--- a/gcc/diagnostic-core.h
+++ b/gcc/diagnostic-core.h
@@ -96,6 +96,7 @@ extern bool permerror (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern bool permerror (rich_location *, const char *,
    ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void sorry (const char *, ...) ATTRIBUTE_GCC_DIAG(1,2);
+extern void sorry_at (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform (location_t, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform (rich_location *, const char *, ...) ATTRIBUTE_GCC_DIAG(2,3);
 extern void inform_n (location_t, unsigned HOST_WIDE_INT, const char *,
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index aae0934..56a1140 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -1443,6 +1443,17 @@ sorry (const char *gmsgid, ...)
   va_end (ap);
 }
 
+/* Same as above, but use location LOC instead of input_location.  */
+void
+sorry_at (location_t loc, const char *gmsgid, ...)
+{
+  va_list ap;
+  va_start (ap, gmsgid);
+  rich_location richloc (line_table, loc);
+  diagnostic_impl (&richloc, -1, gmsgid, &ap, DK_SORRY);
+  va_end (ap);
+}
+
 /* Return true if an error or a "sorry" has been seen.  Various
processing is disabled after errors.  */
 bool


[PATCH 06/25] Remove constant vec_select restriction.

2018-09-05 Thread ams

The vec_select operator is documented to require a const_int for the lane
selector operand, but GCN has an instruction that can select the lane at
runtime, so it seems reasonable to remove this restriction.

This patch simply replaces assertions that the operand is constant with early
exits from the optimizers.  I think it's reasonable that vec_select with a
non-constant operand cannot be optimized, yet.

Also included is the necessary documentation tweak.

2018-09-05  Andrew Stubbs  

gcc/
* doc/rtl.texi: Adjust vec_select description.
* simplify-rtx.c (simplify_binary_operation_1): Allow VEC_SELECT to use
non-constant selectors.
---
 gcc/doc/rtl.texi   | 11 ++-
 gcc/simplify-rtx.c |  9 +++--
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index 5b1e695..0695ad2 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -2939,11 +2939,12 @@ a set bit indicates it is taken from @var{vec1}.
 @item (vec_select:@var{m} @var{vec1} @var{selection})
 This describes an operation that selects parts of a vector.  @var{vec1} is
 the source vector, and @var{selection} is a @code{parallel} that contains a
-@code{const_int} for each of the subparts of the result vector, giving the
-number of the source subpart that should be stored into it.
-The result mode @var{m} is either the submode for a single element of
-@var{vec1} (if only one subpart is selected), or another vector mode
-with that element submode (if multiple subparts are selected).
+@code{const_int} (or another expression, if the selection can be made at
+runtime) for each of the subparts of the result vector, giving the number of
+the source subpart that should be stored into it.  The result mode @var{m} is
+either the submode for a single element of @var{vec1} (if only one subpart is
+selected), or another vector mode with that element submode (if multiple
+subparts are selected).
 
 @findex vec_concat
 @item (vec_concat:@var{m} @var{x1} @var{x2})
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index a9f2586..b4c6883 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3604,7 +3604,10 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 	  gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0)));
 	  gcc_assert (GET_CODE (trueop1) == PARALLEL);
 	  gcc_assert (XVECLEN (trueop1, 0) == 1);
-	  gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0)));
+
+	  /* We can't reason about selections made at runtime.  */
+	  if (!CONST_INT_P (XVECEXP (trueop1, 0, 0)))
+	return 0;
 
 	  if (vec_duplicate_p (trueop0, &elt0))
 	return elt0;
@@ -3703,7 +3706,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
 		{
 		  rtx x = XVECEXP (trueop1, 0, i);
 
-		  gcc_assert (CONST_INT_P (x));
+		  if (!CONST_INT_P (x))
+		return 0;
+
 		  RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0,
 		   INTVAL (x));
 		}


[PATCH 09/25] Elide repeated RTL elements.

2018-09-05 Thread ams

GCN's 64-lane vectors tend to make RTL dumps very long.  This patch makes them
far more bearable by eliding long sequences of the same element into "repeated"
messages.

2018-09-05  Andrew Stubbs  
Jan Hubicka  
Martin Jambor  

* print-rtl.c (print_rtx_operand_codes_E_and_V): Print how many times
the same elements are repeated rather than printing all of them.
---
 gcc/print-rtl.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 5dd2e31..8a04264 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -370,7 +370,20 @@ rtx_writer::print_rtx_operand_codes_E_and_V (const_rtx in_rtx, int idx)
 	m_sawclose = 1;
 
   for (int j = 0; j < XVECLEN (in_rtx, idx); j++)
-	print_rtx (XVECEXP (in_rtx, idx, j));
+	{
+	  int j1;
+
+	  print_rtx (XVECEXP (in_rtx, idx, j));
+	  for (j1 = j + 1; j1 < XVECLEN (in_rtx, idx); j1++)
+	if (XVECEXP (in_rtx, idx, j) != XVECEXP (in_rtx, idx, j1))
+	  break;
+
+	  if (j1 != j + 1)
+	{
+	  fprintf (m_outfile, " repeated %ix", j1 - j);
+	  j = j1 - 1;
+	}
+	}
 
   m_indent -= 2;
 }


[PATCH 08/25] Fix co-array allocation

2018-09-05 Thread ams

The Fortran front-end has a bug in which it uses "int" values for "size_t"
parameters.  I don't know why this isn't a problem for all 64-bit architectures,
but GCN ends up with the data in the wrong argument register and/or stack slot,
and bad things happen.

This patch corrects the issue by setting the correct type.

2018-09-05  Kwok Cheung Yeung  

gcc/fortran/
* trans-expr.c (gfc_trans_structure_assign): Ensure that
integer_zero_node is of sizetype when used as the first
argument of a call to _gfortran_caf_register.
* trans-intrinsic.c (conv_intrinsic_event_query): Convert computed
index to a size_t type.
* trans-stmt.c (gfc_trans_event_post_wait): Likewise.
---
 gcc/fortran/trans-expr.c  | 2 +-
 gcc/fortran/trans-intrinsic.c | 3 ++-
 gcc/fortran/trans-stmt.c  | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 56ce98c..91be3fb 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -7729,7 +7729,7 @@ gfc_trans_structure_assign (tree dest, gfc_expr * expr, bool init, bool coarray)
 		 suffices to recognize the data as array.  */
 	  if (rank < 0)
 		rank = 1;
-	  size = integer_zero_node;
+	  size = fold_convert (sizetype, integer_zero_node);
 	  desc = field;
 	  gfc_add_modify (&block, gfc_conv_descriptor_rank (desc),
 			  build_int_cst (signed_char_type_node, rank));
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index b2cea93..23c13da 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -10732,7 +10732,8 @@ conv_intrinsic_event_query (gfc_code *code)
 	  tmp = fold_build2_loc (input_location, MULT_EXPR,
  integer_type_node, extent, tmp);
 	  index = fold_build2_loc (input_location, PLUS_EXPR,
-   integer_type_node, index, tmp);
+   size_type_node, index,
+   fold_convert (size_type_node, tmp));
 	  if (i < ar->dimen - 1)
 		{
 		  ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 795d3cc..2c59675 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -1096,7 +1096,8 @@ gfc_trans_event_post_wait (gfc_code *code, gfc_exec_op op)
 	  tmp = fold_build2_loc (input_location, MULT_EXPR,
  integer_type_node, extent, tmp);
 	  index = fold_build2_loc (input_location, PLUS_EXPR,
-   integer_type_node, index, tmp);
+   size_type_node, index,
+   fold_convert (size_type_node, tmp));
 	  if (i < ar->dimen - 1)
 	{
 	  ubound = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);


[PATCH 11/25] Simplify vec_merge according to the mask.

2018-09-05 Thread ams

This patch was part of the original patch we acquired from Honza and Martin.

It simplifies vector elements that are inactive, according to the mask.

2018-09-05  Jan Hubicka  
Martin Jambor  

* simplify-rtx.c (simplify_merge_mask): New function.
(simplify_ternary_operation): Use it, also see if VEC_MERGEs with the
same masks are used in op1 or op2.
---
 gcc/simplify-rtx.c | 81 ++
 1 file changed, 81 insertions(+)

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 89487f2..6f27bda 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5578,6 +5578,65 @@ simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val)
   return NULL_RTX;
 }
 
+/* X is an operand number OP of VEC_MERGE operation with MASK.
+   Try to simplify using knowledge that values outside of MASK
+   will not be used.  */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+  gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+  poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+  if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+{
+  if (!side_effects_p (XEXP (x, 1 - op)))
+	return XEXP (x, op);
+}
+  if (side_effects_p (x))
+return NULL_RTX;
+  if (UNARY_P (x)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  if (top0)
+	return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+   GET_MODE (XEXP (x, 0)));
+}
+  if (BINARY_P (x)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+  if (top0 || top1)
+	return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+top0 ? top0 : XEXP (x, 0),
+top1 ? top1 : XEXP (x, 1));
+}
+  if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+  && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+  && maybe_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+{
+  rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+  rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+  rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+  if (top0 || top1)
+	return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+ GET_MODE (XEXP (x, 0)),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1),
+ top2 ? top2 : XEXP (x, 2));
+}
+  return NULL_RTX;
+}
+
 
 /* Simplify CODE, an operation with result mode MODE and three operands,
OP0, OP1, and OP2.  OP0_MODE was the mode of OP0 before it became
@@ -5967,6 +6026,28 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
 	  && !side_effects_p (op2) && !side_effects_p (op1))
 	return op0;
 
+  if (!side_effects_p (op2))
+	{
+	  rtx top0 = simplify_merge_mask (op0, op2, 0);
+	  rtx top1 = simplify_merge_mask (op1, op2, 1);
+	  if (top0 || top1)
+	return simplify_gen_ternary (code, mode, mode,
+	 top0 ? top0 : op0,
+	 top1 ? top1 : op1, op2);
+	}
+
+  if (GET_CODE (op0) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op0, 2))
+	  && !side_effects_p (XEXP (op0, 1)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+ XEXP (op0, 0), op1, op2);
+
+  if (GET_CODE (op1) == VEC_MERGE
+	  && rtx_equal_p (op2, XEXP (op1, 2))
+	  && !side_effects_p (XEXP (op0, 0)) && !side_effects_p (op2))
+	return simplify_gen_ternary (code, mode, mode,
+ XEXP (op0, 1), op1, op2);
+
   break;
 
 default:


[PATCH 10/25] Convert BImode vectors.

2018-09-05 Thread ams

GCN uses V64BImode to represent vector masks in the middle-end, and DImode
bit-masks to represent them in the back-end.  These must be converted at expand
time and the most convenient way is to simply use a SUBREG.

This works fine except that simplify_subreg needs to be able to convert
immediates, mostly for REG_EQUAL and REG_EQUIV, and currently does not know how
to convert vectors to integers where there is more than one element per byte.

This patch implements such conversions for the cases that we need.

I don't know why this is not a problem for other targets that use BImode
vectors, such as ARM SVE, so it's possible I missed some magic somewhere?

2018-09-05  Andrew Stubbs  

gcc/
* simplify-rtx.c (convert_packed_vector): New function.
(simplify_immed_subreg): Recognise Boolean vectors and call
convert_packed_vector.
---
 gcc/simplify-rtx.c | 76 ++
 1 file changed, 76 insertions(+)

diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index b4c6883..89487f2 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5976,6 +5976,73 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* Convert a CONST_INT to a CONST_VECTOR, or vice versa.
+
+   This should only occur for VECTOR_BOOL_MODE types, so the semantics
+   specified by that are assumed.  In particular, the lowest value is
+   in the first byte.  */
+
+static rtx
+convert_packed_vector (fixed_size_mode to_mode, rtx op,
+		   machine_mode from_mode, unsigned int byte,
+		   unsigned int first_elem, unsigned int inner_bytes)
+{
+  /* Sizes greater than HOST_WIDE_INT would need a better implementation.  */
+  gcc_assert (GET_MODE_SIZE (to_mode) <= sizeof (HOST_WIDE_INT));
+
+  if (GET_CODE (op) == CONST_VECTOR)
+{
+  gcc_assert (!VECTOR_MODE_P (to_mode));
+
+  int num_elem = GET_MODE_NUNITS (from_mode).to_constant();
+  int elem_bitsize = (GET_MODE_SIZE (from_mode).to_constant()
+			  * BITS_PER_UNIT) / num_elem;
+  int elem_mask = (1 << elem_bitsize) - 1;
+  HOST_WIDE_INT subreg_mask =
+	(sizeof (HOST_WIDE_INT) == GET_MODE_SIZE (to_mode)
+	 ? -1
+	 : (((HOST_WIDE_INT)1 << (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT))
+	- 1));
+
+  HOST_WIDE_INT val = 0;
+  for (int i = 0; i < num_elem; i++)
+	val |= ((INTVAL (CONST_VECTOR_ELT (op, i)) & elem_mask)
+		<< (i * elem_bitsize));
+
+  val >>= byte * BITS_PER_UNIT;
+  val &= subreg_mask;
+
+  return gen_rtx_CONST_INT (VOIDmode, val);
+}
+  else if (GET_CODE (op) == CONST_INT)
+{
+  /* Subregs of a vector not implemented yet.  */
+  gcc_assert (maybe_eq (GET_MODE_SIZE (to_mode),
+			GET_MODE_SIZE (from_mode)));
+
+  gcc_assert (VECTOR_MODE_P (to_mode));
+
+  int num_elem = GET_MODE_NUNITS (to_mode);
+  int elem_bitsize = (GET_MODE_SIZE (to_mode) * BITS_PER_UNIT) / num_elem;
+  int elem_mask = (1 << elem_bitsize) - 1;
+
+  rtvec val = rtvec_alloc (num_elem);
+  rtx *elem = &RTVEC_ELT (val, 0);
+
+  for (int i = 0; i < num_elem; i++)
+	elem[i] = gen_rtx_CONST_INT (VOIDmode,
+ (INTVAL (op) >> (i * elem_bitsize))
+ & elem_mask);
+
+  return gen_rtx_CONST_VECTOR (to_mode, val);
+}
+  else
+{
+  gcc_unreachable ();
+  return op;
+}
+}
+
 /* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
@@ -6017,6 +6084,15 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx op,
   if (COMPLEX_MODE_P (outermode))
 return NULL_RTX;
 
+  /* Vectors with multiple elements per byte are a special case.  */
+  if ((VECTOR_MODE_P (innermode)
+   && ((GET_MODE_NUNITS (innermode).to_constant()
+	/ GET_MODE_SIZE(innermode).to_constant()) > 1))
+  || (VECTOR_MODE_P (outermode)
+	  && (GET_MODE_NUNITS (outermode) / GET_MODE_SIZE(outermode) > 1)))
+return convert_packed_vector (outermode, op, innermode, byte, first_elem,
+  inner_bytes);
+
   /* We support any size mode.  */
   max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
 		 inner_bytes * BITS_PER_UNIT);


[PATCH 12/25] Make default_static_chain return NULL in non-static functions

2018-09-05 Thread ams

This patch allows default_static_chain to be called from the back-end without
it knowing if the function is static or not.  Or, to put it another way,
without duplicating the check everywhere it's used.

2018-09-05  Tom de Vries  

gcc/
* targhooks.c (default_static_chain): Return NULL in non-static
functions.
---
 gcc/targhooks.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index afd56f3..742cfbf 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1021,8 +1021,14 @@ default_internal_arg_pointer (void)
 }
 
 rtx
-default_static_chain (const_tree ARG_UNUSED (fndecl_or_type), bool incoming_p)
+default_static_chain (const_tree fndecl_or_type, bool incoming_p)
 {
+  /* While this function won't be called by the middle-end when a static
+ chain isn't needed, it's also used throughout the backend so it's
+ easiest to keep this check centralized.  */
+  if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
+return NULL;
+
   if (incoming_p)
 {
 #ifdef STATIC_CHAIN_INCOMING_REGNUM


[PATCH 13/25] Create TARGET_DISABLE_CURRENT_VECTOR_SIZE

2018-09-05 Thread ams

This feature probably ought to be reworked as a proper target hook, but I would
like to know if this is the correct solution to the problem first.

The problem is that GCN vectors have a fixed number of elements (64) and the
vector size varies with element size.  E.g. V64QI is 64 bytes and V64SI is 256
bytes.

This is a problem because GCC has an assumption that a) vector registers are
fixed size, and b) if there are multiple vector sizes you want to pick one size
and stick with it for the whole function.

This is a problem in various places, but mostly it's not fatal. However,
get_vectype_for_scalar_type caches the vector size for the first type it
encounters and then tries to apply that to all subsequent vectors, which
completely destroys vectorization.  The caching feature appears to be an
attempt to cope with AVX having a different vector size to other x86 vector
options.

This patch simply disables the cache so that it must ask the backend for the
preferred mode for every type.

2018-09-05  Andrew Stubbs  

gcc/
* tree-vect-stmts.c (get_vectype_for_scalar_type): Implement
TARGET_DISABLE_CURRENT_VECTOR_SIZE.
---
 gcc/tree-vect-stmts.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 607a2bd..8875201 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9945,9 +9945,12 @@ get_vectype_for_scalar_type (tree scalar_type)
   tree vectype;
   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
 		  current_vector_size);
+/* FIXME: use a proper target hook or macro.  */
+#ifndef TARGET_DISABLE_CURRENT_VECTOR_SIZE
   if (vectype
   && known_eq (current_vector_size, 0U))
 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+#endif
   return vectype;
 }
 


[PATCH 16/25] Fix IRA ICE.

2018-09-05 Thread ams

The IRA pass makes an assumption that any pseudos created after the pass begins
were created explicitly by the pass itself and therefore will have
corresponding entries in its other tables.

The GCN back-end, however, often creates additional pseudos, in expand
patterns, to represent the necessary EXEC value, and these break IRA's
assumption and cause ICEs.

This patch simply has IRA skip unknown pseudos, and the problem goes away.

Presumably, it's not ideal that these registers have not been processed by IRA,
but it does not appear to do any real harm.

2018-09-05  Andrew Stubbs  

gcc/
* ira.c (setup_preferred_alternate_classes_for_new_pseudos): Skip
pseudos not created by this pass.
(move_unallocated_pseudos): Likewise.
---
 gcc/ira.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/ira.c b/gcc/ira.c
index def194a..e0c293c 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -2769,7 +2769,12 @@ setup_preferred_alternate_classes_for_new_pseudos (int start)
   for (i = start; i < max_regno; i++)
 {
   old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]);
-  ira_assert (i != old_regno);
+
+  /* Skip any new pseudos not created directly by this pass.
+	 gen_move_insn can do this on AMD GCN, for example.  */
+  if (i == old_regno)
+	continue;
+
   setup_reg_classes (i, reg_preferred_class (old_regno),
 			 reg_alternate_class (old_regno),
 			 reg_allocno_class (old_regno));
@@ -5054,6 +5059,12 @@ move_unallocated_pseudos (void)
   {
 	int idx = i - first_moveable_pseudo;
 	rtx other_reg = pseudo_replaced_reg[idx];
+
+	/* Skip any new pseudos not created directly by find_moveable_pseudos.
+	   gen_move_insn can do this on AMD GCN, for example.  */
+	if (!other_reg)
+	  continue;
+
 	rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (i));
 	/* The use must follow all definitions of OTHER_REG, so we can
 	   insert the new definition immediately after any of them.  */


[PATCH 14/25] Disable inefficient vectorization of elementwise loads/stores.

2018-09-05 Thread ams

If the autovectorizer tries to load a GCN 64-lane vector elementwise then it
blows away the register file and produces horrible code.

This patch simply disallows elementwise loads for such large vectors.  Is there
a better way to disable this in the middle-end?

2018-09-05  Julian Brown  

gcc/
* tree-vect-stmts.c (get_load_store_type): Don't use VMAT_ELEMENTWISE
loads/stores with many-element (>=64) vectors.
---
 gcc/tree-vect-stmts.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8875201..a333991 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2452,6 +2452,26 @@ get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 	*memory_access_type = VMAT_CONTIGUOUS;
 }
 
+  /* FIXME: Element-wise accesses can be extremely expensive if we have a
+ large number of elements to deal with (e.g. 64 for AMD GCN) using the
+ current generic code expansion.  Until an efficient code sequence is
+ supported for affected targets instead, don't attempt vectorization for
+ VMAT_ELEMENTWISE at all.  */
+  if (*memory_access_type == VMAT_ELEMENTWISE)
+{
+  poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
+
+  if (maybe_ge (nelements, 64))
+	{
+	  if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+	  "too many elements (%u) for elementwise accesses\n",
+	  (unsigned) nelements.to_constant ());
+
+	  return false;
+	}
+}
+
   if ((*memory_access_type == VMAT_ELEMENTWISE
|| *memory_access_type == VMAT_STRIDED_SLP)
   && !nunits.is_constant ())


[PATCH 17/25] Fix Fortran STOP.

2018-09-05 Thread ams

The minimal libgfortran setup was created for NVPTX, but will also be used by
AMD GCN.

This patch simply removes an assumption that NVPTX is the only user.
Specifically, NVPTX exit is broken, but AMD GCN exit works just fine.

2018-09-05  Andrew Stubbs  

libgfortran/
* runtime/minimal.c (exit): Only work around nvptx bugs on nvptx.
---
 libgfortran/runtime/minimal.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c
index 0b1efeb..8940f97 100644
--- a/libgfortran/runtime/minimal.c
+++ b/libgfortran/runtime/minimal.c
@@ -197,10 +197,12 @@ sys_abort (void)
 #define st_printf printf
 #undef estr_write
 #define estr_write printf
+#if __nvptx__
 /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region
doesn't terminate process'.  */
 #undef exit
 #define exit(...) do { abort (); } while (0)
+#endif
 #undef exit_error
 #define exit_error(...) do { abort (); } while (0)
 


[PATCH 15/25] Don't double-count early-clobber matches.

2018-09-05 Thread ams

Given a pattern with a number of operands:

(match_operand 0 "" "=&v")
(match_operand 1 "" " v0")
(match_operand 2 "" " v0")
(match_operand 3 "" " v0")

GCC will currently increment "reject" once, for operand 0, and then decrement
it once for each of the other operands, ending with reject == -2 and an
assertion failure.  If there's a conflict then it might try to decrement reject
yet again.

Incidentally, what these patterns are trying to achieve is an allocation in
which operand 0 may match one of the other operands, but may not partially
overlap any of them.  Ideally there'd be a better way to do this.

In any case, it will affect any pattern in which multiple operands may (or
must) match an early-clobber operand.

The patch only allows a reject-- when one has not already occurred, for that
operand.

2018-09-05  Andrew Stubbs  

gcc/
* lra-constraints.c (process_alt_operands): Check
matching_early_clobber before decrementing reject, and set
matching_early_clobber after.
* lra-int.h (struct lra_operand_data): Add matching_early_clobber.
* lra.c (setup_operand_alternative): Initialize matching_early_clobber.
---
 gcc/lra-constraints.c | 22 ++
 gcc/lra-int.h |  3 +++
 gcc/lra.c |  1 +
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 8be4d46..55163f1 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -2202,7 +2202,13 @@ process_alt_operands (int only_alternative)
  "%d Matching earlyclobber alt:"
  " reject--\n",
  nop);
-			reject--;
+			if (!curr_static_id->operand[m]
+		 .matching_early_clobber)
+			  {
+reject--;
+curr_static_id->operand[m]
+		.matching_early_clobber = 1;
+			  }
 			  }
 			/* Otherwise we prefer no matching
 			   alternatives because it gives more freedom
@@ -2948,15 +2954,11 @@ process_alt_operands (int only_alternative)
 	  curr_alt_dont_inherit_ops[curr_alt_dont_inherit_ops_num++]
 		= last_conflict_j;
 	  losers++;
-	  /* Early clobber was already reflected in REJECT. */
-	  lra_assert (reject > 0);
 	  if (lra_dump_file != NULL)
 		fprintf
 		  (lra_dump_file,
 		   "%d Conflict early clobber reload: reject--\n",
 		   i);
-	  reject--;
-	  overall += LRA_LOSER_COST_FACTOR - 1;
 	}
 	  else
 	{
@@ -2980,17 +2982,21 @@ process_alt_operands (int only_alternative)
 		}
 	  curr_alt_win[i] = curr_alt_match_win[i] = false;
 	  losers++;
-	  /* Early clobber was already reflected in REJECT. */
-	  lra_assert (reject > 0);
 	  if (lra_dump_file != NULL)
 		fprintf
 		  (lra_dump_file,
 		   "%d Matched conflict early clobber reloads: "
 		   "reject--\n",
 		   i);
+	}
+	  /* Early clobber was already reflected in REJECT. */
+	  if (!curr_static_id->operand[i].matching_early_clobber)
+	{
+	  lra_assert (reject > 0);
 	  reject--;
-	  overall += LRA_LOSER_COST_FACTOR - 1;
+	  curr_static_id->operand[i].matching_early_clobber = 1;
 	}
+	  overall += LRA_LOSER_COST_FACTOR - 1;
 	}
   if (lra_dump_file != NULL)
 	fprintf (lra_dump_file, "  alt=%d,overall=%d,losers=%d,rld_nregs=%d\n",
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 5267b53..f193e1f 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -147,6 +147,9 @@ struct lra_operand_data
  This field is set up every time when corresponding
  operand_alternative in lra_static_insn_data is set up.  */
   unsigned int early_clobber : 1;
+  /* True if there is an early clobber that has a matching alternative.
+ This field is used to prevent multiple matches being counted.  */
+  unsigned int matching_early_clobber : 1;
   /* True if the operand is an address.  */
   unsigned int is_address : 1;
 };
diff --git a/gcc/lra.c b/gcc/lra.c
index aa768fb..01dd8b8 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -797,6 +797,7 @@ setup_operand_alternative (lra_insn_recog_data_t data,
 {
   static_data->operand[i].early_clobber_alts = 0;
   static_data->operand[i].early_clobber = false;
+  static_data->operand[i].matching_early_clobber = false;
   static_data->operand[i].is_address = false;
   if (static_data->operand[i].constraint[0] == '%')
 	{


[PATCH 19/25] GCN libgfortran.

2018-09-05 Thread ams

This patch contains the GCN port of libgfortran.  We use the minimal
configuration created for NVPTX.  That's all that's required, besides the
target-independent bug fixes posted already.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

libgfortran/
* configure.ac: Use minimal mode for amdgcn.
* configure: Regenerate.
---
 libgfortran/configure| 7 ---
 libgfortran/configure.ac | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/libgfortran/configure b/libgfortran/configure
index a583b67..fd8b697 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -5994,7 +5994,8 @@ fi
 # * C library support for other features such as signal, environment
 #   variables, time functions
 
- if test "x${target_cpu}" = xnvptx; then
+ if test "x${target_cpu}" = xnvptx \
+ || test "x${target_cpu}" = xamdgcn; then
   LIBGFOR_MINIMAL_TRUE=
   LIBGFOR_MINIMAL_FALSE='#'
 else
@@ -12514,7 +12515,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12517 "configure"
+#line 12518 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -12620,7 +12621,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12623 "configure"
+#line 12624 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 05952aa..11b629d 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -206,7 +206,8 @@ AM_CONDITIONAL(LIBGFOR_USE_SYMVER_SUN, [test "x$gfortran_use_symver" = xsun])
 # * C library support for other features such as signal, environment
 #   variables, time functions
 
-AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx])
+AM_CONDITIONAL(LIBGFOR_MINIMAL, [test "x${target_cpu}" = xnvptx \
+ || test "x${target_cpu}" = xamdgcn])
 
 # Figure out whether the compiler supports "-ffunction-sections -fdata-sections",
 # similarly to how libstdc++ does it


[PATCH 18/25] Fix interleaving of Fortran stop messages

2018-09-05 Thread ams

Fortran STOP and ERROR STOP use a different function to print the "STOP" string
and the message string.  On GCN this results in out-of-order output, such as
"<msg>ERROR STOP ".

This patch fixes the problem by making estr_write use the proper Fortran write,
not C printf, so both parts are now output the same way.  This also ensures
that both parts are output to STDERR (not that that means anything on GCN).

2018-09-05  Kwok Cheung Yeung  

libgfortran/
* runtime/minimal.c (estr_write): Define in terms of write.
---
 libgfortran/runtime/minimal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgfortran/runtime/minimal.c b/libgfortran/runtime/minimal.c
index 8940f97..b6d26fd 100644
--- a/libgfortran/runtime/minimal.c
+++ b/libgfortran/runtime/minimal.c
@@ -196,7 +196,7 @@ sys_abort (void)
 #undef st_printf
 #define st_printf printf
 #undef estr_write
-#define estr_write printf
+#define estr_write(X) write(STDERR_FILENO, (X), strlen (X))
 #if __nvptx__
 /* Map "exit" to "abort"; see PR85463 '[nvptx] "exit" in offloaded region
doesn't terminate process'.  */


[PATCH 20/25] GCN libgcc.

2018-09-05 Thread ams

This patch contains the GCN port of libgcc.  I've broken it out just to keep
both parts more manageable.

We have the usual stuff, plus a "gomp_print" implementation intended to provide
a means to output text to console without using the full printf.  Originally
this was because we did not have a working Newlib port, but now it provides the
underlying mechanism for printf.  It's also much lighter than printf, and
therefore more suitable for debugging offload kernels (for which there is no
debugger, yet).

In order to work in offload kernels the same function must be present in both
host and GCN toolchains.  Therefore it needs to live in libgomp (hence the
name).  However, having found it also useful in standalone testing I have
moved the GCN implementation to libgcc.

It was also necessary to provide a means to disable EMUTLS.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

libgcc/
* Makefile.in: Don't add emutls.c when --enable-emutls is "no".
* config.host: Recognize amdgcn*-*-amdhsa.
* config/gcn/crt0.c: New file.
* config/gcn/gomp_print.c: New file.
* config/gcn/lib2-divmod-hi.c: New file.
* config/gcn/lib2-divmod.c: New file.
* config/gcn/lib2-gcn.h: New file.
* config/gcn/reduction.c: New file.
* config/gcn/sfp-machine.h: New file.
* config/gcn/t-amdgcn: New file.
---
 libgcc/Makefile.in |   2 +
 libgcc/config.host |   8 +++
 libgcc/config/gcn/crt0.c   |  23 
 libgcc/config/gcn/gomp_print.c |  99 +++
 libgcc/config/gcn/lib2-divmod-hi.c | 117 +
 libgcc/config/gcn/lib2-divmod.c| 117 +
 libgcc/config/gcn/lib2-gcn.h   |  49 
 libgcc/config/gcn/reduction.c  |  30 ++
 libgcc/config/gcn/sfp-machine.h|  51 
 libgcc/config/gcn/t-amdgcn |  25 
 10 files changed, 521 insertions(+)
 create mode 100644 libgcc/config/gcn/crt0.c
 create mode 100644 libgcc/config/gcn/gomp_print.c
 create mode 100644 libgcc/config/gcn/lib2-divmod-hi.c
 create mode 100644 libgcc/config/gcn/lib2-divmod.c
 create mode 100644 libgcc/config/gcn/lib2-gcn.h
 create mode 100644 libgcc/config/gcn/reduction.c
 create mode 100644 libgcc/config/gcn/sfp-machine.h
 create mode 100644 libgcc/config/gcn/t-amdgcn

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 0c5b264..6f68257 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -429,9 +429,11 @@ LIB2ADD += enable-execute-stack.c
 # While emutls.c has nothing to do with EH, it is in LIB2ADDEH*
 # instead of LIB2ADD because that's the way to be sure on some targets
 # (e.g. *-*-darwin*) only one copy of it is linked.
+ifneq ($(enable_emutls),no)
 LIB2ADDEH += $(srcdir)/emutls.c
 LIB2ADDEHSTATIC += $(srcdir)/emutls.c
 LIB2ADDEHSHARED += $(srcdir)/emutls.c
+endif
 
 # Library members defined in libgcc2.c.
 lib2funcs = _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _cmpdi2 _ucmpdi2	   \
diff --git a/libgcc/config.host b/libgcc/config.host
index 029f656..29178da 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -91,6 +91,10 @@ alpha*-*-*)
 am33_2.0-*-linux*)
 	cpu_type=mn10300
 	;;
+amdgcn*-*-*)
+	cpu_type=gcn
+	tmake_file="${tmake_file} t-softfp-sfdf t-softfp"
+	;;
 arc*-*-*)
 	cpu_type=arc
 	;;
@@ -384,6 +388,10 @@ alpha*-dec-*vms*)
 	extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o"
 	md_unwind_header=alpha/vms-unwind.h
 	;;
+amdgcn*-*-amdhsa)
+	tmake_file="$tmake_file gcn/t-amdgcn"
+	extra_parts="crt0.o"
+	;;
 arc*-*-elf*)
 	tmake_file="arc/t-arc"
 	extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o"
diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c
new file mode 100644
index 000..f4f367b
--- /dev/null
+++ b/libgcc/config/gcn/crt0.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Provide an entry point sy

[PATCH 22/25] Add dg-require-effective-target exceptions

2018-09-05 Thread ams

There are a number of tests that fail because they assume that exceptions are
available, but GCN does not support them, yet.

This patch adds "dg-require-effective-target exceptions" in all the affected
tests.  There's probably an automatic way to test for exceptions, but the
current implementation simply says that AMD GCN does not support them.  This
should ensure that no other targets are affected by the change.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

gcc/testsuite/
* c-c++-common/ubsan/pr71512-1.c: Require exceptions.
* c-c++-common/ubsan/pr71512-2.c: Require exceptions.
* gcc.c-torture/compile/pr34648.c: Require exceptions.
* gcc.c-torture/compile/pr41469.c: Require exceptions.
* gcc.dg/20111216-1.c: Require exceptions.
* gcc.dg/cleanup-10.c: Require exceptions.
* gcc.dg/cleanup-11.c: Require exceptions.
* gcc.dg/cleanup-12.c: Require exceptions.
* gcc.dg/cleanup-13.c: Require exceptions.
* gcc.dg/cleanup-5.c: Require exceptions.
* gcc.dg/cleanup-8.c: Require exceptions.
* gcc.dg/cleanup-9.c: Require exceptions.
* gcc.dg/gomp/pr29955.c: Require exceptions.
* gcc.dg/lto/pr52097_0.c: Require exceptions.
* gcc.dg/nested-func-5.c: Require exceptions.
* gcc.dg/pch/except-1.c: Require exceptions.
* gcc.dg/pch/valid-2.c: Require exceptions.
* gcc.dg/pr41470.c: Require exceptions.
* gcc.dg/pr42427.c: Require exceptions.
* gcc.dg/pr44545.c: Require exceptions.
* gcc.dg/pr47086.c: Require exceptions.
* gcc.dg/pr51481.c: Require exceptions.
* gcc.dg/pr51644.c: Require exceptions.
* gcc.dg/pr52046.c: Require exceptions.
* gcc.dg/pr54669.c: Require exceptions.
* gcc.dg/pr56424.c: Require exceptions.
* gcc.dg/pr64465.c: Require exceptions.
* gcc.dg/pr65802.c: Require exceptions.
* gcc.dg/pr67563.c: Require exceptions.
* gcc.dg/tree-ssa/pr41469-1.c: Require exceptions.
* gcc.dg/tree-ssa/ssa-dse-28.c: Require exceptions.
* gcc.dg/vect/pr46663.c: Require exceptions.
* lib/target-supports.exp (check_effective_target_exceptions): New.
---
 gcc/testsuite/c-c++-common/ubsan/pr71512-1.c  |  1 +
 gcc/testsuite/c-c++-common/ubsan/pr71512-2.c  |  1 +
 gcc/testsuite/gcc.c-torture/compile/pr34648.c |  1 +
 gcc/testsuite/gcc.c-torture/compile/pr41469.c |  1 +
 gcc/testsuite/gcc.dg/20111216-1.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-10.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-11.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-12.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-13.c |  1 +
 gcc/testsuite/gcc.dg/cleanup-5.c  |  1 +
 gcc/testsuite/gcc.dg/cleanup-8.c  |  1 +
 gcc/testsuite/gcc.dg/cleanup-9.c  |  1 +
 gcc/testsuite/gcc.dg/gomp/pr29955.c   |  1 +
 gcc/testsuite/gcc.dg/lto/pr52097_0.c  |  1 +
 gcc/testsuite/gcc.dg/nested-func-5.c  |  1 +
 gcc/testsuite/gcc.dg/pch/except-1.c   |  1 +
 gcc/testsuite/gcc.dg/pch/valid-2.c|  2 +-
 gcc/testsuite/gcc.dg/pr41470.c|  1 +
 gcc/testsuite/gcc.dg/pr42427.c|  1 +
 gcc/testsuite/gcc.dg/pr44545.c|  1 +
 gcc/testsuite/gcc.dg/pr47086.c|  1 +
 gcc/testsuite/gcc.dg/pr51481.c|  1 +
 gcc/testsuite/gcc.dg/pr51644.c|  1 +
 gcc/testsuite/gcc.dg/pr52046.c|  1 +
 gcc/testsuite/gcc.dg/pr54669.c|  1 +
 gcc/testsuite/gcc.dg/pr56424.c|  1 +
 gcc/testsuite/gcc.dg/pr64465.c|  1 +
 gcc/testsuite/gcc.dg/pr65802.c|  1 +
 gcc/testsuite/gcc.dg/pr67563.c|  1 +
 gcc/testsuite/gcc.dg/tree-ssa/pr41469-1.c |  1 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-28.c|  1 +
 gcc/testsuite/gcc.dg/vect/pr46663.c   |  1 +
 gcc/testsuite/lib/target-supports.exp | 10 ++
 33 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
index 2a90ab1..8af9365 100644
--- a/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-1.c
@@ -1,5 +1,6 @@
 /* PR c/71512 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -fnon-call-exceptions -ftrapv -fexceptions -fsanitize=undefined" } */
+/* { dg-require-effective-target exceptions } */
 
 #include "../../gcc.dg/pr44545.c"
diff --git a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
index 1c95593..0c16934 100644
--- a/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
+++ b/gcc/testsuite/c-c++-common/ubsan/pr71512-2.c
@@ -1,5 +1,6 @@
 /* PR c/71512 */
 /* { dg-do compile } */
 /* { dg-options "-O -fexceptions -fnon-call-exceptions -ftrapv -fsanitize=un

[PATCH 23/25] Testsuite: GCN is always PIE.

2018-09-05 Thread ams

The GCN/HSA loader ignores the load address and uses a random location, so we
build all GCN binaries as PIE, by default.

This patch makes the necessary testsuite adjustments to make this work
correctly.

2018-09-05  Andrew Stubbs  

gcc/testsuite/
* gcc.dg/graphite/scop-19.c: Check pie_enabled.
* gcc.dg/pic-1.c: Disable on amdgcn.
* gcc.dg/pic-2.c: Disable on amdgcn.
* gcc.dg/pic-3.c: Disable on amdgcn.
* gcc.dg/pic-4.c: Disable on amdgcn.
* gcc.dg/pie-3.c: Disable on amdgcn.
* gcc.dg/pie-4.c: Disable on amdgcn.
* gcc.dg/uninit-19.c: Check pie_enabled.
* lib/target-supports.exp (check_effective_target_pie): Add amdgcn.
---
 gcc/testsuite/gcc.dg/graphite/scop-19.c | 4 ++--
 gcc/testsuite/gcc.dg/pic-1.c| 2 +-
 gcc/testsuite/gcc.dg/pic-2.c| 1 +
 gcc/testsuite/gcc.dg/pic-3.c| 2 +-
 gcc/testsuite/gcc.dg/pic-4.c| 2 +-
 gcc/testsuite/gcc.dg/pie-3.c| 2 +-
 gcc/testsuite/gcc.dg/pie-4.c| 2 +-
 gcc/testsuite/gcc.dg/uninit-19.c| 4 ++--
 gcc/testsuite/lib/target-supports.exp   | 3 ++-
 9 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/graphite/scop-19.c b/gcc/testsuite/gcc.dg/graphite/scop-19.c
index c89717b..6028132 100644
--- a/gcc/testsuite/gcc.dg/graphite/scop-19.c
+++ b/gcc/testsuite/gcc.dg/graphite/scop-19.c
@@ -31,6 +31,6 @@ d_growable_string_append_buffer (struct d_growable_string *dgs,
   if (need > dgs->alc)
 d_growable_string_resize (dgs, need);
 }
-/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target nonpic } } } */
-/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! nonpic } } } } */
+/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 2 "graphite" { target { nonpic || pie_enabled } } } } */
+/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite" { target { ! { nonpic || pie_enabled } } } } } */
 
diff --git a/gcc/testsuite/gcc.dg/pic-1.c b/gcc/testsuite/gcc.dg/pic-1.c
index 82ba43d..4bb332e 100644
--- a/gcc/testsuite/gcc.dg/pic-1.c
+++ b/gcc/testsuite/gcc.dg/pic-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*-*-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*-*-* amdgcn*-*-* } } } } */
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fpic" } */
 
diff --git a/gcc/testsuite/gcc.dg/pic-2.c b/gcc/testsuite/gcc.dg/pic-2.c
index bccec13..3846ec4 100644
--- a/gcc/testsuite/gcc.dg/pic-2.c
+++ b/gcc/testsuite/gcc.dg/pic-2.c
@@ -2,6 +2,7 @@
 /* { dg-require-effective-target fpic } */
 /* { dg-options "-fPIC" } */
 /* { dg-skip-if "__PIC__ is always 1 for MIPS" { mips*-*-* } } */
+/* { dg-skip-if "__PIE__ is always defined for GCN" { amdgcn*-*-* } } */
 
 #if __PIC__ != 2
 # error __PIC__ is not 2!
diff --git a/gcc/testsuite/gcc.dg/pic-3.c b/gcc/testsuite/gcc.dg/pic-3.c
index c56f06f..1397977 100644
--- a/gcc/testsuite/gcc.dg/pic-3.c
+++ b/gcc/testsuite/gcc.dg/pic-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-pic" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pic-4.c b/gcc/testsuite/gcc.dg/pic-4.c
index 2afdd99..d6d9dc9 100644
--- a/gcc/testsuite/gcc.dg/pic-4.c
+++ b/gcc/testsuite/gcc.dg/pic-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-PIC" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pie-3.c b/gcc/testsuite/gcc.dg/pie-3.c
index 5577437..fd4a48d 100644
--- a/gcc/testsuite/gcc.dg/pie-3.c
+++ b/gcc/testsuite/gcc.dg/pie-3.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-pie" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/pie-4.c b/gcc/testsuite/gcc.dg/pie-4.c
index 4134676..5523602 100644
--- a/gcc/testsuite/gcc.dg/pie-4.c
+++ b/gcc/testsuite/gcc.dg/pie-4.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* } } } } */
+/* { dg-do compile { target { ! { *-*-darwin* hppa*64*-*-* mips*-*-linux-* amdgcn*-*-* } } } } */
 /* { dg-options "-fno-PIE" } */
 
 #ifdef __PIC__
diff --git a/gcc/testsuite/gcc.dg/uninit-19.c b/gcc/testsuite/gcc.dg/uninit-19.c
index 094dc0e..3f5f06a 100644
--- a/gcc/testsuite/gcc.dg/uninit-19.c
+++ b/gcc/testsuite/gcc.dg/uninit-19.c
@@ -12,7 +12,7 @@ fn1 (int p1, float *f1, float *f2, float *f3, unsigned char *c1, float *f4,
 {
   if (p1 & 8)
 b[3] = p10[a];
-  /* { dg-warning "may be used uninitialized" "" { target { { nonpic } || { hppa*64*-*-* } } } .-1

[PATCH 24/25] Ignore LLVM's blank lines.

2018-09-05 Thread ams

The GCN toolchain must use the LLVM assembler and linker because there's no
binutils port.  The LLVM tools do not have the same diagnostic style as
binutils, so the "blank line(s) in output" tests are inappropriate (and very
noisy).

The LLVM tools also have different command line options, so it's not possible
to autodetect object formats in the same way.

This patch addresses both issues.

2018-09-05  Andrew Stubbs  

gcc/testsuite/
* lib/file-format.exp (gcc_target_object_format): Handle AMD GCN.
* lib/gcc-dg.exp (gcc-dg-prune): Ignore blank lines from the LLVM
linker.
* lib/target-supports.exp (check_effective_target_llvm_binutils): New.
---
 gcc/testsuite/lib/file-format.exp |  3 +++
 gcc/testsuite/lib/gcc-dg.exp  |  2 +-
 gcc/testsuite/lib/target-supports.exp | 14 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/file-format.exp b/gcc/testsuite/lib/file-format.exp
index 5c47246..c595fe2 100644
--- a/gcc/testsuite/lib/file-format.exp
+++ b/gcc/testsuite/lib/file-format.exp
@@ -41,6 +41,9 @@ proc gcc_target_object_format { } {
 } elseif { [istarget *-*-aix*] } {
 	# AIX doesn't necessarily have objdump, so hand-code it.
 	set gcc_target_object_format_saved coff
+} elseif { [istarget *-*-amdhsa*] } {
+	# AMD GCN uses LLVM objdump which is not CLI-compatible
+	set gcc_target_object_format_saved elf
 } else {
 set objdump_name [find_binutils_prog objdump]
 set open_file [open objfmtst.c w]
diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
index f5e6bef..7df348e 100644
--- a/gcc/testsuite/lib/gcc-dg.exp
+++ b/gcc/testsuite/lib/gcc-dg.exp
@@ -361,7 +361,7 @@ proc gcc-dg-prune { system text } {
 
 # Complain about blank lines in the output (PR other/69006)
 global allow_blank_lines
-if { !$allow_blank_lines } {
+if { !$allow_blank_lines && ![check_effective_target_llvm_binutils]} {
 	set num_blank_lines [llength [regexp -all -inline "\n\n" $text]]
 	if { $num_blank_lines } {
 	global testname_with_flags
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 61442bd..1e627fa 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -9129,6 +9129,14 @@ proc check_effective_target_offload_hsa { } {
 } "-foffload=hsa" ]
 }
 
+# Return 1 if the compiler has been configured with AMD GCN offloading.
+
+proc check_effective_target_offload_gcn { } {
+return [check_no_compiler_messages offload_gcn assembly {
+	int main () {return 0;}
+} "-foffload=amdgcn-unknown-amdhsa" ]
+}
+
 # Return 1 if the target support -fprofile-update=atomic
 proc check_effective_target_profile_update_atomic {} {
 return [check_no_compiler_messages profile_update_atomic assembly {
@@ -9427,3 +9435,9 @@ proc check_effective_target_cet { } {
 	}
 } "-O2" ]
 }
+
+# Return 1 if this target uses an LLVM assembler and/or linker
+proc check_effective_target_llvm_binutils { } {
+return [expr { [istarget amdgcn*-*-*]
+		   || [check_effective_target_offload_gcn] } ]
+}


[PATCH 25/25] Port testsuite to GCN

2018-09-05 Thread ams

This collection of miscellaneous patches configures the testsuite to run on AMD
GCN in a standalone (i.e. not offloading) configuration.  It assumes you have
your DejaGnu set up to run binaries via the gcn-run tool.

2018-09-05  Andrew Stubbs  
Kwok Cheung Yeung  
Julian Brown  
Tom de Vries  

gcc/testsuite/
* gcc.dg/20020312-2.c: Add amdgcn support.
* gcc.dg/Wno-frame-address.c: Disable on amdgcn.
* gcc.dg/builtin-apply2.c: Likewise.
* gcc.dg/torture/stackalign/builtin-apply-2.c: Likewise.
* gcc.dg/gimplefe-28.c: Force -ffast-math.
* gcc.dg/intermod-1.c: Add -mlocal-symbol-id on amdgcn.
* gcc.dg/memcmp-1.c: Increase timeout factor.
* gcc.dg/pr59605-2.c: Add -DMAX_COPY=1025 on amdgcn.
* gcc.dg/sibcall-10.c: xfail on amdgcn.
* gcc.dg/sibcall-9.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-11c.c: Likewise.
* gcc.dg/tree-ssa/pr84512.c: Likewise.
* gcc.dg/tree-ssa/loop-1.c: Adjust expectations for amdgcn.
* gfortran.dg/bind_c_array_params_2.f90: Likewise.
* gcc.dg/vect/tree-vect.h: Avoid signal on amdgcn.
* lib/target-supports.exp (check_effective_target_trampolines):
Configure amdgcn.
(check_profiling_available): Likewise.
(check_effective_target_global_constructor): Likewise.
(check_effective_target_return_address): Likewise.
(check_effective_target_fopenacc): Likewise.
(check_effective_target_fopenmp): Likewise.
(check_effective_target_vect_int): Likewise.
(check_effective_target_vect_intfloat_cvt): Likewise.
(check_effective_target_vect_uintfloat_cvt): Likewise.
(check_effective_target_vect_floatint_cvt): Likewise.
(check_effective_target_vect_floatuint_cvt): Likewise.
(check_effective_target_vect_simd_clones): Likewise.
(check_effective_target_vect_shift): Likewise.
(check_effective_target_whole_vector_shift): Likewise.
(check_effective_target_vect_bswap): Likewise.
(check_effective_target_vect_shift_char): Likewise.
(check_effective_target_vect_long): Likewise.
(check_effective_target_vect_float): Likewise.
(check_effective_target_vect_double): Likewise.
(check_effective_target_vect_perm): Likewise.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi_pattern): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si_pattern): Likewise.
(check_effective_target_vect_natural_alignment): Likewise.
(check_effective_target_vect_fully_masked): Likewise.
(check_effective_target_vect_element_align): Likewise.
(check_effective_target_vect_masked_store): Likewise.
(check_effective_target_vect_scatter_store): Likewise.
(check_effective_target_vect_condition): Likewise.
(check_effective_target_vect_cond_mixed): Likewise.
(check_effective_target_vect_char_mult): Likewise.
(check_effective_target_vect_short_mult): Likewise.
(check_effective_target_vect_int_mult): Likewise.
(check_effective_target_sqrt_insn): Likewise.
(check_effective_target_vect_call_sqrtf): Likewise.
(check_effective_target_vect_call_btrunc): Likewise.
(check_effective_target_vect_call_btruncf): Likewise.
(check_effective_target_vect_call_ceil): Likewise.
(check_effective_target_vect_call_floorf): Likewise.
(check_effective_target_lto): Likewise.
(check_vect_support_and_set_flags): Likewise.
(check_effective_target_vect_stridedN): Enable when fully masked is
available.
---
 gcc/testsuite/gcc.dg/20020312-2.c  |   2 +
 gcc/testsuite/gcc.dg/Wno-frame-address.c   |   2 +-
 gcc/testsuite/gcc.dg/builtin-apply2.c  |   2 +-
 gcc/testsuite/gcc.dg/gimplefe-28.c |   2 +-
 gcc/testsuite/gcc.dg/intermod-1.c  |   1 +
 gcc/testsuite/gcc.dg/memcmp-1.c|   1 +
 gcc/testsuite/gcc.dg/pr59605-2.c   |   2 +-
 gcc/testsuite/gcc.dg/sibcall-10.c  |   2 +-
 gcc/testsuite/gcc.dg/sibcall-9.c   |   2 +-
 .../gcc.dg/torture/stackalign/builtin-apply-2.c|   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/loop-1.c |   6 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c|   2 +-
 gcc/testsuite/gcc.dg/vect/tree-vect.h  |   4 +
 .../gfortran.dg/bind_c_array_params_2.f90  |   3 +-
 gcc/testsuite/lib/target-supports.exp  | 126 +++--
 16 files changed, 113 insertions(+), 48 deletions(-)

diff