Re: [PATCH] gimple-isel: Check whether IFN_VCONDEQ is supported [PR98560]

2021-01-07 Thread Richard Biener
On Wed, 6 Jan 2021, Richard Sandiford wrote:

> This patch follows on from the previous one for the PR and
> makes sure that we can handle == as well as <.  Previously
> we assumed without checking that IFN_VCONDEQ was available
> if IFN_VCOND or IFN_VCONDU wasn't.
> 
> The patch also fixes the definition of the IFN_VCOND* functions.
> The optabs are convert optabs in which the first mode is the
> data mode and the second mode is the comparison or mask mode.
> 
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.

Thanks,
Richard.

> Richard
> 
> 
> gcc/
>   PR tree-optimization/98560
>   * internal-fn.def (IFN_VCONDU, IFN_VCONDEQ): Use type vec_cond.
>   * internal-fn.c (vec_cond_mask_direct): Get the data mode from
>   argument 1.
>   (vec_cond_direct): Likewise argument 2.
>   (vec_condu_direct, vec_condeq_direct): Delete.
>   (expand_vect_cond_optab_fn): Rename to...
>   (expand_vec_cond_optab_fn): ...this, replacing old macro.
>   (expand_vec_condu_optab_fn, expand_vec_condeq_optab_fn): Delete.
>   (expand_vect_cond_mask_optab_fn): Rename to...
>   (expand_vec_cond_mask_optab_fn): ...this, replacing old macro.
>   (direct_vec_cond_mask_optab_supported_p): Treat the optab as a
>   convert optab.
>   (direct_vec_cond_optab_supported_p): Likewise.
>   (direct_vec_condu_optab_supported_p): Delete.
>   (direct_vec_condeq_optab_supported_p): Delete.
>   * gimple-isel.cc: Include internal-fn.h.
>   (gimple_expand_vec_cond_expr): Check that IFN_VCONDEQ is supported
>   before using it.
> 
> gcc/testsuite/
>   PR tree-optimization/98560
>   * gcc.dg/vect/pr98560-2.c: New test.
> ---
>  gcc/gimple-isel.cc|  6 +-
>  gcc/internal-fn.c | 22 ++
>  gcc/internal-fn.def   |  4 ++--
>  gcc/testsuite/gcc.dg/vect/pr98560-2.c | 17 +
>  4 files changed, 30 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-2.c
> 
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 9c07d79a86c..3ca29191c24 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "memmodel.h"
>  #include "optabs.h"
>  #include "gimple-fold.h"
> +#include "internal-fn.h"
>  
>  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> internal function based on vector type of selected expansion.
> @@ -246,7 +247,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>Try changing it to NE_EXPR.  */
> tcode = NE_EXPR;
>   }
> -  if (tcode == EQ_EXPR || tcode == NE_EXPR)
> +  if ((tcode == EQ_EXPR || tcode == NE_EXPR)
> +   && direct_internal_fn_supported_p (IFN_VCONDEQ, TREE_TYPE (lhs),
> +  TREE_TYPE (op0a),
> +  OPTIMIZE_FOR_BOTH))
>   {
> tree tcode_tree = build_int_cst (integer_type_node, tcode);
> return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 996f0fb6c67..dd7173126fb 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -110,10 +110,8 @@ init_internal_fns ()
>  #define mask_store_direct { 3, 2, false }
>  #define store_lanes_direct { 0, 0, false }
>  #define mask_store_lanes_direct { 0, 0, false }
> -#define vec_cond_mask_direct { 0, 0, false }
> -#define vec_cond_direct { 0, 0, false }
> -#define vec_condu_direct { 0, 0, false }
> -#define vec_condeq_direct { 0, 0, false }
> +#define vec_cond_mask_direct { 1, 0, false }
> +#define vec_cond_direct { 2, 0, false }
>  #define scatter_store_direct { 3, 1, false }
>  #define len_store_direct { 3, 3, false }
>  #define vec_set_direct { 3, 3, false }
> @@ -2766,7 +2764,7 @@ expand_partial_store_optab_fn (internal_fn, gcall 
> *stmt, convert_optab optab)
> The expansion of STMT happens based on OPTAB table associated.  */
>  
>  static void
> -expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
> +expand_vec_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
>class expand_operand ops[6];
>insn_code icode;
> @@ -2802,15 +2800,11 @@ expand_vect_cond_optab_fn (internal_fn, gcall *stmt, 
> convert_optab optab)
>  emit_move_insn (target, ops[0].value);
>  }
>  
> -#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
> -#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
> -#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
> -
>  /* Expand VCOND_MASK optab internal function.
> The expansion of STMT happens based on OPTAB table associated.  */
>  
>  static void
> -expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab 
> optab)
> +expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
>class expand_operand

Re: [PATCH v3] handle MEM_REF with void* arguments (PR c++/95768)

2021-01-07 Thread Jakub Jelinek via Gcc-patches
On Sat, Jan 02, 2021 at 03:22:25PM -0700, Martin Sebor via Gcc-patches wrote:
> PR c++/95768 - pretty-printer ICE on -Wuninitialized with allocated storage
> 
> gcc/c-family/ChangeLog:
> 
>   PR c++/95768
>   * c-pretty-print.c (c_pretty_printer::primary_expression): For
>   SSA_NAMEs print VLA names and GIMPLE defining statements.
>   (print_mem_ref): New function.
>   (c_pretty_printer::unary_expression): Call it.

This broke:
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so  (test for 
warnings, line 16)
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so  (test for 
warnings, line 63)
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so (test for excess 
errors)
and
+FAIL: g++.dg/cpp0x/constexpr-trivial2.C  -std=c++11  (test for errors, line 13)
+FAIL: g++.dg/cpp0x/constexpr-trivial2.C  -std=c++11 (test for excess errors)
The former one is just a different printing of the MEM_REF from what the
test expects, but the latter is an ICE (one needs
make check-c++-all RUNTESTFLAGS=dg.exp=constexpr-trivial2.C
to reproduce or GXX_TESTSUITE_STDS=11 or similar as C++11 is not tested by
default).

Jakub



[PATCH] bswap: Fix up recent vector CONSTRUCTOR optimization [PR98568]

2021-01-07 Thread Jakub Jelinek via Gcc-patches
Hi!

As the testcase shows, bswap can match even byte-swapping or identity
from the low part of some wider SSA_NAME.
For bswap replacement other than for vector CONSTRUCTOR the code has been
using NOP_EXPR casts if the types weren't compatible, but for vectors
we need to use VIEW_CONVERT_EXPR.  The problem with the latter is that
we require that it has the same size, which isn't guaranteed, so this patch
in those cases first adds a narrowing NOP_EXPR cast and only afterwards
does a VIEW_CONVERT_EXPR.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2021-01-06  Jakub Jelinek  

PR tree-optimization/98568
* gimple-ssa-store-merging.c (bswap_view_convert): New function.
(bswap_replace): Use it.

* g++.dg/torture/pr98568.C: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2021-01-05 16:15:58.965337667 +0100
+++ gcc/gimple-ssa-store-merging.c  2021-01-06 19:20:37.225578412 +0100
@@ -978,6 +978,25 @@ public:
 
 }; // class pass_optimize_bswap
 
+/* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
+   VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
+   first.  */
+
+static tree
+bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val)
+{
+  gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val)));
+  if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
+{
+  HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
+  tree itype = build_nonstandard_integer_type (prec, 1);
+  gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  val = gimple_assign_lhs (g);
+}
+  return build1 (VIEW_CONVERT_EXPR, type, val);
+}
+
 /* Perform the bswap optimization: replace the expression computed in the rhs
of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
bswap, load or load + bswap expression.
@@ -1100,7 +1119,7 @@ bswap_replace (gimple_stmt_iterator gsi,
  gimple_set_vuse (load_stmt, n->vuse);
  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
  if (conv_code == VIEW_CONVERT_EXPR)
-   val_tmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), val_tmp);
+   val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp);
  gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
  update_stmt (cur_stmt);
}
@@ -1144,7 +1163,7 @@ bswap_replace (gimple_stmt_iterator gsi,
  if (!is_gimple_val (src))
return NULL_TREE;
  if (conv_code == VIEW_CONVERT_EXPR)
-   src = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), src);
+   src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src);
  g = gimple_build_assign (tgt, conv_code, src);
}
   else if (cur_stmt)
@@ -1227,7 +1246,7 @@ bswap_replace (gimple_stmt_iterator gsi,
   tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
   tree atmp = tmp;
   if (conv_code == VIEW_CONVERT_EXPR)
-   atmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), tmp);
+   atmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), tmp);
   convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
   gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
 }
--- gcc/testsuite/g++.dg/torture/pr98568.C.jj   2021-01-06 19:19:25.940377456 
+0100
+++ gcc/testsuite/g++.dg/torture/pr98568.C  2021-01-06 19:19:16.608482074 
+0100
@@ -0,0 +1,37 @@
+// PR tree-optimization/98568
+// { dg-do compile }
+
+char a[2];
+char b[4];
+
+void
+foo (int x)
+{
+  a[1] = x >> 8;
+  a[0] = x;
+}
+
+void
+bar (long long x)
+{
+  b[3] = x >> 24;
+  b[2] = x >> 16;
+  b[1] = x >> 8;
+  b[0] = x;
+}
+
+void
+baz (int x)
+{
+  a[0] = x >> 8;
+  a[1] = x;
+}
+
+void
+qux (long long x)
+{
+  b[0] = x >> 24;
+  b[1] = x >> 16;
+  b[2] = x >> 8;
+  b[3] = x;
+}

Jakub



Re: [PATCH] bswap: Fix up recent vector CONSTRUCTOR optimization [PR98568]

2021-01-07 Thread Richard Biener
On Thu, 7 Jan 2021, Jakub Jelinek wrote:

> Hi!
> 
> As the testcase shows, bswap can match even byte-swapping or identity
> from the low part of some wider SSA_NAME.
> For bswap replacement other than for vector CONSTRUCTOR the code has been
> using NOP_EXPR casts if the types weren't compatible, but for vectors
> we need to use VIEW_CONVERT_EXPR.  The problem with the latter is that
> we require that it has the same size, which isn't guaranteed, so this patch
> in those cases first adds a narrowing NOP_EXPR cast and only afterwards
> does a VIEW_CONVERT_EXPR.
> 
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk?

OK.

Thanks,
Richard.

> 2021-01-06  Jakub Jelinek  
> 
>   PR tree-optimization/98568
>   * gimple-ssa-store-merging.c (bswap_view_convert): New function.
>   (bswap_replace): Use it.
> 
>   * g++.dg/torture/pr98568.C: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2021-01-05 16:15:58.965337667 +0100
> +++ gcc/gimple-ssa-store-merging.c2021-01-06 19:20:37.225578412 +0100
> @@ -978,6 +978,25 @@ public:
>  
>  }; // class pass_optimize_bswap
>  
> +/* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
> +   VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
> +   first.  */
> +
> +static tree
> +bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val)
> +{
> +  gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val)));
> +  if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
> +{
> +  HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
> +  tree itype = build_nonstandard_integer_type (prec, 1);
> +  gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
> +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  val = gimple_assign_lhs (g);
> +}
> +  return build1 (VIEW_CONVERT_EXPR, type, val);
> +}
> +
>  /* Perform the bswap optimization: replace the expression computed in the rhs
> of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
> bswap, load or load + bswap expression.
> @@ -1100,7 +1119,7 @@ bswap_replace (gimple_stmt_iterator gsi,
> gimple_set_vuse (load_stmt, n->vuse);
> gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
> if (conv_code == VIEW_CONVERT_EXPR)
> - val_tmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), val_tmp);
> + val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp);
> gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
> update_stmt (cur_stmt);
>   }
> @@ -1144,7 +1163,7 @@ bswap_replace (gimple_stmt_iterator gsi,
> if (!is_gimple_val (src))
>   return NULL_TREE;
> if (conv_code == VIEW_CONVERT_EXPR)
> - src = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), src);
> + src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src);
> g = gimple_build_assign (tgt, conv_code, src);
>   }
>else if (cur_stmt)
> @@ -1227,7 +1246,7 @@ bswap_replace (gimple_stmt_iterator gsi,
>tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
>tree atmp = tmp;
>if (conv_code == VIEW_CONVERT_EXPR)
> - atmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), tmp);
> + atmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), tmp);
>convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
>gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
>  }
> --- gcc/testsuite/g++.dg/torture/pr98568.C.jj 2021-01-06 19:19:25.940377456 
> +0100
> +++ gcc/testsuite/g++.dg/torture/pr98568.C2021-01-06 19:19:16.608482074 
> +0100
> @@ -0,0 +1,37 @@
> +// PR tree-optimization/98568
> +// { dg-do compile }
> +
> +char a[2];
> +char b[4];
> +
> +void
> +foo (int x)
> +{
> +  a[1] = x >> 8;
> +  a[0] = x;
> +}
> +
> +void
> +bar (long long x)
> +{
> +  b[3] = x >> 24;
> +  b[2] = x >> 16;
> +  b[1] = x >> 8;
> +  b[0] = x;
> +}
> +
> +void
> +baz (int x)
> +{
> +  a[0] = x >> 8;
> +  a[1] = x;
> +}
> +
> +void
> +qux (long long x)
> +{
> +  b[0] = x >> 24;
> +  b[1] = x >> 16;
> +  b[2] = x >> 8;
> +  b[3] = x;
> +}
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: make FOR_EACH_IMM_USE_STMT safe for early exits

2021-01-07 Thread Richard Biener via Gcc-patches
On Wed, Jan 6, 2021 at 12:34 PM Alexandre Oliva  wrote:
>
> On Jan  4, 2021, Richard Biener  wrote:
>
> > Hmm - while the change looks good, doesn't it end up
> > calling end_imm_use_stmt_traverse twice for those
> > uses still calling BREAK_FROM_IMM_USE_STMT?
>
> It does.  I'd considered introducing a separate method to call
> end_imm_use_stmt_traverse if imm is not NULL, and then set it to NULL,
> but calling the function multiple times is not a problem: delink_imm_use
> just returns immediately the second time.
>
> > Thus, please remove uses of BREAK_FROM_IMM_USE_STMT
> > together with this patch.
>
> And RETURN_FROM_IMM_USE_STMT, I suppose?

Sure.

> I wasn't sure whether to remove them and their users.
>
> --
> Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
>Free Software Activist GNU Toolchain Engineer
> Vim, Vi, Voltei pro Emacs -- GNUlius Caesar


[PATCH] Fix test failures from outputs.exp (PR testsuite/98225)

2021-01-07 Thread Bernd Edlinger
Hi,


this should fix the test failures in this test case.


Is it OK for trunk?


Thanks
Bernd.
From a8008af3db94a9dff7ae243ebfb40f45c54b3a81 Mon Sep 17 00:00:00 2001
From: Bernd Edlinger 
Date: Thu, 7 Jan 2021 09:37:32 +0100
Subject: [PATCH] Fix test failures from outputs.exp

The .ld1_args file is not created when HAVE_GNU_LD is false.
The ltrans0.ltrans_args file is not created when the make jobserver
is available.

2021-01-07  Bernd Edlinger  

	PR testsuite/98225
	* gcc.misc-tests/outputs.exp: Fix test case.
---
 gcc/testsuite/gcc.misc-tests/outputs.exp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp b/gcc/testsuite/gcc.misc-tests/outputs.exp
index 80d4b61..495dbcd 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -67,6 +67,10 @@ if {[board_info $dest exists output_format]} {
 append link_options " additional_flags=-Wl,-oformat,[board_info $dest output_format]"
 }
 
+# Avoid possible influence from the make jobserver,
+# otherwise ltrans0.ltrans_args files may be missing.
+unsetenv MAKEFLAGS
+
 # For the test named TEST, run the compiler with SOURCES and OPTS, and
 # look in DIRS for OUTPUTS.  SOURCES is a list of suffixes for source
 # files starting with $b in $srcdir/$subdir, OPTS is a string with
@@ -163,6 +167,9 @@ proc outest { test sources opts dirs outputs } {
 		if { $ogl != {} } {
 		pass "$test: $d$o"
 		file delete $ogl
+		} elseif { [string match "*.ld1_args" $o] } {
+		# This file may be missing if !HAVE_GNU_LD
+		pass "$test: $d$o"
 		} else {
 		fail "$test: $d$o"
 		}
-- 
1.9.1



Re: [PATCH] RISC-V: Zihintpause: add __builtin_riscv_pause

2021-01-07 Thread Kito Cheng via Gcc-patches
My point is about tracking information and keeping the behavior/scheme
consistent with other extensions, so personally I strongly prefer that it
be guarded with -march.

But maybe we could create an issue on riscv-c-api-doc[1] or
riscv-toolchain-conventions[2] to
get feedback from LLVM folks, since I think this behavior should align
between LLVM and GCC.

[1] https://github.com/riscv/riscv-c-api-doc
[2] https://github.com/riscv/riscv-toolchain-conventions

On Thu, Jan 7, 2021 at 2:53 PM Philipp Tomsich  wrote:
>
> Kito:
>
> We had originally considered to guard this with a -march, but decided against 
> it
> eventually: this instruction will be (among other cases) used in the 
> cpu_relax() of
> the Linux kernel.  For cases like that, we should consider this the baseline 
> (i.e.
> either there's no pause—in which case, the encoded fence will not hurt—or the
> Zihintpause extension)... but it all maps back to a single builtin-call.
>
> Note that the Zihintpause will be enabled for all (also older) targets, as 
> the insn
> is supported there as well (as a fence that doesn't do anything)... so 
> guarding it
> will not really change the behavior.
>
> That said, I'll get going on a v2 that will include the -march guard (and we 
> can
> still turn things back to how they are today).
>
> Thanks,
> Philipp.
>
> On Thu, 7 Jan 2021 at 06:42, Kito Cheng  wrote:
>>
>> Hi Andrew:
>>
>> It's safe to execute on old machine, but it is still a new extension not 
>> included on baseline ISA, so I still prefer having -march to guard that, and 
>> then we can track that in the ELF attribute to see what extensions and which 
>> version are used in the executable / object files.
>>
>>
>> On Thu, Jan 7, 2021 at 11:51 AM Andrew Waterman  wrote:
>>>
>>> I've got a contrary opinion:
>>>
>>> Since HINTs are guaranteed to execute as no-ops--e.g., this one is
>>> just a FENCE instruction, which is already a mandatory part of the
>>> base ISA--they don't _need_ to be called out as separate extensions in
>>> the toolchain.
>>>
>>> Although there's nothing fundamentally wrong with Kito's suggestion,
>>> it seems like an extra hoop to jump through without commensurate
>>> benefit.  I see no reason to restrict the use of __builtin_pause,
>>> since all RISC-V implementations, including old ones, are required to
>>> support it.  And, of course, that's the reason we encoded it this way
>>> :)
>>>
>>>
>>> On Wed, Jan 6, 2021 at 7:35 PM Kito Cheng  wrote:
>>> >
>>> > Hi Philipp:
>>> >
>>> > Could you add zihintpause to -march parser and guard that on the
>>> > pattern and builtin like zifencei[1-2]?
>>> >
>>> > And could you send a PR to
>>> > https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md to
>>> > mention __builtin_riscv_pause?
>>> >
>>> > Thanks!
>>> >
>>> > [1] march parser change:
>>> > https://github.com/gcc-mirror/gcc/commit/b03be74bad08c382da47e048007a78fa3fb4ef49
>>> > [2] Default version for ext.:
>>> > https://github.com/gcc-mirror/gcc/commit/4b81528241ca682025d92558ff6aeec91dafdca8
>>> >
>>> >
>>> > > --- /dev/null
>>> > > +++ b/gcc/testsuite/gcc.target/riscv/builtin_pause.c
>>> > > @@ -0,0 +1,10 @@
>>> > > +/* { dg-do compile } */
>>> > > +/* { dg-options "-O2" }  */
>>> > > +
>>> > > +void test_pause()
>>> >
>>> > I would suggest you change the function name in the testcase,
>>> > otherwise the scan-assembler test will always pass even if you didn't
>>> > generate "pause" instruction.
>>> >
>>> >
>>> > > +{
>>> > > +  __builtin_riscv_pause ();
>>> > > +}
>>> > > +
>>> > > +/* { dg-final { scan-assembler "pause" } } */
>>> > > +
>>> > > --
>>> > > 2.18.4
>>> > >


[PATCH] i386: Optimize blsi followed by comparison [PR98567]

2021-01-07 Thread Jakub Jelinek via Gcc-patches
Hi!

The BLSI instruction sets SF and ZF based on the result and clears OF.
CF is set to something unrelated.

The following patch optimizes BLSI followed by comparison, so we don't need
to emit a TEST insn in between.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-01-07  Jakub Jelinek  

PR target/98567
* config/i386/i386.md (*bmi_blsi__cmp): New define_insn.

* gcc.target/i386/pr98567-1.c: New test.
* gcc.target/i386/pr98567-2.c: New test.

--- gcc/config/i386/i386.md.jj  2021-01-04 10:25:45.072163178 +0100
+++ gcc/config/i386/i386.md 2021-01-06 17:49:13.251966127 +0100
@@ -14568,6 +14568,21 @@ (define_insn "*bmi_blsi_"
(set_attr "btver2_decode" "double")
(set_attr "mode" "")])
 
+(define_insn "*bmi_blsi__cmp"
+  [(set (reg FLAGS_REG)
+   (compare
+ (and:SWI48
+   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+   (match_dup 1))
+ (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+   (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
+   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
+   "blsi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "btver2_decode" "double")
+   (set_attr "mode" "")])
+
 (define_insn "*bmi_blsmsk_"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 (xor:SWI48
--- gcc/testsuite/gcc.target/i386/pr98567-1.c.jj2021-01-07 
09:44:54.109343371 +0100
+++ gcc/testsuite/gcc.target/i386/pr98567-1.c   2021-01-07 09:44:54.109343371 
+0100
@@ -0,0 +1,31 @@
+/* PR target/98567 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
+/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
+/* { dg-final { scan-assembler-times "\tsetne\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsete\t" 2 } } */
+/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
+
+int
+foo (unsigned long x)
+{
+  return (-x & x) == 0;
+}
+
+int
+bar (unsigned int x)
+{
+  return (-x & x) == 0;
+}
+
+int
+baz (unsigned long x)
+{
+  return (x & -x) != 0;
+}
+
+int
+qux (unsigned int x)
+{
+  return 0 != (x & -x);
+}
--- gcc/testsuite/gcc.target/i386/pr98567-2.c.jj2021-01-07 
09:45:23.037015732 +0100
+++ gcc/testsuite/gcc.target/i386/pr98567-2.c   2021-01-07 09:51:58.348539820 
+0100
@@ -0,0 +1,31 @@
+/* PR target/98567 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
+/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
+/* { dg-final { scan-assembler-times "\tsetle\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsetg\t" 2 } } */
+/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
+
+int
+foo (unsigned long x)
+{
+  return 0 >= (int) (-x & x);
+}
+
+int
+bar (unsigned int x)
+{
+  return (int) (-x & x) <= 0;
+}
+
+int
+baz (unsigned long x)
+{
+  return (int) (x & -x) > 0;
+}
+
+int
+qux (unsigned int x)
+{
+  return 0 < (int) (x & -x);
+}

Jakub



Re: [PATCH] i386: Optimize blsi followed by comparison [PR98567]

2021-01-07 Thread Uros Bizjak via Gcc-patches
On Thu, Jan 7, 2021 at 9:56 AM Jakub Jelinek  wrote:
>
> Hi!
>
> The BLSI instruction sets SF and ZF based on the result and clears OF.
> CF is set to something unrelated.
>
> The following patch optimizes BLSI followed by comparison, so we don't need
> to emit a TEST insn in between.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-01-07  Jakub Jelinek  
>
> PR target/98567
> * config/i386/i386.md (*bmi_blsi__cmp): New define_insn.
>
> * gcc.target/i386/pr98567-1.c: New test.
> * gcc.target/i386/pr98567-2.c: New test.
>
> --- gcc/config/i386/i386.md.jj  2021-01-04 10:25:45.072163178 +0100
> +++ gcc/config/i386/i386.md 2021-01-06 17:49:13.251966127 +0100
> @@ -14568,6 +14568,21 @@ (define_insn "*bmi_blsi_"
> (set_attr "btver2_decode" "double")
> (set_attr "mode" "")])
>
> +(define_insn "*bmi_blsi__cmp"
> +  [(set (reg FLAGS_REG)
> +   (compare
> + (and:SWI48
> +   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
> +   (match_dup 1))
> + (const_int 0)))
> +   (set (match_operand:SWI48 0 "register_operand" "=r")
> +   (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
> +   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
> +   "blsi\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "btver2_decode" "double")
> +   (set_attr "mode" "")])

I wonder if we should also add _cc variant where scratch is used:

(define_insn "*bmi_blsi__cc"
  [(set (reg FLAGS_REG)
   (compare
 (and:SWI48
   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
   (match_dup 1))
 (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
   "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "")])

The output is unused in the testcases, so there may be no difference
in the generated code, but it looks to me that additional pattern
gives the compiler more freedom. Also note, that all other CC setting
insns come in three variants.

Otherwise OK.

Uros.


Re: [PATCH] i386: Optimize blsi followed by comparison [PR98567]

2021-01-07 Thread Uros Bizjak via Gcc-patches
On Thu, Jan 7, 2021 at 10:14 AM Uros Bizjak  wrote:
>
> On Thu, Jan 7, 2021 at 9:56 AM Jakub Jelinek  wrote:
> >
> > Hi!
> >
> > The BLSI instruction sets SF and ZF based on the result and clears OF.
> > CF is set to something unrelated.
> >
> > The following patch optimizes BLSI followed by comparison, so we don't need
> > to emit a TEST insn in between.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > 2021-01-07  Jakub Jelinek  
> >
> > PR target/98567
> > * config/i386/i386.md (*bmi_blsi__cmp): New define_insn.
> >
> > * gcc.target/i386/pr98567-1.c: New test.
> > * gcc.target/i386/pr98567-2.c: New test.
> >
> > --- gcc/config/i386/i386.md.jj  2021-01-04 10:25:45.072163178 +0100
> > +++ gcc/config/i386/i386.md 2021-01-06 17:49:13.251966127 +0100
> > @@ -14568,6 +14568,21 @@ (define_insn "*bmi_blsi_"
> > (set_attr "btver2_decode" "double")
> > (set_attr "mode" "")])
> >
> > +(define_insn "*bmi_blsi__cmp"
> > +  [(set (reg FLAGS_REG)
> > +   (compare
> > + (and:SWI48
> > +   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
> > +   (match_dup 1))
> > + (const_int 0)))
> > +   (set (match_operand:SWI48 0 "register_operand" "=r")
> > +   (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
> > +   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
> > +   "blsi\t{%1, %0|%0, %1}"
> > +  [(set_attr "type" "bitmanip")
> > +   (set_attr "btver2_decode" "double")
> > +   (set_attr "mode" "")])
>
> I wonder if we should also add _cc variant where scratch is used:

Er, _ccno in this particular case.

> (define_insn "*bmi_blsi__cc"
>   [(set (reg FLAGS_REG)
>(compare
>  (and:SWI48
>(neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
>(match_dup 1))
>  (const_int 0)))
>(clobber (match_scratch:SWI48 0 "=r"))]
>"TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
>"blsi\t{%1, %0|%0, %1}"
>   [(set_attr "type" "bitmanip")
>(set_attr "btver2_decode" "double")
>(set_attr "mode" "")])
>
> The output is unused in the testcases, so there may be no difference
> in the generated code, but it looks to me that additional pattern
> gives the compiler more freedom. Also note, that all other CC setting
> insns come in three variants.

If I'd have to choose between _cmp and _ccno variants, I'd prefer
__ccno one (with the scratch), since output and flags are seldom used
together (and when they do, a new pack of optimization problems
arises, e.g. recently mentioned global RTX CSE issue).

Uros.


[PATCH] i386, v2: Optimize blsi followed by comparison [PR98567]

2021-01-07 Thread Jakub Jelinek via Gcc-patches
On Thu, Jan 07, 2021 at 10:14:33AM +0100, Uros Bizjak wrote:
> I wonder if we should also add _cc variant where scratch is used:

So like this then if it passes bootstrap/regtest?

I think both variants are useful, e.g. one could compare the result but
store it in one of the branches etc.

2021-01-07  Jakub Jelinek  

PR target/98567
* config/i386/i386.md (*bmi_blsi__cmp, *bmi_blsi__ccno):
New define_insn patterns.

* gcc.target/i386/pr98567-1.c: New test.
* gcc.target/i386/pr98567-2.c: New test.

--- gcc/config/i386/i386.md.jj  2021-01-07 09:57:03.711083006 +0100
+++ gcc/config/i386/i386.md 2021-01-07 10:41:12.763159199 +0100
@@ -14568,6 +14568,35 @@ (define_insn "*bmi_blsi_"
(set_attr "btver2_decode" "double")
(set_attr "mode" "")])
 
+(define_insn "*bmi_blsi__cmp"
+  [(set (reg FLAGS_REG)
+   (compare
+ (and:SWI48
+   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+   (match_dup 1))
+ (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+   (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
+   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
+   "blsi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "btver2_decode" "double")
+   (set_attr "mode" "")])
+
+(define_insn "*bmi_blsi__ccno"
+  [(set (reg FLAGS_REG)
+   (compare
+ (and:SWI48
+   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
+   (match_dup 1))
+ (const_int 0)))
+   (clobber (match_scratch:SWI48 0 "=r"))]
+   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
+   "blsi\t{%1, %0|%0, %1}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "btver2_decode" "double")
+   (set_attr "mode" "")])
+
 (define_insn "*bmi_blsmsk_"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 (xor:SWI48
--- gcc/testsuite/gcc.target/i386/pr98567-1.c.jj2021-01-07 
10:35:01.123354599 +0100
+++ gcc/testsuite/gcc.target/i386/pr98567-1.c   2021-01-07 10:35:01.123354599 
+0100
@@ -0,0 +1,31 @@
+/* PR target/98567 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
+/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
+/* { dg-final { scan-assembler-times "\tsetne\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsete\t" 2 } } */
+/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
+
+int
+foo (unsigned long x)
+{
+  return (-x & x) == 0;
+}
+
+int
+bar (unsigned int x)
+{
+  return (-x & x) == 0;
+}
+
+int
+baz (unsigned long x)
+{
+  return (x & -x) != 0;
+}
+
+int
+qux (unsigned int x)
+{
+  return 0 != (x & -x);
+}
--- gcc/testsuite/gcc.target/i386/pr98567-2.c.jj2021-01-07 
10:35:01.124354588 +0100
+++ gcc/testsuite/gcc.target/i386/pr98567-2.c   2021-01-07 10:35:01.124354588 
+0100
@@ -0,0 +1,31 @@
+/* PR target/98567 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
+/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
+/* { dg-final { scan-assembler-times "\tsetle\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tsetg\t" 2 } } */
+/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
+
+int
+foo (unsigned long x)
+{
+  return 0 >= (int) (-x & x);
+}
+
+int
+bar (unsigned int x)
+{
+  return (int) (-x & x) <= 0;
+}
+
+int
+baz (unsigned long x)
+{
+  return (int) (x & -x) > 0;
+}
+
+int
+qux (unsigned int x)
+{
+  return 0 < (int) (x & -x);
+}


Jakub



[PATCH v2 0/2] RISC-V: Introduce new architecture extension test macros

2021-01-07 Thread Kito Cheng
This patch set introduces a new set of architecture extension test macros,
which was recently accepted in riscv-c-api-doc[1].

The motivation for this scheme is to have a unified naming scheme for
extension macros and to add the capability to check versions.

V2 Changes:
- Fix MacOS build issue.
- Create new header file: riscv-subset.h

[1] 
https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md#architecture-extension-test-macro




[PATCH v2 1/2] RISC-V: Move class riscv_subset_list and riscv_subset_t to riscv-subset.h

2021-01-07 Thread Kito Cheng
Preparatory work for the new style of architecture extension test macros: we
need the subset list in `config/riscv/riscv-c.c`, so these struct/class
declarations must move to a header file rather than stay in a local C file.

gcc/ChangeLog

* common/config/riscv/riscv-common.c (RISCV_DONT_CARE_VERSION):
Move to riscv-subset.h.
(struct riscv_subset_t): Ditto.
(class riscv_subset_list): Ditto.
* config/riscv/riscv-subset.h (RISCV_DONT_CARE_VERSION): Move
from riscv-common.c.
(struct riscv_subset_t): Ditto.
(class riscv_subset_list): Ditto.
* config/riscv/t-riscv ($(common_out_file)): Add file
dependency.
---
 gcc/common/config/riscv/riscv-common.c | 67 +--
 gcc/config/riscv/riscv-subset.h| 90 ++
 gcc/config/riscv/t-riscv   |  4 +-
 3 files changed, 94 insertions(+), 67 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-subset.h

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index 7b75114421d..934c716a2e8 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -30,22 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "flags.h"
 #include "diagnostic-core.h"
 #include "config/riscv/riscv-protos.h"
-
-#define RISCV_DONT_CARE_VERSION -1
-
-/* Subset info.  */
-struct riscv_subset_t
-{
-  riscv_subset_t ();
-
-  std::string name;
-  int major_version;
-  int minor_version;
-  struct riscv_subset_t *next;
-
-  bool explicit_version_p;
-  bool implied_p;
-};
+#include "config/riscv/riscv-subset.h"
 
 /* Type for implied ISA info.  */
 struct riscv_implied_info_t
@@ -123,56 +108,6 @@ static const riscv_cpu_info riscv_cpu_tables[] =
 {NULL, NULL, NULL}
 };
 
-/* Subset list.  */
-class riscv_subset_list
-{
-private:
-  /* Original arch string.  */
-  const char *m_arch;
-
-  /* Location of arch string, used for report error.  */
-  location_t m_loc;
-
-  /* Head of subset info list.  */
-  riscv_subset_t *m_head;
-
-  /* Tail of subset info list.  */
-  riscv_subset_t *m_tail;
-
-  /* X-len of m_arch. */
-  unsigned m_xlen;
-
-  riscv_subset_list (const char *, location_t);
-
-  const char *parsing_subset_version (const char *, const char *, unsigned *,
- unsigned *, bool, bool *);
-
-  const char *parse_std_ext (const char *);
-
-  const char *parse_multiletter_ext (const char *, const char *,
-const char *);
-
-  void handle_implied_ext (riscv_subset_t *);
-
-public:
-  ~riscv_subset_list ();
-
-  void add (const char *, int, int, bool, bool);
-
-  void add (const char *, bool);
-
-  riscv_subset_t *lookup (const char *,
- int major_version = RISCV_DONT_CARE_VERSION,
- int minor_version = RISCV_DONT_CARE_VERSION) const;
-
-  std::string to_string (bool) const;
-
-  unsigned xlen() const {return m_xlen;};
-
-  static riscv_subset_list *parse (const char *, location_t);
-
-};
-
 static const char *riscv_supported_std_ext (void);
 
 static riscv_subset_list *current_subset_list = NULL;
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
new file mode 100644
index 000..ae7401ac0c4
--- /dev/null
+++ b/gcc/config/riscv/riscv-subset.h
@@ -0,0 +1,90 @@
+/* Definition of data structure of RISC-V subset for GNU compiler.
+   Copyright (C) 2011-2021 Free Software Foundation, Inc.
+   Contributed by Andrew Waterman (and...@sifive.com).
+   Based on MIPS target for GNU compiler.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#ifndef GCC_RISCV_SUBSET_H
+#define GCC_RISCV_SUBSET_H
+
+#define RISCV_DONT_CARE_VERSION -1
+
+/* Subset info.  */
+struct riscv_subset_t
+{
+  riscv_subset_t ();
+
+  std::string name;
+  int major_version;
+  int minor_version;
+  struct riscv_subset_t *next;
+
+  bool explicit_version_p;
+  bool implied_p;
+};
+
+/* Subset list.  */
+class riscv_subset_list
+{
+private:
+  /* Original arch string.  */
+  const char *m_arch;
+
+  /* Location of arch string, used for report error.  */
+  location_t m_loc;
+
+  /* Head of subset info list.  */
+  riscv_subset_t *m_head;
+
+  /* Tail of subset info list.  */
+  riscv_subset_t *m_tail;
+
+  /* X-len of m_arch. */
+  unsigned m_xlen;
+
+  riscv_subset_list (const char *, location_t);
+
+  const c

[PATCH v2 2/2] RISC-V: Implement new style of architecture extension test macros.

2021-01-07 Thread Kito Cheng
- This patch introduces a new set of architecture extension test macros,
  which were recently accepted into riscv-c-api-doc.
  - 
https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md#architecture-extension-test-macro

- We will also mark the legacy architecture extension test macros as deprecated
  in GCC 11, but still support them for 1 or 2 release cycles.

gcc/ChangeLog:

* common/config/riscv/riscv-common.c (riscv_current_subset_list): New.
* config/riscv/riscv-c.c (riscv-subset.h): New.
(INCLUDE_STRING): Define.
(riscv_cpu_cpp_builtins): Add new style architecture extension
test macros.
* config/riscv/riscv-subset.h (riscv_subset_list::begin): New.
(riscv_subset_list::end): New.
(riscv_current_subset_list): New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-10.c: New.
* gcc.target/riscv/predef-11.c: New.
* gcc.target/riscv/predef-12.c: New.
* gcc.target/riscv/predef-13.c: New.
---
 gcc/common/config/riscv/riscv-common.c |  5 +++
 gcc/config/riscv/riscv-c.c | 32 
 gcc/config/riscv/riscv-subset.h|  5 +++
 gcc/testsuite/gcc.target/riscv/predef-10.c | 43 ++
 gcc/testsuite/gcc.target/riscv/predef-11.c | 43 ++
 gcc/testsuite/gcc.target/riscv/predef-12.c | 43 ++
 gcc/testsuite/gcc.target/riscv/predef-13.c | 43 ++
 7 files changed, 214 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-10.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-11.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-12.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-13.c

diff --git a/gcc/common/config/riscv/riscv-common.c 
b/gcc/common/config/riscv/riscv-common.c
index 934c716a2e8..b3f5c07c819 100644
--- a/gcc/common/config/riscv/riscv-common.c
+++ b/gcc/common/config/riscv/riscv-common.c
@@ -112,6 +112,11 @@ static const char *riscv_supported_std_ext (void);
 
 static riscv_subset_list *current_subset_list = NULL;
 
+const riscv_subset_list *riscv_current_subset_list ()
+{
+  return current_subset_list;
+}
+
 riscv_subset_t::riscv_subset_t ()
   : name (), major_version (0), minor_version (0), next (NULL),
 explicit_version_p (false), implied_p (false)
diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index 8c52f74b6b1..efd4a61ea29 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -20,12 +20,14 @@ along with GCC; see the file COPYING3.  If not see
 
 #define IN_TARGET_CODE 1
 
+#define INCLUDE_STRING
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
 #include "c-family/c-common.h"
 #include "cpplib.h"
+#include "riscv-subset.h"
 
 #define builtin_define(TXT) cpp_define (pfile, TXT)
 
@@ -101,4 +103,34 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   break;
 
 }
+
+  /* Define architecture extension test macros.  */
+  builtin_define_with_int_value ("__riscv_arch_test", 1);
+
+  const riscv_subset_list *subset_list = riscv_current_subset_list ();
+  size_t max_ext_len = 0;
+
+  /* Figure out the max length of extension name for reserving buffer.   */
+  for (const riscv_subset_t *subset = subset_list->begin ();
+   subset != subset_list->end ();
+   subset = subset->next)
+max_ext_len = MAX (max_ext_len, subset->name.length ());
+
+  char *buf = (char *)alloca (max_ext_len + 10 /* For __riscv_ and '\0'.  */);
+
+  for (const riscv_subset_t *subset = subset_list->begin ();
+   subset != subset_list->end ();
+   subset = subset->next)
+{
+  int version_value = (subset->major_version * 100)
+  + (subset->minor_version * 1000);
+  /* Special rule for zicsr and zifencei, it's used for ISA spec 2.2 or
+earlier.  */
+  if ((subset->name == "zicsr" || subset->name == "zifencei")
+ && version_value == 0)
+   version_value = 200;
+
+  sprintf (buf, "__riscv_%s", subset->name.c_str ());
+  builtin_define_with_int_value (buf, version_value);
+}
 }
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index ae7401ac0c4..793655a01f2 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -85,6 +85,11 @@ public:
   unsigned xlen () const {return m_xlen;};
 
   static riscv_subset_list *parse (const char *, location_t);
+
+  const riscv_subset_t *begin () const {return m_head;};
+  const riscv_subset_t *end () const {return NULL;};
 };
 
+extern const riscv_subset_list *riscv_current_subset_list (void);
+
 #endif /* ! GCC_RISCV_SUBSET_H */
diff --git a/gcc/testsuite/gcc.target/riscv/predef-10.c 
b/gcc/testsuite/gcc.target/riscv/predef-10.c
new file mode 100644
index 000..7c447bfb08d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-10.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32i2

Re: [PATCH] i386, v2: Optimize blsi followed by comparison [PR98567]

2021-01-07 Thread Uros Bizjak via Gcc-patches
On Thu, Jan 7, 2021 at 10:46 AM Jakub Jelinek  wrote:
>
> On Thu, Jan 07, 2021 at 10:14:33AM +0100, Uros Bizjak wrote:
> > I wonder if we should also add _cc variant where scratch is used:
>
> So like this then if it passes bootstrap/regtest?
>
> I think both variants are useful, e.g. one could compare the result but
> store it in one of the branches etc.
>
> 2021-01-07  Jakub Jelinek  
>
> PR target/98567
> * config/i386/i386.md (*bmi_blsi__cmp, *bmi_blsi__ccno):
> New define_insn patterns.
>
> * gcc.target/i386/pr98567-1.c: New test.
> * gcc.target/i386/pr98567-2.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.md.jj  2021-01-07 09:57:03.711083006 +0100
> +++ gcc/config/i386/i386.md 2021-01-07 10:41:12.763159199 +0100
> @@ -14568,6 +14568,35 @@ (define_insn "*bmi_blsi_"
> (set_attr "btver2_decode" "double")
> (set_attr "mode" "")])
>
> +(define_insn "*bmi_blsi__cmp"
> +  [(set (reg FLAGS_REG)
> +   (compare
> + (and:SWI48
> +   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
> +   (match_dup 1))
> + (const_int 0)))
> +   (set (match_operand:SWI48 0 "register_operand" "=r")
> +   (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
> +   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
> +   "blsi\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "btver2_decode" "double")
> +   (set_attr "mode" "")])
> +
> +(define_insn "*bmi_blsi__ccno"
> +  [(set (reg FLAGS_REG)
> +   (compare
> + (and:SWI48
> +   (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
> +   (match_dup 1))
> + (const_int 0)))
> +   (clobber (match_scratch:SWI48 0 "=r"))]
> +   "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
> +   "blsi\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "bitmanip")
> +   (set_attr "btver2_decode" "double")
> +   (set_attr "mode" "")])
> +
>  (define_insn "*bmi_blsmsk_"
>[(set (match_operand:SWI48 0 "register_operand" "=r")
>  (xor:SWI48
> --- gcc/testsuite/gcc.target/i386/pr98567-1.c.jj2021-01-07 
> 10:35:01.123354599 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98567-1.c   2021-01-07 10:35:01.123354599 
> +0100
> @@ -0,0 +1,31 @@
> +/* PR target/98567 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
> +/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
> +/* { dg-final { scan-assembler-times "\tsetne\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsete\t" 2 } } */
> +/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
> +
> +int
> +foo (unsigned long x)
> +{
> +  return (-x & x) == 0;
> +}
> +
> +int
> +bar (unsigned int x)
> +{
> +  return (-x & x) == 0;
> +}
> +
> +int
> +baz (unsigned long x)
> +{
> +  return (x & -x) != 0;
> +}
> +
> +int
> +qux (unsigned int x)
> +{
> +  return 0 != (x & -x);
> +}
> --- gcc/testsuite/gcc.target/i386/pr98567-2.c.jj2021-01-07 
> 10:35:01.124354588 +0100
> +++ gcc/testsuite/gcc.target/i386/pr98567-2.c   2021-01-07 10:35:01.124354588 
> +0100
> @@ -0,0 +1,31 @@
> +/* PR target/98567 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi -fno-stack-protector" } */
> +/* { dg-final { scan-assembler-times "\tblsi" 4 } } */
> +/* { dg-final { scan-assembler-times "\tsetle\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsetg\t" 2 } } */
> +/* { dg-final { scan-assembler-not "\ttest\[ld]" } } */
> +
> +int
> +foo (unsigned long x)
> +{
> +  return 0 >= (int) (-x & x);
> +}
> +
> +int
> +bar (unsigned int x)
> +{
> +  return (int) (-x & x) <= 0;
> +}
> +
> +int
> +baz (unsigned long x)
> +{
> +  return (int) (x & -x) > 0;
> +}
> +
> +int
> +qux (unsigned int x)
> +{
> +  return 0 < (int) (x & -x);
> +}
>
>
> Jakub
>


Contents of PO file 'cpplib-10.1-b20200209.es.po'

2021-01-07 Thread Translation Project Robot


cpplib-10.1-b20200209.es.po.gz
Description: Binary data
The Translation Project robot, in the
name of your translation coordinator.



New Spanish PO file for 'cpplib' (version 10.1-b20200209)

2021-01-07 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'cpplib' has been submitted
by the Spanish team of translators.  The file is available at:

https://translationproject.org/latest/cpplib/es.po

(This file, 'cpplib-10.1-b20200209.es.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/cpplib/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/cpplib.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




[backport gcc10] arc: Refurbish adc/sbc patterns

2021-01-07 Thread Claudiu Zissulescu via Gcc-patches
Back port for gcc10
The adc/sbc patterns were splitting unnecessarily; remove the splitting and
the associated functions.

gcc/
2020-12-11  Claudiu Zissulescu  

* config/arc/arc-protos.h (arc_scheduling_not_expected): Remove
it.
(arc_sets_cc_p): Likewise.
(arc_need_delay): Likewise.
* config/arc/arc.c (arc_sets_cc_p): Likewise.
(arc_need_delay): Likewise.
(arc_scheduling_not_expected): Likewise.
* config/arc/arc.md: Convert adc/sbc patterns to simple
instruction definitions.

Signed-off-by: Claudiu Zissulescu 
(cherry picked from commit dfbe642c97f7f430926cb6b33cd5c20b42c85573)
---
 gcc/config/arc/arc-protos.h |  3 --
 gcc/config/arc/arc.c| 53 -
 gcc/config/arc/arc.md   | 95 +++--
 3 files changed, 29 insertions(+), 122 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index c72d78e3b9e..de4cf47c818 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -90,10 +90,7 @@ extern void split_subsi (rtx *);
 extern void arc_split_move (rtx *);
 extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
 extern rtx arc_regno_use_in (unsigned int, rtx);
-extern bool arc_scheduling_not_expected (void);
-extern bool arc_sets_cc_p (rtx_insn *insn);
 extern int arc_label_align (rtx_insn *label);
-extern bool arc_need_delay (rtx_insn *insn);
 extern bool arc_text_label (rtx_insn *insn);
 
 extern bool arc_short_comparison_p (rtx, int);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 6b96c5e4bf5..7902940c16c 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -10291,59 +10291,6 @@ arc_attr_type (rtx_insn *insn)
   return get_attr_type (insn);
 }
 
-/* Return true if insn sets the condition codes.  */
-
-bool
-arc_sets_cc_p (rtx_insn *insn)
-{
-  if (NONJUMP_INSN_P (insn))
-if (rtx_sequence *seq = dyn_cast  (PATTERN (insn)))
-  insn = seq->insn (seq->len () - 1);
-  return arc_attr_type (insn) == TYPE_COMPARE;
-}
-
-/* Return true if INSN is an instruction with a delay slot we may want
-   to fill.  */
-
-bool
-arc_need_delay (rtx_insn *insn)
-{
-  rtx_insn *next;
-
-  if (!flag_delayed_branch)
-return false;
-  /* The return at the end of a function needs a delay slot.  */
-  if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
-  && (!(next = next_active_insn (insn))
- || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
- && arc_attr_type (next) == TYPE_RETURN))
-  && (!TARGET_PAD_RETURN
- || (prev_active_insn (insn)
- && prev_active_insn (prev_active_insn (insn))
- && prev_active_insn (prev_active_insn (prev_active_insn 
(insn))
-return true;
-  if (NONJUMP_INSN_P (insn)
-  ? (GET_CODE (PATTERN (insn)) == USE
-|| GET_CODE (PATTERN (insn)) == CLOBBER
-|| GET_CODE (PATTERN (insn)) == SEQUENCE)
-  : JUMP_P (insn)
-  ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
-|| GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
-  : !CALL_P (insn))
-return false;
-  return num_delay_slots (insn) != 0;
-}
-
-/* Return true if the scheduling pass(es) has/have already run,
-   i.e. where possible, we should try to mitigate high latencies
-   by different instruction selection.  */
-
-bool
-arc_scheduling_not_expected (void)
-{
-  return cfun->machine->arc_reorg_started;
-}
-
 /* Code has a minimum p2 alignment of 1, which we must restore after
an ADDR_DIFF_VEC.  */
 
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index a7f4056c7af..398034d361e 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -2824,43 +2824,25 @@ (define_insn "*add_f_2"
(set_attr "type" "compare")
(set_attr "length" "4,4,8")])
 
-; w/c/c comes first (rather than w/0/C_0) to prevent the middle-end
-; needlessly prioritizing the matching constraint.
-; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional
-; execution is used where possible.
-(define_insn_and_split "adc"
-  [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w")
-   (plus:SI (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
- (match_operand:SI 1 "nonmemory_operand"
-"%c,0,c,0,cCal"))
-(match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))]
+(define_insn "adc"
+  [(set (match_operand:SI 0 "register_operand""=r,  r,r,r,  r,r")
+   (plus:SI
+(plus:SI
+ (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (match_operand:SI 1 "nonmemory_operand" "%r,  0,r,0,Cal,r"))
+(match_operand:SI 2 "nonmemory_operand"   "r,C_0,L,I,  r,Cal")))]
   "register_operand (operands[1], SImode)
|| register_operand (operands[2], SImode)"
   "@
-   adc %0,%1,%2
-   add.cs %0,%1,1
-   adc %0,%1,%2
-   adc %0,%1,%2
-   adc %0,%1,%2"
-  ; if we have a bad schedul

Re: [committed] patch to fix PR97978

2021-01-07 Thread Richard Sandiford via Gcc-patches
Vladimir Makarov via Gcc-patches  writes:
> The following fixes
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97978
>
> The patch was successfully bootstrapped on x86-64.

Can you explain this a bit more?  The assert fires if the register
allocation is inconsistent with the conflict information.  What causes
the inconsistency in this case, and why is it OK for the inconsistency
to persist until the next lra_assign pass?  Does something fix up the
inconsistency later, or is the inconsistent information never used?

Is there no chance of lra_split_hard_reg_for updating the information
itself, to keep everything self-consistent?  Bypassing the check for
every pseudo register seems like quite a big hammer.

I'm not saying this is the wrong fix.  I just think it would help
to have more commentary explaining the situation.

Thanks,
Richard
>
>
> commit fbf9b2b634e376516cd21d7aa92ef3fd5778aa10 (HEAD -> master)
> Author: Vladimir N. Makarov 
> Date:   Wed Jan 6 14:48:53 2021 -0500
>
> [PR97978] LRA: Permit temporary allocation incorrectness after hard reg 
> split.
>
> LRA can crash when a hard register was split and the same hard register
> was assigned on the previous assignment sub-pass.  The following
> patch fixes this problem.
> 
> gcc/ChangeLog:
> 
> PR rtl-optimization/97978
> * lra-int.h (lra_hard_reg_split_p): New external.
> * lra.c (lra_hard_reg_split_p): New global.
> (lra): Set up lra_hard_reg_split_p after splitting a hard reg.
> * lra-assigns.c (lra_assign): Don't check allocation correctness
> after hard reg splitting.
> 
> gcc/testsuite/ChangeLog:
> 
> PR rtl-optimization/97978
> * gcc.target/i386/pr97978.c: New.
>
> diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c
> index 9335e4c876e..c6a941fe663 100644
> --- a/gcc/lra-assigns.c
> +++ b/gcc/lra-assigns.c
> @@ -1636,10 +1636,11 @@ lra_assign (bool &fails_p)
>bitmap_initialize (&all_spilled_pseudos, &reg_obstack);
>create_live_range_start_chains ();
>setup_live_pseudos_and_spill_after_risky_transforms (&all_spilled_pseudos);
> -  if (! lra_asm_error_p && flag_checking)
> -/* Check correctness of allocation for call-crossed pseudos but
> -   only when there are no asm errors as in the case of errors the
> -   asm is removed and it can result in incorrect allocation.  */
> +  if (! lra_hard_reg_split_p && ! lra_asm_error_p && flag_checking)
> +/* Check correctness of allocation but only when there are no hard reg
> +   splits and asm errors as in the case of errors explicit insns 
> involving
> +   hard regs are added or the asm is removed and this can result in
> +   incorrect allocation.  */
>  for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
>if (lra_reg_info[i].nrefs != 0
> && reg_renumber[i] >= 0
> diff --git a/gcc/lra-int.h b/gcc/lra-int.h
> index 75ba6560bcc..1b8f7b6ae61 100644
> --- a/gcc/lra-int.h
> +++ b/gcc/lra-int.h
> @@ -273,6 +273,7 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
>  
>  extern FILE *lra_dump_file;
>  
> +extern bool lra_hard_reg_split_p;
>  extern bool lra_asm_error_p;
>  extern bool lra_reg_spill_p;
>  
> diff --git a/gcc/lra.c b/gcc/lra.c
> index 380a21ac2ac..aa49de6f154 100644
> --- a/gcc/lra.c
> +++ b/gcc/lra.c
> @@ -2211,6 +2211,9 @@ bitmap_head lra_subreg_reload_pseudos;
>  /* File used for output of LRA debug information.  */
>  FILE *lra_dump_file;
>  
> +/* True if we split hard reg after the last constraint sub-pass.  */
> +bool lra_hard_reg_split_p;
> +
>  /* True if we found an asm error.  */
>  bool lra_asm_error_p;
>  
> @@ -2359,6 +2362,7 @@ lra (FILE *f)
> if (live_p)
>   lra_clear_live_ranges ();
> bool fails_p;
> +   lra_hard_reg_split_p = false;
> do
>   {
> /* We need live ranges for lra_assign -- so build them.
> @@ -2403,6 +2407,7 @@ lra (FILE *f)
> live_p = false;
> if (! lra_split_hard_reg_for ())
>   break;
> +   lra_hard_reg_split_p = true;
>   }
>   }
> while (fails_p);
> diff --git a/gcc/testsuite/gcc.target/i386/pr97978.c 
> b/gcc/testsuite/gcc.target/i386/pr97978.c
> new file mode 100644
> index 000..263bca8708d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr97978.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Os -fno-PIC" } */
> +int sg;
> +long int kk;
> +
> +void
> +bp (int jz, int tj, long int li)
> +{
> +  if (jz == 0 || tj == 0)
> +__builtin_unreachable ();
> +
> +  kk = li;
> +}
> +
> +void
> +qp (void)
> +{
> +  ++kk;
> +
> +  for (;;)
> +bp (1l / sg, 0, ~0u);
> +}


Re: [PATCH] gimple-isel: Fall back to using vcond_mask [PR98560]

2021-01-07 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> On Wed, 6 Jan 2021, Richard Sandiford wrote:
>
>> PR98560 is about a case in which the vectoriser initially generates:
>> 
>>   mask_1 = a < 0;
>>   mask_2 = mask_1 & ...;
>>   res = VEC_COND_EXPR ;
>> 
>> The vectoriser thus expects res to be calculated using vcond_mask.
>> However, we later manage to fold mask_2 to mask_1, leaving:
>> 
>>   mask_1 = a < 0;
>>   res = VEC_COND_EXPR ;
>> 
>> gimple-isel then required a combined vcond to exist.
>> 
>> On most targets, it's not too onerous to provide all possible
>> (compare x select) combinations.  For each data mode, you just
>> need to provide unsigned comparisons, signed comparisons, and
>> floating-point comparisons, with the data mode and type of
>> comparison uniquely determining the mode of the compared values.
>> But for targets like SVE that support “unpacked” vectors,
>> it's not that simple: the level of unpacking adds another
>> degree of freedom.
>> 
>> Rather than insist that the combined versions exist, I think
>> we should be prepared to fall back to using separate comparisons
>> and vcond_masks.  I think that makes more sense on targets like
>> AArch64 and AArch32 in which compares and selects are fundamentally
>> separate operations anyway.
>
> Indeed the mask variants (thus being able to expand the comparison)
> are more fundamental.  I guess you're running into this path because
> we did not consider using vcond_mask because of
>
>   if (used_vec_cond_exprs >= 2
>   && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
>   != CODE_FOR_nothing)
>   && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> {
>   /* Keep the SSA name and use vcond_mask.  */
>   tcode = TREE_CODE (op0);
> }
>
> not triggering?  Which also means your patch fails to check/assert
> that we can expand_vec_cmp_expr_p the separate compare?
>
>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> It does feel like the function could need some refactoring ...
>
> But OK - preferably with the assertion that we can actually
> expand the compare (I suggest to do the expand_vec_cmp_expr_p
> above unconditionally and have a 'global' cannot_expand_mask
> flag defaulted to false and checked in the new path).

OK, how does this look?  It's not quite what you said because
I wanted to avoid the double negation in !cannot.

Thanks,
Richard


gcc/
PR tree-optimization/98560
* gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.

gcc/testsuite/
PR tree-optimization/98560
* gcc.dg/vect/pr98560-1.c: New test.
---
 gcc/gimple-isel.cc| 26 +++---
 gcc/testsuite/gcc.dg/vect/pr98560-1.c | 17 +
 2 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-1.c

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index d40338ce4a2..0f3d6bba229 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
   return gimple_build_assign (lhs, tem3);
 }
 
+  bool can_compute_op0 = true;
   gcc_assert (!COMPARISON_CLASS_P (op0));
   if (TREE_CODE (op0) == SSA_NAME)
 {
@@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 
  tree op0_type = TREE_TYPE (op0);
  tree op0a_type = TREE_TYPE (op0a);
+ if (TREE_CODE_CLASS (tcode) == tcc_comparison)
+   can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
+tcode);
 
  /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
 
- if (integer_minus_onep (op1)
+ if (can_compute_op0
+ && integer_minus_onep (op1)
  && integer_zerop (op2)
- && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
- && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+ && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
{
  tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
  gassign *new_stmt = gimple_build_assign (lhs, conv_op);
@@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
  return new_stmt;
}
 
- if (used_vec_cond_exprs >= 2
+ if (can_compute_op0
+ && used_vec_cond_exprs >= 2
  && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
- != CODE_FOR_nothing)
- && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
+ != CODE_FOR_nothing))
{
  /* Keep the SSA name and use vcond_mask.  */
  tcode = TREE_CODE (op0);
@@ -254,7 +258,15 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
}
 }
 
-  gcc_assert (icode != CODE_F

Re: [PATCH 1/3] arm: Add movmisalign patterns for MVE (PR target/97875)

2021-01-07 Thread Christophe Lyon via Gcc-patches
ping^2?

On Wed, 30 Dec 2020 at 11:33, Christophe Lyon
 wrote:
>
> ping?
>
> On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
>  wrote:
> >
> > This patch adds new movmisalign_mve_load and store patterns for
> > MVE to help vectorization. They are very similar to their Neon
> > counterparts, but use different iterators and instructions.
> >
> > > Indeed MVE supports fewer vector modes than Neon, so we use
> > the MVE_VLD_ST iterator where Neon uses VQX.
> >
> > Since the supported modes are different from the ones valid for
> > arithmetic operators, we introduce two new sets of macros:
> >
> > > ARM_HAVE_NEON_<MODE>_LDST
> > >   true if Neon has vector load/store instructions for <MODE>
> > >
> > > ARM_HAVE_<MODE>_LDST
> > >   true if any vector extension has vector load/store instructions for <MODE>
> >
> > > We move the movmisalign expander from neon.md to vec-common.md, and
> > > replace the TARGET_NEON enabler with ARM_HAVE_<MODE>_LDST.
> >
> > The patch also updates the mve-vneg.c test to scan for the better code
> > generation when loading and storing the vectors involved: it checks
> > that no 'orr' instruction is generated to cope with misalignment at
> > runtime.
> > This test was chosen among the other mve tests, but any other should
> > be OK. Using a plain vector copy loop (dest[i] = a[i]) is not a good
> > test because the compiler chooses to use memcpy.
> >
> > For instance we now generate:
> > test_vneg_s32x4:
> > vldrw.32   q3, [r1]
> > vneg.s32  q3, q3
> > vstrw.32   q3, [r0]
> > bx  lr
> >
> > instead of:
> > test_vneg_s32x4:
> > orr r3, r1, r0
> > lslsr3, r3, #28
> > bne .L15
> > vldrw.32q3, [r1]
> > vneg.s32  q3, q3
> > vstrw.32q3, [r0]
> > bx  lr
> > .L15:
> > push{r4, r5}
> > ldrdr2, r3, [r1, #8]
> > ldrdr5, r4, [r1]
> > rsbsr2, r2, #0
> > rsbsr5, r5, #0
> > rsbsr4, r4, #0
> > rsbsr3, r3, #0
> > strdr5, r4, [r0]
> > pop {r4, r5}
> > strdr2, r3, [r0, #8]
> > bx  lr
> >
> > 2020-12-15  Christophe Lyon  
> >
> > PR target/97875
> > gcc/
> > * config/arm/arm.h (ARM_HAVE_NEON_V8QI_LDST): New macro.
> > (ARM_HAVE_NEON_V16QI_LDST, ARM_HAVE_NEON_V4HI_LDST): Likewise.
> > (ARM_HAVE_NEON_V8HI_LDST, ARM_HAVE_NEON_V2SI_LDST): Likewise.
> > (ARM_HAVE_NEON_V4SI_LDST, ARM_HAVE_NEON_V4HF_LDST): Likewise.
> > (ARM_HAVE_NEON_V8HF_LDST, ARM_HAVE_NEON_V4BF_LDST): Likewise.
> > (ARM_HAVE_NEON_V8BF_LDST, ARM_HAVE_NEON_V2SF_LDST): Likewise.
> > (ARM_HAVE_NEON_V4SF_LDST, ARM_HAVE_NEON_DI_LDST): Likewise.
> > (ARM_HAVE_NEON_V2DI_LDST): Likewise.
> > (ARM_HAVE_V8QI_LDST, ARM_HAVE_V16QI_LDST): Likewise.
> > (ARM_HAVE_V4HI_LDST, ARM_HAVE_V8HI_LDST): Likewise.
> > (ARM_HAVE_V2SI_LDST, ARM_HAVE_V4SI_LDST, ARM_HAVE_V4HF_LDST): 
> > Likewise.
> > (ARM_HAVE_V8HF_LDST, ARM_HAVE_V4BF_LDST, ARM_HAVE_V8BF_LDST): 
> > Likewise.
> > (ARM_HAVE_V2SF_LDST, ARM_HAVE_V4SF_LDST, ARM_HAVE_DI_LDST): 
> > Likewise.
> > (ARM_HAVE_V2DI_LDST): Likewise.
> > * config/arm/mve.md (*movmisalign_mve_store): New pattern.
> > (*movmisalign_mve_load): New pattern.
> > * config/arm/neon.md (movmisalign): Move to ...
> > * config/arm/vec-common.md: ... here.
> >
> > PR target/97875
> > gcc/testsuite/
> > * gcc.target/arm/simd/mve-vneg.c: Update test.
> > ---
> >  gcc/config/arm/arm.h | 40 
> > 
> >  gcc/config/arm/mve.md| 25 +
> >  gcc/config/arm/neon.md   | 25 -
> >  gcc/config/arm/vec-common.md | 24 +
> >  gcc/testsuite/gcc.target/arm/simd/mve-vneg.c |  3 +++
> >  5 files changed, 92 insertions(+), 25 deletions(-)
> >
> > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> > index 4a63d33..d44e0c6 100644
> > --- a/gcc/config/arm/arm.h
> > +++ b/gcc/config/arm/arm.h
> > @@ -1151,6 +1151,46 @@ extern const int arm_arch_cde_coproc_bits[];
> >  #define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || 
> > TARGET_HAVE_MVE_FLOAT)
> >  #define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || 
> > TARGET_HAVE_MVE_FLOAT)
> >
> > +/* The conditions under which vector modes are supported by load/store
> > +   instructions using Neon.  */
> > +
> > +#define ARM_HAVE_NEON_V8QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V16QI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V8HI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V2SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4SI_LDST TARGET_NEON
> > +#define ARM_HAVE_NEON_V4HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V8HF_LDST TARGET_NEON_FP16INST
> > +#define ARM_HAVE_NEON_V4BF_LDST TARGET

Re: [PATCH 2/3] arm: Auto-vectorization for MVE: vshl

2021-01-07 Thread Christophe Lyon via Gcc-patches
ping^2?

On Wed, 30 Dec 2020 at 11:34, Christophe Lyon
 wrote:
>
> ping?
>
> On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
>  wrote:
> >
> > This patch enables MVE vshlq instructions for auto-vectorization.
> >
> > The existing mve_vshlq_n_ is kept, as it takes a single
> > immediate as second operand, and is used by arm_mve.h.
> >
> > We move the vashl3 insn from neon.md to an expander in
> > vec-common.md, and the mve_vshlq_ insn from mve.md to
> > > vec-common.md, adding the second alternative from neon.md.
> >
> > mve_vshlq_ will be used by a later patch enabling
> > vectorization for vshr, as a unified version of
> > ashl3_[signed|unsigned] from neon.md. Keeping the use of unspec
> > > VSHLQ makes it possible to generate both 's' and 'u' variants.
> >
> > It is not clear whether the neon_shift_[reg|imm] attribute is still
> > suitable, since this insn is also used for MVE.
> >
> > I kept the mve_vshlq_ naming instead of renaming it to
> > ashl3__ as discussed because the reference in
> > arm_mve_builtins.def automatically inserts the "mve_" prefix and I
> > didn't want to make a special case for this.
> >
> > I haven't yet found why the v16qi and v8hi tests are not vectorized.
> > With dest[i] = a[i] << b[i] and:
> >   {
> > int i;
> > unsigned int i.24_1;
> > unsigned int _2;
> > int16_t * _3;
> > short int _4;
> > int _5;
> > int16_t * _6;
> > short int _7;
> > int _8;
> > int _9;
> > int16_t * _10;
> > short int _11;
> > unsigned int ivtmp_42;
> > unsigned int ivtmp_43;
> >
> >  [local count: 119292720]:
> >
> >  [local count: 954449105]:
> > i.24_1 = (unsigned int) i_23;
> > _2 = i.24_1 * 2;
> > _3 = a_15(D) + _2;
> > _4 = *_3;
> > _5 = (int) _4;
> > _6 = b_16(D) + _2;
> > _7 = *_6;
> > _8 = (int) _7;
> > _9 = _5 << _8;
> > _10 = dest_17(D) + _2;
> > _11 = (short int) _9;
> > *_10 = _11;
> > i_19 = i_23 + 1;
> > ivtmp_42 = ivtmp_43 - 1;
> > if (ivtmp_42 != 0)
> >   goto ; [87.50%]
> > else
> >   goto ; [12.50%]
> >
> >  [local count: 835156386]:
> > goto ; [100.00%]
> >
> >  [local count: 119292720]:
> > return;
> >
> >   }
> > the vectorizer says:
> > mve-vshl.c:37:96: note:   ==> examining statement: _5 = (int) _4;
> > mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> > internal
> > mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> > mve-vshl.c:37:96: missed:   conversion not supported by target.
> > mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> > internal
> > mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> > mve-vshl.c:37:96: note:   vect_is_simple_use: operand *_3, type of def: 
> > internal
> > mve-vshl.c:37:96: note:   vect_is_simple_use: vectype vector(8) short int
> > mve-vshl.c:37:117: missed:   not vectorized: relevant stmt not supported: 
> > _5 = (int) _4;
> > mve-vshl.c:37:96: missed:  bad operation or unsupported loop bound.
> > mve-vshl.c:37:96: note:  * Analysis failed with vector mode V8HI
> >
> > 2020-12-03  Christophe Lyon  
> >
> > gcc/
> > * config/arm/mve.md (mve_vshlq_): Move to
> > > vec-common.md.
> > * config/arm/neon.md (vashl3): Delete.
> > * config/arm/vec-common.md (mve_vshlq_): New.
> > (vasl3): New expander.
> >
> > gcc/testsuite/
> > * gcc.target/arm/simd/mve-vshl.c: Add tests for vshl.
> > ---
> >  gcc/config/arm/mve.md| 13 +-
> >  gcc/config/arm/neon.md   | 19 -
> >  gcc/config/arm/vec-common.md | 30 ++
> >  gcc/testsuite/gcc.target/arm/simd/mve-vshl.c | 62 
> > 
> >  4 files changed, 93 insertions(+), 31 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vshl.c
> >
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index 673a83c..8bdb451 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -822,18 +822,7 @@ (define_insn "mve_vcmpneq_"
> >
> >  ;;
> >  ;; [vshlq_s, vshlq_u])
> > -;;
> > -(define_insn "mve_vshlq_"
> > -  [
> > -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > -   (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> > -  (match_operand:MVE_2 2 "s_register_operand" "w")]
> > -VSHLQ))
> > -  ]
> > -  "TARGET_HAVE_MVE"
> > -  "vshl.%#\t%q0, %q1, %q2"
> > -  [(set_attr "type" "mve_move")
> > -])
> > +;; See vec-common.md
> >
> >  ;;
> >  ;; [vabdq_s, vabdq_u])
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index 50220be..ac9bf74 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -845,25 +845,6 @@ (define_insn "*smax3_neon"
> >  ; generic vectorizer code.  It ends up creating a V2DI constructor with
> >  ; SImode elements.
> >
> > -(define_insn "vashl3"
> > -  [(set

Re: [PATCH 3/3] arm: Auto-vectorization for MVE: vshr

2021-01-07 Thread Christophe Lyon via Gcc-patches
ping^2?

On Wed, 30 Dec 2020 at 11:34, Christophe Lyon
 wrote:
>
> ping?
>
> On Thu, 17 Dec 2020 at 18:48, Christophe Lyon
>  wrote:
> >
> > This patch enables MVE vshr instructions for auto-vectorization.  New
> > MVE patterns are introduced that take a vector of constants as second
> > operand, all constants being equal.
> >
> > The existing mve_vshrq_n_ is kept, as it takes a single
> > immediate as second operand, and is used by arm_mve.h.
> >
> > > The vashr3 and vlshr3 expanders are moved from neon.md to
> > vec-common.md, updated to rely on the normal expansion scheme to
> > generate shifts by immediate.
> >
> > 2020-12-03  Christophe Lyon  
> >
> > gcc/
> > * config/arm/mve.md (mve_vshrq_n_s_imm): New entry.
> > (mve_vshrq_n_u_imm): Likewise.
> > * config/arm/neon.md (vashr3, vlshr3): Move to ...
> > * config/arm/vec-common.md: ... here.
> >
> > gcc/testsuite/
> > * gcc.target/arm/simd/mve-vshr.c: Add tests for vshr.
> > ---
> >  gcc/config/arm/mve.md| 34 
> >  gcc/config/arm/neon.md   | 34 
> >  gcc/config/arm/vec-common.md | 38 +-
> >  gcc/testsuite/gcc.target/arm/simd/mve-vshr.c | 59 
> > 
> >  4 files changed, 130 insertions(+), 35 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vshr.c
> >
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index 8bdb451..eea8b20 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -763,6 +763,7 @@ (define_insn "mve_vcreateq_"
> >  ;;
> >  ;; [vshrq_n_s, vshrq_n_u])
> >  ;;
> > +;; Version that takes an immediate as operand 2.
> >  (define_insn "mve_vshrq_n_"
> >[
> > (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > @@ -775,6 +776,39 @@ (define_insn "mve_vshrq_n_"
> >[(set_attr "type" "mve_move")
> >  ])
> >
> > +;; Versions that take constant vectors as operand 2 (with all elements
> > +;; equal).
> > +(define_insn "mve_vshrq_n_s_imm"
> > +  [
> > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > +   (ashiftrt:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> > +   (match_operand:MVE_2 2 
> > "imm_for_neon_rshift_operand" "i")))
> > +  ]
> > +  "TARGET_HAVE_MVE"
> > +  {
> > +return neon_output_shift_immediate ("vshr", 's', &operands[2],
> > +   mode,
> > +   VALID_NEON_QREG_MODE (mode),
> > +   true);
> > +  }
> > +  [(set_attr "type" "mve_move")
> > +])
> > +(define_insn "mve_vshrq_n_u_imm"
> > +  [
> > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > +   (lshiftrt:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> > +   (match_operand:MVE_2 2 
> > "imm_for_neon_rshift_operand" "i")))
> > +  ]
> > +  "TARGET_HAVE_MVE"
> > +  {
> > +return neon_output_shift_immediate ("vshr", 'u', &operands[2],
> > +   mode,
> > +   VALID_NEON_QREG_MODE (mode),
> > +   true);
> > +  }
> > +  [(set_attr "type" "mve_move")
> > +])
> > +
> >  ;;
> >  ;; [vcvtq_n_from_f_s, vcvtq_n_from_f_u])
> >  ;;
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index ac9bf74..a0e8d7a 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -899,40 +899,6 @@ (define_insn "ashl3_unsigned"
> >[(set_attr "type" "neon_shift_reg")]
> >  )
> >
> > -(define_expand "vashr3"
> > -  [(set (match_operand:VDQIW 0 "s_register_operand")
> > -   (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
> > -   (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
> > -  "TARGET_NEON"
> > -{
> > -  if (s_register_operand (operands[2], mode))
> > -{
> > -  rtx neg = gen_reg_rtx (mode);
> > -  emit_insn (gen_neon_neg2 (neg, operands[2]));
> > -  emit_insn (gen_ashl3_signed (operands[0], operands[1], neg));
> > -}
> > -  else
> > -emit_insn (gen_vashr3_imm (operands[0], operands[1], 
> > operands[2]));
> > -  DONE;
> > -})
> > -
> > -(define_expand "vlshr3"
> > -  [(set (match_operand:VDQIW 0 "s_register_operand")
> > -   (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
> > -   (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
> > -  "TARGET_NEON"
> > -{
> > -  if (s_register_operand (operands[2], mode))
> > -{
> > -  rtx neg = gen_reg_rtx (mode);
> > -  emit_insn (gen_neon_neg2 (neg, operands[2]));
> > -  emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg));
> > -}
> > -  else
> > -emit_insn (gen_vlshr3_imm (operands[0], operands[1], 
> > operands[2]));
> > -  DONE;
> > -})
> > -
> >  ;; 64-bit shifts
> >
> >  ;; This pattern loads a 32-bit shift count into a 64-bit N

Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-07 Thread Richard Earnshaw via Gcc-patches
On 07/01/2021 00:59, Daniel Engel wrote:
> --snip--
> 
> On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
> 
>>
>> Thanks for working on this, Daniel.
>>
>> This is clearly stage1 material, so we've got time for a couple of
>> iterations to sort things out.
> 
> I appreciate your feedback.  I had been hoping that with no regressions
> this might still be eligible for stage2.  Christophe never indicated
> either way, but the fact that he was looking at it seemed positive.
> I thought I would be a couple of weeks faster with this last
> iteration, but holidays got in the way.

GCC doesn't have a stage 2 any more (historical wart).  We were in
(late) stage3 when this was first posted, and because of the significant
impact this might have on not just CM0 but other targets as well, I
don't think it's something we should try to squeeze in at the last
minute.  We're now in stage 4, so that is doubly the case.

Christophe is a very valuable member of our community, but he's not a
port maintainer and thus cannot really rule on what can go into the
tools, or when.

> 
> I actually think your comments below could all be addressable within a
> couple of days.  But, I'm not accounting for the review process.
>  
>> Firstly, the patch is very large, but contains a large number of
>> distinct changes, so it would really benefit from being broken down into
>> a number of distinct patches.  This will make reviewing the individual
>> changes much more straight-forward.  
> 
> I have no context for "large" or "small" with respect to gcc.  This
> patch comprises about 30% of a previously-monolithic library that's
> been shipping since ~2016 (the rest is libm material).  Other than
> (1) the aforementioned change to div0(), (2) a nascent adaptation
> for __truncdfsf2() (not enabled), and (3) the gratuitous addition of
> the bitwise functions, the library remains pretty much as it was
> originally released.

Large, like many other terms, is relative.  For assembler file changes,
which this is primarily, the overall size can be much smaller and still
be considered 'large'.

> 
> The driving force in the development of this library was small size,
> which of course was never possible with the softfp routines.  It's not
> half-slow, either, for the limitations of the M0 architecture.   And,
> it's IEEE compliant.  But, that means that most of the functions are
> highly interconnected.  So, some of it can be broken up as you outline
> below, but that last patch is still worth more than half of the total.

Nevertheless, having the floating-point code separated out will make
reviewing more straight forward.  I'll likely need to ask one of our FP
experts to have a specific look at that part and that will be easier if
it is disentangled from the other changes.

> 
> I also have ~70k lines of test vectors that seem mostly redundant, but
> not completely.  I haven't decided what to do here.  For example, I have
> coverage for __aeabi_u/ldivmod, while GCC does not.  If I do anything
> with this code it will be in a separate thread.

Publishing the test code, even if it isn't integrated into the GCC
testsuite would be useful.  Perhaps someone else could then help with that.

> 
>> I'd suggest:
>>
>> 1) Some basic makefile cleanups to ease initial integration - in
>> particular where we have things like
>>
>> LIB1FUNCS += 
>>
>> that this be rewritten with one function per line (and sorted
>> alphabetically) - then we can see which functions are being changed in
>> subsequent patches.  It makes the Makefile fragments longer, but the
>> improvement in clarity makes this worthwhile.
> 
> I know next to nothing about Makefiles, particularly ones as complex as
> GCC's.  I was just trying to work with the existing style to avoid
> breaking something.  However, I can certainly adopt this suggestion.
>  
>> 2) The changes for the existing integer functions - preferably one
>> function per patch.
>>
>> 3) The new integer functions that you're adding
> 
> These wouldn't be too hard to do, but what are the expectations for
> testing?  A clean build of GCC takes about 6 hours in my VM, and
> regression testing takes about 4 hours per architecture.  You would want
> a full regression report for each incremental patch?  I have no idea how
> to target regression tests that apply to particular runtime functions
> without the risk of missing something.
> 

Most of this can be tested in a cross-compile environment using qemu as
a model.  A cross build shouldn't take that long (especially if you
restrict the compiler to just C and C++ - other languages are
vanishingly unlikely to pick up errors in the parts of the compiler
you're changing).  But build checks will be mostly sufficient for most
of the intermediate patches.

>> 4) The floating-point support.
>>
>> Some more general observations:
>>
>> - where functions are already in lib1funcs.asm, please leave them there.
> 
> I guess I have a different vision here.  I have had a really hard time
> 

Re: [PATCH] gimple-isel: Fall back to using vcond_mask [PR98560]

2021-01-07 Thread Richard Biener
On Thu, 7 Jan 2021, Richard Sandiford wrote:

> Richard Biener  writes:
> > On Wed, 6 Jan 2021, Richard Sandiford wrote:
> >
> >> PR98560 is about a case in which the vectoriser initially generates:
> >> 
> >>   mask_1 = a < 0;
> >>   mask_2 = mask_1 & ...;
> >>   res = VEC_COND_EXPR ;
> >> 
> >> The vectoriser thus expects res to be calculated using vcond_mask.
> >> However, we later manage to fold mask_2 to mask_1, leaving:
> >> 
> >>   mask_1 = a < 0;
> >>   res = VEC_COND_EXPR ;
> >> 
> >> gimple-isel then required a combined vcond to exist.
> >> 
> >> On most targets, it's not too onerous to provide all possible
> >> (compare x select) combinations.  For each data mode, you just
> >> need to provide unsigned comparisons, signed comparisons, and
> >> floating-point comparisons, with the data mode and type of
> >> comparison uniquely determining the mode of the compared values.
> >> But for targets like SVE that support “unpacked” vectors,
> >> it's not that simple: the level of unpacking adds another
> >> degree of freedom.
> >> 
> >> Rather than insist that the combined versions exist, I think
> >> we should be prepared to fall back to using separate comparisons
> >> and vcond_masks.  I think that makes more sense on targets like
> > >> AArch64 and AArch32 in which compares and selects are fundamentally
> >> separate operations anyway.
> >
> > Indeed the mask variants (thus being able to expand the comparison)
> > are more fundamental.  I guess you're running into this path because
> > we did not consider using vcond_mask because of
> >
> >   if (used_vec_cond_exprs >= 2
> >   && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> >   != CODE_FOR_nothing)
> >   && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> > {
> >   /* Keep the SSA name and use vcond_mask.  */
> >   tcode = TREE_CODE (op0);
> > }
> >
> > not triggering?  Which also means your patch fails to check/assert
> > that we can expand_vec_cmp_expr_p the separate compare?
> >
> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
> >
> > It does feel like the function could need some refactoring ...
> >
> > But OK - preferably with the assertion that we can actually
> > expand the compare (I suggest to do the expand_vec_cmp_expr_p
> > above unconditionally and have a 'global' cannot_expand_mask
> > flag defaulted to false and checked in the new path).
> 
> OK, how does this look?  It's not quite what you said because
> I wanted to avoid the double negation in !cannot.

LGTM.

Thanks,
Richard.

> Thanks,
> Richard
> 
> 
> gcc/
>   PR tree-optimization/98560
>   * gimple-isel.cc (gimple_expand_vec_cond_expr): If we fail to use
>   IFN_VCOND{,U,EQ}, fall back on IFN_VCOND_MASK.
> 
> gcc/testsuite/
>   PR tree-optimization/98560
>   * gcc.dg/vect/pr98560-1.c: New test.
> ---
>  gcc/gimple-isel.cc| 26 +++---
>  gcc/testsuite/gcc.dg/vect/pr98560-1.c | 17 +
>  2 files changed, 36 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr98560-1.c
> 
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index d40338ce4a2..0f3d6bba229 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -154,6 +154,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>return gimple_build_assign (lhs, tem3);
>  }
>  
> +  bool can_compute_op0 = true;
>gcc_assert (!COMPARISON_CLASS_P (op0));
>if (TREE_CODE (op0) == SSA_NAME)
>  {
> @@ -184,13 +185,16 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>  
> tree op0_type = TREE_TYPE (op0);
> tree op0a_type = TREE_TYPE (op0a);
> +   if (TREE_CODE_CLASS (tcode) == tcc_comparison)
> + can_compute_op0 = expand_vec_cmp_expr_p (op0a_type, op0_type,
> +  tcode);
>  
> /* Try to fold x CMP y ? -1 : 0 to x CMP y.  */
>  
> -   if (integer_minus_onep (op1)
> +   if (can_compute_op0
> +   && integer_minus_onep (op1)
> && integer_zerop (op2)
> -   && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0))
> -   && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> +   && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (TREE_TYPE (op0)))
>   {
> tree conv_op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), op0);
> gassign *new_stmt = gimple_build_assign (lhs, conv_op);
> @@ -198,10 +202,10 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
> return new_stmt;
>   }
>  
> -   if (used_vec_cond_exprs >= 2
> +   if (can_compute_op0
> +   && used_vec_cond_exprs >= 2
> && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> -   != CODE_FOR_nothing)
> -   && expand_vec_cmp_expr_p (op0a_type, op0_type, tcode))
> +   != CODE_F

Re: [PATCH 2/8 v9]middle-end slp: fix is_linear_load_p to prevent multiple answers

2021-01-07 Thread Richard Biener
On Mon, 28 Dec 2020, Tamar Christina wrote:

> Hi All,
> 
> This fixes an issue where is_linear_load_p could return the incorrect
> permutation kind because it is single pass.
> 
> This arranges the candidates in such a way that there won't be any ambiguity 
> so
> that the function can still be linear but give correct values.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?

OK.  I see no testcases in any of the patches in this series though.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-slp-patterns.c (is_linear_load_p): Fix ambiguity.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> fede88923af8521ee4954c8ae27b0e589f975610..7fd79d91c6ba4ccdbf361307a6105fb7e46aa961
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -140,32 +140,32 @@ is_linear_load_p (load_permutation_t loads)
>  
>unsigned load, i;
>complex_perm_kinds_t candidates[4]
> -= { PERM_EVENODD
> -  , PERM_ODDEVEN
> -  , PERM_ODDODD
> += { PERM_ODDODD
>, PERM_EVENEVEN
> +  , PERM_EVENODD
> +  , PERM_ODDEVEN
>};
>  
>int valid_patterns = 4;
> -  FOR_EACH_VEC_ELT_FROM (loads, i, load, 1)
> +  FOR_EACH_VEC_ELT (loads, i, load)
>  {
> -  if (candidates[0] != PERM_UNKNOWN && load != i)
> +  if (candidates[0] != PERM_UNKNOWN && load != 1)
>   {
> candidates[0] = PERM_UNKNOWN;
> valid_patterns--;
>   }
> -  if (candidates[1] != PERM_UNKNOWN
> -   && load != (i % 2 == 0 ? i + 1 : i - 1))
> +  if (candidates[1] != PERM_UNKNOWN && load != 0)
>   {
> candidates[1] = PERM_UNKNOWN;
> valid_patterns--;
>   }
> -  if (candidates[2] != PERM_UNKNOWN && load != 1)
> +  if (candidates[2] != PERM_UNKNOWN && load != i)
>   {
> candidates[2] = PERM_UNKNOWN;
> valid_patterns--;
>   }
> -  if (candidates[3] != PERM_UNKNOWN && load != 0)
> +  if (candidates[3] != PERM_UNKNOWN
> +   && load != (i % 2 == 0 ? i + 1 : i - 1))
>   {
> candidates[3] = PERM_UNKNOWN;
> valid_patterns--;
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH 3/8 v9]middle-end slp: handle externals correctly in linear_loads_p

2021-01-07 Thread Richard Biener
On Mon, 28 Dec 2020, Tamar Christina wrote:

> Hi All,
> 
> This fixes a bug with externals and linear_loads_p where I forgot to save the
> value before returning.
> 
> It also fixes handling of nodes with multiple children on a non VEC_PERM node.
> There the child iteration would already resolve the kind and the loads are all
> expected to be the same if valid so just return one.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?

OK.

Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-slp-patterns.c (linear_loads_p): Fix externals.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> 7fd79d91c6ba4ccdbf361307a6105fb7e46aa961..235c0741c78b04f14725751ec399c0fdb32a0823
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -229,6 +229,7 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, 
> slp_tree root)
>else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
>  {
>retval.first = PERM_TOP;
> +  perm_cache->put (root, retval);
>return retval;
>  }
>  
> @@ -241,6 +242,7 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, 
> slp_tree root)
>complex_load_perm_t res = linear_loads_p (perm_cache, child);
>kind = vect_merge_perms (kind, res.first);
>/* Unknown and Top are not valid on blends as they produce no permute. 
>  */
> +  retval.first = kind;
>if (kind == PERM_UNKNOWN || kind == PERM_TOP)
>   return retval;
>all_loads.safe_push (res.second);
> @@ -258,7 +260,7 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, 
> slp_tree root)
>retval.first = kind;
>retval.second = nloads;
>  }
> -  else if (all_loads.length () == 1)
> +  else
>  {
>retval.first = kind;
>retval.second = all_loads[0];
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH 4/8 v9]middle-end slp: upgrade complex add to new format and fix memory leaks

2021-01-07 Thread Richard Biener
On Mon, 28 Dec 2020, Tamar Christina wrote:

> Hi All,
> 
> This fixes a memory leak in complex_add_pattern because I was not calling
> vect_free_slp_tree when dissolving one side of the TWO_OPERANDS nodes.
> 
> Secondly it also upgrades the class to the new interface required by the other
> patterns.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vect-slp-patterns.c (class complex_pattern,
>   class complex_add_pattern): Add parameters to matches.
>   (complex_add_pattern::build): Free memory.
>   (complex_add_pattern::matches): Move validation end of match.
>   (complex_add_pattern::recognize): Likewise.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> 235c0741c78b04f14725751ec399c0fdb32a0823..dbc58f7c53868ed431fc67de1f0162eb0d3b2c24
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -503,7 +503,7 @@ class complex_pattern : public vect_pattern
>  void build (vec_info *);
>  
>  static internal_fn
> -matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> +matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree 
> *,
>vec *);
>  };
>  
> @@ -608,11 +608,17 @@ class complex_add_pattern : public complex_pattern
>public:
>  void build (vec_info *);
>  static internal_fn
> -matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
> +matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree 
> *,
>vec *);
>  
>  static vect_pattern*
>  recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
> +
> +static vect_pattern*
> +mkInstance (slp_tree *node, vec *m_ops, internal_fn ifn)
> +{
> +  return new complex_add_pattern (node, m_ops, ifn);
> +}
>  };
>  
>  /* Perform a replacement of the detected complex add pattern with the new
> @@ -630,6 +636,11 @@ complex_add_pattern::build (vec_info *vinfo)
>nodes.quick_push (children[0]);
>nodes.quick_push (vect_build_swap_evenodd_node (children[1]));
>  
> +  SLP_TREE_REF_COUNT (nodes[0])++;
> +  SLP_TREE_REF_COUNT (nodes[1])++;
> +  vect_free_slp_tree (this->m_ops[0]);
> +  vect_free_slp_tree (this->m_ops[1]);
> +
>SLP_TREE_CHILDREN (*this->m_node).truncate (0);
>SLP_TREE_CHILDREN (*this->m_node).safe_splice (nodes);
>  
> @@ -650,7 +661,7 @@ complex_add_pattern::build (vec_info *vinfo)
>  internal_fn
>  complex_add_pattern::matches (complex_operation_t op,
> slp_tree_to_load_perm_map_t *perm_cache,
> -   vec *ops)
> +   slp_tree *node, vec *ops)
>  {
>internal_fn ifn = IFN_LAST;
>  
> @@ -685,6 +696,9 @@ complex_add_pattern::matches (complex_operation_t op,
>if (linear_loads_p (perm_cache, children[1]).first != PERM_ODDEVEN)
>  return IFN_LAST;
>  
> +  if (!vect_pattern_validate_optab (ifn, *node))
> +return IFN_LAST;
> +
>return ifn;
>  }
>  
> @@ -697,8 +711,9 @@ complex_add_pattern::recognize 
> (slp_tree_to_load_perm_map_t *perm_cache,
>auto_vec ops;
>complex_operation_t op
>  = vect_detect_pair_op (*node, true, &ops);
> -  internal_fn ifn = complex_add_pattern::matches (op, perm_cache, &ops);
> -  if (!vect_pattern_validate_optab (ifn, *node))
> +  internal_fn ifn
> += complex_add_pattern::matches (op, perm_cache, node, &ops);
> +  if (ifn == IFN_LAST)
>  return NULL;
>  
>return new complex_add_pattern (node, &ops, ifn);
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


Re: [PATCH 1/8 v9]middle-end slp: Support optimizing load distribution

2021-01-07 Thread Richard Biener

From tamar.christ...@arm.com Mon Dec 28 14:36:32 2020
Date: Mon, 28 Dec 2020 13:35:56 +
From: Tamar Christina 
To: gcc-patches@gcc.gnu.org
Cc: n...@arm.com, rguent...@suse.de, o...@ucw.cz
Subject: [PATCH 1/8 v9]middle-end slp: Support optimizing load distribution

Hi All,

This introduces a post processing step for the pattern matcher to flatten
permutes introduced by the complex multiplication patterns.


This performs a blend early such that SLP is not cancelled by the LOAD_LANES
permute.  This is a temporary workaround to the fact that loads are not CSEd
during building and is required to produce efficient code.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-vect-slp.c (optimize_load_redistribution_1): New.
(optimize_load_redistribution): New.
(vect_match_slp_patterns): Use it.

--- inline copy of patch -- 
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c

index 
2a58e54fe51471df5f55ce4a524d0022744054b0..8360a59098f517498f3155f325cf8406466ac25c
 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2228,6 +2228,115 @@ calculate_unrolling_factor (poly_uint64 nunits, 
unsigned int group_size)
   return exact_div (common_multiple (nunits, group_size), group_size);
 }

+/* Helper function of optimize_load_redistribution that performs the operation
+   recursively.  */
+
+static slp_tree
+optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map,
+   hash_set *visited, slp_tree root)
+{
+  if (visited->add (root))
+return NULL;
+
+  slp_tree node;
+  unsigned i;
+
+  /* For now, we don't know anything about externals so do not do anything.  */
+  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
+  || SLP_TREE_DEF_TYPE (root) == vect_constant_def)


use a single != vect_internal_def test please


+return NULL;
+  else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR
+  && SLP_TREE_LANE_PERMUTATION (root).exists ()
+  && !SLP_TREE_SCALAR_STMTS (root).exists ())


I think both last tests are unnecessary


+{
+  /* First convert this node into a load node and add it to the leaves
+ list and flatten the permute from a lane to a load one.  If it's
+ unneeded it will be elided later.  */
+  auto_vec stmts;
+  stmts.create (SLP_TREE_LANES (root));
+  load_permutation_t load_perm;
+  load_perm.create (SLP_TREE_LANES (root));
+  lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root);


load_perm leaks when any of the below outs is taken


+  for (unsigned j = 0; j < lane_perm.length (); j++)
+{
+  std::pair perm = lane_perm[j];
+ /* This isn't strictly needed, but this function is a temporary
+one for specifically pattern matching, so don't want it to
+optimize things the remainder of the pipeline will.  */
+ if (perm.first != j)
+   goto next;


but please elide it nevertheless


+  node = SLP_TREE_CHILDREN (root)[perm.first];
+
+ if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
+   return NULL;


so you want to check whether this is a load, I think more to the point
would be a vect_internal_def + zero SLP children check.  And a comment
on what we test (we do lack classification of SLP nodes, so a helper
like vect_is_slp_load_node or so would be OK as well)


+
+ stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]);
+  load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION (node)[perm.second]);


What you're doing here lacks a check that we are actually loading from
the same DR group.  I think it might be easier to just collect scalar
stmts and throw them at vect_build_slp_tree?  That should perform
the necessary verification, build the appropriate lane permute and
perform the CSE.  Which leads to the question why the VEC_PERM node
doesn't have scalar stmts set while we are actually able to compute
them here ... that is, the CSE opportunity could have been noticed
during pattern matching itself?


+}
+
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_NOTE, vect_location,
+"converting stmts on permute node %p\n", root);
+
+  slp_tree *value = bst_map->get (stmts);
+  if (value)
+   node = *value;
+  else
+   {
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, node)
+   SLP_TREE_REF_COUNT (node)++;
+
+ vec stmts_cpy = stmts.copy ();
+ node = vect_create_new_slp_node (stmts_cpy.copy (), 0);
+ SLP_TREE_VECTYPE (node) = SLP_TREE_VECTYPE (root);
+ SLP_TREE_LOAD_PERMUTATION (node) = load_perm;
+ bst_map->put (stmts_cpy, node);
+   }
+  SLP_TREE_REF_COUNT (node)++;


Adjusting the refcount here but doing the replacement in the caller
is a bit awkward to follow - how about passing a reference so you
can adjust the edge here?


+
+  return node;
+  

RE: [PATCH 1/8 v9]middle-end slp: Support optimizing load distribution

2021-01-07 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Richard Biener 
> Sent: Thursday, January 7, 2021 1:21 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; o...@ucw.cz
> Subject: Re: [PATCH 1/8 v9]middle-end slp: Support optimizing load
> distribution
> 
> > From tamar.christ...@arm.com Mon Dec 28 14:36:32 2020
> > Date: Mon, 28 Dec 2020 13:35:56 +
> > From: Tamar Christina 
> > To: gcc-patches@gcc.gnu.org
> > Cc: n...@arm.com, rguent...@suse.de, o...@ucw.cz
> > Subject: [PATCH 1/8 v9]middle-end slp: Support optimizing load
> > distribution
> >
> > Hi All,
> >
> > This introduces a post processing step for the pattern matcher to
> > flatten permutes introduced by the complex multiplications patterns.
> >
> > This performs a blend early such that SLP is not cancelled by the
> > LOAD_LANES permute.  This is a temporary workaround to the fact that
> > loads are not CSEd during building and is required to produce efficient 
> > code.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * tree-vect-slp.c (optimize_load_redistribution_1): New.
> > (optimize_load_redistribution): New.
> > (vect_match_slp_patterns): Use it.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index
> >
> 2a58e54fe51471df5f55ce4a524d0022744054b0..8360a59098f517498f3155f325c
> f
> > 8406466ac25c 100644
> > --- a/gcc/tree-vect-slp.c
> > +++ b/gcc/tree-vect-slp.c
> > @@ -2228,6 +2228,115 @@ calculate_unrolling_factor (poly_uint64 nunits,
> unsigned int group_size)
> >return exact_div (common_multiple (nunits, group_size),
> > group_size);  }
> >
> > +/* Helper function of optimize_load_redistribution that performs the
> operation
> > +   recursively.  */
> > +
> > +static slp_tree
> > +optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t
> *bst_map,
> > +   hash_set *visited, slp_tree root) {
> > +  if (visited->add (root))
> > +return NULL;
> > +
> > +  slp_tree node;
> > +  unsigned i;
> > +
> > +  /* For now, we don't know anything about externals so do not do
> > + anything.  */  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
> > +  || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
> 
> use a single != vect_internal_def test please
> 
> > +return NULL;
> > +  else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR
> > +  && SLP_TREE_LANE_PERMUTATION (root).exists ()
> > +  && !SLP_TREE_SCALAR_STMTS (root).exists ())
> 
> I think both last tests are unnecessary

It's there to prevent it from trying to optimize two_operands nodes,
which are VEC_PERM nodes but contain no scalar statements. I didn't find a
different way to distinguish between the two. The SLP tree can contain a
number of these that haven't been pattern matched away.

> 
> > +{
> > +  /* First convert this node into a load node and add it to the leaves
> > + list and flatten the permute from a lane to a load one.  If it's
> > + unneeded it will be elided later.  */
> > +  auto_vec stmts;
> > +  stmts.create (SLP_TREE_LANES (root));
> > +  load_permutation_t load_perm;
> > +  load_perm.create (SLP_TREE_LANES (root));
> > +  lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION
> > + (root);
> 
> load_perm leaks when any of the below outs is taken
> 
> > +  for (unsigned j = 0; j < lane_perm.length (); j++)
> > +{
> > +  std::pair perm = lane_perm[j];
> > + /* This isn't strictly needed, but this function is a temporary
> > +one for specifically pattern matching, so don't want it to
> > +optimize things the remainder of the pipeline will.  */
> > + if (perm.first != j)
> > +   goto next;
> 
> but please elide it nevertheless
> 
> > +  node = SLP_TREE_CHILDREN (root)[perm.first];
> > +
> > + if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
> > +   return NULL;
> 
> so you want to check whether this is a load, I think more to the point would
> be a vect_internal_def + zero SLP children check.  And a comment on what
> we test (we do lack classification of SLP nodes, so a helper like
> vect_is_slp_load_node or so would be OK as well)
> 
> > +
> > + stmts.quick_push (SLP_TREE_SCALAR_STMTS
> (node)[perm.second]);
> > +  load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION
> > +(node)[perm.second]);
> 
> As you're doing here lacks a check that we are actually loading from the same
> DR group.  I think it might be easier to just collect scalar stmts and throw
> them at vect_build_slp_tree?  That should perform the necessary
> verification, build the appropriate lane permute and perform the CSE.  Which
> leads to the question why the VEC_PERM node doesn't have scalar stmts set
> while we are actually be able to compute them here ... that is, the CSE
> opportunity could have been noticed during pattern matching itself?
> 
> > +}
> > +

Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-07 Thread Christophe Lyon via Gcc-patches
On Thu, 7 Jan 2021 at 13:56, Richard Earnshaw
 wrote:
>
> On 07/01/2021 00:59, Daniel Engel wrote:
> > --snip--
> >
> > On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
> >
> >>
> >> Thanks for working on this, Daniel.
> >>
> >> This is clearly stage1 material, so we've got time for a couple of
> >> iterations to sort things out.
> >
> > I appreciate your feedback.  I had been hoping that with no regressions
> > this might still be eligible for stage2.  Christophe never indicated
> > either way, but the fact that he was looking at it seemed positive.
> > I thought I would be a couple of weeks faster with this last
> > iteration, but holidays got in the way.
>
> GCC doesn't have a stage 2 any more (historical wart).  We were in
> (late) stage3 when this was first posted, and because of the significant
> impact this might have on not just CM0 but other targets as well, I
> don't think it's something we should try to squeeze in at the last
> minute.  We're now in stage 4, so that is doubly the case.
>
> Christophe is a very valuable member of our community, but he's not a
> port maintainer and thus cannot really rule on what can go into the
> tools, or when.
>
> >
> > I actually think your comments below could all be addressable within a
> > couple of days.  But, I'm not accounting for the review process.
> >
> >> Firstly, the patch is very large, but contains a large number of
> >> distinct changes, so it would really benefit from being broken down into
> >> a number of distinct patches.  This will make reviewing the individual
> >> changes much more straight-forward.
> >
> > I have no context for "large" or "small" with respect to gcc.  This
> > patch comprises about 30% of a previously-monolithic library that's
> > been shipping since ~2016 (the rest is libm material).  Other than
> > (1) the aforementioned change to div0(), (2) a nascent adaptation
> > for __truncdfsf2() (not enabled), and (3) the gratuitous addition of
> > the bitwise functions, the library remains pretty much as it was
> > originally released.
>
> Large, like many other terms, is relative.  For assembler file changes,
> which this is primarily, the overall size can be much smaller and still
> be considered 'large'.
>
> >
> > The driving force in the development of this library was small size,
> > which of course was never possible with the softfp routines.  It's not
> > half-slow, either, for the limitations of the M0 architecture.   And,
> > it's IEEE compliant.  But, that means that most of the functions are
> > highly interconnected.  So, some of it can be broken up as you outline
> > below, but that last patch is still worth more than half of the total.
>
> Nevertheless, having the floating-point code separated out will make
> reviewing more straight forward.  I'll likely need to ask one of our FP
> experts to have a specific look at that part and that will be easier if
> it is disentangled from the other changes.
>
> >
> > I also have ~70k lines of test vectors that seem mostly redundant, but
> > not completely.  I haven't decided what to do here.  For example, I have
> > coverage for __aeabi_u/ldivmod, while GCC does not.  If I do anything
> > with this code it will be in a separate thread.
>
> Publishing the test code, even if it isn't integrated into the GCC
> testsuite would be useful.  Perhaps someone else could then help with that.
>
> >
> >> I'd suggest:
> >>
> >> 1) Some basic makefile cleanups to ease initial integration - in
> >> particular where we have things like
> >>
> >> LIB1FUNCS += 
> >>
> >> that this be rewritten with one function per line (and sorted
> >> alphabetically) - then we can see which functions are being changed in
> >> subsequent patches.  It makes the Makefile fragments longer, but the
> >> improvement in clarity makes this worthwhile.
> >
> > I know next to nothing about Makefiles, particularly ones as complex as
> > GCC's.  I was just trying to work with the existing style to avoid
> > breaking something.  However, I can certainly adopt this suggestion.
> >
> >> 2) The changes for the existing integer functions - preferably one
> >> function per patch.
> >>
> >> 3) The new integer functions that you're adding
> >
> > These wouldn't be too hard to do, but what are the expectations for
> > testing?  A clean build of GCC takes about 6 hours in my VM, and
> > regression testing takes about 4 hours per architecture.  You would want
> > a full regression report for each incremental patch?  I have no idea how
> > to target regression tests that apply to particular runtime functions
> > without the risk of missing something.
> >
>
> Most of this can be tested in a cross-compile environment using qemu as
> a model.  A cross build shouldn't take that long (especially if you
> restrict the compiler to just C and C++ - other languages are
> vanishingly unlikely to pick up errors in the parts of the compiler
> you're changing).  But build checks will be mostly sufficient for most
> of the intermediat

RE: [PATCH 1/8 v9]middle-end slp: Support optimizing load distribution

2021-01-07 Thread Richard Biener
On Thu, 7 Jan 2021, Tamar Christina wrote:

> > -Original Message-
> > From: Richard Biener 
> > Sent: Thursday, January 7, 2021 1:21 PM
> > To: Tamar Christina 
> > Cc: gcc-patches@gcc.gnu.org; nd ; o...@ucw.cz
> > Subject: Re: [PATCH 1/8 v9]middle-end slp: Support optimizing load
> > distribution
> > 
> > > From tamar.christ...@arm.com Mon Dec 28 14:36:32 2020
> > > Date: Mon, 28 Dec 2020 13:35:56 +
> > > From: Tamar Christina 
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: n...@arm.com, rguent...@suse.de, o...@ucw.cz
> > > Subject: [PATCH 1/8 v9]middle-end slp: Support optimizing load
> > > distribution
> > >
> > > Hi All,
> > >
> > > This introduces a post processing step for the pattern matcher to
> > > flatten permutes introduced by the complex multiplications patterns.
> > >
> > > This performs a blend early such that SLP is not cancelled by the
> > > LOAD_LANES permute.  This is a temporary workaround to the fact that
> > > loads are not CSEd during building and is required to produce efficient 
> > > code.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > > and no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > >   * tree-vect-slp.c (optimize_load_redistribution_1): New.
> > >   (optimize_load_redistribution): New.
> > >   (vect_match_slp_patterns): Use it.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index
> > >
> > 2a58e54fe51471df5f55ce4a524d0022744054b0..8360a59098f517498f3155f325c
> > f
> > > 8406466ac25c 100644
> > > --- a/gcc/tree-vect-slp.c
> > > +++ b/gcc/tree-vect-slp.c
> > > @@ -2228,6 +2228,115 @@ calculate_unrolling_factor (poly_uint64 nunits,
> > unsigned int group_size)
> > >return exact_div (common_multiple (nunits, group_size),
> > > group_size);  }
> > >
> > > +/* Helper function of optimize_load_redistribution that performs the
> > operation
> > > +   recursively.  */
> > > +
> > > +static slp_tree
> > > +optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t
> > *bst_map,
> > > + hash_set *visited, slp_tree root) {
> > > +  if (visited->add (root))
> > > +return NULL;
> > > +
> > > +  slp_tree node;
> > > +  unsigned i;
> > > +
> > > +  /* For now, we don't know anything about externals so do not do
> > > + anything.  */  if (SLP_TREE_DEF_TYPE (root) == vect_external_def
> > > +  || SLP_TREE_DEF_TYPE (root) == vect_constant_def)
> > 
> > use a single != vect_internal_def test please
> > 
> > > +return NULL;
> > > +  else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR
> > > +  && SLP_TREE_LANE_PERMUTATION (root).exists ()
> > > +  && !SLP_TREE_SCALAR_STMTS (root).exists ())
> > 
> > I think both last tests are unnecessary
> 
> It's there to prevent it from trying to optimize  two_operands nodes
> which are a vec_perm but contain no scalar statements. I didn't find a 
> different
> way to distinguish between the two. The SLP tree can contain a number of these
> that haven't been pattern matched away.

Well, that's because of the weak check for what you want to pattern match
below.  Certainly !SLP_TREE_SCALAR_STMTS (root).exists () isn't a reliable
way to catch these.

> > 
> > > +{
> > > +  /* First convert this node into a load node and add it to the 
> > > leaves
> > > + list and flatten the permute from a lane to a load one.  If it's
> > > + unneeded it will be elided later.  */
> > > +  auto_vec stmts;
> > > +  stmts.create (SLP_TREE_LANES (root));
> > > +  load_permutation_t load_perm;
> > > +  load_perm.create (SLP_TREE_LANES (root));
> > > +  lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION
> > > + (root);
> > 
> > load_perm leaks when any of the below outs is taken
> > 
> > > +  for (unsigned j = 0; j < lane_perm.length (); j++)
> > > +{
> > > +  std::pair perm = lane_perm[j];
> > > +   /* This isn't strictly needed, but this function is a temporary
> > > +  one for specifically pattern matching, so don't want it to
> > > +  optimize things the remainder of the pipeline will.  */
> > > +   if (perm.first != j)
> > > + goto next;
> > 
> > but please elide it nevertheless
> > 
> > > +  node = SLP_TREE_CHILDREN (root)[perm.first];
> > > +
> > > +   if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
> > > + return NULL;
> > 
> > so you want to check whether this is a load, I think more to the point would
> > be a vect_internal_def + zero SLP children check.  And a comment on what
> > we test (we do lack classification of SLP nodes, so a helper like
> > vect_is_slp_load_node or so would be OK as well)
> > 
> > > +
> > > +   stmts.quick_push (SLP_TREE_SCALAR_STMTS
> > (node)[perm.second]);
> > > +  load_perm.safe_push (SLP_TREE_LOAD_PERMUTATION
> > > +(node)[perm.second]);
> > 
> > As you're doing here lacks a check that we are actually loading from the 
> > same
> > DR gro

[PATCH] i386: Merge various insn name mapping code attributes

2021-01-07 Thread Uros Bizjak via Gcc-patches
2021-01-07  Uroš Bizjak  

No functional changes.

gcc/
* config/i386/i386.md (insn): Merge from plusminus_insn, shift_insn,
rotate_insn and optab code attributes.
Update all uses to merged code attribute.
* config/i386/sse.md: Update all uses to merged code attribute.
* config/i386/mmx.md: Update all uses to merged code attribute.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index cfff16ec07e..6f6af8c3cbf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -869,7 +869,8 @@
 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
 (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
-(eq_attr "isa" "avx512vnnivl") (symbol_ref "TARGET_AVX512VNNI && 
TARGET_AVX512VL")
+(eq_attr "isa" "avx512vnnivl")
+  (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
 
 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
@@ -894,17 +895,13 @@
 
 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
 
-(define_code_iterator multdiv [mult div])
-
-;; Base name for define_insn
-(define_code_attr plusminus_insn
-  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
-   (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
-
 ;; Base name for insn mnemonic.
 (define_code_attr plusminus_mnemonic
   [(plus "add") (ss_plus "adds") (us_plus "addus")
(minus "sub") (ss_minus "subs") (us_minus "subus")])
+
+(define_code_iterator multdiv [mult div])
+
 (define_code_attr multdiv_mnemonic
   [(mult "mul") (div "div")])
 
@@ -951,10 +948,6 @@
 ;; Mapping of all shift operators
 (define_code_iterator any_shift [ashift lshiftrt ashiftrt])
 
-;; Base name for define_insn
-(define_code_attr shift_insn
-  [(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")])
-
 ;; Base name for insn mnemonic.
 (define_code_attr shift [(ashift "sll") (lshiftrt "shr") (ashiftrt "sar")])
 (define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])
@@ -962,9 +955,6 @@
 ;; Mapping of rotate operators
 (define_code_iterator any_rotate [rotate rotatert])
 
-;; Base name for define_insn
-(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])
-
 ;; Base name for insn mnemonic.
 (define_code_attr rotate [(rotate "rol") (rotatert "ror")])
 
@@ -977,13 +967,9 @@
 ;; Base name for x87 insn mnemonic.
 (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
 
-;; Used in signed and unsigned widening multiplications.
+;; Mapping of extend operators
 (define_code_iterator any_extend [sign_extend zero_extend])
 
-;; Used for representing standard name for extend
-(define_code_attr optab [(sign_extend "extend")
-(zero_extend "zero_extend")])
-
 ;; Prefix for insn menmonic.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
 (div "i") (udiv "")])
@@ -997,7 +983,8 @@
 ;; Used in signed and unsigned truncations.
 (define_code_iterator any_truncate [ss_truncate truncate us_truncate])
 ;; Instruction suffix for truncations.
-(define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate 
"us")])
+(define_code_attr trunsuffix
+  [(ss_truncate "s") (truncate "") (us_truncate "us")])
 
 ;; Used in signed and unsigned fix.
 (define_code_iterator any_fix [fix unsigned_fix])
@@ -1011,6 +998,14 @@
 (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
 (define_code_attr floatprefix [(float "s") (unsigned_float "u")])
 
+;; Base name for expression
+(define_code_attr insn
+  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+   (minus "sub") (ss_minus "sssub") (us_minus "ussub")
+   (sign_extend "extend") (zero_extend "zero_extend")
+   (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
+   (rotate "rotl") (rotatert "rotr")])
+
 ;; All integer modes.
 (define_mode_iterator SWI1248x [QI HI SI DI])
 
@@ -7460,14 +7455,14 @@
 
 ;; The patterns that match these are at the end of this file.
 
-(define_expand "<plusminus_insn>xf3"
+(define_expand "<insn>xf3"
   [(set (match_operand:XF 0 "register_operand")
(plusminus:XF
  (match_operand:XF 1 "register_operand")
  (match_operand:XF 2 "register_operand")))]
   "TARGET_80387")
 
-(define_expand "<plusminus_insn><mode>3"
+(define_expand "<insn><mode>3"
   [(set (match_operand:MODEF 0 "register_operand")
(plusminus:MODEF
  (match_operand:MODEF 1 "register_operand")
@@ -11399,7 +11394,7 @@
 
 ;; See comment above `ashl3' about how this works.
 
-(define_expand "<shift_insn><mode>3"
+(define_expand "<insn><mode>3"
   [(set (match_operand:SDWIM 0 "")
(any_shiftrt:SDWIM (match_operand:SDWIM 1 "")
   (match_operand:QI 2 "nonmemory_operand")))]
@@ -11407,7 +11402,7 @@
   "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
 
 ;; Avoid useless masking of count operand.
-(define_insn_and_split "*<shift_insn><mode>3_mask"
+(define_insn_and_split "*<insn><mode>3_mask"
   [(set (ma

Re: [PATCH v5] rtl: builtins: (not just) rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2021-01-07 Thread Raoni Fassina Firmino via Gcc-patches
It seems to me we have two unrelated concerns mixed in the threads, I
will reply in two different sub-threads to make this easier.

This one to discuss the handling of target and output from the expands


On Wed, Nov 18, 2020 at 02:45:44PM -0700, AL gcc-patches wrote:
> 
> 
> On 11/18/20 12:31 AM, Richard Biener wrote:
> > On Tue, 17 Nov 2020, Jeff Law wrote:
> >
> >>
> >> On 11/4/20 8:10 AM, Raoni Fassina Firmino via Gcc-patches wrote:
> >>> On Wed, Nov 04, 2020 at 10:35:03AM +0100, Richard Biener wrote:
> > +/* Expand call EXP to the fegetround builtin (from C99 fenv.h), 
> > returning the
> > +   result and setting it in TARGET.  Otherwise return NULL_RTX on 
> > failure.  */
> > +static rtx
> > +expand_builtin_fegetround (tree exp, rtx target, machine_mode 
> > target_mode)
> > +{
> > +  if (!validate_arglist (exp, VOID_TYPE))
> > +return NULL_RTX;
> > +
> > +  insn_code icode = direct_optab_handler (fegetround_optab, SImode);
> > +  if (icode == CODE_FOR_nothing)
> > +return NULL_RTX;
> > +
> > +  if (target == 0
> > +  || GET_MODE (target) != target_mode
> > +  || !(*insn_data[icode].operand[0].predicate) (target, 
> > target_mode))
> > +target = gen_reg_rtx (target_mode);
> > +
> > +  rtx pat = GEN_FCN (icode) (target);
> > +  if (!pat)
> > +return NULL_RTX;
> > +  emit_insn (pat);
>  I think you need to verify whether the expansion ended up in 'target'
>  and otherwise emit a move since usually 'target' is just a hint.
> >>> I thought the "if (target == 0 ..." took care of that. The expands do
> >>> emit a move, if that helps.
> >> It looks like if we have a passed in target and it either has the wrong
> >> mode or it does not match the predicate, then we generate a new target
> >> and use that instead.  I don't see where we'd copy from that new target
> >> to the original desired target.  For some expanders the caller would
> >> handle that, but I don't see how that's possible for this one without
> >> the caller digging into the generated RTL to determine that
> >> expand_builtin_fegetround put the result somewhere other than TARGET and
> >> thus a copy is needed.
> >>
> >> That may be what Richi is worried about.
> > I know we've added missing
> >
> >   if (!rtx_equal_p (target, ops[0].value))
> > emit_move_insn (target, ops[0].value);
> >
> > to several expanders (using expand_insn rather than manual
> > GEN_FCN (icode) calls).
> Yes.  But I think we end up doing that mostly for expanders that return
> the object where the value was stored in some reasonably convenient
> location (either as a return value or in an ops array).  I don't think
> that's the case here. 

So, I think I got it wrong then.  I thought the semantics were that
the expander was responsible for providing a suitable target to the
expand, and the expand was responsible for outputting to that target.
That is how I created both, so if the expand can change the target,
maybe it should also be responsible for generating the correct target.
But it seems to me that this would mean more repeated code in the
expands for different archs.


o/
Raoni Fassina


Re: [PATCH v5] rtl: builtins: (not just) rs6000: Add builtins for fegetround, feclearexcept and feraiseexcept [PR94193]

2021-01-07 Thread Raoni Fassina Firmino via Gcc-patches
It seems to me we have two unrelated concerns mixed in the threads, I
will reply in two different sub-threads to make this easier.

This one to discuss the values of FE_* macros.


On Tue, Nov 17, 2020 at 03:23:02PM -0700, AL gcc-patches wrote:
> >>> +@cindex @code{fegetround@var{m}} instruction pattern
> >>> +@item @samp{fegetround@var{m}}
> >>> +Store the current machine floating-point rounding mode into operand 0.
> >>> +Operand 0 has mode @var{m}, which is scalar.  This pattern is used to
> >>> +implement the @code{fegetround} function from the ISO C99 standard.
> >> I think this needs to elaborate on the format of the "rounding mode".
> >>
> >> AFAICS you do nothing to marshall with the actually used libc
> >> implementation which AFAIU can choose arbitrary values for
> >> the FE_* macros.  I'm not sure we require the compiler to be
> >> configured for one specific C library and for example require
> >> matching FE_* macro definitions for all uses of the built
> >> compiler.
> >>
> >> For the patch at hand you seem to assume the libc "format"
> >> matches the hardware one (which would of course be reasonable).
> >>
> >> Does that actually hold up when looking at libcs other than 
> >> glibc supporting powerpc?
> > I checked some other libc implementations that have POWER support and
> > all have the same values as glibc for the four rounding modes and the five
> > exception flags from libc. The libc implementations I checked are:
> >
> >  - musl
> >  - uclibc & uclibc-ng
> >  - freebsd
> >
> > Are there any others I am missing?
> I think the concern here is that while the libcs we have visibility into
> have consistent values, I don't think that's necessarily guaranteed.  
> I'm not sure how to DTRT here.  We may not have the target headers if
> we're doing a cross compile, so a configure test may not do what we 
> need.  In fact, ISTM that there is no reliable configure or compile time
> check we can do since the constants are part of the runtime and can
> change independently of the compiler.


From other subthreads, Joseph and Segher mentioned this:

On Wed, Nov 04, 2020 at 09:06:02PM +, Joseph Myers wrote:
> On Wed, 4 Nov 2020, Raoni Fassina Firmino via Gcc-patches wrote:
> 
> > IMHO, It seems like it is not necessary if there not a libc that have
> > different values for the FE_* macros. I didn't check other archs, but if
> > is the case for some other arch I think it could be changed if and when
> > some other arch implements expands for these builtins.
> 
> SPARC is the case I know of where the FE_* values vary depending on target 
> libc (see the SPARC_LOW_FE_EXCEPT_VALUES target macro).

On Wed, Nov 18, 2020 at 06:38:22AM -0600, Segher Boessenkool wrote:
> We can handle the constants issue similarly to what we do for
> __builtin_fpclassify, too.


I think that if we must safeguard against future or unforeseen libc
implementations, doing what __builtin_fpclassify does is the way to go.
I don't know what the GCC policy is here, but IMHO we should not add
complexity before it is needed in this case.  And looking at
__builtin_fpclassify, it seems like a lot: IIUC this solution needs
fixincludes to work, which seems to me too much added maintenance for
something that is not needed yet, because SPARC doesn't have these
expands; no target has them for now.

I don't know if it helps, but the included tests also check the value
changes against the libc implementation, so they may catch discrepancies
when building gcc with other libcs.  It, of course, doesn't help when, for
example, a gcc built with glibc is used to compile a program with some
unknown libc.  I wonder if there is some safety check that can be done at
runtime, whilst building a program with gcc and some unknown libc.


o/
Raoni Fassina


Re: [committed] patch to fix PR97978

2021-01-07 Thread Vladimir Makarov via Gcc-patches



On 2021-01-07 6:01 a.m., Richard Sandiford wrote:

Vladimir Makarov via Gcc-patches  writes:

The following fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97978

The patch was successfully bootstrapped on x86-64.

Can you explain this a bit more?  The assert fires if the register
allocation is inconsistent with the conflict information.  What causes
the inconsistency in this case, and why is it OK for the inconsistency
to persist until the next lra_assign pass?  Does something fix up the
inconsistency later, or is the inconsistent information never used?


Sorry, Richard.  I should have described it in more details in the bugzilla.

Hard register splitting was added to remove reload failures to find a
hard register after the first scheduling pass, although unfortunately it
does not remove all the problems caused by the 1st insn scheduling.


A pseudo (p1) lives through an insn, and one insn alternative requires a
specific hard reg (dx in this case) although there is no explicit hard
register in the insn (another pseudo p2 in the insn simply requires a
class containing only the dx reg).  So the pseudo p1 does not conflict
with dx for now, and p1 was assigned to dx.


Now the constraint sub-pass chooses the insn alternative, and the next assign 
sub-pass creates a reload pseudo rp2 for p2 and tries to assign dx to 
rp2.  It fails and we do hard reg splitting to assign dx to a reload 
pseudo of rp2.


p3<-dx

insn (rp2)

dx<-p3

After this transformation p1 now has dx as a conflicting reg, and the 
allocation is incorrect at this stage, as p1 (assigned to dx and living 
through the insns) conflicts explicitly with dx.


The next assign sub-pass tries to assign dx to rp2 by spilling p1 (and 
assigning another hard reg to p1).


At the end p2,rp2,p3 get dx and all trivial moves (dx <- dx) are removed.

That is the explanation of when we have an incorrect allocation.

I should also say that LRA has a final register allocation check.  The 
check in the assign sub-pass is for earlier bug recognition during initial 
LRA development and is probably not necessary anymore.  So another 
solution would be to remove the check in the assign sub-pass altogether.  But I 
decided not to do this, as it still allows finding some bugs earlier.



Is there no chance of lra_split_hard_reg_for updating the information
itself, to keep everything self-consistent?
I think it would be the old reload approach (ignoring sub-pass 
separation and doing everything in one place).  To correct allocations right in 
lra_split_hard_reg_for we would need to check all pseudos living through 
the new insns (and for this we need correct live info) and spill pseudos 
conflicting with the split hard reg.  But we don't need to do this, as right 
after the splitting sub-pass we have the live info sub-pass and the assign 
sub-pass, which do all of this.

   Bypassing the check for
every pseudo register seems like quite a big hammer.

I'm not saying this is the wrong fix.  I just think it would help
to have more commentary explaining the situation.





[committed] aarch64: Support conditional unpacked integer unary arithmetic on SVE

2021-01-07 Thread Richard Sandiford via Gcc-patches
This patch extends the conditional unary integer operations
from SVE_FULL_I to SVE_I.  In each case the type suffix is
taken from the element size rather than the container size:
this matters for ABS and NEG, but doesn't matter for NOT.

Tested on aarch64-linux-gnu and aarch64_be-elf, pushed to trunk.

Richard


gcc/
* config/aarch64/aarch64-sve.md (@cond_)
(*cond__2): Extend from SVE_FULL_I to SVE_I.
(*cond__any): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/cond_unary_5.c: New test.
* gcc.target/aarch64/sve/cond_unary_5_run.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_6.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_6_run.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_7.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_7_run.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_8.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_8_run.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 28 +-
 .../gcc.target/aarch64/sve/cond_unary_5.c | 49 +
 .../gcc.target/aarch64/sve/cond_unary_5_run.c | 26 +
 .../gcc.target/aarch64/sve/cond_unary_6.c | 53 +++
 .../gcc.target/aarch64/sve/cond_unary_6_run.c | 27 ++
 .../gcc.target/aarch64/sve/cond_unary_7.c | 48 +
 .../gcc.target/aarch64/sve/cond_unary_7_run.c | 26 +
 .../gcc.target/aarch64/sve/cond_unary_8.c | 50 +
 .../gcc.target/aarch64/sve/cond_unary_8_run.c | 28 ++
 9 files changed, 321 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_6_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_7_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_8_run.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 2be05ee9fdd..2ec9acbf38d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2940,23 +2940,23 @@ (define_insn "@aarch64_pred_"
 
 ;; Predicated integer unary arithmetic with merging.
 (define_expand "@cond_"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand")
-  (SVE_INT_UNARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand"))
-  (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
+  (SVE_INT_UNARY:SVE_I
+(match_operand:SVE_I 2 "register_operand"))
+  (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
  UNSPEC_SEL))]
   "TARGET_SVE"
 )
 
 ;; Predicated integer unary arithmetic, merging with the first input.
 (define_insn "*cond__2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl")
-  (SVE_INT_UNARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
+  (SVE_INT_UNARY:SVE_I
+(match_operand:SVE_I 2 "register_operand" "0, w"))
   (match_dup 2)]
  UNSPEC_SEL))]
   "TARGET_SVE"
@@ -2974,12 +2974,12 @@ (define_insn "*cond__2"
 ;; as earlyclobber helps to make the instruction more regular to the
 ;; register allocator.
 (define_insn "*cond__any"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
-   (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
+   (unspec:SVE_I
  [(match_operand: 1 "register_operand" "Upl, Upl, Upl")
-  (SVE_INT_UNARY:SVE_FULL_I
-(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
-  (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+  (SVE_INT_UNARY:SVE_I
+(match_operand:SVE_I 2 "register_operand" "w, w, w"))
+  (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
  UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
   "@
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
new file mode 100644
index 000..17b3f86c8c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include 
+
+#define abs(A) ((A) < 0 ? -(A) : (A))
+#define neg(A) (-(A))
+#def

[PATCH] c++: ICE with constexpr call that returns a PMF [PR98551]

2021-01-07 Thread Patrick Palka via Gcc-patches
We shouldn't do replace_result_decl after evaluating a call that returns
a PMF because PMF temporaries aren't wrapped in a TARGET_EXPR (and so we
can't trust ctx->object), and PMF initializers can't be self-referential
anyway, so replace_result_decl would always be a no-op.  This fixes an
ICE from the sanity check in replace_result_decl in the below testcase
during cxx_eval_call_expression of the call f() in the initializer g(f()).

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

PR c++/98551
* constexpr.c (cxx_eval_call_expression): Don't call
replace_result_decl when the result is a PMF.

gcc/testsuite/ChangeLog:

PR c++/98551
* g++.dg/cpp0x/constexpr-pmf2.C: New test.
---
 gcc/cp/constexpr.c  | 1 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C | 9 +
 2 files changed, 10 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 0c12f608d36..a7272d49d0d 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -2788,6 +2788,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
   current object under construction.  */
if (!*non_constant_p && ctx->object
&& AGGREGATE_TYPE_P (TREE_TYPE (res))
+   && !TYPE_PTRMEMFUNC_P (TREE_TYPE (res))
&& !is_empty_class (TREE_TYPE (res)))
  if (replace_result_decl (&result, res, ctx->object))
cacheable = false;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C
new file mode 100644
index 000..a76e712afe1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C
@@ -0,0 +1,9 @@
+// PR c++/98551
+// { dg-do compile { target c++11 } }
+
+struct A {};
+struct B { int t(); };
+using pmf = decltype(&B::t);
+constexpr pmf f() { return &B::t; }
+constexpr A g(pmf) { return {}; };
+constexpr A x = g(f());
-- 
2.30.0



[PATCH] libstdc++: Fix long double to_chars testcase [PR98384]

2021-01-07 Thread Patrick Palka via Gcc-patches
The testcase was failing to compile on some targets due to its use of
the non-standard functions nextupl and nextdownl.  This patch makes the
testcase instead use the C99 function nexttowardl in an equivalent
manner.

libstdc++-v3/ChangeLog:

PR libstdc++/98384
* testsuite/20_util/to_chars/long_double.cc: Use nexttowardl
instead of the non-standard nextupl and nextdownl.
---
 .../testsuite/20_util/to_chars/long_double.cc | 25 +--
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc 
b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
index 9d9ede7cf8a..4f72cb65400 100644
--- a/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
+++ b/libstdc++-v3/testsuite/20_util/to_chars/long_double.cc
@@ -32,6 +32,17 @@
 
 using namespace std;
 
+namespace detail
+{
+  long double
+  nextupl(long double x)
+  { return nexttowardl(x, numeric_limits<long double>::infinity()); }
+
+  long double
+  nextdownl(long double x)
+  { return nexttowardl(x, -numeric_limits<long double>::infinity()); }
+}
+
 // The long double overloads of std::to_chars currently just go through printf
 // (except for the hexadecimal formatting).
 
@@ -40,8 +51,8 @@ void
 test01()
 {
   const long double hex_testcases[]
-= { nextdownl(numeric_limits<long double>::max()),
-   nextupl(numeric_limits<long double>::min()),
+= { detail::nextdownl(numeric_limits<long double>::max()),
+   detail::nextupl(numeric_limits<long double>::min()),
42.0L,
0x1.2p+0L,
0x1.23p+0L,
@@ -94,7 +105,7 @@ test01()
 
  {
// Verify that the nearby values have a different shortest form.
-   testcase = nextdownl(testcase);
+   testcase = detail::nextdownl(testcase);
result = to_chars(begin(to_chars_buffer), end(to_chars_buffer),
  testcase, chars_format::hex);
VERIFY( result.ec == errc{} );
@@ -103,7 +114,7 @@ test01()
sprintf(printf_buffer, "%La", testcase);
VERIFY( !strcmp(to_chars_buffer, printf_buffer+strlen("0x")) );
 
-   testcase = nextupl(nextupl(testcase));
+   testcase = detail::nextupl(detail::nextupl(testcase));
result = to_chars(begin(to_chars_buffer), end(to_chars_buffer),
  testcase, chars_format::hex);
VERIFY( result.ec == errc{} );
@@ -112,7 +123,7 @@ test01()
sprintf(printf_buffer, "%La", testcase);
VERIFY( !strcmp(to_chars_buffer, printf_buffer+strlen("0x")) );
 
-   testcase = nextdownl(testcase);
+   testcase = detail::nextdownl(testcase);
  }
 
for (int precision = -1; precision < 50; precision++)
@@ -173,7 +184,7 @@ test02()
  *result.ptr = '\0';
  char nearby_buffer[5];
{
- const long double smaller = nextdownl(value);
+ const long double smaller = detail::nextdownl(value);
  result = to_chars(begin(nearby_buffer), end(nearby_buffer),
smaller, fmt);
  VERIFY( result.ec == errc{} );
@@ -182,7 +193,7 @@ test02()
}
 
{
- long double larger = nextupl(value);
+ long double larger = detail::nextupl(value);
  result = to_chars(begin(nearby_buffer), end(nearby_buffer),
larger, fmt);
  VERIFY( result.ec == errc{} );
-- 
2.30.0



[PATCH] fix GIMPLE parser for loops

2021-01-07 Thread Richard Biener
We do not tolerate "growing" a vector to a lower size.

Bootstrap on x86_64-unknown-linux-gnu running, will commit
as obvious.

2021-01-07  Richard Biener  

gcc/c/
* gimple-parser.c (c_parser_gimple_compound_statement): Only
reallocate loop array if it is too small.
---
 gcc/c/gimple-parser.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c
index e64c6e5ebd5..724d8394b4f 100644
--- a/gcc/c/gimple-parser.c
+++ b/gcc/c/gimple-parser.c
@@ -616,8 +616,9 @@ c_parser_gimple_compound_statement (gimple_parser &parser, 
gimple_seq *seq)
  class loop *loop = alloc_loop ();
  loop->num = is_loop_header_of;
  loop->header = bb;
- vec_safe_grow_cleared (loops_for_fn (cfun)->larray,
-is_loop_header_of + 1, true);
+ if (number_of_loops (cfun) <= is_loop_header_of)
+   vec_safe_grow_cleared (loops_for_fn (cfun)->larray,
+  is_loop_header_of + 1, true);
  (*loops_for_fn (cfun)->larray)[is_loop_header_of] = loop;
  flow_loop_tree_node_add (loops_for_fn (cfun)->tree_root,
   loop);
-- 
2.26.2


Re: [PATCH] libphobos: Allow building libphobos using Solaris/x86 assembler

2021-01-07 Thread Iain Buclaw via Gcc-patches
Excerpts from Rainer Orth's message of January 6, 2021 2:57 pm:
> Hi Iain,
> 
>>> This patch removes the disabling of libphobos when the Solaris/x86
>>> assembler is being used.
>>>
>>> Since r11-6373, D symbols are now compressed using back references, this
>>> helped reduce the average symbol length by a factor of about 3, while
>>> the longest symbol shrank from 416133 to 1142 characters.  So the issues
>>> that were seen on Solaris/x86 should no longer be a problem.
>>>
>>> However, I have only used x86_64-apple-darwin10 for testing, as
>>> libphobos couldn't be built on that target for the same reason, except
>>> it was the system linker segfaulting due to long symbol names.
>>>
>>> It would be good to know if Solaris has also benefitted from the change.
>>
>> great, thanks.  I'll give this a whirl once today's regular bootstraps
>> have finished.
> 
> here's what I found: the build itself worked just fine and the libphobos
> test results are identical to those with gas.  However, a few gdc tests
> fail when Solaris/x86 as is used, for two reasons:
> 
> +UNRESOLVED: gdc.test/runnable/mangle.d   compilation failed to produce 
> executable
> +UNRESOLVED: gdc.test/runnable/mangle.d -shared-libphobos   compilation 
> failed to produce executable
> 
> Assembler: mangle.d
> "/var/tmp//ccG72ALc.s", line 200 : Syntax error
> Near line: "movzbl  test_эльфийские_письмена_9, %eax"
> [...]
> 
> +UNRESOLVED: gdc.test/runnable/testmodule.d   compilation failed to produce 
> executable
> +UNRESOLVED: gdc.test/runnable/testmodule.d -shared-libphobos   compilation 
> failed to produce executable
> 
> Assembler: testmodule.d
> "/var/tmp//ccw9j5oa.s", line 20 : Syntax error
> Near line: "call_D7dstress3run17unicode_06_哪里6哪里FiZi"
> [...]
> 
> +UNRESOLVED: gdc.test/runnable/ufcs.d   compilation failed to produce 
> executable
> +UNRESOLVED: gdc.test/runnable/ufcs.d -shared-libphobos   compilation failed 
> to produce executable
> 
> Assembler: ufcs.d
> "/var/tmp//ccWd6kud.s", line 7774 : Syntax error
> Near line: ".globl  _D4ufcs6α8503FiZv"
> [...]
> 
> The Solaris assemblers don't support UTF-8 identifiers.  Unless gdc can
> encode them in some way for toolchains like this (no idea if this is
> worth the effort), it may be possible to guard the tests with the ucn
> effective-target keyword.
> 
> Apart from that, it seems strange that the failing tests should only
> show up as UNRESOLVED.  I'd have expected the compilation to FAIL, but
> IIRC the gdc testsuite has to ignore all output, so the test for excess
> errors which would usually catch this is disabled effectively.
> 

Indeed, the testsuite is far too verbose.  Although many tests have a
TEST_OUTPUT directive, converting them to a Dejagnu style is probably
too much effort for the gain.

Those tests can just be explicitly disabled, I'll look into that.

> The last failure is different and due to how COMDAT group handling is
> done with Solaris as:
> 
> +UNRESOLVED: gdc.test/runnable/test42.d   compilation failed to produce 
> executable
> +UNRESOLVED: gdc.test/runnable/test42.d -shared-libphobos   compilation 
> failed to produce executable
> 
> which yields
> 
> Input string too long, limit 10240
> 
> The offending input lines are (stripped for brevity)
> 
>   .section.tdata._D6test42__T5Foo71VAyaa2623[...]
>   .group  _D6test42__T5Foo71VAyaa2623_68656c6c6f616[...]
> 
> The first line is 10597 chars, the second even 15869.
> 

Is there a max symbol length macro available internally?  Maybe could
just compress symbols using MD5 if they exceed a certain length...

Iain.


Re: [PATCH] Fix test failures from outputs.exp (PR testsuite/98225)

2021-01-07 Thread Rainer Orth
Hi Bernd,

> this should fix the test failures in this test case.
>
> Is it OK for trunk?

unfortunately not: there are two bugs and a couple of nits:

* When testing with

  runtest --tool gcc outputs.exp

  I get

ERROR: tcl error sourcing 
/vol/gcc/src/hg/master/local/gcc/testsuite/gcc.misc-tests/outputs.exp.
ERROR: can't unset "env(MAKEFLAGS)": no such element in array
while executing
"unset env($var)"
(procedure "unsetenv" line 3)
invoked from within
"unsetenv MAKEFLAGS"
(file 
"/vol/gcc/src/hg/master/local/gcc/testsuite/gcc.misc-tests/outputs.exp" line 72)
invoked from within
"source /vol/gcc/src/hg/master/local/gcc/testsuite/gcc.misc-tests/outputs.exp"
("uplevel" body line 1)
invoked from within
"uplevel #0 source 
/vol/gcc/src/hg/master/local/gcc/testsuite/gcc.misc-tests/outputs.exp"
invoked from within
"catch "uplevel #0 source $test_file_name""

  The unsetenv needs to be wrapped in

if [info exists env(MAKEFLAGS)] {

  to avoid this.

* 

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp 
b/gcc/testsuite/gcc.misc-tests/outputs.exp
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -67,6 +67,10 @@ if {[board_info $dest exists output_form
 append link_options " additional_flags=-Wl,-oformat,[board_info $dest 
output_format]"
 }
 
+# Avoid possible influence from the make jobserver,
+# otherwise ltrans0.ltrans_args files may be missing.
+unsetenv MAKEFLAGS

  The comment is misleading: it's not just *.ltrans_args, but also
  *.ltrans.args.0.  Maybe there's a collective term for those files in
  lto-wrapper instead?

@@ -163,6 +167,9 @@ proc outest { test sources opts dirs out
if { $ogl != {} } {
pass "$test: $d$o"
file delete $ogl
+   } elseif { [string match "*.ld1_args" $o] } {
+   # This file may be missing if !HAVE_GNU_LD
+   pass "$test: $d$o"

  Always PASSing the test even if it isn't run is wrong.  Either wrap
  the whole group of tests with response files in

if [check_effective_target_gld] {

  or make the test for the *.ld1_args file conditional on that
  (e.g. along the lines of $ltop used elsewhere).  I'd welcome input
  from Alexandre which is preferred.

A few nits on the patch submission:

* Please review https://gcc.gnu.org/contribute.html for the syntax of
  subject lines: in the present case this should be something like

  [PATCH] testsuite: Fix test failures from outputs.exp [PR98225]

* Both the mail and the patch description should contain a
  self-contained description of the bug and the fix so potential
  reviewers don't have to re-read a (potentially excessively long)
  bugzilla report.

* Your ChangeLog entry isn't particularly helpful:

2021-01-07  Bernd Edlinger  

PR testsuite/98225
* gcc.misc-tests/outputs.exp: Fix test case.

  This tells the reader almost nothing.  Instead, it should list *what
  changed* in the patch; for the current patch something like

* gcc.misc-tests/outputs.exp: Unset MAKEFLAGS.
Always pass *.ld1_args tests.

  There's more than you ever wanted to know on ChangeLogs in the GNU
  Coding Standards ;-)

Thanks for working on this.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Add pytest for a GCOV test-case

2021-01-07 Thread Martin Liška

On 1/6/21 12:36 AM, Jeff Law wrote:

unresolved "could not find python interpreter $testcase" in
run-gcov-pytest if you find the right magic in the output of your spawn.


Achieved that with the updated patch.

Ready for master?
Thanks,
Martin
>From 53f5169156044acf8ecec498aa89d6be44c7173a Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 21 Dec 2020 09:14:28 +0100
Subject: [PATCH] Add pytest for a GCOV test-case

gcc/testsuite/ChangeLog:

	PR gcov-profile/98273
	* lib/gcov.exp: Add run-gcov-pytest function which runs pytest.
	* g++.dg/gcov/pr98273.C: New test.
	* g++.dg/gcov/gcov.py: New test.
	* g++.dg/gcov/test-pr98273.py: New test.
---
 gcc/testsuite/g++.dg/gcov/gcov.py | 10 +
 gcc/testsuite/g++.dg/gcov/pr98273.C   | 24 
 gcc/testsuite/g++.dg/gcov/test-pr98273.py | 27 ++
 gcc/testsuite/lib/gcov.exp| 45 +++
 4 files changed, 106 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov.py
 create mode 100644 gcc/testsuite/g++.dg/gcov/pr98273.C
 create mode 100644 gcc/testsuite/g++.dg/gcov/test-pr98273.py

diff --git a/gcc/testsuite/g++.dg/gcov/gcov.py b/gcc/testsuite/g++.dg/gcov/gcov.py
new file mode 100644
index 000..a8c4ea9ae71
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov.py
@@ -0,0 +1,10 @@
+import gzip
+import json
+import os
+
+
+def gcov_from_env():
+# return parsed JSON content a GCOV_PATH file
+json_filename = os.environ['GCOV_PATH'] + '.gcov.json.gz'
+json_data = gzip.open(json_filename).read()
+return json.loads(json_data)
diff --git a/gcc/testsuite/g++.dg/gcov/pr98273.C b/gcc/testsuite/g++.dg/gcov/pr98273.C
new file mode 100644
index 000..bfa83cbe4d0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/pr98273.C
@@ -0,0 +1,24 @@
+/* PR gcov-profile/98273 */
+
+/* { dg-options "--coverage -std=c++11" } */
+/* { dg-do run { target native } } */
+
+int
+main ()
+{
+  int i = 42;
+  {
+auto f = [] () {
+  auto g = [] () {};
+  g ();
+  g ();
+};
+f ();
+  }
+  ++i;
+  ++i;
+  ++i;
+  return 45 - i;
+}
+
+/* { dg-final { run-gcov-pytest pr98273.C "test-pr98273.py" } } */
diff --git a/gcc/testsuite/g++.dg/gcov/test-pr98273.py b/gcc/testsuite/g++.dg/gcov/test-pr98273.py
new file mode 100644
index 000..6cb39d10c1e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/test-pr98273.py
@@ -0,0 +1,27 @@
+from gcov import gcov_from_env
+
+import pytest
+
+
+@pytest.fixture(scope='function', autouse=True)
+def gcov():
+return gcov_from_env()
+
+
+def test_basics(gcov):
+files = gcov['files']
+assert len(files) == 1
+functions = files[0]['functions']
+assert len(functions) == 3
+
+
+def test_lines(gcov):
+lines = gcov['files'][0]['lines']
+linesdict = {}
+for line in lines:
+linesdict[int(line['line_number'])] = line
+
+assert linesdict[21]['function_name'] == 'main'
+assert linesdict[15]['function_name'] == '_ZZ4mainENKUlvE_clEv'
+assert (linesdict[12]['function_name']
+== '_ZZZ4mainENKUlvE_clEvENKUlvE_clEv')
diff --git a/gcc/testsuite/lib/gcov.exp b/gcc/testsuite/lib/gcov.exp
index bb956439cf7..4bcab1d3f1d 100644
--- a/gcc/testsuite/lib/gcov.exp
+++ b/gcc/testsuite/lib/gcov.exp
@@ -247,6 +247,51 @@ proc verify-calls { testname testcase file } {
 return $failed
 }
 
+# Call by dg-final to run gcov --json-format which produces a JSON file
+# that is later analysed by a pytest Python script.
+# We pass filename of a test via GCOV_PATH environment variable.
+
+proc run-gcov-pytest { args } {
+global GCOV
+global srcdir subdir
+# Extract the test file name from the arguments.
+set testcase [lindex $args 0]
+
+verbose "Running $GCOV $testcase in $srcdir/$subdir" 2
+set testcase [remote_download host $testcase]
+set result [remote_exec host $GCOV "$testcase -i"]
+
+set result [remote_exec host "pytest -m pytest --version"]
+set status [lindex $result 0]
+if { $status != 0 } then {
+  unresolved "could not find Python interpreter and (or) pytest module for $testcase"
+  return
+}
+
+set pytest_script [lindex $args 1]
+setenv GCOV_PATH $testcase
+verbose "pytest_script: $pytest_script" 2
+spawn -noecho python3 -m pytest --color=no -rA -s --tb=no $srcdir/$subdir/$pytest_script
+
+set prefix "\[^\r\n\]*"
+expect {
+  -re "FAILED($prefix)\[^\r\n\]+\r\n" {
+   fail "$expect_out(1,string)"
+   exp_continue
+  }
+  -re "ERROR($prefix)\[^\r\n\]+\r\n" {
+   fail "$expect_out(1,string)"
+   exp_continue
+  }
+  -re "PASSED($prefix)\[^\r\n\]+\r\n" {
+   pass "$expect_out(1,string)"
+   exp_continue
+  }
+}
+
+clean-gcov $testcase
+}
+
 # Called by dg-final to run gcov and analyze the results.
 #
 # ARGS consists of the optional strings "branches" and/or "calls",
-- 
2.29.2



Re: [PATCH] libphobos: Allow building libphobos using Solaris/x86 assembler

2021-01-07 Thread Rainer Orth
Hi Iain,

>> The Solaris assemblers don't support UTF-8 identifiers.  Unless gdc can
>> encode them in some way for toolchains like this (no idea if this is
>> worth the effort), it may be possible to guard the tests with the ucn
>> effective-target keyword.
>> 
>> Apart from that, it seems strange that the failing tests should only
>> show up as UNRESOLVED.  I'd have expected the compilation to FAIL, but
>> IIRC the gdc testsuite has to ignore all output, so the test for excess
>> errors which would usually catch this is disabled effectively.
>
> Indeed, the testsuite is far too verbose.  Although many tests have a
> TEST_OUTPUT directive, converting them to a Dejagnu style is probably
> too much effort for the gain.
>
> Those tests can just be explicitly disabled, I'll look into that.

Great, thanks.

>> The last failure is different and due to how COMDAT group handling is
>> done with Solaris as:
>> 
>> +UNRESOLVED: gdc.test/runnable/test42.d compilation failed to produce
>> executable
>> +UNRESOLVED: gdc.test/runnable/test42.d -shared-libphobos compilation
>> failed to produce executable
>> 
>> which yields
>> 
>> Input string too long, limit 10240
>> 
>> The offending input lines are (stripped for brevity)
>> 
>>  .section.tdata._D6test42__T5Foo71VAyaa2623[...]
>>  .group  _D6test42__T5Foo71VAyaa2623_68656c6c6f616[...]
>> 
>> The first line is 10597 chars, the second even 15869.
>> 
>
> Is there a max symbol length macro available internally?  Maybe could

Not that I'm aware of.  I believe D tests are the first time ever that I
ran into this Solaris/x86 as limit.  One might try to iteratively
determine the value at configure time if this is helpful.  No idea if
other non-gas assemblers are even worse in that department.  E.g. the
Solaris/SPARC one has a considerably higher limit...

> just compress symbols using MD5 if they exceed a certain length...

That's certainly an easy option.  OTOH if this is unlikely to occur in
real-life code, one could just xfail the test on Solaris/x86 with as...

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] c++: Fix up tsubst of BIT_CAST_EXPR [PR98329]

2021-01-07 Thread Jakub Jelinek via Gcc-patches
Hi!

As the testcase shows, calling cp_build_bit_cast in tsubst_copy doesn't seem
to be a good idea, because tsubst_copy might not really make the operand
non-dependent, but as processing_template_decl can be 0,
type_dependent_expression_p will return false and then cp_build_bit_cast
assumes the type is non-NULL and non-dependent.
So, this patch just follows what is done e.g. for NOP_EXPR etc. and just
builds some tree in tsubst_copy, and only calls the semantics.c function
from tsubst_copy_and_build.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-01-07  Jakub Jelinek  

PR c++/98329
* pt.c (tsubst_copy) <case BIT_CAST_EXPR>: Don't call
cp_build_bit_cast here, instead just build_min a BIT_CAST_EXPR and set
its location.
(tsubst_copy_and_build): Handle BIT_CAST_EXPR.

* g++.dg/cpp2a/bit-cast10.C: New test.

--- gcc/cp/pt.c.jj  2021-01-05 22:33:41.635816799 +0100
+++ gcc/cp/pt.c 2021-01-07 11:45:40.935610649 +0100
@@ -16796,7 +16796,9 @@ tsubst_copy (tree t, tree args, tsubst_f
   {
tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
tree op0 = tsubst_copy (TREE_OPERAND (t, 0), args, complain, in_decl);
-   return cp_build_bit_cast (EXPR_LOCATION (t), type, op0, complain);
+   r = build_min (BIT_CAST_EXPR, type, op0);
+   SET_EXPR_LOCATION (r, EXPR_LOCATION (t));
+   return r;
   }
 
 case SIZEOF_EXPR:
@@ -19631,6 +19633,13 @@ tsubst_copy_and_build (tree t,
RETURN (r);
   }
 
+case BIT_CAST_EXPR:
+  {
+   tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+   tree op0 = RECUR (TREE_OPERAND (t, 0));
+   RETURN (cp_build_bit_cast (EXPR_LOCATION (t), type, op0, complain));
+  }
+
 case POSTDECREMENT_EXPR:
 case POSTINCREMENT_EXPR:
   op1 = tsubst_non_call_postfix_expression (TREE_OPERAND (t, 0),
--- gcc/testsuite/g++.dg/cpp2a/bit-cast10.C.jj  2021-01-07 11:48:02.998020930 
+0100
+++ gcc/testsuite/g++.dg/cpp2a/bit-cast10.C 2021-01-07 11:47:52.224141482 
+0100
@@ -0,0 +1,42 @@
+// PR c++/98329
+// { dg-do compile { target c++20 } }
+
+template 
+constexpr To
+foo (const From &from)
+{
+  return __builtin_bit_cast (To, &from);
+}
+
+template 
+constexpr To
+bar (const From &from)
+{
+  return __builtin_bit_cast (To, *from);
+}
+
+template 
+constexpr To
+baz (const From &from)
+{
+  return __builtin_bit_cast (To, **from);
+}
+
+template 
+constexpr To
+qux (const From &from)
+{
+  return __builtin_bit_cast (To, -from);
+}
+
+void
+test ()
+{
+  int i = 0;
+  int *j = &i;
+  int **k = &j;
+  foo  (i);
+  bar  (j);
+  baz  (k);
+  qux  (i);
+}

Jakub



Re: [PATCH v3] libgcc: Thumb-1 Floating-Point Library for Cortex M0

2021-01-07 Thread Richard Earnshaw via Gcc-patches
On 07/01/2021 13:27, Christophe Lyon via Gcc-patches wrote:
> On Thu, 7 Jan 2021 at 13:56, Richard Earnshaw
>  wrote:
>>
>> On 07/01/2021 00:59, Daniel Engel wrote:
>>> --snip--
>>>
>>> On Wed, Jan 6, 2021, at 9:05 AM, Richard Earnshaw wrote:
>>>

 Thanks for working on this, Daniel.

 This is clearly stage1 material, so we've got time for a couple of
 iterations to sort things out.
>>>
>>> I appreciate your feedback.  I had been hoping that with no regressions
>>> this might still be eligible for stage2.  Christophe never indicated
>>> either way, but the fact that he was looking at it seemed positive.
>>> I thought I would be a couple of weeks faster with this last
>>> iteration, but holidays got in the way.
>>
>> GCC doesn't have a stage 2 any more (historical wart).  We were in
>> (late) stage3 when this was first posted, and because of the significant
>> impact this might have on not just CM0 but other targets as well, I
>> don't think it's something we should try to squeeze in at the last
>> minute.  We're now in stage 4, so that is doubly the case.
>>
>> Christophe is a very valuable member of our community, but he's not a
>> port maintainer and thus cannot really rule on what can go into the
>> tools, or when.
>>
>>>
>>> I actually think your comments below could all be addressable within a
>>> couple of days.  But, I'm not accounting for the review process.
>>>
 Firstly, the patch is very large, but contains a large number of
 distinct changes, so it would really benefit from being broken down into
 a number of distinct patches.  This will make reviewing the individual
 changes much more straight-forward.
>>>
>>> I have no context for "large" or "small" with respect to gcc.  This
>>> patch comprises about 30% of a previously-monolithic library that's
>>> been shipping since ~2016 (the rest is libm material).  Other than
>>> (1) the aforementioned change to div0(), (2) a nascent adaptation
>>> for __truncdfsf2() (not enabled), and (3) the gratuitous addition of
>>> the bitwise functions, the library remains pretty much as it was
>>> originally released.
>>
>> Large, like many other terms is relative.  For assembler file changes,
>> which this is primarily, the overall size can be much smaller and still
>> be considered 'large'.
>>
>>>
>>> The driving force in the development of this library was small size,
>>> which of course was never possible with the softfp routines.  It's not
>>> half-slow, either, for the limitations of the M0 architecture.   And,
>>> it's IEEE compliant.  But, that means that most of the functions are
>>> highly interconnected.  So, some of it can be broken up as you outline
>>> below, but that last patch is still worth more than half of the total.
>>
>> Nevertheless, having the floating-point code separated out will make
>> reviewing more straight forward.  I'll likely need to ask one of our FP
>> experts to have a specific look at that part and that will be easier if
>> it is disentangled from the other changes.
>>
>>>
>>> I also have ~70k lines of test vectors that seem mostly redundant, but
>>> not completely.  I haven't decided what to do here.  For example, I have
>>> coverage for __aeabi_u/ldivmod, while GCC does not.  If I do anything
>>> with this code it will be in a separate thread.
>>
>> Publishing the test code, even if it isn't integrated into the GCC
>> testsuite would be useful.  Perhaps someone else could then help with that.
>>
>>>
 I'd suggest:

 1) Some basic makefile cleanups to ease initial integration - in
 particular where we have things like

 LIB1FUNCS += 

 that this be rewritten with one function per line (and sorted
 alphabetically) - then we can see which functions are being changed in
 subsequent patches.  It makes the Makefile fragments longer, but the
 improvement in clarity makes this worthwhile.
>>>
>>> I know next to nothing about Makefiles, particularly ones as complex as
>>> GCC's.  I was just trying to work with the existing style to avoid
>>> breaking something.  However, I can certainly adopt this suggestion.
>>>
 2) The changes for the existing integer functions - preferably one
 function per patch.

 3) The new integer functions that you're adding
>>>
>>> These wouldn't be too hard to do, but what are the expectations for
>>> testing?  A clean build of GCC takes about 6 hours in my VM, and
>>> regression testing takes about 4 hours per architecture.  You would want
>>> a full regression report for each incremental patch?  I have no idea how
>>> to target regression tests that apply to particular runtime functions
>>> without the risk of missing something.
>>>
>>
>> Most of this can be tested in a cross-compile environment using qemu as
>> a model.  A cross build shouldn't take that long (especially if you
>> restrict the compiler to just C and C++ - other languages are
>> vanishingly unlikely to pick up errors in the parts of the compi

[PATCH] c++, abi: Fix abi_tag attribute handling [PR98481]

2021-01-07 Thread Jakub Jelinek via Gcc-patches
Hi!

In GCC10 cp_walk_subtrees has been changed to walk template arguments.
As the following testcase shows, that changed the mangling of some functions.
I believe the previous behavior that find_abi_tags_r doesn't recurse into
template args has been the correct one, but setting *walk_subtrees = 0
for the types and handling the types subtree walking manually in
find_abi_tags_r looks too hard, there are a lot of subtrees and details what
should and shouldn't be walked, both in tree.c (walk_type_fields there,
which is static) and in cp_walk_subtrees itself.

The following patch abuses the fact that *walk_subtrees is an int to
tell cp_walk_subtrees it shouldn't walk the template args.

Another option would be to have two separate cp_walk_subtrees-like
callbacks, one that wouldn't walk into template args and the other
that would and then would tail call the other one, and
cp_walk_tree_without_duplicates but call walk_tree_1 directly or use
some other macro.

Now that I look at it, likely mark_abi_tags_r should behave the same way.

Bootstrapped/regtested on x86_64-linux and i686-linux.

2021-01-07  Jakub Jelinek  

PR c++/98481
* tree.c (cp_walk_subtrees): Don't walk template args if
*walk_subtrees_p is 2.
* class.c (find_abi_tags_r): Set *walk_subtrees to 2 instead of 1
for types.

* g++.dg/abi/abi-tag24.C: New test.

--- gcc/cp/tree.c.jj2021-01-04 10:25:49.102117545 +0100
+++ gcc/cp/tree.c   2021-01-07 12:43:17.674974823 +0100
@@ -5147,8 +5147,9 @@ cp_walk_subtrees (tree *tp, int *walk_su
   if (TYPE_P (*tp))
 {
   /* Walk into template args without looking through typedefs.  */
-  if (tree ti = TYPE_TEMPLATE_INFO_MAYBE_ALIAS (*tp))
-   WALK_SUBTREE (TI_ARGS (ti));
+  if (*walk_subtrees_p != 2)
+   if (tree ti = TYPE_TEMPLATE_INFO_MAYBE_ALIAS (*tp))
+ WALK_SUBTREE (TI_ARGS (ti));
   /* Don't look through typedefs; walk_tree_fns that want to look through
 typedefs (like min_vis_r) need to do that themselves.  */
   if (typedef_variant_p (*tp))
--- gcc/cp/class.c.jj   2021-01-04 10:25:48.933119459 +0100
+++ gcc/cp/class.c  2021-01-07 12:50:51.723881933 +0100
@@ -1508,7 +1508,12 @@ static tree
 find_abi_tags_r (tree *tp, int *walk_subtrees, void *data)
 {
   if (!OVERLOAD_TYPE_P (*tp))
-return NULL_TREE;
+{
+  if (TYPE_P (*tp) && *walk_subtrees == 1)
+   /* Tell cp_walk_subtrees not to walk into template args.  */
+   *walk_subtrees = 2;
+  return NULL_TREE;
+}
 
   /* walk_tree shouldn't be walking into any subtrees of a RECORD_TYPE
  anyway, but let's make sure of it.  */
--- gcc/testsuite/g++.dg/abi/abi-tag24.C.jj 2021-01-07 12:58:12.128942173 
+0100
+++ gcc/testsuite/g++.dg/abi/abi-tag24.C2021-01-07 12:58:47.995539911 
+0100
@@ -0,0 +1,17 @@
+// PR c++/98481
+// { dg-do compile { target c++11 } }
+inline namespace N __attribute ((__abi_tag__ ("myabi")))
+{
+  struct A {};
+}
+template 
+struct B { typedef int size_type; };
+struct S1 { B::size_type foo () const { return 1; } };
+struct S2 { B::size_type foo () const; };
+int S2::foo () const { return 2; }
+int (S1::*f1) () const = &S1::foo;
+int (S2::*f2) () const = &S2::foo;
+
+// { dg-final { scan-assembler "_ZNK2S13fooEv" } }
+// { dg-final { scan-assembler "_ZNK2S23fooEv" } }
+// { dg-final { scan-assembler-not "_ZNK2S13fooB5myabiEv" } }

Jakub



[committed] d: Merge upstream dmd 9038e64c5.

2021-01-07 Thread Iain Buclaw via Gcc-patches
Hi,

This patch adds support for using user-defined attributes on function
arguments and single-parameter alias declarations.  These attributes
behave analogously to existing UDAs.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 9038e64c5.
* d-builtins.cc (build_frontend_type): Update call to
Parameter::create.
---
 gcc/d/d-builtins.cc   |   2 +-
 gcc/d/dmd/MERGE   |   2 +-
 gcc/d/dmd/arrayop.c   |   8 +-
 gcc/d/dmd/clone.c |  16 +-
 gcc/d/dmd/cond.c  |   2 +-
 gcc/d/dmd/declaration.c   |   2 +-
 gcc/d/dmd/dtemplate.c |   2 +-
 gcc/d/dmd/expression.c|   2 +-
 gcc/d/dmd/expressionsem.c |   5 +-
 gcc/d/dmd/func.c  |  24 ++-
 gcc/d/dmd/hdrgen.c|  12 ++
 gcc/d/dmd/mtype.c |  30 +--
 gcc/d/dmd/mtype.h |   7 +-
 gcc/d/dmd/parse.c | 113 --
 gcc/d/dmd/statementsem.c  |  24 +--
 gcc/d/dmd/traits.c|  39 +++-
 .../gdc.test/compilable/extra-files/header1.d |  18 ++
 .../gdc.test/compilable/testheaderudamodule.d |   2 +
 .../gdc.test/fail_compilation/fail10207.d |   2 +-
 gcc/testsuite/gdc.test/runnable/uda.d | 194 ++
 20 files changed, 432 insertions(+), 74 deletions(-)

diff --git a/gcc/d/d-builtins.cc b/gcc/d/d-builtins.cc
index 26ccd00c79a..3f1533b592f 100644
--- a/gcc/d/d-builtins.cc
+++ b/gcc/d/d-builtins.cc
@@ -311,7 +311,7 @@ build_frontend_type (tree type)
  return NULL;
}
 
- args->push (Parameter::create (sc, targ, NULL, NULL));
+ args->push (Parameter::create (sc, targ, NULL, NULL, NULL));
}
 
  /* GCC generic and placeholder built-ins are marked as variadic, yet
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 1629b4535ee..25b2b3ac965 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-a5c86f5b92c4cd3afde910c89881ccaea11de554
+9038e64c5b67a10763d32893f53bb6c610df3595
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/arrayop.c b/gcc/d/dmd/arrayop.c
index 72abd5e9b30..20cdb6fd184 100644
--- a/gcc/d/dmd/arrayop.c
+++ b/gcc/d/dmd/arrayop.c
@@ -51,7 +51,7 @@ FuncDeclaration *buildArrayOp(Identifier *ident, BinExp *exp, 
Scope *sc)
 Parameter *p = (*fparams)[0];
 // foreach (i; 0 .. p.length)
 Statement *s1 = new ForeachRangeStatement(Loc(), TOKforeach,
-new Parameter(0, NULL, Id::p, NULL),
+new Parameter(0, NULL, Id::p, NULL, NULL),
 new IntegerExp(Loc(), 0, Type::tsize_t),
 new ArrayLengthExp(Loc(), new IdentifierExp(Loc(), p->ident)),
 new ExpStatement(Loc(), loopbody),
@@ -422,7 +422,7 @@ Expression *buildArrayLoop(Expression *e, Parameters 
*fparams)
 void visit(Expression *e)
 {
 Identifier *id = Identifier::generateId("c", fparams->length);
-Parameter *param = new Parameter(0, e->type, id, NULL);
+Parameter *param = new Parameter(0, e->type, id, NULL, NULL);
 fparams->shift(param);
 result = new IdentifierExp(Loc(), id);
 }
@@ -441,7 +441,7 @@ Expression *buildArrayLoop(Expression *e, Parameters 
*fparams)
 void visit(ArrayLiteralExp *e)
 {
 Identifier *id = Identifier::generateId("p", fparams->length);
-Parameter *param = new Parameter(STCconst, e->type, id, NULL);
+Parameter *param = new Parameter(STCconst, e->type, id, NULL, 
NULL);
 fparams->shift(param);
 Expression *ie = new IdentifierExp(Loc(), id);
 Expression *index = new IdentifierExp(Loc(), Id::p);
@@ -451,7 +451,7 @@ Expression *buildArrayLoop(Expression *e, Parameters 
*fparams)
 void visit(SliceExp *e)
 {
 Identifier *id = Identifier::generateId("p", fparams->length);
-Parameter *param = new Parameter(STCconst, e->type, id, NULL);
+Parameter *param = new Parameter(STCconst, e->type, id, NULL, 
NULL);
 fparams->shift(param);
 Expression *ie = new IdentifierExp(Loc(), id);
 Expression *index = new IdentifierExp(Loc(), Id::p);
diff --git a/gcc/d/dmd/clone.c b/gcc/d/dmd/clone.c
index dd22fb340f5..73c4a660368 100644
--- a/gcc/d/dmd/clone.c
+++ b/gcc/d/dmd/clone.c
@@ -244,7 +244,7 @@ FuncDeclaration *buildOpAssign(StructDeclaration *sd, Scope 
*sc)
 }
 
 Parameters *fparams = new Parameters;
-fparams->push(new Parameter(STCnodtor, sd->type, Id::p, NULL));
+fparams->push(n

Re: [PATCH] libstdc++: Fix long double to_chars testcase [PR98384]

2021-01-07 Thread Jonathan Wakely via Gcc-patches

On 07/01/21 10:37 -0500, Patrick Palka via Libstdc++ wrote:

The testcase was failing to compile on some targets due to its use of
the non-standard functions nextupl and nextdownl.  This patch makes the
testcase instead use the C99 function nexttowardl in an equivalent
manner.

libstdc++-v3/ChangeLog:

PR libstdc++/98384
* testsuite/20_util/to_chars/long_double.cc: Use nexttowardl
instead of the non-standard nextupl and nextdownl.


OK, thanks.



Re: [PATCH] libphobos: Allow building libphobos using Solaris/x86 assembler

2021-01-07 Thread Iain Buclaw via Gcc-patches
Excerpts from Rainer Orth's message of January 7, 2021 5:17 pm:
> Hi Iain,
> 
>>> The Solaris assemblers don't support UTF-8 identifiers.  Unless gdc can
>>> encode them in some way for toolchains like this (no idea if this is
>>> worth the effort), it may be possible to guard the tests with the ucn
>>> effective-target keyword.
>>> 
>>> Apart from that, it seems strange that the failing tests should only
>>> show up as UNSUPPORTED.  I'd have expected the compilation to FAIL, but
>>> IIRC the gdc testsuite has to ignore all output, so the test for excess
>>> errors which would usually catch this is disabled effectively.
>>
>> Indeed, the testsuite is far too verbose.  Although many tests have a
>> TEST_OUTPUT directive, converting them to a Dejagnu style is probably
>> too much effort for the gain.
>>
>> Those tests can just be explicitly disabled, I'll look into that.
> 
> Great, thanks.
> 
>>> The last failure is different and due to how COMDAT group handling is
>>> done with Solaris as:
>>> 
>>> +UNRESOLVED: gdc.test/runnable/test42.d compilation failed to produce
>>> executable
>>> +UNRESOLVED: gdc.test/runnable/test42.d -shared-libphobos compilation
>>> failed to produce executable
>>> 
>>> which yields
>>> 
>>> Input string too long, limit 10240
>>> 
>>> The offending input lines are (stripped for brevity)
>>> 
>>> .section.tdata._D6test42__T5Foo71VAyaa2623[...]
>>> .group  _D6test42__T5Foo71VAyaa2623_68656c6c6f616[...]
>>> 
>>> The first line is 10597 chars, the second even 15869.
>>> 
>>
>> Is there a max symbol length macro available internally?  Maybe could
> 
> Not that I'm aware of.  I believe D tests are the first time ever that I
> ran into this Solaris/x86 as limit.  One might try to iteratively
> determine the value at configure time if this is helpful.  No idea if
> other non-gas assemblers are even worse in that department.  E.g. the
> Solaris/SPARC one has a considerably higher limit...
> 
>> just compress symbols using MD5 if they exceed a certain length...
> 
> That's certainly an easy option.  OTOH if this is unlikely to occur in
> real-life code, one could just xfail the test on Solaris/x86 with as...
> 

Looking at the test which generates that symbol, it is very much a
contrived example that deliberately exceeds the limit of another linker.
However given the size and use of meta and templates in some production
codebases out there, I can't say that symbols like it for sure won't
appear in real-life code.  Though it's going to be unlikely said code
will be compiled for Solaris anyway.

Iain.


Re: Patch RFA: Support non-ASCII file names in git-changelog

2021-01-07 Thread Ian Lance Taylor via Gcc-patches
On Wed, Jan 6, 2021 at 5:37 AM Martin Liška  wrote:
>
> On 1/6/21 8:25 AM, Martin Liška wrote:
> > Anyway, I've got a workaround that I'm going to push.
>
> It's fixed now.
>
> @Ian: Can you please try to push the changes now?

It worked.

Thanks.

Ian
b87ec922c4090fcacf802c73b6bfd59a8632f8a5
diff --git 
"a/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204foo.go" 
"b/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204foo.go"
new file mode 100644
index 000..8b6a814c3c4
--- /dev/null
+++ "b/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204foo.go"
@@ -0,0 +1,13 @@
+package Äfoo
+
+var ÄbarV int = 101
+
+func Äbar(x int) int {
+   defer func() { ÄbarV += 3 }()
+   return Äblix(x)
+}
+
+func Äblix(x int) int {
+   defer func() { ÄbarV += 9 }()
+   return ÄbarV + x
+}
diff --git 
"a/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204main.go" 
"b/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204main.go"
new file mode 100644
index 000..25d2c71fc00
--- /dev/null
+++ "b/gcc/testsuite/go.test/test/fixedbugs/issue27836.dir/\303\204main.go"
@@ -0,0 +1,13 @@
+package main
+
+import (
+   "fmt"
+
+   "./Äfoo"
+   Äblix "./Äfoo"
+)
+
+func main() {
+   fmt.Printf("Äfoo.Äbar(33) returns %v\n", Äfoo.Äbar(33))
+   fmt.Printf("Äblix.Äbar(33) returns %v\n", Äblix.Äbar(33))
+}
diff --git a/gcc/testsuite/go.test/test/fixedbugs/issue27836.go 
b/gcc/testsuite/go.test/test/fixedbugs/issue27836.go
new file mode 100644
index 000..128cf9d06ad
--- /dev/null
+++ b/gcc/testsuite/go.test/test/fixedbugs/issue27836.go
@@ -0,0 +1,7 @@
+// compiledir
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ignored


Re: [PATCH] Fix array-quals-1.c for RISC-V

2021-01-07 Thread Jim Wilson
On Wed, Jan 6, 2021 at 1:17 AM Kito Cheng  wrote:

> RISC-V will put those variables in srodata rather than rodata.
> gcc/testsuite/ChangeLog:
> * gcc.dg/array-quals-1.c: Allow srodata.
>

OK.

Jim


Re: [OG10] Fortran: delinearize multi-dimensional array accesses

2021-01-07 Thread Sandra Loosemore

On 12/26/20 3:41 AM, Thomas Koenig wrote:


Hi Sandra,

The attached patch implements delinearization of array accesses in the 
Fortran front end, something that has been discussed for a long time.


Definitely - among others, this is the subject of PR 14741, which is by
now quite historic.

I've been asked to try to get this patch committed on the OG10 branch 
since it is blocking some further optimization work with Graphite for 
OpenACC kernels regions.  I have a mainline version of this patch as 
well that I can send to anyone interested in trying it out, but TBH, I 
don't think this is ready for mainline yet.


That would be indeed interesting.  Could you post that to the list as
well?


Attached to this mail.


The current status is that
there are still two gfortran tests that are regressing 
(gfortran.dg/graphite/id-9.f and 
gfortran.dg/vect/fast-math-mgrid-resid.f), and while it's been 
confirmed that this helps with Graphite optimizations as intended, we 
haven't yet run any benchmarks to confirm that it doesn't make other 
things slower. 


It is probably too late; this could go in for the next stage 1.


Yes, I hope we can get this more polished by that time.


Does the patch actually make loop interchange for matrix multiplication
work (done with C for loops or Fortran DO loops), or is there additional
work required?


There's some additional work required, and probably not by me since I 
know next to nothing about graphite or GCC's loop optimization framework 
generally.  :-(  (The motivation for this patch at this time is that we 
need the delinearization for some ongoing OpenACC parallelization work.)



Regarding scalarized loops: We still do not collapse loops for

   subroutine foo(a)
     real, dimension(:,:), contiguous :: a
     a = 5.
   end subroutine foo

so an extension to scalarized loops would be quite valuable.


Yes.  I guesstimated that would be a medium-sized project since the 
scalarized references are generated by a completely different code path.



It would be interesting to see if

   subroutine foo(a,n,m)
     real, dimension(n,m) :: a
     do j=1,m
   do i=1,n
     a(i,j) = 5.
   end do
     end do
   end subroutine foo

is collapsed to a single loop with the patch and Graphite.


This one also requires some additional work.  At least Graphite 
recognizes the SCoP around the loop nest with this patch which it did 
not do without delinearization.


-Sandra
>From 2e714885f2aab9c24b6f3d37c995e7e4a5913ee5 Mon Sep 17 00:00:00 2001
From: Sandra Loosemore 
Date: Thu, 7 Jan 2021 11:09:03 -0800
Subject: [PATCH] Fortran: delinearize multi-dimensional array accesses

The Fortran front end presently linearizes accesses to
multi-dimensional arrays by combining the indices for the various
dimensions into a series of explicit multiplies and adds with
refactoring to allow CSE of invariant parts of the computation.
Unfortunately this representation interferes with Graphite-based loop
optimizations.  It is difficult to recover the original
multi-dimensional form of the access by the time loop optimizations
run because parts of it have already been optimized away or into a
form that is not easily recognizable, so it seems better to have the
Fortran front end produce delinearized accesses to begin with, a set
of nested ARRAY_REFs similar to the existing behavior of the C and C++
front ends.  This is a long-standing problem that has previously been
discussed e.g. in PR 14741 and PR61000.

This patch is an initial implementation for explicit array accesses
only; it doesn't handle the accesses generated during scalarization of
whole-array or array-section operations, which follow a different code
path.

2020-12-17  Sandra Loosemore  
	Tobias Burnus  

	gcc/
	* expr.c (get_inner_reference): Handle NOP_EXPR like
	VIEW_CONVERT_EXPR.

	gcc/fortran/
	* lang.opt (-param=delinearize=): New.
	* trans-array.c (get_class_array_vptr): New, split from...
	(build_array_ref): ...here.
	(get_array_lbound, get_array_ubound): New, split from...
	(gfc_conv_array_ref): ...here.  Additional code refactoring
	plus support for delinearization of the array access.

	gcc/testsuite/
	* gfortran.dg/assumed_type_2.f90: Adjust patterns.
	* gfortran.dg/goacc/kernels-loop-inner.f95: Likewise.
	* gfortran.dg/graphite/block-3.f90: Remove xfails.
	* gfortran.dg/graphite/block-4.f90: Likewise.
	* gfortran.dg/inline_matmul_24.f90: Adjust patterns.
	* gfortran.dg/no_arg_check_2.f90: Likewise.
	* gfortran.dg/pr32921.f: Likewise.
	* gfortran.dg/reassoc_4.f: Disable delinearization for this test.
---
 gcc/expr.c |   1 +
 gcc/fortran/lang.opt   |   4 +
 gcc/fortran/trans-array.c  | 321 -
 gcc/testsuite/gfortran.dg/assumed_type_2.f90   |   6 +-
 .../gfortran.dg/goacc/kernels-loop-inner.f95   |   2 +-
 gcc/testsuite/gfortran.dg/graphite/block-3.f90 |   1 -
 gcc/testsuite/gfortran.dg/graphite/blo

Re: [PATCH v2 0/2] RISC-V: Introduce new architecture extension test macros

2021-01-07 Thread Jim Wilson
On Thu, Jan 7, 2021 at 1:55 AM Kito Cheng  wrote:

> This patch set introduces a new set of architecture extension test macros
> which was accepted in riscv-c-api-doc[1] recently.
>
> The motivation of this scheme is to have a unified naming scheme for
> extension macros and to add the capability of checking versions.
>
> V2 Changes:
> - Fix MacOS build issue.
> - Create new header file: riscv-subset.h
>

This patch series looks good to me.

Jim


[committed] analyzer: fix ICE when DECL_INITIAL is error_mark_node [PR98580]

2021-01-07 Thread David Malcolm via Gcc-patches
lto-streamer-out.c's get_symbol_initial_value can return error_mark_node
rather than DECL_INITIAL as an optimization to avoid extra sections for
simple scalar values.

Add a check to the analyzer to handle such cases gracefully.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-6527-g0677759f753d321bde52d7343227f842b7e759d2.

gcc/analyzer/ChangeLog:
PR analyzer/98580
* region.cc (decl_region::get_svalue_for_initializer): Gracefully
handle when LTO writes out DECL_INITIAL as error_mark_node.

gcc/testsuite/ChangeLog:
PR analyzer/98580
* gcc.dg/analyzer/pr98580-a.c: New test.
* gcc.dg/analyzer/pr98580-b.c: New test.
---
 gcc/analyzer/region.cc| 5 +
 gcc/testsuite/gcc.dg/analyzer/pr98580-a.c | 9 +
 gcc/testsuite/gcc.dg/analyzer/pr98580-b.c | 2 ++
 3 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr98580-a.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr98580-b.c

diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc
index aefc389edd3..6db1fc91afd 100644
--- a/gcc/analyzer/region.cc
+++ b/gcc/analyzer/region.cc
@@ -969,6 +969,11 @@ decl_region::get_svalue_for_initializer 
(region_model_manager *mgr) const
 c.get_map ());
 }
 
+  /* LTO can write out error_mark_node as the DECL_INITIAL for simple scalar
+ values (to avoid writing out an extra section).  */
+  if (init == error_mark_node)
+return NULL;
+
   if (TREE_CODE (init) == CONSTRUCTOR)
 return get_svalue_for_constructor (init, mgr);
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98580-a.c 
b/gcc/testsuite/gcc.dg/analyzer/pr98580-a.c
new file mode 100644
index 000..d2b10d6df2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr98580-a.c
@@ -0,0 +1,9 @@
+/* { dg-do link } */
+/* { dg-require-effective-target lto } */
+/* { dg-additional-options "-flto" } */
+/* { dg-additional-sources pr98580-b.c } */
+
+int a;
+int *p = &a;
+int foo();
+int main() { return foo(); }
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98580-b.c 
b/gcc/testsuite/gcc.dg/analyzer/pr98580-b.c
new file mode 100644
index 000..629ebcec3c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/pr98580-b.c
@@ -0,0 +1,2 @@
+extern int *p;
+int foo() { return *p; }
-- 
2.26.2



Re: [PATCH] libstdc++: Add support for C++20 barriers

2021-01-07 Thread Thomas Rodgers via Gcc-patches


Tested x86_64-pc-linux-gnu, committed to master.

Jonathan Wakely writes:

> On 17/12/20 15:37 -0800, Thomas Rodgers wrote:
>>From: Thomas Rodgers 
>>
>>Cleans up a few things mentioned on IRC.
>>
>>Adds 
>>
>>libstdc++/ChangeLog:
>>
>>  * doc/doxygen/user.cfg.in: Add new header.
>>  * include/Makefile.am (std_headers): likewise.
>>  * include/Makefile.in: Regenerate.
>>* include/precompiled/stdc++.h: Add new header.
>>  * include/std/barrier: New file.
>>  * include/std/version: Add __cpp_lib_barrier feature test macro.
>>  * testsuite/30_thread/barrier/1.cc: New test.
>>  * testsuite/30_thread/barrier/2.cc: Likewise.
>>  * testsuite/30_thread/barrier/arrive_and_drop.cc: Likewise.
>>  * testsuite/30_thread/barrier/arrive_and_wait.cc: Likewise.
>>  * testsuite/30_thread/barrier/arrive.cc: Likewise.
>>  * testsuite/30_thread/barrier/completion.cc: Likewise.
>>  * testsuite/30_thread/barrier/max.cc: Likewise.
>
>
>>+#ifndef _GLIBCXX_BARRIER
>>+#define _GLIBCXX_BARRIER 1
>>+
>>+#pragma GCC system_header
>>+
>>+#if __cplusplus > 201703L
>>+#include 
>>+#if __cpp_lib_atomic_wait  && __cpp_aligned_new
>
> There's an extra space here before the && operator.
>
>>+#endif // __cpp_lib_atomic_wait  && __cpp_aligned_new
>
> And here.
>
>>+#endif // __cplusplus > 201703L
>>+#endif // _GLIBCXX_BARRIER
>>+
>>diff --git a/libstdc++-v3/include/std/version 
>>b/libstdc++-v3/include/std/version
>>index e4a8bed52ab..07d17433c5b 100644
>>--- a/libstdc++-v3/include/std/version
>>+++ b/libstdc++-v3/include/std/version
>>@@ -200,6 +200,9 @@
>> #if defined _GLIBCXX_HAS_GTHREADS || defined _GLIBCXX_HAVE_LINUX_FUTEX
>> # define __cpp_lib_atomic_wait 201907L
>> #endif
>>+#if __cpp_lib_atomic_wait
>
> This needs to match the condition used in .
>
>>+#define __cpp_lib_barrier 201907L
>>+#endif
>
> You could just put it inside the previous block where
> __cpp_lib_atomic_wait is defined:
>
> #if defined _GLIBCXX_HAS_GTHREADS || defined _GLIBCXX_HAVE_LINUX_FUTEX
> # define __cpp_lib_atomic_wait 201907L
> # if __cpp_aligned_new
> #  define __cpp_lib_barrier 201907L
> # endif
> #endif
>
>> #define __cpp_lib_bind_front 201907L
>> #if __has_builtin(__builtin_bit_cast)
>> # define __cpp_lib_bit_cast 201806L
>>diff --git a/libstdc++-v3/testsuite/30_threads/barrier/1.cc 
>>b/libstdc++-v3/testsuite/30_threads/barrier/1.cc
>>new file mode 100644
>>index 000..0b38160a58b
>>--- /dev/null
>>+++ b/libstdc++-v3/testsuite/30_threads/barrier/1.cc
>>@@ -0,0 +1,27 @@
>>+// Copyright (C) 2020 Free Software Foundation, Inc.
>>+//
>>+// This file is part of the GNU ISO C++ Library.  This library is free
>>+// software; you can redistribute it and/or modify it under the
>>+// terms of the GNU General Public License as published by the
>>+// Free Software Foundation; either version 3, or (at your option)
>>+// any later version.
>>+
>>+// This library is distributed in the hope that it will be useful,
>>+// but WITHOUT ANY WARRANTY; without even the implied warranty of
>>+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>+// GNU General Public License for more details.
>>+
>>+// You should have received a copy of the GNU General Public License along
>>+// with this library; see the file COPYING3.  If not see
>>+// .
>>+
>>+// { dg-options "-std=gnu++2a" }
>>+// { dg-do compile { target c++2a } }
>
> This test will fail for non-gthreads targets, because they don't
> define the macro. This needs the same condition as the similar
> 29_atomics/atomic/wait_notify/1.cc test:
>
> // { dg-require-effective-target gthreads }
>
> (which is the new way to say { dg-requires-gthread "" })
>
>>+#include 
>>+
>>+#ifndef __cpp_lib_barrier
>>+# error "Feature-test macro for barrier missing in "
>>+#elif __cpp_lib_barrier != 201907L
>>+# error "Feature-test macro for barrier has wrong value in "
>>+#endif
>>diff --git a/libstdc++-v3/testsuite/30_threads/barrier/2.cc 
>>b/libstdc++-v3/testsuite/30_threads/barrier/2.cc
>>new file mode 100644
>>index 000..1d8d83639e0
>>--- /dev/null
>>+++ b/libstdc++-v3/testsuite/30_threads/barrier/2.cc
>>@@ -0,0 +1,27 @@
>>+// Copyright (C) 2019-2020 Free Software Foundation, Inc.
>>+//
>>+// This file is part of the GNU ISO C++ Library.  This library is free
>>+// software; you can redistribute it and/or modify it under the
>>+// terms of the GNU General Public License as published by the
>>+// Free Software Foundation; either version 3, or (at your option)
>>+// any later version.
>>+
>>+// This library is distributed in the hope that it will be useful,
>>+// but WITHOUT ANY WARRANTY; without even the implied warranty of
>>+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>+// GNU General Public License for more details.
>>+
>>+// You should have received a copy of the GNU General Public License along
>>+// with this library; see the file COPYING3.  If not see
>>+// .
>>+
>>+// { 

Re: [PATCH] c++: private inheritance access diagnostics fix [PR17314]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/5/21 9:24 AM, Anthony Sharp via Gcc-patches wrote:

This patch fixes PR17314 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17314).
Previously, when class C attempted to access member a declared in class A
through class B, where class B privately inherits from A and class C inherits
from B, GCC would correctly report an access violation, but would erroneously
report that the reason was because a was "protected", when in fact, from the
point of view of class C, it was "private". This patch updates the
diagnostics code to generate more correct errors in cases of failed
inheritance such as these.

The reason this bug happened was because GCC was examining the
declared access of decl, instead of looking at it in the context of class
inheritance.

--- COMMENTS ---

This is my first GCC patch ever


Thanks, and welcome!

To start with, do you have a copyright assignment on file or in the 
works already?  If not, see the top of


  https://gcc.gnu.org/contribute.html

for more information.  I probably shouldn't look at the patch in detail 
until that's addressed.


Second, your patch was mangled by word wrap so that it can't be applied 
without manual repair.  If you can't prevent word wrap in your mail 
client, please send it as an attachment rather than inline.


Also, there are a few whitespace issues in the patch; please run 
contrib/check_GNU_style.sh on the patch before submitting.



so there is probably something I have done
very wrong. Please let me know :) The thought of my code being scrutinised
by people with PhDs and doctorates is quite frankly terrifying.

Note that since it is a new year I had to make a new changelog file so the
diff for the patch might be slightly off.


If you use contrib/gcc-git-customization.sh and then git 
gcc-commit-mklog you don't need to touch ChangeLog files at all, just 
adjust the generated ChangeLog entries in the git commit message.  I 
personally tend to commit first with a placeholder message and then use 
git gcc-commit-mklog --amend to generate the ChangeLog entries.



There was no need to add additional regression tests since it was adequate
to simply change some of the regression tests that were there originally
(all the patch changes is the informative message telling the user where
a decl was defined as private).


Agreed.


--- REGRESSION ANALYSIS ---

No regressions reported.

G++ (CLEAN) RESULTS

# of expected passes202879
# of unexpected failures1
# of expected failures988
# of unsupported tests8654

GCC (CLEAN) RESULTS

# of expected passes163377
# of unexpected failures94
# of unexpected successes37
# of expected failures915
# of unsupported tests2530

G++ (PR17314 PATCHED) RESULTS

# of expected passes202871
# of unexpected failures1
# of expected failures988
# of unsupported tests8654

GCC (PR17314 PATCHED) RESULTS

# of expected passes163377
# of unexpected failures94
# of unexpected successes37
# of expected failures915
# of unsupported tests2530

When I build and make -k check -j 6 on the patched source it reports
202871 passes (8 fewer), although the FAILs do not increase. I am not 100%
sure why this happens since I have not removed any testcases, only edited a
few, but I think this happens because in files like dr142.c I removed more
output checks than I added. make -k check -j 6 also returns error 2
sometimes, although there are no obvious errors or warnings in the logs
explaining why. Probably harmless?


Probably.  Can you use sort/uniq/diff on the .sum testsuite output to 
determine which passes are missing in the patched sources?



--- BUILD REPORT ---

GCC builds normally on x86_64-pc-linux-gnu for x86_64-pc-linux-gnu using
make -j 6. I didn't see it necessary to test on other build targets since the
patch only affects the C++ front end and so functionality is unlikely
to differ between platforms.

The compile log reports:

Comparing stages 2 and 3
warning: gcc/cc1obj-checksum.o differs
Comparison successful.

and then continues. I assume this means it was actually successful.


Yes.

Thanks,
Jason


Index: gcc/cp/ChangeLog
from  Anthony Sharp  

 Fixes PR17314
 * typeck.c (complain_about_unrecognized_member): Updated function
 arguments in complain_about_access.
 * call.c (complain_about_access): Altered function.
 * semantics.c (get_parent_with_private_access): Added function.
 (access_in_type): Added as extern function.
 * search.c (access_in_type): Made function non-static so it can be
 used in semantics.c.
 * cp-tree.h (complain_about_access): Changed parameters of function.
Index: gcc/testsuite/ChangeLog
from  Anthony Sharp  

 Fixes PR17314
 * g++.dg/lookup/scoped1.c modified testcase to run successfully with
 changes.
 * g++.dg/tc1/dr142.c modified testcase to run successfully with
 changes.
 * g++.dg/t

[pushed] c++: Add some conversion sanity checking.

2021-01-07 Thread Jason Merrill via Gcc-patches
Another change I was working on revealed that for complex numbers we were
building a ck_identity with build_conv, leading to the wrong active member
in the union being set.  Rather than add another enumeration of the
appropriate conversion codes, I factored that out.

gcc/cp/ChangeLog:

* call.c (has_next): Factor out from...
(next_conversion): ...here.
(strip_standard_conversion): And here.
(is_subseq): And here.
(build_conv): Check it.
(standard_conversion): Don't call build_conv
for ck_identity.
---
 gcc/cp/call.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 182ea94bb7c..218157088ef 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -761,12 +761,26 @@ alloc_conversions (size_t n)
   return (conversion **) conversion_obstack_alloc (n * sizeof (conversion *));
 }
 
+/* True iff the active member of conversion::u for code CODE is NEXT.  */
+
+static inline bool
+has_next (conversion_kind code)
+{
+  return !(code == ck_identity
+  || code == ck_ambig
+  || code == ck_list
+  || code == ck_aggr);
+}
+
 static conversion *
 build_conv (conversion_kind code, tree type, conversion *from)
 {
   conversion *t;
   conversion_rank rank = CONVERSION_RANK (from);
 
+  /* Only call this function for conversions that use u.next.  */
+  gcc_assert (from == NULL || has_next (code));
+
   /* Note that the caller is responsible for filling in t->cand for
  user-defined conversions.  */
   t = alloc_conversion (code);
@@ -863,10 +877,7 @@ static conversion *
 next_conversion (conversion *conv)
 {
   if (conv == NULL
-  || conv->kind == ck_identity
-  || conv->kind == ck_ambig
-  || conv->kind == ck_list
-  || conv->kind == ck_aggr)
+  || !has_next (conv->kind))
 return NULL;
   return conv->u.next;
 }
@@ -879,10 +890,7 @@ strip_standard_conversion (conversion *conv)
 {
   while (conv
 && conv->kind != ck_user
-&& conv->kind != ck_ambig
-&& conv->kind != ck_list
-&& conv->kind != ck_aggr
-&& conv->kind != ck_identity)
+&& has_next (conv->kind))
 conv = next_conversion (conv);
   return conv;
 }
@@ -1266,13 +1274,15 @@ standard_conversion (tree to, tree from, tree expr, 
bool c_cast_p,
(TREE_TYPE (to), TREE_TYPE (from), NULL_TREE, c_cast_p, flags,
 complain);
 
-  if (part_conv)
+  if (!part_conv)
+   conv = NULL;
+  else if (part_conv->kind == ck_identity)
+   /* Leave conv alone.  */;
+  else
{
  conv = build_conv (part_conv->kind, to, conv);
  conv->rank = part_conv->rank;
}
-  else
-   conv = NULL;
 
   return conv;
 }
@@ -10619,10 +10629,7 @@ is_subseq (conversion *ics1, conversion *ics2)
ics2 = next_conversion (ics2);
 
   if (ics2->kind == ck_user
- || ics2->kind == ck_ambig
- || ics2->kind == ck_aggr
- || ics2->kind == ck_list
- || ics2->kind == ck_identity)
+ || !has_next (ics2->kind))
/* At this point, ICS1 cannot be a proper subsequence of
   ICS2.  We can get a USER_CONV when we are comparing the
   second standard conversion sequence of two user conversion

base-commit: b7c3f201be582553f9e3506536aa406bbd7ee71c
-- 
2.27.0



[PATCH] c++: ICE with constrained placeholder return type [PR98346]

2021-01-07 Thread Patrick Palka via Gcc-patches
This is essentially a followup to r11-3714 -- we're ICEing from another
"unguarded" call to build_concept_check, this time in do_auto_deduction,
due to the presence of templated trees when !processing_template_decl.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps the 10 branch?

gcc/cp/ChangeLog:

PR c++/98346
* pt.c (do_auto_deduction): Temporarily increment
processing_template_decl before calling build_concept_check.

gcc/testsuite/ChangeLog:

PR c++/98346
* g++.dg/cpp2a/concepts-placeholder3.C: New test.
---
 gcc/cp/pt.c   |  2 ++
 .../g++.dg/cpp2a/concepts-placeholder3.C  | 15 +++
 2 files changed, 17 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index beabcc4b027..111a694e0c5 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -29464,7 +29464,9 @@ do_auto_deduction (tree type, tree init, tree auto_node,
   cargs = targs;
 
/* Rebuild the check using the deduced arguments.  */
+   ++processing_template_decl;
check = build_concept_check (cdecl, cargs, tf_none);
+   --processing_template_decl;
 
if (!constraints_satisfied_p (check))
   {
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C
new file mode 100644
index 000..a5d0b1e1d0f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder3.C
@@ -0,0 +1,15 @@
+// PR c++/98346
+// { dg-do compile { target c++20 } }
+
+template 
+concept always_satisfied = true;
+
+using arg_alias = int;
+
+template 
+using result_of = decltype(F{}(arg_alias{}));
+
+template 
+always_satisfied> auto foo(F) {}
+
+void bar() { foo(0); }
-- 
2.30.0



Re: [PATCH] c++, v2: Fix ICE with __builtin_bit_cast [PR98469]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/5/21 10:26 AM, Jakub Jelinek wrote:

On Mon, Jan 04, 2021 at 04:01:25PM -0500, Jason Merrill via Gcc-patches wrote:

On 1/4/21 3:48 PM, Jakub Jelinek wrote:

On Mon, Jan 04, 2021 at 03:44:46PM -0500, Jason Merrill wrote:

This change is OK, but part of the problem is that we're trying to do
overload resolution for an S copy/move constructor, which we shouldn't be
because bit_cast is a prvalue, so in C++17 and up we should use it to
directly initialize the target without any implied constructor call.

It seems we're mishandling this because the code in
build_special_member_call specifically looks for TARGET_EXPR or CONSTRUCTOR,
and BIT_CAST_EXPR is neither of those.

Wrapping a BIT_CAST_EXPR of aggregate type in a TARGET_EXPR would address
this, and any other places that expect a class prvalue to come in the form
of a TARGET_EXPR.


I can try that tomorrow.  Won't that cause copying through extra temporary
in some cases though, or is that guaranteed to be optimized?


It won't cause any extra copying when it's used to initialize another object
(like the return value of std::bit_cast).  Class prvalues are always
expressed with a TARGET_EXPR in the front end; the TARGET_EXPR melts away
when used as an initializer, it only creates a temporary when it's used in
another way.


Ok, this version wraps it into a TARGET_EXPR then, it alone fixes the bug,
but I've kept the constexpr.c change too.


This patch corrects this and one other place to not be as dependent on 
TARGET_EXPR, but I think I'm going to save it for stage 1.


Jason
commit 0d732b8c7fb3f8378dc1c894358bb5d766e6be5d
Author: Jason Merrill 
Date:   Mon Jan 4 16:11:08 2021 -0500

c++: Tweak prvalue test [PR98469]

Discussing the 98469 patch and class prvalues with Jakub also inspired me to
change the place that was mishandling BIT_CAST_EXPR and one other to use the
lvalue_kind machinery to decide whether something is a prvalue, instead of
looking specifically for a TARGET_EXPR.

gcc/cp/ChangeLog:

* call.c (build_special_member_call): Use !glvalue_p rather
than specific tree codes to test for prvalue.
(conv_is_prvalue): Likewise.
(implicit_conversion): Check CLASS_TYPE_P first.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 218157088ef..e2d2b23e449 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -2118,8 +2118,8 @@ implicit_conversion (tree to, tree from, tree expr, bool c_cast_p,
 	flags, complain);
   if (!conv || conv->bad_p)
 return conv;
-  if (conv_is_prvalue (conv)
-  && CLASS_TYPE_P (conv->type)
+  if (CLASS_TYPE_P (conv->type)
+  && conv_is_prvalue (conv)
   && CLASSTYPE_PURE_VIRTUALS (conv->type))
 conv->bad_p = true;
   return conv;
@@ -8500,8 +8500,7 @@ conv_is_prvalue (conversion *c)
 return true;
   if (c->kind == ck_user && !TYPE_REF_P (c->type))
 return true;
-  if (c->kind == ck_identity && c->u.expr
-  && TREE_CODE (c->u.expr) == TARGET_EXPR)
+  if (c->kind == ck_identity && c->u.expr && !glvalue_p (c->u.expr))
 return true;
 
   return false;
@@ -9950,8 +9949,7 @@ build_special_member_call (tree instance, tree name, vec **args,
 	  && CONSTRUCTOR_NELTS (arg) == 1)
 	arg = CONSTRUCTOR_ELT (arg, 0)->value;
 
-  if ((TREE_CODE (arg) == TARGET_EXPR
-	   || TREE_CODE (arg) == CONSTRUCTOR)
+  if (!glvalue_p (arg)
 	  && (same_type_ignoring_top_level_qualifiers_p
 	  (class_type, TREE_TYPE (arg
 	{


Re: [PATCH] c++: Fix thinko in auto return type checking [PR98441]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/5/21 7:31 PM, Marek Polacek wrote:

This fixes a thinko in my r11-2085 patch: when I said "But only give the
!late_return_type errors when funcdecl_p, to accept e.g. auto (*fp)() = f;
in C++11" I should've done this, otherwise we give bogus errors mentioning
"function with trailing return type" when there is none.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


gcc/cp/ChangeLog:

PR c++/98441
* decl.c (grokdeclarator): Move the !funcdecl_p check inside the
!late_return_type block.

gcc/testsuite/ChangeLog:

PR c++/98441
* g++.dg/cpp0x/auto55.C: New test.
---
  gcc/cp/decl.c   |  8 +---
  gcc/testsuite/g++.dg/cpp0x/auto55.C | 13 +
  2 files changed, 18 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/auto55.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index bf6f12c26a0..1a114a2e2d0 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -12241,10 +12241,12 @@ grokdeclarator (const cp_declarator *declarator,
tree late_return_type = declarator->u.function.late_return_type;
if (tree auto_node = type_uses_auto (type))
  {
-   if (!late_return_type && funcdecl_p)
+   if (!late_return_type)
  {
-   if (current_class_type
-   && LAMBDA_TYPE_P (current_class_type))
+   if (!funcdecl_p)
+ /* auto (*fp)() = f; is OK.  */;
+   else if (current_class_type
+&& LAMBDA_TYPE_P (current_class_type))
  /* OK for C++11 lambdas.  */;
else if (cxx_dialect < cxx14)
  {
diff --git a/gcc/testsuite/g++.dg/cpp0x/auto55.C 
b/gcc/testsuite/g++.dg/cpp0x/auto55.C
new file mode 100644
index 000..5bd32ac890d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/auto55.C
@@ -0,0 +1,13 @@
+// PR c++/98441
+// { dg-do compile { target c++11 } }
+
+struct a {
+int& mfn();
+};
+
+void fn()
+{
+int&  (a::*myvar1)(void) = &a::mfn;
+auto& (a::*myvar2)(void) = &a::mfn;
+auto  (a::*myvar3)(void) = &a::mfn;
+}

base-commit: ad92bf4b165935b58195825dc8f089f53fd2710b





[committed] fix another ICE in MEM_REF formatting (PR 98578)

2021-01-07 Thread Martin Sebor via Gcc-patches

Fixing the ICE in MEM_REF formatting (or the enhancements that
came along with the fix) introduced another ICE, plus a plugin
test failure.  I have committed the attached simple patch to
fix both.

Martin

PS There are outstanding bugs to fix/improvements to be made
to the MEM_REF formatting (though hopefully no more ICEs) that
I found while testing the attached fix.  I xfailed the tests
and opened the referenced bugs to keep track of them.
commit 178f0afce3611282170de380fcea9db9d6e3ff0c
Author: Martin Sebor 
Date:   Thu Jan 7 14:20:39 2021 -0700

PR middle-end/98578 - ICE warning on uninitialized VLA access

gcc/c-family/ChangeLog:

PR middle-end/98578
* c-pretty-print.c (print_mem_ref): Strip array from access type.
Avoid assuming access type's size is constant.  Correct condition
guarding the printing of a parenthesis.

gcc/testsuite/ChangeLog:

PR middle-end/98578
* gcc.dg/plugin/gil-1.c: Adjust expected output.
* gcc.dg/uninit-pr98578.c: New test.

diff --git a/gcc/c-family/c-pretty-print.c b/gcc/c-family/c-pretty-print.c
index e963cf53091..87301a2091c 100644
--- a/gcc/c-family/c-pretty-print.c
+++ b/gcc/c-family/c-pretty-print.c
@@ -1844,22 +1844,25 @@ print_mem_ref (c_pretty_printer *pp, tree e)
 	}
 }
 
-  const tree access_type = TREE_TYPE (e);
+  tree access_type = TREE_TYPE (e);
+  if (TREE_CODE (access_type) == ARRAY_TYPE)
+access_type = TREE_TYPE (access_type);
   tree arg_type = TREE_TYPE (TREE_TYPE (arg));
   if (TREE_CODE (arg_type) == ARRAY_TYPE)
 arg_type = TREE_TYPE (arg_type);
 
   if (tree access_size = TYPE_SIZE_UNIT (access_type))
-{
-  /* For naturally aligned accesses print the nonzero offset
-	 in units of the accessed type, in the form of an index.
-	 For unaligned accesses also print the residual byte offset.  */
-  offset_int asize = wi::to_offset (access_size);
-  offset_int szlg2 = wi::floor_log2 (asize);
-
-  elt_idx = byte_off >> szlg2;
-  byte_off = byte_off - (elt_idx << szlg2);
-}
+if (TREE_CODE (access_size) == INTEGER_CST)
+  {
+	/* For naturally aligned accesses print the nonzero offset
+	   in units of the accessed type, in the form of an index.
+	   For unaligned accesses also print the residual byte offset.  */
+	offset_int asize = wi::to_offset (access_size);
+	offset_int szlg2 = wi::floor_log2 (asize);
+
+	elt_idx = byte_off >> szlg2;
+	byte_off = byte_off - (elt_idx << szlg2);
+  }
 
   /* True to include a cast to the accessed type.  */
   const bool access_cast = VOID_TYPE_P (arg_type)
@@ -1924,9 +1927,9 @@ print_mem_ref (c_pretty_printer *pp, tree e)
 }
   if (elt_idx != 0)
 {
-  if (byte_off == 0 && char_cast)
+  if (access_cast || char_cast)
 	pp_c_right_paren (pp);
-  pp_c_right_paren (pp);
+
   if (addr)
 	{
 	  pp_space (pp);
diff --git a/gcc/testsuite/gcc.dg/plugin/gil-1.c b/gcc/testsuite/gcc.dg/plugin/gil-1.c
index 4e8f535ba85..66872f07466 100644
--- a/gcc/testsuite/gcc.dg/plugin/gil-1.c
+++ b/gcc/testsuite/gcc.dg/plugin/gil-1.c
@@ -13,7 +13,7 @@ void test_2 (PyObject *obj)
 {
   Py_BEGIN_ALLOW_THREADS /* { dg-message "releasing the GIL here" } */
 
-  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*\\(obj\\)' without the GIL" } */
+  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*obj' without the GIL" } */
   Py_DECREF (obj);
 
   Py_END_ALLOW_THREADS
@@ -60,7 +60,7 @@ void test_5 (PyObject *obj)
 static void  __attribute__((noinline))
 called_by_test_6 (PyObject *obj)
 {
-  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*\\(obj\\)' without the GIL" } */
+  Py_INCREF (obj); /* { dg-warning "use of PyObject '\\*obj' without the GIL" } */
   Py_DECREF (obj);
 }
 
diff --git a/gcc/testsuite/gcc.dg/uninit-pr98578.c b/gcc/testsuite/gcc.dg/uninit-pr98578.c
new file mode 100644
index 000..98d611757ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uninit-pr98578.c
@@ -0,0 +1,110 @@
+/* PR middle-end/98578 - ICE warning on uninitialized VLA access
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+void* malloc (__SIZE_TYPE__);
+
+void T (int, ...);
+
+void vla_n (int n, int i)
+{
+  int a1[n];
+
+  /* a1[I] should be formatted as as a1[I] (or, for I == 0, perhaps
+ as *a1), but definitely not as *a1[I].  This is a bug in VLA
+ formatting.  */
+  T (a1[0]);// { dg-warning "'a1\\\[0]' is used uninitialized" "pr98587" { xfail *-*-* } }
+// { dg-warning "'\\*a1\\\[0]' is used uninitialized" "spurious star" { target *-*-* } .-1 }
+  T (a1[1]);// { dg-warning "a1\\\[1]' is used uninitialized" }
+  T (a1[i]);// { dg-warning "a1\\\[i]' is used uninitialized" }
+}
+
+void vla_n_2 (int n, int i)
+{
+  int a2[n][2];
+
+  T (a2[0][0]);   // { dg-warning "a2\\\[0]\\\[0]' is used uninitialized" }
+  T (a2[2][1]);   // { dg-warning "a2\\\[2]\\\[1]' is used uninitialized" }
+  T (a2[3][i]);   // { dg-warning "a2

Re: [PATCH v3] handle MEM_REF with void* arguments (PR c++/95768)

2021-01-07 Thread Martin Sebor via Gcc-patches

On 1/7/21 1:26 AM, Jakub Jelinek wrote:

On Sat, Jan 02, 2021 at 03:22:25PM -0700, Martin Sebor via Gcc-patches wrote:

PR c++/95768 - pretty-printer ICE on -Wuninitialized with allocated storage

gcc/c-family/ChangeLog:

PR c++/95768
* c-pretty-print.c (c_pretty_printer::primary_expression): For
SSA_NAMEs print VLA names and GIMPLE defining statements.
(print_mem_ref): New function.
(c_pretty_printer::unary_expression): Call it.


This broke:
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so  (test for 
warnings, line 16)
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so  (test for 
warnings, line 63)
+FAIL: gcc.dg/plugin/gil-1.c -fplugin=./analyzer_gil_plugin.so (test for excess 
errors)
and
+FAIL: g++.dg/cpp0x/constexpr-trivial2.C  -std=c++11  (test for errors, line 13)
+FAIL: g++.dg/cpp0x/constexpr-trivial2.C  -std=c++11 (test for excess errors)
The former one is just a different printing of the MEM_REF from what the
test expects, but the latter is an ICE (one needs
make check-c++-all RUNTESTFLAGS=dg.exp=constexpr-trivial2.C
to reproduce or GXX_TESTSUITE_STDS=11 or similar as C++11 is not tested by
default).


I saw the gcc.dg/plugin/gil-1.c failure but missed the subtle difference
in the output.  Rerunning the test didn't show any failures so I assumed
it was something transient.  But when I tried again today and looked more
carefully at the output I saw the test doesn't run at all by itself.
This doesn't work:

 $ nice make -C /ssd/test/build/gcc-95768/gcc check-c 
RUNTESTFLAGS="plugin.exp=gil-1.c"


Is there some special magic to get it to run by itself or do these
tests just not do that?

Running all the plugin tests does work but also shows the failures
below that don't show up in the final summary (or on gcc-testresults).
I assume those are unrelated (I think I've seen this test fail in
a similarly mysterious way before).

FAIL: gcc.dg/plugin/diagnostic-test-expressions-1.c 
-fplugin=./diagnostic_plugin_test_tree_expression_range.so  1 blank 
line(s) in output
FAIL: gcc.dg/plugin/diagnostic-test-expressions-1.c 
-fplugin=./diagnostic_plugin_test_tree_expression_range.so  expected 
multiline pattern lines 550-551 not found: " 
__builtin_types_compatible_p \(long, int\) \+ f \(i\)\);.*\n 
   ~\^~~\n"
FAIL: gcc.dg/plugin/diagnostic-test-expressions-1.c 
-fplugin=./diagnostic_plugin_test_tree_expression_range.so (test for 
excess errors)


Martin

PS I committed a fix for both the ICE and the gil-1.c failure
in r11-6532.


Re: [committed] fix another ICE in MEM_REF formatting (PR 98578)

2021-01-07 Thread Jakub Jelinek via Gcc-patches
On Thu, Jan 07, 2021 at 02:29:50PM -0700, Martin Sebor via Gcc-patches wrote:
> --- a/gcc/c-family/c-pretty-print.c
> +++ b/gcc/c-family/c-pretty-print.c
> @@ -1844,22 +1844,25 @@ print_mem_ref (c_pretty_printer *pp, tree e)
>   }
>  }
>  
> -  const tree access_type = TREE_TYPE (e);
> +  tree access_type = TREE_TYPE (e);
> +  if (TREE_CODE (access_type) == ARRAY_TYPE)
> +access_type = TREE_TYPE (access_type);
>tree arg_type = TREE_TYPE (TREE_TYPE (arg));
>if (TREE_CODE (arg_type) == ARRAY_TYPE)
>  arg_type = TREE_TYPE (arg_type);

The array types can be multidimensional, are you sure you don't want
to use strip_array_types instead?

Jakub



Re: [PATCH] c++: Fix access checking of scoped non-static member [PR98515]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/6/21 1:19 PM, Patrick Palka wrote:

In the first testcase below, we incorrectly reject the use of the
protected non-static member A::var0 from C::g() because
check_accessibility_of_qualified_id, at template parse time, determines
that the access doesn't go through 'this'.  (This happens because the
dependent base B of C doesn't have a binfo object, so it appears
to DERIVED_FROM_P that A is not an indirect base of C.)  From there
we create the corresponding deferred access check, which we then
perform at instantiation time and which (unsurprisingly) fails.

The problem ultimately seems to be that we can't, in general, know
whether a use of a scoped non-static member goes through 'this' until
instantiation time, as the second testcase below demonstrates.  So this
patch makes check_accessibility_of_qualified_id punt in this situation.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit?

gcc/cp/ChangeLog:

PR c++/98515
* semantics.c (check_accessibility_of_qualified_id): Punt if
we're checking the access of a scoped non-static member at
class template parse time.

gcc/testsuite/ChangeLog:

PR c++/98515
* g++.dg/template/access32.C: New test.
* g++.dg/template/access33.C: New test.
---
  gcc/cp/semantics.c   | 20 +++-
  gcc/testsuite/g++.dg/template/access32.C |  8 
  gcc/testsuite/g++.dg/template/access33.C |  9 +
  3 files changed, 32 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/access32.C
  create mode 100644 gcc/testsuite/g++.dg/template/access33.C

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index b448efe024a..f52b2e4d1e7 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -2107,14 +2107,24 @@ check_accessibility_of_qualified_id (tree decl,
/* If the reference is to a non-static member of the
 current class, treat it as if it were referenced through
 `this'.  */
-  tree ct;
if (DECL_NONSTATIC_MEMBER_P (decl)
- && current_class_ptr
- && DERIVED_FROM_P (scope, ct = current_nonlambda_class_type ()))
-   qualifying_type = ct;
+ && current_class_ptr)
+   {
+ if (dependent_type_p (TREE_TYPE (current_class_ptr)))


This should also look at current_nonlambda_class_type.


+ /* In general we can't know whether this access goes through `this'
+until instantiation of the current class.  Punt now, or else
+we might create a deferred access check that's relative to the
+wrong class.  We'll check this access again after substitution,
+e.g. from tsubst_qualified_id.  */
+   return true;
+
+ if (tree current = current_nonlambda_class_type ())
+   if (DERIVED_FROM_P (scope, current))
+ qualifying_type = current;
+   }
/* Otherwise, use the type indicated by the
 nested-name-specifier.  */
-  else
+  if (!qualifying_type)
qualifying_type = nested_name_specifier;
  }
else
diff --git a/gcc/testsuite/g++.dg/template/access32.C 
b/gcc/testsuite/g++.dg/template/access32.C
new file mode 100644
index 000..08faa9f0f97
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/access32.C
@@ -0,0 +1,8 @@
+// PR c++/98515
+// { dg-do compile }
+
+struct A { protected: int var0; };
+template  struct B : public A { };
+template  struct C : public B { void g(); };
+template  void C::g() { A::var0++; }
+template class C;
diff --git a/gcc/testsuite/g++.dg/template/access33.C 
b/gcc/testsuite/g++.dg/template/access33.C
new file mode 100644
index 000..9fb9b9a1236
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/access33.C
@@ -0,0 +1,9 @@
+// PR c++/98515
+// { dg-do compile }
+
+struct A { protected: int var0; };
+template  struct B : public A { };
+template  struct C : public B { void g(); };
+template  void C::g() { A::var0++; } // { dg-error 
"protected|invalid" }
+template <> struct B { };
+template class C;





Re: [PATCH] c++: Fix up tsubst of BIT_CAST_EXPR [PR98329]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/7/21 11:30 AM, Jakub Jelinek wrote:

Hi!

As the testcase shows, calling cp_build_bit_cast in tsubst_copy doesn't seem
to be a good idea, because tsubst_copy might not really make the operand
non-dependent, but as processing_template_decl can be 0,
type_dependent_expression_p will return false and then cp_build_bit_cast
assumes the type is non-NULL and non-dependent.
So, this patch just follows what is done e.g. for NOP_EXPR etc. and just
builds some tree in tsubst_copy, and only calls the semantics.c function
from tsubst_copy_and_build.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


OK.


2021-01-07  Jakub Jelinek  

PR c++/98329
* pt.c (tsubst_copy) : Don't call
cp_build_bit_cast here, instead just build_min a BIT_CAST_EXPR and set
its location.
(tsubst_copy_and_build): Handle BIT_CAST_EXPR.

* g++.dg/cpp2a/bit-cast10.C: New test.

--- gcc/cp/pt.c.jj  2021-01-05 22:33:41.635816799 +0100
+++ gcc/cp/pt.c 2021-01-07 11:45:40.935610649 +0100
@@ -16796,7 +16796,9 @@ tsubst_copy (tree t, tree args, tsubst_f
{
tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
tree op0 = tsubst_copy (TREE_OPERAND (t, 0), args, complain, in_decl);
-   return cp_build_bit_cast (EXPR_LOCATION (t), type, op0, complain);
+   r = build_min (BIT_CAST_EXPR, type, op0);
+   SET_EXPR_LOCATION (r, EXPR_LOCATION (t));
+   return r;
}
  
  case SIZEOF_EXPR:

@@ -19631,6 +19633,13 @@ tsubst_copy_and_build (tree t,
RETURN (r);
}
  
+case BIT_CAST_EXPR:

+  {
+   tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
+   tree op0 = RECUR (TREE_OPERAND (t, 0));
+   RETURN (cp_build_bit_cast (EXPR_LOCATION (t), type, op0, complain));
+  }
+
  case POSTDECREMENT_EXPR:
  case POSTINCREMENT_EXPR:
op1 = tsubst_non_call_postfix_expression (TREE_OPERAND (t, 0),
--- gcc/testsuite/g++.dg/cpp2a/bit-cast10.C.jj  2021-01-07 11:48:02.998020930 
+0100
+++ gcc/testsuite/g++.dg/cpp2a/bit-cast10.C 2021-01-07 11:47:52.224141482 
+0100
@@ -0,0 +1,42 @@
+// PR c++/98329
+// { dg-do compile { target c++20 } }
+
+template 
+constexpr To
+foo (const From &from)
+{
+  return __builtin_bit_cast (To, &from);
+}
+
+template 
+constexpr To
+bar (const From &from)
+{
+  return __builtin_bit_cast (To, *from);
+}
+
+template 
+constexpr To
+baz (const From &from)
+{
+  return __builtin_bit_cast (To, **from);
+}
+
+template 
+constexpr To
+qux (const From &from)
+{
+  return __builtin_bit_cast (To, -from);
+}
+
+void
+test ()
+{
+  int i = 0;
+  int *j = &i;
+  int **k = &j;
+  foo  (i);
+  bar  (j);
+  baz  (k);
+  qux  (i);
+}

Jakub





[PATCH] testsuite: Fix test failures from outputs.exp [PR98225]

2021-01-07 Thread Bernd Edlinger
Hi,

On 1/7/21 5:12 PM, Rainer Orth wrote:
>   The unsetenv needs to be wrapped in
> 
> if [info exists env(MAKEFLAGS)] {
> 

Done.

> @@ -163,6 +167,9 @@ proc outest { test sources opts dirs out
>   if { $ogl != {} } {
>   pass "$test: $d$o"
>   file delete $ogl
> + } elseif { [string match "*.ld1_args" $o] } {
> + # This file may be missing if !HAVE_GNU_LD
> + pass "$test: $d$o"
> 
>   Always PASSing the test even if it isn't run is wrong.  Either wrap
>   the whole group of tests with response files in
> 
> if [check_effective_target_gld] {
> 
>   or make the test for the *.ld1_args file conditional on that
>   (e.g. along the lines of $ltop used elsewhere).  I'd welcome input
>   from Alexandre which is preferred.
> 

Ah, yes that is a good idea.  Thanks.


I think the .cdtor.* handling is probably a bad example that I followed here.
I don't know why that is there in the first place, as there
are no C++ test cases, these files should not be created at all.
If they are ever created we would have a couple of other files created
as well IMHO.
If there are still missing files in some cases,
I'd prefer to track these per test case, instead of globally.

Therefore I propose to remove that exception for now.


Is it OK for trunk?


Thanks
Bernd.
From 9e0fc10b1c655320ccb63c1798141f4a572410f8 Mon Sep 17 00:00:00 2001
From: Bernd Edlinger 
Date: Thu, 7 Jan 2021 09:37:32 +0100
Subject: [PATCH] testsuite: Fix test failures from outputs.exp [PR98225]

The .ld1_args file is not created when HAVE_GNU_LD is false.
The ltrans0.ltrans_arg file is not created when the make jobserver
is available, so remove the MAKEFLAGS variable.
There are no .cdtor.* files ever created with any of the tests,
so remove the exception for those files.

2021-01-07  Bernd Edlinger  

	PR testsuite/98225
	* gcc.misc-tests/outputs.exp: Unset MAKEFLAGS.
	Expect .ld1_args only when GNU LD is used.
	Remove exception for .cdtor.* files.
---
 gcc/testsuite/gcc.misc-tests/outputs.exp | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.misc-tests/outputs.exp b/gcc/testsuite/gcc.misc-tests/outputs.exp
index 80d4b61..05e5b1b2 100644
--- a/gcc/testsuite/gcc.misc-tests/outputs.exp
+++ b/gcc/testsuite/gcc.misc-tests/outputs.exp
@@ -50,6 +50,9 @@ if !$skip_lto {
 set ltop [check_linker_plugin_available]
 }
 
+# Check for GNU LD.  Some files like .ld1_args depend on this.
+set gld [check_effective_target_gld]
+
 # Prepare additional options to be used for linking.
 # We do not compile to an executable, because that requires naming an output.
 set link_options ""
@@ -67,6 +70,12 @@ if {[board_info $dest exists output_format]} {
 append link_options " additional_flags=-Wl,-oformat,[board_info $dest output_format]"
 }
 
+# Avoid possible influence from the make jobserver,
+# otherwise ltrans0.ltrans_args files may be missing.
+if [info exists env(MAKEFLAGS)] {
+unsetenv MAKEFLAGS
+}
+
 # For the test named TEST, run the compiler with SOURCES and OPTS, and
 # look in DIRS for OUTPUTS.  SOURCES is a list of suffixes for source
 # files starting with $b in $srcdir/$subdir, OPTS is a string with
@@ -130,6 +139,7 @@ proc outest { test sources opts dirs outputs } {
 	foreach og $olist {
 	if { [string index $og 0] == "!" } {
 		global gspd ltop
+		global gld
 		set cond [expr $og]
 		continue
 	}
@@ -179,11 +189,7 @@ proc outest { test sources opts dirs outputs } {
 set outb {}
 foreach f $outs {
 	file delete $f
-	# collect2 may create .cdtor* files in -save-temps link tests,
-	# ??? without regard to aux output naming conventions.
-	if ![string match "*.cdtor.*" $f] then {
-	lappend outb $f
-	}
+	lappend outb $f
 }
 foreach d $dirs {
 	file delete -force $d
@@ -285,10 +291,10 @@ outest "$b exe savetmp namedb" $sing "-o $b.exe -save-temps" {} {{--0.i --0.s --
 outest "$b exe savetmp named2" $mult "-o $b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o .exe}}
 
 # Additional files are created when an @file is used
-outest "$b exe savetmp namedb" $sing "@/dev/null -o $b.exe -save-temps" {} {{--0.i --0.s --0.o .args.0 .ld1_args .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -o $b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 .ld1_args .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -o $b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 .ld1_args .exe}}
-outest "$b exe savetmp named2" $mult "@/dev/null -I dummy -L dummy -o $b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o -args.0 -args.1 .args.2 .args.3 .ld1_args .exe}}
+outest "$b exe savetmp namedb" $sing "@/dev/null -o $b.exe -save-temps" {} {{--0.i --0.s --0.o .args.0 !!$gld .ld1_args !0 .exe}}
+outest "$b exe savetmp named2" $mult "@/dev/null -o $b.exe -save-temps" {} {{--1.i --1.s --1.o --2.i --2.s --2.o .args.0 !!$gld .ld1_arg

Re: [PATCH] c++: ICE with constexpr call that returns a PMF [PR98551]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/7/21 10:10 AM, Patrick Palka wrote:

We shouldn't do replace_result_decl after evaluating a call that returns
a PMF because PMF temporaries aren't wrapped in a TARGET_EXPR (and so we
can't trust ctx->object), and PMF initializers can't be self-referential
anyway, so replace_result_decl would always be a no-op.  This fixes an
ICE from the sanity check in replace_result_decl in the below testcase
during cxx_eval_call_expression of the call f() in the initializer g(f()).

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

PR c++/98551
* constexpr.c (cxx_eval_call_expression): Don't call
replace_result_decl when the result is a PMF.

gcc/testsuite/ChangeLog:

PR c++/98551
* g++.dg/cpp0x/constexpr-pmf2.C: New test.
---
  gcc/cp/constexpr.c  | 1 +
  gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C | 9 +
  2 files changed, 10 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 0c12f608d36..a7272d49d0d 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -2788,6 +2788,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
   current object under construction.  */
if (!*non_constant_p && ctx->object
&& AGGREGATE_TYPE_P (TREE_TYPE (res))
+   && !TYPE_PTRMEMFUNC_P (TREE_TYPE (res))


It ought to work to change AGGREGATE_TYPE_P to CLASS_TYPE_P; we can't 
return an array, and a vector can't contain a pointer to itself.


Alternately, we could change the same-type assert in replace_result_decl 
to a test and return false if different.


OK with either change.


&& !is_empty_class (TREE_TYPE (res)))
  if (replace_result_decl (&result, res, ctx->object))
cacheable = false;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C
new file mode 100644
index 000..a76e712afe1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-pmf2.C
@@ -0,0 +1,9 @@
+// PR c++/98551
+// { dg-do compile { target c++11 } }
+
+struct A {};
+struct B { int t(); };
+using pmf = decltype(&B::t);
+constexpr pmf f() { return &B::t; }
+constexpr A g(pmf) { return {}; };
+constexpr A x = g(f());





Re: [PATCH] c++: Fix access checking of scoped non-static member [PR98515]

2021-01-07 Thread Patrick Palka via Gcc-patches
On Thu, 7 Jan 2021, Jason Merrill wrote:

> On 1/6/21 1:19 PM, Patrick Palka wrote:
> > In the first testcase below, we incorrectly reject the use of the
> > protected non-static member A::var0 from C::g() because
> > check_accessibility_of_qualified_id, at template parse time, determines
> > that the access doesn't go through 'this'.  (This happens because the
> > dependent base B of C doesn't have a binfo object, so it appears
> > to DERIVED_FROM_P that A is not an indirect base of C.)  From there
> > we create the corresponding deferred access check, which we then
> > perform at instantiation time and which (unsurprisingly) fails.
> > 
> > The problem ultimately seems to be that we can't, in general, know
> > whether a use of a scoped non-static member goes through 'this' until
> > instantiation time, as the second testcase below demonstrates.  So this
> > patch makes check_accessibility_of_qualified_id punt in this situation.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
> > commit?
> > 
> > gcc/cp/ChangeLog:
> > 
> > PR c++/98515
> > * semantics.c (check_accessibility_of_qualified_id): Punt if
> > we're checking the access of a scoped non-static member at
> > class template parse time.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > PR c++/98515
> > * g++.dg/template/access32.C: New test.
> > * g++.dg/template/access33.C: New test.
> > ---
> >   gcc/cp/semantics.c   | 20 +++-
> >   gcc/testsuite/g++.dg/template/access32.C |  8 
> >   gcc/testsuite/g++.dg/template/access33.C |  9 +
> >   3 files changed, 32 insertions(+), 5 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/template/access32.C
> >   create mode 100644 gcc/testsuite/g++.dg/template/access33.C
> > 
> > diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
> > index b448efe024a..f52b2e4d1e7 100644
> > --- a/gcc/cp/semantics.c
> > +++ b/gcc/cp/semantics.c
> > @@ -2107,14 +2107,24 @@ check_accessibility_of_qualified_id (tree decl,
> > /* If the reference is to a non-static member of the
> >  current class, treat it as if it were referenced through
> >  `this'.  */
> > -  tree ct;
> > if (DECL_NONSTATIC_MEMBER_P (decl)
> > - && current_class_ptr
> > - && DERIVED_FROM_P (scope, ct = current_nonlambda_class_type ()))
> > -   qualifying_type = ct;
> > + && current_class_ptr)
> > +   {
> > + if (dependent_type_p (TREE_TYPE (current_class_ptr)))
> 
> This should also look at current_nonlambda_class_type.

Ah, ack.  But it seems to me we really only need to be checking
dependence of current_nonlambda_class_type here.  IIUC, dependence of
these two types should coincide except in the case where we're inside a
generic lambda within a non-template class (in which case
current_class_ptr will be dependent and current_nonlambda_class_type won't).
But in this case we have enough information to be able to resolve the
access check at parse time, I think (and so we shouldn't punt).

The below patch, which seems to pass 'make check-c++', checks the
dependence of current_nonlambda_class_type instead of that of
current_class_ptr.  Does this approach seem right?

-- >8 --

Subject: [PATCH] c++: Fix access checking of scoped non-static member
 [PR98515]

In the first testcase below, we incorrectly reject the use of the
protected non-static member A::var0 from C::g() because
check_accessibility_of_qualified_id, at template parse time, determines
that the access doesn't go through 'this'.  (This happens because the
dependent base B of C doesn't have a binfo object, so it appears
to DERIVED_FROM_P that A is not an indirect base of C.)  From there
we create the corresponding deferred access check, which we then
perform at instantiation time and which (unsurprisingly) fails.

The problem ultimately seems to be that we can't in general determine
whether a use of a scoped non-static member goes through 'this' until
instantiation time, as the second testcase below illustrates.  So this
patch makes check_accessibility_of_qualified_id punt in such situations
to avoid creating a bogus deferred access check.

gcc/cp/ChangeLog:

PR c++/98515
* semantics.c (check_accessibility_of_qualified_id): Punt if
we're checking access of a scoped non-static member inside a
class template.

gcc/testsuite/ChangeLog:

PR c++/98515
* g++.dg/template/access32.C: New test.
* g++.dg/template/access33.C: New test.
---
 gcc/cp/semantics.c   | 22 +-
 gcc/testsuite/g++.dg/template/access32.C |  8 
 gcc/testsuite/g++.dg/template/access33.C |  9 +
 3 files changed, 34 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/access32.C
 create mode 100644 gcc/testsuite/g++.dg/template/access33.C

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index b448efe024a..51f7c114b03 100644
--- a/gcc/cp/semantics.c
++

[PATCH] ipa-modref: avoid linebreak split in debug print

2021-01-07 Thread Sergei Trofimovich via Gcc-patches
From: Sergei Trofimovich 

* ipa-modref.c (merge_call_side_effects): Fix
linebreak split by reordering two print calls.
---
 gcc/ipa-modref.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index fcc676d25e4..04613201f1f 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -835,10 +835,6 @@ merge_call_side_effects (modref_summary *cur_summary,
   auto_vec  parm_map;
   bool changed = false;
 
-  if (dump_file)
-fprintf (dump_file, " - Merging side effects of %s with parm map:",
-callee_node->dump_name ());
-
   /* We can not safely optimize based on summary of callee if it does
  not always bind to current def: it is possible that memory load
  was optimized out earlier which may not happen in the interposed
@@ -850,6 +846,10 @@ merge_call_side_effects (modref_summary *cur_summary,
   cur_summary->loads->collapse ();
 }
 
+  if (dump_file)
+fprintf (dump_file, " - Merging side effects of %s with parm map:",
+callee_node->dump_name ());
+
   parm_map.safe_grow_cleared (gimple_call_num_args (stmt), true);
   for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
 {
-- 
2.30.0



Re: [committed] fix another ICE in MEM_REF formatting (PR 98578)

2021-01-07 Thread Martin Sebor via Gcc-patches

On 1/7/21 2:37 PM, Jakub Jelinek wrote:

On Thu, Jan 07, 2021 at 02:29:50PM -0700, Martin Sebor via Gcc-patches wrote:

--- a/gcc/c-family/c-pretty-print.c
+++ b/gcc/c-family/c-pretty-print.c
@@ -1844,22 +1844,25 @@ print_mem_ref (c_pretty_printer *pp, tree e)
}
  }
  
-  const tree access_type = TREE_TYPE (e);

+  tree access_type = TREE_TYPE (e);
+  if (TREE_CODE (access_type) == ARRAY_TYPE)
+access_type = TREE_TYPE (access_type);
tree arg_type = TREE_TYPE (TREE_TYPE (arg));
if (TREE_CODE (arg_type) == ARRAY_TYPE)
  arg_type = TREE_TYPE (arg_type);


The array types can be multidimensional, are you sure you don't want
to use strip_array_types instead?


Pretty sure.

access_type is used to figure out the element index and residual
byte offset into the argument, so that needs the size of the array.

Both access_type and arg_type are then checked for compatibility,
to decide if the type of the access needs to be included as a cast.
So there again I think the outer array bounds need to be preserved.

There are a few simple tests involving multidimensional VLAs but
a more involved example I just tried triggers another ICE, this
time in gimple_canonical_types_compatible_p.  Apparently
the default invocation of the function doesn't like mixed arrays
and scalars.  So clearly there's a whole bounty of ICEs here and
more to do.

Martin


[PATCH] x86-64: Use R10 for profiling large model

2021-01-07 Thread H.J. Lu via Gcc-patches
Since R10 is preserved when calling mcount, R10 can be used as a scratch
register to call mcount in the large model.

gcc/

PR target/98482
* config/i386/i386.c (x86_function_profiler): Use R10 to call
mcount in large model. Sorry for large model with PIC.

gcc/testsuite/

PR target/98482
* gcc.target/i386/pr98482-1.c: New test.
* gcc.target/i386/pr98482-2.c: Likewise.
---
 gcc/config/i386/i386.c| 25 +--
 gcc/testsuite/gcc.target/i386/pr98482-1.c |  9 
 gcc/testsuite/gcc.target/i386/pr98482-2.c |  9 
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98482-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98482-2.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fad50e7e537..7a07ab8b5f5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20794,8 +20794,29 @@ x86_function_profiler (FILE *file, int labelno 
ATTRIBUTE_UNUSED)
   fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
 #endif
 
-  if (!TARGET_PECOFF && flag_pic)
-   fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
+  if (!TARGET_PECOFF)
+   {
+ switch (ix86_cmodel)
+   {
+   case CM_LARGE:
+ /* NB: R10 can be used as a scratch register here since
+R10 is preserved when calling mcount.  */
+ fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
+  mcount_name);
+ break;
+   case CM_LARGE_PIC:
+ sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
+ break;
+   case CM_SMALL_PIC:
+   case CM_MEDIUM_PIC:
+ fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
+  mcount_name);
+ break;
+   default:
+ x86_print_call_or_nop (file, mcount_name);
+ break;
+   }
+   }
   else
x86_print_call_or_nop (file, mcount_name);
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-1.c 
b/gcc/testsuite/gcc.target/i386/pr98482-1.c
new file mode 100644
index 000..72d5ccb269c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98482-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-require-effective-target mfentry } */
+/* { dg-options "-fprofile -mfentry -O2 -mcmodel=large" } */
+/* { dg-final { scan-assembler "movabsq\t\\\$__fentry__, 
%r10\n\tcall\t\\*%r10" } } */
+
+void
+func (void)
+{
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr98482-2.c 
b/gcc/testsuite/gcc.target/i386/pr98482-2.c
new file mode 100644
index 000..aed3ca4b6ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98482-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-require-effective-target mfentry } */
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-fpic -fprofile -mfentry -O2 -mcmodel=large" } */
+
+void
+func (void)
+{
+} /* { dg-message "sorry, unimplemented: profiling '-mcmodel=large' with PIC 
is not supported" } */
-- 
2.29.2



[PATCH] IBM Z: Introduce __LONG_DOUBLE_VX__ macro

2021-01-07 Thread Ilya Leoshkevich via Gcc-patches
Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?



Give end users the opportunity to find out whether long doubles are
stored in floating-point register pairs or in vector registers, so that
they could fine-tune their asm statements.

gcc/ChangeLog:

2020-12-14  Ilya Leoshkevich  

* config/s390/s390-c.c (s390_def_or_undef_macro): Accept
callables instead of mask values.
(struct target_flag_set_p): New predicate.
(s390_cpu_cpp_builtins_internal): Define or undefine
__LONG_DOUBLE_VX__ macro.

gcc/testsuite/ChangeLog:

2020-12-14  Ilya Leoshkevich  

* gcc.target/s390/vector/long-double-vx-macro-off-on.c: New test.
* gcc.target/s390/vector/long-double-vx-macro-on-off.c: New test.
---
 gcc/config/s390/s390-c.c  | 59 ---
 .../s390/vector/long-double-vx-macro-off-on.c | 11 
 .../s390/vector/long-double-vx-macro-on-off.c | 11 
 3 files changed, 60 insertions(+), 21 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/s390/vector/long-double-vx-macro-off-on.c
 create mode 100644 
gcc/testsuite/gcc.target/s390/vector/long-double-vx-macro-on-off.c

diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c
index 95cd2df505d..29b87d76ab1 100644
--- a/gcc/config/s390/s390-c.c
+++ b/gcc/config/s390/s390-c.c
@@ -294,9 +294,9 @@ s390_macro_to_expand (cpp_reader *pfile, const cpp_token 
*tok)
 /* Helper function that defines or undefines macros.  If SET is true, the macro
MACRO_DEF is defined.  If SET is false, the macro MACRO_UNDEF is undefined.
Nothing is done if SET and WAS_SET have the same value.  */
+template 
 static void
-s390_def_or_undef_macro (cpp_reader *pfile,
-unsigned int mask,
+s390_def_or_undef_macro (cpp_reader *pfile, F is_set,
 const struct cl_target_option *old_opts,
 const struct cl_target_option *new_opts,
 const char *macro_def, const char *macro_undef)
@@ -304,8 +304,8 @@ s390_def_or_undef_macro (cpp_reader *pfile,
   bool was_set;
   bool set;
 
-  was_set = (!old_opts) ? false : old_opts->x_target_flags & mask;
-  set = new_opts->x_target_flags & mask;
+  was_set = (!old_opts) ? false : is_set (old_opts);
+  set = is_set (new_opts);
   if (was_set == set)
 return;
   if (set)
@@ -314,6 +314,19 @@ s390_def_or_undef_macro (cpp_reader *pfile,
 cpp_undef (pfile, macro_undef);
 }
 
+struct target_flag_set_p
+{
+  target_flag_set_p (unsigned int mask) : m_mask (mask) {}
+
+  bool
+  operator() (const struct cl_target_option *opts) const
+  {
+return opts->x_target_flags & m_mask;
+  }
+
+  unsigned int m_mask;
+};
+
 /* Internal function to either define or undef the appropriate system
macros.  */
 static void
@@ -321,18 +334,18 @@ s390_cpu_cpp_builtins_internal (cpp_reader *pfile,
struct cl_target_option *opts,
const struct cl_target_option *old_opts)
 {
-  s390_def_or_undef_macro (pfile, MASK_OPT_HTM, old_opts, opts,
-  "__HTM__", "__HTM__");
-  s390_def_or_undef_macro (pfile, MASK_OPT_VX, old_opts, opts,
-  "__VX__", "__VX__");
-  s390_def_or_undef_macro (pfile, MASK_ZVECTOR, old_opts, opts,
-  "__VEC__=10303", "__VEC__");
-  s390_def_or_undef_macro (pfile, MASK_ZVECTOR, old_opts, opts,
-  "__vector=__attribute__((vector_size(16)))",
+  s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_OPT_HTM), old_opts,
+  opts, "__HTM__", "__HTM__");
+  s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_OPT_VX), old_opts,
+  opts, "__VX__", "__VX__");
+  s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
+  opts, "__VEC__=10303", "__VEC__");
+  s390_def_or_undef_macro (pfile, target_flag_set_p (MASK_ZVECTOR), old_opts,
+  opts, "__vector=__attribute__((vector_size(16)))",
   "__vector__");
-  s390_def_or_undef_macro (pfile, MASK_ZVECTOR, old_opts, opts,
-  "__bool=__attribute__((s390_vector_bool)) unsigned",
-  "__bool");
+  s390_def_or_undef_macro (
+  pfile, target_flag_set_p (MASK_ZVECTOR), old_opts, opts,
+  "__bool=__attribute__((s390_vector_bool)) unsigned", "__bool");
   {
 char macro_def[64];
 gcc_assert (s390_arch != PROCESSOR_NATIVE);
@@ -340,16 +353,20 @@ s390_cpu_cpp_builtins_internal (cpp_reader *pfile,
 cpp_undef (pfile, "__ARCH__");
 cpp_define (pfile, macro_def);
   }
+  s390_def_or_undef_macro (
+  pfile,
+  [] (const struct cl_target_option *opts) { return TARGET_Z14_P (opts); },
+  old_opts, opts, "__LONG_DOUBLE_VX__", "__LONG_DOUBLE_VX__");
 
   if (!flag_iso)
 {
-  s390_def_or_undef_macro (pfile, MASK_ZVECTOR, old_opts, opts,
- 

[PATCH] c++: Add support for -std=c++2b

2021-01-07 Thread Paul Fee via Gcc-patches
Derived from the changes that added C++2a support in 2017.
https://gcc.gnu.org/g:026a79f70cf33f836ea5275eda72d4870a3041e5

No C++2b features are added here.
Use of -std=c++2b sets __cplusplus to 202101L.


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5541e694bb3..3a0d452b62b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2021-01-08  Paul Fee  
+
+Add support for -std=c++2b
+* doc/cpp.texi (__cplusplus): Document value for -std=c++2b
+or -std=gnu++2b.
+* doc/invoke.texi: Document -std=c++2b and -std=gnu++2b.
+
 2021-01-06  Vladimir N. Makarov  

 PR rtl-optimization/97978
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 654c2360085..9af4050fdc8 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,13 @@
+2021-01-08  Paul Fee  
+
+Add support for -std=c++2b
+* c-common.h (cxx_dialect): Add cxx2b as a dialect.
+* c.opt: Add options for -std=c++2b and -std=gnu++2b.
+* c-opts.c (set_std_cxx2b): New.
+(c_common_handle_option): Set options when -std=c++2b is enabled.
+(c_common_post_options): Adjust comments.
+(set_std_cxx20): Likewise.
+
 2021-01-06  Martin Sebor  

 PR c++/95768
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index a65c78f7240..cfa056a6e28 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -738,7 +738,9 @@ enum cxx_dialect {
   /* C++17 */
   cxx17,
   /* C++20 */
-  cxx20
+  cxx20,
+  /* C++23? */
+  cxx2b
 };

 /* The C++ dialect being used. C++98 is the default.  */
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 3cdf41bc6e2..2612fac5a62 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -113,6 +113,7 @@ static void set_std_cxx11 (int);
 static void set_std_cxx14 (int);
 static void set_std_cxx17 (int);
 static void set_std_cxx20 (int);
+static void set_std_cxx2b (int);
 static void set_std_c89 (int, int);
 static void set_std_c99 (int);
 static void set_std_c11 (int);
@@ -649,6 +650,12 @@ c_common_handle_option (size_t scode, const char
*arg, HOST_WIDE_INT value,
 set_std_cxx20 (code == OPT_std_c__20 /* ISO */);
   break;

+case OPT_std_c__2b:
+case OPT_std_gnu__2b:
+  if (!preprocessing_asm_p)
+set_std_cxx2b (code == OPT_std_c__2b /* ISO */);
+  break;
+
 case OPT_std_c90:
 case OPT_std_iso9899_199409:
   if (!preprocessing_asm_p)
@@ -1019,7 +1026,7 @@ c_common_post_options (const char **pfilename)
 warn_narrowing = 1;

   /* Unless -f{,no-}ext-numeric-literals has been used explicitly,
- for -std=c++{11,14,17,2a} default to -fno-ext-numeric-literals.  */
+ for -std=c++{11,14,17,20,2b} default to -fno-ext-numeric-literals.  */
   if (flag_iso && !global_options_set.x_flag_ext_numeric_literals)
 cpp_opts->ext_numeric_literals = 0;
 }
@@ -1763,7 +1770,7 @@ set_std_cxx20 (int iso)
   flag_no_gnu_keywords = iso;
   flag_no_nonansi_builtin = iso;
   flag_iso = iso;
-  /* C++17 includes the C11 standard library.  */
+  /* C++20 includes the C11 standard library.  */
   flag_isoc94 = 1;
   flag_isoc99 = 1;
   flag_isoc11 = 1;
@@ -1773,6 +1780,24 @@ set_std_cxx20 (int iso)
   lang_hooks.name = "GNU C++20";
 }

+/* Set the C++ 2023 draft standard (without GNU extensions if ISO).  */
+static void
+set_std_cxx2b (int iso)
+{
+cpp_set_lang (parse_in, iso ? CLK_CXX2B: CLK_GNUCXX2B);
+flag_no_gnu_keywords = iso;
+flag_no_nonansi_builtin = iso;
+flag_iso = iso;
+/* C++2b includes the C11 standard library.  */
+flag_isoc94 = 1;
+flag_isoc99 = 1;
+flag_isoc11 = 1;
+/* C++2b includes coroutines.  */
+flag_coroutines = true;
+cxx_dialect = cxx2b;
+lang_hooks.name = "GNU C++20"; /* Pretend C++20 until standardization.  */
+}
+
 /* Args to -d specify what to dump.  Silently ignore
unrecognized options; they may be aimed at toplev.c.  */
 static void
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 1766364806e..3464d72591b 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2214,6 +2214,11 @@ std=c++20
 C++ ObjC++
 Conform to the ISO 2020 C++ draft standard (experimental and
incomplete support).

+std=c++2b
+C++ ObjC++
+Conform to the ISO 2023 (?) C++ draft standard (experimental and
+incomplete support).
+
 std=c11
 C ObjC
 Conform to the ISO 2011 C standard.
@@ -2292,6 +2297,11 @@ std=gnu++20
 C++ ObjC++
 Conform to the ISO 2020 C++ draft standard with GNU extensions
(experimental and incomplete support).

+std=gnu++2b
+C++ ObjC++
+Conform to the ISO 2023 (?) C++ draft standard with GNU extensions
(experimental
+and incomplete support).
+
 std=gnu11
 C ObjC
 Conform to the ISO 2011 C standard with GNU extensions.
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 25f2625d8bd..f801024affd 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -1907,9 +1907,10 @@ selected, the value of the macro is
 @code{201103L} for the 2011 C++ standard,
 @code{201402L} for the 2014 C++ standard,
 @code{201703L} f

[PATCH] issue -Wstring-compare for member arrays (PR 98097)

2021-01-07 Thread Martin Sebor via Gcc-patches

In PR 98097 Richard expects -Wstring-compare for a call to strcmp()
with a member array and a string literal of larger size, used in
an equality test.

In virtually all cases the test will indicate the two are unequal
because the string stored in the member must be shorter (to fit
the terminating nul), but GCC doesn't fold the result because
there's wicked code out there that treats whole aggregates as if
they were strings, up to their full size.  Because the warning is
based on the same conservative assumptions as the optimization,
it doesn't trigger, letting the almost certain bug go unnoticed.

The attached patch allows -Wstring-compare to trigger for these
bugs by partly decoupling the warning from the underlying strcmp
optimization.  Making this possible requires adding a new member
to the c_strlen_data struct, which in turn called for changing
the meaning of the existing decl member to nonstr.  That led to
changes elsewhere, simply to adjust to the name change.  For
the purposes of review, the meat of the warning changes is in
tree-ssa-strlen.c.  All the rest of changes simply adjust code
to the new name.

Tested on x86_64-linux (None of Binutils, GDB, Glibc, or Valgrind
triggers any instances of the warning with this change.)

Martin
PR middle-end/98097 - missing -Wstring-compare with a member array

gcc/ChangeLog:

	PR middle-end/98097
	* builtins.c (unterminated_array): Adjust to a name change.  Adjust
	indentation.
	(c_strlen): Use a member instead of a local variable.
	(expand_builtin_stpcpy_1): Adjust to a name change.
	(fold_builtin_strlen): Same.
	* builtins.h (struct c_strlen_data::nonstr): New data member to use
	instead of decl.
	 (struct c_strlen_data::decl): Adjust comment.
	* gimple-fold.c (get_range_strlen_tree): Set c_strlen_data::nonstr
	in addition to c_strlen_data::decl.
	(get_maxval_strlen): Adjust to a name change.
	(gimple_fold_builtin_stpcpy): Same.
	(gimple_fold_builtin_strlen): Same.
	* gimple-ssa-sprintf.c (get_string_length): Same.
	* tree-ssa-strlen.c (get_range_strlen_dynamic): Same.  Also set
	struct c_strlen_data::decl.
	(get_len_or_size): Use c_strlen_data::decl.  Succeed even for
	nonconstant member arrays.
	(strxcmp_eqz_result): Handle member arrays.
	(handle_builtin_string_cmp): Issue warnings for member arrays.

gcc/testsuite/ChangeLog:

	PR middle-end/98097
	* gcc.dg/Wstring-compare.c:
	* gcc.dg/strcmpopt_10.c:
	* gcc.dg/Wstring-compare-4.c: New test.
	* gcc.dg/Wstring-compare-5.c: New test.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index ffbb9b7f5f1..9b7a82153c8 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -1253,42 +1253,41 @@ check_nul_terminated_array (tree expr, tree src,
 tree
 unterminated_array (tree exp, tree *size /* = NULL */, bool *exact /* = NULL */)
 {
-  /* C_STRLEN will return NULL and set DECL in the info
- structure if EXP references a unterminated array.  */
+  /* C_STRLEN will return NULL and set LENDATA.NONSTR to the DECL
+ of the unterminated array if EXP references one.  */
   c_strlen_data lendata = { };
   tree len = c_strlen (exp, 1, &lendata);
-  if (len == NULL_TREE && lendata.minlen && lendata.decl)
- {
-   if (size)
+  if (len || !lendata.minlen || !lendata.nonstr)
+return NULL_TREE;
+
+  if (size)
+{
+  len = lendata.minlen;
+  if (lendata.off)
 	{
-	  len = lendata.minlen;
-	  if (lendata.off)
+	  /* Constant offsets are already accounted for in LENDATA.MINLEN,
+	 but not in a SSA_NAME + CST expression.  */
+	  if (TREE_CODE (lendata.off) == INTEGER_CST)
+	*exact = true;
+	  else if (TREE_CODE (lendata.off) == PLUS_EXPR
+		   && TREE_CODE (TREE_OPERAND (lendata.off, 1)) == INTEGER_CST)
 	{
-	  /* Constant offsets are already accounted for in LENDATA.MINLEN,
-		 but not in a SSA_NAME + CST expression.  */
-	  if (TREE_CODE (lendata.off) == INTEGER_CST)
-		*exact = true;
-	  else if (TREE_CODE (lendata.off) == PLUS_EXPR
-		   && TREE_CODE (TREE_OPERAND (lendata.off, 1)) == INTEGER_CST)
-		{
-		  /* Subtract the offset from the size of the array.  */
-		  *exact = false;
-		  tree temp = TREE_OPERAND (lendata.off, 1);
-		  temp = fold_convert (ssizetype, temp);
-		  len = fold_build2 (MINUS_EXPR, ssizetype, len, temp);
-		}
-	  else
-		*exact = false;
+	  /* Subtract the offset from the size of the array.  */
+	  *exact = false;
+	  tree temp = TREE_OPERAND (lendata.off, 1);
+	  temp = fold_convert (ssizetype, temp);
+	  len = fold_build2 (MINUS_EXPR, ssizetype, len, temp);
 	}
 	  else
-	*exact = true;
-
-	  *size = len;
+	*exact = false;
 	}
-   return lendata.decl;
- }
+  else
+	*exact = true;
 
-  return NULL_TREE;
+  *size = len;
+}
+
+  return lendata.nonstr;
 }
 
 /* Compute the length of a null-terminated character string or wide
@@ -1353,8 +1352,7 @@ c_strlen (tree arg, int only_value, c_strlen_data *data, unsigned eltsize)
   /* Offset from the beginning of the string in bytes.  */
   tree byteoff;
   tr

Re: [PATCH] c++: Fix access checking of scoped non-static member [PR98515]

2021-01-07 Thread Jason Merrill via Gcc-patches

On 1/7/21 5:47 PM, Patrick Palka wrote:

On Thu, 7 Jan 2021, Jason Merrill wrote:


On 1/6/21 1:19 PM, Patrick Palka wrote:

In the first testcase below, we incorrectly reject the use of the
protected non-static member A::var0 from C::g() because
check_accessibility_of_qualified_id, at template parse time, determines
that the access doesn't go through 'this'.  (This happens because the
dependent base B of C doesn't have a binfo object, so it appears
to DERIVED_FROM_P that A is not an indirect base of C.)  From there
we create the corresponding deferred access check, which we then
perform at instantiation time and which (unsurprisingly) fails.

The problem ultimately seems to be that we can't, in general, know
whether a use of a scoped non-static member goes through 'this' until
instantiation time, as the second testcase below demonstrates.  So this
patch makes check_accessibility_of_qualified_id punt in this situation.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit?

gcc/cp/ChangeLog:

PR c++/98515
* semantics.c (check_accessibility_of_qualified_id): Punt if
we're checking the access of a scoped non-static member at
class template parse time.

gcc/testsuite/ChangeLog:

PR c++/98515
* g++.dg/template/access32.C: New test.
* g++.dg/template/access33.C: New test.
---
   gcc/cp/semantics.c   | 20 +++-
   gcc/testsuite/g++.dg/template/access32.C |  8 
   gcc/testsuite/g++.dg/template/access33.C |  9 +
   3 files changed, 32 insertions(+), 5 deletions(-)
   create mode 100644 gcc/testsuite/g++.dg/template/access32.C
   create mode 100644 gcc/testsuite/g++.dg/template/access33.C

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index b448efe024a..f52b2e4d1e7 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -2107,14 +2107,24 @@ check_accessibility_of_qualified_id (tree decl,
 /* If the reference is to a non-static member of the
 current class, treat it as if it were referenced through
 `this'.  */
-  tree ct;
 if (DECL_NONSTATIC_MEMBER_P (decl)
- && current_class_ptr
- && DERIVED_FROM_P (scope, ct = current_nonlambda_class_type ()))
-   qualifying_type = ct;
+ && current_class_ptr)
+   {
+ if (dependent_type_p (TREE_TYPE (current_class_ptr)))


This should also look at current_nonlambda_class_type.


Ah, ack.  But it seems to me we really only need to be checking
dependence of current_nonlambda_class_type here.


Yes, that's what I meant, sorry about the ambiguous use of "also".  :)


 IIUC, dependence of
these two types should coincide except in the case where we're inside a
generic lambda within a non-template class (in which case
current_class_ptr will be dependent and current_nonlambda_class_type won't).
But in this case we have enough information to be able to resolve the
access check at parse time, I think (and so we shouldn't punt).

The below patch, which seems to pass 'make check-c++', checks the
dependence of current_nonlambda_class_type instead of that of
current_class_ptr.  Does this approach seem right?


OK.


-- >8 --

Subject: [PATCH] c++: Fix access checking of scoped non-static member
  [PR98515]

In the first testcase below, we incorrectly reject the use of the
protected non-static member A::var0 from C::g() because
check_accessibility_of_qualified_id, at template parse time, determines
that the access doesn't go through 'this'.  (This happens because the
dependent base B of C doesn't have a binfo object, so it appears
to DERIVED_FROM_P that A is not an indirect base of C.)  From there
we create the corresponding deferred access check, which we then
perform at instantiation time and which (unsurprisingly) fails.

The problem ultimately seems to be that we can't in general determine
whether a use of a scoped non-static member goes through 'this' until
instantiation time, as the second testcase below illustrates.  So this
patch makes check_accessibility_of_qualified_id punt in such situations
to avoid creating a bogus deferred access check.

gcc/cp/ChangeLog:

PR c++/98515
* semantics.c (check_accessibility_of_qualified_id): Punt if
we're checking access of a scoped non-static member inside a
class template.

gcc/testsuite/ChangeLog:

PR c++/98515
* g++.dg/template/access32.C: New test.
* g++.dg/template/access33.C: New test.
---
  gcc/cp/semantics.c   | 22 +-
  gcc/testsuite/g++.dg/template/access32.C |  8 
  gcc/testsuite/g++.dg/template/access33.C |  9 +
  3 files changed, 34 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/access32.C
  create mode 100644 gcc/testsuite/g++.dg/template/access33.C

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index b448efe024a..51f7c114b03 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/sema

[PATCH 0/4] Fixes for `const_double_zero' use + VAX iterator simplification

2021-01-07 Thread Maciej W. Rozycki
Hi,

 This patch series fixes an issue with `const_double_zero' use causing a 
build failure reported with the `pdp11-aout' target by Martin Liška: 

superseding an earlier proposal discussed here:
.

 I have decided to split the fix into pieces so as to have target bits as 
separate changes in order to make possible backports easier.

 In the course of investigation I have noticed a VAX iterator attribute I 
have recently introduced duplicates the standard `mode' attribute 
unnecessarily so I have decided to remove it along with the fix; it has a 
syntactic dependency on 3/4, so it has to be a part of the series.

 See individual changes for detailed descriptions.

 These changes have been regression-tested with the `vax-netbsdelf' target 
and verified manually with the `pdp11-aout' target.  For 4/4 I have also
verified that no change has been made to target library code produced.

 OK to apply?

  Maciej


[PATCH 1/4] RTL: Update `const_double_zero' handling for mode and callable insns

2021-01-07 Thread Maciej W. Rozycki
Handle machine mode specification with `const_double_zero' and handle 
the rtx with callable code produced from named insns.  This complements
commit 20ab43b5cad6 ("RTL: Add `const_double_zero' syntactic rtx") and
removes the build regression caused by commit c60d0736dff7 ("PDP11: Use
`const_double_zero' to express double zero constant"), observed with the
`pdp11-aout' target:

genemit: Internal error: abort in gen_exp, at genemit.c:202
make[2]: *** [Makefile:2427: s-emit] Error 1

where a:

(const_double 0 [0] 0 [0] 0 [0] 0 [0])

rtx coming from:

(parallel [
(set (reg:CC 16)
(compare:CC (abs:DF (match_operand:DF 1 ("general_operand") 
("0,0")))
(const_double 0 [0] 0 [0] 0 [0] 0 [0])))
(set (match_operand:DF 0 ("nonimmediate_operand") ("=fR,Q"))
(abs:DF (match_dup 1)))
])

and ultimately `(const_double_zero)' referred to in a named RTL insn cannot 
be interpreted.  Handle the rtx then by supplying the constant 0 double 
operand requested, resulting in the following update to insn-emit.c code 
produced for the `pdp11-aout' target, relative to before the triggering 
commit:

@@ -1514,7 +1514,7 @@ gen_absdf2_cc (rtx operand0 ATTRIBUTE_UN
gen_rtx_COMPARE (CCmode,
gen_rtx_ABS (DFmode,
operand1),
-   const0_rtx)),
+   CONST_DOUBLE_ATOF ("0", VOIDmode))),
gen_rtx_SET (operand0,
gen_rtx_ABS (DFmode,
copy_rtx (operand1);
@@ -1555,7 +1555,7 @@ gen_negdf2_cc (rtx operand0 ATTRIBUTE_UN
gen_rtx_COMPARE (CCmode,
gen_rtx_NEG (DFmode,
operand1),
-   const0_rtx)),
+   CONST_DOUBLE_ATOF ("0", VOIDmode))),
gen_rtx_SET (operand0,
gen_rtx_NEG (DFmode,
copy_rtx (operand1);
@@ -1790,7 +1790,7 @@ gen_muldf3_cc (rtx operand0 ATTRIBUTE_UN
gen_rtx_MULT (DFmode,
operand1,
operand2),
-   const0_rtx)),
+   CONST_DOUBLE_ATOF ("0", VOIDmode))),
gen_rtx_SET (operand0,
gen_rtx_MULT (DFmode,
copy_rtx (operand1),
@@ -1942,7 +1942,7 @@ gen_divdf3_cc (rtx operand0 ATTRIBUTE_UN
gen_rtx_DIV (DFmode,
operand1,
operand2),
-   const0_rtx)),
+   CONST_DOUBLE_ATOF ("0", VOIDmode))),
gen_rtx_SET (operand0,
gen_rtx_DIV (DFmode,
copy_rtx (operand1),

This does not (yet) remove VOIDmode CONST_DOUBLE use, as it is up to 
individual machine descriptions to choose.

gcc/
* genemit.c (gen_exp) <CONST_DOUBLE>: Handle `const_double_zero' 
rtx.
* read-rtl.c (rtx_reader::read_rtx_code): Handle machine mode 
with `const_double_zero'.
* doc/rtl.texi (Constant Expression Types): Document it.
---
 gcc/doc/rtl.texi |4 ++--
 gcc/genemit.c|8 
 gcc/read-rtl.c   |9 +
 3 files changed, 19 insertions(+), 2 deletions(-)

gcc-const-double-zero-mode.diff
Index: gcc/gcc/doc/rtl.texi
===
--- gcc.orig/gcc/doc/rtl.texi
+++ gcc/gcc/doc/rtl.texi
@@ -1719,13 +1719,13 @@ of code @code{const_double} and therefor
 provided:
 
 @smallexample
-(const_double_zero)
+(const_double_zero:@var{m})
 @end smallexample
 
 standing for:
 
 @smallexample
-(const_double 0 0 @dots{})
+(const_double:@var{m} 0 0 @dots{})
 @end smallexample
 
 for matching the floating-point value zero, possibly the only useful one.
Index: gcc/gcc/genemit.c
===
--- gcc.orig/gcc/genemit.c
+++ gcc/gcc/genemit.c
@@ -195,6 +195,14 @@ gen_exp (rtx x, enum rtx_code subroutine
   return;
 
 case CONST_DOUBLE:
+  /* Handle `const_double_zero' rtx.  */
+  if (CONST_DOUBLE_REAL_VALUE (x)->cl == rvc_zero)
+   {
+ printf ("CONST_DOUBLE_ATOF (\"0\", %smode)",
+ GET_MODE_NAME (GET_MODE (x)));
+ return;
+   }
+  /* Fall through.  */
 case CONST_FIXED:
 case CONST_WIDE_INT:
   /* These shouldn't be written in MD files.  Instead, the appropriate
Index: gcc/gcc/read-rtl.c
===
--- gcc.orig/gcc/read-rtl.c
+++ gcc/gcc/read-rtl.c
@@ -1658,6 +1658,15 @@ rtx_reader::read_rtx_code (const char *c
   return_rtx = rtx_alloc (code);
   memset (return_rtx, 0, RTX_CODE_SIZE (code));
   PUT_CODE (return_rtx, code);
+  c = read_skip_spaces ();
+  if (c == ':')
+   {
+ file_location loc = read_name (&name);
+ record_potential_iterator_use (&modes, loc, return_rtx, 0,
+name.string);
+   }
+  else
+   unread_char (c);
   return return_rtx;
 }
 


[PATCH 2/4] PDP11: Use a mode with `const_double_zero' expressions

2021-01-07 Thread Maciej W. Rozycki
For predictable semantics propagate the mode from operands referred by 
FP substitutions to the `const_double_zero' expressions used with the 
associated condition code calculation, resulting in the following update 
to insn-emit.c code produced for the `pdp11-aout' target (with machine 
description line numbering change noise removed):

@@ -1514,7 +1514,7 @@
gen_rtx_COMPARE (CCmode,
gen_rtx_ABS (DFmode,
operand1),
-   CONST_DOUBLE_ATOF ("0", VOIDmode))),
+   CONST_DOUBLE_ATOF ("0", DFmode))),
gen_rtx_SET (operand0,
gen_rtx_ABS (DFmode,
copy_rtx (operand1);
@@ -1555,7 +1555,7 @@
gen_rtx_COMPARE (CCmode,
gen_rtx_NEG (DFmode,
operand1),
-   CONST_DOUBLE_ATOF ("0", VOIDmode))),
+   CONST_DOUBLE_ATOF ("0", DFmode))),
gen_rtx_SET (operand0,
gen_rtx_NEG (DFmode,
copy_rtx (operand1);
@@ -1790,7 +1790,7 @@
gen_rtx_MULT (DFmode,
operand1,
operand2),
-   CONST_DOUBLE_ATOF ("0", VOIDmode))),
+   CONST_DOUBLE_ATOF ("0", DFmode))),
gen_rtx_SET (operand0,
gen_rtx_MULT (DFmode,
copy_rtx (operand1),
@@ -1942,7 +1942,7 @@
gen_rtx_DIV (DFmode,
operand1,
operand2),
-   CONST_DOUBLE_ATOF ("0", VOIDmode))),
+   CONST_DOUBLE_ATOF ("0", DFmode))),
gen_rtx_SET (operand0,
gen_rtx_DIV (DFmode,
copy_rtx (operand1),

Provide a new iterator to provide copies of FP substitutions across the 
FP modes supported as the substitutions now need to match the mode of 
the operands.

gcc/
* config/pdp11/pdp11.md (PDPfp): New mode iterator.
(fcc_cc, fcc_ccnz): Use it.  Add mode to `const_double_zero' and 
operands.
---
 gcc/config/pdp11/pdp11.md |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

gcc-pdp11-const-double-zero-mode.diff
Index: gcc/gcc/config/pdp11/pdp11.md
===
--- gcc.orig/gcc/config/pdp11/pdp11.md
+++ gcc/gcc/config/pdp11/pdp11.md
@@ -82,6 +82,8 @@
 
 (define_code_iterator SHF [ashift ashiftrt lshiftrt])
 
+(define_mode_iterator PDPfp [SF DF])
+
 ;; Substitution to turn a CC clobber into a CC setter.  We have four of
 ;; these: for CCmode vs. CCNZmode, and for CC_REGNUM vs. FCC_REGNUM.
 (define_subst "cc_cc"
@@ -101,19 +103,19 @@
(set (match_dup 0) (match_dup 1))])
 
 (define_subst "fcc_cc"
-  [(set (match_operand 0 "") (match_operand 1 ""))
+  [(set (match_operand:PDPfp 0 "") (match_operand:PDPfp 1 ""))
(clobber (reg FCC_REGNUM))]
   ""
   [(set (reg:CC FCC_REGNUM)
-   (compare:CC (match_dup 1) (const_double_zero)))
+   (compare:CC (match_dup 1) (const_double_zero:PDPfp)))
(set (match_dup 0) (match_dup 1))])
 
 (define_subst "fcc_ccnz"
-  [(set (match_operand 0 "") (match_operand 1 ""))
+  [(set (match_operand:PDPfp 0 "") (match_operand:PDPfp 1 ""))
(clobber (reg FCC_REGNUM))]
   ""
   [(set (reg:CCNZ FCC_REGNUM)
-   (compare:CCNZ (match_dup 1) (const_double_zero)))
+   (compare:CCNZ (match_dup 1) (const_double_zero:PDPfp)))
(set (match_dup 0) (match_dup 1))])
 
 (define_subst_attr "cc_cc" "cc_cc" "_nocc" "_cc")


[PATCH 3/4] VAX: Use a mode with `const_double_zero' expressions

2021-01-07 Thread Maciej W. Rozycki
For predictable semantics propagate the mode from operands referred by 
the FP substitution to the `const_double_zero' expressions used with the 
associated condition code calculation.  Use an iterator to make copies 
of the FP substitution across the FP modes supported as the substitution 
now has to match the mode of the operands.

gcc/
* config/vax/vax.md (subst_f): Add mode to operands and 
`const_double_zero'.
---
 gcc/config/vax/vax.md |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

gcc-vax-const-double-zero-mode.diff
Index: gcc/gcc/config/vax/vax.md
===
--- gcc.orig/gcc/config/vax/vax.md
+++ gcc/gcc/config/vax/vax.md
@@ -79,13 +79,13 @@
(match_dup 1))])
 
 (define_subst "subst_f"
-  [(set (match_operand 0 "")
-   (match_operand 1 ""))
+  [(set (match_operand:VAXfp 0 "")
+   (match_operand:VAXfp 1 ""))
(clobber (reg:CC VAX_PSL_REGNUM))]
   ""
   [(set (reg:VAXccnz VAX_PSL_REGNUM)
(compare:VAXccnz (match_dup 1)
-(const_double_zero)))
+(const_double_zero:VAXfp)))
(set (match_dup 0)
(match_dup 1))])
 


[PATCH 4/4] VAX: Remove a duplicate `cc' mode attribute

2021-01-07 Thread Maciej W. Rozycki
Remove the `cc' mode attribute that duplicates the implicitly defined 
`mode' attribute.  No change to semantics.

gcc/
* config/vax/vax.md (cc): Remove mode attribute.
(subst_<cc>, subst_f<cc>): Rename to...
(subst_<mode>, subst_f<mode>): ... these respectively.
(*cbranch4_): Update for `cc' removal.
(*cbranch4_): Likewise.
(*branch_, *branch__reversed): Likewise.
---
 gcc/config/vax/vax.md |   13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

gcc-vax-cc-mode.diff
Index: gcc/gcc/config/vax/vax.md
===
--- gcc.orig/gcc/config/vax/vax.md
+++ gcc/gcc/config/vax/vax.md
@@ -58,7 +58,6 @@
 
 (define_mode_iterator VAXcc [CC CCN CCNZ CCZ])
 (define_mode_iterator VAXccnz [CCN CCNZ CCZ])
-(define_mode_attr cc [(CC "cc") (CCN "ccn") (CCNZ "ccnz") (CCZ "ccz")])
 
 (define_code_iterator any_extract [sign_extract zero_extract])
 
@@ -67,7 +66,7 @@
 (include "predicates.md")
 
 ;; Make instructions that set the N, N+Z, and Z condition codes respectively.
-(define_subst "subst_"
+(define_subst "subst_"
   [(set (match_operand 0 "")
(match_operand 1 ""))
(clobber (reg:CC VAX_PSL_REGNUM))]
@@ -78,7 +77,7 @@
(set (match_dup 0)
(match_dup 1))])
 
-(define_subst "subst_f"
+(define_subst "subst_f"
   [(set (match_operand:VAXfp 0 "")
(match_operand:VAXfp 1 ""))
(clobber (reg:CC VAX_PSL_REGNUM))]
@@ -2174,7 +2173,7 @@
 (define_insn_and_split "*cbranch4_"
   [(set (pc)
(if_then_else
- (match_operator 0 "vax__comparison_operator"
+ (match_operator 0 "vax__comparison_operator"
  [(match_operand:VAXint 1 "general_operand" "nrmT")
   (match_operand:VAXint 2 "general_operand" "nrmT")])
  (label_ref (match_operand 3 "" ""))
@@ -2206,7 +2205,7 @@
 (define_insn_and_split "*cbranch4_"
   [(set (pc)
(if_then_else
- (match_operator 0 "vax__comparison_operator"
+ (match_operator 0 "vax__comparison_operator"
  [(match_operand:VAXfp 1 "general_operand" "gF")
   (match_operand:VAXfp 2 "general_operand" "gF")])
  (label_ref (match_operand 3 "" ""))
@@ -2226,7 +2225,7 @@
 
 (define_insn "*branch_"
   [(set (pc)
-   (if_then_else (match_operator 0 "vax__comparison_operator"
+   (if_then_else (match_operator 0 "vax__comparison_operator"
  [(reg:VAXcc VAX_PSL_REGNUM)
   (const_int 0)])
  (label_ref (match_operand 1 "" ""))
@@ -2237,7 +2236,7 @@
 ;; Recognize reversed jumps.
 (define_insn "*branch__reversed"
   [(set (pc)
-   (if_then_else (match_operator 0 "vax__comparison_operator"
+   (if_then_else (match_operator 0 "vax__comparison_operator"
  [(reg:VAXcc VAX_PSL_REGNUM)
   (const_int 0)])
  (pc)


Re: [PATCH] genemit: Handle `const_double_zero' rtx

2021-01-07 Thread Maciej W. Rozycki
On Wed, 6 Jan 2021, Richard Sandiford wrote:

> VOIDmode const_doubles should only be used for integers that cannot
> be expressed as a sign-extended HOST_WIDE_INT.  So (const_double 0 0)
> is an invalid rtx in both integer and FP contexts.

 The updated change makes the VAX and PDP-11 backends stop producing 
those, but as I noted this is IMHO tangential to the choice between 
CONST0_RTX and CONST_DOUBLE_ATOF for code generators produced from named 
insns.  And the latter does not interpret the mode, i.e. does not enforce 
the policy (which itself I don't argue against), leaving it up to people 
writing machine descriptions to get it right, which I think is the right 
place.

> (FTR, the constant should also be the second operand to the plus,
> but that's obviously tangential.)

 Also FTR, I made it the first one deliberately, as I made my experimental 
change across all the four basic arithmetic operations, so for obvious 
reasons I wanted to prevent a divisor from being 0 lest the middle end get 
anxious about it.  Also contrary to documentation the middle end does 
present constants as the first operand in some cases where it is not 
supposed to (and does insist on doing so even if the backend says it's 
more expensive), as it was previously discussed in the context of COMPARE.

 Actually the VAX machine operand syntax is generic enough you can have an 
immediate as any operand, and in particular the first input to binary 
operations -- e.g. the minuend, and obviously also the first addend -- in 
their three-operand encodings; all the four basic arithmetic operations 
have them.

 I have posted the updated change as a patch series now, including you in 
the list of the recipients in case you wanted to chime in.  I appreciate 
your feedback and experience in this area, thank you!

  Maciej


Re: [PATCH] i386: Merge various insn name mapping code attributes

2021-01-07 Thread Hongtao Liu via Gcc-patches
On Thu, Jan 7, 2021 at 9:43 PM Uros Bizjak via Gcc-patches
 wrote:
>
> 2021-01-07  Uroš Bizjak  
>
> No functional changes.
>
> gcc/
> * config/i386/i386.md (insn): Merge from plusminus_insn, shift_insn,
> rotate_insn and optab code attributes.
> Update all uses to merged code attribute.
> * config/i386/sse.md: Update all uses to merged code attribute.
> * config/i386/mmx.md: Update all uses to merged code attribute.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
>
> Uros.

BTW, I'm thinking of putting all define_mode/code/int_iterator definitions
(and their corresponding attributes) into a new file named "iterators.md",
and also doing some refactoring of those iterators, as some of them seem duplicated.


-- 
BR,
Hongtao


Re: [PATCH] Fix array-quals-1.c for RISC-V

2021-01-07 Thread Kito Cheng via Gcc-patches
Committed

On Fri, Jan 8, 2021 at 3:29 AM Jim Wilson  wrote:
>
> On Wed, Jan 6, 2021 at 1:17 AM Kito Cheng  wrote:
>
> > RISC-V will put those variables in srodata rather than rodata.
> > gcc/testsuite/ChangeLog:
> > * gcc.dg/array-quals-1.c: Allow srodata.
> >
>
> OK.
>
> Jim


Re: [PATCH v2 0/2] RISC-V: Introduce new architecture extension test macros

2021-01-07 Thread Kito Cheng via Gcc-patches
Committed, thanks :)

On Fri, Jan 8, 2021 at 3:49 AM Jim Wilson  wrote:
>
> On Thu, Jan 7, 2021 at 1:55 AM Kito Cheng  wrote:
>
> > This patch set introduces a new set of architecture extension test macros
> > which was accepted in riscv-c-api-doc[1] recently.
> >
> > The motivation of this scheme is to have a unified naming scheme for
> > extension macros and to add the capability of checking versions.
> >
> > V2 Changes:
> > - Fix MacOS build issue.
> > - Create new header file: riscv-subset.h
> >
>
> This patch series looks good to me.
>
> Jim


[PATCH v2] aarch64: Add cpu cost tables for A64FX

2021-01-07 Thread Qian Jianhua
This patch adds cost tables for A64FX.

ChangeLog:
2021-01-08 Qian jianhua 

gcc/
* config/aarch64/aarch64-cost-tables.h (a64fx_extra_costs): New.
* config/aarch64/aarch64.c (a64fx_addrcost_table): New.
(a64fx_regmove_cost, a64fx_vector_cost): New.
(a64fx_tunings): Use the newly added cost tables.

Test Results:
* Bootstrap on aarch64 --- [OK]
* Regression tests --- [OK]
* Compile with -mcpu=a64fx --- [OK]

Regards!
---
 gcc/config/aarch64/aarch64-cost-tables.h | 103 +++
 gcc/config/aarch64/aarch64.c |  72 +++-
 2 files changed, 171 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
b/gcc/config/aarch64/aarch64-cost-tables.h
index 8a98bf4278c..c6805717f6e 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -541,4 +541,107 @@ const struct cpu_cost_table tsv110_extra_costs =
   }
 };
 
+const struct cpu_cost_table a64fx_extra_costs =
+{
+  /* ALU */
+  {
+0, /* arith.  */
+0, /* logical.  */
+0, /* shift.  */
+0, /* shift_reg.  */
+COSTS_N_INSNS (1), /* arith_shift.  */
+COSTS_N_INSNS (1), /* arith_shift_reg.  */
+COSTS_N_INSNS (1), /* log_shift.  */
+COSTS_N_INSNS (1), /* log_shift_reg.  */
+0, /* extend.  */
+COSTS_N_INSNS (1), /* extend_arith.  */
+0, /* bfi.  */
+0, /* bfx.  */
+0, /* clz.  */
+0, /* rev.  */
+0, /* non_exec.  */
+true   /* non_exec_costs_exec.  */
+  },
+  {
+/* MULT SImode */
+{
+  COSTS_N_INSNS (4),   /* simple.  */
+  COSTS_N_INSNS (4),   /* flag_setting.  */
+  COSTS_N_INSNS (4),   /* extend.  */
+  COSTS_N_INSNS (5),   /* add.  */
+  COSTS_N_INSNS (5),   /* extend_add.  */
+  COSTS_N_INSNS (18)   /* idiv.  */
+},
+/* MULT DImode */
+{
+  COSTS_N_INSNS (4),   /* simple.  */
+  0,   /* flag_setting (N/A).  */
+  COSTS_N_INSNS (4),   /* extend.  */
+  COSTS_N_INSNS (5),   /* add.  */
+  COSTS_N_INSNS (5),   /* extend_add.  */
+  COSTS_N_INSNS (26)   /* idiv.  */
+}
+  },
+  /* LD/ST */
+  {
+COSTS_N_INSNS (4), /* load.  */
+COSTS_N_INSNS (4), /* load_sign_extend.  */
+COSTS_N_INSNS (5), /* ldrd.  */
+COSTS_N_INSNS (4), /* ldm_1st.  */
+1, /* ldm_regs_per_insn_1st.  */
+2, /* ldm_regs_per_insn_subsequent.  */
+COSTS_N_INSNS (4), /* loadf.  */
+COSTS_N_INSNS (4), /* loadd.  */
+COSTS_N_INSNS (5), /* load_unaligned.  */
+0, /* store.  */
+0, /* strd.  */
+0, /* stm_1st.  */
+1, /* stm_regs_per_insn_1st.  */
+2, /* stm_regs_per_insn_subsequent.  */
+0, /* storef.  */
+0, /* stored.  */
+0, /* store_unaligned.  */
+COSTS_N_INSNS (1), /* loadv.  */
+COSTS_N_INSNS (1)  /* storev.  */
+  },
+  {
+/* FP SFmode */
+{
+  COSTS_N_INSNS (6),  /* div.  */
+  COSTS_N_INSNS (1),   /* mult.  */
+  COSTS_N_INSNS (1),   /* mult_addsub.  */
+  COSTS_N_INSNS (2),   /* fma.  */
+  COSTS_N_INSNS (1),   /* addsub.  */
+  COSTS_N_INSNS (1),   /* fpconst.  */
+  COSTS_N_INSNS (1),   /* neg.  */
+  COSTS_N_INSNS (1),   /* compare.  */
+  COSTS_N_INSNS (2),   /* widen.  */
+  COSTS_N_INSNS (2),   /* narrow.  */
+  COSTS_N_INSNS (2),   /* toint.  */
+  COSTS_N_INSNS (2),   /* fromint.  */
+  COSTS_N_INSNS (2)/* roundint.  */
+},
+/* FP DFmode */
+{
+  COSTS_N_INSNS (11),  /* div.  */
+  COSTS_N_INSNS (1),   /* mult.  */
+  COSTS_N_INSNS (1),   /* mult_addsub.  */
+  COSTS_N_INSNS (2),   /* fma.  */
+  COSTS_N_INSNS (1),   /* addsub.  */
+  COSTS_N_INSNS (1),   /* fpconst.  */
+  COSTS_N_INSNS (1),   /* neg.  */
+  COSTS_N_INSNS (1),   /* compare.  */
+  COSTS_N_INSNS (2),   /* widen.  */
+  COSTS_N_INSNS (2),   /* narrow.  */
+  COSTS_N_INSNS (2),   /* toint.  */
+  COSTS_N_INSNS (2),   /* fromint.  */
+  COSTS_N_INSNS (2)/* roundint.  */
+}
+  },
+  /* Vector */
+  {
+COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
 #endif
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 70ddd70556f..0bab5a3f3b8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aar