Re: [PATCH 2/2] doc/implement-c.texi: About same-as-scalar-type volatile aggregate accesses, PR94600

2020-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jul 7, 2020 at 6:03 AM Hans-Peter Nilsson via Gcc-patches
 wrote:
>
> We say very little about reads and writes to aggregate /
> compound objects, just scalar objects (i.e. assignments don't
> cause reads).  Let's say something safe about aggregate
> objects, but only for those that are the same size as a scalar
> type.
>
> There's an equal-sounding section (Volatiles) in extend.texi,
> but this seems a more appropriate place, as specifying the
> behavior of a standard qualifier.

Hmm, might be true only up to word-mode size, not, say, __int128_t.
Also very likely only in case the object has the same alignment
as the naturally aligned integer type.

Richard.

> gcc:
> PR middle-end/94600
> * doc/implement-c.texi (Qualifiers implementation): Add blurb
> about access to the whole of a volatile aggregate object, only for
> same-size as scalar object.
> ---
>  gcc/doc/implement-c.texi | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/gcc/doc/implement-c.texi b/gcc/doc/implement-c.texi
> index 692297b69c4..d64922b28ad 100644
> --- a/gcc/doc/implement-c.texi
> +++ b/gcc/doc/implement-c.texi
> @@ -576,6 +576,10 @@ are of scalar types, the expression is interpreted by 
> GCC as a read of
>  the volatile object; in the other cases, the expression is only evaluated
>  for its side effects.
>
> +When an object of aggregate type has the same size as a scalar type, GCC
> +handles an access to the whole of that volatile aggregate type object
> +equal to an access to that volatile same-sized scalar type object.
> +
>  @end itemize
>
>  @node Declarators implementation
> --
> 2.11.0
>


Re: [PATCH 1/2] PR94600: fix volatile access to the whole of a compound object.

2020-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jul 7, 2020 at 6:03 AM Hans-Peter Nilsson via Gcc-patches
 wrote:
>
> The store to the whole of each volatile object was picked apart
> like there had been an individual assignment to each of the
> fields.  Reads were added as part of that; see PR for details.
> The reads from volatile memory were a clear bug; individual
> stores questionable.  A separate patch clarifies the docs.
>
> Tested x86_64-linux, powerpc64le-linux and cris-elf.
> Ok to commit?  Backport to gcc-10?

OK for both.

Thanks,
Richard.

> gcc:
> PR middle-end/94600
> * expr.c (expand_constructor): Make a temporary also if we're
> storing to volatile memory.
>
> gcc/testsuite:
> PR middle-end/94600
> * gcc.dg/pr94600-1.c, gcc.dg/pr94600-2.c, gcc.dg/pr94600-3.c,
> gcc.dg/pr94600-4.c, gcc.dg/pr94600-5.c, gcc.dg/pr94600-6.c,
> gcc.dg/pr94600-7.c, gcc.dg/pr94600-8.c: New tests.
> ---
>  gcc/expr.c   |  5 -
>  gcc/testsuite/gcc.dg/pr94600-1.c | 36 
>  gcc/testsuite/gcc.dg/pr94600-2.c | 34 ++
>  gcc/testsuite/gcc.dg/pr94600-3.c | 35 +++
>  gcc/testsuite/gcc.dg/pr94600-4.c | 34 ++
>  gcc/testsuite/gcc.dg/pr94600-5.c | 34 ++
>  gcc/testsuite/gcc.dg/pr94600-6.c | 33 +
>  gcc/testsuite/gcc.dg/pr94600-7.c | 33 +
>  gcc/testsuite/gcc.dg/pr94600-8.c | 33 +
>  9 files changed, 276 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-5.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-6.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-7.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr94600-8.c
>
> diff --git a/gcc/expr.c b/gcc/expr.c
> index 3c68b0d754c..44ea577e03d 100644
> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -8379,7 +8379,10 @@ expand_constructor (tree exp, rtx target, enum 
> expand_modifier modifier,
>/* Handle calls that pass values in multiple non-contiguous
>   locations.  The Irix 6 ABI has examples of this.  */
>if (target == 0 || ! safe_from_p (target, exp, 1)
> -  || GET_CODE (target) == PARALLEL || modifier == EXPAND_STACK_PARM)
> +  || GET_CODE (target) == PARALLEL || modifier == EXPAND_STACK_PARM
> +  /* Also make a temporary if the store is to volatile memory, to
> +avoid individual accesses to aggregate members.  */
> +  || (GET_CODE (target) == MEM && MEM_VOLATILE_P (target)))
>  {
>if (avoid_temp_mem)
> return NULL_RTX;
> diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c 
> b/gcc/testsuite/gcc.dg/pr94600-1.c
> new file mode 100644
> index 000..b5913a0939c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94600-1.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-options "-fdump-rtl-final -O2" } */
> +
> +/* Assignments to a whole struct of suitable size (32 bits) must not be
> +   picked apart into field accesses. */
> +
> +typedef struct {
> +  unsigned int f0 : 4;
> +  unsigned int f1 : 11;
> +  unsigned int f2 : 10;
> +  unsigned int f3 : 7;
> +} t0;
> +
> +static t0 a0[] = {
> + { .f0 = 7, .f1 = 99, .f3 = 1, },
> + { .f0 = 7, .f1 = 251, .f3 = 1, },
> + { .f0 = 8, .f1 = 127, .f3 = 5, },
> + { .f0 = 5, .f1 = 1, .f3 = 1, },
> + { .f0 = 5, .f1 = 1, .f3 = 1, },
> + { .f0 = 5, .f1 = 1, .f3 = 1, },
> +};
> +
> +void
> +foo(void)
> +{
> +  __SIZE_TYPE__ i;
> +  __SIZE_TYPE__ base = 0x000a;
> +  for (i = 0; i < (sizeof (a0) / sizeof ((a0)[0])); i++) {
> +*(volatile t0 *) (base + 44 + i * 4) = a0[i];
> +  }
> +}
> +
> +/* The only volatile accesses should be the obvious writes.  */
> +/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" } } */
> +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" } } */
> diff --git a/gcc/testsuite/gcc.dg/pr94600-2.c 
> b/gcc/testsuite/gcc.dg/pr94600-2.c
> new file mode 100644
> index 000..cb96cc98a2d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr94600-2.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target size32plus } */
> +/* { dg-options "-fdump-rtl-final -O2" } */
> +
> +/* Unrolled version of pr94600-1.c. */
> +
> +typedef struct {
> +  unsigned int f0 : 4;
> +  unsigned int f1 : 11;
> +  unsigned int f2 : 10;
> +  unsigned int f3 : 7;
> +} t0;
> +
> +void
> +bar(void)
> +{
> +  t0 a00 = { .f0 = 7, .f1 = 99, .f3 = 1, };
> +  t0 a01 = { .f0 = 7, .f1 = 251, .f3 = 1, };
> +  t0 a02 = { .f0 = 8, .f1 = 127, .f3 = 5, };
> +  t0 a03 = { .f0 = 5, .f1 = 1, .f3 = 1, };
> +  t0 a04 = { .f0 = 5, .f1 = 1, .f3 = 1, };
> +  t0 a05 = { .f0 = 5, .f1

[PATCH] RISC-V: Disable remove unneeded save-restore call optimization if there are any arguments on stack.

2020-07-07 Thread Kito Cheng
 - This optimization adjusts the stack, but it does not check/update the
   other stack pointer use sites; for example, when arguments are put on
   the stack, their offsets become wrong after the optimization.

 - However, adjusting stack frame usage after register allocation could be
   error prone, so we decided to turn off this optimization in such cases.

 - Ye-Ting Kuo reported this issue on GitHub:
   https://github.com/riscv/riscv-gcc/pull/192

gcc/ChangeLog:

* config/riscv/riscv-sr.c (riscv_remove_unneeded_save_restore_calls):
Abort if any arguments on stack.

gcc/testsuite/ChangeLog

* gcc.target/riscv/save-restore-9.c: New.
---
 gcc/config/riscv/riscv-sr.c   |  6 +
 .../gcc.target/riscv/save-restore-9.c | 23 +++
 2 files changed, 29 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/save-restore-9.c

diff --git a/gcc/config/riscv/riscv-sr.c b/gcc/config/riscv/riscv-sr.c
index 9af50ef028e3..694f90c1583a 100644
--- a/gcc/config/riscv/riscv-sr.c
+++ b/gcc/config/riscv/riscv-sr.c
@@ -244,6 +244,12 @@ check_for_no_return_call (rtx_insn *prologue)
 void
 riscv_remove_unneeded_save_restore_calls (void)
 {
+  /* We'll adjust stack size after this optimization, that require update every
+ sp use site, which could be unsafe, so we decide to turn off this
+ optimization if there are any arguments put on stack.  */
+  if (crtl->args.size != 0)
+return;
+
   /* Will point to the first instruction of the function body, after the
  prologue end note.  */
   rtx_insn *body = NULL;
diff --git a/gcc/testsuite/gcc.target/riscv/save-restore-9.c 
b/gcc/testsuite/gcc.target/riscv/save-restore-9.c
new file mode 100644
index ..2567daeb376b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/save-restore-9.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msave-restore" } */
+
+int
+__attribute__((noinline,noclone))
+foo (int u)
+{
+  return u + 1;
+}
+
+int
+__attribute__((noinline,noclone))
+bar (int a, int b, int c, int d, int e, int f, int g, int h, int u)
+{
+  return foo (u);
+}
+
+int main()
+{
+  if (bar (1, 2, 3, 4, 5, 6, 7, 8, 9) != 10)
+__builtin_abort();
+  return 0;
+}
-- 
2.27.0



Re: [PATCH] libgomp: Add OMPD process functions and datatypes.

2020-07-07 Thread Jakub Jelinek via Gcc-patches
On Fri, Jul 03, 2020 at 10:43:55PM -0400, y2s1982 via Gcc-patches wrote:
> This patch adds OMPD functions defined in 5.5.2 of the OpenMP 5.0 API
> documentation. It adds per-process and per-device functions, defines
> related handle data types, and adds a helper function for storing device
> id.
> 
> 2020-07-03  Tony Sim  
> 
> libgomp/ChangeLog:
> 
>   * Makefile.am (libgompd_la_OBJECTS): Add ompd-proc.c and
>ompd-helper.c

Formatting.  If ompd-helper.c doesn't fit, then it would go below
the * on next line (i.e. a single tab indented).
And period at the end.  But I think it still fits.
* Makefile.am (libgompd_la_OBJECTS): Add ompd-proc.c and ompd-helper.c.

> +ompd_rc_t
> +gompd_get_gompd_device_id (void *input_id, ompd_size_t id_size,
> +_gompd_device_id *output_id)

Identifiers starting with underscore are reserved for implementation, so
better avoid them if they are private to the implementation.

> +  switch (id_size)
> +{
> +case 1:
> +  *output_id = (_gompd_device_id) *((__UINT8_TYPE__ *) input_id);
> +  break;
> +case 2:
> +  *output_id = (_gompd_device_id) *((__UINT16_TYPE__ *) input_id);

I have no idea what this function is doing, but e.g. from aliasing point of
view trying to access something as short/int/long long is dangerous, and
there might be alignment implications too.

> --- /dev/null
> +++ b/libgomp/ompd-helper.h
> @@ -0,0 +1,37 @@
> +/* Copyright (C) 2020 Free Software Foundation, Inc.
> +   Contributed by Yoosuk Sim .
> +
> +   This file is part of the GNU Offloading and Multi Processing Library
> +   (libgomp).
> +
> +   Libgomp is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   .  */
> +
> +/* This header declares helper functions of OMPD library.  */

IMHO you don't need hundreds of headers, putting most OMPD related private
stuff in libgompd.h should be enough.

> +ompd_rc_t gompd_get_gompd_device_id (void *, ompd_size_t, _gompd_device_id *)
> + __GOMPD_NOTHROW;

The __GOMPD_NOTHROW; should be indented by 2 spaces from line start if it
doesn't fit after the ).

> +ompd_rc_t
> +ompd_process_initialize (ompd_address_space_context_t *context,
> +  ompd_address_space_handle_t **handle)
> +{
> +  ompd_rc_t ret = handle ? ompd_rc_ok : ompd_rc_stale_handle;
> +  if (ret != ompd_rc_ok)
> +return ret;
> +
> +  ret = context ? ompd_rc_ok : ompd_rc_bad_input;

Why so complicated?  Just do:
  ompd_rc_t ret;

  if (handle == NULL)
return ompd_rc_stale_handle;
  if (context == NULL)
return ompd_rc_bad_input;


> +ompd_rc_t
> +ompd_rel_address_space_handle (ompd_address_space_handle_t *handle)
> +{
> +  ompd_rc_t ret = handle && handle->context
> +   ? ompd_rc_ok : ompd_rc_stale_handle;
> +  if (ret != ompd_rc_ok)
> +return ret;
> +
> +  ret = handle->ref_count == 0 ? ompd_rc_ok : ompd_rc_unavailable;
> +  if (ret != ompd_rc_ok)
> +return ret;

Similarly.

> --- /dev/null
> +++ b/libgomp/ompd-types.h

ompd-types.h is an installed header I think, so Makefile.am should install
it.

Jakub



[PATCH] Fix typo in the document of GCC Internals

2020-07-07 Thread Qian Jianhua
Hi

This patch fixes a typo in the document of GCC Internals.

---
 gcc/doc/generic.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index 827c4232aef..fb98727928a 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -886,7 +886,7 @@ This lookup table needs to be kept up to date with the tree 
structure
 hierarchy, or else checking and containment macros will fail
 inappropriately.
 
-For language specific @code{DECL} nodes, their is an @code{init_ts}
+For language specific @code{DECL} nodes, there is an @code{init_ts}
 function in an appropriate @file{.c} file, which initializes the lookup
 table.
 Code setting up the table for new @code{DECL} nodes should be added
-- 
2.18.1





Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]

2020-07-07 Thread luoxhu via Gcc-patches



On 2020/7/7 08:18, Segher Boessenkool wrote:
> Hi!
> 
> On Sun, Jul 05, 2020 at 09:17:57PM -0500, Xionghu Luo wrote:
>> For extracting high part element from DImode register like:
>>
>> {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>>
>> split it before reload with "and mask" to avoid generating shift right
>> 32 bit then shift left 32 bit.
>>
>> srdi 3,3,32
>> sldi 9,3,32
>> mtvsrd 1,9
>> xscvspdpn 1,1
>>
>> =>
>>
>> rldicr 3,3,0,31
>> mtvsrd 1,3
>> xscvspdpn 1,1
> 
> Great :-)
> 
>> +;; For extracting high part element from DImode register like:
>> +;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
>> +;; split it before reload with "and mask" to avoid generating shift right
>> +;; 32 bit then shift left 32 bit.
>> +(define_insn_and_split "movsf_from_si2"
>> +  [(set (match_operand:SF 0 "nonimmediate_operand"
>> +"=!r,   f, v, wa,m, Z,
>> + Z, wa,?r,!r")
>> +(unspec:SF [
>> + (subreg:SI (ashiftrt:DI
>> +   (match_operand:DI 1 "input_operand"
>> +   "m, m, wY,Z, r, f,
>> +   wa,r, wa,r")
>> +  (const_int 32)) 0)]
>> +   UNSPEC_SF_FROM_SI))
>> +  (clobber (match_scratch:DI 2
>> +"=X,X, X, X, X, X,
>> +X, r, X, X"))]
>> +  "TARGET_NO_SF_SUBREG
>> +   && (register_operand (operands[0], SFmode)
>> +   && register_operand (operands[1], DImode))"
> 
> If the insn condition requires operands 0 and 1 to be register_operands,
> it can ask for that in the predicates, instead: not nonimmediate_operand
> and input_operand, but just gpc_reg_operand instead.  You can leave out
> the impossible alternatives as well (0, 1, 2, 3, 4, 5, 6), leaving just
> 
> (define_insn_and_split "movsf_from_si2"
>[(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r,!r")
>   (unspec:SF
> [(subreg:SI (ashiftrt:DI
>   (match_operand:DI 1 "input_operand" "r,wa,r")
>   (const_int 32))
> 0)]
> UNSPEC_SF_FROM_SI)))]
>"TARGET_NO_SF_SUBREG"
>"@
> #
> mfvsrwz %0,%x1
> mr %0,%1"
> 
>"&& !reload_completed
> && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
>[(const_int 0)]
> {
>rtx op0 = operands[0];
>rtx op1 = operands[1];
>rtx tmp = gen_reg_rtx (DImode);
> 
> You cannot call gen_reg_rtx too late in the pass pipeline.  What we
> usually do for such cases is put it as a match_scratch in the pattern,
> and then do code like
> 
>if (GET_CODE (operands[2]) == SCRATCH)
>  operands[2] = gen_reg_rtx (DImode);
> 
> so that it will work both before and after reload.
> 
>/* Avoid split {r155:SI#0=unspec[r133:DI>>0x20#0] 86;clobber scratch;} 
> from PR42745.  */
> 
> (This line is too long, btw.)
> 
>if (!SUBREG_P (operands[0]))
>  {
>rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
>emit_insn (gen_anddi3 (tmp, op1, mask));
>emit_insn (gen_p8_mtvsrd_sf (op0, tmp));
>emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>DONE;
>  }
>else
>  FAIL;
> }
>[(set_attr "length" "12,*,*")
> (set_attr "type" "vecfloat,mffgpr,*")
> (set_attr "isa" "p8v,p8v,*")])
> 
> I wonder what will happen if you actually do FAIL here...  There then is
> no insn alternative that can match, so we ICE?  In that case, just leave
> out the whole FAIL thing, it is useless?  You can do a gcc_assert if you
> want to check something.
> 
> Oh, and maybe you only want to handle GPRs here, not VSRs?  So just the
> "r", not the "wa" at all?  What code would it generate for vector regs?
> 
> Lots of questions, sorry!

Thanks for the nice suggestions; the initial patch contained many issues :)

For this case, %1:SF matches with "=wa"?  And how to construct cases to
match("=?r", "wa") and ("=!r", "r") combinations, please?

Removed lots of copy-paste from "movsf_from_si" and update the patch with 
your comments:


For extracting high part element from DImode register like:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split it before reload with "and mask" to avoid generating shift right
32 bit then shift left 32 bit.  This pattern also exists in PR42475 and
PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

Bootstrap and regression tested pass on Power8-LE.

gcc/ChangeLog:

2020-07-07  Xionghu Luo  

PR rtl-optimization/89310
* config/rs6000/rs6000.md (movsf_from_si2): New
define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-07  Xionghu Luo  

PR rtl-optimization/89310
* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md| 40 ++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +
 2 files changed, 57 insertions(+)
 create mode 100644 gcc/t

Re: [PATCH] gcov: rename 2 options.

2020-07-07 Thread Martin Liška

On 7/4/20 9:12 PM, Fangrui Song wrote:

Good idea! I've done that and made an alias for -i -> -j option.
I'm going to push it to master.

Martin
-- next part --
A non-text attachment was scrubbed...
Name: 0001-gcov-rename-2-options.patch
Type: text/x-patch
Size: 5457 bytes
Desc: not available
URL:



Note that -i was --intermediate-format in many releases, dropping -i should be 
fine if
there is no intention to bring back --intermediate-format.


Hello.

I know, but we have one release out (GCC 10.x) that uses that for the JSON 
format.
Richi, I quite support dropping the option.



(Personally I still find --intermediate-format useful: for template
instantiations, in many cases users only care about aggregated line
execution counts (0 vs 1). They don't want to see 10 instantiations of a
function.)


You can still find the very same information in the JSON file format. Note that
a JSON format is much easier to parse and one can easily extend it.



-j -> --json-format looks good.

I don't think anyone uses --human-readable. The short option can even be
removed.


I used it a couple of times together with --use-hotness-colors and I find it handy
for a report similar to `perf report`.

Martin


[PATCH] [og10] libgomp, Fortran: Fix OpenACC "gang reduction on an orphan loop" error message

2020-07-07 Thread Frederik Harwath

Hi,
This patch fixes the check for reductions on orphaned gang loops in
the Fortran frontend which (in contrast to the C, C++ frontends)
erroneously rejects reductions on gang loops that are contained in
"kernels" constructs and which hence are not orphaned.

According to the OpenACC standard version 2.5 and later, reductions on
orphaned gang loops are explicitly disallowed (cf.  section "Changes
from Version 2.0 to 2.5").  Remember that a loop is "orphaned" if it is
not lexically contained in a compute construct (cf. section "Loop
construct" of the OpenACC standard), i.e. in either a "parallel", a
"serial", or a "kernels" construct.

The patch has been tested by running the GCC and libgomp testsuites.
The latter tests ran with offloading to nvptx although that should not
be important here unless there was some very subtle reason for
forbidding the gang reductions on kernels loops. As expected, there seems
to be no such reason, i.e. I observed no regressions with the patch.

Can I include the patch in OG10?

Best regards,
Frederik

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
>From 7320635211fff3a773beb0de1914dbfcc317ab37 Mon Sep 17 00:00:00 2001
From: Frederik Harwath 
Date: Tue, 7 Jul 2020 10:41:21 +0200
Subject: [PATCH] libgomp, Fortran: Fix OpenACC "gang reduction on an orphan
 loop" error message

According to the OpenACC standard version 2.5 and later, reductions on
orphaned gang loops are explicitly disallowed (cf.  section "Changes
from Version 2.0 to 2.5").  A loop is "orphaned" if it is not
lexically contained in a compute construct (cf. section "Loop
construct" of the OpenACC standard), i.e. in either a "parallel", a
"serial", or a "kernels" construct.

This commit fixes the check for reductions on orphaned gang loops in
the Fortran frontend which (in contrast to the C, C++ frontends)
erroneously rejects reductions on gang loops that are contained in
"kernels" constructs.

2020-07-07  Frederik Harwath  

gcc/fortran/

	* openmp.c (oacc_is_parallel_or_serial): Removed function.
	(oacc_is_kernels): New function.
	(oacc_is_compute_construct): New function.
	(resolve_oacc_loop_blocks): Use "oacc_is_compute_construct"
	instead of "oacc_is_parallel_or_serial" for checking that a
	loop is not orphaned.

gcc/testsuite/

	* gfortran.dg/goacc/orphan-reductions-2.f90: New test
	verifying that the error message is not emitted for
	non-orphaned loops.

	* c-c++-common/goacc/orphan-reductions-2.c: Likewise for C and C++.
---
 gcc/fortran/openmp.c  | 13 +++-
 .../c-c++-common/goacc/orphan-reductions-2.c  | 69 +++
 .../gfortran.dg/goacc/orphan-reductions-2.f90 | 58 
 3 files changed, 137 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/orphan-reductions-2.c
 create mode 100644 gcc/testsuite/gfortran.dg/goacc/orphan-reductions-2.f90

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 28408c4c99a..83c498112a8 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -5926,9 +5926,16 @@ oacc_is_serial (gfc_code *code)
 }
 
 static bool
-oacc_is_parallel_or_serial (gfc_code *code)
+oacc_is_kernels (gfc_code *code)
 {
-  return oacc_is_parallel (code) || oacc_is_serial (code);
+  return code->op == EXEC_OACC_KERNELS || code->op == EXEC_OACC_KERNELS_LOOP;
+}
+
+static bool
+oacc_is_compute_construct (gfc_code *code)
+{
+  return oacc_is_parallel (code) || oacc_is_serial (code)
+|| oacc_is_kernels (code);
 }
 
 static gfc_statement
@@ -6222,7 +6229,7 @@ resolve_oacc_loop_blocks (gfc_code *code)
   for (c = omp_current_ctx; c; c = c->previous)
 	if (!oacc_is_loop (c->code))
 	  break;
-  if (c == NULL || !oacc_is_parallel_or_serial (c->code))
+  if (c == NULL || !oacc_is_compute_construct (c->code))
 	gfc_error ("gang reduction on an orphan loop at %L", &code->loc);
 }
 
diff --git a/gcc/testsuite/c-c++-common/goacc/orphan-reductions-2.c b/gcc/testsuite/c-c++-common/goacc/orphan-reductions-2.c
new file mode 100644
index 000..2b651fd2b9f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/orphan-reductions-2.c
@@ -0,0 +1,69 @@
+/* Verify that the error message for gang reduction on orphaned OpenACC loops
+   is not reported for non-orphaned loops. */
+
+#include 
+
+int
+kernels (int n)
+{
+  int i, s1 = 0, s2 = 0;
+#pragma acc kernels
+  {
+#pragma acc loop gang reduction(+:s1) /* { dg-bogus "gang reduction on an orphan loop" } */
+  for (i = 0; i < n; i++)
+s1 = s1 + 2;
+
+#pragma acc loop gang reduction(+:s2) /* { dg-bogus "gang reduction on an orphan loop" } */
+  for (i = 0; i < n; i++)
+s2 = s2 + 2;
+  }
+  return s1 + s2;
+}
+
+int
+parallel (int n)
+{
+  int i, s1 = 0, s2 = 0;
+#pragma acc parallel
+  {
+#pragma acc loop gang reduction(+:s1) /* { dg-bogus "gang reduction on an orphan loop" } */
+  for (i = 0; i < n; i++)
+s1 = s1 +

[wwwdocs PATCH] remove tree-browser page and links

2020-07-07 Thread Hu Jiangping
Hi,

I tried to use Tree Browser while debugging, but failed.
I found that tree-browser.c and tree-browser.def were
removed on 2015-07-25. So, to avoid misunderstanding,
can we remove this tree-browser page too?

Regards.
hujp

---
 htdocs/projects/tree-ssa/index.html|   6 -
 htdocs/projects/tree-ssa/tree-browser.html | 306 -
 2 files changed, 312 deletions(-)
 delete mode 100644 htdocs/projects/tree-ssa/tree-browser.html

diff --git a/htdocs/projects/tree-ssa/index.html 
b/htdocs/projects/tree-ssa/index.html
index a15d0f32..930df390 100644
--- a/htdocs/projects/tree-ssa/index.html
+++ b/htdocs/projects/tree-ssa/index.html
@@ -21,7 +21,6 @@
 GENERIC and GIMPLE
 SSA implementation
 Unparsing GENERIC trees
-Tree Browser
 Implementation Status (last updated: 2003-11-22)
 TODO list (last updated: 2003-12-27)
 
@@ -221,11 +220,6 @@ functions that given a GENERIC tree node, they print a C 
representation of
 the tree.  The output is not meant to be compilable, but it is of great
 help when debugging transformations done by the transformation passes.
 
-
-Tree Browser
-For debugging, browsing, discovering, and playing with trees you can
-use the Tree Browser directly from gdb.
-
 
 Implementation Status
 
diff --git a/htdocs/projects/tree-ssa/tree-browser.html 
b/htdocs/projects/tree-ssa/tree-browser.html
deleted file mode 100644
index ce95a103..
--- a/htdocs/projects/tree-ssa/tree-browser.html
+++ /dev/null
@@ -1,306 +0,0 @@
-
-
-
-
-
-Tree Browser
-https://gcc.gnu.org/gcc.css"; />
-
-  
-  
-Tree Browser
-
-Until recently the only way to debug trees from gdb was to call
-debug_tree as follows:
-
-
-(gdb) p debug_tree (current_function_decl) 
-
-
-An alternative for interactively scan tree structures is to use the
-Tree Browser.  You can access Tree Browser from anywhere during a debugging
-session as follows:
-
-
-(gdb) p browse_tree (current_function_decl)
- 
-Tree Browser 
-foo 
-Up/prev expressions updated. 
-TB> 
-
-
-For listing available commands, you could try:
-
-
-TB> h 
-Possible commands are: 
- 
-   x  -  Exits tree-browser. 
-   q  -  Exits tree-browser. 
-   h  -  Prints this help message. 
-  update  -  Update information about parent expressions. 
- verbose  -  Sets/unsets verbose mode (default is on). 
- fun  -  Go to the current function declaration. 
-  nx  -  Go to the next expression in a BIND_EXPR. 
-  pr  -  Go to the previous expression in a BIND_EXPR. 
-  up  -  Go to the parent tree node. 
-last  -  Go to the last expression in a BIND_EXPR. 
-   first  -  Go to the first expression in a BIND_EXPR. 
- hpr  -  Go to the previous visited node (history previous). 
-arg0  -  Child 0. 
-arg1  -  Child 1. 
-arg2  -  Child 2. 
-arg3  -  Child 3. 
- decl_saved_tree  -  Body of a function. 
-type  -  Field accessor. 
-size  -  Field accessor. 
-   unit_size  -  Field accessor. 
-  offset  -  Field accessor. 
-  bit_offset  -  Field accessor. 
- context  -  Field accessor. 
-  attributes  -  Field accessor. 
- abstract_origin  -  Field accessor. 
-   arguments  -  Field accessor. 
-  result  -  Field accessor. 
- initial  -  Field accessor. 
-arg-type  -  Field accessor. 
- arg-type-as-written  -  Field accessor. 
-   chain  -  Field accessor. 
-  values  -  Field accessor. 
-  domain  -  Field accessor. 
- method_basetype  -  Field accessor. 
-  fields  -  Field accessor. 
-   arg-types  -  Field accessor. 
-basetype  -  Field accessor. 
- pointer_to_this  -  Field accessor. 
-   reference_to_this  -  Field accessor. 
-vars  -  Field accessor. 
-supercontext  -  Field accessor. 
-body  -  Field accessor. 
-   subblocks  -  Field accessor. 
-   block  -  Field accessor. 
-real  -  Field accessor. 
-imag  -  Field accessor. 
- purpose  -  Field accessor. 
-   value  -  Field accessor. 
- elt  -  Field accessor. 
- min  -  Field accessor. 
- max  -  Field accessor. 
-  sc  -  Search a node having a TREE_CODE given as a 
parameter. 
-  sn  -  Search an identifier having a name given as a 
parameter. 
-  pp  -  Pretty print current node. 
-   p  -  Prints the current node. 
-TB>  
-
-
-Note that this list of commands is susceptible to change, since this
-is a pretty new tool and is still in development.
-
-Now let's try some of these commands: we're on the declaration of the
-curr

Re: [PATCH] pass correct parameters to c_parser_do_statement

2020-07-07 Thread Richard Sandiford
"zhoukaipeng (A)"  writes:
> Sorry for my mistake.
>
> The previous patch description is incorrect.  A new patch was attached.
>
> Can anyone help me install this patch?

Pushed to trunk, thanks.

Richard

>
> Thanks,
> Kaipeng Zhou
>
>> -Original Message-
>> From: zhoukaipeng (A)
>> Sent: Tuesday, July 7, 2020 11:26 AM
>> To: gcc-patches@gcc.gnu.org
>> Subject: [PATCH] pass correct parameters to c_parser_do_statement
>> 
>> Hi,
>> 
>> It is a patch to pass correct parameters to c_parser_do_statement.
>> 
>> Can anyone help me install this patch?
>> 
>> Thanks,
>> Kaipeng Zhou


Re: [PATCH] Fix typo in the document of GCC Internals

2020-07-07 Thread Richard Sandiford
Qian Jianhua  writes:
> This patch fixes a typo in the document of GCC Internals.

Pushed to trunk, thanks.

Richard
>
> ---
>  gcc/doc/generic.texi | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
> index 827c4232aef..fb98727928a 100644
> --- a/gcc/doc/generic.texi
> +++ b/gcc/doc/generic.texi
> @@ -886,7 +886,7 @@ This lookup table needs to be kept up to date with the 
> tree structure
>  hierarchy, or else checking and containment macros will fail
>  inappropriately.
>  
> -For language specific @code{DECL} nodes, their is an @code{init_ts}
> +For language specific @code{DECL} nodes, there is an @code{init_ts}
>  function in an appropriate @file{.c} file, which initializes the lookup
>  table.
>  Code setting up the table for new @code{DECL} nodes should be added


Re: [PATCH 1/7 v8] ifn/optabs: Support vector load/store with length

2020-07-07 Thread Richard Sandiford
"Kewen.Lin"  writes:
> gcc/ChangeLog:
>
> 2020-MM-DD  Kewen Lin  
>
>   * doc/md.texi (len_load_@var{m}): Document.
>   (len_store_@var{m}): Likewise.
>   * internal-fn.c (len_load_direct): New macro.
>   (len_store_direct): Likewise.
>   (expand_len_load_optab_fn): Likewise.
>   (expand_len_store_optab_fn): Likewise.
>   (direct_len_load_optab_supported_p): Likewise.
>   (direct_len_store_optab_supported_p): Likewise.
>   (expand_mask_load_optab_fn): New macro.  Original renamed to ...
>   (expand_partial_load_optab_fn): ... here.  Add handlings for
>   len_load_optab.
>   (expand_mask_store_optab_fn): New macro.  Original renamed to ...
>   (expand_partial_store_optab_fn): ... here. Add handlings for
>   len_store_optab.
>   (internal_load_fn_p): Handle IFN_LEN_LOAD.
>   (internal_store_fn_p): Handle IFN_LEN_STORE.
>   (internal_fn_stored_value_index): Handle IFN_LEN_STORE.
>   * internal-fn.def (LEN_LOAD): New internal function.
>   (LEN_STORE): Likewise.
>   * optabs.def (len_load_optab, len_store_optab): New optab.

OK, thanks.

Richard


Re: [PATCH 5/7 v6] vect: Support vector load/store with length in vectorizer

2020-07-07 Thread Kewen.Lin via Gcc-patches
Hi Richard,

on 2020/7/2 下午1:20, Kewen.Lin via Gcc-patches wrote:
> on 2020/7/1 下午11:17, Richard Sandiford wrote:
>> "Kewen.Lin"  writes:
>>> on 2020/7/1 上午3:53, Richard Sandiford wrote:
 "Kewen.Lin"  writes:
[...]
>> Hmm, OK.  But in that case can you update the names of the variables
>> to match?  It's confusing to have some nscalars_* variables actually
>> count scalars (and thus have “nitems” equivalents) and other nscalars_*
>> variables count something else (and thus effectively be nitems_* variables
>> themselves).
>>
> 
> OK.  I'll update the names like nscalars_total/nscalars_step and equivalents
> to nitems_total/... (or nunits_total better?)
> 

Please ignore this part, I have used nitems_ for the names.  :)

> +  /* Work out how many bits we need to represent the length limit.  */
> +  unsigned int nscalars_per_iter_ft = rgl->max_nscalars_per_iter * 
> rgl->factor;

 I think this breaks the abstraction.  There's no guarantee that the
 factor is the same for each rgroup_control, so there's no guarantee
 that the maximum bytes per iter comes from the last entry.  (Also, it'd
 be better to avoid talking about bytes if we're trying to be general.)
 I think we should take the maximum of each entry instead.

>>>
>>> Agree!  I guess the above "maximum bytes per iter" is a typo? and you meant
>>> "maximum elements per iter"?  Yes, the code is for length in bytes, checking
>>> the last entry is only reasonable for it.  Will update it to check all 
>>> entries
>>> instead.
>>
>> I meant bytes, since that's what the code is effectively calculating
>> (at least for Power).  I.e. I think this breaks the abstraction even
>> if we assume the Power scheme to measuring length, since in principle
>> it's possible to fix different vector sizes in the same vector region.
>>
> 
> Sorry I didn't catch the meaning of "it's possible to fix different
> vector sizes in the same vector region."  I guess if we are counting
> bytes, the max nunits per iteration should come from the last entry
> since the last one holds max bytes which is the result of 
> max_nscalar_per_iter * factor.  But I agree that it breaks abstraction
> here since it's not applied to length in lanes.
> 

On further thought, I guess you meant we can have different vector
sizes for the same loop in the future?  Yes, the assumption doesn't hold then.

> 
> +  /* Decide whether to use fully-masked approach.  */
> +  if (vect_verify_full_masking (loop_vinfo))
> + LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
> +  /* Decide whether to use length-based approach.  */
> +  else if (vect_verify_loop_lens (loop_vinfo))
> + {
> +   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
> +   || LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
> + {
> +   if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "can't vectorize this loop with length-based"
> +  " partial vectors approach becuase peeling"
> +  " for alignment or gaps is required.\n");
> +   LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
> + }

 Why are these peeling cases necessary?  Peeling for gaps should
 just mean subtracting one scalar iteration from the iteration count
 and shouldn't otherwise affect the main loop.  Similarly, peeling for
 alignment can be handled in the normal way, with a scalar prologue loop.

>>>
>>> I was thinking to relax this later and to avoid to handle too many cases
>>> in the first enablement patch.  Since the Power hardware levels that support
>>> vector with length also support unaligned load/store, I would need to construct
>>> some test cases for them.  May I postpone it a bit?  Or would you prefer me to support
>>> it here?
>>
>> I've no objection to postponing it if there are specific known
>> problems that make it difficult, but I think we should at least
>> say what they are.  On the face of it, I'm not sure why it doesn't
>> Just Work, since the way that we control the main loop should be
>> mostly orthogonal to how we handle peeled prologue iterations
>> and how we handle a single peeled epilogue iteration.
>>
> 
> OK, I will remove it to see the impact.  By the way, do you think using
> partial vectors for the prologue is something worth trying in the future?
> 

I tested the updated patch with this restriction relaxed; the LOOP_VINFO_PEELING_FOR_GAPS
part looks fine, but LOOP_VINFO_PEELING_FOR_ALIGNMENT caused one case to
fail at execution during vect-partial-vector-usage=2.  So far the patch
doesn't handle any niters_skip cases.  I think if we want to support it, 
we have to add some handlings in/like what we have for masking, such as: 
mask_skip_niters, vect_prepare_for_masked_peels etc.  

Do you prefer me to extend the support in this patch series?

>>> Sorry I might miss something, but all undetermined 

[Ada] Spurious elaboration warnings with -gnatc and null procedures

2020-07-07 Thread Pierre-Marie de Rodat
When expansion is disabled (-gnatc or gnatprove_mode), null procedures
need to be taken into account explicitly since they are not expanded.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_elab.adb (Is_Guaranteed_ABE): Take into account null
procedures.

diff --git a/gcc/ada/sem_elab.adb b/gcc/ada/sem_elab.adb
--- a/gcc/ada/sem_elab.adb
+++ b/gcc/ada/sem_elab.adb
@@ -9246,6 +9246,7 @@ package body Sem_Elab is
  Target_Decl : Node_Id;
  Target_Body : Node_Id) return Boolean
   is
+ Spec : Node_Id;
   begin
  --  Avoid cascaded errors if there were previous serious infractions.
  --  As a result the scenario will not be treated as a guaranteed ABE.
@@ -9266,12 +9267,20 @@ package body Sem_Elab is
return Earlier_In_Extended_Unit (N, Target_Body);
 
 --  Otherwise the body has not been encountered yet. The scenario
---  is a guaranteed ABE since the body will appear later. It is
---  assumed that the caller has already ensured that the scenario
---  is ABE-safe because optional bodies are not considered here.
+--  is a guaranteed ABE since the body will appear later, unless
+--  this is a null specification, which can occur if expansion is
+--  disabled (e.g. -gnatc or GNATprove mode). It is assumed that
+--  the caller has already ensured that the scenario is ABE-safe
+--  because optional bodies are not considered here.
 
 else
-   return True;
+   Spec := Specification (Target_Decl);
+
+   if Nkind (Spec) /= N_Procedure_Specification
+ or else not Null_Present (Spec)
+   then
+  return True;
+   end if;
 end if;
  end if;
 
@@ -9574,7 +9583,7 @@ package body Sem_Elab is
Error_Msg_N ("\Program_Error will be raised at run time", Call);
 end if;
 
---  Mark the call as a guarnateed ABE
+--  Mark the call as a guaranteed ABE
 
 Set_Is_Known_Guaranteed_ABE (Call);
 




[Ada] Checking type invariants on in params of procedures, not functions (AI12-0044)

2020-07-07 Thread Pierre-Marie de Rodat
AI12-0044 is a binding interpretation that restricts type-invariant
checking to occur on in parameters of procedures but not functions after
a call, because such checks done on function calls within type
invariants could cause unbounded recursion. This modifies AI05-0289,
which specified that invariant checks be done for all parameter modes,
but apparently that AI was never fully implemented, so the checks are
added now for procedure parameters of mode in.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* contracts.adb (Add_Invariant_And_Predicate_Checks): Relax the
condition for doing invariant checks so that in-mode parameters
of procedures are also checked (required by AI05-0289, and
restricted to procedures by AI12-0044). This is done in a
procedure's nested postconditions procedure.
* exp_ch6.adb (Expand_Actuals): Also perform postcall invariant
checks for in parameters of procedures (but not functions).
Moved invariant-checking code to end of Expand_Actuals
(including the nested function Is_Public_Subp).

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -1864,13 +1864,15 @@ package body Contracts is
 Add_Invariant_Access_Checks (Result);
  end if;
 
- --  Add invariant and predicates for all formals that qualify
+ --  Add invariant checks for all formals that qualify (see AI05-0289
+ --  and AI12-0044).
 
  Formal := First_Formal (Subp_Id);
  while Present (Formal) loop
 Typ := Etype (Formal);
 
 if Ekind (Formal) /= E_In_Parameter
+  or else Ekind (Subp_Id) = E_Procedure
   or else Is_Access_Type (Typ)
 then
if Invariant_Checks_OK (Typ) then


diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -2461,46 +2461,6 @@ package body Exp_Ch6 is
Aund : constant Entity_Id := Underlying_Type (E_Actual);
Atyp : Entity_Id;
 
-   function Is_Public_Subp return Boolean;
-   --  Check whether the subprogram being called is a visible
-   --  operation of the type of the actual. Used to determine
-   --  whether an invariant check must be generated on the
-   --  caller side.
-
-   -
-   --  Is_Public_Subp --
-   -
-
-   function Is_Public_Subp return Boolean is
-  Pack  : constant Entity_Id := Scope (Subp);
-  Subp_Decl : Node_Id;
-
-   begin
-  if not Is_Subprogram (Subp) then
- return False;
-
-  --  The operation may be inherited, or a primitive of the
-  --  root type.
-
-  elsif
-Nkind_In (Parent (Subp), N_Private_Extension_Declaration,
- N_Full_Type_Declaration)
-  then
- Subp_Decl := Parent (Subp);
-
-  else
- Subp_Decl := Unit_Declaration_Node (Subp);
-  end if;
-
-  return Ekind (Pack) = E_Package
-and then
-  List_Containing (Subp_Decl) =
-Visible_Declarations
-  (Specification (Unit_Declaration_Node (Pack)));
-   end Is_Public_Subp;
-
---  Start of processing for By_Ref_Predicate_Check
-
 begin
if No (Aund) then
   Atyp := E_Actual;
@@ -2518,33 +2478,6 @@ package body Exp_Ch6 is
   Append_To (Post_Call,
 Make_Predicate_Check (Atyp, Actual));
end if;
-
-   --  We generated caller-side invariant checks in two cases:
-
-   --  a) when calling an inherited operation, where there is an
-   --  implicit view conversion of the actual to the parent type.
-
-   --  b) When the conversion is explicit
-
-   --  We treat these cases separately because the required
-   --  conversion for a) is added later when expanding the call.
-
-   if Has_Invariants (Etype (Actual))
-  and then
-Nkind (Parent (Subp)) = N_Private_Extension_Declaration
-   then
-  if Comes_From_Source (N) and then Is_Public_Subp then
- Append_To (Post_Call, Make_Invariant_Call (Actual));
-  end if;
-
-   elsif Nkind (Actual) = N_Type_Conversion
- and then Has_Invariants (Etype (Expression (Actual)))
-   then
-  if Comes_From_Source (N) and then Is_Public_Subp then
- Append_To (P

[Ada] Handle explicit dereferences in expression functions

2020-07-07 Thread Pierre-Marie de Rodat
An expression function that is a completion freezes the types that
appear in the expression. This includes the types of all entities
within. In most cases an access type does not freeze its designated
type, but implicit dereferences in a dispatching call do freeze the
designated type. Additionally, an explicit dereference also freezes the
corresponding designated type; this was not done previously.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* freeze.adb (Freeze_Expr_Types): Freeze the designated type of
the explicit dereference.

diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb
--- a/gcc/ada/freeze.adb
+++ b/gcc/ada/freeze.adb
@@ -7934,6 +7934,15 @@ package body Freeze is
and then Node = Controlling_Argument (Parent (Node))
  then
 Check_And_Freeze_Type (Designated_Type (Etype (Node)));
+
+ --  An explicit dereference freezes the designated type as well,
+ --  even though that type is not attached to an entity in the
+ --  expression.
+
+ elsif Nkind (Node) in N_Has_Etype
+   and then Nkind (Parent (Node)) = N_Explicit_Dereference
+ then
+Check_And_Freeze_Type (Designated_Type (Etype (Node)));
  end if;
 
  --  No point in posting several errors on the same expression




[Ada] Fix expansion of delta aggregates with slices

2020-07-07 Thread Pierre-Marie de Rodat
When creating an anonymous object of the delta aggregate, use the type
of the base expression, not the type of the delta aggregate itself. This
follows from Ada 2020 4.3.4(14/5):

  "The bounds of the anonymous object of an array_delta_aggregate and the
   discriminants and tag (if any) of the anonymous object of a
   record_delta_aggregate are those of the base_expression."

Ideally both types should be the same, but the type of the
N_Delta_Aggregate (which is set in Resolve_Delta_Aggregate) is the type
expected by the context and it might have different bounds,
discriminants or tags (while still being compatible wrt assignments). In
particular, when the bounds are different, they must come from the base
expression, because the choices of the array_component_association_list
refer to this base expression, not to the bounds expected by the
context.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Expand_N_Delta_Aggregate): Use type of the delta
base expression for the anonymous object of the delta aggregate.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -6845,7 +6845,7 @@ package body Exp_Aggr is
 
procedure Expand_N_Delta_Aggregate (N : Node_Id) is
   Loc  : constant Source_Ptr := Sloc (N);
-  Typ  : constant Entity_Id  := Etype (N);
+  Typ  : constant Entity_Id  := Etype (Expression (N));
   Decl : Node_Id;
 
begin




[Ada] Remove extra checks for non-static context from resolving 'Update

2020-07-07 Thread Pierre-Marie de Rodat
Choices in attribute Update are evaluated, which includes checks for
non-static context, as part of their analysis and resolution, which
happens in Analyze_Array_Component_Update. There is no need to repeat
them while the attribute is resolved.

This is just a cleanup, with no impact on the compiler; it is motivated by
adding support for delta_aggregates in GNATprove and making sure that
both attribute Update and delta_aggregate are handled similarly.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_attr.adb (Resolve_Attribute): Do not call
Check_Non_Static_Context.

diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -11999,13 +11999,10 @@ package body Sem_Attr is
 
 if Nkind (C) /= N_Aggregate then
Analyze_And_Resolve (C, Etype (Indx));
-   Check_Non_Static_Context (C);
-
 else
C_E := First (Expressions (C));
while Present (C_E) loop
   Analyze_And_Resolve (C_E, Etype (Indx));
-  Check_Non_Static_Context (C_E);
 
   Next (C_E);
   Next_Index (Indx);




[Ada] Ensure No_Specification_Of_Aspect forbids pragmas and repr. clauses

2020-07-07 Thread Pierre-Marie de Rodat
Section 13.12.1 of the Ada reference manual states the following: [The
No_Specification_of_Aspect pragma] Identifies an aspect for which no
aspect_specification, attribute_definition_clause, or pragma is given.
GNAT already checks for aspects, this commit adds checks for attribute
definitions and pragmas.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Analyze_Attribute_Definition_Clause): Add call
to Check_Restriction_No_Specification_Of_Aspect.
* sem_prag.adb (Analyze_Pragma): Likewise.
* restrict.ads (Check_Restriction_No_Specification_Of_Aspect):
Mention possible new node kinds in documentation.
* restrict.adb (Check_Restriction_No_Specification_Of_Aspect):
Retrieve aspect id from different fields if given node is an
N_Pragma or an N_Attribute_Definition_Clause.

diff --git a/gcc/ada/restrict.adb b/gcc/ada/restrict.adb
--- a/gcc/ada/restrict.adb
+++ b/gcc/ada/restrict.adb
@@ -626,7 +626,14 @@ package body Restrict is
  return;
   end if;
 
-  Id := Identifier (N);
+  if Nkind (N) = N_Pragma then
+ Id := Pragma_Identifier (N);
+  elsif Nkind (N) = N_Attribute_Definition_Clause then
+ Id := N;
+  else
+ Id := Identifier (N);
+  end if;
+
   A_Id := Get_Aspect_Id (Chars (Id));
   pragma Assert (A_Id /= No_Aspect);
 


diff --git a/gcc/ada/restrict.ads b/gcc/ada/restrict.ads
--- a/gcc/ada/restrict.ads
+++ b/gcc/ada/restrict.ads
@@ -283,9 +283,10 @@ package Restrict is
--  the node to which an error will be attached if necessary.
 
procedure Check_Restriction_No_Specification_Of_Aspect (N : Node_Id);
-   --  N is the node id for an N_Aspect_Specification. An error message
-   --  (warning) will be issued if a restriction (warning) was previously set
-   --  for this aspect using Set_No_Specification_Of_Aspect.
+   --  N is the node id for an N_Aspect_Specification, an N_Pragma, or an
+   --  N_Attribute_Definition_Clause. An error message (warning) will be issued
+   --  if a restriction (warning) was previously set for this aspect using
+   --  Set_No_Specification_Of_Aspect.
 
procedure Check_Restriction_No_Use_Of_Attribute (N : Node_Id);
--  N denotes an attribute definition clause or an attribute reference. An


diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -5643,6 +5643,13 @@ package body Sem_Ch13 is
 
   Check_Restriction_No_Use_Of_Attribute (N);
 
+  if Get_Aspect_Id (Chars (N)) /= No_Aspect then
+ --  6.1/3 No_Specification_of_Aspect: Identifies an aspect for which
+ --no aspect_specification, attribute_definition_clause, or pragma
+ --is given.
+ Check_Restriction_No_Specification_Of_Aspect (N);
+  end if;
+
   --  Ignore some selected attributes in CodePeer mode since they are not
   --  relevant in this context.
 


diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -11477,6 +11477,13 @@ package body Sem_Prag is
 
   Check_Restriction_No_Use_Of_Pragma (N);
 
+  if Get_Aspect_Id (Chars (Pragma_Identifier (N))) /= No_Aspect then
+ --  6.1/3 No_Specification_of_Aspect: Identifies an aspect for which
+ --no aspect_specification, attribute_definition_clause, or pragma
+ --is given.
+ Check_Restriction_No_Specification_Of_Aspect (N);
+  end if;
+
   --  Ignore pragma if Ignore_Pragma applies. Also ignore pragma
   --  Default_Scalar_Storage_Order if the -gnatI switch was given.
 




[Ada] Ada2020: AI12-0198 potentially unevaluated components of arrays

2020-07-07 Thread Pierre-Marie de Rodat
Minor reformatting: This patch reorders the routines of the internal
package Interval_Lists. No functional change.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.ads (Interval_Lists): Reordering routine.
* sem_util.adb (Interval_Lists): Reordering routines to keep
them alphabetically ordered.

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -28897,6 +28897,16 @@ package body Sem_Util is
   --  Check that list is sorted, lacks null intervals, and has gaps
   --  between intervals.
 
+  function Chosen_Interval (Choice : Node_Id) return Discrete_Interval;
+  --  Given an element of a Discrete_Choices list, a
+  --  Static_Discrete_Predicate list, or an Others_Discrete_Choices
+  --  list (but not an N_Others_Choice node) return the corresponding
+  --  interval. If an element that does not represent a single
+  --  contiguous interval due to a static predicate (or which
+  --  represents a single contiguous interval whose bounds depend on
+  --  a static predicate) is encountered, then that is an error on the
+  --  part of whoever built the list in question.
+
   function In_Interval
 (Value : Uint; Interval : Discrete_Interval) return Boolean;
   --  Does the given value lie within the given interval?
@@ -28948,6 +28958,8 @@ package body Sem_Util is
  Intervals : Discrete_Interval_List (1 .. Max_I);
  Num_I : Nat := 0;
 
+  --  Start of processing for Aggregate_Intervals
+
   begin
  --  No action needed if there are no intervals
 
@@ -28984,18 +28996,10 @@ package body Sem_Util is
  end;
   end Aggregate_Intervals;
 
-  -
-  -- In_Interval --
-  -
-  function In_Interval
-(Value : Uint; Interval : Discrete_Interval) return Boolean is
-  begin
- return Value >= Interval.Low and then Value <= Interval.High;
-  end In_Interval;
-
   
   --  Check_Consistency --
   
+
   procedure Check_Consistency (Intervals : Discrete_Interval_List) is
   begin
  if Serious_Errors_Detected > 0 then
@@ -29016,19 +29020,79 @@ package body Sem_Util is
  end loop;
   end Check_Consistency;
 
-  function Chosen_Interval (Choice : Node_Id) return Discrete_Interval;
-  --  Given an element of a Discrete_Choices list, a
-  --  Static_Discrete_Predicate list, or an Others_Discrete_Choices
-  --  list (but not an N_Others_Choice node) return the corresponding
-  --  interval. If an element that does not represent a single
-  --  contiguous interval due to a static predicate (or which
-  --  represents a single contiguous interval whose bounds depend on
-  --  a static predicate) is encountered, then that is an error on the
-  --  part of whoever built the list in question.
+  ---
+  -- Choice_List_Intervals --
+  ---
+
+  function Choice_List_Intervals
+(Discrete_Choices : List_Id) return Discrete_Interval_List
+  is
+ function Unmerged_Choice_Count return Nat;
+ --  The number of intervals before adjacent intervals are merged.
+
+ ---
+ -- Unmerged_Choice_Count --
+ ---
+
+ function Unmerged_Choice_Count return Nat is
+Choice : Node_Id := First (Discrete_Choices);
+Count  : Nat := 0;
+ begin
+while Present (Choice) loop
+   --  Non-contiguous choices involving static predicates
+   --  have already been normalized away.
+
+   if Nkind (Choice) = N_Others_Choice then
+  Count :=
+Count + List_Length (Others_Discrete_Choices (Choice));
+   else
+  Count := Count + 1;  -- an ordinary expression or range
+   end if;
+
+   Next (Choice);
+end loop;
+return Count;
+ end Unmerged_Choice_Count;
+
+ --  Local variables
+
+ Choice : Node_Id := First (Discrete_Choices);
+ Result : Discrete_Interval_List (1 .. Unmerged_Choice_Count);
+ Count  : Nat := 0;
+
+  --  Start of processing for Choice_List_Intervals
+
+  begin
+ while Present (Choice) loop
+if Nkind (Choice) = N_Others_Choice then
+   declare
+  Others_Choice : Node_Id
+:= First (Others_Discrete_Choices (Choice));
+   begin
+  while Present (Others_Choice) loop
+ Count := Count + 1;
+ Result (Count) := Chosen_Interval (Others_Choice);
+ Next (Others_Choice);
+  end loop;
+   end;
+  

[Ada] Set range checks for 'Update on arrays in GNATprove expansion

2020-07-07 Thread Pierre-Marie de Rodat
This is a follow-up of a recent change, where setting of range checks
for 'Update on records was moved from Resolve_Attribute (where it was
shared between GNAT and GNATprove) to custom expansion for GNATprove
(and GNAT sets them as well in its own expansion).  This patch does the
same for 'Update on arrays.

Just like the previous patch, this one also eliminates unnecessary
checks, for example, on a code like this:

   type T is array (Positive range <>) of Boolean;
   function P return Positive;
   X : T := ...'Update (P => ...);  --  no need for range check

we no longer emit a range check for the result of P being in T'Range,
while still generating them where required, e.g.:

   function N return Natural;
   X : T := ...'Update (N => ...);  -- range check needed

Also, range checks for single-dimensional arrays were added in both
analysis and resolution, while for multi-dimensional arrays only in
resolution, which was inconsistent.

Note: attribute Update is soon to be replaced by delta_aggregate. This
cleanup is for reusing its implementation (if possible) or at least to
mirror it and not introduce more confusion.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_spark.adb (Expand_SPARK_N_Attribute_Reference): Add scalar
range checks for 'Update on arrays just like for 'Update on
records.
* sem_attr.adb (Analyze_Array_Component_Update): Do not set
range checks for single-dimensional arrays.
(Resolve_Attribute): Do not set range checks for both single-
and multi- dimensional arrays.

diff --git a/gcc/ada/exp_spark.adb b/gcc/ada/exp_spark.adb
--- a/gcc/ada/exp_spark.adb
+++ b/gcc/ada/exp_spark.adb
@@ -258,25 +258,91 @@ package body Exp_SPARK is
 
 Assoc : Node_Id;
 Comp  : Node_Id;
-Comp_Type : Node_Id;
+Comp_Type : Entity_Id;
 Expr  : Node_Id;
+Index : Node_Id;
+Index_Typ : Entity_Id;
 
  begin
 --  Apply scalar range checks on the updated components, if needed
 
 if Is_Array_Type (Typ) then
-   Assoc := First (Component_Associations (Aggr));
 
-   while Present (Assoc) loop
-  Expr  := Expression (Assoc);
-  Comp_Type := Component_Type (Typ);
+   --  Multi-dimensional array
 
-  if Is_Scalar_Type (Comp_Type) then
- Apply_Scalar_Range_Check (Expr, Comp_Type);
-  end if;
+   if Present (Next_Index (First_Index (Typ))) then
+  Assoc := First (Component_Associations (Aggr));
 
-  Next (Assoc);
-   end loop;
+  while Present (Assoc) loop
+ Expr  := Expression (Assoc);
+ Comp_Type := Component_Type (Typ);
+
+ if Is_Scalar_Type (Comp_Type) then
+Apply_Scalar_Range_Check (Expr, Comp_Type);
+ end if;
+
+ --  The current association contains a sequence of indexes
+ --  denoting an element of a multidimensional array:
+ --
+ --(Index_1, ..., Index_N)
+
+ Expr := First (Choices (Assoc));
+
+ pragma Assert (Nkind (Aggr) = N_Aggregate);
+
+ while Present (Expr) loop
+Index := First (Expressions (Expr));
+Index_Typ := First_Index (Typ);
+
+while Present (Index_Typ) loop
+   Apply_Scalar_Range_Check (Index, Etype (Index_Typ));
+   Next (Index);
+   Next_Index (Index_Typ);
+end loop;
+
+Next (Expr);
+ end loop;
+
+ Next (Assoc);
+  end loop;
+
+   --  One-dimensional array
+
+   else
+  Assoc := First (Component_Associations (Aggr));
+
+  while Present (Assoc) loop
+ Expr  := Expression (Assoc);
+ Comp_Type := Component_Type (Typ);
+
+ if Is_Scalar_Type (Comp_Type) then
+Apply_Scalar_Range_Check (Expr, Comp_Type);
+ end if;
+
+ Index := First (Choices (Assoc));
+ Index_Typ := First_Index (Typ);
+
+ while Present (Index) loop
+--  The index denotes a range of elements
+
+if Nkind (Index) = N_Range then
+   Apply_Scalar_Range_Check
+ (Low_Bound  (Index), Etype (Index_Typ));
+   Apply_Scalar_Range_Check
+ (High_Bound (Index), Etype (Index_Typ));
+
+  

[Ada] Fix documentation of -gnatVa switch

2020-07-07 Thread Pierre-Marie de Rodat
Switch -gnatVa enables all validity checks, including for parameters,
which can be specifically enabled with -gnatVp.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* doc/gnat_ugn/building_executable_programs_with_gnat.rst
(Validity Checking): Add "p" to the list of switches enabled by
-gnatVa.
* gnat_ugn.texi: Regenerate.

diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -4403,7 +4403,7 @@ to the default checks required by Ada as described above.
 
   All validity checks are turned on.
   That is, :switch:`-gnatVa` is
-  equivalent to ``gnatVcdfimorst``.
+  equivalent to ``gnatVcdfimoprst``.
 
 
 .. index:: -gnatVc  (gcc)


diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -13161,7 +13161,7 @@ to the default checks required by Ada as described above.
 
 All validity checks are turned on.
 That is, @code{-gnatVa} is
-equivalent to @code{gnatVcdfimorst}.
+equivalent to @code{gnatVcdfimoprst}.
 @end table
 
 @geindex -gnatVc (gcc)




[Ada] Allow aspect Relaxed_Initialization on protected entries

2020-07-07 Thread Pierre-Marie de Rodat
Fix the initial support for aspect Relaxed_Initialization based on the
finalized SPARK RM wording.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Analyze_Relaxed_Parameter): Fix for protected
entries.

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -2276,7 +2276,7 @@ package body Sem_Ch13 is
 
--  Annotation of a subprogram; aspect expression is required
 
-   elsif Is_Subprogram (E) then
+   elsif Is_Subprogram_Or_Entry (E) then
   if Present (Expr) then
 
  --  If we analyze subprogram body that acts as its own




[Ada] Use pragma Unsuppress in Time_IO

2020-07-07 Thread Pierre-Marie de Rodat
We are relying on language-defined checks (like slice out of bounds) to
detect certain cases of incorrect syntax in time strings.  But the
run-time system is usually compiled with checks suppressed, so this is
erroneous.

We were doing something like:

X := T'Value (A (Lo .. Hi));
-- Could be out of bounds, or not digits
...
if not X'Valid then
   raise ...

That 'Valid happened to work in some cases, but it's technically too
late; execution is erroneous before we get there. Also, X could happen
to be valid, even though it's uninitialized, in which case we would get
a wrong Time value.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/g-catiio.adb (Value, Parse_ISO_8601): Unsuppress
checks, and don't rely on 'Valid.

diff --git a/gcc/ada/libgnat/g-catiio.adb b/gcc/ada/libgnat/g-catiio.adb
--- a/gcc/ada/libgnat/g-catiio.adb
+++ b/gcc/ada/libgnat/g-catiio.adb
@@ -654,6 +654,12 @@ package body GNAT.Calendar.Time_IO is
Time: out Ada.Calendar.Time;
Success : out Boolean)
is
+  pragma Unsuppress (All_Checks);
+  --  This is necessary because the run-time library is usually compiled
+  --  with checks suppressed, and we are relying on constraint checks in
+  --  this code to catch syntax errors in the Date string (e.g. out of
+  --  bounds slices).
+
   Index : Positive := Date'First;
   --  The current character scan index. After a call to Advance, Index
   --  points to the next character.
@@ -1021,7 +1027,10 @@ package body GNAT.Calendar.Time_IO is
   Success := True;
 
exception
-  when Wrong_Syntax =>
+  when Wrong_Syntax | Constraint_Error =>
+ --  If constraint check fails, we want to behave the same as
+ --  Wrong_Syntax; we want the caller (Value) to try other
+ --  allowed syntaxes.
  Time :=
Time_Of (Year_Number'First, Month_Number'First, Day_Number'First);
  Success := False;
@@ -1032,6 +1041,8 @@ package body GNAT.Calendar.Time_IO is
---
 
function Value (Date : String) return Ada.Calendar.Time is
+  pragma Unsuppress (All_Checks); -- see comment in Parse_ISO_8601
+
   D  : String (1 .. 21);
   D_Length   : constant Natural := Date'Length;
 
@@ -1281,18 +1292,6 @@ package body GNAT.Calendar.Time_IO is
  Extract_Time (1, Hour, Minute, Second, Check_Space => False);
   end if;
 
-  --  Sanity checks
-
-  if not Year'Valid
-or else not Month'Valid
-or else not Day'Valid
-or else not Hour'Valid
-or else not Minute'Valid
-or else not Second'Valid
-  then
- raise Constraint_Error;
-  end if;
-
   return Time_Of (Year, Month, Day, Hour, Minute, Second);
end Value;
 




[Ada] Add support for XDR streaming in the default runtime

2020-07-07 Thread Pierre-Marie de Rodat
Since we no longer need to distribute s-stratt__xdr.adb, some code in
Makefile.rtl is now dead. Update the GNAT UG accordingly.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* Makefile.rtl: Remove dead code.
* doc/gnat_ugn/building_executable_programs_with_gnat.rst:
Document gnatbind -xdr switch.
* gnat_ugn.texi: Regenerate.

diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -2704,10 +2704,7 @@ setup-rts: force
 	$(RM) $(foreach PAIR,$(LIBGNAT_TARGET_PAIRS), \
 	$(RTSDIR)/$(word 1,$(subst <, ,$(PAIR
 	for f in $(RTSDIR)/*-*__*.ads $(RTSDIR)/*-*__*.adb; do \
-	  case "$$f" in \
-	$(RTSDIR)/s-stratt__*) ;; \
-	*) $(RM) $$f ;; \
-	  esac; \
+	  $(RM) $$f ; \
 	done
 # Copy new target dependent sources
 	$(foreach PAIR,$(LIBGNAT_TARGET_PAIRS), \


diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -6711,6 +6711,14 @@ be presented in subsequent sections.
   Exclude source files (check object consistency only).
 
 
+  .. index:: -xdr  (gnatbind)
+
+:switch:`-xdr`
+  Use the target-independent XDR protocol for stream oriented attributes
+  instead of the default implementation which is based on direct binary
+  representations and is therefore target-and endianness-dependent.
+
+
   .. index:: -Xnnn  (gnatbind)
 
 :switch:`-X{nnn}`


diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -16142,6 +16142,14 @@ Override default wide character encoding for standard Text_IO files.
 
 Exclude source files (check object consistency only).
 
+@geindex -xdr (gnatbind)
+
+@item @code{-xdr}
+
+Use the target-independent XDR protocol for stream oriented attributes
+instead of the default implementation which is based on direct binary
+representations and is therefore target-and endianness-dependent.
+
 @geindex -Xnnn (gnatbind)
 
 @item @code{-X@emph{nnn}}




[Ada] Ada2020: AI12-0198 potentially unevaluated components of arrays

2020-07-07 Thread Pierre-Marie de Rodat
This patch fixes a regression in the SPARK testsuite.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.adb (Is_Potentially_Unevaluated): Protect reading
attribute Etype.

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -17805,6 +17805,8 @@ package body Sem_Util is
N_Or_Else,
N_Quantified_Expression)
 and then not (Nkind (Par) = N_Aggregate
+and then Present (Etype (Par))
+and then Etype (Par) /= Any_Composite
 and then Is_Array_Type (Etype (Par)))
   loop
  Expr := Par;
@@ -17871,6 +17873,7 @@ package body Sem_Util is
   return True;
 
elsif Nkind (Choice) = N_Identifier
+ and then Present (Etype (Choice))
  and then Present (Scalar_Range (Etype (Choice)))
  and then
Non_Static_Or_Null_Range (Scalar_Range (Etype (Choice)))




[Ada] ACATS 4.1H - BC60005 - null exclusion matching for formal subprograms

2020-07-07 Thread Pierre-Marie de Rodat
This ACATS test shows that we had several inconsistencies in the
checking of null exclusion matching. We also realized that some old code
in sem_ch6.adb was wrong and no longer necessary, so removed it.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Check_Conformance): Remove unnecessary (and
wrong) code.
* sem_ch8.adb (Check_Null_Exclusion): Post error at proper
location.  Introduce new helper Null_Exclusion_Mismatch and fix
implementation wrt formal subprograms used in generic bodies.
(Analyze_Subprogram_Renaming): Fix missing setting of
Error_Msg_Sloc.
(Analyze_Object_Renaming): Replace "in Anonymous_Access_Kind" by
Is_Anonymous_Access_Type.
* sem_util.adb (Has_Null_Exclusion): Fix handling of
N_Parameter_Specification.
* sem_ch12.adb (Instantiate_Object): Replace "in
Anonymous_Access_Kind" by Is_Anonymous_Access_Type.

diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -11339,9 +11339,8 @@ package body Sem_Ch12 is
 --  access type.
 
 if Ada_Version < Ada_2005
-  or else Ekind (Base_Type (Ftyp)) not in Anonymous_Access_Kind
-  or else Ekind (Base_Type (Etype (Actual)))
-not in Anonymous_Access_Kind
+  or else not Is_Anonymous_Access_Type (Base_Type (Ftyp))
+  or else not Is_Anonymous_Access_Type (Base_Type (Etype (Actual)))
 then
Error_Msg_NE
  ("type of actual does not match type of&", Actual, Gen_Obj);


diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -5668,7 +5668,6 @@ package body Sem_Ch6 is
   New_Type   : constant Entity_Id := Etype (New_Id);
   Old_Formal : Entity_Id;
   New_Formal : Entity_Id;
-  Access_Types_Match : Boolean;
   Old_Formal_Base: Entity_Id;
   New_Formal_Base: Entity_Id;
 
@@ -5869,57 +5868,6 @@ package body Sem_Ch6 is
 New_Formal_Base := Get_Instance_Of (New_Formal_Base);
  end if;
 
- Access_Types_Match := Ada_Version >= Ada_2005
-
-   --  Ensure that this rule is only applied when New_Id is a
-   --  renaming of Old_Id.
-
-   and then Nkind (Parent (Parent (New_Id))) =
-  N_Subprogram_Renaming_Declaration
-   and then Nkind (Name (Parent (Parent (New_Id in N_Has_Entity
-   and then Present (Entity (Name (Parent (Parent (New_Id)
-   and then Entity (Name (Parent (Parent (New_Id = Old_Id
-
-   --  Now handle the allowed access-type case
-
-   and then Is_Access_Type (Old_Formal_Base)
-   and then Is_Access_Type (New_Formal_Base)
-
-   --  The type kinds must match. The only exception occurs with
-   --  multiple generics of the form:
-
-   --   genericgeneric
-   -- type F is private; type A is private;
-   -- type F_Ptr is access F;type A_Ptr is access A;
-   -- with proc F_P (X : F_Ptr); with proc A_P (X : A_Ptr);
-   --   package F_Pack is ...  package A_Pack is
-   --package F_Inst is
-   --  new F_Pack (A, A_Ptr, A_P);
-
-   --  When checking for conformance between the parameters of A_P
-   --  and F_P, the type kinds of F_Ptr and A_Ptr will not match
-   --  because the compiler has transformed A_Ptr into a subtype of
-   --  F_Ptr. We catch this case in the code below.
-
-   and then (Ekind (Old_Formal_Base) = Ekind (New_Formal_Base)
-  or else
-(Is_Generic_Type (Old_Formal_Base)
-  and then Is_Generic_Type (New_Formal_Base)
-  and then Is_Internal (New_Formal_Base)
-  and then Etype (Etype (New_Formal_Base)) =
-  Old_Formal_Base))
-   and then Directly_Designated_Type (Old_Formal_Base) =
-Directly_Designated_Type (New_Formal_Base)
-   and then ((Is_Itype (Old_Formal_Base)
-   and then (Can_Never_Be_Null (Old_Formal_Base)
-  or else Is_Access_Constant
-(Old_Formal_Base)))
- or else
-  (Is_Itype (New_Formal_Base)
-and then (Can_Never_Be_Null (New_Formal_Base)
-   or else Is_Access_Constant
- (New_Formal_Base;
-
  --  Types must always match. In the visible part of an instance,
  --  usual overloading ru

[Ada] Freeze aspect expression relocated to pragma Precondition

2020-07-07 Thread Pierre-Marie de Rodat
In Analyze_Aspect_Specification we relocate the expression of a Pre aspect
to pragma Precondition. From this point on we only process the
relocated expression; in particular, we freeze it and then copy it to a
pragma Check (which will ultimately execute this expression when the
subprogram is called).

This fixes a bug where freezing of the original expression (in
particular its analysis) was breaking the Parent links in the relocated
expression.  Then validity checks for this precondition expression were
inserted next to the original aspect, i.e. outside of the subprogram
body.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* contracts.adb (Analyze_Entry_Or_Subprogram_Contract,
Process_Preconditions_For): Freeze expression that has been
relocated to pragma Precondition, not the expression which is
still in the aspect.

diff --git a/gcc/ada/contracts.adb b/gcc/ada/contracts.adb
--- a/gcc/ada/contracts.adb
+++ b/gcc/ada/contracts.adb
@@ -653,7 +653,9 @@ package body Contracts is
  Freeze_Expr_Types
(Def_Id => Subp_Id,
 Typ=> Standard_Boolean,
-Expr   => Expression (Corresponding_Aspect (Prag)),
+Expr   =>
+  Expression
+(First (Pragma_Argument_Associations (Prag))),
 N  => Bod);
   end if;
 
@@ -2603,7 +2605,9 @@ package body Contracts is
 Freeze_Expr_Types
   (Def_Id => Subp_Id,
Typ=> Standard_Boolean,
-   Expr   => Expression (Corresponding_Aspect (Prag)),
+   Expr   =>
+ Expression
+   (First (Pragma_Argument_Associations (Prag))),
N  => Body_Decl);
  end if;
 




[Ada] Hang on conditional expression as actual

2020-07-07 Thread Pierre-Marie de Rodat
This patch fixes a compiler bug whereby conditional expressions used as
actuals for anonymous access types in subprogram calls may result in an
infinite loop when one of the branches of said conditional expression
expands to an N_Expression_With_Actions.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Expand_Branch): Verify the original node is a
conditional expression before recursing further.
(Insert_Level_Assign): Transform assertion into an explicit
raise.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -3947,6 +3947,11 @@ package body Exp_Ch6 is
 
  if Nkind (Expression (Assn)) =
   N_Expression_With_Actions
+   and then
+ Nkind_In
+   (Original_Node (Expression (Assn)),
+ N_Case_Expression,
+ N_If_Expression)
  then
 Insert_Level_Assign (Expression (Assn));
 
@@ -3983,7 +3988,10 @@ package body Exp_Ch6 is
N_If_Statement);
 
  Next (Cond);
- pragma Assert (Present (Cond));
+
+ if No (Cond) then
+raise Program_Error;
+ end if;
   end loop;
 
   --  Iterate through if expression branches




[Ada] Move generation of range checks for entry families to expander

2020-07-07 Thread Pierre-Marie de Rodat
This converts the generation of range checks for entry families to the
separate analysis and generation model: the semantic analyzer sets the
Do_Range_Check flag on the node to be range-checked and then later the
expander generates the actual range check.  This also removes a useless
conditional construct in Resolve_Indexed_Component.

No functional changes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* checks.ads (Expander Routines): Update the description of the
Do_Range_Check mechanism.
* checks.adb (Selected_Range_Checks): Fix typo.
* exp_ch9.adb: Add with and use clause for Checks.
(Actual_Index_Expression): Generate a range check if requested.
(Entry_Index_Expression): Likewise.
* sem_attr.adb (Resolve_Attribute) : Call
Apply_Scalar_Range_Check instead of Apply_Range_Check.
* sem_ch9.adb (Analyze_Accept_Statement): Likewise.
* sem_res.adb (Resolve_Entry): Likewise, after having set the
actual index type on the prefix of the indexed component.
(Resolve_Indexed_Component): Remove useless conditional construct.

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -354,7 +354,7 @@ package body Checks is
   Target_Typ : Entity_Id;
   Source_Typ : Entity_Id;
   Warn_Node  : Node_Id) return Check_Result;
-   --  Like Apply_Range_Checks, except it doesn't modify anything, just
+   --  Like Apply_Range_Check, except it does not modify anything, just
--  returns a list of nodes as described in the spec of this package
--  for the Range_Check function.
 


diff --git a/gcc/ada/checks.ads b/gcc/ada/checks.ads
--- a/gcc/ada/checks.ads
+++ b/gcc/ada/checks.ads
@@ -674,13 +674,13 @@ package Checks is
-- Expander Routines --
---
 
-   --  Some of the earlier processing for checks results in temporarily setting
-   --  the Do_Range_Check flag rather than actually generating checks. Probably
-   --  we could eliminate the Do_Range_Check flag entirely and generate checks
-   --  earlier, but this is a delicate area and it seems safer to implement the
-   --  following routines, which are called later on in the expansion process.
-   --  They check the Do_Range_Check flag and if it is set, generate the actual
-   --  checks and reset the flag.
+   --  In most cases, the processing for range checks done by semantic analysis
+   --  only results in setting the Do_Range_Check flag, rather than actually
+   --  generating checks. The following routines must be called later on in the
+   --  expansion process upon seeing the Do_Range_Check flag; they generate the
+   --  actual checks and reset the flag. The remaining cases where range checks
+   --  are still directly generated during semantic analysis occur as part of
+   --  the processing of constraints in (sub)type and object declarations.
 
procedure Generate_Range_Check
  (N   : Node_Id;
@@ -694,11 +694,11 @@ package Checks is
--  if raised.
--
--  Note: if the expander is not active, or if we are in GNATprove mode,
-   --  then we do not generate explicit range code. Instead we just turn the
+   --  then we do not generate explicit range checks. Instead we just turn the
--  Do_Range_Check flag on, since in these cases that's what we want to see
--  in the tree (GNATprove in particular depends on this flag being set). If
-   --  we generate the actual range check, then we make sure the flag is off,
-   --  since the code we generate takes complete care of the check.
+   --  we generate the actual range checks, then we make sure the flag is off
+   --  afterward, since the code we generate takes complete care of the checks.
--
--  Historical note: We used to just pass on the Do_Range_Check flag to the
--  back end to generate the check, but now in code-generation mode we never


diff --git a/gcc/ada/exp_ch9.adb b/gcc/ada/exp_ch9.adb
--- a/gcc/ada/exp_ch9.adb
+++ b/gcc/ada/exp_ch9.adb
@@ -24,6 +24,7 @@
 --
 
 with Atree;use Atree;
+with Checks;   use Checks;
 with Einfo;use Einfo;
 with Elists;   use Elists;
 with Errout;   use Errout;
@@ -589,6 +590,14 @@ package body Exp_Ch9 is
   if Present (Index) then
  S := Entry_Index_Type (Ent);
 
+ --  First make sure the index is in range if requested. The index type
+ --  has been directly set on the prefix, see Resolve_Entry.
+
+ if Do_Range_Check (Index) then
+Generate_Range_Check
+  (Index, Etype (Prefix (Parent (Index))), CE_Range_Check_Failed);
+ end if;
+
  Expr :=
Make_Op_Add (Sloc,
  Left_Opnd  => Num,
@@ -5624,6 +5633,13 @@ package body Exp_Ch9 is
   if Present (Index) then
  S := Entry_Index_Type (Ent);
 
+ --  First make sure the index is in range if request

[Ada] Avoid crash on node extension

2020-07-07 Thread Pierre-Marie de Rodat
If a Node_Id points to a node extension, rather than an actual node,
avoid crashing in the various debugging printouts, but just print an
indication that it's an extension.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* treepr.adb (Print_Node): Add code to test Is_Extension.

diff --git a/gcc/ada/treepr.adb b/gcc/ada/treepr.adb
--- a/gcc/ada/treepr.adb
+++ b/gcc/ada/treepr.adb
@@ -1006,6 +1006,15 @@ package body Treepr is
  return;
   end if;
 
+  --  Similarly, if N points to an extension, avoid crashing
+
+  if Atree_Private_Part.Nodes.Table (N).Is_Extension then
+ Print_Int (Int (N));
+ Print_Str (" is an extension, not a node");
+ Print_Eol;
+ return;
+  end if;
+
   Prefix_Str_Char (Prefix_Str'Range):= Prefix_Str;
   Prefix_Str_Char (Prefix_Str'Last + 1) := Prefix_Char;
 




[Ada] Errors in handling of access_to_subprogram contracts

2020-07-07 Thread Pierre-Marie de Rodat
This patch fixes errors in the construction of wrappers for
Access_To_Subprogram types that carry pre/postconditions.

a) The formals of the subprogram body for the wrapper must be distinct
from those of the corresponding declaration, to prevent spurious
visibility errors when other homonyms appear in the subsequent code.

b) The Access_To_Subprogram type may carry default values. When the
actuals are omitted in an indirect call, the default values are inserted
in the call by means of parameter associations. As a result, the final
parameter in the call within the wrapper body (which is the pointer to a
subprogram) must appear as a parameter association as well.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch3.adb (Build_Access_Subprogram_Wrapper_Body): Create
proper subprogram specification for body, using names in the
subprogram declaration but distinct entities.
* exp_ch6.adb (Expand_Call): If this is an indirect call
involving a subprogram wrapper, insert pointer parameter in list
of actuals with a parameter association, not as a positional
parameter.

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -528,7 +528,8 @@ package body Exp_Ch3 is
   Type_Def  : constant Node_Id:= Type_Definition (Decl);
   Type_Id   : constant Entity_Id  := Defining_Identifier (Decl);
   Spec_Node : constant Node_Id:=
-New_Copy_Tree (Specification (New_Decl));
+Copy_Subprogram_Spec (Specification (New_Decl));
+  --  This copy creates new identifiers for formals and subprogram.
 
   Act   : Node_Id;
   Body_Node : Node_Id;
@@ -540,12 +541,8 @@ package body Exp_Ch3 is
  return;
   end if;
 
-  Set_Defining_Unit_Name (Spec_Node,
-Make_Defining_Identifier
-  (Loc, Chars (Defining_Unit_Name (Spec_Node;
-
   --  Create List of actuals for indirect call. The last parameter of the
-  --  subprogram is the access value itself.
+  --  subprogram declaration is the access value for the indirect call.
 
   Act := First (Parameter_Specifications (Spec_Node));
 
@@ -558,7 +555,7 @@ package body Exp_Ch3 is
 
   Ptr :=
 Defining_Identifier
-  (Last (Parameter_Specifications (Spec_Node)));
+  (Last (Parameter_Specifications (Specification (New_Decl;
 
   if Nkind (Type_Def) = N_Access_Procedure_Definition then
  Call_Stmt := Make_Procedure_Call_Statement (Loc,


diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -2686,25 +2686,35 @@ package body Exp_Ch6 is
 Parms: constant List_Id   := Parameter_Associations (N);
 Typ  : constant Entity_Id := Etype (N);
 New_N: Node_Id;
+Ptr_Act  : Node_Id;
 
  begin
 --  The last actual in the call is the pointer itself.
 --  If the aspect is inherited, convert the pointer to the
 --  parent type that specifies the contract.
+--  If the original access_to_subprogram has defaults for
+--  in_parameters, the call may include named associations, so
+--  we create one for the pointer as well.
 
 if Is_Derived_Type (Ptr_Type)
   and then Ptr_Type /= Etype (Last_Formal (Wrapper))
 then
-   Append
-(Make_Type_Conversion (Loc,
-   New_Occurrence_Of
-(Etype (Last_Formal (Wrapper)), Loc), Ptr),
-   Parms);
+   Ptr_Act :=
+Make_Type_Conversion (Loc,
+  New_Occurrence_Of
+(Etype (Last_Formal (Wrapper)), Loc), Ptr);
 
 else
-   Append (Ptr, Parms);
+   Ptr_Act := Ptr;
 end if;
 
+Append
+ (Make_Parameter_Association (Loc,
+Selector_Name => Make_Identifier (Loc,
+   Chars (Last_Formal (Wrapper))),
+ Explicit_Actual_Parameter => Ptr_Act),
+  Parms);
+
 if Nkind (N) = N_Procedure_Call_Statement then
New_N := Make_Procedure_Call_Statement (Loc,
   Name  => New_Occurrence_Of (Wrapper, Loc),




[Ada] ACATS 4.1K - C452003

2020-07-07 Thread Pierre-Marie de Rodat
This test generates an assertion failure when compiling
c452003_root-child.adb and shows that we are missing a null check in
membership tests.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch4.adb (Tagged_Membership): Generate a call to
CW_Membership instead of using Build_CW_Membership.
(Expand_N_In): Remove wrong handling of null access types and
corresponding comment.
* exp_intr.adb (Expand_Dispatching_Constructor_Call): Generate a
call to CW_Membership instead of using Build_CW_Membership.
* rtsfind.ads: Add CW_Membership.
* exp_atag.ads, exp_atag.adb (Build_CW_Membership): Removed.
* einfo.ads: Fix typo.
* libgnat/a-tags.ads, libgnat/a-tags.adb (CW_Membership): Moved
back to spec.

diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -591,7 +591,7 @@ package Einfo is
 --   never have a null value. Set for constant access values initialized to
 --   a non-null value. This is also set for all access parameters in Ada 83
 --   and Ada 95 modes, and for access parameters that explicitly exclude
---   exclude null in Ada 2005 mode.
+--   null in Ada 2005 mode.
 --
 --   This is used to avoid unnecessary resetting of the Is_Known_Non_Null
 --   flag for such entities. In Ada 2005 mode, this is also used when


diff --git a/gcc/ada/exp_atag.adb b/gcc/ada/exp_atag.adb
--- a/gcc/ada/exp_atag.adb
+++ b/gcc/ada/exp_atag.adb
@@ -27,7 +27,6 @@ with Atree;use Atree;
 with Einfo;use Einfo;
 with Elists;   use Elists;
 with Exp_Disp; use Exp_Disp;
-with Exp_Util; use Exp_Util;
 with Namet;use Namet;
 with Nlists;   use Nlists;
 with Nmake;use Nmake;
@@ -159,118 +158,6 @@ package body Exp_Atag is
   Make_Simple_Return_Statement (Loc;
end Build_Common_Dispatching_Select_Statements;
 
-   -
-   -- Build_CW_Membership --
-   -
-
-   procedure Build_CW_Membership
- (Loc  : Source_Ptr;
-  Obj_Tag_Node : in out Node_Id;
-  Typ_Tag_Node : Node_Id;
-  Related_Nod  : Node_Id;
-  New_Node : out Node_Id)
-   is
-  Tag_Addr : constant Entity_Id := Make_Temporary (Loc, 'D', Obj_Tag_Node);
-  Obj_TSD  : constant Entity_Id := Make_Temporary (Loc, 'D');
-  Typ_TSD  : constant Entity_Id := Make_Temporary (Loc, 'D');
-  Index: constant Entity_Id := Make_Temporary (Loc, 'D');
-
-   begin
-  --  Generate:
-
-  --Tag_Addr : constant Tag := Address!(Obj_Tag);
-  --Obj_TSD  : constant Type_Specific_Data_Ptr
-  --  := Build_TSD (Tag_Addr);
-  --Typ_TSD  : constant Type_Specific_Data_Ptr
-  --  := Build_TSD (Address!(Typ_Tag));
-  --Index: constant Integer := Obj_TSD.Idepth - Typ_TSD.Idepth
-  --Index >= 0 and then Obj_TSD.Tags_Table (Index) = Typ'Tag
-
-  Insert_Action (Related_Nod,
-Make_Object_Declaration (Loc,
-  Defining_Identifier => Tag_Addr,
-  Constant_Present=> True,
-  Object_Definition   => New_Occurrence_Of (RTE (RE_Address), Loc),
-  Expression  => Unchecked_Convert_To
-   (RTE (RE_Address), Obj_Tag_Node)));
-
-  --  Unchecked_Convert_To relocates Obj_Tag_Node and therefore we must
-  --  update it.
-
-  Obj_Tag_Node := Expression (Expression (Parent (Tag_Addr)));
-
-  Insert_Action (Related_Nod,
-Make_Object_Declaration (Loc,
-  Defining_Identifier => Obj_TSD,
-  Constant_Present=> True,
-  Object_Definition   =>
-New_Occurrence_Of (RTE (RE_Type_Specific_Data_Ptr), Loc),
-  Expression  =>
-Build_TSD (Loc, New_Occurrence_Of (Tag_Addr, Loc))),
-Suppress => All_Checks);
-
-  Insert_Action (Related_Nod,
-Make_Object_Declaration (Loc,
-  Defining_Identifier => Typ_TSD,
-  Constant_Present=> True,
-  Object_Definition   =>
-New_Occurrence_Of (RTE (RE_Type_Specific_Data_Ptr), Loc),
-  Expression  =>
-Build_TSD (Loc,
-  Unchecked_Convert_To (RTE (RE_Address), Typ_Tag_Node))),
-Suppress => All_Checks);
-
-  Insert_Action (Related_Nod,
-Make_Object_Declaration (Loc,
-  Defining_Identifier => Index,
-  Constant_Present=> True,
-  Object_Definition   => New_Occurrence_Of (Standard_Integer, Loc),
-  Expression =>
-Make_Op_Subtract (Loc,
-  Left_Opnd =>
-Make_Selected_Component (Loc,
-  Prefix=>
-Make_Explicit_Dereference (Loc,
-  New_Occurrence_Of (Obj_TSD, Loc)),
-  Selector_Name =>
- New_Occurrence_Of
-   (RTE_Record_Component (RE_Idep

[Ada] Assert failure on invalid Variable_Indexing

2020-07-07 Thread Pierre-Marie de Rodat
GNAT is getting confused and will either generate an assert failure or a
confusing error message (when assertions are disabled) in case of an
invalid Variable_Indexing.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch4.adb (Try_Container_Indexing): Add ??? comment. Protect
against malformed tree in case of errors.

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -286,6 +286,7 @@ package body Sem_Ch4 is
   Prefix : Node_Id;
   Exprs  : List_Id) return Boolean;
--  AI05-0139: Generalized indexing to support iterators over containers
+   --  ??? Need to provide a more detailed spec of what this function does
 
function Try_Indexed_Call
  (N  : Node_Id;
@@ -8469,6 +8470,12 @@ package body Sem_Ch4 is
   if not Is_Overloaded (Func_Name) then
  Func := Entity (Func_Name);
 
+ --  Can happen in case of e.g. cascaded errors
+
+ if No (Func) then
+return False;
+ end if;
+
  Indexing :=
Make_Function_Call (Loc,
  Name   => New_Occurrence_Of (Func, Loc),




[Ada] Use Is_Generic_Subprogram where possible

2020-07-07 Thread Pierre-Marie de Rodat
Replace low-level Ekind_In tests with high-level Is_Generic_Subprogram.
Cleanup only; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* einfo.adb, sem_ch10.adb, sem_ch12.adb, sem_ch8.adb: Use
Is_Generic_Subprogram.

diff --git a/gcc/ada/einfo.adb b/gcc/ada/einfo.adb
--- a/gcc/ada/einfo.adb
+++ b/gcc/ada/einfo.adb
@@ -2577,9 +2577,7 @@ package body Einfo is
 
function Is_Primitive (Id : E) return B is
begin
-  pragma Assert
-(Is_Overloadable (Id)
-   or else Ekind_In (Id, E_Generic_Function, E_Generic_Procedure));
+  pragma Assert (Is_Overloadable (Id) or else Is_Generic_Subprogram (Id));
   return Flag218 (Id);
end Is_Primitive;
 
@@ -5822,9 +5820,7 @@ package body Einfo is
 
procedure Set_Is_Primitive (Id : E; V : B := True) is
begin
-  pragma Assert
-(Is_Overloadable (Id)
-   or else Ekind_In (Id, E_Generic_Function, E_Generic_Procedure));
+  pragma Assert (Is_Overloadable (Id) or else Is_Generic_Subprogram (Id));
   Set_Flag218 (Id, V);
end Set_Is_Primitive;
 


diff --git a/gcc/ada/sem_ch10.adb b/gcc/ada/sem_ch10.adb
--- a/gcc/ada/sem_ch10.adb
+++ b/gcc/ada/sem_ch10.adb
@@ -6313,7 +6313,7 @@ package body Sem_Ch10 is
  if Is_Subprogram (E) and then Has_Pragma_Inline (E) then
 return True;
 
- elsif Ekind_In (E, E_Generic_Function, E_Generic_Procedure) then
+ elsif Is_Generic_Subprogram (E) then
 
 --  A generic subprogram always requires the presence of its
 --  body because an instantiation needs both templates. The only


diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -5664,8 +5664,7 @@ package body Sem_Ch12 is
  --  If renaming, get original unit
 
  if Present (Renamed_Object (Gen_Unit))
-   and then Ekind_In (Renamed_Object (Gen_Unit), E_Generic_Procedure,
- E_Generic_Function)
+   and then Is_Generic_Subprogram (Renamed_Object (Gen_Unit))
  then
 Gen_Unit := Renamed_Object (Gen_Unit);
 Set_Is_Instantiated (Gen_Unit);


diff --git a/gcc/ada/sem_ch8.adb b/gcc/ada/sem_ch8.adb
--- a/gcc/ada/sem_ch8.adb
+++ b/gcc/ada/sem_ch8.adb
@@ -729,7 +729,7 @@ package body Sem_Ch8 is
  --  For subprograms, propagate the Intrinsic flag, to allow, e.g.
  --  renamings and subsequent instantiations of Unchecked_Conversion.
 
- if Ekind_In (Old_P, E_Generic_Function, E_Generic_Procedure) then
+ if Is_Generic_Subprogram (Old_P) then
 Set_Is_Intrinsic_Subprogram
   (New_P, Is_Intrinsic_Subprogram (Old_P));
  end if;




[Ada] Simplify statically known Max_Size_In_Storage_Elements attribute in more cases

2020-07-07 Thread Pierre-Marie de Rodat
Generalize the existing constant-folding code that is used for Size
attribute references to handle Max_Size_In_Storage_Elements attribute
references as well.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_attr.adb (Eval_Attribute): Generalize static evaluation of
Size attribute references to also handle
Max_Size_In_Storage_Elements references.

diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -8127,14 +8127,24 @@ package body Sem_Attr is
   --  for a size from an attribute definition clause). At this stage, this
   --  can happen only for types (e.g. record types) for which the size is
   --  always non-static. We exclude generic types from consideration (since
-  --  they have bogus sizes set within templates).
+  --  they have bogus sizes set within templates). We can also fold
+  --  Max_Size_In_Storage_Elements in the same cases.
 
-  elsif Id = Attribute_Size
+  elsif (Id = Attribute_Size or
+ Id = Attribute_Max_Size_In_Storage_Elements)
 and then Is_Type (P_Entity)
 and then (not Is_Generic_Type (P_Entity))
 and then Known_Static_RM_Size (P_Entity)
   then
- Compile_Time_Known_Attribute (N, RM_Size (P_Entity));
+ declare
+Attr_Value : Uint := RM_Size (P_Entity);
+ begin
+if Id = Attribute_Max_Size_In_Storage_Elements then
+   Attr_Value := (Attr_Value + System_Storage_Unit - 1)
+ / System_Storage_Unit;
+end if;
+Compile_Time_Known_Attribute (N, Attr_Value);
+ end;
  return;
 
   --  We can fold 'Alignment applied to a type if the alignment is known




[Ada] Fix typo in code to get a better error message

2020-07-07 Thread Pierre-Marie de Rodat
For an illegal name of a generic unit in a USE clause we tailor the message
based on the kind of the generic, but we failed to detect generic
procedures.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch8.adb (Analyze_Use_Package): Replace low-level,
error-prone Ekind_In tests with high-level Is_Generic_Subprogram
and Is_Subprogram.

diff --git a/gcc/ada/sem_ch8.adb b/gcc/ada/sem_ch8.adb
--- a/gcc/ada/sem_ch8.adb
+++ b/gcc/ada/sem_ch8.adb
@@ -3981,20 +3981,19 @@ package body Sem_Ch8 is
  Set_Prev_Use_Clause (N, Current_Use_Clause (Pack));
   end if;
 
-  --  Mark all entities as potentially use visible.
+  --  Mark all entities as potentially use visible
 
   if Ekind (Pack) /= E_Package and then Etype (Pack) /= Any_Type then
  if Ekind (Pack) = E_Generic_Package then
 Error_Msg_N  -- CODEFIX
   ("a generic package is not allowed in a use clause", Name (N));
 
- elsif Ekind_In (Pack, E_Generic_Function, E_Generic_Package)
- then
+ elsif Is_Generic_Subprogram (Pack) then
 Error_Msg_N  -- CODEFIX
   ("a generic subprogram is not allowed in a use clause",
Name (N));
 
- elsif Ekind_In (Pack, E_Function, E_Procedure, E_Operator) then
+ elsif Is_Subprogram (Pack) then
 Error_Msg_N  -- CODEFIX
   ("a subprogram is not allowed in a use clause", Name (N));
 




[Ada] ACATS 4.1J - B854003 - Subtype conformance check missed #2

2020-07-07 Thread Pierre-Marie de Rodat
This is a follow-up to the previous change, which didn't handle the case of
Errmsg = False in Check_Conformance properly.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Check_Formal_Subprogram_Conformance): New
subprogram to handle checking without systematically emitting an
error.
(Check_Conformance): Update call to
Check_Formal_Subprogram_Conformance and fix handling of Conforms
and Errmsg parameters.

diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -152,6 +152,16 @@ package body Sem_Ch6 is
--  against a formal access-to-subprogram type so Get_Instance_Of must
--  be called.
 
+   procedure Check_Formal_Subprogram_Conformance
+ (New_Id   : Entity_Id;
+  Old_Id   : Entity_Id;
+  Err_Loc  : Node_Id;
+  Errmsg   : Boolean;
+  Conforms : out Boolean);
+   --  Core implementation of Check_Formal_Subprogram_Conformance from spec.
+   --  Errmsg can be set to False to not emit error messages.
+   --  Conforms is set to True if there is conformance, False otherwise.
+
procedure Check_Limited_Return
  (N  : Node_Id;
   Expr   : Node_Id;
@@ -5759,14 +5769,19 @@ package body Sem_Ch6 is
Error_Msg_Name_2 :=
  Name_Ada + Convention_Id'Pos (Convention (New_Id));
Conformance_Error ("\prior declaration for% has convention %!");
+   return;
 
 else
Conformance_Error ("\calling conventions do not match!");
+   return;
 end if;
-
-return;
  else
-Check_Formal_Subprogram_Conformance (New_Id, Old_Id, Err_Loc);
+Check_Formal_Subprogram_Conformance
+  (New_Id, Old_Id, Err_Loc, Errmsg, Conforms);
+
+if not Conforms then
+   return;
+end if;
  end if;
   end if;
 
@@ -5932,7 +5947,11 @@ package body Sem_Ch6 is
   begin
  if Is_Protected_Type (Corresponding_Concurrent_Type (T))
  then
-Error_Msg_PT (New_Id, Ultimate_Alias (Old_Id));
+Conforms := False;
+
+if Errmsg then
+   Error_Msg_PT (New_Id, Ultimate_Alias (Old_Id));
+end if;
  else
 Conformance_Error
   ("\mode of & does not match!", New_Formal);
@@ -6489,12 +6508,16 @@ package body Sem_Ch6 is
-
 
procedure Check_Formal_Subprogram_Conformance
- (New_Id  : Entity_Id;
-  Old_Id  : Entity_Id;
-  Err_Loc : Node_Id := Empty)
+ (New_Id   : Entity_Id;
+  Old_Id   : Entity_Id;
+  Err_Loc  : Node_Id;
+  Errmsg   : Boolean;
+  Conforms : out Boolean)
is
   N : Node_Id;
begin
+  Conforms := True;
+
   if Is_Formal_Subprogram (Old_Id)
 or else Is_Formal_Subprogram (New_Id)
 or else (Is_Subprogram (New_Id)
@@ -6507,14 +6530,29 @@ package body Sem_Ch6 is
 N := New_Id;
  end if;
 
- Error_Msg_Sloc := Sloc (Old_Id);
- Error_Msg_N ("not subtype conformant with declaration#!", N);
- Error_Msg_NE
-   ("\formal subprograms are not subtype conformant "
-& "(RM 6.3.1 (17/3))", N, New_Id);
+ Conforms := False;
+
+ if Errmsg then
+Error_Msg_Sloc := Sloc (Old_Id);
+Error_Msg_N ("not subtype conformant with declaration#!", N);
+Error_Msg_NE
+  ("\formal subprograms are not subtype conformant "
+   & "(RM 6.3.1 (17/3))", N, New_Id);
+ end if;
   end if;
end Check_Formal_Subprogram_Conformance;
 
+   procedure Check_Formal_Subprogram_Conformance
+ (New_Id  : Entity_Id;
+  Old_Id  : Entity_Id;
+  Err_Loc : Node_Id := Empty)
+   is
+  Ignore : Boolean;
+   begin
+  Check_Formal_Subprogram_Conformance
+(New_Id, Old_Id, Err_Loc, True, Ignore);
+   end Check_Formal_Subprogram_Conformance;
+

-- Check_Fully_Conformant --

@@ -8848,7 +8886,7 @@ package body Sem_Ch6 is
 
 --  Warn unless genuine overloading. Do not emit warning on
 --  hiding predefined operators in Standard (these are either an
---  (artifact of our implicit declarations, or simple noise) but
+--  artifact of our implicit declarations, or simple noise) but
 --  keep warning on a operator defined on a local subtype, because
 --  of the real danger that different operators may be applied in
 --  various parts of the program.




[Ada] Small adjustment in the handling of alignment for arrays

2020-07-07 Thread Pierre-Marie de Rodat
This moves the setting of the alignment of an array with known alignment
and size for the component type from Freeze_Array_Type to Layout_Type.

The rationale is that Layout_Type is invoked after Freeze_Array_Type
during freezing and computes the component size from the size of the
component type, which is a condition under which the setting is done.

The end result is that the alignment setting is done consistently on
base array types and subtypes, which share the same component size.

No functional changes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* freeze.adb (Freeze_Array_Type): Use Ctyp more consistently.
Move code setting the alignment in the non-packed case to...
* layout.adb (Layout_Type): ...here.

diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb
--- a/gcc/ada/freeze.adb
+++ b/gcc/ada/freeze.adb
@@ -2573,13 +2573,13 @@ package body Freeze is
 
 --  Propagate flags for component type
 
-if Is_Controlled (Component_Type (Arr))
+if Is_Controlled (Ctyp)
   or else Has_Controlled_Component (Ctyp)
 then
Set_Has_Controlled_Component (Arr);
 end if;
 
-if Has_Unchecked_Union (Component_Type (Arr)) then
+if Has_Unchecked_Union (Ctyp) then
Set_Has_Unchecked_Union (Arr);
 end if;
 
@@ -2590,7 +2590,7 @@ package body Freeze is
 --  that the procedure can be used to check the array type
 --  invariants if any.
 
-if Has_Invariants (Component_Type (Arr))
+if Has_Invariants (Ctyp)
   and then not GNATprove_Mode
 then
Set_Has_Own_Invariants (Arr);
@@ -2902,8 +2902,8 @@ package body Freeze is
 --  If the Esize of the component is known and equal to
 --  the component size then even packing is not needed.
 
-if Known_Static_Esize (Component_Type (Arr))
-  and then Esize (Component_Type (Arr)) = Csiz
+if Known_Static_Esize (Ctyp)
+  and then Esize (Ctyp) = Csiz
 then
--  Here the array was requested to be packed, but
--  the packing request had no effect whatsoever,
@@ -3157,21 +3157,6 @@ package body Freeze is
 
  <>
 
- --  For non-packed arrays set the alignment of the array to the
- --  alignment of the component type if it is unknown. Skip this
- --  in atomic/VFA case (atomic/VFA arrays may need larger alignments).
-
- if not Is_Packed (Arr)
-   and then Unknown_Alignment (Arr)
-   and then Known_Alignment (Ctyp)
-   and then Known_Static_Component_Size (Arr)
-   and then Known_Static_Esize (Ctyp)
-   and then Esize (Ctyp) = Component_Size (Arr)
-   and then not Is_Atomic_Or_VFA (Arr)
- then
-Set_Alignment (Arr, Alignment (Component_Type (Arr)));
- end if;
-
  --  A Ghost type cannot have a component of protected or task type
  --  (SPARK RM 6.9(19)).
 


diff --git a/gcc/ada/layout.adb b/gcc/ada/layout.adb
--- a/gcc/ada/layout.adb
+++ b/gcc/ada/layout.adb
@@ -467,6 +467,22 @@ package body Layout is
end if;
 end;
  end if;
+
+ --  For non-packed arrays set the alignment of the array to the
+ --  alignment of the component type if it is unknown. Skip this
+ --  in atomic/VFA case since a larger alignment may be needed.
+
+ if Is_Array_Type (E)
+   and then not Is_Packed (E)
+   and then Unknown_Alignment (E)
+   and then Known_Alignment (Component_Type (E))
+   and then Known_Static_Component_Size (E)
+   and then Known_Static_Esize (Component_Type (E))
+   and then Component_Size (E) = Esize (Component_Type (E))
+   and then not Is_Atomic_Or_VFA (E)
+ then
+Set_Alignment (E, Alignment (Component_Type (E)));
+ end if;
   end if;
 
   --  Even if the backend performs the layout, we still do a little in




[PATCH] RISC-V: Implement __builtin_thread_pointer

2020-07-07 Thread Kito Cheng
RISC-V has a dedicated register for the thread pointer, which is specified in the
psABI doc, so we can support __builtin_thread_pointer in a straightforward way.

Note: clang/llvm recently added support for __builtin_thread_pointer in its
RISC-V port.
- https://reviews.llvm.org/rGaabc24acf0d5f8677bd22fe9c108581e07c3e180

gcc/ChangeLog:

* gcc/config/riscv/riscv.md (get_thread_pointer): New.
(TP_REGNUM): Ditto.
* doc/extend.texi (Target Builtins): Add RISC-V built-in section.
Document __builtin_thread_pointer.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/read-thread-pointer.c: New.
---
 gcc/config/riscv/riscv.md|  8 
 gcc/doc/extend.texi  | 11 +++
 gcc/testsuite/gcc.target/riscv/read-thread-pointer.c |  7 +++
 3 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/read-thread-pointer.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 36012ad1f778..95a02ecaa34b 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -70,6 +70,7 @@
 (define_constants
   [(RETURN_ADDR_REGNUM 1)
(GP_REGNUM  3)
+   (TP_REGNUM  4)
(T0_REGNUM  5)
(T1_REGNUM  6)
(S0_REGNUM  8)
@@ -2515,6 +2516,13 @@
   DONE;
 })
 
+;; Named pattern for expanding thread pointer reference.
+(define_expand "get_thread_pointer"
+  [(set (match_operand:P 0 "register_operand" "=r")
+   (reg:P TP_REGNUM))]
+  ""
+{})
+
 (include "sync.md")
 (include "peephole.md")
 (include "pic.md")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ecd3661d2571..556c98f46911 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -13859,6 +13859,7 @@ instructions, but allow the compiler to schedule those 
calls.
 * PowerPC Hardware Transactional Memory Built-in Functions::
 * PowerPC Atomic Memory Operation Functions::
 * PowerPC Matrix-Multiply Assist Built-in Functions::
+* RISC-V Built-in Functions::
 * RX Built-in Functions::
 * S/390 System z Built-in Functions::
 * SH Built-in Functions::
@@ -21461,6 +21462,16 @@ vec_t __builtin_vsx_xvcvspbf16 (vec_t);
 vec_t __builtin_vsx_xvcvbf16sp (vec_t);
 @end smallexample
 
+@node RISC-V Built-in Functions
+@subsection RISC-V Built-in Functions
+
+These built-in functions are available for the RISC-V family of
+processors.
+
+@deftypefn {Built-in Function} {void *} __builtin_thread_pointer (void)
+Returns the value that is currently set in the @samp{tp} register.
+@end deftypefn
+
 @node RX Built-in Functions
 @subsection RX Built-in Functions
 GCC supports some of the RX instructions which cannot be expressed in
diff --git a/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c 
b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c
new file mode 100644
index ..760f8eafb406
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/read-thread-pointer.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+
+void *get_tp()
+{
+return __builtin_thread_pointer ();
+}
+/* { dg-final { scan-assembler "mv\[ \t\]*[at][0-9]+,tp" } } */
-- 
2.27.0



[PATCH PR95804]Force reduction partition to be scheduled in the last

2020-07-07 Thread bin.cheng via Gcc-patches
Hi,
This is a followup fix for PR95638, which changed the way post order numbers are
maintained for the partition graph.  It missed one case: when the SCC of a
reduction partition is broken by runtime alias checks, we still need to make
sure the reduction partition is scheduled last.  This patch does this by
forcing a negative post order on it.

Bootstrap and test on x86_64, is it OK?

Thanks,
bin

pr95804.patch
Description: Binary data


Re: [PATCH] Add -fld-path= to specify an arbitrary executable as the linker

2020-07-07 Thread Martin Liška

On 7/3/20 7:18 PM, Fāng-ruì Sòng wrote:


On 2020-07-03, Martin Liška wrote:

On 7/2/20 9:34 PM, Fāng-ruì Sòng wrote:

On 2020-07-01, Fāng-ruì Sòng wrote:

On 2020-07-01, Martin Liška wrote:

On 6/30/20 5:32 PM, Fāng-ruì Sòng wrote:

There is some concern about clang's -fuse-ld=path
http://lists.llvm.org/pipermail/cfe-dev/2020-June/065710.html and use
of COMPILER_PATH vs PATH.
Shall we introduce another option like -fld-path=path (PATH is used,
COMPILER_PATH is not used)?


I would recommend first landing a patch to LLVM and then we can do
a corresponding change to GCC.

Martin


Thanks a lot for your welcoming words! This is what I intend to add for clang: 
https://reviews.llvm.org/D83015

I'll create a GCC patch superseding this one later.


Attached the new patch.


Thank you for the updated patch:


From e7f86cdcaf03e4ddb98d0df9d07894d9ffb7d91a Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 2 Jul 2020 12:26:09 -0700
Subject: [PATCH] Add -fld-path= to specify an arbitrary executable as the
linker

The value can be either a relative path (relative to a COMPILER_PATH
directory or a PATH directory) or an absolute path. -fld-path=
complements -fuse-ld={bfd,gold,lld} which specifies the linker flavor.

PR driver/93645
* common.opt (-fld-path=): Add -fld-path=
* opts.c (common_handle_option): Handle OPT_fld_path_.
* gcc.c (driver_handle_option): Likewise.
* collect2.c (main): Likewise.
* doc/invoke.texi: Document -fld-path=.
---
gcc/collect2.c  | 57 -
gcc/common.opt  |  4 
gcc/doc/invoke.texi |  6 +
gcc/gcc.c   |  2 +-
gcc/opts.c  |  1 +
5 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/gcc/collect2.c b/gcc/collect2.c
index f8a5ce45994..efa652f7f82 100644
--- a/gcc/collect2.c
+++ b/gcc/collect2.c
@@ -844,6 +844,7 @@ main (int argc, char **argv)
  const char **ld1;
  bool use_plugin = false;
  bool use_collect_ld = false;
+  const char *ld_path = NULL;
  /* The kinds of symbols we will have to consider when scanning the
 outcome of a first pass link.  This is ALL to start with, then might
@@ -961,12 +962,21 @@ main (int argc, char **argv)
    if (selected_linker == USE_DEFAULT_LD)
  selected_linker = USE_PLUGIN_LD;
  }
-    else if (strcmp (argv[i], "-fuse-ld=bfd") == 0)
-  selected_linker = USE_BFD_LD;
-    else if (strcmp (argv[i], "-fuse-ld=gold") == 0)
-  selected_linker = USE_GOLD_LD;
-    else if (strcmp (argv[i], "-fuse-ld=lld") == 0)
-  selected_linker = USE_LLD_LD;
+    else if (strncmp (argv[i], "-fuse-ld=bfd", 9) == 0
+ && selected_linker != USE_LD_MAX)
+  {


This does not seem correct to me. You match -fuse-ld=bfd and then
test other option values in the following block.


This is correct but I probably should use:

- strncmp (argv[i], "-fuse-ld=bfd", 9) == 0
+ strncmp (argv[i], "-fuse-ld=", 9) == 0


Yes, that would be much better.





+    if (strcmp (argv[i] + 9, "bfd") == 0)
+  selected_linker = USE_BFD_LD;
+    else if (strcmp (argv[i] + 9, "gold") == 0)
+  selected_linker = USE_GOLD_LD;
+    else if (strcmp (argv[i] + 9, "lld") == 0)
+  selected_linker = USE_LLD_LD;
+  }
+    else if (strncmp (argv[i], "-fld-path=", 10) == 0)
+  {
+    ld_path = argv[i] + 10;
+    selected_linker = USE_LD_MAX;
+  }
else if (strncmp (argv[i], "-o", 2) == 0)
  {
    /* Parse the output filename if it's given so that we can make
@@ -1117,14 +1127,27 @@ main (int argc, char **argv)
  ld_file_name = find_a_file (&cpath, collect_ld_suffix, X_OK);
  use_collect_ld = ld_file_name != 0;
    }
-  /* Search the compiler directories for `ld'.  We have protection against
- recursive calls in find_a_file.  */
-  if (ld_file_name == 0)
-    ld_file_name = find_a_file (&cpath, ld_suffixes[selected_linker], X_OK);
-  /* Search the ordinary system bin directories
- for `ld' (if native linking) or `TARGET-ld' (if cross).  */
-  if (ld_file_name == 0)
-    ld_file_name = find_a_file (&path, full_ld_suffixes[selected_linker], 
X_OK);
+  if (selected_linker == USE_LD_MAX)
+    {
+  /* If -fld-path= does not contain a slash, search for the command using
+ the PATH environment variable.  */


We also support file systems like Windows where the comment about a slash will 
be misleading.
You can just mention relative vs. absolute path.


The behavior is modeled after
https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html 
Command Search and Execution

   e.  Otherwise, the command shall be searched for using the PATH environment 
variable as described in XBD Environment Variables :

is performed if "the command name does not contain any <slash> characters".

For your suggestion, I think 'word' can mean a relative path as well, along 
with 'rel\path' and 'rel/path'.

Should I say
   If -fld-path= does not contain a path component separator (e.g. slash), 
search for the

Re: [PATCH 5/7 v6] vect: Support vector load/store with length in vectorizer

2020-07-07 Thread Richard Sandiford
"Kewen.Lin"  writes:
> Hi Richard,
>
> on 2020/7/1 下午11:17, Richard Sandiford wrote:
>> "Kewen.Lin"  writes:
>>> on 2020/7/1 上午3:53, Richard Sandiford wrote:
 "Kewen.Lin"  writes:
>poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> +  tree length_limit = NULL_TREE;
> +  /* For length, we need length_limit to check length in range.  */
> +  if (!vect_for_masking)
> +{
> +  poly_uint64 len_limit = nscalars_per_ctrl * rgc->factor;
> +  length_limit = build_int_cst (compare_type, len_limit);
> +}
>  
>/* Calculate the maximum number of scalar values that the rgroup
>   handles in total, the number that it handles for each iteration
> @@ -434,12 +445,12 @@ vect_set_loop_controls_directly (class loop *loop, 
> loop_vec_info loop_vinfo,
>tree nscalars_total = niters;
>tree nscalars_step = build_int_cst (iv_type, vf);
>tree nscalars_skip = niters_skip;
> -  if (nscalars_per_iter != 1)
> +  if (nscalars_per_iter_ft != 1)
>  {
>/* We checked before setting LOOP_VINFO_USING_PARTIAL_VECTORS_P 
> that
>these multiplications don't overflow.  */
> -  tree compare_factor = build_int_cst (compare_type, 
> nscalars_per_iter);
> -  tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
> +  tree compare_factor = build_int_cst (compare_type, 
> nscalars_per_iter_ft);
> +  tree iv_factor = build_int_cst (iv_type, nscalars_per_iter_ft);
>nscalars_total = gimple_build (preheader_seq, MULT_EXPR, 
> compare_type,
>nscalars_total, compare_factor);
>nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
> @@ -509,7 +520,7 @@ vect_set_loop_controls_directly (class loop *loop, 
> loop_vec_info loop_vinfo,
>NSCALARS_SKIP to that cannot overflow.  */
> tree const_limit = build_int_cst (compare_type,
>   LOOP_VINFO_VECT_FACTOR (loop_vinfo)
> - * nscalars_per_iter);
> + * nscalars_per_iter_ft);
> first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
> nscalars_total, const_limit);
> first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,

 It looks odd that we don't need to adjust the other nscalars_* values too.
 E.g. the above seems to be comparing an unscaled nscalars_total with
 a scaled nscalars_per_iter.  I think the units ought to “agree”,
 both here and in the rest of the function.

>>>
>>> Sorry, I didn't quite follow this comment.  Both nscalars_total and
>>> nscalars_step are scaled here.  The remaining related nscalars_*
>>> seems only nscalars_skip, but length can't support skip.
>> 
>> Hmm, OK.  But in that case can you update the names of the variables
>> to match?  It's confusing to have some nscalars_* variables actually
>> count scalars (and thus have “nitems” equivalents) and other nscalars_*
>> variables count something else (and thus effectively be nitems_* variables
>> themselves).
>> 
>
> OK.  I'll update the names like nscalars_total/nscalars_step and equivalents
> to nitems_total/... (or nunits_total better?)

I agree “items” isn't great.  I was trying to avoid “units” because GCC
often uses that to mean bytes (BITS_PER_UNIT, UNITS_PER_WORD, etc.).
In this context that could be confusing, because sometimes the
“units” actually would be bytes, but not always.

> @@ -9850,11 +9986,30 @@ vectorizable_condition (vec_info *vinfo,
> return false;
>   }
>  
> -  if (loop_vinfo
> -   && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> -   && reduction_type == EXTRACT_LAST_REDUCTION)
> - vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
> -ncopies * vec_num, vectype, NULL);
> +  if (loop_vinfo && for_reduction
> +   && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> + {
> +   if (reduction_type == EXTRACT_LAST_REDUCTION)
> + vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
> +ncopies * vec_num, vectype, NULL);
> +   /* Using partial vectors can introduce inactive lanes in the last
> +  iteration, since full vector of condition results are operated,
> +  it's unsafe here.  But if we can AND the condition mask with
> +  loop mask, it would be safe then.  */
> +   else if (!loop_vinfo->scalar_cond_masked_set.is_empty ())
> + {
> +   scalar_cond_masked_key cond (cond_expr, ncopies * vec_num);
> +   if (!loop_vinfo->scalar_cond_masked_set.contains (cond))
> + {
> +   bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> +   cond.code = invert_tree_comparis

Re: [PATCH] [og10] libgomp, Fortran: Fix OpenACC "gang reduction on an orphan loop" error message

2020-07-07 Thread Thomas Schwinge
Hi Frederik!

(CC  added, for everything touching gfortran.)

On 2020-07-07T10:52:08+0200, Frederik Harwath  wrote:
> This patch fixes the check for reductions on orphaned gang loops

This is the "Make OpenACC orphan gang reductions errors" functionality
originally added in gomp-4_0-branch r247461.

> the Fortran frontend which (in contrast to the C, C++ frontends)
> erroneously rejects reductions on gang loops that are contained in
> "kernels" constructs and which hence are not orphaned.
>
> According to the OpenACC standard version 2.5 and later, reductions on
> orphaned gang loops are explicitly disallowed (cf.  section "Changes
> from Version 2.0 to 2.5").  Remember that a loop is "orphaned" if it is
> not lexically contained in a compute construct (cf. section "Loop
> construct" of the OpenACC standard), i.e. in either a "parallel", a
> "serial", or a "kernels" construct.

Or the other way round: a 'loop' construct is orphaned if it appears
inside a 'routine' region, right?

> The patch has been tested by running the GCC and libgomp testsuites.
> The latter tests ran with offloading to nvptx although that should not
> be important here unless there was some very subtle reason for
> forbidding the gang reductions on kernels loops. As expected, there seems
> to be no such reason, i.e. I observed no regressions with the patch.

Note that the aforementioned gomp-4_0-branch r247461,
openacc-gcc-7-branch commit 0554f9f79325960c72166327d442a553cd35bad9, and
openacc-gcc-8-branch commit 65dd9cf3b3c45d64d72967df1e4a54778cb4e35f
still do contain the appropriate 'kernels' handling.  Just in
openacc-gcc-9-branch commit 533beb2ec19f8486e4b1b645a153746f96b41f04 this
got (a) mixed together with a bunch of other, unrelated changes ("Various
OpenACC reduction enhancements"), and (b) the 'kernels' handling got
removed.  Julian (Git author), or Kwok (Git committer), do you remember
any rationale for that?  Later, this then got picked into devel/omp/gcc-9
commit 3fa4bb72dcb3b9171952a0eca5310bb8811d5ffd, and devel/omp/gcc-10
commit 6b3e1f7f05cd360bbd356b3f78511aa2ec3f40c3.

> Can I include the patch in OG10?

Unless Julian/Kwok speak up soon: OK, thanks.

Reviewed-by: Thomas Schwinge 

May want to remove "libgomp" from the first line of the commit log --
this commit doesn't relate to libgomp specifically.

(Ideally, we'd also test 'serial' construct in addition to 'kernels',
'parallel', but we can add that later.  I anyway have a WIP patch
waiting, adding more 'serial' construct testing, for a different reason,
so I'll include it there.)


Grüße
 Thomas


> From 7320635211fff3a773beb0de1914dbfcc317ab37 Mon Sep 17 00:00:00 2001
> From: Frederik Harwath 
> Date: Tue, 7 Jul 2020 10:41:21 +0200
> Subject: [PATCH] libgomp, Fortran: Fix OpenACC "gang reduction on an orphan
>  loop" error message
>
> According to the OpenACC standard version 2.5 and later, reductions on
> orphaned gang loops are explicitly disallowed (cf.  section "Changes
> from Version 2.0 to 2.5").  A loop is "orphaned" if it is not
> lexically contained in a compute construct (cf. section "Loop
> construct" of the OpenACC standard), i.e. in either a "parallel", a
> "serial", or a "kernels" construct.
>
> This commit fixes the check for reductions on orphaned gang loops in
> the Fortran frontend which (in contrast to the C, C++ frontends)
> erroneously rejects reductions on gang loops that are contained in
> "kernels" constructs.
>
> 2020-07-07  Frederik Harwath  
>
> gcc/fortran/
>
>   * openmp.c (oacc_is_parallel_or_serial): Removed function.
>   (oacc_is_kernels): New function.
>   (oacc_is_compute_construct): New function.
>   (resolve_oacc_loop_blocks): Use "oacc_is_compute_construct"
>   instead of "oacc_is_parallel_or_serial" for checking that a
>   loop is not orphaned.
>
> gcc/testsuite/
>
>   * gfortran.dg/goacc/orphan-reductions-2.f90: New test
>   verifying that the error message is not emitted for
>   non-orphaned loops.
>
>   * c-c++-common/goacc/orphan-reductions-2.c: Likewise for C and C++.
> ---
>  gcc/fortran/openmp.c  | 13 +++-
>  .../c-c++-common/goacc/orphan-reductions-2.c  | 69 +++
>  .../gfortran.dg/goacc/orphan-reductions-2.f90 | 58 
>  3 files changed, 137 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/goacc/orphan-reductions-2.c
>  create mode 100644 gcc/testsuite/gfortran.dg/goacc/orphan-reductions-2.f90
>
> diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
> index 28408c4c99a..83c498112a8 100644
> --- a/gcc/fortran/openmp.c
> +++ b/gcc/fortran/openmp.c
> @@ -5926,9 +5926,16 @@ oacc_is_serial (gfc_code *code)
>  }
>
>  static bool
> -oacc_is_parallel_or_serial (gfc_code *code)
> +oacc_is_kernels (gfc_code *code)
>  {
> -  return oacc_is_parallel (code) || oacc_is_serial (code);
> +  return code->op == EXEC_OACC_KERNELS || code->op == EXEC_OACC_KERNELS_LOOP;
> +}
> +
> +static bool
> +oacc_is_compute

Re: [PATCH 5/7 v6] vect: Support vector load/store with length in vectorizer

2020-07-07 Thread Richard Sandiford
"Kewen.Lin"  writes:
> on 2020/7/2 下午1:20, Kewen.Lin via Gcc-patches wrote:
>> on 2020/7/1 下午11:17, Richard Sandiford wrote:
>>> "Kewen.Lin"  writes:
 on 2020/7/1 上午3:53, Richard Sandiford wrote:
> "Kewen.Lin"  writes:
>> +  /* Decide whether to use fully-masked approach.  */
>> +  if (vect_verify_full_masking (loop_vinfo))
>> +LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
>> +  /* Decide whether to use length-based approach.  */
>> +  else if (vect_verify_loop_lens (loop_vinfo))
>> +{
>> +  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
>> +  || LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
>> +{
>> +  if (dump_enabled_p ())
>> +dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> + "can't vectorize this loop with 
>> length-based"
>> + " partial vectors approach becuase 
>> peeling"
>> + " for alignment or gaps is 
>> required.\n");
>> +  LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
>> +}
>
> Why are these peeling cases necessary?  Peeling for gaps should
> just mean subtracting one scalar iteration from the iteration count
> and shouldn't otherwise affect the main loop.  Similarly, peeling for
> alignment can be handled in the normal way, with a scalar prologue loop.
>

 I was thinking to relax this later and to avoid to handle too many cases
 in the first enablement patch.  Since Power hw whose level is able to 
 support
 vector with length, it supports unaligned load/store, need to construct
 some cases for them.  May I postpone it a bit?  Or you prefer me to support
 it here?
>>>
>>> I've no objection to postponing it if there are specific known
>>> problems that make it difficult, but I think we should at least
>>> say what they are.  On the face of it, I'm not sure why it doesn't
>>> Just Work, since the way that we control the main loop should be
>>> mostly orthogonal to how we handle peeled prologue iterations
>>> and how we handle a single peeled epilogue iteration.
>>>
>> 
>> OK, I will remove it to see the impact.  By the way, do you think using
>> partial vectors for the prologue is something worth trying in the future?
>> 
>
> I tested the updated patch with this releasing, LOOP_VINFO_PEELING_FOR_GAPS
> part looks fine, but LOOP_VINFO_PEELING_FOR_ALIGNMENT caused one case to
> fail at execution during vect-partial-vector-usage=2.  So far the patch
> doesn't handle any niters_skip cases.  I think if we want to support it, 
> we have to add some handlings in/like what we have for masking, such as: 
> mask_skip_niters, vect_prepare_for_masked_peels etc.  
>
> Do you prefer me to extend the support in this patch series?

It's not so much whether it has to be supported now, but more why
it doesn't work now.  What was the reason for the failure?

The peeling-with-masking thing is just an optimisation, so that we
can vectorise the peeled iterations rather than falling back to
scalar code for them.  It shouldn't be needed for correctness.

Thanks,
Richard


Re: [PATCH PR95961] vect: ICE: in exact_div, at poly-int.h:2182

2020-07-07 Thread Richard Sandiford
"Yangfei (Felix)"  writes:
>> > +nscalars = (STMT_SLP_TYPE (stmt_info)
>> > +  ? vf * DR_GROUP_SIZE (stmt_info) : vf);
>> 
>> …the indentation on this line.  Hope you don't mind, but I also “reflowed”
>> the commit message to make it fit within 72 chars.
>> (The text itself is the same.)
>
> It's OK.  :-)
> BTW: Is this the rule for gcc git commit msg format? 72 chars instead of 80 
> chars?

Well, it was over 80 chars too, which is why I noticed :-)
But I think the idea is that it shouldn't wrap when you use “git log”
on an 80-character terminal, and “git log” indents the messages.
72 is probably overboard though.

Thanks,
Richard



Re: [committed] amdgcn: Add fold_left_plus vector reductions

2020-07-07 Thread Richard Sandiford
Andrew Stubbs  writes:
> This patch implements a floating-point fold_left_plus vector pattern, 
> which gives a significant speed-up in the BabelStream "dot" benchmark.
>
> The GCN architecture can't actually do an in-order vector reduction any 
> more efficiently than that equivalent scalar algorithm, so this is a bit 
> of a cheat.  However, dividing the problem into threads using OpenACC or 
> OpenMP has already broken the in-order semantics, so we may as well 
> optimize the operation at the vector level too.
>
> If the user has specifically sorted the input data in order to get a 
> more correct FP result then using multiple threads is already the wrong 
> thing to do. But, if the input data is in no particular numerical order 
> then this optimization will give a correct answer much faster, albeit 
> possibly a slightly different one each run.

There doesn't seem to be anything GCN-specific here though.
If pragmas say that we can ignore associativity rules, we should apply
that in target-independent code rather than in each individual target.

Thanks,
Richard


Re: [pushed] c++: -fsanitize=vptr and -fstrong-eval-order. [PR95221]

2020-07-07 Thread Thomas Schwinge
Hi!

On 2020-06-23T13:21:05+0200, I wrote:
> On 2020-05-22T17:03:01-0400, Jason Merrill via Gcc-patches 
>  wrote:
>> [...]
>>
>> This issue suggests that we should be running the ubsan tests in multiple
>> standard modes like the rest of the G++ testsuite, so I've made that change
>> as well.
>
>> --- a/gcc/testsuite/g++.dg/ubsan/ubsan.exp
>> +++ b/gcc/testsuite/g++.dg/ubsan/ubsan.exp
>> @@ -26,7 +26,7 @@ ubsan_init
>>
>>  # Main loop.
>>  if [check_effective_target_fsanitize_undefined] {
>> -  gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C 
>> $srcdir/c-c++-common/ubsan/*.c]] "" ""
>> +  g++-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C 
>> $srcdir/c-c++-common/ubsan/*.c]] "" ""
>>  }
>
> Hmm, but that means that testing is now no longer running the
> "optimization options torture testing":
>
> Running [...]/source-gcc/gcc/testsuite/g++.dg/ubsan/ubsan.exp ...
> -PASS: c-c++-common/ubsan/align-1.c   -O0  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O0  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -O1  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O1  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -O2  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O2  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -O2 -flto -fno-use-linker-plugin 
> -flto-partition=none  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O2 -flto -fno-use-linker-plugin 
> -flto-partition=none  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -O2 -flto -fuse-linker-plugin 
> -fno-fat-lto-objects  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O2 -flto -fuse-linker-plugin 
> -fno-fat-lto-objects  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -O3 -g  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -O3 -g  execution test
> -PASS: c-c++-common/ubsan/align-1.c   -Os  (test for excess errors)
> -PASS: c-c++-common/ubsan/align-1.c   -Os  execution test
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++14 (test for excess errors)
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++14 execution test
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++17 (test for excess errors)
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++17 execution test
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++2a (test for excess errors)
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++2a execution test
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++98 (test for excess errors)
> +PASS: c-c++-common/ubsan/align-1.c  -std=gnu++98 execution test
>
> Etc.
>
> Not sure if that was intentional?  I suppose this removed way more
> testsuite coverage compared to what different C++ '-std=[...]' add?

Any comments/ideas here?

Note that 'g++.dg/asan/asan.exp', 'g++.dg/tsan/tsan.exp' also use
'gcc-dg-runtest' ("optimization options" variation), not 'g++-dg-runtest'
("C++ '-std=[...]'" variation).


Grüße
 Thomas


>> The testcase changes are all to accommodate that.
>
>>  /* { dg-options "-fsanitize=bounds -Wno-array-bounds" } */
>> +/* { dg-options "-fsanitize=bounds -Wno-array-bounds -Wno-volatile" { 
>> target c++ } } */
>
> Simpler would've been (untested):
>
> +/* { dg-additional-options "-Wno-volatile" { target c++ } } */
>
> Etc.
>
> ;-)
>
>
> Grüße
>  Thomas
-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] libgomp: Add OMPD process functions and datatypes.

2020-07-07 Thread y2s1982 . via Gcc-patches
Hello Jakub,

Thank you for the review. I had some questions.

On Tue, Jul 7, 2020 at 4:13 AM Jakub Jelinek  wrote:

> On Fri, Jul 03, 2020 at 10:43:55PM -0400, y2s1982 via Gcc-patches wrote:
>
> > +  switch (id_size)
> > +{
> > +case 1:
> > +  *output_id = (_gompd_device_id) *((__UINT8_TYPE__ *) input_id);
> > +  break;
> > +case 2:
> > +  *output_id = (_gompd_device_id) *((__UINT16_TYPE__ *) input_id);
>
> I have no idea what this function is doing, but e.g. from aliasing point of
> view trying to access something as short/int/long long is dangerous, and
> there might be alignment implications too.
>

This function is used in ompd_device_initialize(). The initializing function
receives a void *id and ompd_size_t sizeof_id. My first attempt tried to just
preserve both pieces of information as is, but I wasn't sure how the void *
would ultimately be read. In the second attempt, I tried to cast the value,
based on the sizeof_id, and store it in a large enough type. This does assume
the void * is pointing at a numerical value.
What would be the best way to handle the void *id?

>
>
> > --- /dev/null
> > +++ b/libgomp/ompd-types.h
>
> ompd-types.h is an installed header I think, so Makefile.am should install
> it.
>
Is this similar to how omp-tools.h was handled before?

Cheers,

Tony


>
> Jakub
>
>


Re: [committed] amdgcn: Add fold_left_plus vector reductions

2020-07-07 Thread Andrew Stubbs

On 07/07/2020 12:03, Richard Sandiford wrote:

Andrew Stubbs  writes:

This patch implements a floating-point fold_left_plus vector pattern,
which gives a significant speed-up in the BabelStream "dot" benchmark.

The GCN architecture can't actually do an in-order vector reduction any
more efficiently than that equivalent scalar algorithm, so this is a bit
of a cheat.  However, dividing the problem into threads using OpenACC or
OpenMP has already broken the in-order semantics, so we may as well
optimize the operation at the vector level too.

If the user has specifically sorted the input data in order to get a
more correct FP result then using multiple threads is already the wrong
thing to do. But, if the input data is in no particular numerical order
then this optimization will give a correct answer much faster, albeit
possibly a slightly different one each run.


There doesn't seem to be anything GCN-specific here though.
If pragmas say that we can ignore associativity rules, we should apply
that in target-independent code rather than in each individual target.


Yes, I'm lazy. That, and I'm not sure what a target independent solution 
would look like.


Presumably we'd need something for both OpenMP and OpenACC, and it would 
need to be specific to certain operations (not just blanket 
-fassociative-math), which means the vectorizer (anywhere else?) would 
need to be taught about the new thing?


The nearest example I can think of is the force_vectorize flag that 
OpenMP "simd" and OpenACC "vector" already use (the latter being 
amdgcn-only as nvptx does its own OpenACC vectorization).


I'm also not completely convinced that this -- or other cases like it -- 
isn't simply a target-specific issue. Could it be harmful on other 
architectures?


Anyway, ultimately I don't have time to do much more here.

Andrew


Re: [PATCH] libgomp: Add OMPD process functions and datatypes.

2020-07-07 Thread Jakub Jelinek via Gcc-patches
On Tue, Jul 07, 2020 at 07:38:13AM -0400, y2s1982 . wrote:
> > I have no idea what this function is doing, but e.g. from aliasing point of
> > view trying to access something as short/int/long long is dangerous, and
> > there might be alignment implications too.
> >
> 
> This function is used in ompd_device_initialize(). The initializing function
> receives a void *id and ompd_size_t sizeof_id. My first attempt tried to
> just
> preserve both information as is, but I wasn't sure how the void * would
> ultimately be read. In the second attempt, I tried to cast the value, based
> on
> the sizeof_id, and store it in a large enough type. This does assume the
> void * is pointing at a numerical value.
> What would be the best way to handle the void *id?

Well, you should know what it is and only depending on that handle it.
If it e.g. would be string, you'd want to use memcpy to copy it somewhere,
or parse immediately, etc.

> > > --- /dev/null
> > > +++ b/libgomp/ompd-types.h
> >
> > ompd-types.h is an installed header I think, so Makefile.am should install
> > it.
> >
> Is this similar to how omp-tools.h was handled before?

Yes.

Jakub



Re: [PATCH] [og10] libgomp, Fortran: Fix OpenACC "gang reduction on an orphan loop" error message

2020-07-07 Thread Frederik Harwath
Thomas Schwinge  writes:

Hi Thomas,

> (CC  added, for everything touching gfortran.)

Thanks!

> On 2020-07-07T10:52:08+0200, Frederik Harwath  
> wrote:
>> This patch fixes the check for reductions on orphaned gang loops
>
> This is the "Make OpenACC orphan gang reductions errors" functionality
> originally added in gomp-4_0-branch r247461.
>
>> the Fortran frontend which (in contrast to the C, C++ frontends)
>> erroneously rejects reductions on gang loops that are contained in
>> "kernels" constructs and which hence are not orphaned.
>>
>> According to the OpenACC standard version 2.5 and later, reductions on
>> orphaned gang loops are explicitly disallowed (cf.  section "Changes
>> from Version 2.0 to 2.5").  Remember that a loop is "orphaned" if it is
>> not lexically contained in a compute construct (cf. section "Loop
>> construct" of the OpenACC standard), i.e. in either a "parallel", a
>> "serial", or a "kernels" construct.
>
> Or the other way round: a 'loop' construct is orphaned if it appears
> inside a 'routine' region, right?

The "not lexically contained in a compute construct" definition is
from the standard. Assuming that the frontend's parser rejects "loop"
directives if they do not occur inside of either the "serial",
"parallel", "kernels" compute constructs or in a function with a
"routine" directive, both definitions should be indeed equivalent ;-).

> Unless Julian/Kwok speak up soon: OK, thanks.
>
> Reviewed-by: Thomas Schwinge 
>
> May want to remove "libgomp" from the first line of the commit log --
> this commit doesn't relate to libgomp specifically.

Right.

> (Ideally, we'd also test 'serial' construct in addition to 'kernels',
> 'parallel', but we can add that later.  I anyway have a WIP patch
> waiting, adding more 'serial' construct testing, for a different reason,
> so I'll include it there.)

I had left this out intentionally, because having the gang reduction in
the serial construct leads to a "region contains gang partitioned
code but is not gang partitioned"
error. Of course, we might still add a test case with that expectation.

Thanks for the review!

Frederik
-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH][GCC][Arm] PR target/95646: Do not clobber callee saved registers with CMSE

2020-07-07 Thread Christophe Lyon via Gcc-patches
Hi,


On Mon, 6 Jul 2020 at 16:31, Andre Vieira (lists)
 wrote:
>
>
> On 30/06/2020 14:50, Andre Vieira (lists) wrote:
> >
> > On 29/06/2020 11:15, Christophe Lyon wrote:
> >> On Mon, 29 Jun 2020 at 10:56, Andre Vieira (lists)
> >>  wrote:
> >>>
> >>> On 23/06/2020 21:52, Christophe Lyon wrote:
>  On Tue, 23 Jun 2020 at 15:28, Andre Vieira (lists)
>   wrote:
> > On 23/06/2020 13:10, Kyrylo Tkachov wrote:
> >>> -Original Message-
> >>> From: Andre Vieira (lists) 
> >>> Sent: 22 June 2020 09:52
> >>> To: gcc-patches@gcc.gnu.org
> >>> Cc: Kyrylo Tkachov 
> >>> Subject: [PATCH][GCC][Arm] PR target/95646: Do not clobber
> >>> callee saved
> >>> registers with CMSE
> >>>
> >>> Hi,
> >>>
> >>> As reported in bugzilla when the -mcmse option is used while
> >>> compiling
> >>> for size (-Os) with a thumb-1 target the generated code will
> >>> clear the
> >>> registers r7-r10. These however are callee saved and should be
> >>> preserved
> >>> across ABI boundaries. The reason this happens is because these
> >>> registers are made "fixed" when optimising for size with Thumb-1
> >>> in a
> >>> way to make sure they are not used, as pushing and popping
> >>> hi-registers
> >>> requires extra moves to and from LO_REGS.
> >>>
> >>> To fix this, this patch uses 'callee_saved_reg_p', which
> >>> accounts for
> >>> this optimisation, instead of 'call_used_or_fixed_reg_p'. Be
> >>> aware of
> >>> 'callee_saved_reg_p''s definition, as it does still take call used
> >>> registers into account, which aren't callee_saved in my opinion,
> >>> so it
> >>> is rather a misnomer, but it works to our advantage here as it
> >>> does
> >>> exactly what we need.
> >>>
> >>> Regression tested on arm-none-eabi.
> >>>
> >>> Is this OK for trunk? (Will eventually backport to previous
> >>> versions if
> >>> stable.)
> >> Ok.
> >> Thanks,
> >> Kyrill
> > As I was getting ready to push this I noticed I didn't add any
> > skip-ifs
> > to prevent this failing with specific target options. So here's a new
> > version with those.
> >
> > Still OK?
> >
>  Hi,
> 
>  This is not sufficient to skip arm-linux-gnueabi* configs built with
>  non-default cpu/fpu.
> 
>  For instance, with arm-linux-gnueabihf --with-cpu=cortex-a9
>  --with-fpu=neon-fp16 --with-float=hard
>  I see:
>  FAIL: gcc.target/arm/pr95646.c (test for excess errors)
>  Excess errors:
>  cc1: error: ARMv8-M Security Extensions incompatible with selected FPU
>  cc1: error: target CPU does not support ARM mode
> 
>  and the testcase is compiled with -mcpu=cortex-m23 -mcmse -Os
> >>> Resending as I don't think my earlier one made it to the lists
> >>> (sorry if
> >>> you are receiving this double!)
> >>>
> >>> I'm not following this, before I go off and try to reproduce it,
> >>> what do
> >>> you mean by 'the testcase is compiled with -mcpu=cortex-m23 -mcmse
> >>> -Os'?
> >>> These are the options you are seeing in the log file? Surely they
> >>> should
> >>> override the default options? Only thing I can think of is this might
> >>> need an extra -mfloat-abi=soft to make sure it overrides the default
> >>> float-abi.  Could you give that a try?
> >> No it doesn't make a difference alone.
> >>
> >> I also had to add:
> >> -mfpu=auto (that clears the above warning)
> >> -mthumb otherwise we now get cc1: error: target CPU does not support
> >> ARM mode
> >>
> >> Looks like some effective-target machinery is needed
> > So I had a look at this,  I was pretty sure that -mfloat-abi=soft
> > overwrote -mfpu=<>, which in large it does, as in no FP instructions
> > will be generated but the error you see only checks for the right
> > number of FP registers. Which doesn't check whether
> > 'TARGET_HARD_FLOAT' is set or not. I'll fix this too and use the
> > check-effective-target for armv8-m.base for this test as it is indeed
> > a better approach than my bag of skip-ifs. I'm testing it locally to
> > make sure my changes don't break anything.
> >
> > Cheers,
> > Andre
> Hi,
>
> Sorry for the delay. So I changed the test to use the effective-target
> machinery as you suggested and I also made sure that you don't get the
> "ARMv8-M Security Extensions incompatible with selected FPU" when
> -mfloat-abi=soft.
> Further changed 'asm' to '__asm__' to avoid failures with '-std=' options.
>
> Regression tested on arm-none-eabi.
> @Christophe: could you test this for your configuration, shouldn't fail
> anymore!
>

Indeed with your patch I don't see any failure with pr95646.c

Note that it is still unsupported with arm-eabi when running the tests
with -mcpu=cortex-mXX
because the compiler complains that -mcpu=cortex-mXX conflicts with
-march=armv8-m.base,
thus the effective-target test fails.

BTW, is that warning useful/practical? 

RE: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions emitted at -O3

2020-07-07 Thread xiezhiheng
> -Original Message-
> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
> Sent: Monday, July 6, 2020 5:31 PM
> To: xiezhiheng 
> Cc: Richard Biener ; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions
> emitted at -O3
> 
> No, this is unfortunately a known bug.  See:
> 
>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95964
> 
> (Although the PR is recent, it's been a known bug for longer.)
> 
> As you say, the difficulty is that the correct attributes depend on what
> the built-in function does.  Most integer arithmetic is “const”, but things
> get more complicated for floating-point arithmetic.
> 
> The SVE intrinsics use a three stage process:
> 
> - each function is classified into one of several groups
> - each group has a set of flags that describe what functions in the
>   group can do
> - these flags get converted into attributes based on the current
>   command-line options
> 
> I guess we should have something similar for the arm_neon.h built-ins.
> 
> If you're willing to help fix this, that'd be great.  I think a first
> step would be to agree a design.
> 
> Thanks,
> Richard

I'd like to have a try.  I have checked the steps in SVE intrinsics.
It defines a base class "function_base" and derives different classes
to describe several intrinsics for each.  And each class may
have its own unique flags described in virtual function "call_properties".
The specific attributes will be converted from these flags in
"get_attributes" later.

I find that there are more than 100 classes in total and if I only
need to classify them into different groups by attributes, maybe
we do not need so many classes?

The difficult thing I think is how to classify neon intrinsics into
different groups.  I'm going to follow up the way in SVE intrinsics
first now.

Xie Zhiheng


[PATCH] fix detection of negative step DR groups

2020-07-07 Thread Richard Biener
This fixes a condition that caused all negative step DR groups to
be detected as single element interleaving.  Such groups are
rejected by interleaving vectorization but miscompiled by SLP
which is fixed by forcing VMAT_STRIDED_SLP for now.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
group overlap condition to allow negative step DR groups.
* tree-vect-stmts.c (get_group_load_store_type): For
multi element SLP groups force VMAT_STRIDED_SLP when the step
is negative.

* gcc.dg/vect/slp-47.c: New testcase.
* gcc.dg/vect/slp-48.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/slp-47.c | 56 ++
 gcc/testsuite/gcc.dg/vect/slp-48.c | 56 ++
 gcc/tree-vect-data-refs.c  |  8 +++--
 gcc/tree-vect-stmts.c  | 11 --
 4 files changed, 126 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-47.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-48.c

diff --git a/gcc/testsuite/gcc.dg/vect/slp-47.c 
b/gcc/testsuite/gcc.dg/vect/slp-47.c
new file mode 100644
index 000..7b2ddf664df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-47.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int x[1024], y[1024];
+
+void __attribute__((noipa)) foo()
+{
+  for (int i = 0; i < 512; ++i)
+{
+  x[2*i] = y[1023 - (2*i)];
+  x[2*i+1] = y[1023 - (2*i+1)];
+}
+}
+
+void __attribute__((noipa)) bar()
+{
+  for (int i = 0; i < 512; ++i)
+{
+  x[2*i] = y[1023 - (2*i+1)];
+  x[2*i+1] = y[1023 - (2*i)];
+}
+}
+
+int 
+main ()
+{
+  check_vect ();
+
+  for (int i = 0; i < 1024; ++i)
+{
+  x[i] = 0;
+  y[i] = i;
+  __asm__ volatile ("");
+}
+
+  foo ();
+  for (int i = 0; i < 1024; ++i)
+if (x[i] != y[1023 - i])
+  abort ();
+
+  for (int i = 0; i < 1024; ++i)
+{
+  x[i] = 0;
+  __asm__ volatile ("");
+}
+
+  bar ();
+  for (int i = 0; i < 1024; ++i)
+if (x[i] != y[1023 - i^1])
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } 
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-48.c 
b/gcc/testsuite/gcc.dg/vect/slp-48.c
new file mode 100644
index 000..0b327aede8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-48.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int x[1024], y[1024];
+
+void __attribute__((noipa)) foo()
+{
+  for (int i = 0; i < 512; ++i)
+{
+  x[1023 - (2*i+1)] = y[2*i];
+  x[1023 - (2*i)] = y[2*i+1];
+}
+}
+
+void __attribute__((noipa)) bar()
+{
+  for (int i = 0; i < 512; ++i)
+{
+  x[1023 - (2*i+1)] = y[2*i+1];
+  x[1023 - (2*i)] = y[2*i];
+}
+}
+
+int 
+main ()
+{
+  check_vect ();
+
+  for (int i = 0; i < 1024; ++i)
+{
+  x[i] = 0;
+  y[i] = i;
+  __asm__ volatile ("");
+}
+
+  foo ();
+  for (int i = 0; i < 1024; ++i)
+if (x[i] != y[1023 - i^1])
+  abort ();
+
+  for (int i = 0; i < 1024; ++i)
+{
+  x[i] = 0;
+  __asm__ volatile ("");
+}
+
+  bar ();
+  for (int i = 0; i < 1024; ++i)
+if (x[i] != y[1023 - i])
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } 
} */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 959c2d3378f..2b4421b5fb4 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3074,13 +3074,15 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
  if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
 
- /* If the step (if not zero or non-constant) is greater than the
+ /* If the step (if not zero or non-constant) is smaller than the
 difference between data-refs' inits this splits groups into
 suitable sizes.  */
  if (tree_fits_shwi_p (DR_STEP (dra)))
{
- HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
- if (step != 0 && step <= (init_b - init_a))
+ unsigned HOST_WIDE_INT step
+   = absu_hwi (tree_to_shwi (DR_STEP (dra)));
+ if (step != 0
+ && step <= (unsigned HOST_WIDE_INT)(init_b - init_a))
break;
}
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f66c5f5b367..fcae3ef5f35 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2150,8 +2150,15 @@ get_group_load_store_type (vec_info *vinfo, 
stmt_vec_info stmt_info,
}
  int cmp = compare_step_with_zero (vinfo, stmt_info);
  if (cmp < 0)
-   *memory_access_type = get_negative_load_store_type
- (vinfo, stmt_info, vectype, vls_type, 1);
+   {
+

[PATCH][GCC][aarch64] Generation of adjusted ldp/stp for vector types

2020-07-07 Thread Przemyslaw Wirkus
Hi,

Introduce simple peephole2 optimization which substitutes a sequence of
four consecutive load or store (LDR, STR) instructions with two load or
store pair (LDP, STP) instructions for 2 element supported vector modes
(V2SI, V2SF, V2DI, and V2DF).
Generated load / store pair instruction offset is adjusted accordingly.

Bootstrapped and tested on aarch64-none-linux-gnu.

Example:
$ cat stp_vec_v2sf.c
typedef float __attribute__((vector_size(8))) vec;

void
store_adjusted(vec *out, vec x, vec y)
{
  out[400] = x;
  out[401] = y;
  out[402] = y;
  out[403] = x;
}

Example compiled with:
$ ./aarch64-none-linux-gnu-gcc -S -O2 stp_vec_v2sf.c -dp

Before the patch:

store_adjusted:
str d0, [x0, 3200]// 9[c=4 l=4]  *aarch64_simd_movv2si/2
str d1, [x0, 3208]// 11   [c=4 l=4]  *aarch64_simd_movv2si/2
str d1, [x0, 3216]// 13   [c=4 l=4]  *aarch64_simd_movv2si/2
str d0, [x0, 3224]// 15   [c=4 l=4]  *aarch64_simd_movv2si/2
ret   // 26   [c=0 l=4]  *do_return

After the patch:

store_adjusted:
add x1, x0, 3200// 27   [c=4 l=4]  *adddi3_aarch64/0
stp d0, d1, [x1]// 28   [c=0 l=4]  vec_store_pairv2siv2si
stp d1, d0, [x1, 16]// 29   [c=0 l=4]  vec_store_pairv2siv2si
ret // 22   [c=0 l=4]  *do_return


OK for master ?

kind regards,
Przemyslaw

gcc/Changelog:
* config/aarch64/aarch64-ldpstp.md: Add two peepholes for adjusted 
vector
V2SI, V2SF, V2DI, V2DF load and store modes.
* config/aarch64/aarch64-protos.h (aarch64_gen_adjusted_ldpstp): Add new
parameter nunits.
(aarch64_operands_adjust_ok_for_ldpstp): Add new parameter nunits.
* config/aarch64/aarch64.c (aarch64_operands_adjust_ok_for_ldpstp): Add
new parameter nunits and support for vector types.
(aarch64_gen_adjusted_ldpstp): Add new parameter nunits and support for
vector types.
* config/aarch64/iterators.md (VP_2E): New iterator for 2 element 
vectors.
(nunits): Add SI and DI to mode attribute.

gcc/testsuite/Changelog:
* gcc.target/aarch64/ldp_vec_v2sf.c: New test.
* gcc.target/aarch64/ldp_vec_v2si.c: New test.
* gcc.target/aarch64/stp_vec_v2df.c: New test.
* gcc.target/aarch64/stp_vec_v2di.c: New test.
* gcc.target/aarch64/stp_vec_v2sf.c: New test.
* gcc.target/aarch64/stp_vec_v2si.c: New test.
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md 
b/gcc/config/aarch64/aarch64-ldpstp.md
index 
dd6f39615c51105a45b7b3dcde7b86e900ae7119..94c312f8f4f6472ebbeca0c2f3e760e0e316f7b7
 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -186,10 +186,10 @@ (define_peephole2
(set (match_operand:GPI 6 "register_operand" "")
(match_operand:GPI 7 "memory_operand" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode, <nunits>)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, <nunits>, UNKNOWN))
 DONE;
   else
 FAIL;
@@ -206,10 +206,10 @@ (define_peephole2
(set (match_operand:GPF 6 "register_operand" "")
(match_operand:GPF 7 "memory_operand" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode, <nunits>)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, <nunits>, UNKNOWN))
 DONE;
   else
 FAIL;
@@ -226,10 +226,10 @@ (define_peephole2
(set (match_operand:DI 6 "register_operand" "")
(sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode, 1)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, 1, SIGN_EXTEND))
 DONE;
   else
 FAIL;
@@ -246,10 +246,10 @@ (define_peephole2
(set (match_operand:DI 6 "register_operand" "")
(zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
+  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode, 1)"
   [(const_int 0)]
 {
-  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
+  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, 1, ZERO_EXTEND))
 DONE;
   else
 FAIL;
@@ -266,10 +266,10 @@ (define_peephole2
(set (match_operand:GPI 6 "memory_operand" "")
(match_operand:GPI 7 "aarch64_reg_or_zero" ""))
(match_dup 8)]
-  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)"
+  "aarch64_operands_adjust_ok_for_ldps

[PATCH] c++: ICE in is_really_empty_class [PR95497]

2020-07-07 Thread Patrick Palka via Gcc-patches
We are ICEing in the testcase below because we pass the
yet-uninstantiated class type A<int> of the PARM_DECL b to
is_really_empty_class from potential_rvalue_constant_expression when
parsing the requirement t += b.

This patch fixes the ICE by guarding the problematic call to
is_really_empty_class with a COMPLETE_TYPE_P check, which should also
subsume the existing dependent_type_p check.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit to trunk and to the 10 branch?

gcc/cp/ChangeLog:

PR c++/95497
* constexpr.c (potential_constant_expression_1): When
processing_template_decl, check COMPLETE_TYPE_P before calling
is_really_empty_class.

gcc/testsuite/ChangeLog:

PR c++/95497
* g++.dg/cpp2a/concepts-pr95497.C: New test.
---
 gcc/cp/constexpr.c|  2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C | 12 
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 1939166e907..ff78ebda2dc 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -7443,7 +7443,7 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
   if (now && want_rval)
{
  tree type = TREE_TYPE (t);
- if (dependent_type_p (type)
+ if ((processing_template_decl && !COMPLETE_TYPE_P (type))
  || is_really_empty_class (type, /*ignore_vptr*/false))
/* An empty class has no data to read.  */
return true;
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
new file mode 100644
index 000..4d7718ad5e8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
@@ -0,0 +1,12 @@
+// PR c++/95497
+// { dg-do compile { target c++20 } }
+
+template <typename T>
+struct A{};
+
+template <typename T>
+concept c =
+requires(T t, A<int> b) // note that A<int> is independent of T
+{
+{ t += b };
+};
-- 
2.27.0.203.gf402ea6816



[PATCH] c++: wrong pretty printing of nested type [PR95303]

2020-07-07 Thread Patrick Palka via Gcc-patches
In the testcase below, we pretty print the nested type A<int>::B as
A<int>::B<int> because we don't check that B is itself a class template
before printing the innermost set of template arguments from B's
TEMPLATE_INFO (which in this case belongs to A).  This patch fixes this
by checking PRIMARY_TEMPLATE_P beforehand.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit to trunk and perhaps to the 10 branch?

gcc/ChangeLog:

PR c++/95303
* cxx-pretty-print.c (pp_cxx_unqualified_id): Check
PRIMARY_TEMPLATE_P before printing the innermost template
arguments.

gcc/testsuite/ChangeLog:

PR c++/95303
* g++.dg/concepts/diagnostic14.C: New test.
---
 gcc/cp/cxx-pretty-print.c| 13 +++
 gcc/testsuite/g++.dg/concepts/diagnostic14.C | 36 
 2 files changed, 43 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/concepts/diagnostic14.C

diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c
index 188462a79e7..263f225a492 100644
--- a/gcc/cp/cxx-pretty-print.c
+++ b/gcc/cp/cxx-pretty-print.c
@@ -173,12 +173,13 @@ pp_cxx_unqualified_id (cxx_pretty_printer *pp, tree t)
 case UNBOUND_CLASS_TEMPLATE:
   pp_cxx_unqualified_id (pp, TYPE_NAME (t));
   if (tree ti = TYPE_TEMPLATE_INFO_MAYBE_ALIAS (t))
-   {
- pp_cxx_begin_template_argument_list (pp);
- tree args = INNERMOST_TEMPLATE_ARGS (TI_ARGS (ti));
- pp_cxx_template_argument_list (pp, args);
- pp_cxx_end_template_argument_list (pp);
-   }
+   if (PRIMARY_TEMPLATE_P (TI_TEMPLATE (ti)))
+ {
+   pp_cxx_begin_template_argument_list (pp);
+   tree args = INNERMOST_TEMPLATE_ARGS (TI_ARGS (ti));
+   pp_cxx_template_argument_list (pp, args);
+   pp_cxx_end_template_argument_list (pp);
+ }
   break;
 
 case BIT_NOT_EXPR:
diff --git a/gcc/testsuite/g++.dg/concepts/diagnostic14.C 
b/gcc/testsuite/g++.dg/concepts/diagnostic14.C
new file mode 100644
index 000..ec2b68c4a3c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/concepts/diagnostic14.C
@@ -0,0 +1,36 @@
+// PR c++/95303
+// { dg-do compile { target c++20 } }
+
+template
+struct A {
+struct B {};
+};
+
+template
+  requires __is_same(T, char)
+struct A {
+struct B {};
+};
+
+template<>
+  struct A {
+struct B {};
+  };
+
+template
+concept C = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(C::B>); // { dg-error "failed" }
+
+template
+concept D = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(D::B>); // { dg-error "failed" }
+
+template
+concept E = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(E::B>); // { dg-error "failed" }
-- 
2.27.0.203.gf402ea6816



[PATCH] libbacktrace: configure check linker support for DWARF-5

2020-07-07 Thread CHIGOT, CLEMENT via Gcc-patches
Description:
  On AIX, the compiler is able to handle DWARF-5 but not the linker.

Changelog:
2020-07-07 Clement Chigot 
 * configure.ac: Extend check to the linker for DWARF-5 support
 * configure: Regenerate

Please apply for me if approved.
Could it be backported in gcc-10 branch too, please ?

Thanks, 
Clément

From 35f784cf22b9f5432c805782d12177189ec68dda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= 
Date: Wed, 10 Jun 2020 14:37:03 -0500
Subject: [PATCH] libbacktrace: configure check linker support for DWARF-5

On AIX, the compiler is able to handle DWARF-5 but not the linker.

Changelog:
2020-07-07 Clement Chigot 
 * configure.ac: Extend check to the linker for DWARF-5 support
 * configure: Regenerate
---
 libbacktrace/configure| 7 ---
 libbacktrace/configure.ac | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/libbacktrace/configure b/libbacktrace/configure
index b453bae5ae6..8149b7c6508 100755
--- a/libbacktrace/configure
+++ b/libbacktrace/configure
@@ -13577,14 +13577,15 @@ else
 CFLAGS="$CFLAGS -gdwarf-5"
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-int i;
+int main(){return 0;}
 _ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
   libbacktrace_cv_lib_dwarf5=yes
 else
   libbacktrace_cv_lib_dwarf5=no
 fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+conftest$ac_exeext conftest.$ac_ext
 CFLAGS=$CFLAGS_hold
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libbacktrace_cv_lib_dwarf5" >&5
diff --git a/libbacktrace/configure.ac b/libbacktrace/configure.ac
index bd16f20fd0e..21feaf36294 100644
--- a/libbacktrace/configure.ac
+++ b/libbacktrace/configure.ac
@@ -465,12 +465,12 @@ AC_SUBST(PTHREAD_CFLAGS)
 
 AM_CONDITIONAL(HAVE_PTHREAD, test "$libgo_cv_lib_pthread" = yes)
 
-dnl Test whether the compiler supports the -gdwarf-5 option.
+dnl Test whether the compiler and the linker support the -gdwarf-5 option.
 AC_CACHE_CHECK([whether -gdwarf-5 is supported],
 [libbacktrace_cv_lib_dwarf5],
 [CFLAGS_hold=$CFLAGS
 CFLAGS="$CFLAGS -gdwarf-5"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([int i;])],
+AC_LINK_IFELSE([AC_LANG_SOURCE([int main(){return 0;}])],
 [libbacktrace_cv_lib_dwarf5=yes],
 [libbacktrace_cv_lib_dwarf5=no])
 CFLAGS=$CFLAGS_hold])
-- 
2.25.0



Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions emitted at -O3

2020-07-07 Thread Richard Sandiford
xiezhiheng  writes:
>> -Original Message-
>> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
>> Sent: Monday, July 6, 2020 5:31 PM
>> To: xiezhiheng 
>> Cc: Richard Biener ; gcc-patches@gcc.gnu.org
>> Subject: Re: [PATCH PR94442] [AArch64] Redundant ldp/stp instructions
>> emitted at -O3
>> 
>> No, this is unfortunately a known bug.  See:
>> 
>>   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95964
>> 
>> (Although the PR is recent, it's been a known bug for longer.)
>> 
>> As you say, the difficulty is that the correct attributes depend on what
>> the built-in function does.  Most integer arithmetic is “const”, but things
>> get more complicated for floating-point arithmetic.
>> 
>> The SVE intrinsics use a three stage process:
>> 
>> - each function is classified into one of several groups
>> - each group has a set of flags that describe what functions in the
>>   group can do
>> - these flags get converted into attributes based on the current
>>   command-line options
>> 
>> I guess we should have something similar for the arm_neon.h built-ins.
>> 
>> If you're willing to help fix this, that'd be great.  I think a first
>> step would be to agree a design.
>> 
>> Thanks,
>> Richard
>
> I'd like to have a try.

Great!

> I have checked the steps in SVE intrinsics.
> It defines a base class "function_base" and derives different classes
> to describe several intrinsics for each.  And each class may
> have its own unique flags described in virtual function "call_properties".
> The specific attributes will be converted from these flags in
> "get_attributes" later.
>
> I find that there are more than 100 classes in total and if I only
> need to classify them into different groups by attributes, maybe
> we do not need so many classes?

Yeah, I agree.

Long term, there might be value in defining arm_neon.h in a similar
way to arm_sve.h: i.e. have arm_neon.h defer most of the work to
a special compiler pragma.  But that's going to be a lot of work.

I think it's possible to make incremental improvements to the current
arm_neon.h implementation without that work being thrown away if we ever
did switch to a pragma in future.  And the incremental approach seems
more practical.

> The difficult thing I think is how to classify neon intrinsics into
> different groups.  I'm going to follow up the way in SVE intrinsics
> first now.

For now I'd suggest just giving a name to each combination of flags
that the intrinsics need, rather than splitting instructions in a
more fine-grained way.  (It's not at all obvious from the final state
of the SVE code, but even there, the idea was to have as few groups as
possible.  I.e. the groups were supposedly only split where necessary.
As you say, there still ended up being a lot of groups in the end…)

It'd be easier to review if the work was split up into smaller steps.
E.g. maybe one way would be this, with each number being a single
patch:

(1) (a) Add a flags field to the built-in function definitions
that for now is always zero.
(b) Pick a name N to describe the most conservative set of flags.
(c) Make every built-in function definition use N.

(2) (a) Pick one type of function that cannot yet be described properly.
(b) Pick a name N for that type of function.
(c) Add whichever new flags are needed.
(d) Add the appropriate attributes when the flags are set,
possibly based on command-line options.
(e) Make (exactly) one built-in function definition use N.

(3) (a) Pick some functions that all need the same attributes and
that can already be described properly
(b) Update all of their built-in function definitions accordingly,
as a single change.

So after (1), filling out the table is an iterative process of (2) and
(3), in any order that's convenient (although it might help to order the
(2) patches so that each one adds as few flags as possible).  Each patch
would then be fairly small and self-contained.

That's just a suggestion though.  Please let me know if you have
any other suggestions.

I guess there are two obvious ways of adding the flags field:

- add a new parameter to every built-in function macro, e.g.
  BUILTIN_VSDQ_I and VAR1.

- wrap the definitions in a new macro, e.g.
  MY_NEW_GROUP (BUILTIN_VSDQ_I (BINOP, sqshl, 0))

I don't really have a preference, and I guess all other things being
equal, the first one wins by being more obvious than the second.
Just thought I'd mention the second way in case anyone preferred it.

Thanks,
Richard



Re: [PATCH] aarch64: Change costs for TX2 to expose more vectorization opportunities

2020-07-07 Thread Anton Youdkevitch

As I don't have the commit privilege, if this is a sufficient approval
can someone commit it for me?

--
  Thanks,
  Anton


On 06.7.2020 21:04 , Richard Sandiford wrote:

Joel Jones  writes:

I approve of this patch. I'm responsible for GCC for TX2 at Marvell. Andrew 
Pinski should certainly chime in if he wants.

Ah, in that case, the patch is OK.

Thanks,
Richard




Re: [PATCH][RFC] __builtin_shuffle sometimes should produce zip1 rather than TBL (PR82199)

2020-07-07 Thread Dmitrij Pochepko
Hi,

thank you for looking into this.

I prepared a new patch with all your comments addressed.

Thanks,
Dmitrij

On Tue, Jun 23, 2020 at 05:53:00PM +0100, Richard Sandiford wrote:
...
> 
> I think it would be simpler to do it in this order:
> 
>   - check for Advanced SIMD, bail out if not
>   - get the new mode, bail out if none
>   - calculate the permutation vector, bail out if not suitable
>   - set up the rest of “newd”
> 
> There would then only be one walk over d->perm rather than two,
> and we'd only create the gen_lowparts when there's something to test.
> 
> The new mode can be calculated with something like:
> 
>   poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
>   unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
>   auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
>   machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, 
> vec_bits);
> 
> “new_mode” will be “word_mode” on failure.
>
... 
> The regexp would be easier to read if quoted using {…}, which requires
> fewer backslashes.  Same for the other tests.
> 
> Thanks,
> Richard
From 71a3f4b05edc462bcceba35ff738c6f1b5ca3f0a Mon Sep 17 00:00:00 2001
From: Dmitrij Pochepko 
Date: Tue, 7 Jul 2020 18:45:06 +0300
Subject: [PATCH] __builtin_shuffle sometimes should produce zip1 rather than
 TBL (PR82199)

The following patch enables vector permutations optimization by using another vector element size when applicable.
It allows usage of simpler instructions in applicable cases.

example:

vector float f(vector float a, vector float b)
{
  return __builtin_shuffle  (a, b, (vector int){0, 1, 4,5});
}

was compiled into:
...
	adrp	x0, .LC0
	ldr	q2, [x0, #:lo12:.LC0]
	tbl	v0.16b, {v0.16b - v1.16b}, v2.16b
...

and after patch:
...
	zip1	v0.2d, v0.2d, v1.2d
...

bootstrapped and tested on aarch64-linux-gnu with no regressions

gcc/ChangeLog:

2020-06-11	Andrew Pinski	

	PR gcc/82199

	* gcc/config/aarch64/aarch64.c (aarch64_evpc_reencode): New function

gcc/testsuite/ChangeLog:

2020-06-11  Andrew Pinski   

	PR gcc/82199

	* gcc.target/aarch64/vdup_n_3.c: New test
	* gcc.target/aarch64/vzip_1.c: New test
	* gcc.target/aarch64/vzip_2.c: New test
	* gcc.target/aarch64/vzip_3.c: New test
	* gcc.target/aarch64/vzip_4.c: New test

Co-Authored-By:	Dmitrij Pochepko	
---
 gcc/config/aarch64/aarch64.c| 60 +
 gcc/testsuite/gcc.target/aarch64/vdup_n_3.c | 16 
 gcc/testsuite/gcc.target/aarch64/vzip_1.c   | 11 ++
 gcc/testsuite/gcc.target/aarch64/vzip_2.c   | 12 ++
 gcc/testsuite/gcc.target/aarch64/vzip_3.c   | 12 ++
 gcc/testsuite/gcc.target/aarch64/vzip_4.c   | 12 ++
 6 files changed, 123 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vdup_n_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vzip_4.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f3551a7..4b02bc7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -19905,6 +19905,8 @@ struct expand_vec_perm_d
   bool testing_p;
 };
 
+static bool aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
+
 /* Generate a variable permutation.  */
 
 static void
@@ -20090,6 +20092,62 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* Try to re-encode the PERM constant so it use the bigger size up.
+   This rewrites constants such as {0, 1, 4, 5}/V4SF to {0, 2}/V2DI.
+   We retry with this new constant with the full suite of patterns.  */
+static bool
+aarch64_evpc_reencode (struct expand_vec_perm_d *d)
+{
+  expand_vec_perm_d newd;
+  unsigned HOST_WIDE_INT nelt;
+
+  if (d->vec_flags != VEC_ADVSIMD)
+return false;
+
+  /* Get the new mode.  Always twice the size of the inner
+ and half the elements.  */
+  poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
+  unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
+  auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
+  machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
+
+  if (new_mode == word_mode)
+return false;
+
+  /* to_constant is safe since this routine is specific to Advanced SIMD
+ vectors.  */
+  nelt = d->perm.length ().to_constant ();
+
+  vec_perm_builder newpermconst;
+  newpermconst.new_vector (nelt / 2, nelt / 2, 1);
+
+  /* Convert the perm constant if we can.  Require even, odd as the pairs.  */
+  for (unsigned int i = 0; i < nelt; i += 2)
+{
+  poly_int64 elt_poly0 = d->perm[i];
+  poly_int64 elt_poly1 = d->perm[i+1];
+  if (!elt_poly0.is_constant () || !elt_poly1.is_constant ())
+	return false;
+  unsigned int elt0 = elt_poly0.to_constant ();
+  unsigned int elt1 = elt_poly1.to_constant ();
+  if ((elt0 & 1) != 0 || e

Re: [PATCH] x86: Enable FMA in rsqrt2 expander

2020-07-07 Thread Kirill Yukhin via Gcc-patches
Hello HJ,

On 28 июн 07:19, H.J. Lu via Gcc-patches wrote:
> Enable FMA in rsqrt2 expander and fold rsqrtv16sf2 expander into
> rsqrt2 expander which expands to UNSPEC_RSQRT28 for TARGET_AVX512ER.
> Although it doesn't show performance change in our workloads, FMA can
> improve other workloads.
> 
> gcc/
> 
>   PR target/88713
>   * config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA.
>   * config/i386/sse.md (VF_AVX512VL_VF1_128_256): New.
>   (rsqrt2): Replace VF1_128_256 with VF_AVX512VL_VF1_128_256.
>   (rsqrtv16sf2): Removed.
> 
> gcc/testsuite/
> 
>   PR target/88713
>   * gcc.target/i386/pr88713-1.c: New test.
>   * gcc.target/i386/pr88713-2.c: Likewise.

So, you've introduced new rsqrt expanders for DF vectors and relaxed
condition for V16SF. What I didn't get is why did you change unspec
type from RSQRT to RSQRT28 for V16SF expander?

--
K


Re: [PATCH] x86: Enable FMA in rsqrt2 expander

2020-07-07 Thread H.J. Lu via Gcc-patches
On Tue, Jul 7, 2020 at 8:56 AM Kirill Yukhin  wrote:
>
> Hello HJ,
>
> On 28 июн 07:19, H.J. Lu via Gcc-patches wrote:
> > Enable FMA in rsqrt2 expander and fold rsqrtv16sf2 expander into
> > rsqrt2 expander which expands to UNSPEC_RSQRT28 for TARGET_AVX512ER.
> > Although it doesn't show performance change in our workloads, FMA can
> > improve other workloads.
> >
> > gcc/
> >
> >   PR target/88713
> >   * config/i386/i386-expand.c (ix86_emit_swsqrtsf): Enable FMA.
> >   * config/i386/sse.md (VF_AVX512VL_VF1_128_256): New.
> >   (rsqrt2): Replace VF1_128_256 with VF_AVX512VL_VF1_128_256.
> >   (rsqrtv16sf2): Removed.
> >
> > gcc/testsuite/
> >
> >   PR target/88713
> >   * gcc.target/i386/pr88713-1.c: New test.
> >   * gcc.target/i386/pr88713-2.c: Likewise.
>
> So, you've introduced new rsqrt expanders for DF vectors and relaxed
> condition for V16SF. What I didn't get is why did you change unspec
> type from RSQRT to RSQRT28 for V16SF expander?
>

UNSPEC in define_expand is meaningless when the pattern is fully
expanded by ix86_emit_swsqrtsf.  I believe that UNSPEC in rsqrt2
expander can be removed.

-- 
H.J.


Re: [PATCH] rs6000: Refine RTL unroll adjust hook

2020-07-07 Thread will schmidt via Gcc-patches
On Mon, 2020-07-06 at 15:13 +0800, guojiufu via Gcc-patches wrote:

Hi,

Assorted comments below.   thanks :-)

> For very small loops (< 6 insns), it would be fine to unroll 4
> times to use cache line better.  Like below loops:
>  `while (i) a[--i] = NULL;   while (p < e)  *d++ = *p++;`
> 
> And for very complex loops which may cause negative impacts:
> branch-miss or cache-miss. Like below loop: there are calls,
> early exits and branches in loop.
> ```
>   for (int i = 0; i < n; i++) {
>   int e = a[i];
>  
>   if (function_call(e))  break;
>  
>   }
> ```
> 
> This patch enhances RTL unrolling for small loops and prevents
> unrolling of complex loops.

ok.

> 
> gcc/ChangeLog
> 2020-07-03  Jiufu Guo  
> 
> * config/rs6000/rs6000.c (rs6000_loop_unroll_adjust): Refine hook.
> (rs6000_complex_loop_p): New function.
> (num_loop_calls): New function.

Tabs versus spaces.

(num_loop_calls): New function.


> ---
>  gcc/config/rs6000/rs6000.c | 46 +---
> --
>  1 file changed, 40 insertions(+), 6 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 58f5d780603..a4874fa0efc 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -5130,22 +5130,56 @@ rs6000_destroy_cost_data (void *data)
>free (data);
>  }
> 
> +/* Count the number of call insns in LOOP.  */
> +static unsigned int
> +num_loop_calls (struct loop *loop)
> +{
> +  basic_block *bbs;
> +  rtx_insn *insn;
> +  unsigned int i;
> +  unsigned int call_ins_num = 0;
> +
> +  bbs = get_loop_body (loop);
> +  for (i = 0; i < loop->num_nodes; i++)
> +FOR_BB_INSNS (bbs[i], insn)
> +  if (CALL_P (insn))
> + call_ins_num++;
> +
> +  free (bbs);
> +
> +  return call_ins_num;
> +}

ok.


> +
> +/* Return true if LOOP is too complex to be unrolled.  */
> +static bool
> +rs6000_complex_loop_p (struct loop *loop)
> +{
> +  unsigned call_num;
> +
> +  return loop->ninsns > 10
> +&& (call_num = num_loop_calls (loop)) > 0
> +&& (call_num + num_loop_branches (loop)) * 5 > loop->ninsns
> +&& !single_exit (loop);
> +}
> +


The assignment to call_num within the logic there concerns me.  I'd
break that out.

The 5 value is not explicitly mentioned elsewhere.  Contextually this
appears to be evaluating the ratio of branches versus instructions
within the loop.  Could use some clarity.  



>  /* Implement targetm.loop_unroll_adjust.  */
> 
>  static unsigned
>  rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
>  {
> -   if (unroll_only_small_loops)
> +  if (unroll_only_small_loops)

indentation fix looks ok.

>  {
> -  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> -  example we may want to unroll very small loops more times (4 perhaps).
> -  We also should use a PARAM for this.  */

Still hardcoded values, and may still wish to eventually have this as a
tunable param.   Probably OK to drop the 2nd sentence, but first and
last sentences should probably stay.


> +  if (loop->ninsns <= 6)
> + return MIN (4, nunroll);
>if (loop->ninsns <= 10)
>   return MIN (2, nunroll);
> -  else
> - return 0;
> +
> +  return 0;
>  }


ok

> 
> +  if (rs6000_complex_loop_p (loop))
> +return 0;
> +
>return nunroll;
>  }
> 

ok






Re: [PATCH] fix detection of negative step DR groups

2020-07-07 Thread Richard Sandiford
Richard Biener  writes:
> This fixes a condition that caused all negative step DR groups to
> be detected as single element interleaving.  Such groups are
> rejected by interleaving vectorization but miscompiled by SLP
> which is fixed by forcing VMAT_STRIDED_SLP for now.
>
> Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Where does the link between VMAT_CONTIGUOUS_REVERSE and single-element
interleaving happen?  (Sorry, should probably spend more time figuring
that out for myself.)

In principle VMAT_CONTIGUOUS_REVERSE seems like the right classification
for the testcases.

Thanks,
Richard



>
>   * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
>   group overlap condition to allow negative step DR groups.
>   * tree-vect-stmts.c (get_group_load_store_type): For
>   multi element SLP groups force VMAT_STRIDED_SLP when the step
>   is negative.
>
>   * gcc.dg/vect/slp-47.c: New testcase.
>   * gcc.dg/vect/slp-48.c: Likewise.
> ---
>  gcc/testsuite/gcc.dg/vect/slp-47.c | 56 ++
>  gcc/testsuite/gcc.dg/vect/slp-48.c | 56 ++
>  gcc/tree-vect-data-refs.c  |  8 +++--
>  gcc/tree-vect-stmts.c  | 11 --
>  4 files changed, 126 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/slp-47.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/slp-48.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-47.c 
> b/gcc/testsuite/gcc.dg/vect/slp-47.c
> new file mode 100644
> index 000..7b2ddf664df
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/slp-47.c
> @@ -0,0 +1,56 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include "tree-vect.h"
> +
> +int x[1024], y[1024];
> +
> +void __attribute__((noipa)) foo()
> +{
> +  for (int i = 0; i < 512; ++i)
> +{
> +  x[2*i] = y[1023 - (2*i)];
> +  x[2*i+1] = y[1023 - (2*i+1)];
> +}
> +}
> +
> +void __attribute__((noipa)) bar()
> +{
> +  for (int i = 0; i < 512; ++i)
> +{
> +  x[2*i] = y[1023 - (2*i+1)];
> +  x[2*i+1] = y[1023 - (2*i)];
> +}
> +}
> +
> +int 
> +main ()
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < 1024; ++i)
> +{
> +  x[i] = 0;
> +  y[i] = i;
> +  __asm__ volatile ("");
> +}
> +
> +  foo ();
> +  for (int i = 0; i < 1024; ++i)
> +if (x[i] != y[1023 - i])
> +  abort ();
> +
> +  for (int i = 0; i < 1024; ++i)
> +{
> +  x[i] = 0;
> +  __asm__ volatile ("");
> +}
> +
> +  bar ();
> +  for (int i = 0; i < 1024; ++i)
> +if (x[i] != y[1023 - i^1])
> +  abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" 
> } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-48.c 
> b/gcc/testsuite/gcc.dg/vect/slp-48.c
> new file mode 100644
> index 000..0b327aede8e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/slp-48.c
> @@ -0,0 +1,56 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include "tree-vect.h"
> +
> +int x[1024], y[1024];
> +
> +void __attribute__((noipa)) foo()
> +{
> +  for (int i = 0; i < 512; ++i)
> +{
> +  x[1023 - (2*i+1)] = y[2*i];
> +  x[1023 - (2*i)] = y[2*i+1];
> +}
> +}
> +
> +void __attribute__((noipa)) bar()
> +{
> +  for (int i = 0; i < 512; ++i)
> +{
> +  x[1023 - (2*i+1)] = y[2*i+1];
> +  x[1023 - (2*i)] = y[2*i];
> +}
> +}
> +
> +int 
> +main ()
> +{
> +  check_vect ();
> +
> +  for (int i = 0; i < 1024; ++i)
> +{
> +  x[i] = 0;
> +  y[i] = i;
> +  __asm__ volatile ("");
> +}
> +
> +  foo ();
> +  for (int i = 0; i < 1024; ++i)
> +if (x[i] != y[1023 - i^1])
> +  abort ();
> +
> +  for (int i = 0; i < 1024; ++i)
> +{
> +  x[i] = 0;
> +  __asm__ volatile ("");
> +}
> +
> +  bar ();
> +  for (int i = 0; i < 1024; ++i)
> +if (x[i] != y[1023 - i])
> +  abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" 
> } } */
> diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
> index 959c2d3378f..2b4421b5fb4 100644
> --- a/gcc/tree-vect-data-refs.c
> +++ b/gcc/tree-vect-data-refs.c
> @@ -3074,13 +3074,15 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
> if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
>   break;
>  
> -   /* If the step (if not zero or non-constant) is greater than the
> +   /* If the step (if not zero or non-constant) is smaller than the
>difference between data-refs' inits this splits groups into
>suitable sizes.  */
> if (tree_fits_shwi_p (DR_STEP (dra)))
>   {
> -   HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
> -   if (step != 0 && step <= (init_b - init_a))
> +   unsigned HOST_WIDE_INT step
> + = absu_hwi (tree_to_shwi (DR_STEP (dra)));
> +   if (step != 0
> +

[PATCH] rs6000: Add execution tests for mma builtins.

2020-07-07 Thread Aaron Sawdey via Gcc-patches
Updated slightly, removed -Wno-psabi as requested and also fixed the
fact that it wasn't actually checking __builtin_cpu_is or
__builtin_cpu_supports. OK for trunk and backport to 10?

Thanks,
Aaron

2020-06-30  Rajalakshmi Srinivasaraghavan  
Aaron Sawdey  

gcc/testsuite/
* gcc.target/powerpc/mma-single-test.c: New file.
* gcc.target/powerpc/mma-double-test.c: New file.
---
 .../gcc.target/powerpc/mma-double-test.c  | 204 +
 .../gcc.target/powerpc/mma-single-test.c  | 213 ++
 2 files changed, 417 insertions(+)
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-double-test.c
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-single-test.c

diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
new file mode 100755
index 000..9fdf6d9d2a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -0,0 +1,204 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[3] ; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[2] ; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[1] ; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[0] ;
+
+void
+MMA (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double A[rowsA * common];
+ double B[common * colsB];
+ double C[rowsA * colsB];
+ double D[rowsA * colsB];
+
+
+ init (A, rowsA, common);
+ init (B, common, colsB);
+ init0 (C, D, rowsA, colsB);
+ MMA (rowsA, colsB, common, A, B, 

[PATCH] rs6000: fix power10_hw test

2020-07-07 Thread Aaron Sawdey via Gcc-patches
The code snippet for this test was returning 1 if power10
instructions executed correctly. It should return 0 if the
test passes.

OK for trunk and backport to 10?

Thanks,
   Aaron

* lib/target-supports.exp (check_power10_hw_available):
Return 0 for passing test.
---
 gcc/testsuite/lib/target-supports.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 4bdcaef1132..c1239535a4b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2226,7 +2226,7 @@ proc check_power10_hw_available { } {
/* Set e first and use +r to check if pli actually works.  */
long e = -1;
asm ("pli %0,%1" : "+r" (e) : "n" (0x12345));
-   return (e == 0x12345);
+   return (e != 0x12345);
}
} "-mcpu=power10"
 }]
-- 
2.17.1



preprocessor: Better line info for <builtin> & <command-line>

2020-07-07 Thread Nathan Sidwell
With C++ module header units it becomes important to distinguish between 
macros defined in forced headers (& commandline & builtins) from those 
defined in the header file being processed.  We weren't making that easy 
because we treated the builtins and command-line locations somewhat 
file-like, with incrementing line numbers, and showing them as included 
from line 1 of the main file.  This patch does 3 things:


0) extend the idiom that 'line 0' of a file means 'the file as a whole'

1) builtins and command-line macros are shown as-if included from line zero.

2) when emitting preprocessed output we keep resetting the line number 
so that re-reading that preprocessed output will get the same set of 
locations for the command line etc.


For instance the new c-c++-common/cpp/line-2.c test, now emits

In file included from <command-line>:
./line-2.h:4:2: error: #error wrong
4 | #error wrong
  |  ^
line-2.c:3:11: error: macro "bill" passed 1 arguments, but takes just 0
3 | int bill(1);
  |   ^
In file included from <command-line>:
./line-2.h:3: note: macro "bill" defined here
3 | #define bill() 2
  |

Before it told you about including from <command-line>:31.

the preprocessed output looks like:
# 0 "line-2.c"
# 0 "<built-in>"
#define __STDC__ 1
# 0 "<built-in>"
#define __cplusplus 201703L
# 0 "<built-in>"
...

(There's a new optimization in do_linemarker to stop each of these line
markers causing a new line map.  We can simply rewind the location, and
keep using the same line map.)


libcpp/
* directives.c (do_linemarker): Optimize rewinding to line zero.
* files.c (_cpp_stack_file): Start on line zero when about to inject
headers.
(cpp_push_include, cpp_push_default_include): Use highest_line as
the location.
* include/cpplib.h (cpp_read_main_file): Add injecting parm.
* init.c (cpp_read_main_file): Likewise, inform _cpp_stack_file.
* internal.h (enum include_type): Add IT_MAIN_INJECT.
gcc/c-family/
* c-opts.c (c_common_post_options): Add 'injecting' arg to
cpp_read_main_file.
(c_finish_options): Add linemap_line_start calls for builtin and cmd
maps.  Force token position to line_table's highest line.
* c-ppoutput.c (print_line_1): Refactor, print line zero.
(cb_define): Always increment source line.
gcc/testsuite/
* c-c++-common/cpp/line-2.c: New.
* c-c++-common/cpp/line-2.h: New.
* c-c++-common/cpp/line-3.c: New.
* c-c++-common/cpp/line-4.c: New.
* c-c++-common/cpp/line-4.h: New.

--
Nathan Sidwell
diff --git c/ChangeLog w/ChangeLog
index 8c254769017..481f54a856e 100644
--- c/ChangeLog
+++ w/ChangeLog
@@ -1,3 +1,28 @@
+2020-07-07  Nathan Sidwell  
+
+	gcc/c-family/
+	* c-opts.c (c_common_post_options): Add 'injecting' arg to
+	cpp_read_main_file.
+	(c_finish_options): Add linemap_line_start calls for builtin and cmd
+	maps.  Force token position to line_table's highest line.
+	* c-ppoutput.c (print_line_1): Refactor, print line zero.
+	(cb_define): Always increment source line.
+	gcc/testsuite/
+	* c-c++-common/cpp/line-2.c: New.
+	* c-c++-common/cpp/line-2.h: New.
+	* c-c++-common/cpp/line-3.c: New.
+	* c-c++-common/cpp/line-4.c: New.
+	* c-c++-common/cpp/line-4.h: New.
+	libcpp/
+	* directives.c (do_linemarker): Optimize rewinding to line zero.
+	* files.c (_cpp_stack_file): Start on line zero when about to inject
+	headers.
+	(cpp_push_include, cpp_push_default_include): Use highest_line as
+	the location.
+	* include/cpplib.h (cpp_read_main_file): Add injecting parm.
+	* init.c (cpp_read_main_file): Likewise, inform _cpp_stack_file.
+	* internal.h (enum include_type): Add IT_MAIN_INJECT.
+
 2020-06-12  Martin Liska  
 
 	* .gitignore: Add .clang-tidy.
diff --git c/gcc/c-family/c-opts.c w/gcc/c-family/c-opts.c
index 9b6300f330f..ec3de868dd4 100644
--- c/gcc/c-family/c-opts.c
+++ w/gcc/c-family/c-opts.c
@@ -1110,7 +1110,11 @@ c_common_post_options (const char **pfilename)
   input_location = UNKNOWN_LOCATION;
 
   *pfilename = this_input_filename
-= cpp_read_main_file (parse_in, in_fnames[0]);
+= cpp_read_main_file (parse_in, in_fnames[0],
+			  /* We'll inject preamble pieces if this is
+			 not preprocessed.  */
+			  !cpp_opts->preprocessed);
+
   /* Don't do any compilation or preprocessing if there is no input file.  */
   if (this_input_filename == NULL)
 {
@@ -1429,6 +1433,7 @@ c_finish_options (void)
 	= linemap_check_ordinary (linemap_add (line_table, LC_RENAME, 0,
 	   _(""), 0));
   cb_file_change (parse_in, bltin_map);
+  linemap_line_start (line_table, 0, 1);
 
   /* Make sure all of the builtins about to be declared have
 	 BUILTINS_LOCATION has their location_t.  */
@@ -1452,9 +1457,10 @@ c_finish_options (void)
 	= linemap_check_ordinary (linemap_add (line_table, LC_REN

testsuite:missed testcase

2020-07-07 Thread Nathan Sidwell via Gcc-patches
I discovered I'd missed applying a testcase when fixing up the EOF token 
location a while back.


gcc/testsuite/
* c-c++-common/cpp/pragma-eof.c: New

--
Nathan Sidwell : Facebook
diff --git c/gcc/testsuite/c-c++-common/cpp/pragma-eof.c w/gcc/testsuite/c-c++-common/cpp/pragma-eof.c
new file mode 100644
index 000..c72be8042b5
--- /dev/null
+++ w/gcc/testsuite/c-c++-common/cpp/pragma-eof.c
@@ -0,0 +1,6 @@
+/* { dg-additional-options -fopenmp }  */
+
+/* { dg-error "expected" "" { target *-*-* } 6 } */
+/* Make sure we see pragma_eol even though lacking new line.  *
+/* no newline at end of file.  */
+#pragma omp parallel
\ No newline at end of file


testsuite: missed testcase

2020-07-07 Thread Nathan Sidwell
I discovered I'd missed applying a testcase when fixing up the EOF token 
location a while back.


gcc/testsuite/
* c-c++-common/cpp/pragma-eof.c: New

--
Nathan Sidwell
diff --git c/gcc/testsuite/c-c++-common/cpp/pragma-eof.c w/gcc/testsuite/c-c++-common/cpp/pragma-eof.c
new file mode 100644
index 000..c72be8042b5
--- /dev/null
+++ w/gcc/testsuite/c-c++-common/cpp/pragma-eof.c
@@ -0,0 +1,6 @@
+/* { dg-additional-options -fopenmp }  */
+
+/* { dg-error "expected" "" { target *-*-* } 6 } */
+/* Make sure we see pragma_eol even though lacking new line.  *
+/* no newline at end of file.  */
+#pragma omp parallel
\ No newline at end of file


Re: [PATCH] libgomp: Add OMPD process functions and datatypes.

2020-07-07 Thread y2s1982 . via Gcc-patches
Hello,

On Tue, Jul 7, 2020 at 7:46 AM Jakub Jelinek  wrote:

> On Tue, Jul 07, 2020 at 07:38:13AM -0400, y2s1982 . wrote:
> > > I have no idea what this function is doing, but e.g. from aliasing
> point of
> > > view trying to access something as short/int/long long is dangerous,
> and
> > > there might be alignment implications too.
> > >
> >
> > This function is used in ompd_device_initialize(). The initializing
> function
> > receives a void *id and ompd_size_t sizeof_id. My first attempt tried to
> > just
> > preserve both information as is, but I wasn't sure how the void * would
> > ultimately be read. In the second attempt, I tried to cast the value,
> based
> > on
> > the sizeof_id, and store it in a large enough type. This does assume the
> > void * is pointing at a numerical value.
> > What would be the best way to handle the void *id?
>
> Well, you should know what it is and only depending on that handle it.
> If it e.g. would be string, you'd want to use memcpy to copy it somewhere,
> or parse immediately, etc.
>

I have re-read the documentation trying to find a different solution.
In particular, ompd_device_initialize states that
ompd_device_t kind, ompd_size_t sizeof_id, and void *id represent
a device identifier. To dig further, I read up on the ompd_device_t. A
passage from ompd_device_t says that the OMPD library and a tool that uses
it must agree on the format of the object that is passed.
It also says that ompd_device_t is a pointer to where the device identifier
is stored and the size of the device identifier. I am not sure how this
applies to ompd_device_initialize, as those two pieces of information seem
to be supplied separately: *id and sizeof_id. In fact, ompd-type.h provides 4
examples, 2 of which are host and cuda, and they all simply contain unique
numerical values.  So does this mean that I should just decide on what the
library and tool will use for device id data type and simply stick to it?

Otherwise, is it possible to know the proper data type to cast the void *id
based on the device type (host/cuda)?

Tony


Re: [PATCH] separate reading past the end from -Wstringop-overflow

2020-07-07 Thread Martin Sebor via Gcc-patches

Ping.  Despite its size, there isn't much new in the patch, it
pretty much just splits an existing warning into two, one for
buffer overflow and another for "overread."

https://gcc.gnu.org/pipermail/gcc-patches/2020-June/548786.html

On 6/23/20 8:05 PM, Martin Sebor wrote:

-Wstringop-overflow is issued for both writing and reading past
the end, even though the latter problem is often viewed as being
lower severity than the former, or at least sufficiently different
to triage separately.  In CWE, for example, each of the two kinds
of problems has its own classification (CWE-787: Out-of-bounds
Write, and CWE-125: Out-of-bounds Read).

To make this easier with GCC, the attached patch introduces
a new option called -Wstringop-overread and splits the current
indiscriminate warning into the respective two categories.  Other
than the new option the improvements also exposed a few instances
of reading past the end that GCC doesn't detect that the new code
does thanks to more consistent checking.

As usual, I tested the patch by building Binutils/GDB, Glibc, and
the Linux kernel with no unexpected (or any, in fact) instances
of the two warnings.

The work involved more churn than I expected, and maintaining
the expected precedence of warnings (missing terminating nul,
excessive bounds, etc.) turned out to be more delicate and time
consuming than I would have liked.  I think the result is cleaner
code, but I'm quite sure it can still stand to be made better.
That's also my plan for the next step when I'd like to move this
checking out of builtins.c and calls.c and into a file of its own.
Ultimately, Jeff and I would like to integrate it into the path
isolation pass.

Martin

PS There's a FIXME comment in expand_builtin_memory_chk as
a reminder of a future change.  It currently has no bearing
on anything.




[PATCH, part 2] PR fortran/95980 - ICE in get_unique_type_string, at fortran/class.c:485

2020-07-07 Thread Harald Anlauf
As Dominique pointed out in the PR, the committed patch (part 1) fixed only
one of the provided testcases, but not the original one.  That turned out
to be a long and winding road, requiring further checks for NULL pointer
dereferences.

The resulting attached changes have been regtested on x86_64-pc-linux-gnu
and confirmed on Darwin by Dominique (thanks!), see PR.

OK for master / where appropriate?

Thanks,
Harald


PR fortran/95980 - ICE in get_unique_type_string, at fortran/class.c:485

In SELECT TYPE, the argument may be an incorrectly specified unlimited
CLASS variable.  Avoid NULL pointer dereferences for clean error
recovery.

gcc/fortran/
PR fortran/95980
* class.c (gfc_add_component_ref, gfc_build_class_symbol):
Add checks for NULL pointer dereference.
* primary.c (gfc_variable_attr): Likewise.
* resolve.c (resolve_variable, resolve_assoc_var)
(resolve_fl_var_and_proc, resolve_fl_variable_derived)
(resolve_symbol): Likewise.

diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c
index d6847eb0004..dfa48400712 100644
--- a/gcc/fortran/class.c
+++ b/gcc/fortran/class.c
@@ -228,7 +228,7 @@ gfc_add_component_ref (gfc_expr *e, const char *name)
 	break;
   tail = &((*tail)->next);
 }
-  if (derived->components && derived->components->next &&
+  if (derived && derived->components && derived->components->next &&
   derived->components->next->ts.type == BT_DERIVED &&
   derived->components->next->ts.u.derived == NULL)
 {
@@ -663,6 +663,10 @@ gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,

   /* Determine the name of the encapsulating type.  */
   rank = !(*as) || (*as)->rank == -1 ? GFC_MAX_DIMENSIONS : (*as)->rank;
+
+  if (!ts->u.derived)
+return false;
+
   get_unique_hashed_string (tname, ts->u.derived);
   if ((*as) && attr->allocatable)
 name = xasprintf ("__class_%s_%d_%da", tname, rank, (*as)->corank);
diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c
index 76b1607ee3d..c0f66d3df22 100644
--- a/gcc/fortran/primary.c
+++ b/gcc/fortran/primary.c
@@ -2597,7 +2597,7 @@ gfc_variable_attr (gfc_expr *expr, gfc_typespec *ts)
   sym = expr->symtree->n.sym;
   attr = sym->attr;

-  if (sym->ts.type == BT_CLASS && sym->attr.class_ok)
+  if (sym->ts.type == BT_CLASS && sym->attr.class_ok && sym->ts.u.derived)
 {
   dimension = CLASS_DATA (sym)->attr.dimension;
   codimension = CLASS_DATA (sym)->attr.codimension;
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 223de91..47a619c56b2 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -5571,6 +5571,7 @@ resolve_variable (gfc_expr *e)
 }
   /* TS 29113, C535b.  */
   else if (((sym->ts.type == BT_CLASS && sym->attr.class_ok
+	 && sym->ts.u.derived && CLASS_DATA (sym)
 	 && CLASS_DATA (sym)->as
 	 && CLASS_DATA (sym)->as->type == AS_ASSUMED_RANK)
 	|| (sym->ts.type != BT_CLASS && sym->as
@@ -5618,6 +5619,7 @@ resolve_variable (gfc_expr *e)

   /* TS 29113, C535b.  */
   if (((sym->ts.type == BT_CLASS && sym->attr.class_ok
+	&& sym->ts.u.derived && CLASS_DATA (sym)
 	&& CLASS_DATA (sym)->as
 	&& CLASS_DATA (sym)->as->type == AS_ASSUMED_RANK)
|| (sym->ts.type != BT_CLASS && sym->as
@@ -9029,7 +9031,7 @@ resolve_assoc_var (gfc_symbol* sym, bool resolve_target)
 {
   /* target's rank is 0, but the type of the sym is still array valued,
 	 which has to be corrected.  */
-  if (sym->ts.type == BT_CLASS
+  if (sym->ts.type == BT_CLASS && sym->ts.u.derived
 	  && CLASS_DATA (sym) && CLASS_DATA (sym)->as)
 	{
 	  gfc_array_spec *as;
@@ -12615,7 +12617,8 @@ resolve_fl_var_and_proc (gfc_symbol *sym, int mp_flag)
 {
   gfc_array_spec *as;

-  if (sym->ts.type == BT_CLASS && sym->attr.class_ok)
+  if (sym->ts.type == BT_CLASS && sym->attr.class_ok
+  && sym->ts.u.derived && CLASS_DATA (sym))
 as = CLASS_DATA (sym)->as;
   else
 as = sym->as;
@@ -12625,7 +12628,8 @@ resolve_fl_var_and_proc (gfc_symbol *sym, int mp_flag)
 {
   bool pointer, allocatable, dimension;

-  if (sym->ts.type == BT_CLASS && sym->attr.class_ok)
+  if (sym->ts.type == BT_CLASS && sym->attr.class_ok
+	  && sym->ts.u.derived && CLASS_DATA (sym))
 	{
 	  pointer = CLASS_DATA (sym)->attr.class_pointer;
 	  allocatable = CLASS_DATA (sym)->attr.allocatable;
@@ -12676,6 +12680,7 @@ resolve_fl_var_and_proc (gfc_symbol *sym, int mp_flag)
 {
   /* F03:C502.  */
   if (sym->attr.class_ok
+	  && sym->ts.u.derived
 	  && !sym->attr.select_type_temporary
 	  && !UNLIMITED_POLY (sym)
 	  && !gfc_type_is_extensible (CLASS_DATA (sym)->ts.u.derived))
@@ -12714,7 +12719,8 @@ resolve_fl_variable_derived (gfc_symbol *sym, int no_init_flag)
  associated by the presence of another class I symbol in the same
  namespace.  14.6.1.3 of the standard and the discussion on
  comp.lang.fortran.  */
-  if (sym->ns != sym->ts.u.derived->ns
+  if (sym->ts.u.derived
+  && sym->ns != 

Re: [PATCH] c++: ICE in is_really_empty_class [PR95497]

2020-07-07 Thread Patrick Palka via Gcc-patches
On Tue, 7 Jul 2020, Patrick Palka wrote:

> We are ICEing in the testcase below because we pass the
> yet-uninstantiated class type A of the PARM_DECL b to
> is_really_empty_class from potential_rvalue_constant_expression when
> parsing the requirement t += b.
> 
> This patch fixes the ICE by guarding the problematic call to
> is_really_empty_class with a COMPLETE_TYPE_P check, which should also
> subsume the existing dependent_type_p check.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
> commit to trunk and to the 10 branch?

Oops, the regression is not present on the 10 branch so this fix wouldn't
need backporting.

> 
> gcc/cp/ChangeLog:
> 
>   PR c++/95497
>   * constexpr.c (potential_constant_expression_1): When
>   processing_template_decl, check COMPLETE_TYPE_P before calling
>   is_really_empty_class.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR c++/95497
>   * g++.dg/cpp2a/concepts-pr95497.C: New test.
> ---
>  gcc/cp/constexpr.c|  2 +-
>  gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C | 12 
>  2 files changed, 13 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> 
> diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
> index 1939166e907..ff78ebda2dc 100644
> --- a/gcc/cp/constexpr.c
> +++ b/gcc/cp/constexpr.c
> @@ -7443,7 +7443,7 @@ potential_constant_expression_1 (tree t, bool 
> want_rval, bool strict, bool now,
>if (now && want_rval)
>   {
> tree type = TREE_TYPE (t);
> -   if (dependent_type_p (type)
> +   if ((processing_template_decl && !COMPLETE_TYPE_P (type))
> || is_really_empty_class (type, /*ignore_vptr*/false))
>   /* An empty class has no data to read.  */
>   return true;
> diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C 
> b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> new file mode 100644
> index 000..4d7718ad5e8
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> @@ -0,0 +1,12 @@
> +// PR c++/95497
> +// { dg-do compile { target c++20 } }
> +
> +template 
> +struct A{};
> +
> +template 
> +concept c =
> +requires(T t, A b) // note that A is independent of T
> +{
> +{ t += b };
> +};
> -- 
> 2.27.0.203.gf402ea6816
> 
> 



Re: [PATCH] c++: wrong pretty printing of nested type [PR95303]

2020-07-07 Thread Jason Merrill via Gcc-patches

On 7/7/20 9:33 AM, Patrick Palka wrote:

In the testcase below, we pretty print the nested type A::B as
A::B because we don't check that B is itself a class template
before printing the innermost set of template arguments from B's
TEMPLATE_INFO (which in this case belongs to A).  This patch fixes this
by checking PRIMARY_TEMPLATE_P beforehand.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit to trunk and perhaps to the 10 branch?


OK for trunk, this doesn't seem worth backporting.


gcc/ChangeLog:

PR c++/95303
* cxx-pretty-print.c (pp_cxx_unqualified_id): Check
PRIMARY_TEMPLATE_P before printing the innermost template
arguments.

gcc/testsuite/ChangeLog:

PR c++/95303
* g++.dg/concepts/diagnostic14.C: New test.
---
  gcc/cp/cxx-pretty-print.c| 13 +++
  gcc/testsuite/g++.dg/concepts/diagnostic14.C | 36 
  2 files changed, 43 insertions(+), 6 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/concepts/diagnostic14.C

diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c
index 188462a79e7..263f225a492 100644
--- a/gcc/cp/cxx-pretty-print.c
+++ b/gcc/cp/cxx-pretty-print.c
@@ -173,12 +173,13 @@ pp_cxx_unqualified_id (cxx_pretty_printer *pp, tree t)
  case UNBOUND_CLASS_TEMPLATE:
pp_cxx_unqualified_id (pp, TYPE_NAME (t));
if (tree ti = TYPE_TEMPLATE_INFO_MAYBE_ALIAS (t))
-   {
- pp_cxx_begin_template_argument_list (pp);
- tree args = INNERMOST_TEMPLATE_ARGS (TI_ARGS (ti));
- pp_cxx_template_argument_list (pp, args);
- pp_cxx_end_template_argument_list (pp);
-   }
+   if (PRIMARY_TEMPLATE_P (TI_TEMPLATE (ti)))
+ {
+   pp_cxx_begin_template_argument_list (pp);
+   tree args = INNERMOST_TEMPLATE_ARGS (TI_ARGS (ti));
+   pp_cxx_template_argument_list (pp, args);
+   pp_cxx_end_template_argument_list (pp);
+ }
break;
  
  case BIT_NOT_EXPR:

diff --git a/gcc/testsuite/g++.dg/concepts/diagnostic14.C 
b/gcc/testsuite/g++.dg/concepts/diagnostic14.C
new file mode 100644
index 000..ec2b68c4a3c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/concepts/diagnostic14.C
@@ -0,0 +1,36 @@
+// PR c++/95303
+// { dg-do compile { target c++20 } }
+
+template
+struct A {
+struct B {};
+};
+
+template
+  requires __is_same(T, char)
+struct A {
+struct B {};
+};
+
+template<>
+  struct A {
+struct B {};
+  };
+
+template
+concept C = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(C::B>); // { dg-error "failed" }
+
+template
+concept D = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(D::B>); // { dg-error "failed" }
+
+template
+concept E = requires (T&& t) { // { dg-message "\\\[with T = A::B\\\]" }
+t.a;
+};
+static_assert(E::B>); // { dg-error "failed" }





[PATCH] rs6000: fix power10_hw test [v2]

2020-07-07 Thread Aaron Sawdey via Gcc-patches
The code snippet for this test was returning 1 if power10
instructions executed correctly. It should return 0 if the
test passes.

Approved offline by Segher with slight change. Will
push after posting.


* lib/target-supports.exp (check_power10_hw_available):
Return 0 for passing test.
---
 gcc/testsuite/lib/target-supports.exp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 4bdcaef1132..848cb96aec4 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2226,7 +2226,9 @@ proc check_power10_hw_available { } {
/* Set e first and use +r to check if pli actually works.  */
long e = -1;
asm ("pli %0,%1" : "+r" (e) : "n" (0x12345));
-   return (e == 0x12345);
+   if (e == 0x12345)
+ return 0;
+   return 1;
}
} "-mcpu=power10"
 }]
-- 
2.17.1



[PATCH] expr: Fix REDUCE_BIT_FIELD for constants [PR95694]

2020-07-07 Thread Richard Sandiford
[Sorry, been sitting on this patch for a while and just realised
 I never sent it.]

This is yet another PR caused by constant integer rtxes not storing
a mode.  We were calling REDUCE_BIT_FIELD on a constant integer that
didn't fit in poly_int64, and then tripped the as_a <scalar_int_mode>
assert on VOIDmode.

AFAICT REDUCE_BIT_FIELD is always passed rtxes that have TYPE_MODE
(rather than some other mode) and it just fills in the redundant
sign bits of that TYPE_MODE value.  So it should be safe to get
the mode from the type instead of the rtx.  The patch does that
and asserts that the modes agree, where information is available.

That on its own is enough to fix the bug, but we might as well
extend the folding case to all constant integers, not just those
that fit poly_int64.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to trunk
and release branches?

Richard


gcc/
PR middle-end/95694
* expr.c (expand_expr_real_2): Get the mode from the type rather
than the rtx, and assert that it is consistent with the mode of
the rtx (where known).  Optimize all constant integers, not just
those that can be represented in poly_int64.

gcc/testsuite/
PR middle-end/95694
* gcc.dg/pr95694.c: New test.
---
 gcc/expr.c | 15 ---
 gcc/testsuite/gcc.dg/pr95694.c | 23 +++
 2 files changed, 31 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr95694.c

diff --git a/gcc/expr.c b/gcc/expr.c
index 3c68b0d754c..715edae819a 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11525,26 +11525,27 @@ expand_expr_real_1 (tree exp, rtx target, 
machine_mode tmode,
 static rtx
 reduce_to_bit_field_precision (rtx exp, rtx target, tree type)
 {
+  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
   HOST_WIDE_INT prec = TYPE_PRECISION (type);
-  if (target && GET_MODE (target) != GET_MODE (exp))
+  gcc_assert (GET_MODE (exp) == VOIDmode || GET_MODE (exp) == mode);
+  if (target && GET_MODE (target) != mode)
 target = 0;
-  /* For constant values, reduce using build_int_cst_type. */
-  poly_int64 const_exp;
-  if (poly_int_rtx_p (exp, &const_exp))
+
+  /* For constant values, reduce using wide_int_to_tree. */
+  if (poly_int_rtx_p (exp))
 {
-  tree t = build_int_cst_type (type, const_exp);
+  auto value = wi::to_poly_wide (exp, mode);
+  tree t = wide_int_to_tree (type, value);
   return expand_expr (t, target, VOIDmode, EXPAND_NORMAL);
 }
   else if (TYPE_UNSIGNED (type))
 {
-  scalar_int_mode mode = as_a  (GET_MODE (exp));
   rtx mask = immed_wide_int_const
(wi::mask (prec, false, GET_MODE_PRECISION (mode)), mode);
   return expand_and (mode, exp, mask, target);
 }
   else
 {
-  scalar_int_mode mode = as_a  (GET_MODE (exp));
   int count = GET_MODE_PRECISION (mode) - prec;
   exp = expand_shift (LSHIFT_EXPR, mode, exp, count, target, 0);
   return expand_shift (RSHIFT_EXPR, mode, exp, count, target, 0);
diff --git a/gcc/testsuite/gcc.dg/pr95694.c b/gcc/testsuite/gcc.dg/pr95694.c
new file mode 100644
index 000..6f5e1900a02
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr95694.c
@@ -0,0 +1,23 @@
+/* PR tree-optimization/68835 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-fno-tree-forwprop -fno-tree-ccp -O1 -fno-tree-dominator-opts 
-fno-tree-fre" } */
+
+__attribute__((noinline, noclone)) unsigned __int128
+foo (void)
+{
+  unsigned __int128 x = (unsigned __int128) 0xULL;
+  struct { unsigned __int128 a : 65; } w;
+  w.a = x;
+  w.a += x;
+  return w.a;
+}
+
+int
+main ()
+{
+  unsigned __int128 x = foo ();
+  if ((unsigned long long) x != 0xfffeULL
+  || (unsigned long long) (x >> 64) != 1)
+__builtin_abort ();
+  return 0;
+}


Re: [PATCH] c++: ICE in is_really_empty_class [PR95497]

2020-07-07 Thread Jason Merrill via Gcc-patches

On 7/7/20 9:33 AM, Patrick Palka wrote:

We are ICEing in the testcase below because we pass the
yet-uninstantiated class type A of the PARM_DECL b to
is_really_empty_class from potential_rvalue_constant_expression when
parsing the requirement t += b.


Why are we getting to potential_rvalue_constant_expression?  My guess is 
from build_non_dependent_expr because processing_constraint isn't set?



This patch fixes the ICE by guarding the problematic call to
is_really_empty_class with a COMPLETE_TYPE_P check, which should also
subsume the existing dependent_type_p check.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
commit to trunk and to the 10 branch?

gcc/cp/ChangeLog:

PR c++/95497
* constexpr.c (potential_constant_expression_1): When
processing_template_decl, check COMPLETE_TYPE_P before calling
is_really_empty_class.

gcc/testsuite/ChangeLog:

PR c++/95497
* g++.dg/cpp2a/concepts-pr95497.C: New test.
---
  gcc/cp/constexpr.c|  2 +-
  gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C | 12 
  2 files changed, 13 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 1939166e907..ff78ebda2dc 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -7443,7 +7443,7 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
if (now && want_rval)
{
  tree type = TREE_TYPE (t);
- if (dependent_type_p (type)
+ if ((processing_template_decl && !COMPLETE_TYPE_P (type))
  || is_really_empty_class (type, /*ignore_vptr*/false))
/* An empty class has no data to read.  */
return true;
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
new file mode 100644
index 000..4d7718ad5e8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
@@ -0,0 +1,12 @@
+// PR c++/95497
+// { dg-do compile { target c++20 } }
+
+template 
+struct A{};
+
+template 
+concept c =
+requires(T t, A b) // note that A is independent of T
+{
+{ t += b };
+};





Re: [PATCH] c++: ICE in is_really_empty_class [PR95497]

2020-07-07 Thread Patrick Palka via Gcc-patches
On Tue, 7 Jul 2020, Jason Merrill wrote:

> On 7/7/20 9:33 AM, Patrick Palka wrote:
> > We are ICEing in the testcase below because we pass the
> > yet-uninstantiated class type A of the PARM_DECL b to
> > is_really_empty_class from potential_rvalue_constant_expression when
> > parsing the requirement t += b.
> 
> Why are we getting to potential_rvalue_constant_expression?  My guess is from
> build_non_dependent_expr because processing_constraint isn't set?

My mistake, I meant to write that we are calling is_really_empty_class
from is_rvalue_constant_expression, not from
potential_rvalue_constant_expression.  And we're getting to
is_rvalue_constant_expression from cp_parser_constant_expression when
parsing the RHS of the assignment.

> 
> > This patch fixes the ICE by guarding the problematic call to
> > is_really_empty_class with a COMPLETE_TYPE_P check, which should also
> > subsume the existing dependent_type_p check.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK to
> > commit to trunk and to the 10 branch?
> > 
> > gcc/cp/ChangeLog:
> > 
> > PR c++/95497
> > * constexpr.c (potential_constant_expression_1): When
> > processing_template_decl, check COMPLETE_TYPE_P before calling
> > is_really_empty_class.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > PR c++/95497
> > * g++.dg/cpp2a/concepts-pr95497.C: New test.
> > ---
> >   gcc/cp/constexpr.c|  2 +-
> >   gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C | 12 
> >   2 files changed, 13 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> > 
> > diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
> > index 1939166e907..ff78ebda2dc 100644
> > --- a/gcc/cp/constexpr.c
> > +++ b/gcc/cp/constexpr.c
> > @@ -7443,7 +7443,7 @@ potential_constant_expression_1 (tree t, bool
> > want_rval, bool strict, bool now,
> > if (now && want_rval)
> > {
> >   tree type = TREE_TYPE (t);
> > - if (dependent_type_p (type)
> > + if ((processing_template_decl && !COMPLETE_TYPE_P (type))
> >   || is_really_empty_class (type, /*ignore_vptr*/false))
> > /* An empty class has no data to read.  */
> > return true;
> > diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> > b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> > new file mode 100644
> > index 000..4d7718ad5e8
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr95497.C
> > @@ -0,0 +1,12 @@
> > +// PR c++/95497
> > +// { dg-do compile { target c++20 } }
> > +
> > +template 
> > +struct A{};
> > +
> > +template 
> > +concept c =
> > +requires(T t, A b) // note that A is independent of T
> > +{
> > +{ t += b };
> > +};
> > 
> 
> 



[PATCH] rs6000: Add execution tests for mma builtins. [v3]

2020-07-07 Thread Aaron Sawdey via Gcc-patches
This patch adds execution tests that use the MMA builtins and
check for the right answer, and a new test that checks whether
__builtin_cpu_supports and __builtin_cpu_is return sane answers.

One final time now that I've gotten things sorted out. OK for trunk
and backport to 10?

Thanks,
Aaron


2020-06-30  Rajalakshmi Srinivasaraghavan  
Aaron Sawdey  

gcc/testsuite/
* gcc.target/powerpc/p10-identify.c: New file.
* gcc.target/powerpc/mma-single-test.c: New file.
* gcc.target/powerpc/mma-double-test.c: New file.
---
 .../gcc.target/powerpc/mma-double-test.c  | 185 +
 .../gcc.target/powerpc/mma-single-test.c  | 193 ++
 .../gcc.target/powerpc/p10-identify.c |  32 +++
 3 files changed, 410 insertions(+)
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-double-test.c
 create mode 100755 gcc/testsuite/gcc.target/powerpc/mma-single-test.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-identify.c

diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c 
b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
new file mode 100755
index 000..c892b8fd4ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -0,0 +1,185 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include 
+#include 
+#include 
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+ __builtin_mma_disassemble_acc (result, ACC); \
+ rowC = (v4sf_t *) &CO[0*ldc+J]; \
+  rowC[0] += result[3] ; \
+  rowC = (v4sf_t *) &CO[1*ldc+J]; \
+  rowC[0] += result[2] ; \
+  rowC = (v4sf_t *) &CO[2*ldc+J]; \
+  rowC[0] += result[1] ; \
+  rowC = (v4sf_t *) &CO[3*ldc+J]; \
+ rowC[0] += result[0] ;
+
+void
+MMA (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+{
+  double *CO;
+  double *AO;
+  AO = A;
+  CO = C;
+  C += m * 4;
+  for (int j = 0; j < m; j += 16)
+   {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0);
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+
+ for (i = 0; i < k; i++)
+   {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+   }
+ SAVE_ACC (&acc0, m, 0);
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4;
+ CO += 16;
+   }
+  B += k * 4;
+}
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+{
+  for (int i = 0; i < row; i++)
+   {
+ matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+   }
+}
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+for (int i = 0; i < row; i++)
+  matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+{
+  for (int j = 0; j < column; j++)
+   {
+ printf ("%f ", matrix[j * row + i]);
+   }
+  printf ("\n");
+}
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+{
+  for (int t1 = 4; t1 <= 16; t1 += 4)
+   {
+ rowsA = t;
+ colsB = t1;
+ common = 1;
+ /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+ double 

Re: [PATCH] correct memcmp expansion of constant representations containing embedded nuls (PR 95189)

2020-07-07 Thread Martin Sebor via Gcc-patches

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549225.html

On 6/30/20 6:23 PM, Martin Sebor wrote:

An enhancement to GCC 10 to improve the expansion of strncmp
calls with strings with embedded nuls introduced a regression
in similar calls to memcmp.  A review of the changes that led
up to the regression exposed a number of questionable choices
that likely conspired to cause the bug.

For example, the name of the function with both the strncmp
enhancement as well as the memcmp bug is
inline_expand_builtin_string_cmp().  It's easy to assume that
the function handles calls to strcmp and strncmp but not also
memcmp.

Another similar example is the name of the second c_getstr()
argument -- strlen -- that doesn't actually store the length
of the retrieved string but rather its size in bytes
(including any embedded nuls, but excluding those appended
implicitly to zero out the remainder of an array the string
is stored in, up to the array's size).

Yet another example of a dubious choice is string_constant()
returning the empty string (i.e., STRING_CST with size 1) for
zero initializers of constants of any type (as opposed to one
of the same size as the constant object).

Besides fixing the memcmp bug the attached patch (hopefully)
also rectifies some of the otherwise more or less benign
mistakes that precipitated it, mostly by clarifying comments
and changing misleading names of functions, their arguments,
or local variables.

A happy consequence of the fixes is that they improve codegen
for calls to memcpy with constants whose representation includes
embedded nuls.

Tested on x86_64-linux.

Martin





Re: RFA: Fix combine.c combining a move and a non-move into two non-moves, PR93372

2020-07-07 Thread Segher Boessenkool
Hi!

On Tue, Jul 07, 2020 at 02:50:09AM +0200, Hans-Peter Nilsson wrote:
> > On Mon, Jul 06, 2020 at 03:11:17AM +0200, Hans-Peter Nilsson wrote:
> > > TL;DR: fixing a misdetection of what is a "simple move".
> > 
> > That is not a very correct characterisation of what this does :-)
> 
> That's apparently where we completely disagree. :-)

Well, I wrote that code, I know what is considered "just a move" there.
You want to extend that, and that is fine, but this is not a bug.  Taken
to the extreme, anything in GCC is completely buggy, because it doesn't
solve world hunger (yet!), following that line of thought.

> > > Looking into performance degradation after de-cc0 for CRIS, I
> > > noticed combine behaving badly; it changed a move and a
> > > right-shift into two right-shifts, where the "combined" move was
> > > not eliminated in later passes, and where the deficiency caused
> > > an extra insn in a hot loop: crcu16 (and crcu32) in coremark.
> > > 
> > > Before de-cc0, the insns input to combine looked like:
> > >33: r58:SI=r56:SI 0>>r48:SI
> > >   REG_DEAD r56:SI
> > >35: r37:HI=r58:SI#0
> > > and after:
> > >33: {r58:SI=r56:SI 0>>r48:SI;clobber dccr:CC;}
> > >   REG_DEAD r56:SI
> > >   REG_UNUSED dccr:CC
> > >35: {r37:HI=r58:SI#0;clobber dccr:CC;}
> > >   REG_UNUSED dccr:CC
> > 
> > So a shift like this is at most as expensive as a move, on your target
> > (or, in your backend, anyway ;-) )
> 
> On CRIS, the backend *and* the target, yes; one cycle, one short
> instruction.

So combine did what it is supposed to do.

> > > That is, there's always a parallel with a clobber of the
> > > condition-codes register.  Being a parallel, it's not an
> > > is_just_move, but e.g. a single_set.

This is something that happens on many targets.  For some, only for some
instructions (and flag registers).  For some, for many instructions; and
for really unhappy targets, for almost all instructions, even for
register moves and/or loads and/or stores.

> > > For the de-cc0:ed "combination", it ended up as
> > >33: {r58:SI=r56:SI 0>>r48:SI;clobber dccr:CC;}
> > >   REG_UNUSED dccr:CC
> > >35: {r37:HI#0=r56:SI 0>>r48:SI;clobber dccr:CC;}
> > >   REG_DEAD r56:SI
> > >   REG_UNUSED dccr:CC
> > > That is, a move and a shift turned into two shifts; the extra
> > > shift is not eliminated by later passes, while the move was
> > > (with cc0, and "will be again") leading to redundant
> > > instructions.
> > 
> > Which was the whole point of the is_just_move() thing, yes.  Combine
> > doesn't know most moves will be eliminated by RA (but many are useful to
> > have before RA, because it gives RA much more freedom).  If a move is
> > the same cost as a simple insn, doing two (say shift, like here) insns
> > in parallel is cheaper on most machines than having a shift and a move
> > sequentially.
> 
> Most parallel machines you mean,

No, I mean most machines, not just super-scalar machines.

> but why bring up them when
> there's no means for combine to tell the difference?

I did not bring them up, and this optimisation is important not only for
super-scalar targets.  But, the most important targets for GCC (in terms
of how many people use it, all targets are important for many other
reasons) are all super-scalar.

> Here, the
> end result is that it *added* an instruction to the hot loop.

It did not.  Combine *never* does that.

> It's a deficiency and it caused a performance regression, can't
> argue with that.

It did not cause any regression.  It can be improved, sure, and thank
you for pointing that out!

> >  (2-2 combinations are helpful on single-scalar and even
> > in-order machines as well, btw).
> 
> I certainly don't contest that the move can be eliminated, and
> that most cost-effective 2-2 eliminations are helpful.  (See my
> other post about combine being eager with allowing same-cost
> combinations.)

I did not see that post, do you have a pointer?

> > > At first I thought this was due to parallel-ignorant old code
> > > but the "guilty" change is actually pretty recent.  Regarding a
> > > parallel with a clobber not being "just" a move, there's only
> > > the two adjacent callers seen in the patch (obviously with the
> > > rename), and their use exactly matches to check that the
> > > argument is a single_set which is a move.  It's always applied
> > > to an rtx_insn, so I changed the type and name to avoid the
> > > "just" issue.  I had to adjust the type when calling single_set.
> > 
> > But it is *not* supposed to be the same as single_set.
> 
> (I'm not saying that a single_set is a move.  But that's
> obvious.)  I guess you meant to say that a single_set with a
> general_operand as a source (as in the patch) is not supposed to
> be the same as a move.  This is the only place in combine where
> that distinction would be important.  Why?

It used to be that is_just_move was called on new*pat as well, so you
simply *could not* use single_set there (you have

[committed] wwwdocs: Simplify a bit in c99status.html.

2020-07-07 Thread Gerald Pfeifer
The sourceware.org/ml/libc-hacker links being broken (as a consequence 
of the sourceware.org upgrade) brought my attention to this bit of 
c99status.html.

This simplifies that section, uses active voice, and removes those three
links from twenty years ago.

Pushed.

Gerald
---
 htdocs/c99status.html | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/htdocs/c99status.html b/htdocs/c99status.html
index b2b1fed6..036db53e 100644
--- a/htdocs/c99status.html
+++ b/htdocs/c99status.html
@@ -386,16 +386,10 @@ exceptions may not be generated) and contracting 
expressions (e.g.,
 using fused multiply-add) is not restricted to source-language
 expressions as required by C99.
 
-Compiler support is needed for thorough support of 
math_errhandling; see
-messages https://sourceware.org/ml/libc-hacker/2000-06/msg8.html";>1,
-https://sourceware.org/ml/libc-hacker/2000-06/msg00014.html";>2,
-https://sourceware.org/ml/libc-hacker/2000-06/msg00015.html";>3
-on this subject to libc-hacker.  The compiler needs to mark its output
-from compilations using -fno-trapping-math
-or -fno-math-errno, possibly using
+For thorough support of math_errhandling the
+compiler needs to mark its output from compilations using
+-fno-trapping-math or -fno-math-errno,
+possibly using
 the .gnu_attribute mechanism, to indicate that built-in
 function optimizations may have been applied that mean that not all
 calls report error status in a particular way; the static linker
-- 
2.27.0


Re: RFA: Fix combine.c combining a move and a non-move into two non-moves, PR93372

2020-07-07 Thread Segher Boessenkool
Hi!

On Mon, Jul 06, 2020 at 03:11:17AM +0200, Hans-Peter Nilsson wrote:
> TL;DR: fixing a misdetection of what is a "simple move".

As said before, this is not a fix, not a "misdetection", it is plain and
simple a behaviour change.

"Use single_set for is_just_move" would be a fine subject.

> Regarding a
> parallel with a clobber not being "just" a move, there's only
> the two adjacent callers seen in the patch (obviously with the
> rename), and their use exactly matches to check that the
> argument is a single_set which is a move.

This isn't true, single_set has somewhat different semantics.  Some of
which is exactly the change you are looking for, but there are more
differences.

> It's always applied
> to an rtx_insn, so I changed the type and name to avoid the
> "just" issue.  I had to adjust the type when calling single_set.

Don't change the name please.  Changing it to take an rtx_insn* is fine,
we aren't likely to change back to testing the resulting patterns.

> I checked the original commit, c4c5ad1d6d1e1e a.k.a r263067 and

The history is years older (some of which is on gcc-patches@).

I'll make a simpler patch.  Thanks!


Segher


Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]

2020-07-07 Thread Segher Boessenkool
Hi!

On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:
> > Lots of questions, sorry!
> 
> Thanks for the nice suggestions of the initial patch contains many issues:),

Pretty much all of it should *work*, it just can be improved and
simplified quite a bit :-)

> For this case, %1:SF matches with "=wa"?  And how to construct cases to
> match("=?r", "wa") and ("=!r", "r") combinations, please?

operands[0], not operands[1]?

Simple testcases will not put the output into a GPR, unless you force
the compiler to do that, because of the ? and !.

Often you can just do

  asm("#" : "+r"(x));

to force "x" into a GPR at that point of the program.  But there is
nothing stopping the compiler from copying it back to a VSR where it
thinks that is cheaper ;-)

So maybe this pattern should just have the GPR-to-VSR alternative?  It
does not look like the GPR destination variants are useful?

> +  rtx op0 = operands[0];
> +  rtx op1 = operands[1];
> +  rtx op2 = operands[2];

(Please just write out operands[N] everywhere).

> +  if (GET_CODE (operands[2]) == SCRATCH)
> +op2 = gen_reg_rtx (DImode);
> +
> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
> +  emit_insn (gen_anddi3 (op2, op1, mask));

Groovy :-)

So, it looks like you can remove the ? and ! alternatives, leaving just
the first alternative?


Segher


[committed] libstdc++: Whitespace changes to keep lines below 80 columns

2020-07-07 Thread Jonathan Wakely via Gcc-patches
Also remove leading spaces that are followed by tabs.

libstdc++-v3/ChangeLog:

* include/std/limits: Whitespace changes.

Committed to trunk.

commit 9ca8fefeb720c8a9dec58e9a99042e6727309251
Author: Jonathan Wakely 
Date:   Tue Jul 7 23:26:38 2020 +0100

libstdc++: Whitespace changes to keep lines below 80 columns

Also remove leading spaces that are followed by tabs.

libstdc++-v3/ChangeLog:

* include/std/limits: Whitespace changes.

diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/limits
index 898406f91ee..bee9b9f6bb6 100644
--- a/libstdc++-v3/include/std/limits
+++ b/libstdc++-v3/include/std/limits
@@ -1479,152 +1479,152 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if !defined(__STRICT_ANSI__)
 
-#define __INT_N(TYPE, BITSIZE, EXT, UEXT)  \
-  template<>   
\
-struct numeric_limits
\
-{  
\
-  static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-   min() _GLIBCXX_USE_NOEXCEPT { return __glibcxx_min_b (TYPE, BITSIZE); } 
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-  max() _GLIBCXX_USE_NOEXCEPT { return __glibcxx_max_b (TYPE, BITSIZE); }  
\
-   
\
-  static _GLIBCXX_USE_CONSTEXPR int digits 
\
-   = BITSIZE - 1;  
\
-  static _GLIBCXX_USE_CONSTEXPR int digits10   
\
-   = (BITSIZE - 1) * 643L / 2136;  
\
-   
\
-  static _GLIBCXX_USE_CONSTEXPR bool is_signed = true; 
\
-  static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
\
-  static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;  
\
-  static _GLIBCXX_USE_CONSTEXPR int radix = 2; 
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-  epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-  round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
\
-   
\
-  EXT  
\
-   
\
-  static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;  
\
-  static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
\
-  static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;  
\
-  static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
\
-   
\
-  static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false; 
\
-  static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
\
-  static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
\
-  static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm  
\
-   = denorm_absent;
\
-  static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;  
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-  infinity() _GLIBCXX_USE_NOEXCEPT 
\
-  { return static_cast(0); } 
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   
\
-  quiet_NaN() _GLIBCXX_USE_NOEXCEPT
\
-  { return static_cast(0); } 
\
-   
\
-  static _GLIBCXX_CONSTEXPR TYPE   

[committed] libstdc++: Replace __int_limits with __numeric_traits_integer

2020-07-07 Thread Jonathan Wakely via Gcc-patches
I recently added std::__detail::__int_limits as a lightweight
alternative to std::numeric_limits, forgetting that the values it
provides (digits, min and max) are already provided by
__gnu_cxx::__numeric_traits.

This change adds __int_traits as an alias for __numeric_traits_integer.
This avoids instantiating __numeric_traits to decide whether to use
__numeric_traits_integer or __numeric_traits_floating. Then all uses of
__int_limits can be replaced with __int_traits, and __int_limits can be
removed.

libstdc++-v3/ChangeLog:

* include/Makefile.am: Remove bits/int_limits.h.
* include/Makefile.in: Regenerate.
* include/bits/int_limits.h: Removed.
* include/bits/parse_numbers.h (_Select_int_base): Replace
__int_limits with __int_traits.
* include/bits/range_access.h (_SSize::operator()): Likewise.
* include/ext/numeric_traits.h (__numeric_traits_integer): Add
static assertion.
(__int_traits): New alias template.
* include/std/bit (__rotl, __rotr, __countl_zero, __countl_one)
(__countr_zero, __countr_one, __popcount, __bit_ceil)
(__bit_floor, __bit_width): Replace __int_limits with
__int_traits.
* include/std/charconv (__to_chars_8, __from_chars_binary)
(__from_chars_alpha_to_num, from_chars): Likewise.
* include/std/memory_resource (polymorphic_allocator::allocate)
(polymorphic_allocator::allocate_object): Likewise.
* include/std/string_view (basic_string_view::_S_compare):
Likewise.
* include/std/utility (cmp_equal, cmp_less, in_range): Likewise.

Tested powerpc64le-linux, committed to trunk.


commit eb04805be4029716e76532babc0fa9ecb18de96e
Author: Jonathan Wakely 
Date:   Tue Jul 7 23:26:38 2020 +0100

libstdc++: Replace __int_limits with __numeric_traits_integer

I recently added std::__detail::__int_limits as a lightweight
alternative to std::numeric_limits, forgetting that the values it
provides (digits, min and max) are already provided by
__gnu_cxx::__numeric_traits.

This change adds __int_traits as an alias for __numeric_traits_integer.
This avoids instantiating __numeric_traits to decide whether to use
__numeric_traits_integer or __numeric_traits_floating. Then all uses of
__int_limits can be replaced with __int_traits, and __int_limits can be
removed.

libstdc++-v3/ChangeLog:

* include/Makefile.am: Remove bits/int_limits.h.
* include/Makefile.in: Regenerate.
* include/bits/int_limits.h: Removed.
* include/bits/parse_numbers.h (_Select_int_base): Replace
__int_limits with __int_traits.
* include/bits/range_access.h (_SSize::operator()): Likewise.
* include/ext/numeric_traits.h (__numeric_traits_integer): Add
static assertion.
(__int_traits): New alias template.
* include/std/bit (__rotl, __rotr, __countl_zero, __countl_one)
(__countr_zero, __countr_one, __popcount, __bit_ceil)
(__bit_floor, __bit_width): Replace __int_limits with
__int_traits.
* include/std/charconv (__to_chars_8, __from_chars_binary)
(__from_chars_alpha_to_num, from_chars): Likewise.
* include/std/memory_resource (polymorphic_allocator::allocate)
(polymorphic_allocator::allocate_object): Likewise.
* include/std/string_view (basic_string_view::_S_compare):
Likewise.
* include/std/utility (cmp_equal, cmp_less, in_range): Likewise.

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 80aeb3f8959..e131ce04f8c 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -129,7 +129,6 @@ bits_headers = \
${bits_srcdir}/hashtable.h \
${bits_srcdir}/hashtable_policy.h \
${bits_srcdir}/indirect_array.h \
-   ${bits_srcdir}/int_limits.h \
${bits_srcdir}/invoke.h \
${bits_srcdir}/ios_base.h \
${bits_srcdir}/istream.tcc \
diff --git a/libstdc++-v3/include/bits/int_limits.h 
b/libstdc++-v3/include/bits/int_limits.h
deleted file mode 100644
index 7ae34377331..000
--- a/libstdc++-v3/include/bits/int_limits.h
+++ /dev/null
@@ -1,74 +0,0 @@
-// Minimal replacement for numeric_limits of integers. -*- C++ -*-
-
-// Copyright (C) 2020 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public Li

[PATCH] c++: Better diagnostic for decltype(auto) in C++11 [PR96103]

2020-07-07 Thread Marek Polacek via Gcc-patches
If you try to use decltype(auto) in C++11, we emit obscure

  error: expected primary-expression before 'auto'

giving the user no hint as to what's wrong.  This patch improves that
diagnostic.  Since we've been giving an error, I'm also using error().

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

gcc/cp/ChangeLog:

PR c++/96103
* parser.c (cp_parser_decltype): Print error about using decltype(auto)
in C++11.  Check that the token following "auto" is ")".

gcc/testsuite/ChangeLog:

PR c++/96103
* g++.dg/cpp0x/decltype77.C: New test.
---
 gcc/cp/parser.c | 22 +++---
 gcc/testsuite/g++.dg/cpp0x/decltype77.C | 10 ++
 2 files changed, 25 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/decltype77.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index e58d8eb298c..8aaedaefb86 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -14894,11 +14894,11 @@ cp_parser_decltype_expr (cp_parser *parser,
   return expr;
 }
 
-/* Parse a `decltype' type. Returns the type.
+/* Parse a `decltype' type.  Returns the type.
 
-   simple-type-specifier:
+   decltype-specifier:
  decltype ( expression )
-   C++14 proposal:
+   C++14:
  decltype ( auto )  */
 
 static tree
@@ -14938,10 +14938,18 @@ cp_parser_decltype (cp_parser *parser)
 
   tree expr = NULL_TREE;
 
-  if (cxx_dialect >= cxx14
-  && cp_lexer_next_token_is_keyword (parser->lexer, RID_AUTO))
-/* decltype (auto) */
-cp_lexer_consume_token (parser->lexer);
+  if (cp_lexer_next_token_is_keyword (parser->lexer, RID_AUTO)
+  && cp_lexer_nth_token_is (parser->lexer, 2, CPP_CLOSE_PAREN))
+{
+  /* decltype (auto) */
+  cp_lexer_consume_token (parser->lexer);
+  if (cxx_dialect < cxx14)
+   {
+ error_at (start_token->location,
+   "% type specifier is a C++14 extension");
+ expr = error_mark_node;
+   }
+}
   else
 {
   /* decltype (expression)  */
diff --git a/gcc/testsuite/g++.dg/cpp0x/decltype77.C 
b/gcc/testsuite/g++.dg/cpp0x/decltype77.C
new file mode 100644
index 000..1f987118510
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/decltype77.C
@@ -0,0 +1,10 @@
+// PR c++/96103
+// { dg-do compile { target c++11_only } }
+
+decltype(auto) foo () { return 4; } // { dg-error ".decltype\\(auto\\). type 
specifier is a C\\+\\+14 extension" }
+
+void
+bar ()
+{
+  decltype(auto) i = 0; // { dg-error ".decltype\\(auto\\). type specifier is 
a C\\+\\+14 extension" }
+}

base-commit: 7126583af5d29235584b51b3b05eeaba2adef024
-- 
2.26.2



Re: [committed] libstdc++: Whitespace changes to keep lines below 80 columns

2020-07-07 Thread Jonathan Wakely via Gcc-patches

On 07/07/20 23:38 +0100, Jonathan Wakely wrote:

Also remove leading spaces that are followed by tabs.

libstdc++-v3/ChangeLog:

* include/std/limits: Whitespace changes.


I missed a couple of leading spaces, fixed with this one.

Committed to trunk.

commit 161aa50ee2d1142802d7200733fee1cf532d6696
Author: Jonathan Wakely 
Date:   Tue Jul 7 23:39:44 2020 +0100

libstdc++: Whitespace changes to remove spaces before tabs

libstdc++-v3/ChangeLog:

* include/std/limits: Whitespace changes.

diff --git a/libstdc++-v3/include/std/limits b/libstdc++-v3/include/std/limits
index bee9b9f6bb6..f5e403be727 100644
--- a/libstdc++-v3/include/std/limits
+++ b/libstdc++-v3/include/std/limits
@@ -1536,11 +1536,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static _GLIBCXX_CONSTEXPR TYPE	\
   denorm_min() _GLIBCXX_USE_NOEXCEPT\
   { return static_cast(0); }	\
- 	\
+	\
   static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false; 		\
   static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true; 		\
   static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false; 		\
- 	\
+	\
   static _GLIBCXX_USE_CONSTEXPR bool traps \
 	= __glibcxx_integral_traps; 	\
   static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false; 	\


[PATCH] contrib: Don't pass wget options to curl

2020-07-07 Thread Mike Nolta via Gcc-patches
This is a harmless bug, as the script still works, but curl's '-O'
option isn't the same as wget's.

contrib/ChangeLog:

* download_prerequisites: Don't pass wget options to curl.
---
 contrib/download_prerequisites | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index da19913f9..7d0c4b5ea 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -222,7 +222,7 @@ for ar in $(echo_archives)
 do
 if [ ${force} -gt 0 ]; then rm -f "${directory}/${ar}"; fi
 [ -e "${directory}/${ar}" ]   \
-|| ${fetch} --no-verbose -O "${directory}/${ar}" "${base_url}${ar}"
   \
+|| ( cd "${directory}" && ${fetch} --no-verbose "${base_url}${ar}" )  \
 || die "Cannot download ${ar} from ${base_url}"
 done
 unset ar
-- 
2.21.0



Re: Inconsistencies with associative/unordered containers

2020-07-07 Thread Jonathan Wakely via Gcc-patches

On 03/07/20 08:37 +0200, François Dumont via Libstdc++ wrote:

Hi

    Here is the patch to fix the 2nd point of this mail below.

    I prefer to qualify _Rb_tree_impl move constructor based on 
std::is_nothrow_move_constructible<_Base_key_compare> so that the 
logic of copying _Compare rather than moving it stays an 
implementation detail of _Rb_tree_key_compare.


    libstdc++: Fix [multi]map/[multi]set move constructors noexcept 
qualification


    Container move constructors shall not consider their allocator move
    constructor qualification.

    libstdc++-v3/ChangeLog:

            * include/bits/stl_tree.h 
(_Rb_tree_impl(_Rb_tree_impl&&)): Add noexcept

            qualification based only on _Compare one.
            * 
testsuite/23_containers/map/cons/noexcept_move_construct.cc: Add

            static asserts.
            * 
testsuite/23_containers/multimap/cons/noexcept_move_construct.cc:

            Likewise.
            * 
testsuite/23_containers/multiset/cons/noexcept_move_construct.cc:

            Likewise.
            * 
testsuite/23_containers/set/cons/noexcept_move_construct.cc: Likewise.


New tests run under Linux x86_64, ok to commit after other tests complete ?


[...]


diff --git a/libstdc++-v3/include/bits/stl_tree.h 
b/libstdc++-v3/include/bits/stl_tree.h
index 39303c47b08..d7f5439f452 100644
--- a/libstdc++-v3/include/bits/stl_tree.h
+++ b/libstdc++-v3/include/bits/stl_tree.h
@@ -706,7 +706,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  : _Node_allocator(__a), _Base_key_compare(__comp)
  { }
#else
- _Rb_tree_impl(_Rb_tree_impl&&) = default;
+ _Rb_tree_impl(_Rb_tree_impl&&)
+   noexcept(
+ std::is_nothrow_move_constructible<_Base_key_compare>::value )
+ = default;


If you remove the redundant "std::" this will fit on one line.

OK for master with that change, thanks.

I need to check which branches will allow the noexcept-specifier on a
defaulted constructor. I don't think we can backport it to all
branches.



[PATCH v3] RS6000, add VSX mask manipulation support

2020-07-07 Thread Carl Love via Gcc-patches
Segher:

I have fixed the issues you mentioned in version 2. I also rebased the
patch onto the latest mainline.  This resulted in having to change
FUTURE to P10 everywhere.  

I reran regression testing on Power 9 with no regression issues.
I also ran test cases manually on mambo. 

Please let me know if the patch is acceptable for mainline.  Thanks for
your time and previous reviews of the patch.

 Carl Love
-

version 3  Changes
  rebased onto mainline 7/7/2020
  Change FUTURE to P10 in code and ChangeLog.
  ChangeLog, fixed the name of a couple of files which were wrong.
  Reformatted define_mode_attr VSX_MM_SUFFIX definition to shorten the
   line.
  Reworked define_expand "vec_mtvsrbm_mtvsrbmi" as it will not work as
intended.
  Changed vsx_register_operand to altivec_register_operand for "v"
constraint.
  Removed --save-temps from test cases as it is not needed.
 

---
version 2 Changes

Addressed Will's comments
  - ChangeLog: fixed name/symbol order;
changed reference from rs6000-c.c to rs6000-builtin.def.

  - define_expand "vec_mtvsrbm": changed name to vec_mtvsrbm_mtvsrbmi,
updated comment

  - vsx_mask-runnable.c: divided it up into four smaller test cases,
vsx_mask-count-runnable.c, vsx_mask-expand-runnable.c,
vsx_mask-extract-runnable.c, vsx_mask-move-runnable.c.

---
RS6000 RFC 2629, add VSX mask manipulation support

The following patch adds support for builtins vec_genbm(),  vec_genhm(),
vec_genwm(), vec_gendm(), vec_genqm(), vec_cntm(), vec_expandm(),
vec_extractm().  Support for instructions mtvsrbm, mtvsrhm, mtvsrwm,
mtvsrdm, mtvsrqm, cntm, vexpandm, vextractm.

The patch has been tested on:

  powerpc64le-unknown-linux-gnu (Power 9 LE)

and mambo with no regression errors.

Please let me know if this patch is acceptable for inclusion in the mainline
branch.  Thanks.

   Carl Love
---

RS6000, add VSX mask manipulation support

gcc/ChangeLog

2020-07-07  Carl Love  

* config/rs6000/vsx.md  (VSX_MM): New define_mode_iterator.
(VSX_MM4): New define_mode_iterator.
(VSX_MM_SUFFIX4): New define_mode_attr.
(vec_mtvsrbm): New define_expand.
(vec_mtvsrbmi): New define_insn.
(vec_mtvsr_): New define_insn.
(vec_cntmb_): New define_insn.
(vec_extract_): New define_insn.
(vec_expand_): New define_insn.
(define_c_enum unspec): Add entries UNSPEC_MTVSBM, UNSPEC_VCNTMB,
UNSPEC_VEXTRACT, UNSPEC_VEXPAND.
* config/rs6000/altivec.h ( vec_genbm, vec_genhm, vec_genwm,
vec_gendm, vec_genqm, vec_cntm, vec_expandm, vec_extractm): Add
defines.
* config/rs6000/rs6000-builtin.def: Add defines BU_P10_2, BU_P10_1.
(BU_P10_1): Add definitions for mtvsrbm, mtvsrhm, mtvsrwm,
mtvsrdm, mtvsrqm, vexpandmb, vexpandmh, vexpandmw, vexpandmd,
vexpandmq, vextractmb, vextractmh, vextractmw, vextractmd, vextractmq.
(BU_P10_2): Add definitions for cntmbb, cntmbh, cntmbw, cntmbd.
(BU_P10_OVERLOAD_1): Add definitions for mtvsrbm, mtvsrhm,
mtvsrwm, mtvsrdm, mtvsrqm, vexpandm, vextractm.
(BU_P10_OVERLOAD_2): Add definition for cntm.
* config/rs6000/rs6000-call.c (rs6000_expand_binop_builtin): Add
checks for CODE_FOR_vec_cntmbb_v16qi, CODE_FOR_vec_cntmb_v8hi,
CODE_FOR_vec_cntmb_v4si, CODE_FOR_vec_cntmb_v2di.
(altivec_overloaded_builtins): Add overloaded argument entries for
FUTURE_BUILTIN_VEC_MTVSRBM, FUTURE_BUILTIN_VEC_MTVSRHM,
FUTURE_BUILTIN_VEC_MTVSRWM, FUTURE_BUILTIN_VEC_MTVSRDM,
FUTURE_BUILTIN_VEC_MTVSRQM, FUTURE_BUILTIN_VEC_VCNTMBB,
FUTURE_BUILTIN_VCNTMBB, FUTURE_BUILTIN_VCNTMBH,
FUTURE_BUILTIN_VCNTMBW, FUTURE_BUILTIN_VCNTMBD,
FUTURE_BUILTIN_VEXPANDMB, FUTURE_BUILTIN_VEXPANDMH,
FUTURE_BUILTIN_VEXPANDMW, FUTURE_BUILTIN_VEXPANDMD,
FUTURE_BUILTIN_VEXPANDMQ, FUTURE_BUILTIN_VEXTRACTMB,
FUTURE_BUILTIN_VEXTRACTMH, FUTURE_BUILTIN_VEXTRACTMW,
FUTURE_BUILTIN_VEXTRACTMD, FUTURE_BUILTIN_VEXTRACTMQ.
(builtin_function_type): Add case entries for FUTURE_BUILTIN_MTVSRBM,
FUTURE_BUILTIN_MTVSRHM, FUTURE_BUILTIN_MTVSRWM, FUTURE_BUILTIN_MTVSRDM,
FUTURE_BUILTIN_MTVSRQM, FUTURE_BUILTIN_VCNTMBB, FUTURE_BUILTIN_VCNTMBH,
FUTURE_BUILTIN_VCNTMBW, FUTURE_BUILTIN_VCNTMBD,
FUTURE_BUILTIN_VEXPANDMB, FUTURE_BUILTIN_VEXPANDMH,
FUTURE_BUILTIN_VEXPANDMW, FUTURE_BUILTIN_VEXPANDMD,
FUTURE_BUILTIN_VEXPANDMQ.
* config/rs6000/rs6000-builtin.def (altivec_overloaded_builtins): Add
entries for MTVSRBM, MTVSRHM, MTVSRWM, MTVSRDM, MTVSRQM, VCNTM,
VEXPANDM, VEXTRACTM.

gcc/testsuite/ChangeLog

2020-07-07  Carl Love  
* gcc.target/powerpc/vsx_mask-

[committed] c++: Add new test [PR92427]

2020-07-07 Thread Marek Polacek via Gcc-patches
Fixed in r10-5578.

PR c++/92427
* g++.dg/ext/flexary37.C: New test.
---
 gcc/testsuite/g++.dg/ext/flexary37.C | 15 +++
 1 file changed, 15 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/ext/flexary37.C

diff --git a/gcc/testsuite/g++.dg/ext/flexary37.C 
b/gcc/testsuite/g++.dg/ext/flexary37.C
new file mode 100644
index 000..ceb5053de2e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/flexary37.C
@@ -0,0 +1,15 @@
+// PR c++/92427
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+class C {
+private:
+int a; int b;
+public:
+C(int A, int B) : a(A), b(B) { }
+~C() { }
+};
+
+struct y { // { dg-error "unknown array size in delete" }
+int a; C b[];
+} y = { 1, { { 2, 3 } } }; // { dg-error "unknown array size in delete" }

base-commit: 161aa50ee2d1142802d7200733fee1cf532d6696
-- 
2.26.2



Re: [PATCH 2/2] doc/implement-c.texi: About same-as-scalar-type volatile aggregate accesses, PR94600

2020-07-07 Thread Martin Sebor via Gcc-patches

On 7/6/20 10:02 PM, Hans-Peter Nilsson via Gcc-patches wrote:

We say very little about reads and writes to aggregate /
compound objects, just scalar objects (i.e. assignments don't
cause reads).  Let's say something safe about aggregate
objects, but only for those that are the same size as a scalar
type.

There's an equal-sounding section (Volatiles) in extend.texi,
but this seems a more appropriate place, as specifying the
behavior of a standard qualifier.

gcc:
PR middle-end/94600
* doc/implement-c.texi (Qualifiers implementation): Add blurb
about access to the whole of a volatile aggregate object, only for
same-size as scalar object.
---
  gcc/doc/implement-c.texi | 4 
  1 file changed, 4 insertions(+)

diff --git a/gcc/doc/implement-c.texi b/gcc/doc/implement-c.texi
index 692297b69c4..d64922b28ad 100644
--- a/gcc/doc/implement-c.texi
+++ b/gcc/doc/implement-c.texi
@@ -576,6 +576,10 @@ are of scalar types, the expression is interpreted by GCC 
as a read of
  the volatile object; in the other cases, the expression is only evaluated
  for its side effects.
  
+When an object of aggregate type has the same size as a scalar type, GCC

+handles an access to the whole of that volatile aggregate type object
+equal to an access to that volatile same-sized scalar type object.


The grammar is a bit off here making the sentence difficult to
parse and interpret.  Richard already pointed out the alignment
requirement but I'm also wondering if the statement is meant to
apply to accesses by library functions such as memcpy.  I suspect
it should only apply to assignments (either simple or atomic),
correct?

Would something like this be more accurate?

  When an object of an aggregate type with the same size and
  alignment as a scalar type S is the subject of a volatile
  access by an assignment expression or an atomic function,
  the access to it is performed as if the object's declared
  type were volatile S.

Martin


+
  @end itemize
  
  @node Declarators implementation






Re: [PATCH] rs6000: Refine RTL unroll adjust hook

2020-07-07 Thread Segher Boessenkool
Hi!

On Mon, Jul 06, 2020 at 03:13:13PM +0800, guojiufu wrote:
> For very small loops (< 6 insns), it would be fine to unroll 4
> times to use cache line better.  Like below loops:
>  `while (i) a[--i] = NULL;   while (p < e)  *d++ = *p++;`

Yes, definitely.

> And for very complex loops which may cause negative impacts:
> branch-miss or cache-miss. Like below loop: there are calls,
> early exits and branches in loop.
> ```
>   for (int i = 0; i < n; i++) {
>   int e = a[I];
>  
>   if (function_call(e))  break;
>  
>   }
> ```
> 
> This patch enhances RTL unroll for small loops and prevent to
> unroll complex loops.

I am not happy about what is considered "a complex loop" here.

> +/* Count the number of call insns in LOOP.  */
> +static unsigned int
> +num_loop_calls (struct loop *loop)
> +{
> +  basic_block *bbs;
> +  rtx_insn *insn;
> +  unsigned int i;
> +  unsigned int call_ins_num = 0;
> +
> +  bbs = get_loop_body (loop);
> +  for (i = 0; i < loop->num_nodes; i++)
> +FOR_BB_INSNS (bbs[i], insn)
> +  if (CALL_P (insn))
> + call_ins_num++;
> +
> +  free (bbs);
> +
> +  return call_ins_num;
> +}

This function belongs in cfgloop.c really?  (Or cfgloopanal.c).  Next to
num_loop_branches ;-)

> +/* Return true if LOOP is too complex to be unrolled.  */
> +static bool
> +rs6000_complex_loop_p (struct loop *loop)
> +{
> +  unsigned call_num;
> +
> +  return loop->ninsns > 10
> +&& (call_num = num_loop_calls (loop)) > 0
> +&& (call_num + num_loop_branches (loop)) * 5 > loop->ninsns
> +&& !single_exit (loop);
> +}

Don't do initialisation in conditionals please (or in loop conditions),
like Will said already.

This calls only very specific loops "complex".  We need a better way
to decide this :-(

>  static unsigned
>  rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
>  {
> -   if (unroll_only_small_loops)
> +  if (unroll_only_small_loops)
>  {
> -  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> -  example we may want to unroll very small loops more times (4 perhaps).
> -  We also should use a PARAM for this.  */
> +  if (loop->ninsns <= 6)
> + return MIN (4, nunroll);
>if (loop->ninsns <= 10)
>   return MIN (2, nunroll);
> -  else
> - return 0;
> +
> +  return 0;
>  }

This part is fine.  It is independent of the rest AFAICS, so if you
agree, this part is preapproved for trunk.  Thanks!

(A PARAM would be nice, but too many of those isn't actually useful
either...  Next time, add one as soon as writing the code, at least it
is useful at that point in time, when you still need to experiment with
it :-) )

> +  if (rs6000_complex_loop_p (loop))
> +return 0;
> +
>return nunroll;
>  }

So, we use rs6000_complex_loop_p only to prevent all unrolling, never to
reduce the unrolling, and only in very specific cases.

Is there no middle road possible?  Say, don't unroll to more than 25
insns total (which is what the "only small loops" does, sort of -- it
also avoids unrolling 3x a bit, yes), and don't unroll to more than 2
calls, and not to more than 4 branches (I'm making up those numbers, of
course, and PARAMS would be helpful).  Some of this already does exist,
and might need retuning for us?


Segher


Re: [PATCH] rs6000: Add execution tests for mma builtins.

2020-07-07 Thread Segher Boessenkool
On Tue, Jul 07, 2020 at 12:45:08PM -0500, Aaron Sawdey via Gcc-patches wrote:
> Updated slightly, removed -Wno-psabi as requested and also fixed the
> fact that it wasn't actually checking __builtin_cpu_is or
> __builtin_cpu_supports. OK for trunk and backport to 10?

But you don't need that, and neither is it wanted even (we also want
the tests to run on systems with an older glibc, or no glibc at all).
Instead, you want to have a mma_hw selector (or abbreviation, with
a name with "powerpc" in it).

> +/* { dg-require-effective-target power10_hw } */

This already means that we are running on a system that can execute
ISA 3.1 insns (it tests if some specific "pli" works).

> +  if ( !__builtin_cpu_is ("power10"))
> +{
> +  printf ("Error: __builtin_cpu_is says this is not power10\n");
> +  ret++;
> +}

This means it will not run on later CPUs?  Not good.

> +  if ( !__builtin_cpu_supports ("arch_3_1"))
> +{
> +  printf ("Error: __builtin_cpu_supports says arch_3_1 not 
> supported.\n");
> +  ret++;
> +}

This is always already tested for by that power10_hw selector.

> +  if ( !__builtin_cpu_supports ("mma"))
> +{
> +  printf ("Error: __builtin_cpu_supports says mma not supported.\n");
> +  ret++;
> +}

And for this, we probably want a mma_hw sooner rather than later.


Segher


[PATCH] [RISC-V] Add support for TLS stack protector canary access

2020-07-07 Thread cooper via Gcc-patches
The Linux kernel developers are discussing support for a TLS-register-based
stack protector canary; the link is as follows:

https://lore.kernel.org/linux-riscv/202007051820.DABE7F87D7@keescook/T/#t
I implemented a register-based stack protector canary with reference to
aarch64 and x86. When adding -mstack-protector-guard=tls,
use -mstack-protector-guard-reg= to specify a register such as tp
and -mstack-protector-guard-offset= to specify the offset, then
the TLS stack protector canary code will be generated.

gcc/
* config/riscv/riscv-opts.h (stack_protector_guard): New enum.
* config/riscv/riscv.c (riscv_option_override): Handle
the new options.
* config/riscv/riscv.md (stack_protect_set): New pattern to handle
flexible stack protector guard settings.
(stack_protect_set_): Ditto.
(stack_protect_test): Ditto.
(stack_protect_test_): Ditto.
* config/riscv/riscv.opt (mstack-protector-guard=,
mstack-protector-guard-reg=, mstack-protector-guard-offset=): New
options.
* doc/invoke.texi (Option Summary) [RISC-V Options]:
Add -mstack-protector-guard=, -mstack-protector-guard-reg=, and
-mstack-protector-guard-offset=.
(RISC-V Options): Ditto.

---
 gcc/ChangeLog | 18 
 gcc/config/riscv/riscv-opts.h |  6 +++
 gcc/config/riscv/riscv.c  | 41 ++
 gcc/config/riscv/riscv.md | 80 +++
 gcc/config/riscv/riscv.opt| 28 
 gcc/doc/invoke.texi   | 22 +-
 6 files changed, 194 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ea2f78df22e..98745f9f946 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2020-07-07  Cooper Qu  
+
+   * config/riscv/riscv-opts.h (stack_protector_guard): New enum.
+   * config/riscv/riscv.c (riscv_option_override): Handle
+   the new options.
+   * config/riscv/riscv.md (stack_protect_set): New pattern to handle
+   flexible stack protector guard settings.
+   (stack_protect_set_): Ditto.
+   (stack_protect_test): Ditto.
+   (stack_protect_test_): Ditto.
+   * config/riscv/riscv.opt (mstack-protector-guard=,
+   mstack-protector-guard-reg=, mstack-protector-guard-offset=): New
+   options.
+   * doc/invoke.texi (Option Summary) [RISC-V Options]:
+   Add -mstack-protector-guard=, -mstack-protector-guard-reg=, and
+   -mstack-protector-guard-offset=.
+   (RISC-V Options): Ditto.
+
 2020-07-06  Richard Biener  
 
PR tree-optimization/96075
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 8f12e50b9f1..2a3f9d9eef5 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -51,4 +51,10 @@ enum riscv_align_data {
   riscv_align_data_type_natural
 };
 
+/* Where to get the canary for the stack protector.  */
+enum stack_protector_guard {
+  SSP_TLS, /* per-thread canary in TLS block */
+  SSP_GLOBAL   /* global canary */
+};
+
 #endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index bfb3885ed08..e606f24fa74 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -4775,6 +4775,47 @@ riscv_option_override (void)
   " [%<-mriscv-attribute%>]");
 #endif
 
+  if (riscv_stack_protector_guard == SSP_GLOBAL
+  && global_options_set.x_riscv_stack_protector_guard_offset_str)
+{
+  error ("incompatible options %<-mstack-protector-guard=global%> and "
+"%<-mstack-protector-guard-offset=%s%>",
+riscv_stack_protector_guard_offset_str);
+}
+
+  if (riscv_stack_protector_guard == SSP_TLS
+  && !(global_options_set.x_riscv_stack_protector_guard_offset_str
+  && global_options_set.x_riscv_stack_protector_guard_reg_str))
+{
+  error ("both %<-mstack-protector-guard-offset%> and "
+"%<-mstack-protector-guard-reg%> must be used "
+"with %<-mstack-protector-guard=sysreg%>");
+}
+
+  if (global_options_set.x_riscv_stack_protector_guard_reg_str)
+{
+  const char *str = riscv_stack_protector_guard_reg_str;
+  int reg = decode_reg_name (str);
+
+  if (!IN_RANGE (reg, 1, 31))
+   error ("%qs is not a valid base register in %qs", str,
+  "-mstack-protector-guard-reg=");
+
+  riscv_stack_protector_guard_reg = reg;
+}
+
+  if (global_options_set.x_riscv_stack_protector_guard_offset_str)
+{
+  char *end;
+  const char *str = riscv_stack_protector_guard_offset_str;
+  errno = 0;
+  long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
+  if (!*str || *end || errno)
+   error ("%qs is not a valid offset in %qs", str,
+  "-mstack-protector-guard-offset=");
+  riscv_stack_protector_guard_offset = offs;
+}
+
 }
 
 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  *

Re: [PATCH] rs6000: Refine RTL unroll adjust hook

2020-07-07 Thread Jiufu Guo via Gcc-patches
will schmidt  writes:

Thanks!
> On Mon, 2020-07-06 at 15:13 +0800, guojiufu via Gcc-patches wrote:
>
> Hi,
>
> Assorted comments below.   thanks :-)
>
>> For very small loops (< 6 insns), it would be fine to unroll 4
>> times to use cache line better.  Like below loops:
>>  `while (i) a[--i] = NULL;   while (p < e)  *d++ = *p++;`
>> 
>> Very complex loops, on the other hand, may be hurt by unrolling
>> (more branch misses or cache misses).  For example, the loop below
>> contains calls, early exits and branches:
>> ```
>>   for (int i = 0; i < n; i++) {
>>   int e = a[i];
>>  
>>   if (function_call(e))  break;
>>  
>>   }
>> ```
>> 
>> This patch enhances RTL unrolling for small loops and prevents
>> unrolling of complex loops.
>
> ok.
>
>> 
>> gcc/ChangeLog
>> 2020-07-03  Jiufu Guo  
>> 
>> * config/rs6000/rs6000.c (rs6000_loop_unroll_adjust): Refine hook.
>> (rs6000_complex_loop_p): New function.
>> (num_loop_calls): New function.
>
> Tabs versus spaces.
oh, thanks!

>
> (num_loop_calls): New function.
>
>
>> ---
>>  gcc/config/rs6000/rs6000.c | 46 +---
>> --
>>  1 file changed, 40 insertions(+), 6 deletions(-)
>> 
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index 58f5d780603..a4874fa0efc 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -5130,22 +5130,56 @@ rs6000_destroy_cost_data (void *data)
>>free (data);
>>  }
>> 
>> +/* Count the number of call insns in LOOP.  */
>> +static unsigned int
>> +num_loop_calls (struct loop *loop)
>> +{
>> +  basic_block *bbs;
>> +  rtx_insn *insn;
>> +  unsigned int i;
>> +  unsigned int call_ins_num = 0;
>> +
>> +  bbs = get_loop_body (loop);
>> +  for (i = 0; i < loop->num_nodes; i++)
>> +FOR_BB_INSNS (bbs[i], insn)
>> +  if (CALL_P (insn))
>> +call_ins_num++;
>> +
>> +  free (bbs);
>> +
>> +  return call_ins_num;
>> +}
>
> ok.
>
>
>> +
>> +/* Return true if LOOP is too complex to be unrolled.  */
>> +static bool
>> +rs6000_complex_loop_p (struct loop *loop)
>> +{
>> +  unsigned call_num;
>> +
>> +  return loop->ninsns > 10
>> +&& (call_num = num_loop_calls (loop)) > 0
>> +&& (call_num + num_loop_branches (loop)) * 5 > loop->ninsns
>> +&& !single_exit (loop);
>> +}
>> +
>
>
> The assignment to call_num within the logic there concerns me.  I'd
> break that out.
>
> The 5 value is not explicitly mentioned elsewhere.  Contextually this
> appears to be evaluating the ratio of branches versus instructions
> within the loop.  Could use some clarity.
Yes, it is 20%.  I will make that clearer.  Thanks.
>
>
>
>>  /* Implement targetm.loop_unroll_adjust.  */
>> 
>>  static unsigned
>>  rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
>>  {
>> -   if (unroll_only_small_loops)
>> +  if (unroll_only_small_loops)
>
> indentation fix looks ok.
>
>>  {
>> -  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
>> - example we may want to unroll very small loops more times (4 perhaps).
>> - We also should use a PARAM for this.  */
>
> Still hardcoded values, and may still wish to eventually have this as a
> tunable param.   Probably OK to drop the 2nd sentence, but first and
> last sentences should probably stay.
>
>
>> +  if (loop->ninsns <= 6)
>> +return MIN (4, nunroll);
>>if (loop->ninsns <= 10)
>>  return MIN (2, nunroll);
>> -  else
>> -return 0;
>> +
>> +  return 0;
>>  }
>
>
> ok
>
>> 
>> +  if (rs6000_complex_loop_p (loop))
>> +return 0;
>> +
>>return nunroll;
>>  }
>> 
>
> ok


Re: [PATCH] rs6000: Split movsf_from_si from high word before reload[PR89310]

2020-07-07 Thread luoxhu via Gcc-patches



On 2020/7/8 05:31, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Jul 07, 2020 at 04:39:58PM +0800, luoxhu wrote:
>>> Lots of questions, sorry!
>>
>> Thanks for the nice suggestions; the initial patch contained many issues :)
> 
> Pretty much all of it should *work*, it just can be improved and
> simplified quite a bit :-)
> 
>> For this case, %1:SF matches with "=wa"?  And how to construct cases to
>> match("=?r", "wa") and ("=!r", "r") combinations, please?
> 
> operands[0], not operands[1]?
> 
> Simple testcases will not put the output into a GPR, unless you force
> the compiler to do that, because of the ? and !.
> 
> Often you can just do
> 
>asm("#" : "+r"(x));
> 
> to force "x" into a GPR at that point of the program.  But there is
> nothing stopping the compiler from copying it back to a VSR where it
> thinks that is cheaper ;-)
> 
> So maybe this pattern should just have the GPR-to-VSR alternative?  It
> does not look like the GPR destination variants are useful?
> 
>> +  rtx op0 = operands[0];
>> +  rtx op1 = operands[1];
>> +  rtx op2 = operands[2];
> 
> (Please just write out operands[N] everywhere).
> 
>> +  if (GET_CODE (operands[2]) == SCRATCH)
>> +op2 = gen_reg_rtx (DImode);
>> +
>> +  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
>> +  emit_insn (gen_anddi3 (op2, op1, mask));
> 
> Groovy :-)
> 
> So, it looks like you can remove the ? and ! alternatives, leaving just
> the first alternative?
> 

Thanks.

V3 update: keep only the GPR-to-VSR alternative and use operands[N].
Bootstrapped and regression tested on Power8-LE.


When extracting the high-part element from a DImode register, as in:

{%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}

split the insn before reload using an AND with a mask, to avoid generating
a 32-bit right shift followed by a 32-bit left shift.  This pattern also
appears in PR42475, PR67741, etc.

srdi 3,3,32
sldi 9,3,32
mtvsrd 1,9
xscvspdpn 1,1

=>

rldicr 3,3,0,31
mtvsrd 1,3
xscvspdpn 1,1

gcc/ChangeLog:

2020-07-08  Xionghu Luo  

PR rtl-optimization/89310
* config/rs6000/rs6000.md (movsf_from_si2): New
define_insn_and_split.

gcc/testsuite/ChangeLog:

2020-07-08  Xionghu Luo  

PR rtl-optimization/89310
* gcc.target/powerpc/pr89310.c: New test.
---
 gcc/config/rs6000/rs6000.md| 34 ++
 gcc/testsuite/gcc.target/powerpc/pr89310.c | 17 +++
 2 files changed, 51 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr89310.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4fcd6a94022..02c5171378c 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7593,6 +7593,40 @@ (define_insn_and_split "movsf_from_si"
"*,  *, p9v,   p8v,   *, *,
 p8v,p8v,   p8v,   *")])
 
+;; For extracting high part element from DImode register like:
+;; {%1:SF=unspec[r122:DI>>0x20#0] 86;clobber scratch;}
+;; split it before reload with "and mask" to avoid generating shift right
+;; 32 bit then shift left 32 bit.
+(define_insn_and_split "movsf_from_si2"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+   (unspec:SF [
+(subreg:SI (ashiftrt:DI
+  (match_operand:DI 1 "input_operand" "r")
+  (const_int 32))
+ 0)]
+UNSPEC_SF_FROM_SI))
+   (clobber (match_scratch:DI 2 "=r"))]
+  "TARGET_NO_SF_SUBREG"
+  "@
+  #"
+
+  "&& !reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)"
+  [(const_int 0)]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+operands[2] = gen_reg_rtx (DImode);
+
+  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << 32);
+  emit_insn (gen_anddi3 (operands[2], operands[1], mask));
+  emit_insn (gen_p8_mtvsrd_sf (operands[0], operands[2]));
+  emit_insn (gen_vsx_xscvspdpn_directmove (operands[0], operands[0]));
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
+
 
 ;; Move 64-bit binary/decimal floating point
 (define_expand "mov"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr89310.c 
b/gcc/testsuite/gcc.target/powerpc/pr89310.c
new file mode 100644
index 000..15e78509246
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr89310.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct s {
+  int i;
+  float f;
+};
+
+float
+foo (struct s arg)
+{
+  return arg.f;
+}
+
+/* { dg-final { scan-assembler-not {\msrdi\M} } } */
+/* { dg-final { scan-assembler-not {\msldi\M} } } */
+/* { dg-final { scan-assembler-times {\mrldicr\M} 1 } } */
-- 
2.21.0.777.g83232e3864



  1   2   >